Annotation of 43BSDTahoe/new/news/misc/report.awk, revision 1.1

1.1     ! root        1: From cbosgd!ucbvax!usenet Mon Oct 13 05:39:17 1986
        !             2: Received: by beno.CSS.GOV (5.54/5.17)
        !             3:        id AA01253; Mon, 13 Oct 86 05:39:12 EDT
        !             4: Received: from cbosgd.UUCP by seismo.CSS.GOV (5.54/1.14)
        !             5:        id AA03513; Mon, 13 Oct 86 05:39:11 EDT
        !             6: Received: by cbosgd.ATT.COM (4.12/UUCP-Project/rel-1.0/06-28-86)
        !             7:        id AA08778; Mon, 13 Oct 86 03:44:14 edt
        !             8: Received: by ucbvax.Berkeley.EDU (5.53/1.17)
        !             9:        id AA15536; Sun, 12 Oct 86 23:51:25 PDT
        !            10: Date: Sun, 12 Oct 86 23:51:25 PDT
        !            11: From: ucbvax!usenet (USENET News Administration)
        !            12: Message-Id: <[email protected]>
        !            13: To: cbosgd!backbone
        !            14: Subject: a handy awk script for netnews log reports
        !            15: Status: R
        !            16: 
        !            17: Since Mark didn't have a copy of this, I will assume that most of the
        !            18: rest of you don't either, and send it along. The comments should be
        !            19: explanation enough... If not, ask me.
        !            20: 
        !            21:        Erik E. Fair    ucbvax!fair     [email protected]
        !            22: -------------------------------------------------------------------------------
        !            23: #  USAGE: awk -f report_awk /usr/lib/news/log
        !            24: #
        !            25: #  AWK script which eats netnews log files and produces a summary of USENET
        !            26: #  traffic and errors over the period of time that the log was collected.
        !            27: #
        !            28: #  August 31, 1986
        !            29: #
        !            30: #  Erik E. Fair <dual!fair>
        !            31: #  Original Author, May 22, 1984
        !            32: #
        !            33: #  Brad Eacker <onyx!brad>
        !            34: #  Modified to simplify the record processing and to sort the output.
        !            35: #
        !            36: #  Erik E. Fair <dual!fair>
        !            37: #  Modified to provide information about control messages.
        !            38: #
        !            39: #  Erik E. Fair <dual!fair>
        !            40: #  Bug in system name extraction fixed. It was assumed that the fourth field
        !            41: #  (system name) always had a dot. local is one that doesn't. Some others
        !            42: #  (including 2.9 sites) don't either.
        !            43: #
        !            44: #  Earl Wallace <pesnta!earlw>
        !            45: #  The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
        !            46: #  named "newstats" and called with no arguments.
        !            47: #
        !            48: #  Erik E. Fair <dual!fair>
        !            49: #  Remove support for 2.10.1, revise for 2.10.2 to provide information
        !            50: #  about junked articles, garbled articles, and bad newsgroups
        !            51: #
        !            52: #  Erik E. Fair <ucbvax!fair>
        !            53: #  Minor bug fix to bad newsgroup reporting, also now counting ``old''
        !            54: #  articles as junked, with counter for number that are `old'.
        !            55: #
        !            56: #  Erik E. Fair <ucbvax!fair>
        !            57: #  Fix up the domain & local hosts support
        !            58: #
        !            59: #  Erik E. Fair <ucbvax!fair>
        !            60: #  Fix up the counting of gatewayed material, add counting of "linecount"
        !            61: #  problems. Additional cleanup to make things faster.
        !            62: #
        !            63: BEGIN{
        !            64: #      Site-specific settings, lookup tables and counters used by the rules below.
        !            65: #      lprefix is the prefix that your site uses in hostnames to identify its
        !            66: #      own hosts (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai); hosts that
        !            67: #      match it can be reported as "local". Change (or add to) the next line.
        !            68: #
        !            69:        lprefix = "ucb";
        !            70:        lplen = length(lprefix);
        !            71: #
        !            72: #      If you do bi-directional USENET gatewaying (e.g. mailing list
        !            73: #      to newsgroup where the material flows both ways freely), pseudo
        !            74: #      should be the name in the sys file that you use to mail stuff
        !            75: #      to the mailing lists; it is reported under rptname instead.
        !            76: #
        !            77:        pseudo = "internet";
        !            78:        rptname = "(GATEWAY)";
        !            79: #
        !            80: #      Top level domain names (in both cases) and the report name they are
        !            81: #      counted under (for use in counting stuff that is gatewayed)
        !            82: #
        !            83:        domains["ARPA"] = rptname;
        !            84:        domains["arpa"] = rptname;
        !            85:        domains["EDU"] = rptname;
        !            86:        domains["edu"] = rptname;
        !            87:        domains["GOV"] = rptname;
        !            88:        domains["gov"] = rptname;
        !            89:        domains["COM"] = rptname;
        !            90:        domains["com"] = rptname;
        !            91:        domains["MIL"] = rptname;
        !            92:        domains["mil"] = rptname;
        !            93:        domains["ORG"] = rptname;
        !            94:        domains["org"] = rptname;
        !            95:        domains["NET"] = rptname;
        !            96:        domains["net"] = rptname;
        !            97:        domains["UK"] = rptname;
        !            98:        domains["uk"] = rptname;
        !            99:        domains["DEC"] = rptname;
        !           100:        domains["dec"] = rptname;
        !           101:        domains["CSNET"] = rptname;
        !           102:        domains["csnet"] = rptname;
        !           103:        domains["BITNET"] = rptname;
        !           104:        domains["bitnet"] = rptname;
        !           105:        domains["MAILNET"] = rptname;
        !           106:        domains["mailnet"] = rptname;
        !           107:        domains["UUCP"] = rptname;
        !           108:        domains["uucp"] = rptname;
        !           109:        domains["OZ"] = rptname;
        !           110:        domains["oz"] = rptname;
        !           111:        domains["AU"] = rptname;
        !           112:        domains["au"] = rptname;
        !           113: #      Sentinel key that must sort after every real name in the END sorts;
        !           114: #      tilde chosen because it is ASCII 126 (don't change this)
        !           115: #
        !           116:        invalid = "~~~~~~";
        !           117: #      Seed the per-system arrays with the sentinel key and zero the counters
        !           118:        accept[invalid]   = 0;
        !           119:        reject[invalid]   = 0;
        !           120:        xmited[invalid]   = 0;
        !           121:        control[invalid]  = 0;
        !           122:        junked[invalid]   = 0;
        !           123:        neighbor[invalid] = 0;
        !           124:        badgrp  = 0;
        !           125:        garbled = 0;
        !           126:        lcount  = 0;
        !           127:        canfail = 0;
        !           128:        candup  = 0;
        !           129:        insfail = 0;
        !           130:        old     = 0;
        !           131: }
        !           132: #
        !           133: #      Skip blank lines and log entries that the report does not cover
        !           134: #
        !           135: /^$/                           { next }
        !           136: $5 == "from"                   { next }
        !           137: $5 == "make"                   { next }
        !           138: $5 == "Cancelling"             { next }
        !           139: #
        !           140: #      Entries that are only tallied; the totals are printed by the END rule
        !           141: #
        !           142: $5 == "Inbound"                        { garbled++; next }
        !           143: $6 == "cancel"                 { canfail++; next }
        !           144: $6 == "Cancelled"              { candup++; next }
        !           145: $6 == "install"                        { insfail++; next }
        !           146: #      Articles sent to remote systems (this is what 2.10.2 (beta) says):
        !           147: #      fields 8..NF are system names. Each is cut at its first comma; the gateway
        !           148: #      pseudo-system is counted under rptname, every other one becomes a neighbor.
        !           149: $6 == "sent"   {
        !           150:        for(j = 8; j <= NF; j++) {
        !           151:                comma = index( $(j), ",");
        !           152:                if (comma != 0) $(j) = substr( $(j), 1, (comma - 1));
        !           153:                if ($(j) == pseudo) $(j) = rptname;
        !           154:                else neighbor[$(j)] = 1;
        !           155:                xmited[$(j)]++;
        !           156:        }
        !           157:        next;
        !           158: }
        !           159: #      Articles sent to remote systems (this is what 2.11 says):
        !           160: #      same processing as the 2.10.2 rule, but "sent" is field 5 and the
        !           161: #      system names are fields 7..NF.
        !           162: $5 == "sent"   {
        !           163:        for(j = 7; j <= NF; j++) {
        !           164:                comma = index( $(j), ",");
        !           165:                if (comma != 0) $(j) = substr( $(j), 1, (comma - 1));
        !           166:                if ($(j) == pseudo) $(j) = rptname;
        !           167:                else neighbor[$(j)] = 1;
        !           168:                xmited[$(j)]++;
        !           169:        }
        !           170:        next;
        !           171: }
        !           172: #
        !           173: #      Get the name of the system that did this (from field 4),
        !           174: #      taking into account that not everyone believes in domains.
        !           175: #      Sets hostname, domain and sys for the rules that follow (no `next' here).
        !           176: {
        !           177: #      if we get a route addr (we shouldn't, but...), take the part after the last @
        !           178: #
        !           179:        nhosts = split($4, hosts, "@");
        !           180:        hostname = hosts[nhosts];
        !           181: #
        !           182: #      domain = last dot-separated component, sys = first component
        !           183: #
        !           184:        ndoms = split(hostname, doms, ".");
        !           185:        domain = doms[ndoms];
        !           186:        sys = doms[1];
        !           187: #
        !           188: #      For postings only: a host carrying the local prefix is counted as
        !           189: #      "local"; otherwise a host whose top level domain is listed in
        !           190: #      domains[] is counted under that table's report name
        !           191: #      (such a `local' posting is really gatewaying in disguise)
        !           192: #
        !           193:        if ($5 == "posted") {
        !           194:                prefix = substr(sys, 1, lplen);
        !           195:                if (prefix == lprefix) {
        !           196:                        sys = "local";
        !           197:                } else {
        !           198:                        dom = domains[domain];
        !           199:                        if (dom) sys = dom;
        !           200:                }
        !           201:        }
        !           202: }
        !           203: #      Accepted articles (posted or received): credit the system, mark a
        !           204: #      sender of received articles as a neighbor, and count each newsgroup
        !           205: #      in field 8 under the part of its name before the first dot.
        !           206: $5 == "posted" || $5 == "received" {
        !           207:        accept[sys]++;
        !           208:        if ($5 == "received") neighbor[sys] = 1;
        !           209:        nng = split($8, ngl, ",");
        !           210:        for(i = 1; i <= nng; i++) {
        !           211:                dot = index(ngl[i], ".");
        !           212:                if (dot) ng = substr(ngl[i], 1, (dot - 1));
        !           213:                else ng = ngl[i];
        !           214:                if (ng) newsgcnt[ng]++;
        !           215:        }
        !           216:        next;
        !           217: }
        !           218: $5 == "Duplicate"      { reject[hostname]++; next }  # keyed by full hostname; folded into a system name in END
        !           219: $6 == "valid"          { junked[sys]++; next }  # junked article
        !           220: $6 == "too"            { junked[sys]++; old++; next }  # junked, and also counted as old
        !           221: $5 == "Unknown"                {  # tally bad newsgroup names for the "Bad Newsgroups Received" section
        !           222:        x = length($7) - 2;
        !           223:        ng = substr($7, 2, x);  # field 7 without its first and last character
        !           224:        badng[ng]++;
        !           225:        badgrp++;
        !           226:        next;
        !           227: }
        !           228: #      articles whose actual line count differs from the Lines: header count;
        !           229: #      field 7 is the expected count, field 9 the count actually found.
        !           230: #      Per system: number, total and maximum of short and of long differences.
        !           231: $5 == "linecount"      {
        !           232:        expect = $7;
        !           233: # awk does very strange things with non-numeric characters in numbers
        !           234:        comma = index(expect, ",");
        !           235:        if (comma != 0) expect = substr(expect, 1, (comma - 1));
        !           236:        got = $9;
        !           237:        diff = got - expect;
        !           238:        lcount++;
        !           239:        alc_host[sys] = 1;
        !           240:        neighbor[sys] = 1;
        !           241:        if (diff < 0) {
        !           242:                diff = 0 - diff;
        !           243:                a_nshort[sys]++;
        !           244:                a_short[sys] += diff;
        !           245:                if (a_smax[sys] < diff) a_smax[sys] = diff;
        !           246:        } else {
        !           247:                a_nlong[sys]++;
        !           248:                a_long[sys] += diff;
        !           249:                if (a_lmax[sys] < diff) a_lmax[sys] = diff;
        !           250:        }
        !           251:        next;
        !           252: }
        !           253: #
        !           254: #      articles whose actual line count is zero (also counted as rejected)
        !           255: #
        !           256: $7 == "linecount"      {
        !           257:        lcount++;
        !           258:        a_zero[sys]++;
        !           259:        reject[sys]++;
        !           260:        next;
        !           261: }
        !           262: #
        !           263: #      Control messages: counted per system and per control type (field 10)
        !           264: #
        !           265: $5 == "Ctl"    {
        !           266:        ctot++;  # END recomputes ctot from control[], so this tally is not what gets printed
        !           267:        control[sys]++;
        !           268:        ctlcnt[$(10)]++;
        !           269:        next;
        !           270: }
        !           271: #
        !           272: #      Print anything no rule above consumed; it's probably an error message.
        !           273: #      The report proper starts with a blank line, so for the report submitted
        !           274: #      to USENET do sed -e '1,/^$/d' file | inews to keep this cruft out.
        !           275: #
        !           276: {
        !           277:        print;
        !           278: }
        !           279: #      Summarize and print the report: per-system table and totals, then
        !           280: #      (when non-empty) control message counts, article length problems,
        !           281: #      newsgroup categories received, and bad newsgroups received.
        !           282: END{
        !           283: #      special processing for Duplicates, because we can't tell if
        !           284: #      they came from a netnews neighbor or from the gatewaying
        !           285: #      activities until we have processed the entire log.
        !           286: #
        !           287:        for( hostname in reject ) {
        !           288: #
        !           289: #      get the root domain name, and the hostname
        !           290: #
        !           291:                ndoms = split(hostname, doms, ".");
        !           292:                domain = doms[ndoms];
        !           293:                sys = doms[1];
        !           294:                if (! neighbor[sys]) {
        !           295:                        prefix = substr(sys, 1, lplen);
        !           296:                        if (prefix == lprefix) {
        !           297:                                sys = "local";
        !           298:                        } else {
        !           299:                                dom = domains[domain];
        !           300:                                if (dom) sys = dom;
        !           301:                        }
        !           302:                }
        !           303:                i = reject[hostname];
        !           304:                reject[hostname] = 0;
        !           305:                reject[sys] += i;
        !           306:        }
        !           307: 
        !           308:        rtot = 0;
        !           309:        for( i in reject ) {
        !           310:                if (reject[i] > 0) {
        !           311:                        list[i] = 1;
        !           312:                        rtot += reject[i];
        !           313:                }
        !           314:        }
        !           315: 
        !           316:        atot = 0;
        !           317:        for( i in accept ) {
        !           318:                list[i] = 1;
        !           319:                atot += accept[i];
        !           320:        }
        !           321: 
        !           322:        xtot = 0;
        !           323:        for( i in xmited ) {
        !           324:                list[i] = 1;
        !           325:                xtot += xmited[i];
        !           326:        }
        !           327: 
        !           328:        ctot = 0;
        !           329:        for( i in control ) {
        !           330:                list[i] = 1;
        !           331:                ctot += control[i];
        !           332:        }
        !           333: 
        !           334:        jtot = 0;
        !           335:        for( i in junked ) {
        !           336:                list[i] = 1;
        !           337:                jtot += junked[i];
        !           338:        }
        !           339: #
        !           340: # grand total is accepted + rejected; ctot and jtot are not added in.
        !           341: #
        !           342:        totarticles = atot + rtot;
        !           343:        if (totarticles == 0) totarticles = 1;
        !           344: 
        !           345:        printf("\nSystem       \tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n");
        !           346:        for( ; ; ) {
        !           347: # selection sort: pick the smallest name still flagged in list[], stop at the sentinel
        !           348:                i = invalid;
        !           349:                for( j in list ) {
        !           350:                        if ( list[j] > 0 && j < i ) i = j;
        !           351:                }
        !           352:                if ( i == invalid ) break;
        !           353:                list[i] = 0;
        !           354: #
        !           355: #      control & junked are counted under accept.
        !           356: #
        !           357:                sitetot = accept[i] + reject[i];
        !           358:                if (sitetot == 0) sitetot = 1;
        !           359:                articles[i] = sitetot;
        !           360: #
        !           361: # What an 'orrible printf spec
        !           362: #
        !           363:                printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot);
        !           364: #
        !           365:        }
        !           366:        printf("\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles);
        !           367:        printf("\nTotal Articles processed %d", totarticles);
        !           368:        if (old)        printf(", old %d", old);
        !           369:        if (garbled)    printf(", garbled %d", garbled);
        !           370:        if (insfail)    printf(", uninstallable %d", insfail);
        !           371:        printf("\n");
        !           372: 
        !           373:        if (ctot) {
        !           374:                printf("\nControl       Invocations\n");
        !           375:                for( i in ctlcnt ) {
        !           376:                        if (i == "cancel") {
        !           377:                                printf("%-12s %6d", i, ctlcnt[i]);
        !           378:                                if (canfail) printf(", %d failed", canfail);
        !           379:                                if (candup) printf(", %d duplicate", candup);
        !           380:                                printf("\n");
        !           381:                        } else {
        !           382:                                printf("%-12s %6d\n", i, ctlcnt[i]);
        !           383:                        }
        !           384:                }
        !           385:        }
        !           386: 
        !           387:        if (lcount) {
        !           388:                printf("\nReceived Article Length Problems\n");
        !           389:                printf("System          Zero Short  Smax  Savg  Long  Lmax  Lavg Total %% Tot\n");
        !           390:                for( i in alc_host ) {
        !           391:                        nlong = a_nlong[i]; nshort = a_nshort[i];
        !           392:                        if (nlong == 0) nlong = 1;
        !           393:                        if (nshort == 0) nshort = 1;
        !           394:                        lavg = a_long[i] / nlong;
        !           395:                        savg = a_short[i] / nshort;
        !           396:                        sitetot = (a_zero[i] + a_nshort[i] + a_nlong[i]); pct = 0;
        !           397:                        if (articles[i] > 0) pct = (sitetot * 100) / articles[i];  # articles[i] is unset for a system seen only in linecount entries; dividing by it would abort awk
        !           398:                        printf("%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", i, a_zero[i], a_nshort[i], a_smax[i], savg, a_nlong[i], a_lmax[i], lavg, sitetot, pct);
        !           399:                }
        !           400:        }
        !           401: 
        !           402:        if (atot) {
        !           403:                printf("\nNetnews Categories Received\n");
        !           404:                l = 0;
        !           405:                for( i in newsgcnt ) {
        !           406:                        if (l < length(i)) l = length(i);
        !           407:                }
        !           408:                fmt = sprintf("%%-%ds %%6d\n", l);
        !           409:                for( ; ; ) {
        !           410: # selection sort: largest remaining count first; printed entries are zeroed
        !           411:                        max = 0;
        !           412:                        for( j in newsgcnt ) {
        !           413:                                if (newsgcnt[j] > max) {
        !           414:                                        i = j;
        !           415:                                        max = newsgcnt[j];
        !           416:                                }
        !           417:                        }
        !           418:                        if (max == 0) break;
        !           419:                        printf(fmt, i, newsgcnt[i]);
        !           420:                        newsgcnt[i] = 0;
        !           421:                }
        !           422:        }
        !           423: 
        !           424:        if (badgrp) {
        !           425:                printf("\nBad Newsgroups Received\n");
        !           426:                l = 0;
        !           427:                for( i in badng ) {
        !           428:                        if (l < length(i)) l = length(i);
        !           429:                }
        !           430:                fmt = sprintf("%%-%ds %%5d\n", l);
        !           431:                for( ; ; ) {
        !           432: # selection sort: smallest remaining name first; printed entries are zeroed
        !           433:                        i = invalid;
        !           434:                        for( j in badng ) {
        !           435:                                if (badng[j] > 0 && j < i) i = j;
        !           436:                        }
        !           437:                        if (i == invalid) break;
        !           438:                        printf(fmt, i, badng[i]);
        !           439:                        badng[i] = 0;
        !           440:                }
        !           441:        }
        !           442: }
        !           443: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.