Annotation of 43BSDTahoe/new/news/misc/report.awk, revision 1.1.1.1

1.1       root        1: From cbosgd!ucbvax!usenet Mon Oct 13 05:39:17 1986
                      2: Received: by beno.CSS.GOV (5.54/5.17)
                      3:        id AA01253; Mon, 13 Oct 86 05:39:12 EDT
                      4: Received: from cbosgd.UUCP by seismo.CSS.GOV (5.54/1.14)
                      5:        id AA03513; Mon, 13 Oct 86 05:39:11 EDT
                      6: Received: by cbosgd.ATT.COM (4.12/UUCP-Project/rel-1.0/06-28-86)
                      7:        id AA08778; Mon, 13 Oct 86 03:44:14 edt
                      8: Received: by ucbvax.Berkeley.EDU (5.53/1.17)
                      9:        id AA15536; Sun, 12 Oct 86 23:51:25 PDT
                     10: Date: Sun, 12 Oct 86 23:51:25 PDT
                     11: From: ucbvax!usenet (USENET News Administration)
                     12: Message-Id: <[email protected]>
                     13: To: cbosgd!backbone
                     14: Subject: a handy awk script for netnews log reports
                     15: Status: R
                     16: 
                     17: Since Mark didn't have a copy of this, I will assume that most of the
                     18: rest of you don't either, and send it along. The comments should be
                     19: explanation enough... If not, ask me.
                     20: 
                     21:        Erik E. Fair    ucbvax!fair     [email protected]
                     22: -------------------------------------------------------------------------------
                     23: #  USAGE: awk -f report_awk /usr/lib/news/log
                     24: #
                     25: #  AWK script which eats netnews log files and produces a summary of USENET
                     26: #  traffic and errors over the period of time that the log was collected.
                     27: #
                     28: #  August 31, 1986
                     29: #
                     30: #  Erik E. Fair <dual!fair>
                     31: #  Original Author, May 22, 1984
                     32: #
                     33: #  Brad Eacker <onyx!brad>
                     34: #  Modified to simplify the record processing and to sort the output.
                     35: #
                     36: #  Erik E. Fair <dual!fair>
                     37: #  Modified to provide information about control messages.
                     38: #
                     39: #  Erik E. Fair <dual!fair>
                     40: #  Bug in system name extraction fixed. It was assumed that the fourth field
                     41: #  (system name) always had a dot. local is one that doesn't. Some others
                     42: #  (including 2.9 sites) don't either.
                     43: #
                     44: #  Earl Wallace <pesnta!earlw>
                     45: #  The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
                     46: #  named "newstats" and called with no arguments.
                     47: #
                     48: #  Erik E. Fair <dual!fair>
                     49: #  Remove support for 2.10.1, revise for 2.10.2 to provide information
                     50: #  about junked articles, garbled articles, and bad newsgroups
                     51: #
                     52: #  Erik E. Fair <ucbvax!fair>
                     53: #  Minor bug fix to bad newsgroup reporting, also now counting ``old''
                     54: #  articles as junked, with counter for number that are `old'.
                     55: #
                     56: #  Erik E. Fair <ucbvax!fair>
                     57: #  Fix up the domain & local hosts support
                     58: #
                     59: #  Erik E. Fair <ucbvax!fair>
                     60: #  Fix up the counting of gatewayed material, add counting of "linecount"
                     61: #  problems. Additional cleanup to make things faster.
                     62: #
                     63: BEGIN{
                     64: #      Site configuration and counter setup, done once before any input is read.
                     65: #      this is the prefix that your site uses in hostnames to identify your
                     66: #      hosts (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai)
                     67: #      You will probably want to change (or add to) the following line
                     68: #
                     69:        lprefix = "ucb";
                     70:        lplen = length(lprefix);        # number of leading characters of a host name compared with lprefix
                     71: #
                     72: #      If you do bi-directional USENET gatewaying (e.g. mailing list
                     73: #      to newsgroup where the material flows both ways freely), this
                     74: #      should be the name in the sys file that you use to mail stuff
                     75: #      to the mailing lists.
                     76: #
                     77:        pseudo = "internet";            # a "sent" destination with this name is reported as rptname
                     78:        rptname = "(GATEWAY)";          # report label for gatewayed traffic
                     79: #
                     80: #      Top level domain names and what network they represent
                     81: #      (for use in counting stuff that is gatewayed)
                     82: #      (hosts ending in one of these are credited to rptname for postings, and for duplicates from non-neighbors)
                     83:        domains["ARPA"] = rptname;
                     84:        domains["arpa"] = rptname;
                     85:        domains["EDU"] = rptname;
                     86:        domains["edu"] = rptname;
                     87:        domains["GOV"] = rptname;
                     88:        domains["gov"] = rptname;
                     89:        domains["COM"] = rptname;
                     90:        domains["com"] = rptname;
                     91:        domains["MIL"] = rptname;
                     92:        domains["mil"] = rptname;
                     93:        domains["ORG"] = rptname;
                     94:        domains["org"] = rptname;
                     95:        domains["NET"] = rptname;
                     96:        domains["net"] = rptname;
                     97:        domains["UK"] = rptname;
                     98:        domains["uk"] = rptname;
                     99:        domains["DEC"] = rptname;
                    100:        domains["dec"] = rptname;
                    101:        domains["CSNET"] = rptname;
                    102:        domains["csnet"] = rptname;
                    103:        domains["BITNET"] = rptname;
                    104:        domains["bitnet"] = rptname;
                    105:        domains["MAILNET"] = rptname;
                    106:        domains["mailnet"] = rptname;
                    107:        domains["UUCP"] = rptname;
                    108:        domains["uucp"] = rptname;
                    109:        domains["OZ"] = rptname;
                    110:        domains["oz"] = rptname;
                    111:        domains["AU"] = rptname;
                    112:        domains["au"] = rptname;
                    113: #      sentinel key: the by-name selection sorts in END start from it and stop when nothing sorts below it
                    114: #      tilde chosen because it is ASCII 126 (don't change this)
                    115: #
                    116:        invalid = "~~~~~~";
                    117: #      every per-system table gets a zero entry under the sentinel (END's sort can never select that key)
                    118:        accept[invalid]   = 0;
                    119:        reject[invalid]   = 0;
                    120:        xmited[invalid]   = 0;
                    121:        control[invalid]  = 0;
                    122:        junked[invalid]   = 0;
                    123:        neighbor[invalid] = 0;
                    124:        badgrp  = 0;    # lines with $5 == "Unknown"
                    125:        garbled = 0;    # lines with $5 == "Inbound"
                    126:        lcount  = 0;    # line-count problem lines ($5 or $7 == "linecount")
                    127:        canfail = 0;    # lines with $6 == "cancel"
                    128:        candup  = 0;    # lines with $6 == "Cancelled"
                    129:        insfail = 0;    # lines with $6 == "install"
                    130:        old     = 0;    # lines with $6 == "too"
                    131: }
                    132: #      Rules are tried in file order; "next" ends processing of the current line.
                    133: #      Skip some things that we won't bother with
                    134: #      (empty lines, and lines whose fifth field is "from", "make" or "Cancelling")
                    135: /^$/                           { next }
                    136: $5 == "from"                   { next }
                    137: $5 == "make"                   { next }
                    138: $5 == "Cancelling"             { next }
                    139: #
                    140: #      Or that we just count
                    141: #      (END prints garbled and insfail after the totals, canfail and candup beside the "cancel" control count)
                    142: $5 == "Inbound"                        { garbled++; next }
                    143: $6 == "cancel"                 { canfail++; next }
                    144: $6 == "Cancelled"              { candup++; next }
                    145: $6 == "install"                        { insfail++; next }
                    146: #      Transmissions as logged by 2.10.2 (beta): "sent" is field 6 and the
                    147: #      destination systems start at field 8.  Anything from the first comma on
                    148: #      is cut off each name; pseudo is reported as rptname, others are neighbors.
                    149: $6 == "sent"   {
                    150:        for(k = 8; k <= NF; k++) {
                    151:                dest = $(k); cut = index(dest, ",");
                    152:                if (cut > 0) dest = substr(dest, 1, cut - 1);
                    153:                if (dest == pseudo) dest = rptname;
                    154:                else neighbor[dest] = 1;
                    155:                xmited[dest]++;
                    156:        }
                    157:        next;
                    158: }
                    159: #      Transmissions as logged by 2.11: "sent" is field 5 and the
                    160: #      destination systems start at field 7.  Anything from the first comma on
                    161: #      is cut off each name; pseudo is reported as rptname, others are neighbors.
                    162: $5 == "sent"   {
                    163:        for(k = 7; k <= NF; k++) {
                    164:                dest = $(k); cut = index(dest, ",");
                    165:                if (cut > 0) dest = substr(dest, 1, cut - 1);
                    166:                if (dest == pseudo) dest = rptname;
                    167:                else neighbor[dest] = 1;
                    168:                xmited[dest]++;
                    169:        }
                    170:        next;
                    171: }
                    172: #      Runs for every line not consumed by a rule above (it has no pattern).
                    173: #      Get the name of the system that did this (hostname = full name,
                    174: #      sys = short name), taking into account that not everyone believes
                    175: #      in domains.  There is no "next" here, so later rules still see the line.
                    176: {
                    177: #      if we get a route addr (we shouldn't, but...), take the last one
                    178: #      (i.e. whatever follows the final "@" in field 4)
                    179:        nhosts = split($4, hosts, "@");
                    180:        hostname = hosts[nhosts];
                    181: #
                    182: #      get the root domain name, and the hostname
                    183: #      (the last and the first dot-separated component of hostname)
                    184:        ndoms = split(hostname, doms, ".");
                    185:        domain = doms[ndoms];
                    186:        sys = doms[1];
                    187: #
                    188: #      check for local system, and if not that, then internet sites.
                    189: #      special case the network name replacement of specific host names,
                    190: #      such that the network name is there only on a `local' posting
                    191: #      (which is really gatewaying in disguise)
                    192: #
                    193:        if ($5 == "posted") {
                    194:                prefix = substr(sys, 1, lplen);
                    195:                if (prefix == lprefix) {
                    196:                        sys = "local";
                    197:                } else {
                    198:                        dom = domains[domain];  # empty unless the last component is a listed domain
                    199:                        if (dom) sys = dom;
                    200:                }
                    201:        }
                    202: }
                    203: #      Duplicates & receiveds/posted & control messages.
                    204: #      Accepted articles first: counted per system, and per top-level
                    205: #      newsgroup category (each comma-separated item of field 8, up to its first ".").
                    206: $5 == "posted" || $5 == "received" {
                    207:        accept[sys]++;
                    208:        if ($5 == "received") neighbor[sys] = 1;        # a system we receive from is a neighbor
                    209:        nng = split($8, ngl, ",");
                    210:        for(i = 1; i <= nng; i++) {
                    211:                dot = index(ngl[i], ".");
                    212:                if (dot) ng = substr(ngl[i], 1, (dot - 1));
                    213:                else ng = ngl[i];
                    214:                if (ng) newsgcnt[ng]++;                 # printed by END under "Netnews Categories Received"
                    215:        }
                    216:        next;
                    217: }
                    218: $5 == "Duplicate"      { reject[hostname]++; next }    # keyed by full host name; END maps these keys onto report names
                    219: $6 == "valid"          { junked[sys]++; next }         # counted as junked
                    220: $6 == "too"            { junked[sys]++; old++; next }  # junked as well, and tallied separately in old
                    221: $5 == "Unknown"                {
                    222:        x = length($7) - 2;             # field 7 minus its first and last character
                    223:        ng = substr($7, 2, x);          # presumably the group name without surrounding punctuation -- TODO confirm against the log format
                    224:        badng[ng]++;                    # printed by END under "Bad Newsgroups Received"
                    225:        badgrp++;
                    226:        next;
                    227: }
                    228: #
                    229: #      articles whose actual line count differs from the Line: header count
                    230: #      (field 7 = expected count, possibly with a trailing comma; field 9 = count actually got)
                    231: $5 == "linecount"      {
                    232:        expect = $7;
                    233: # awk does very strange things with non-numeric characters in numbers
                    234:        comma = index(expect, ",");
                    235:        if (comma != 0) expect = substr(expect, 1, (comma - 1));
                    236:        got = $9;
                    237:        diff = got - expect;
                    238:        lcount++;
                    239:        alc_host[sys] = 1;      # gives sys a row in END's "Received Article Length Problems" table
                    240:        neighbor[sys] = 1;
                    241:        if (diff < 0) {
                    242:                diff = 0 - diff;        # short article: keep the size of the shortfall
                    243:                a_nshort[sys]++;
                    244:                a_short[sys] += diff;
                    245:                if (a_smax[sys] < diff) a_smax[sys] = diff;
                    246:        } else {
                    247:                a_nlong[sys]++;         # a difference of zero also lands here
                    248:                a_long[sys] += diff;
                    249:                if (a_lmax[sys] < diff) a_lmax[sys] = diff;
                    250:        }
                    251:        next;
                    252: }
                    253: #
                    254: #      articles whose actual line count is Zero (also counted as rejected for sys)
                    255: #      NOTE(review): alc_host[sys] is not set here, so a system with only zero-length problems gets no row in END's length table -- confirm intent
                    256: $7 == "linecount"      {
                    257:        lcount++;
                    258:        a_zero[sys]++;
                    259:        reject[sys]++;
                    260:        next;
                    261: }
                    262: #
                    263: #      Control messages: counted per system and per value of field 10
                    264: #      (the ctot++ below is redundant: END zeroes ctot and re-sums control[])
                    265: $5 == "Ctl"    {
                    266:        ctot++;
                    267:        control[sys]++;
                    268:        ctlcnt[$(10)]++;        # printed by END under "Control Invocations"
                    269:        next;
                    270: }
                    271: #      Catch-all rule (no pattern): only lines not claimed by a "next" above get here.
                    272: #      Print anything we didn't recognize, it's probably an error message.
                    273: #      For the submitted report to USENET, do sed -e '1,/^$/d' file | inews
                    274: #      so that this cruft doesn't get out the door.
                    275: #
                    276: {
                    277:        print;
                    278: }
                    279: #
                    280: #      Summarize and print the report
                    281: #
                    282: END{
                    283: #      special processing for Duplicates, because we can't tell if
                    284: #      they came from a netnews neighbor or from the gatewaying
                    285: #      activities until we have processed the entire log.
                    286: #      Each count is moved from its host-name key to the name shown in the report.
                    287:        for( hostname in reject ) {
                    288: #      NOTE(review): this adds reject[] keys while iterating it; whether new keys are visited is up to the awk implementation
                    289: #      get the root domain name, and the hostname
                    290: #
                    291:                ndoms = split(hostname, doms, ".");
                    292:                domain = doms[ndoms];
                    293:                sys = doms[1];
                    294:                if (! neighbor[sys]) {
                    295:                        prefix = substr(sys, 1, lplen);
                    296:                        if (prefix == lprefix) {
                    297:                                sys = "local";
                    298:                        } else {
                    299:                                dom = domains[domain];
                    300:                                if (dom) sys = dom;
                    301:                        }
                    302:                }
                    303:                i = reject[hostname];
                    304:                reject[hostname] = 0;
                    305:                reject[sys] += i;
                    306:        }
                    307: 
                    308:        rtot = 0;
                    309:        for( i in reject ) {
                    310:                if (reject[i] > 0) {
                    311:                        list[i] = 1;    # list[] = names that get a row in the main table
                    312:                        rtot += reject[i];
                    313:                }
                    314:        }
                    315: 
                    316:        atot = 0;
                    317:        for( i in accept ) {
                    318:                list[i] = 1;
                    319:                atot += accept[i];
                    320:        }
                    321: 
                    322:        xtot = 0;
                    323:        for( i in xmited ) {
                    324:                list[i] = 1;
                    325:                xtot += xmited[i];
                    326:        }
                    327: 
                    328:        ctot = 0;
                    329:        for( i in control ) {
                    330:                list[i] = 1;
                    331:                ctot += control[i];
                    332:        }
                    333: 
                    334:        jtot = 0;
                    335:        for( i in junked ) {
                    336:                list[i] = 1;
                    337:                jtot += junked[i];
                    338:        }
                    339: #
                    340: # ctot is part of rtot, so we don't add it in to the grand total.
                    341: #
                    342:        totarticles = atot + rtot;
                    343:        if (totarticles == 0) totarticles = 1;  # keeps the percentage divisions below defined
                    344: 
                    345:        printf("\nSystem       \tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n");
                    346:        for( ; ; ) {
                    347: # selection sort: pick the smallest unprinted name; stop when nothing sorts below the sentinel
                    348:                i = invalid;
                    349:                for( j in list ) {
                    350:                        if ( list[j] > 0 && j < i ) i = j;
                    351:                }
                    352:                if ( i == invalid ) break;
                    353:                list[i] = 0;
                    354: #
                    355: #      control & junked are counted under accept.
                    356: #
                    357:                sitetot = accept[i] + reject[i];
                    358:                if (sitetot == 0) sitetot = 1;
                    359:                articles[i] = sitetot;  # remembered for the length-problem table further down
                    360: #
                    361: # What an 'orrible printf spec
                    362: #
                    363:                printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot);
                    364: #
                    365:        }
                    366:        printf("\nTOTALS        \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles);
                    367:        printf("\nTotal Articles processed %d", totarticles);
                    368:        if (old)        printf(", old %d", old);
                    369:        if (garbled)    printf(", garbled %d", garbled);
                    370:        if (insfail)    printf(", uninstallable %d", insfail);
                    371:        printf("\n");
                    372: 
                    373:        if (ctot) {
                    374:                printf("\nControl       Invocations\n");
                    375:                for( i in ctlcnt ) {
                    376:                        if (i == "cancel") {
                    377:                                printf("%-12s %6d", i, ctlcnt[i]);
                    378:                                if (canfail) printf(", %d failed", canfail);
                    379:                                if (candup) printf(", %d duplicate", candup);
                    380:                                printf("\n");
                    381:                        } else {
                    382:                                printf("%-12s %6d\n", i, ctlcnt[i]);
                    383:                        }
                    384:                }
                    385:        }
                    386: 
                    387:        if (lcount) {
                    388:                printf("\nReceived Article Length Problems\n");
                    389:                printf("System          Zero Short  Smax  Savg  Long  Lmax  Lavg Total %% Tot\n");
                    390:                for( i in alc_host ) {
                    391:                        nlong = a_nlong[i];
                    392:                        nshort = a_nshort[i];
                    393:                        if (nlong == 0) nlong = 1;
                    394:                        if (nshort == 0) nshort = 1;
                    395:                        lavg = a_long[i] / nlong; savg = a_short[i] / nshort;
                    396:                        if (articles[i] == 0) articles[i] = 1;  # system had no row in the main table; don't divide by zero
                    397:                        sitetot = (a_zero[i] + a_nshort[i] + a_nlong[i]);
                    398:                        printf("%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", i, a_zero[i], a_nshort[i], a_smax[i], savg, a_nlong[i], a_lmax[i], lavg, sitetot, (sitetot * 100) / articles[i]);
                    399:                }
                    400:        }
                    401: 
                    402:        if (atot) {
                    403:                printf("\nNetnews Categories Received\n");
                    404:                l = 0;
                    405:                for( i in newsgcnt ) {
                    406:                        if (l < length(i)) l = length(i);
                    407:                }
                    408:                fmt = sprintf("%%-%ds %%6d\n", l);      # name column as wide as the longest category
                    409:                for( ; ; ) {
                    410: # selection sort, largest count first; printed entries are zeroed
                    411:                        max = 0;
                    412:                        for( j in newsgcnt ) {
                    413:                                if (newsgcnt[j] > max) {
                    414:                                        i = j;
                    415:                                        max = newsgcnt[j];
                    416:                                }
                    417:                        }
                    418:                        if (max == 0) break;
                    419:                        printf(fmt, i, newsgcnt[i]);
                    420:                        newsgcnt[i] = 0;
                    421:                }
                    422:        }
                    423: 
                    424:        if (badgrp) {
                    425:                printf("\nBad Newsgroups Received\n");
                    426:                l = 0;
                    427:                for( i in badng ) {
                    428:                        if (l < length(i)) l = length(i);
                    429:                }
                    430:                fmt = sprintf("%%-%ds %%5d\n", l);
                    431:                for( ; ; ) {
                    432: # selection sort by name, same scheme as the main table
                    433:                        i = invalid;
                    434:                        for( j in badng ) {
                    435:                                if (badng[j] > 0 && j < i) i = j;
                    436:                        }
                    437:                        if (i == invalid) break;
                    438:                        printf(fmt, i, badng[i]);
                    439:                        badng[i] = 0;
                    440:                }
                    441:        }
                    442: }
                    443: 

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.