|
|
From cbosgd!ucbvax!usenet Mon Oct 13 05:39:17 1986
Received: by beno.CSS.GOV (5.54/5.17)
	id AA01253; Mon, 13 Oct 86 05:39:12 EDT
Received: from cbosgd.UUCP by seismo.CSS.GOV (5.54/1.14)
	id AA03513; Mon, 13 Oct 86 05:39:11 EDT
Received: by cbosgd.ATT.COM (4.12/UUCP-Project/rel-1.0/06-28-86)
	id AA08778; Mon, 13 Oct 86 03:44:14 edt
Received: by ucbvax.Berkeley.EDU (5.53/1.17)
	id AA15536; Sun, 12 Oct 86 23:51:25 PDT
Date: Sun, 12 Oct 86 23:51:25 PDT
From: ucbvax!usenet (USENET News Administration)
Message-Id: <[email protected]>
To: cbosgd!backbone
Subject: a handy awk script for netnews log reports
Status: R

Since Mark didn't have a copy of this, I will assume that most of the
rest of you don't either, and send it along. The comments should be
explanation enough... If not, ask me.

	Erik E. Fair	ucbvax!fair	[email protected]
-------------------------------------------------------------------------------
# USAGE: awk -f report_awk /usr/lib/news/log
#
# AWK script which eats netnews log files and produces a summary of USENET
# traffic and errors over the period of time that the log was collected.
#
# August 31, 1986
#
# Erik E. Fair <dual!fair>
#	Original Author, May 22, 1984
#
# Brad Eacker <onyx!brad>
#	Modified to simplify the record processing and to sort the output.
#
# Erik E. Fair <dual!fair>
#	Modified to provide information about control messages.
#
# Erik E. Fair <dual!fair>
#	Bug in system name extraction fixed. It was assumed that the fourth field
#	(system name) always had a dot. local is one that doesn't. Some others
#	(including 2.9 sites) don't either.
#
# Earl Wallace <pesnta!earlw>
#	The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
#	named "newstats" and called with no arguments.
#
# Erik E. Fair <dual!fair>
#	Remove support for 2.10.1, revise for 2.10.2 to provide information
#	about junked articles, garbled articles, and bad newsgroups
#
# Erik E. Fair <ucbvax!fair>
#	Minor bug fix to bad newsgroup reporting, also now counting ``old''
#	articles as junked, with counter for number that are `old'.
#
# Erik E. Fair <ucbvax!fair>
#	Fix up the domain & local hosts support
#
# Erik E. Fair <ucbvax!fair>
#	Fix up the counting of gatewayed material, add counting of "linecount"
#	problems. Additional cleanup to make things faster.
#
# Log record layout, as used by the rules below:
#	$4	name of the system the record is about (possibly domain-style)
#	$5/$6	keyword identifying the kind of record
#
BEGIN{
#
#	this is the prefix that your site uses in hostnames to identify your
#	hosts (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai)
#	You will probably want to change (or add to) the following line
#
	lprefix = "ucb";
	lplen = length(lprefix);
#
#	If you do bi-directional USENET gatewaying (e.g. mailing list
#	to newsgroup where the material flows both ways freely), this
#	should be the name in the sys file that you use to mail stuff
#	to the mailing lists.
#
	pseudo = "internet";
	rptname = "(GATEWAY)";
#
#	Top level domain names and what network they represent
#	(for use in counting stuff that is gatewayed)
#
	domains["ARPA"] = rptname;
	domains["arpa"] = rptname;
	domains["EDU"] = rptname;
	domains["edu"] = rptname;
	domains["GOV"] = rptname;
	domains["gov"] = rptname;
	domains["COM"] = rptname;
	domains["com"] = rptname;
	domains["MIL"] = rptname;
	domains["mil"] = rptname;
	domains["ORG"] = rptname;
	domains["org"] = rptname;
	domains["NET"] = rptname;
	domains["net"] = rptname;
	domains["UK"] = rptname;
	domains["uk"] = rptname;
	domains["DEC"] = rptname;
	domains["dec"] = rptname;
	domains["CSNET"] = rptname;
	domains["csnet"] = rptname;
	domains["BITNET"] = rptname;
	domains["bitnet"] = rptname;
	domains["MAILNET"] = rptname;
	domains["mailnet"] = rptname;
	domains["UUCP"] = rptname;
	domains["uucp"] = rptname;
	domains["OZ"] = rptname;
	domains["oz"] = rptname;
	domains["AU"] = rptname;
	domains["au"] = rptname;
#
#	tilde chosen because it is ASCII 126 (don't change this)
#	It is the sentinel for the string-comparison selection sorts in END:
#	a key is only selected when it compares less than this value.
#
	invalid = "~~~~~~";
#
	accept[invalid] = 0;
	reject[invalid] = 0;
	xmited[invalid] = 0;
	control[invalid] = 0;
	junked[invalid] = 0;
	neighbor[invalid] = 0;
	badgrp = 0;
	garbled = 0;
	lcount = 0;
	canfail = 0;
	candup = 0;
	insfail = 0;
	old = 0;
}
#
# Skip some things that we won't bother with
#
/^$/			{ next }
$5 == "from"		{ next }
$5 == "make"		{ next }
$5 == "Cancelling"	{ next }
#
# Or that we just count
#
$5 == "Inbound"		{ garbled++; next }
$6 == "cancel"		{ canfail++; next }
$6 == "Cancelled"	{ candup++; next }
$6 == "install"		{ insfail++; next }
#
# Articles sent to remote systems (this is what 2.10.2 (beta) says)
# Fields from $8 on are system names; anything from a comma on is cut off.
#
$6 == "sent" {
	for(j = 8; j <= NF; j++) {
		comma = index( $(j), ",");
		if (comma != 0) $(j) = substr( $(j), 1, (comma - 1));
		if ($(j) == pseudo) $(j) = rptname;
		else neighbor[$(j)] = 1;
		xmited[$(j)]++;
	}
	next;
}
#
# Articles sent to remote systems (this is what 2.11 says)
# Same as above, except that the system names start at $7.
#
$5 == "sent" {
	for(j = 7; j <= NF; j++) {
		comma = index( $(j), ",");
		if (comma != 0) $(j) = substr( $(j), 1, (comma - 1));
		if ($(j) == pseudo) $(j) = rptname;
		else neighbor[$(j)] = 1;
		xmited[$(j)]++;
	}
	next;
}
#
# Get the name of the system that did this,
# taking into account that not everyone believes in domains.
# Sets hostname, domain and sys for the rules that follow.
#
{
#	if we get a route addr (we shouldn't, but...), take the last one
#
	nhosts = split($4, hosts, "@");
	hostname = hosts[nhosts];
#
#	get the root domain name, and the hostname
#
	ndoms = split(hostname, doms, ".");
	domain = doms[ndoms];
	sys = doms[1];
#
#	check for local system, and if not that, then internet sites.
#	special case the network name replacement of specific host names,
#	such that the network name is there only on a `local' posting
#	(which is really gatewaying in disguise)
#
	if ($5 == "posted") {
		prefix = substr(sys, 1, lplen);
		if (prefix == lprefix) {
			sys = "local";
		} else {
			dom = domains[domain];
			if (dom) sys = dom;
		}
	}
}
#
# Duplicates & receiveds/posted & control messages
#
$5 == "posted" || $5 == "received" {
	accept[sys]++;
	if ($5 == "received") neighbor[sys] = 1;
#	count each newsgroup in $8 under its top-level category
	nng = split($8, ngl, ",");
	for(i = 1; i <= nng; i++) {
		dot = index(ngl[i], ".");
		if (dot) ng = substr(ngl[i], 1, (dot - 1));
		else ng = ngl[i];
		if (ng) newsgcnt[ng]++;
	}
	next;
}
#
# Duplicates are keyed by the full hostname here; END folds them into
# per-system counts once the whole log has been seen.
#
$5 == "Duplicate"	{ reject[hostname]++; next }
$6 == "valid"		{ junked[sys]++; next }
$6 == "too"		{ junked[sys]++; old++; next }
$5 == "Unknown" {
#	drop the first and last character of $7 to get the newsgroup name
	x = length($7) - 2;
	ng = substr($7, 2, x);
	badng[ng]++;
	badgrp++;
	next;
}
#
# articles whose actual line count differs from the Line: header count
#
$5 == "linecount" {
	expect = $7;
#	awk does very strange things with non-numeric characters in numbers
	comma = index(expect, ",");
	if (comma != 0) expect = substr(expect, 1, (comma - 1));
	got = $9;
	diff = got - expect;
	lcount++;
	alc_host[sys] = 1;
	neighbor[sys] = 1;
	if (diff < 0) {
		diff = 0 - diff;
		a_nshort[sys]++;
		a_short[sys] += diff;
		if (a_smax[sys] < diff) a_smax[sys] = diff;
	} else {
		a_nlong[sys]++;
		a_long[sys] += diff;
		if (a_lmax[sys] < diff) a_lmax[sys] = diff;
	}
	next;
}
#
# articles whose actual line count is Zero
#
$7 == "linecount" {
	lcount++;
	a_zero[sys]++;
	reject[sys]++;
	next;
}
#
# Control messages
#
$5 == "Ctl" {
	ctot++;
	control[sys]++;
	ctlcnt[$(10)]++;
	next;
}
#
# Print anything we didn't recognize, it's probably an error message.
# For the submitted report to USENET, do sed -e '1,/^$/d' file | inews
# so that this cruft doesn't get out the door.
#
{
	print;
}
#
# Summarize and print the report
#
END{
#	special processing for Duplicates, because we can't tell if
#	they came from a netnews neighbor or from the gatewaying
#	activities until we have processed the entire log.
#
#	The raw counts are first moved into a scratch array and only then
#	re-keyed by system name: adding elements to an array while a
#	for-in loop is scanning that same array has unspecified results.
#
	for( hostname in reject ) {
		rawrej[hostname] = reject[hostname];
		reject[hostname] = 0;
	}
	for( hostname in rawrej ) {
#
#		get the root domain name, and the hostname
#
		ndoms = split(hostname, doms, ".");
		domain = doms[ndoms];
		sys = doms[1];
		if (! neighbor[sys]) {
			prefix = substr(sys, 1, lplen);
			if (prefix == lprefix) {
				sys = "local";
			} else {
				dom = domains[domain];
				if (dom) sys = dom;
			}
		}
		reject[sys] += rawrej[hostname];
	}

#	list[] collects every system that gets a row in the main table
	rtot = 0;
	for( i in reject ) {
		if (reject[i] > 0) {
			list[i] = 1;
			rtot += reject[i];
		}
	}

	atot = 0;
	for( i in accept ) {
		list[i] = 1;
		atot += accept[i];
	}

	xtot = 0;
	for( i in xmited ) {
		list[i] = 1;
		xtot += xmited[i];
	}

	ctot = 0;
	for( i in control ) {
		list[i] = 1;
		ctot += control[i];
	}

	jtot = 0;
	for( i in junked ) {
		list[i] = 1;
		jtot += junked[i];
	}
#
#	ctot is part of rtot, so we don't add it in to the grand total.
#
	totarticles = atot + rtot;
	if (totarticles == 0) totarticles = 1;

#
#	The header and TOTALS labels are padded with the same %-14s that
#	the data rows use, so that the columns line up.
#
	printf("\n%-14s\tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n", "System");
	for( ; ; ) {
#		selection sort
		i = invalid;
		for( j in list ) {
			if ( list[j] > 0 && j < i ) i = j;
		}
		if ( i == invalid ) break;
		list[i] = 0;
#
#		control & junked are counted under accept.
#
		sitetot = accept[i] + reject[i];
		if (sitetot == 0) sitetot = 1;
		articles[i] = sitetot;
#
#		What an 'orrible printf spec
#
		printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot);
#
	}
	printf("\n%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", "TOTALS", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles);
	printf("\nTotal Articles processed %d", totarticles);
	if (old) printf(", old %d", old);
	if (garbled) printf(", garbled %d", garbled);
	if (insfail) printf(", uninstallable %d", insfail);
	printf("\n");

	if (ctot) {
		printf("\nControl Invocations\n");
		for( i in ctlcnt ) {
			if (i == "cancel") {
				printf("%-12s %6d", i, ctlcnt[i]);
				if (canfail) printf(", %d failed", canfail);
				if (candup) printf(", %d duplicate", candup);
				printf("\n");
			} else {
				printf("%-12s %6d\n", i, ctlcnt[i]);
			}
		}
	}

	if (lcount) {
		printf("\nReceived Article Length Problems\n");
#		header widths mirror the data row format below
		printf("%-14s %5s %5s %5s %5s %5s %5s %5s %5s %5s\n", "System", "Zero", "Short", "Smax", "Savg", "Long", "Lmax", "Lavg", "Total", "% Tot");
		for( i in alc_host ) {
			nlong = a_nlong[i];
			nshort = a_nshort[i];
			if (nlong == 0) nlong = 1;
			if (nshort == 0) nshort = 1;
			lavg = a_long[i] / nlong;
			savg = a_short[i] / nshort;
			sitetot = (a_zero[i] + a_nshort[i] + a_nlong[i]);
#
#			articles[] is only filled in for systems that made it
#			into the main table above; a system known solely from
#			"linecount" records would otherwise divide by zero.
#
			arts = articles[i];
			if (arts == 0) arts = sitetot;
			if (arts == 0) arts = 1;
			printf("%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", i, a_zero[i], a_nshort[i], a_smax[i], savg, a_nlong[i], a_lmax[i], lavg, sitetot, (sitetot * 100) / arts);
		}
	}

	if (atot) {
		printf("\nNetnews Categories Received\n");
#		size the name column to the longest category name
		l = 0;
		for( i in newsgcnt ) {
			if (l < length(i)) l = length(i);
		}
		fmt = sprintf("%%-%ds %%6d\n", l);
		for( ; ; ) {
#			selection sort, largest count first
			max = 0;
			for( j in newsgcnt ) {
				if (newsgcnt[j] > max) {
					i = j;
					max = newsgcnt[j];
				}
			}
			if (max == 0) break;
			printf(fmt, i, newsgcnt[i]);
			newsgcnt[i] = 0;
		}
	}

	if (badgrp) {
		printf("\nBad Newsgroups Received\n");
		l = 0;
		for( i in badng ) {
			if (l < length(i)) l = length(i);
		}
		fmt = sprintf("%%-%ds %%5d\n", l);
		for( ; ; ) {
#			selection sort, by newsgroup name
			i = invalid;
			for( j in badng ) {
				if (badng[j] > 0 && j < i) i = j;
			}
			if (i == invalid) break;
			printf(fmt, i, badng[i]);
			badng[i] = 0;
		}
	}
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.