|
|
1.1 root 1: From cbosgd!ucbvax!usenet Mon Oct 13 05:39:17 1986
2: Received: by beno.CSS.GOV (5.54/5.17)
3: id AA01253; Mon, 13 Oct 86 05:39:12 EDT
4: Received: from cbosgd.UUCP by seismo.CSS.GOV (5.54/1.14)
5: id AA03513; Mon, 13 Oct 86 05:39:11 EDT
6: Received: by cbosgd.ATT.COM (4.12/UUCP-Project/rel-1.0/06-28-86)
7: id AA08778; Mon, 13 Oct 86 03:44:14 edt
8: Received: by ucbvax.Berkeley.EDU (5.53/1.17)
9: id AA15536; Sun, 12 Oct 86 23:51:25 PDT
10: Date: Sun, 12 Oct 86 23:51:25 PDT
11: From: ucbvax!usenet (USENET News Administration)
12: Message-Id: <[email protected]>
13: To: cbosgd!backbone
14: Subject: a handy awk script for netnews log reports
15: Status: R
16:
17: Since Mark didn't have a copy of this, I will assume that most of the
18: rest of you don't either, and send it along. The comments should be
19: explanation enough... If not, ask me.
20:
21: Erik E. Fair ucbvax!fair [email protected]
22: -------------------------------------------------------------------------------
23: # USAGE: awk -f report_awk /usr/lib/news/log
24: #
25: # AWK script which eats netnews log files and produces a summary of USENET
26: # traffic and errors over the period of time that the log was collected.
27: #
28: # August 31, 1986
29: #
30: # Erik E. Fair <dual!fair>
31: # Original Author, May 22, 1984
32: #
33: # Brad Eacker <onyx!brad>
34: # Modified to simplify the record processing and to sort the output.
35: #
36: # Erik E. Fair <dual!fair>
37: # Modified to provide information about control messages.
38: #
39: # Erik E. Fair <dual!fair>
40: # Bug in system name extraction fixed. It was assumed that the fourth field
41: # (system name) always had a dot. local is one that doesn't. Some others
42: # (including 2.9 sites) don't either.
43: #
44: # Earl Wallace <pesnta!earlw>
45: # The "sent" field was changed from $5 to $6 in 2.10.2 (beta)
46: # named "newstats" and called with no arguments.
47: #
48: # Erik E. Fair <dual!fair>
49: # Remove support for 2.10.1, revise for 2.10.2 to provide information
50: # about junked articles, garbled articles, and bad newsgroups
51: #
52: # Erik E. Fair <ucbvax!fair>
53: # Minor bug fix to bad newsgroup reporting, also now counting ``old''
54: # articles as junked, with counter for number that are `old'.
55: #
56: # Erik E. Fair <ucbvax!fair>
57: # Fix up the domain & local hosts support
58: #
59: # Erik E. Fair <ucbvax!fair>
60: # Fix up the counting of gatewayed material, add counting of "linecount"
61: # problems. Additional cleanup to make things faster.
62: #
63: BEGIN{ # site configuration, gateway domain table, sort sentinel and counters
64: #
65: # this is the prefix that your site uses in hostnames to identify your
66: # hosts (e.g. ucbarpa, ucbvax, su-score, mit-mc, mit-ai)
67: # You will probably want to change (or add to) the following line
68: #
69: lprefix = "ucb";
70: lplen = length(lprefix); # number of leading characters of a system name compared with lprefix
71: #
72: # If you do bi-directional USENET gatewaying (e.g. mailing list
73: # to newsgroup where the material flows both ways freely), this
74: # should be the name in the sys file that you use to mail stuff
75: # to the mailing lists.
76: #
77: pseudo = "internet"; # destination name that the "sent" rules replace with rptname
78: rptname = "(GATEWAY)"; # row label used in the report for gatewayed traffic
79: #
80: # Top level domain names and what network they represent
81: # (for use in counting stuff that is gatewayed)
82: # Every entry maps to rptname, so all of these domains share one report row.
83: domains["ARPA"] = rptname;
84: domains["arpa"] = rptname;
85: domains["EDU"] = rptname;
86: domains["edu"] = rptname;
87: domains["GOV"] = rptname;
88: domains["gov"] = rptname;
89: domains["COM"] = rptname;
90: domains["com"] = rptname;
91: domains["MIL"] = rptname;
92: domains["mil"] = rptname;
93: domains["ORG"] = rptname;
94: domains["org"] = rptname;
95: domains["NET"] = rptname;
96: domains["net"] = rptname;
97: domains["UK"] = rptname;
98: domains["uk"] = rptname;
99: domains["DEC"] = rptname;
100: domains["dec"] = rptname;
101: domains["CSNET"] = rptname;
102: domains["csnet"] = rptname;
103: domains["BITNET"] = rptname;
104: domains["bitnet"] = rptname;
105: domains["MAILNET"] = rptname;
106: domains["mailnet"] = rptname;
107: domains["UUCP"] = rptname;
108: domains["uucp"] = rptname;
109: domains["OZ"] = rptname;
110: domains["oz"] = rptname;
111: domains["AU"] = rptname;
112: domains["au"] = rptname;
113: #
114: # tilde chosen because it is ASCII 126 (don't change this)
115: # The selection sorts in END start from this value and pick any key that compares below it.
116: invalid = "~~~~~~";
117: # Seed each per-system array with the sentinel key and a zero count.
118: accept[invalid] = 0;
119: reject[invalid] = 0;
120: xmited[invalid] = 0;
121: control[invalid] = 0;
122: junked[invalid] = 0;
123: neighbor[invalid] = 0;
124: badgrp = 0; # incremented by the $5 == "Unknown" rule
125: garbled = 0; # incremented by the $5 == "Inbound" rule
126: lcount = 0; # incremented by both "linecount" rules
127: canfail = 0; # incremented by the $6 == "cancel" rule
128: candup = 0; # incremented by the $6 == "Cancelled" rule
129: insfail = 0; # incremented by the $6 == "install" rule
130: old = 0; # incremented by the $6 == "too" rule
131: }
132: #
133: # Skip some things that we won't bother with
134: #
135: /^$/ { next } # blank line: nothing to do
136: $5 == "from" { next } # ignored, not counted anywhere
137: $5 == "make" { next } # ignored, not counted anywhere
138: $5 == "Cancelling" { next } # ignored, not counted anywhere
139: #
140: # Or that we just count
141: # (each counter below is printed by END only when it is non-zero)
142: $5 == "Inbound" { garbled++; next } # printed as "garbled" on the totals line
143: $6 == "cancel" { canfail++; next } # printed as "failed" beside the cancel count
144: $6 == "Cancelled" { candup++; next } # printed as "duplicate" beside the cancel count
145: $6 == "install" { insfail++; next } # printed as "uninstallable" on the totals line
146: #
147: # Articles sent to remote systems (this is what 2.10.2 (beta) says)
148: #
149: $6 == "sent" { # count one transmitted article for each destination named in fields 8..NF
150: for(j = 8; j <= NF; j++) {
151: comma = index( $(j), ",");
152: if (comma != 0) $(j) = substr( $(j), 1, (comma - 1)); # keep only the text before the first comma
153: if ($(j) == pseudo) $(j) = rptname; # the mail gateway is counted under the report label
154: else neighbor[$(j)] = 1; # any other destination is remembered as a neighbor
155: xmited[$(j)]++;
156: }
157: next;
158: }
159: #
160: # Articles sent to remote systems (this is what 2.11 says)
161: #
162: $5 == "sent" { # same tally as the $6 == "sent" rule, but destinations start at field 7
163: for(j = 7; j <= NF; j++) {
164: comma = index( $(j), ",");
165: if (comma != 0) $(j) = substr( $(j), 1, (comma - 1)); # keep only the text before the first comma
166: if ($(j) == pseudo) $(j) = rptname; # the mail gateway is counted under the report label
167: else neighbor[$(j)] = 1; # any other destination is remembered as a neighbor
168: xmited[$(j)]++;
169: }
170: next;
171: }
172: #
173: # Get the name of the system that did this,
174: # taking into account that not everyone believes in domains.
175: #
176: { # runs for every record not consumed above; sets hostname, domain and sys for the later rules
177: # if we get a route addr (we shouldn't, but...), take the last one
178: #
179: nhosts = split($4, hosts, "@");
180: hostname = hosts[nhosts]; # text after the last "@" in field 4 (all of field 4 if there is none)
181: #
182: # get the root domain name, and the hostname
183: #
184: ndoms = split(hostname, doms, ".");
185: domain = doms[ndoms]; # last dot-separated component
186: sys = doms[1]; # first dot-separated component
187: #
188: # check for local system, and if not that, then internet sites.
189: # special case the network name replacement of specific host names,
190: # such that the network name is there only on a `local' posting
191: # (which is really gatewaying in disguise)
192: #
193: if ($5 == "posted") {
194: prefix = substr(sys, 1, lplen);
195: if (prefix == lprefix) {
196: sys = "local"; # system name starts with the site prefix
197: } else {
198: dom = domains[domain];
199: if (dom) sys = dom; # known top-level domain: report under the gateway label
200: }
201: }
202: }
203: #
204: # Duplicates & receiveds/posted & control messages
205: #
206: $5 == "posted" || $5 == "received" { # count an accepted article and its newsgroup categories
207: accept[sys]++;
208: if ($5 == "received") neighbor[sys] = 1; # a system we receive from is remembered as a neighbor
209: nng = split($8, ngl, ","); # field 8 is split on commas into newsgroup names
210: for(i = 1; i <= nng; i++) {
211: dot = index(ngl[i], ".");
212: if (dot) ng = substr(ngl[i], 1, (dot - 1)); # category is the text before the first dot
213: else ng = ngl[i]; # no dot: the whole name is the category
214: if (ng) newsgcnt[ng]++;
215: }
216: next;
217: }
218: $5 == "Duplicate" { reject[hostname]++; next } # keyed by the full hostname; END folds these into system names
219: $6 == "valid" { junked[sys]++; next } # counted as junked
220: $6 == "too" { junked[sys]++; old++; next } # counted as junked and also as old
221: $5 == "Unknown" { # count a bad newsgroup name
222: x = length($7) - 2;
223: ng = substr($7, 2, x); # field 7 without its first and last characters
224: badng[ng]++;
225: badgrp++;
226: next;
227: }
228: #
229: # articles whose actual line count differs from the Line: header count
230: #
231: $5 == "linecount" { # record, per system, the difference between field 9 (got) and field 7 (expect)
232: expect = $7;
233: # awk does very strange things with non-numeric characters in numbers
234: comma = index(expect, ",");
235: if (comma != 0) expect = substr(expect, 1, (comma - 1)); # drop the comma and anything after it
236: got = $9;
237: diff = got - expect;
238: lcount++;
239: alc_host[sys] = 1; # gives sys a row in END's length-problem table
240: neighbor[sys] = 1;
241: if (diff < 0) { # got fewer lines than expected: "short"
242: diff = 0 - diff; # work with the absolute difference
243: a_nshort[sys]++;
244: a_short[sys] += diff;
245: if (a_smax[sys] < diff) a_smax[sys] = diff; # largest shortfall seen for sys
246: } else { # otherwise "long"; a difference of zero lands here too
247: a_nlong[sys]++;
248: a_long[sys] += diff;
249: if (a_lmax[sys] < diff) a_lmax[sys] = diff; # largest excess seen for sys
250: }
251: next;
252: }
253: #
254: # articles whose actual line count is Zero
255: #
256: $7 == "linecount" { # count the article as a zero-length problem and as a reject for sys
257: lcount++;
258: a_zero[sys]++; # NOTE(review): alc_host[sys] is not set here, so END lists sys only if the $5 == "linecount" rule also saw it
259: reject[sys]++;
260: next;
261: }
262: #
263: # Control messages
264: #
265: $5 == "Ctl" { # tally control messages per system and per message name
266: ctot++; # NOTE(review): END resets ctot to 0 and recomputes it from control[]
267: control[sys]++;
268: ctlcnt[$(10)]++; # field 10 is the key; END compares it with "cancel"
269: next;
270: }
271: #
272: # Print anything we didn't recognize, it's probably an error message.
273: # For the submitted report to USENET, do sed -e '1,/^$/d' file | inews
274: # so that this cruft doesn't get out the door.
275: #
276: { # reached only by records that no rule above consumed with "next"
277: print;
278: }
279: #
280: # Summarize and print the report
281: #
282: END{ # fold duplicate counts into system names, then print the tables and totals
283: # special processing for Duplicates, because we can't tell if
284: # they came from a netnews neighbor or from the gatewaying
285: # activities until we have processed the entire log.
286: # Folded counts go to moved[] first: adding keys to reject[] inside its own for-in loop is undefined.
287: for( hostname in reject ) {
288: #
289: # get the root domain name, and the hostname
290: #
291: ndoms = split(hostname, doms, ".");
292: domain = doms[ndoms];
293: sys = doms[1];
294: if (! neighbor[sys]) {
295: prefix = substr(sys, 1, lplen);
296: if (prefix == lprefix) {
297: sys = "local";
298: } else {
299: dom = domains[domain];
300: if (dom) sys = dom;
301: }
302: }
303: i = reject[hostname];
304: reject[hostname] = 0;
305: moved[sys] += i;
306: }
307: for( sys in moved ) reject[sys] += moved[sys]; # merge the folded counts back once the scan is finished
308: rtot = 0;
309: for( i in reject ) {
310: if (reject[i] > 0) { # hostnames zeroed by the fold above get no row of their own
311: list[i] = 1;
312: rtot += reject[i];
313: }
314: }
315: # list[] collects every system that needs a row in the per-system table
316: atot = 0;
317: for( i in accept ) {
318: list[i] = 1;
319: atot += accept[i];
320: }
321:
322: xtot = 0;
323: for( i in xmited ) {
324: list[i] = 1;
325: xtot += xmited[i];
326: }
327:
328: ctot = 0;
329: for( i in control ) {
330: list[i] = 1;
331: ctot += control[i];
332: }
333:
334: jtot = 0;
335: for( i in junked ) {
336: list[i] = 1;
337: jtot += junked[i];
338: }
339: #
340: # ctot is part of rtot, so we don't add it in to the grand total.
341: # totarticles is only a divisor: it is clamped to 1 so the percentages never divide by zero.
342: totarticles = atot + rtot;
343: if (totarticles == 0) totarticles = 1;
344:
345: printf("\nSystem \tAccept\tReject\tJunked\tXmit to\tControl\t%% total\t%% rejct\n");
346: for( ; ; ) {
347: # selection sort
348: i = invalid;
349: for( j in list ) {
350: if ( list[j] > 0 && j < i ) i = j; # smallest name not yet printed
351: }
352: if ( i == invalid ) break; # nothing left below the sentinel
353: list[i] = 0;
354: #
355: # control & junked are counted under accept.
356: #
357: sitetot = accept[i] + reject[i];
358: if (sitetot == 0) sitetot = 1;
359: articles[i] = sitetot; # remembered for the length-problem table below
360: #
361: # What an 'orrible printf spec
362: #
363: printf("%-14s\t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", i, accept[i], reject[i], junked[i], xmited[i], control[i], (sitetot * 100) / totarticles, (reject[i] * 100) / sitetot);
364: #
365: }
366: printf("\nTOTALS \t%6d\t%6d\t%6d\t%7d\t%7d\t%6d%%\t%6d%%\n", atot, rtot, jtot, xtot, ctot, 100, (rtot * 100) / totarticles);
367: printf("\nTotal Articles processed %d", atot + rtot);
368: if (old) printf(", old %d", old);
369: if (garbled) printf(", garbled %d", garbled);
370: if (insfail) printf(", uninstallable %d", insfail);
371: printf("\n");
372:
373: if (ctot) {
374: printf("\nControl Invocations\n");
375: for( i in ctlcnt ) {
376: if (i == "cancel") {
377: printf("%-12s %6d", i, ctlcnt[i]);
378: if (canfail) printf(", %d failed", canfail);
379: if (candup) printf(", %d duplicate", candup);
380: printf("\n");
381: } else {
382: printf("%-12s %6d\n", i, ctlcnt[i]);
383: }
384: }
385: }
386:
387: if (lcount) {
388: printf("\nReceived Article Length Problems\n");
389: printf("System Zero Short Smax Savg Long Lmax Lavg Total %% Tot\n");
390: for( i in alc_host ) {
391: nlong = a_nlong[i];
392: nshort = a_nshort[i];
393: if (nlong == 0) nlong = 1; # avoid dividing by zero in the averages
394: if (nshort == 0) nshort = 1;
395: lavg = a_long[i] / nlong;
396: savg = a_short[i] / nshort;
397: sitetot = (a_zero[i] + a_nshort[i] + a_nlong[i]); pct = 0; if (articles[i] > 0) pct = (sitetot * 100) / articles[i]; # articles[i] is unset for a system with no table row above; show 0% then
398: printf("%-14s %5d %5d %5d %5d %5d %5d %5d %5d %4d%%\n", i, a_zero[i], a_nshort[i], a_smax[i], savg, a_nlong[i], a_lmax[i], lavg, sitetot, pct);
399: }
400: }
401:
402: if (atot) {
403: printf("\nNetnews Categories Received\n");
404: l = 0;
405: for( i in newsgcnt ) {
406: if (l < length(i)) l = length(i); # widest category name sets the column width
407: }
408: fmt = sprintf("%%-%ds %%6d\n", l);
409: for( ; ; ) {
410: # selection sort
411: max = 0;
412: for( j in newsgcnt ) {
413: if (newsgcnt[j] > max) {
414: i = j;
415: max = newsgcnt[j];
416: }
417: }
418: if (max == 0) break;
419: printf(fmt, i, newsgcnt[i]);
420: newsgcnt[i] = 0; # printed: take it out of the running
421: }
422: }
423:
424: if (badgrp) {
425: printf("\nBad Newsgroups Received\n");
426: l = 0;
427: for( i in badng ) {
428: if (l < length(i)) l = length(i); # widest name sets the column width
429: }
430: fmt = sprintf("%%-%ds %%5d\n", l);
431: for( ; ; ) {
432: # selection sort
433: i = invalid;
434: for( j in badng ) {
435: if (badng[j] > 0 && j < i) i = j;
436: }
437: if (i == invalid) break;
438: printf(fmt, i, badng[i]);
439: badng[i] = 0; # printed: take it out of the running
440: }
441: }
442: }
443:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.