|
|
#! /bin/sh
# @(#)arbitron	2.3	07/15/86
# arbitron -- this program produces rating sweeps for USENET.
#
# Usage: arbitron
#
# To use this program, edit the "configuration" section below so that the
# information is correct for your site, and then run it. It will produce a
# readership survey for your machine and mail that survey to decwrl, with
# a cc to you.
#
# To participate in the international monthly ratings sweeps,
# run "arbitron" every month. I will run the statistics program on the last
# day of each month; it will include any report that has reached it by that
# time. To make sure your site's data is included, run the survey program no
# later than the 20th day of each month.
#
# Brian Reid, DEC Western Research Lab, reid@decwrl
# Updated and bugfixed by
#	Spencer Thomas, U.of Utah
#	Geoff Kuenning, SAH Consulting
# Updated to work with 2.10.1 and older news systems by
#	Lindsay Cleveland, AT&T Technologies/Bell Labs
# Made to work with 16-bit address spaces by
#	Andy Walker, Maths Dept., University of Nottingham, UK
#
# Note that the results of this program are dependent on the rate at which
# you expire news. If you are a small site that expires news rapidly, the
# results may indicate fewer active readers than you actually have.
#
# copied to a certain extent from the "subscribers"
# script posted by Blonder, McCreery, and Herron.
###########################################################################
# Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp
NEWS=/usr/lib/news
SPOOL=/usr/spool/news

# Make a crude stab at determining the system type
if [ -d /usr/ucb ]; then
  STYPE="bsd"
else
  STYPE="usg"
fi

# Range of /etc/passwd UID's that represent actual people (rather than
# maintenance accounts or daemons or whatever)
lowUID=100
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank. Else set it to the name of the host from which you
# can rcp a copy of the active file.
NEWSHOST=

# uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
# (The Internet-style alternative below has its address redacted in the
# archived copy of this file.)
# summarypath="[email protected] $USER"
summarypath="ihnp4!decwrl!netsurvey usenet"

# We need to find the uucp name of your host. If this code doesn't work,
# then just put it in literally like this:
#	hostname="ihnp4"
#
# The second arm must match the "usg" value assigned to STYPE above; it used
# to test for "sysv", which STYPE never holds, so "uname -n" was never tried.
case $STYPE in
bsd) hostname=$( (uuname -l || hostname) 2>/dev/null) ;;
usg) hostname=$( (uname -n || uuname -l || hostname) 2>/dev/null) ;;
*)   hostname=$( (uuname -l) 2>/dev/null) ;;
esac

PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH
# ---------------------------------------------------------------------------

# Print a message on stderr and give up.
die() {
  echo "$*" >&2
  exit 1
}

# Remove our scratch files on every exit path; turn the usual signals into
# an ordinary (failing) exit so that the cleanup above runs for them too.
trap 'rm -f "$TMPDIR"/arb.*."$$"' 0
trap 'exit 1' 1 2 3 15

# Report date, e.g. "Jul1986". Asking date(1) for the format directly avoids
# depending on the field layout of its default (locale-specific) output.
dat=$(LC_ALL=C date '+%b%Y')
# destination="mailx -s arbitron-${dat} $summarypath"
destination="${MAILER-mail} $summarypath"

################################
# Here are several expressions, each of which figures out approximately how
# many people use this machine. Comment out all but 1 of them; pick the one
# you like best. Initially the most universal but least reliable of them is
# uncommented.
# # ###### Scheme #1: fast but usually returns too big a number
nusers=$(awk -F: -v lo="$lowUID" -v hi="$highUID" \
  '$3 >= lo && $3 <= hi { n++ } END { print n + 0 }' </etc/passwd)

# # ###### Scheme #2 (works with BSD systems)
#nusers=$(last | sort -u -k1,1 | wc -l)

# # ###### Scheme #3 (works with USG systems)
#nusers=$(who /etc/wtmp | sort -u -k1,1 | wc -l)

# # ###### Scheme #4 (provided by Lindsay Cleveland)
# # ###### (Same idea as #1, but excludes various junk accounts)
# # NOTE: this relies on an awk script $TMPDIR/arb.sel.$$ that nothing in
# # this script creates; it must be supplied before enabling this scheme.
#awk -F: "\$3 >= $lowUID && \$3 <= $highUID{printf \"if test -d %s ; then echo %s;egrep : %s/.newsrc; fi\n\",\$6,\$1,\$6}" \
#	</etc/passwd | sh 2>/dev/null | awk -f $TMPDIR/arb.sel.$$ >$TMPDIR/arb.tmp.$$
#nusers=`awk "BEGIN {N=0} NF == 1{N=N+1}END{print N}" <$TMPDIR/arb.tmp.$$`
################################
#
# Set up the awk script that generates the actual output report. It is kept
# in a scratch file and run with "awk -f". The site-specific values
# (hostname, nusers, dat) are handed to it with "awk -v" when it is run,
# rather than being edited into the program text, so an empty or unusual
# value cannot produce a syntactically invalid awk program.
#
# The input to this script consists of two types of lines (pre-sorted):
#
#   (1) Active-file lines. These have four fields: newsgroup name,
#	last article number, first existing article, 'y' or 'n'
#	to allow/disallow posting.
#		mod.mac 00001 00001 y
#
#   (2) .newsrc-derived lines. These have three fields: the newsgroup
#	name, the user name and the articles-read information. The latter
#	can be arbitrarily complex. It can also be arbitrarily long;
#	this can potentially break awk, in which case the script will
#	not work.
#		mod.map joe 1-199
#
# The script uses the type 1 lines to define the newsgroups
# and their active article ranges. The .newsrc (type 2) lines are
# then used to deduce which users are reading that group (a group
# is being read if the last article seen is in that group's active
# article range). The user names are used to keep track of who reads
# each group, which isn't all that useful but is interesting. When
# all input has been read, a report is printed summarizing the results.
#
cat >"$TMPDIR/arb.fmt.$$" <<'DOG'
# makereport -- utility for "arbitron". Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
BEGIN { grpcount = 0 ; realusers = 0 }
#
# Active file line: dispose of previous group (if any), record group, and
# record first and last article numbers. Set group's reader count to none.
NF == 4 {
	if (grpcount > 0) {
		printf("%d %s\n", grpcount, grpname)
	}
	grpname = $1
	grplast = $2
	grpfirst = $3
	grpcount = 0
}
#
# .newsrc line. Break out the final number, which is the last article that
# has actually been read. This is a pretty good indicator of the person's
# true interest in the group. If 'lastread' for the group is a current
# (unexpired) article, record a reader for that group. Finally, record
# the user as a "real" user of the news system.
#
NF == 3 {
	n1 = split($3, n2, "-")
	n3 = split(n2[n1], n4, ",")
	lastread = n4[n3]
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
		grpcount++
		if (realuser[$2] != 1) {
			realuser[$2] = 1
			realusers++
		}
	}
}
#
# End of file. Print the summary lines (their 999x keys make them sort to
# the top of the report) and flush the final group.
END {
	printf("9999 Host\t\t%s\n", hostname)
	printf("9998 Users\t\t%d\n", nusers)
	printf("9997 NetReaders\t%d\n", realusers)
	printf("9996 ReportDate\t%s\n", dat)
	printf("9995 SystemType\tnews-arbitron-2.3\n")
	if (grpcount > 0) {
		printf("%d %s\n", grpcount, grpname)
	}
}
DOG

# collect_newsrc -- write "group user articles-read" lines on stdout.
#
# Each distinct home directory in /etc/passwd (other than "" and "/") is
# visited once, under the first account that names it. Unreadable or missing
# .newsrc files are skipped. Only lines containing ": <digit>" are kept; on
# those, the first ':' is replaced by " <user>".
#
# The passwd fields are handled as data (quoted variables / awk -v) instead
# of being formatted into shell commands and piped to sh.
collect_newsrc() {
  awk -F: '$6 != "" && $6 != "/" && !seen[$6]++ { print $1 ":" $6 }' \
    </etc/passwd |
  while IFS=: read -r user home; do
    [ -r "$home/.newsrc" ] || continue
    awk -v user="$user" '/: [0-9]/ {
      colon = index($0, ":")
      print substr($0, 1, colon - 1) " " user substr($0, colon + 1)
    }' <"$home/.newsrc"
  done
}

# list_active -- write the active file's dotted newsgroups on stdout, always
# as "group last first flag".
#
# If the first line of $ACTIVE has only two fields (pre-2.10.2 layout,
# "group last"), the lowest-numbered article in the group's spool directory
# is looked up to supply "first", and "X" is used as the flag.
list_active() {
  nfields=$(awk 'NR == 1 { print NF; exit }' <"$ACTIVE")
  if [ "${nfields:-0}" -eq 2 ]; then
    grep -E '^[a-z]*\.' "$ACTIVE" |
    while read -r group last; do
      dir=$(echo "$group" | sed 's;\.;/;g')
      # Only all-digit names are articles. A group with no spool directory
      # is not an error here, so ls complaints are deliberately discarded.
      first=$(ls "$SPOOL/$dir" 2>/dev/null |
        grep '^[0-9][0-9]*$' | sort -n | sed 1q)
      # No article found: fall back to the last article number so the line
      # still has four fields.
      [ -n "$first" ] || first=$last
      echo "$group $last $first X"
    done
  else
    grep -E '^[a-z]*\.' "$ACTIVE"
  fi
}

# First, make sure we have an active file. A fetched copy lives under
# $TMPDIR with the arb.*.$$ naming so that the exit trap removes it.
if [ -z "$NEWSHOST" ]; then
  ACTIVE=$NEWS/active
else
  ACTIVE=$TMPDIR/arb.active.$$
  rcp "$NEWSHOST:$NEWS/active" "$ACTIVE"
fi

[ -s "$ACTIVE" ] ||
  die "arbitron: ACTIVE file missing or empty. Cannot continue."

# Next, get the list of .newsrc files with duplicates and unreadable files
# removed.
collect_newsrc >"$TMPDIR/arb.tmp.$$"

# Check to make sure that we found some
[ -s "$TMPDIR/arb.tmp.$$" ] ||
  die "Unable to find any readable .newsrc files"

# Merge active-file and .newsrc lines, build the report, order it by reader
# count, strip the 999x sort keys from the summary lines, and mail it.
#
# The merge must use byte order (C locale): each group's active-file line
# has to come before that group's reader lines, and "grp " before "grp.sub".
#
# $destination is intentionally unquoted: it holds the mailer command
# followed by its recipient arguments.
# shellcheck disable=SC2086
list_active |
  LC_ALL=C sort - "$TMPDIR/arb.tmp.$$" |
  awk -v hostname="$hostname" -v nusers="$nusers" -v dat="$dat" \
    -f "$TMPDIR/arb.fmt.$$" |
  sort -nr |
  sed '/^$/d
s/^999[0-9] //' |
  $destination
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.