|
|
1.1 root 1: #! /bin/sh
2: # @(#)arbitron 2.3 07/15/86
3: # arbitron -- this program produces rating sweeps for USENET.
4: #
5: # Usage: arbitron
6: #
7: # To use this program, edit the "configuration" section below so that the
8: # information is correct for your site, and then run it. It will produce a
9: # readership survey for your machine and mail that survey to decwrl, with
10: # a cc to you.
11: #
12: # To participate in the international monthly ratings sweeps,
13: # run "arbitron" every month. I will run the statistics program on the last
14: # day of each month; it will include any report that has reached it by that
15: # time. To make sure your site's data is included, run the survey program no
16: # later than the 20th day of each month.
17: #
18: # Brian Reid, DEC Western Research Lab, reid@decwrl
19: # Updated and bugfixed by
20: # Spencer Thomas, U.of Utah
21: # Geoff Kuenning, SAH Consulting
22: # Updated to work with 2.10.1 and older news systems by
23: # Lindsay Cleveland, AT&T Technologies/Bell Labs
24: # Made to work with 16-bit address spaces by
25: # Andy Walker, Maths Dept., University of Nottingham, UK
26: #
27: # Note that the results of this program are dependent on the rate at which
28: # you expire news. If you are a small site that expires news rapidly, the
29: # results may indicate fewer active readers than you actually have.
30: #
31: # copied to a certain extent from the "subscribers"
32: # script posted by Blonder, McCreery, and Herron.
33: ###########################################################################
34: # Configuration information. Edit this section to reflect your site data. #
TMPDIR=/tmp             # directory for the arb.*.$$ work files
NEWS=/usr/lib/news      # news library directory; the active file is $NEWS/active
SPOOL=/usr/spool/news   # root of the article spool

# Make a crude guess at the system type: a /usr/ucb directory is taken to
# mean BSD, anything else is treated as USG (System V).
if [ -d /usr/ucb ]; then
  STYPE=bsd
else
  STYPE=usg
fi

# Range of /etc/passwd UIDs that represent actual people (rather than
# maintenance accounts, daemons and the like).
lowUID=100
highUID=9999

# If you aren't running a distributed news system (nntpd & rrn, usually),
# leave NEWSHOST blank.  Otherwise set it to the name of the host from which
# a copy of the active file can be fetched with rcp.
NEWSHOST=

# Recipients of the survey: the collection address followed by a local cc.
# uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
# NOTE(review): the original listing also showed a commented-out
# Internet-style alternative ("<address> $USER"), but the address was mangled
# by e-mail obfuscation; confirm the correct address before enabling one.
summarypath='ihnp4!decwrl!netsurvey usenet'

# Find the uucp name of this host.  If this code doesn't work for your site,
# just set it literally, like this:
#   hostname="ihnp4"
#
# STYPE is only ever "bsd" or "usg" (see above), so the System V arm must
# match "usg"; "sysv" is kept as an accepted alias for hand-edited configs.
# stderr is discarded because any of the probed commands may be missing.
case $STYPE in
  bsd)        hostname=$( (uuname -l || hostname) 2>/dev/null ) ;;
  usg | sysv) hostname=$( (uname -n || uuname -l || hostname) 2>/dev/null ) ;;
  *)          hostname=$( (uuname -l) 2>/dev/null ) ;;
esac
70:
PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
############################################################################
export PATH
# ---------------------------------------------------------------------------
# Remove this run's work files ($TMPDIR/arb.*.<pid>) when the script exits or
# receives signal 1, 2, 3 or 15.
trap 'rm -f "$TMPDIR"/arb.*.$$; exit' 0 1 2 3 15

# Report date tag: abbreviated month name followed by the year, e.g. "Jul1986".
# Asking date for an explicit format in the C locale avoids depending on the
# field layout of the default date output and leaves the positional
# parameters untouched.
dat=$(LC_ALL=C date '+%b%Y')

# Command the finished report is piped into.  Set MAILER in the environment
# to use a different mail program.
# destination="mailx -s arbitron-${dat} $summarypath"
destination="${MAILER-mail} $summarypath"
80:
81: ################################
82: # Here are several expressions, each of which figures out approximately how
83: # many people use this machine. Comment out all but 1 of them; pick the one
84: # you like best. Initially the most universal but least reliable of them is
85: # uncommented.
86: # # ###### Scheme #1: fast but usually returns too big a number
# Count the /etc/passwd entries whose numeric UID lies in [lowUID, highUID].
# The bounds are handed to awk with -v instead of being spliced into the
# program text, so no nested escaping is needed and an empty bound cannot
# produce an awk syntax error.  Entries with a non-numeric UID field are
# ignored.
nusers=$(awk -F: -v lo="$lowUID" -v hi="$highUID" '
  $3 ~ /^[0-9]+$/ && $3 + 0 >= lo + 0 && $3 + 0 <= hi + 0 { n++ }
  END { print n + 0 }
' </etc/passwd)
88:
89: # # ###### Scheme #2 (works with BSD systems)
90: #nusers=`last | sort -u +0 -1 | wc -l`
91:
92: # # ###### Scheme #3 (works with USG systems)
93: #nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`
94:
95: # # ###### Scheme #4 (provided by Lindsay Cleveland)
96: # # ###### (Same idea as #1, but excludes various junk accounts)
97: #awk -F: "\$3 >= $lowUID && \$3 <= $highUID{printf \"if test -d %s ; then echo %s;egrep : %s/.newsrc; fi\n\",\$6,\$1,\$6}" \
98: # </etc/passwd | sh 2>/dev/null | awk -f $tmpdir/arb.sel.$$ >$tmpdir/arb.tmp.$$
99: #nusers=`awk "BEGIN {N=0} NF == 1{N=N+1}END{print N}" <$tmpdir/arb.tmp.$$`
100: ################################
101: #
102: # Set up awk scripts; these are too large to pass as arguments on most
103: # systems.
104: #
105: # This awk script generates the actual output report.
106: # We use 'sed' to substitute in the shell variables to save ourselves
107: # endless hassle trying to find quoting/backslashing problems.
108: #
109: # The input to this script consists of two types of lines (pre-sorted):
110: #
111: # (1) Active-file lines. These have four fields: newsgroup name,
112: # last article number, first existing article, 'y' or 'n'
113: # to allow/disallow posting.
114: # mod.mac 00001 00001 y
115: #
116: # (2) .newsrc-derived lines. These have three fields: the newsgroup
117: # name, the user name and the articles-read information. The latter
118: # can be arbitrarily complex. It can also be arbitrarily long;
119: # this can potentially break either awk or sed, in which
120: # case the script will not work.
121: # mod.map joe 1-199
122: #
123: # The script uses the type 1 lines to define the newsgroups
124: # and their active article ranges. The .newsrc (type 2) lines are
125: # then used to deduce which users are reading that group (a group
126: # is being read if the last article seen is in that group's active
127: # article range). The user names are used to keep track of who reads
128: # each group, which isn't all that useful but is interesting. When
129: # all input has been read, a report is printed summarizing the results.
130: #
# Write the report-generating awk program to arb.fmt.$$ in the work directory.
# sed deletes the lines that start with '#' and replaces the NUSERS, HOSTNAME
# and DATE placeholders with the values gathered so far.  An empty $nusers is
# written as 0 so the generated program always parses, and an empty TMPDIR
# falls back to /tmp.
sed "/^#/d
s/NUSERS/${nusers:-0}/g
s/HOSTNAME/$hostname/g
s/DATE/$dat/g" > "${TMPDIR:-/tmp}/arb.fmt.$$" << 'DOG'
# makereport -- utility for "arbitron".  Early versions were copied from a
# similar script distributed with "subscribers.sh" by Blonder, McCreery, and
# Herron.
#
BEGIN { grpname = "" ; grpcount = 0 ; realusers = 0 }
#
# Active-file line (group, last article, first article, flag): print the
# previous group if it had any readers, then start counting for this group.
# The article numbers are forced numeric so that zero-padded values compare
# by value.
NF == 4 {
	if (grpcount > 0) {
		printf("%d %s\n", grpcount, grpname)
	}
	grpname = $1
	grplast = $2 + 0
	grpfirst = $3 + 0
	grpcount = 0
}
#
# .newsrc line (group, user, article list).  Only lines for the group that is
# currently being counted are considered; a line for a group with no active
# entry must not be credited to whichever group happened to precede it.
# The final number in the article list is the last article actually read; if
# it is a current (unexpired) article the user counts as a reader of the
# group and as a "real" user of the news system.
NF == 3 && $1 == grpname {
	nranges = split($3, ranges, "-")
	nitems = split(ranges[nranges], items, ",")
	lastread = items[nitems] + 0
	if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
		grpcount++
		if (counted[$2] != 1) {
			counted[$2] = 1
			realusers++
		}
	}
}
#
# End of input: print the header records (their 999x keys are there for the
# numeric sort that follows) and the last group, if it had readers.
END {
	printf("9999 Host\t\t%s\n", "HOSTNAME")
	printf("9998 Users\t\t%d\n", NUSERS)
	printf("9997 NetReaders\t%d\n", realusers)
	printf("9996 ReportDate\t%s\n", "DATE")
	printf("9995 SystemType\tnews-arbitron-2.3\n")
	if (grpcount > 0) {
		printf("%d %s\n", grpcount, grpname)
	}
}
DOG
181:
# Write the awk program that turns /etc/passwd into a small shell script.
# For each home directory not seen before (and never for "/" or an empty
# one) it emits a command that, when <home>/.newsrc is readable, prints the
# subscription lines (those containing ": " followed by a digit) with the
# first colon replaced by " <login name>".
# NOTE(review): home directories and login names are inserted into the
# generated commands unquoted, so they are assumed to contain no shell
# metacharacters.
cat <<'PASSWD_AWK' >$TMPDIR/arb.pwd.$$
BEGIN { visited["/"] = 1; visited[""] = 1; }
visited[$6] != 1 {
	printf("if [ -r %s/.newsrc ] ; then ", $6)
	printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n", $1, $6)
	visited[$6] = 1
}
PASSWD_AWK
191:
192: # First, make sure we have an active file
# Locate the active file.  Without NEWSHOST the local $NEWS/active is used;
# otherwise a copy is fetched from NEWSHOST with rcp.  The copy is named
# $TMPDIR/arb.active.$$ so that it matches the arb.*.$$ pattern removed by
# the script's exit trap even when TMPDIR is not /tmp.
if [ -z "$NEWSHOST" ]; then
  ACTIVE=$NEWS/active
else
  ACTIVE=$TMPDIR/arb.active.$$
  rcp "$NEWSHOST:$NEWS/active" "$ACTIVE"
fi

# A failed rcp leaves no (or an empty) file, so this check covers both the
# local and the remote case.  The diagnostic goes to stderr.
if [ ! -s "$ACTIVE" ]; then
  echo "arbitron: ACTIVE file missing or empty. Cannot continue." >&2
  exit 1
fi
204:
205: # Next, get the list of .newsrc files with duplicates and unreadable files
206: # removed.
# Run the generated per-user commands to collect "group user article-list"
# lines from every readable .newsrc, one home directory at a time.
awk -F: -f "$TMPDIR/arb.pwd.$$" </etc/passwd | sh >"$TMPDIR/arb.tmp.$$"

# Check to make sure that we found some.
if [ -s "$TMPDIR/arb.tmp.$$" ]; then
  # See whether the active file has 4 fields or only two (pre-2.10.2).
  # Counting the fields with awk avoids "set" with no arguments, which
  # would dump every shell variable if the first line were blank.
  nfields=$(sed 1q "$ACTIVE" | awk '{ print NF }')
  if [ "${nfields:-0}" -eq 2 ]; then
    # Two-field format: find each group's first article from the spool.
    grep '^[a-z]*\.' "$ACTIVE" |
      while read -r group last; do
        dir=$(printf '%s\n' "$group" | sed 's;\.;/;g')
        # Only all-digit names are articles; anything else in the directory
        # (such as subgroup directories) must not be taken as the first
        # article.  A missing spool directory simply yields no articles.
        first=$(ls "$SPOOL/$dir" 2>/dev/null |
          grep '^[0-9][0-9]*$' | sort -n | sed 1q)
        # With no articles, fall back to $last so the line always has four
        # fields; a three-field line would be read as a .newsrc entry.
        echo "$group $last ${first:-$last} X"
      done
  else
    grep '^[a-z]*\.' "$ACTIVE"
  fi |
    # Merge active and .newsrc lines.  The report program needs each group's
    # active line ahead of its .newsrc lines, which requires plain byte
    # ordering, hence the C locale.
    LC_ALL=C sort - "$TMPDIR/arb.tmp.$$" |
    awk -f "$TMPDIR/arb.fmt.$$" |
    sort -nr |
    sed '/^$/d
s/^999[0-9] //' |
    # Deliberately unquoted: $destination is a command plus its arguments.
    $destination
else
  echo "Unable to find any readable .newsrc files" >&2
  exit 1
fi
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.