Annotation of 43BSDTahoe/new/news/misc/arbitron, revision 1.1.1.1

1.1       root        1: #! /bin/sh
                      2: # @(#)arbitron 2.3     07/15/86
                      3: # arbitron -- this program produces rating sweeps for USENET.
                      4: #
                      5: # Usage: arbitron
                      6: #
                      7: # To use this program, edit the "configuration" section below so that the
                      8: # information is correct for your site, and then run it. It will produce a
                      9: # readership survey for your machine and mail that survey to decwrl, with
                     10: # a cc to you.
                     11: #
                     12: # To participate in the international monthly ratings sweeps, 
                     13: # run "arbitron" every month. I will run the statistics program on the last
                     14: # day of each month; it will include any report that has reached it by that
                     15: # time. To make sure your site's data is included, run the survey program no
                     16: # later than the 20th day of each month.
                     17: #
                     18: # Brian Reid, DEC Western Research Lab, reid@decwrl
                     19: # Updated and bugfixed by 
                     20: #      Spencer Thomas, U.of Utah
                     21: #      Geoff Kuenning, SAH Consulting
                     22: # Updated to work with 2.10.1 and older news systems by
                     23: #      Lindsay Cleveland, AT&T Technologies/Bell Labs
                     24: # Made to work with 16-bit address spaces by
                     25: #      Andy Walker, Maths Dept., University of Nottingham, UK
                     26: #
                     27: # Note that the results of this program are dependent on the rate at which
                     28: # you expire news.  If you are a small site that expires news rapidly, the
                     29: # results may indicate fewer active readers than you actually have.
                     30: #
                     31: # copied to a certain extent from the "subscribers"
                     32: # script posted by Blonder, McCreery, and Herron.
                     33: ###########################################################################
                     34: # Configuration information. Edit this section to reflect your site data. #
                     35: TMPDIR=/tmp            # directory for the arb.*.$$ work files
                     36: NEWS=/usr/lib/news     # holds the "active" file; also put first in PATH
                     37: SPOOL=/usr/spool/news  # article spool, listed when "active" has only two fields
                     38: 
                     39: # Make a crude stab at determining the system type
                         # (a /usr/ucb directory is taken to mean BSD; anything else is
                         # labelled "usg").  STYPE is consulted further down, e.g. when the
                         # host name is looked up.
                     40: if [ -d /usr/ucb ]
                     41: then
                     42:     STYPE="bsd"
                     43: else
                     44:     STYPE="usg"
                     45: fi
                     46: 
                     47: # Range of /etc/passwd UID's that represent actual people (rather than
                     48: # maintenance accounts or daemons or whatever)
                         # Both bounds are inclusive; they feed the Scheme #1 user count below.
                     49: lowUID=100
                     50: highUID=9999
                     51: 
                     52: # If you aren't running a distributed news system (nntpd & rrn, usually),
                     53: # leave NEWSHOST blank. Else set it to the name of the host from which you
                     54: # can rcp a copy of the active file.
                         # When it is set, $NEWS/active is fetched from that host with rcp into
                         # a temporary file before the survey is computed.
                     55: NEWSHOST=
                     56: 
                     57: # uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
                     58: # summarypath="[email protected] $USER"
                         # summarypath is the space-separated recipient list appended to the
                         # mail command: the survey collector first, then (presumably) a local
                         # account that receives the cc.
                     59: summarypath="ihnp4!decwrl!netsurvey usenet"
                     60: 
                     61: # We need to find the uucp name of your host. If this code doesn't work,
                     62: # then just put it in literally like this:
                     63: #      hostname="ihnp4"
                     64: 
                     65: case $STYPE in
                     66:        bsd) hostname=`(uuname -l || hostname) 2>&-`;;
                     67:         sysv)hostname=`(uname -n || uuname -l ||  hostname) 2>&-`;;
                     68:        *)   hostname=`(uuname -l) 2>&-`;;
                     69: esac;
                     70: 
                     71: PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin
                     72: ############################################################################
                     73: export PATH
                     74: # ---------------------------------------------------------------------------
                         # Remove the arb.* work files on exit (0) and on signals 1, 2, 3 and 15.
                         # The trap text is double-quoted, so $TMPDIR and $$ are expanded now,
                         # when the trap is set, not when it fires.
                     75: trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15
                         # Split the output of 'date' into the positional parameters; the report
                         # date is words 2 and 6 run together.
                         # NOTE(review): presumably month and year, which assumes the traditional
                         # six-word 'date' output -- confirm on your system.
                     76: set `date`
                     77: dat="$2$6"
                     78: # destination="mailx -s arbitron-${dat} $summarypath"
                         # Mail command for the report: $MAILER if it is set, otherwise "mail",
                         # followed by the recipients in $summarypath.
                     79: destination="${MAILER-mail} $summarypath"
                     80: 
                     81: ################################
                     82: # Here are several expressions, each of which figures out approximately how
                     83: # many people use this machine. Comment out all but 1 of them; pick the one
                     84: # you like best. Initially the most universal but least reliable of them is
                     85: # uncommented.
                     86: # # ###### Scheme #1: fast but usually returns too big a number
                         # Counts the /etc/passwd entries whose UID (field 3) lies between
                         # lowUID and highUID inclusive.  The doubled backslashes come out of the
                         # backquotes as \$3, so awk rather than the shell sees $3, while
                         # $lowUID and $highUID are expanded by the shell.
                     87: nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`
                     88: 
                     89: # # ###### Scheme #2 (works with BSD systems)
                     90: #nusers=`last | sort -u +0 -1 | wc -l`
                     91: 
                     92: # # ###### Scheme #3 (works with USG systems)
                     93: #nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`
                     94: 
                     95: # # ###### Scheme #4 (provided by Lindsay Cleveland)
                     96: # # ###### (Same idea as #1, but excludes various junk accounts)
                     97: #awk -F: "\$3 >= $lowUID && \$3 <= $highUID{printf \"if test -d %s ; then echo %s;egrep : %s/.newsrc; fi\n\",\$6,\$1,\$6}" \
                     98: #      </etc/passwd | sh 2>/dev/null | awk  -f $TMPDIR/arb.sel.$$ >$TMPDIR/arb.tmp.$$
                     99: #nusers=`awk "BEGIN {N=0} NF == 1{N=N+1}END{print N}" <$TMPDIR/arb.tmp.$$`
                    100: ################################
                    101: #
                    102: # Set up awk scripts;  these are too large to pass as arguments on most
                    103: # systems.
                    104: #
                    105: # This awk script generates the actual output report.
                    106: # We use 'sed' to substitute in the shell variables to save ourselves
                    107: # endless hassle trying to find quoting/backslashing problems.
                         # (sed replaces the NUSERS, HOSTNAME and DATE placeholders and deletes
                         # the lines of the awk text that begin with '#'.)
                    108: #
                    109: # The input to this script consists of two types of lines (pre-sorted):
                    110: #
                    111: #      (1) Active-file lines.  These have four fields:  newsgroup name,
                    112: #          last article number, first existing article, 'y' or 'n'
                    113: #          to allow/disallow posting.
                    114: #                      mod.mac 00001 00001 y
                    115: #
                    116: #      (2) .newsrc-derived lines.  These have three fields:  the newsgroup
                    117: #          name, the user name and the articles-read information.  The latter
                    118: #          can be arbitrarily complex.  It can also be arbitrarily long;
                    119: #          this can potentially break either awk or sed, in which
                    120: #          case the script will not work.
                    121: #                      mod.map joe 1-199
                    122: #
                    123: #      The script uses the type 1 lines to define the newsgroups
                    124: #      and their active article ranges.  The .newsrc (type 2) lines are
                    125: #      then used to deduce which users are reading that group (a group
                    126: #      is being read if the last article seen is in that group's active
                    127: #      article range).  The user names are used to keep track of who reads
                    128: #      each group, which isn't all that useful but is interesting.  When
                    129: #      all input has been read, a report is printed summarizing the results.
                         #      A group whose first and last article numbers are equal is never
                         #      counted as read, and only groups with at least one reader are
                         #      printed.
                    130: #
                    131: sed "/^#/d
                    132:      s/NUSERS/$nusers/g
                    133:      s/HOSTNAME/$hostname/g
                    134:      s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
                    135: # makereport -- utility for "arbitron". Early versions were copied from a
                    136: # similar script distributed with "subscribers.sh" by Blonder, McCreery, and
                    137: # Herron.
                    138: #
                    139:        BEGIN   { rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
                    140: #
                    141: # Active file line:  dispose of previous group (if any), record group, and
                    142: # record first and last article numbers.  Set group's reader count to none.
                    143:        NF == 4 { if (grpcount > 0) {
                    144:                        printf("%d %s\n",grpcount, grpname)
                    145:                  }
                    146:                  grpname = $1
                    147:                  grpfirst = $3
                    148:                  grplast = $2
                    149:                  grpcount = 0
                    150:                }
                    151: #
                    152: # .newsrc line.  Break out the final number, which is the last article that
                    153: # has actually been read.  This is a pretty good indicator of the person's
                    154: # true interest in the group.  If 'lastread' for the group is a current
                    155: # (unexpired) article, record a reader for that group.  Finally, record
                    156: # the user as a "real" user of the news system.
                    157: #
                    158:        NF == 3 { n1 = split($3, n2, "-")
                    159:                  n3 = split(n2[n1], n4, ",")
                    160:                  lastread = n4[n3]
                    161:        if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
                    162:                        grpcount++
                    163:                        if (realuser[$2] != 1) {
                    164:                            realuser[$2] = 1
                    165:                            realusers++
                    166:                        }
                    167:                  }
                    168:                }
                    169: #
                    170: # End of file.  Print the report in 2 columns.
                    171:        END     { printf("9999 Host\t\t%s\n","HOSTNAME")
                    172:                  printf("9998 Users\t\t%d\n",NUSERS)
                    173:                  printf("9997 NetReaders\t%d\n",realusers)
                    174:                  printf("9996 ReportDate\t%s\n","DATE")
                    175:                  printf("9995 SystemType\tnews-arbitron-2.3\n")
                    176:                  if (grpcount > 0) {
                    177:                        printf("%d %s\n",grpcount, grpname)
                    178:                  }
                    179:                }
                    180: DOG
                    181: 
                    # Write the awk program that is later run over /etc/passwd with -F:.
                    # For each home directory (field 6) not already seen -- "/" and the
                    # empty string are pre-marked as seen -- it prints one line of shell
                    # which, if <home>/.newsrc is readable, uses sed to print the lines
                    # matching ": [0-9]" with the first ":" replaced by " <login name>".
                    # NOTE(review): login names and home directories are pasted into the
                    # generated shell text unquoted -- this assumes /etc/passwd contains
                    # no shell metacharacters.
                    182: cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
                    183: BEGIN  { seen["/"]=1; seen[""] = 1; }
                    184:        { if (seen[$6]!=1) {
                    185:                printf("if [ -r %s/.newsrc ] ; then ", $6)
                    186:                printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n",$1,$6)
                    187:                seen[$6]=1;
                    188:          }
                    189: }
                    190: MOUSE
                    191: 
                    192: # First, make sure we have an active file
                         # With no NEWSHOST the local $NEWS/active is used directly; otherwise
                         # a copy is fetched with rcp.  The copy is placed under $TMPDIR (not a
                         # hard-coded /tmp) so that it is named like the other arb.*.$$ work
                         # files and is removed by the cleanup trap even if TMPDIR is changed.
                    193: if [ -z "$NEWSHOST" ]
                    194: then ACTIVE=$NEWS/active
                    195: else ACTIVE=$TMPDIR/arb.active.$$
                    196:      rcp $NEWSHOST:$NEWS/active $ACTIVE
                    197: fi
                    198: 
                    199: if [ ! -s $ACTIVE ]
                    200: then
                             # Nothing can be reported without a non-empty active file (this also
                             # catches a failed rcp).  The diagnostic goes to stderr.
                    201:     echo arbitron: ACTIVE file missing or empty. Cannot continue. 1>&2
                    202:     exit 1
                    203: fi
                    204: 
                    205: # Next, collect the subscription lines from every readable .newsrc,
                    206: # visiting each distinct home directory only once.
                         # awk turns /etc/passwd into one shell command per home directory, sh
                         # runs them, and the resulting "group user articles" lines are saved
                         # in arb.tmp.$$.
                    207: awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$
                    208: 
                    209: # Check to make sure that we found some
                         # If so, build the report: active-file lines (group, last, first, flag)
                         # are merged with the .newsrc lines by sort, summarised by the awk
                         # report script, ordered by descending count, stripped of blank lines
                         # and of the 999x sort keys, and piped to the mail command.
                    210: if [ -s $TMPDIR/arb.tmp.$$ ]
                    211: then # See if "active" file has 4 fields or only two (pre-2.10.2)
                    212:      set `sed 1q $ACTIVE`
                    213:      if [ $# -eq 2 ]
                    214:      then egrep  '^[a-z]*\.' $ACTIVE |
                    215:          while read group last
                    216:          do dir=`echo "$group" | sed 's;\.;/;g'`
                                    # The lowest all-digit name in the group's spool directory is its
                                    # first article.  The pattern is anchored at both ends and needs
                                    # at least one digit; the old '^[0-9]*' matched every name, so a
                                    # subdirectory name could be taken as the first article.
                    217:             first=`ls $SPOOL/$dir | grep '^[0-9][0-9]*$' | sort -n | sed 1q`
                                    # No articles found: fall back to $last so the line always has
                                    # four fields (a three-field line would be read by the report
                                    # script as a .newsrc line).  An explicit test works in every
                                    # shell; ${first-$last} never fired because first is always set.
                    218:             case "$first" in
                    219:                "") first=$last;;
                    220:             esac
                    221:             echo "$group $last $first X"
                    222:          done
                    223:      else egrep '^[a-z]*\.' $ACTIVE
                    224:      fi |
                    225:      sort - $TMPDIR/arb.tmp.$$ |
                    226:      awk -f $TMPDIR/arb.fmt.$$ |
                    227:      sort -nr |
                    228:      sed '/^$/d
                    229:          s/^999[0-9] //' |
                    230:      $destination
                    231: else echo Unable to find any readable .newsrc files 1>&2
                    232:      exit 1
                    233: fi

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.