Annotation of 43BSDTahoe/new/news/misc/arbitron, revision 1.1

1.1     ! root        1: #! /bin/sh
        !             2: # @(#)arbitron 2.3     07/15/86
        !             3: # arbitron -- this program produces rating sweeps for USENET.
        !             4: #
        !             5: # Usage: arbitron
        !             6: #
        !             7: # To use this program, edit the "configuration" section below so that the
        !             8: # information is correct for your site, and then run it. It will produce a
        !             9: # readership survey for your machine and mail that survey to decwrl, with
        !            10: # a cc to you.
        !            11: #
        !            12: # To participate in the international monthly ratings sweeps, 
        !            13: # run "arbitron" every month. I will run the statistics program on the last
        !            14: # day of each month; it will include any report that has reached it by that
        !            15: # time. To make sure your site's data is included, run the survey program no
        !            16: # later than the 20th day of each month.
        !            17: #
        !            18: # Brian Reid, DEC Western Research Lab, reid@decwrl
        !            19: # Updated and bugfixed by 
        !            20: #      Spencer Thomas, U.of Utah
        !            21: #      Geoff Kuenning, SAH Consulting
        !            22: # Updated to work with 2.10.1 and older news systems by
        !            23: #      Lindsay Cleveland, AT&T Technologies/Bell Labs
        !            24: # Made to work with 16-bit address spaces by
        !            25: #      Andy Walker, Maths Dept., University of Nottingham, UK
        !            26: #
        !            27: # Note that the results of this program are dependent on the rate at which
        !            28: # you expire news.  If you are a small site that expires news rapidly, the
        !            29: # results may indicate fewer active readers than you actually have.
        !            30: #
        !            31: # copied to a certain extent from the "subscribers"
        !            32: # script posted by Blonder, McCreery, and Herron.
        !            33: ###########################################################################
        !            34: # Configuration information. Edit this section to reflect your site data. #
        !            35: TMPDIR=/tmp
        !            36: NEWS=/usr/lib/news
        !            37: SPOOL=/usr/spool/news
        !            38: 
        !            39: # Make a crude stab at determining the system type
        !            40: STYPE="usg"
        !            41: if test -d /usr/ucb
        !            42: then
        !            43:     # /usr/ucb exists, so treat this as a BSD system
        !            44:     STYPE="bsd"
        !            45: fi
        !            46: 
        !            47: # Range of /etc/passwd UID's that represent actual people (rather than
        !            48: # maintenance accounts or daemons or whatever)
        !            49: lowUID=100
        !            50: highUID=9999
        !            51: 
        !            52: # If you aren't running a distributed news system (nntpd & rrn, usually),
        !            53: # leave NEWSHOST blank. Else set it to the name of the host from which you
        !            54: # can rcp a copy of the active file.
        !            55: NEWSHOST=
        !            56: 
        !            57: # uucp path: {ihnp4, decvax, ucbvax}!decwrl!netsurvey
        !            58: # summarypath="netsurvey@decwrl.dec.com $USER"
        !            59: summarypath="ihnp4!decwrl!netsurvey usenet"
        !            60: 
        !            61: # We need to find the uucp name of your host. If this code doesn't work,
        !            62: # then just put it in literally like this:
        !            63: #      hostname="ihnp4"
        !            64: 
        !            65: case $STYPE in
        !            66:        bsd) hostname=`(uuname -l || hostname) 2>&-`;;
        !            67:         sysv)hostname=`(uname -n || uuname -l ||  hostname) 2>&-`;;
        !            68:        *)   hostname=`(uuname -l) 2>&-`;;
        !            69: esac;
        !            70: 
        !            71: PATH=$NEWS:/usr/local/bin:/usr/ucb:/usr/bin:/bin   # the news library directory is searched first
        !            72: ############################################################################
        !            73: export PATH
        !            74: # ---------------------------------------------------------------------------
        !            75: trap "rm -f $TMPDIR/arb.*.$$; exit" 0 1 2 3 15   # remove our temp files on exit and on signals 1, 2, 3, 15
        !            76: set `date`   # split the date output into positional parameters
        !            77: dat="$2$6"   # 2nd and 6th words of the date output (presumably month and year -- confirm)
        !            78: # destination="mailx -s arbitron-${dat} $summarypath"
        !            79: destination="${MAILER-mail} $summarypath"   # mail command; $MAILER, if set, replaces "mail"
        !            80: 
        !            81: ################################
        !            82: # Here are several expressions, each of which figures out approximately how
        !            83: # many people use this machine. Comment out all but 1 of them; pick the one
        !            84: # you like best. Initially the most universal but least reliable of them is
        !            85: # uncommented.
        !            86: # # ###### Scheme #1: fast but usually returns too big a number
        !            87: nusers=`awk -F: "BEGIN {N=0}\\$3>=$lowUID && \\$3<=$highUID{N=N+1}END{print N}" </etc/passwd`   # count passwd entries with lowUID <= UID <= highUID
        !            88: 
        !            89: # # ###### Scheme #2 (works with BSD systems)
        !            90: #nusers=`last | sort -u +0 -1 | wc -l`
        !            91: 
        !            92: # # ###### Scheme #3 (works with USG systems)
        !            93: #nusers=`who /etc/wtmp | sort -u +0 -1 | wc -l`
        !            94: 
        !            95: # # ###### Scheme #4 (provided by Lindsay Cleveland)
        !            96: # # ###### (Same idea as #1, but excludes various junk accounts)
        !            97: #awk -F: "\$3 >= $lowUID && \$3 <= $highUID{printf \"if test -d %s ; then echo %s;egrep : %s/.newsrc; fi\n\",\$6,\$1,\$6}" \
        !            98: #      </etc/passwd | sh 2>/dev/null | awk  -f $TMPDIR/arb.sel.$$ >$TMPDIR/arb.tmp.$$
        !            99: #nusers=`awk "BEGIN {N=0} NF == 1{N=N+1}END{print N}" <$TMPDIR/arb.tmp.$$`
        !           100: ################################
        !           101: #
        !           102: # Set up awk scripts;  these are too large to pass as arguments on most
        !           103: # systems.
        !           104: #
        !           105: # This awk script generates the actual output report.
        !           106: # We use 'sed' to substitute in the shell variables to save ourselves
        !           107: # endless hassle trying to find quoting/backslashing problems.
        !           108: #
        !           109: # The input to this script consists of two types of lines (pre-sorted):
        !           110: #
        !           111: #      (1) Active-file lines.  These have four fields:  newsgroup name,
        !           112: #          last article number, first existing article, 'y' or 'n'
        !           113: #          to allow/disallow posting.
        !           114: #                      mod.mac 00001 00001 y
        !           115: #
        !           116: #      (2) .newsrc-derived lines.  These have three fields:  the newsgroup
        !           117: #          name, the user name and the articles-read information.  The latter
        !           118: #          can be arbitrarily complex.  It can also be arbitrarily long;
        !           119: #          this can potentially break either awk or sed, in which
        !           120: #          case the script will not work.
        !           121: #                      mod.map joe 1-199
        !           122: #
        !           123: #      The script uses the type 1 lines to define the newsgroups
        !           124: #      and their active article ranges.  The .newsrc (type 2) lines are
        !           125: #      then used to deduce which users are reading that group (a group
        !           126: #      is being read if the last article seen is in that group's active
        !           127: #      article range).  The user names are used to keep track of who reads
        !           128: #      each group, which isn't all that useful but is interesting.  When
        !           129: #      all input has been read, a report is printed summarizing the results.
        !           130: #
        !           131: sed "/^#/d
        !           132:      s/NUSERS/$nusers/g
        !           133:      s/HOSTNAME/$hostname/g
        !           134:      s/DATE/$dat/g" > $TMPDIR/arb.fmt.$$ << 'DOG'
        !           135: # makereport -- utility for "arbitron". Early versions were copied from a
        !           136: # similar script distributed with "subscribers.sh" by Blonder, McCreery, and
        !           137: # Herron.
        !           138: #
        !           139:        BEGIN   { rdrcount = 0 ; reader = "" ; grpcount = 0 ; realusers = 0}
        !           140: # Active file line (4 fields):  print the previous group if it had readers,
        !           141: # then record the new group's name, last ($2) and first ($3) article
        !           142: # numbers, and reset its reader count to zero.
        !           143:        NF == 4 { if (grpcount > 0) {
        !           144:                        printf("%d %s\n",grpcount, grpname)
        !           145:                  }
        !           146:                  grpname = $1
        !           147:                  grpfirst = $3
        !           148:                  grplast = $2
        !           149:                  grpcount = 0
        !           150:                }
        !           151: #
        !           152: # .newsrc line (3 fields).  Take the text after the last "-" of $3, then the
        !           153: # text after its last ",":  that is the last article actually read, a pretty
        !           154: # good indicator of the person's true interest in the group.  If 'lastread'
        !           155: # lies within the group's first..last range (and that range is not a single
        !           156: # article), count a reader for the group and, the first time this user is
        !           157: # seen, count them as a "real" user of the news system.
        !           158:        NF == 3 { n1 = split($3, n2, "-")
        !           159:                  n3 = split(n2[n1], n4, ",")
        !           160:                  lastread = n4[n3]
        !           161:        if ((grpfirst != grplast) && (lastread >= grpfirst) && (lastread <= grplast)) {
        !           162:                        grpcount++
        !           163:                        if (realuser[$2] != 1) {
        !           164:                            realuser[$2] = 1
        !           165:                            realusers++
        !           166:                        }
        !           167:                  }
        !           168:                }
        !           169: # End of file.  Print header lines, keyed 9999-9995 so the later "sort -nr"
        !           170: # lists them first, then flush the last group if it had readers.
        !           171:        END     { printf("9999 Host\t\t%s\n","HOSTNAME")
        !           172:                  printf("9998 Users\t\t%d\n",NUSERS)
        !           173:                  printf("9997 NetReaders\t%d\n",realusers)
        !           174:                  printf("9996 ReportDate\t%s\n","DATE")
        !           175:                  printf("9995 SystemType\tnews-arbitron-2.3\n")
        !           176:                  if (grpcount > 0) {
        !           177:                        printf("%d %s\n",grpcount, grpname)
        !           178:                  }
        !           179:                }
        !           180: DOG
        !           181: 
        !           182: cat >$TMPDIR/arb.pwd.$$ <<'MOUSE'
        !           183: BEGIN  { seen["/"] = 1; seen[""] = 1 }
        !           184: seen[$6] != 1 {
        !           185:        home = $6
        !           186:        printf("if [ -r %s/.newsrc ] ; then ", home)
        !           187:        printf("sed -n '/: [0-9]/s/:/ %s/p' <%s/.newsrc; fi\n", $1, home)
        !           188:        seen[home] = 1
        !           189: }
        !           190: MOUSE
        !           191: 
        !           192: # First, make sure we have an active file (local, or copied from NEWSHOST)
        !           193: if [ -z "$NEWSHOST" ]
        !           194: then ACTIVE=$NEWS/active
        !           195: else ACTIVE=$TMPDIR/arb.active.$$   # under $TMPDIR so the exit trap removes it
        !           196:      rcp $NEWSHOST:$NEWS/active $ACTIVE
        !           197: fi
        !           198: 
        !           199: if [ ! -s $ACTIVE ]
        !           200: then
        !           201:     echo arbitron: ACTIVE file missing or empty. Cannot continue.
        !           202:     exit 1
        !           203: fi
        !           204: 
        !           205: # Next, get the list of .newsrc files with duplicates and unreadable files
        !           206: # removed.
        !           207: awk -F: -f $TMPDIR/arb.pwd.$$ </etc/passwd | sh >$TMPDIR/arb.tmp.$$
        !           208: 
        !           209: # Check to make sure that we found some
        !           210: if [ -s $TMPDIR/arb.tmp.$$ ]
        !           211: then # See if "active" file has 4 fields or only two (pre-2.10.2)
        !           212:      set `sed 1q $ACTIVE`
        !           213:      if [ $# -eq 2 ]
        !           214:      then egrep  '^[a-z]*\.' $ACTIVE |
        !           215:          while read group last
        !           216:          do dir=`echo "$group" | sed 's;\.;/;g'`
        !           217:             # Only all-digit names are articles; skip subgroup directories.
        !           218:             first=`ls $SPOOL/$dir | grep '^[0-9][0-9]*$' | sort -n | sed 1q`
        !           219:             # Empty group: reuse $last so the line still has 4 fields.
        !           220:             if [ -z "$first" ]; then first=$last; fi
        !           221:             echo "$group $last $first X"
        !           222:          done
        !           223:      else egrep '^[a-z]*\.' $ACTIVE
        !           224:      fi |
        !           225:      sort - $TMPDIR/arb.tmp.$$ |
        !           226:      awk -f $TMPDIR/arb.fmt.$$ |
        !           227:      sort -nr |
        !           228:      sed '/^$/d
        !           229:          s/^999[0-9] //' |
        !           230:      $destination
        !           231: else echo Unable to find any readable .newsrc files 1>&2
        !           232:      exit 1
        !           233: fi

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.