Annotation of lucent/sys/man/1/awk, revision 1.1.1.1

1.1       root        1: .TH AWK 1
                      2: .SH NAME
                      3: awk \- pattern-directed scanning and processing language
                      4: .SH SYNOPSIS
                      5: .B awk
                      6: [
                      7: .BI -F fs
                      8: ]
                      9: [
                     10: .BI -v
                     11: .I var=value
                     12: ]
                     13: [
                     14: .BI -mr n
                     15: ]
                     16: [
                     17: .BI -mf n
                     18: ]
                     19: [
                     20: .B -f
                     21: .I prog
                     22: |
                     23: .I prog
                     24: ]
                     25: [
                     26: .I file ...
                     27: ]
                     28: .SH DESCRIPTION
                     29: .I Awk
                     30: scans each input
                     31: .I file
                     32: for lines that match any of a set of patterns specified literally in
                     33: .IR prog
                     34: or in one or more files
                     35: specified as
                     36: .B -f
                     37: .IR file .
                     38: With each pattern
                     39: there can be an associated action that will be performed
                     40: when a line of a
                     41: .I file
                     42: matches the pattern.
                     43: Each line is matched against the
                     44: pattern portion of every pattern-action statement;
                     45: the associated action is performed for each matched pattern.
                     46: The file name 
                     47: .L -
                     48: means the standard input.
                     49: Any
                     50: .IR file
                     51: of the form
                     52: .I var=value
                     53: is treated as an assignment, not a file name,
                     54: and is executed at the time it would have been opened if it were a file name.
                     55: The option
                     56: .B -v
                     57: followed by
                     58: .I var=value
                     59: is an assignment to be done before
                     60: .I prog
                     61: is executed;
                     62: any number of
                     63: .B -v
                     64: options may be present.
                     65: .PP
                     66: An input line is normally made up of fields separated by white space,
                     67: or by regular expression
                     68: .IR fs .
                     69: The fields are denoted
                     70: .BR $1 ,
                     71: .BR $2 ,
                     72: \&..., while
                     73: .B $0
                     74: refers to the entire line.
                     75: .PP
                     76: To compensate for inadequate implementation of storage management,
                     77: the 
                     78: .B -mr
                     79: option can be used to set the maximum size of the input record,
                     80: and the
                     81: .B -mf
                     82: option to set the maximum number of fields.
                     83: .PP
                     84: A pattern-action statement has the form
                     85: .IP
                     86: .IB pattern " { " action " }
                     87: .PP
                     88: A missing 
                     89: .BI { " action " }
                     90: means print the line;
                     91: a missing pattern always matches.
                     92: Pattern-action statements are separated by newlines or semicolons.
                     93: .PP
                     94: An action is a sequence of statements.
                     95: A statement can be one of the following:
                     96: .PP
                     97: .EX
                     98: .ta \w'\fLdelete array[expression]'u
                     99: if(\fI expression \fP)\fI statement \fP\fR[ \fPelse\fI statement \fP\fR]\fP
                    100: while(\fI expression \fP)\fI statement\fP
                    101: for(\fI expression \fP;\fI expression \fP;\fI expression \fP)\fI statement\fP
                    102: for(\fI var \fPin\fI array \fP)\fI statement\fP
                    103: do\fI statement \fPwhile(\fI expression \fP)
                    104: break
                    105: continue
                    106: {\fR [\fP\fI statement ... \fP\fR] \fP}
                    107: \fIexpression\fP       #\fR commonly\fP\fI var = expression\fP
                    108: print\fR [ \fP\fIexpression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
                    109: printf\fI format \fP\fR[ \fP,\fI expression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
                    110: return\fR [ \fP\fIexpression \fP\fR]\fP
                    111: next   #\fR skip remaining patterns on this input line\fP
                    112: delete\fI array\fP[\fI expression \fP] #\fR delete an array element\fP
                    113: exit\fR [ \fP\fIexpression \fP\fR]\fP  #\fR exit immediately; status is \fP\fIexpression\fP
                    114: .EE
                    115: .DT
                    116: .PP
                    117: Statements are terminated by
                    118: semicolons, newlines or right braces.
                    119: An empty
                    120: .I expression-list
                    121: stands for
                    122: .BR $0 .
                    123: String constants are quoted \&\fL"\ "\fR,
                    124: with the usual C escapes recognized within.
                    125: Expressions take on string or numeric values as appropriate,
                    126: and are built using the operators
                    127: .B + - * / % ^
                    128: (exponentiation), and concatenation (indicated by white space).
                    129: The operators
                    130: .B
                    131: ! ++ -- += -= *= /= %= ^= > >= < <= == != ?:
                    132: are also available in expressions.
                    133: Variables may be scalars, array elements
                    134: (denoted
                    135: .IB x  [ i ] )
                    136: or fields.
                    137: Variables are initialized to the null string.
                    138: Array subscripts may be any string,
                    139: not necessarily numeric;
                    140: this allows for a form of associative memory.
                    141: Multiple subscripts such as
                    142: .B [i,j,k]
                    143: are permitted; the constituents are concatenated,
                    144: separated by the value of
                    145: .BR SUBSEP .
                    146: .PP
                    147: The
                    148: .B print
                    149: statement prints its arguments on the standard output
                    150: (or on a file if
                    151: .BI > file
                    152: or
                    153: .BI >> file
                    154: is present or on a pipe if
                    155: .BI | cmd
                    156: is present), separated by the current output field separator,
                    157: and terminated by the output record separator.
                    158: .I file
                    159: and
                    160: .I cmd
                    161: may be literal names or parenthesized expressions;
                    162: identical string values in different statements denote
                    163: the same open file.
                    164: The
                    165: .B printf
                    166: statement formats its expression list according to the format
                    167: (see
                    168: .IR fprintf (2)) .
                    169: The built-in function
                    170: .BI close( expr )
                    171: closes the file or pipe
                    172: .IR expr .
                    173: .PP
                    174: The mathematical functions
                    175: .BR exp ,
                    176: .BR log ,
                    177: .BR sqrt ,
                    178: .BR sin ,
                    179: .BR cos ,
                    180: and
                    181: .BR atan2 
                    182: are built in.
                    183: Other built-in functions:
                    184: .TF length
                    185: .TP
                    186: .B length
                    187: the length of its argument
                    188: taken as a string,
                    189: or of
                    190: .B $0
                    191: if no argument.
                    192: .TP
                    193: .B rand
                    194: random number on (0,1)
                    195: .TP
                    196: .B srand
                    197: sets seed for
                    198: .B rand
                    199: and returns the previous seed.
                    200: .TP
                    201: .B int
                    202: truncates to an integer value
                    203: .TP
                    204: .B utf
                    205: converts its numerical argument, a character number, to a
                    206: .SM UTF
                    207: string
                    208: .TP
                    209: .BI substr( s , " m" , " n\fL)
                    210: the
                    211: .IR n -character
                    212: substring of
                    213: .I s
                    214: that begins at position
                    215: .IR m 
                    216: counted from 1.
                    217: .TP
                    218: .BI index( s , " t" )
                    219: the position in
                    220: .I s
                    221: where the string
                    222: .I t
                    223: occurs, or 0 if it does not.
                    224: .TP
                    225: .BI match( s , " r" )
                    226: the position in
                    227: .I s
                    228: where the regular expression
                    229: .I r
                    230: occurs, or 0 if it does not.
                    231: The variables
                    232: .B RSTART
                    233: and
                    234: .B RLENGTH
                    235: are set to the position and length of the matched string.
                    236: .TP
                    237: .BI split( s , " a" , " fs\fL)
                    238: splits the string
                    239: .I s
                    240: into array elements
                    241: .IB a [1]\f1,
                    242: .IB a [2]\f1,
                    243: \&...,
                    244: .IB a [ n ]\f1,
                    245: and returns
                    246: .IR n .
                    247: The separation is done with the regular expression
                    248: .I fs
                    249: or with the field separator
                    250: .B FS
                    251: if
                    252: .I fs
                    253: is not given.
                    254: .TP
                    255: .BI sub( r , " t" , " s\fL)
                    256: substitutes
                    257: .I t
                    258: for the first occurrence of the regular expression
                    259: .I r
                    260: in the string
                    261: .IR s .
                    262: If
                    263: .I s
                    264: is not given,
                    265: .B $0
                    266: is used.
                    267: .TP
                    268: .B gsub
                    269: same as
                    270: .B sub
                    271: except that all occurrences of the regular expression
                    272: are replaced;
                    273: .B sub
                    274: and
                    275: .B gsub
                    276: return the number of replacements.
                    277: .TP
                    278: .BI sprintf( fmt , " expr" , " ...\fL)
                    279: the string resulting from formatting
                    280: .I expr ...
                    281: according to the
                    282: .I printf
                    283: format
                    284: .I fmt
                    285: .TP
                    286: .BI system( cmd )
                    287: executes
                    288: .I cmd
                    289: and returns its exit status
                    290: .PD
                    291: .PP
                    292: The ``function''
                    293: .B getline
                    294: sets
                    295: .B $0
                    296: to
                    297: the next input record from the current input file;
                    298: .B getline
                    299: .BI < file
                    300: sets
                    301: .B $0
                    302: to the next record from
                    303: .IR file .
                    304: .B getline
                    305: .I x
                    306: sets variable
                    307: .I x
                    308: instead.
                    309: Finally,
                    310: .IB cmd " | getline
                    311: pipes the output of
                    312: .I cmd
                    313: into
                    314: .BR getline ;
                    315: each call of
                    316: .B getline
                    317: returns the next line of output from
                    318: .IR cmd .
                    319: In all cases,
                    320: .B getline
                    321: returns 1 for a successful input,
                    322: 0 for end of file, and \-1 for an error.
                    323: .PP
                    324: Patterns are arbitrary Boolean combinations
                    325: (with
                    326: .BR "! || &&" )
                    327: of regular expressions and
                    328: relational expressions.
                    329: Regular expressions are as in
                    330: .IR regexp (6).
                    331: Isolated regular expressions
                    332: in a pattern apply to the entire line.
                    333: Regular expressions may also occur in
                    334: relational expressions, using the operators
                    335: .BR ~
                    336: and
                    337: .BR !~ .
                    338: .BI / re /
                    339: is a constant regular expression;
                    340: any string (constant or variable) may be used
                    341: as a regular expression, except in the position of an isolated regular expression
                    342: in a pattern.
                    343: .PP
                    344: A pattern may consist of two patterns separated by a comma;
                    345: in this case, the action is performed for all lines
                    346: from an occurrence of the first pattern
                    347: through an occurrence of the second.
                    348: .PP
                    349: A relational expression is one of the following:
                    350: .IP
                    351: .I expression matchop regular-expression
                    352: .br
                    353: .I expression relop expression
                    354: .br
                    355: .IB expression " in " array-name
                    356: .br
                    357: .BI ( expr , expr,... ") in " array-name
                    358: .PP
                    359: where a
                    360: .I relop
                    361: is any of the six relational operators in C,
                    362: and a
                    363: .I matchop
                    364: is either
                    365: .B ~
                    366: (matches)
                    367: or
                    368: .B !~
                    369: (does not match).
                    370: A conditional is an arithmetic expression,
                    371: a relational expression,
                    372: or a Boolean combination
                    373: of these.
                    374: .PP
                    375: The special patterns
                    376: .B BEGIN
                    377: and
                    378: .B END
                    379: may be used to capture control before the first input line is read
                    380: and after the last.
                    381: .B BEGIN
                    382: and
                    383: .B END
                    384: do not combine with other patterns.
                    385: .PP
                    386: Variable names with special meanings:
                    387: .TF FILENAME
                    388: .TP
                    389: .B FS
                    390: regular expression used to separate fields; also settable
                    391: by option
                    392: .BI -F fs\f1.
                    393: .TP
                    394: .BR NF
                    395: number of fields in the current record
                    396: .TP
                    397: .B NR
                    398: ordinal number of the current record
                    399: .TP
                    400: .B FNR
                    401: ordinal number of the current record in the current file
                    402: .TP
                    403: .B FILENAME
                    404: the name of the current input file
                    405: .TP
                    406: .B RS
                    407: input record separator (default newline)
                    408: .TP
                    409: .B OFS
                    410: output field separator (default blank)
                    411: .TP
                    412: .B ORS
                    413: output record separator (default newline)
                    414: .TP
                    415: .B OFMT
                    416: output format for numbers (default
                    417: .BR "%.6g" )
                    418: .TP
                    419: .B SUBSEP
                    420: separates multiple subscripts (default 034)
                    421: .TP
                    422: .B ARGC
                    423: argument count, assignable
                    424: .TP
                    425: .B ARGV
                    426: argument array, assignable;
                    427: non-null members are taken as file names
                    428: .TP
                    429: .B ENVIRON
                    430: array of environment variables; subscripts are names.
                    431: .PD
                    432: .PP
                    433: Functions may be defined (at the position of a pattern-action statement) thus:
                    434: .IP
                    435: .L
                    436: function foo(a, b, c) { ...; return x }
                    437: .PP
                    438: Parameters are passed by value if scalar and by reference if array name;
                    439: functions may be called recursively.
                    440: Parameters are local to the function; all other variables are global.
                    441: Thus local variables may be created by providing excess parameters in
                    442: the function definition.
                    443: .SH EXAMPLES
                    444: .TP
                    445: .L
                    446: length > 72
                    447: Print lines longer than 72 characters.
                    448: .TP
                    449: .L
                    450: { print $2, $1 }
                    451: Print first two fields in opposite order.
                    452: .PP
                    453: .EX
                    454: BEGIN { FS = ",[ \et]*|[ \et]+" }
                    455:       { print $2, $1 }
                    456: .EE
                    457: .ns
                    458: .IP
                    459: Same, with input fields separated by comma and/or blanks and tabs.
                    460: .PP
                    461: .EX
                    462:        { s += $1 }
                    463: END    { print "sum is", s, " average is", s/NR }
                    464: .EE
                    465: .ns
                    466: .IP
                    467: Add up first column, print sum and average.
                    468: .TP
                    469: .L
                    470: /start/, /stop/
                    471: Print all lines between start/stop pairs.
                    472: .PP
                    473: .EX
                    474: BEGIN  {       # Simulate echo(1)
                    475:        for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i]
                    476:        printf "\en"
                    477:        exit }
                    478: .EE
                    479: .SH SOURCE
                    480: .B /sys/src/cmd/awk
                    481: .SH SEE ALSO
                    482: .IR sed (1),
                    483: .IR regexp (6),
                    484: .br
                    485: A. V. Aho, B. W. Kernighan, P. J. Weinberger,
                    486: .I
                    487: The AWK Programming Language,
                    488: Addison-Wesley, 1988.
                    489: .SH BUGS
                    490: There are no explicit conversions between numbers and strings.
                    491: To force an expression to be treated as a number add 0 to it;
                    492: to force it to be treated as a string concatenate
                    493: \&\fL""\fP to it.
                    494: .br
                    495: The scope rules for variables in functions are a botch;
                    496: the syntax is worse.

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.