Annotation of researchv10dc/man/adm/man1/awk.1, revision 1.1.1.1

1.1       root        1: .TH AWK 1
                      2: .CT 1 files prog_other
                      3: .SH NAME
                      4: awk \- pattern-directed scanning and processing language
                      5: .SH SYNOPSIS
                      6: .B awk
                      7: [
                      8: .BI -F fs
                      9: ]
                     10: [
                     11: .BI -v
                     12: .I var=value
                     13: ]
                     14: [
                     15: .I prog
                     16: ]
                     17: [
                     18: .I file ...
                     19: ]
                     20: .SH DESCRIPTION
                     21: .I Awk
                     22: scans each input
                     23: .I file
                     24: for lines that match any of a set of patterns specified literally in
                     25: .IR prog
                     26: or in one or more files
                     27: specified as
                     28: .B -f
                     29: .IR file .
                     30: With each pattern
                     31: there can be an associated action that will be performed
                     32: when a line of a
                     33: .I file
                     34: matches the pattern.
                     35: Each line is matched against the
                     36: pattern portion of every pattern-action statement;
                     37: the associated action is performed for each matched pattern.
                     38: The file name 
                     39: .L -
                     40: means the standard input.
                     41: Any
                     42: .IR file
                     43: of the form
                     44: .I var=value
                     45: is treated as an assignment, not a filename,
                     46: and is executed at the time it would have been opened if it were a filename.
                     47: The option
                     48: .B -v
                     49: followed by
                     50: .I var=value
                     51: is an assignment to be done before
                     52: .I prog
                     53: is executed;
                     54: any number of
                     55: .B -v
                     56: options may be present.
                     57: .PP
                     58: An input line is made up of fields separated by white space,
                     59: or by regular expression
                     60: .BR FS .
                     61: The fields are denoted
                     62: .BR $1 ,
                     63: .BR $2 ,
                     64: \&..., while
                     65: .B $0
                     66: refers to the entire line.
                     67: .PP
                     68: A pattern-action statement has the form
                     69: .IP
                     70: .IB pattern " { " action " }
                     71: .PP
                     72: A missing 
                     73: .BI { " action " }
                     74: means print the line;
                     75: a missing pattern always matches.
                     76: Pattern-action statements are separated by newlines or semicolons.
                     77: .PP
                     78: An action is a sequence of statements.
                     79: A statement can be one of the following:
                     80: .PP
                     81: .EX
                     82: .ta \w'\f(CWdelete array[expression]'u
                     83: if(\fI expression \fP)\fI statement \fP\fR[ \fPelse\fI statement \fP\fR]\fP
                     84: while(\fI expression \fP)\fI statement\fP
                     85: for(\fI expression \fP;\fI expression \fP;\fI expression \fP)\fI statement\fP
                     86: for(\fI var \fPin\fI array \fP)\fI statement\fP
                     87: do\fI statement \fPwhile(\fI expression \fP)
                     88: break
                     89: continue
                     90: {\fR [\fP\fI statement ... \fP\fR] \fP}
                     91: \fIexpression\fP       #\fR commonly\fP\fI var = expression\fP
                     92: print\fR [ \fP\fIexpression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
                     93: printf\fI format \fP\fR[ \fP,\fI expression-list \fP\fR] \fP\fR[ \fP>\fI expression \fP\fR]\fP
                     94: return\fR [ \fP\fIexpression \fP\fR]\fP
                     95: next   #\fR skip remaining patterns on this input line\fP
                     96: delete\fI array\fP[\fI expression \fP] #\fR delete an array element\fP
                     97: exit\fR [ \fP\fIexpression \fP\fR]\fP  #\fR exit immediately; status is \fP\fIexpression\fP
                     98: .EE
                     99: .DT
                    100: .PP
                    101: Statements are terminated by
                    102: semicolons, newlines or right braces.
                    103: An empty
                    104: .I expression-list
                    105: stands for
                    106: .BR $0 .
                    107: String constants are quoted \&\f(CW"\ "\fR,
                    108: with the usual C escapes recognized within.
                    109: Expressions take on string or numeric values as appropriate,
                    110: and are built using the operators
                    111: .B + - * / % ^
                    112: (exponentiation), and concatenation (indicated by white space).
                    113: The operators
                    114: .B
                    115: ! ++ -- += -= *= /= %= ^= > >= < <= == != && || ?:
                    116: are also available in expressions.
                    117: Variables may be scalars, array elements
                    118: (denoted
                    119: .IB x  [ i ] )
                    120: or fields.
                    121: Variables are initialized to the null string.
                    122: Array subscripts may be any string,
                    123: not necessarily numeric;
                    124: this allows for a form of associative memory.
                    125: Multiple subscripts such as
                    126: .B [i,j,k]
                    127: are permitted; the constituents are concatenated,
                    128: separated by the value of
                    129: .BR SUBSEP .
                    130: .PP
                    131: The
                    132: .B print
                    133: statement prints its arguments on the standard output
                    134: (or on a file if
                    135: .BI > file
                    136: or
                    137: .BI >> file
                    138: is present or on a pipe if
                    139: .BI | cmd
                    140: is present), separated by the current output field separator,
                    141: and terminated by the output record separator.
                    142: .I file
                    143: and
                    144: .I cmd
                    145: may be literal names or parenthesized expressions;
                    146: identical string values in different statements denote
                    147: the same open file.
                    148: The
                    149: .B printf
                    150: statement formats its expression list according to the format
                    151: (see
                    152: .IR printf (3)) .
                    153: The built-in function
                    154: .BI close( expr )
                    155: closes the file or pipe
                    156: .IR expr .
                    157: .PP
                    158: The mathematical functions
                    159: .BR exp ,
                    160: .BR log ,
                    161: .BR sqrt ,
                    162: .BR sin ,
                    163: .BR cos ,
                    164: and
                    165: .BR atan2 
                    166: are built in.
                    167: Other built-in functions:
                    168: .TF length
                    169: .TP
                    170: .B length
                    171: the length of its argument
                    172: taken as a string,
                    173: or of
                    174: .B $0
                    175: if no argument.
                    176: .TP
                    177: .B rand
                    178: random number on (0,1)
                    179: .TP
                    180: .B srand
                    181: sets seed for
                    182: .B rand
                    183: and returns the previous seed.
                    184: .TP
                    185: .B int
                    186: truncates to an integer value
                    187: .TP
                    188: .BI substr( s , " m" , " n\fB)
                    189: the
                    190: .IR n -character
                    191: substring of
                    192: .I s
                    193: that begins at position
                    194: .IR m 
                    195: counted from 1.
                    196: .TP
                    197: .BI index( s , " t" )
                    198: the position in
                    199: .I s
                    200: where the string
                    201: .I t
                    202: occurs, or 0 if it does not.
                    203: .TP
                    204: .BI match( s , " r" )
                    205: the position in
                    206: .I s
                    207: where the regular expression
                    208: .I r
                    209: occurs, or 0 if it does not.
                    210: The variables
                    211: .B RSTART
                    212: and
                    213: .B RLENGTH
                    214: are set to the position and length of the matched string.
                    215: .TP
                    216: .BI split( s , " a" , " fs\fB)
                    217: splits the string
                    218: .I s
                    219: into array elements
                    220: .IB a [1] ,
                    221: .IB a [2] ,
                    222: \&...,
                    223: .IB a [ n ] ,
                    224: and returns
                    225: .IR n .
                    226: The separation is done with the regular expression
                    227: .I fs
                    228: or with the field separator
                    229: .B FS
                    230: if
                    231: .I fs
                    232: is not given.
                    233: .TP
                    234: .BI sub( r , " t" , " s\fB)
                    235: substitutes
                    236: .I t
                    237: for the first occurrence of the regular expression
                    238: .I r
                    239: in the string
                    240: .IR s .
                    241: If
                    242: .I s
                    243: is not given,
                    244: .B $0
                    245: is used.
                    246: .TP
                    247: .B gsub
                    248: same as
                    249: .B sub
                    250: except that all occurrences of the regular expression
                    251: are replaced;
                    252: .B sub
                    253: and
                    254: .B gsub
                    255: return the number of replacements.
                    256: .TP
                    257: .BI sprintf( fmt , " expr" , " ...\fB )
                    258: the string resulting from formatting
                    259: .I expr ...
                    260: according to the
                    261: .IR printf (3)
                    262: format
                    263: .I fmt
                    264: .TP
                    265: .BI system( cmd )
                    266: executes
                    267: .I cmd
                    268: and returns its exit status
                    269: .PD
                    270: .PP
                    271: The ``function''
                    272: .B getline
                    273: sets
                     274: .B $0
                     275: to the next input record from the current input file;
                    276: .B getline
                    277: .BI < file
                    278: sets
                    279: .B $0
                    280: to the next record from
                    281: .IR file .
                    282: .B getline
                    283: .I x
                    284: sets variable
                    285: .I x
                    286: instead.
                    287: Finally,
                    288: .IB cmd " | getline
                    289: pipes the output of
                    290: .I cmd
                    291: into
                    292: .BR getline ;
                    293: each call of
                    294: .B getline
                    295: returns the next line of output from
                    296: .IR cmd .
                    297: In all cases,
                    298: .B getline
                    299: returns 1 for a successful input,
                    300: 0 for end of file, and \-1 for an error.
                    301: .PP
                    302: Patterns are arbitrary Boolean combinations
                    303: (with
                    304: .BR "! || &&" )
                    305: of regular expressions and
                    306: relational expressions.
                    307: Regular expressions are as in
                    308: .IR egrep ; 
                    309: see
                    310: .IR gre (1).
                    311: Isolated regular expressions
                    312: in a pattern apply to the entire line.
                    313: Regular expressions may also occur in
                    314: relational expressions, using the operators
                    315: .BR ~
                    316: and
                    317: .BR !~ .
                    318: .BI / re /
                    319: is a constant regular expression;
                    320: any string (constant or variable) may be used
                    321: as a regular expression, except in the position of an isolated regular expression
                    322: in a pattern.
                    323: .PP
                    324: A pattern may consist of two patterns separated by a comma;
                    325: in this case, the action is performed for all lines
                    326: from an occurrence of the first pattern
                     327: through an occurrence of the second.
                    328: .PP
                    329: A relational expression is one of the following:
                    330: .IP
                    331: .I expression matchop regular-expression
                    332: .br
                    333: .I expression relop expression
                    334: .br
                    335: .IB expression " in " array-name
                    336: .br
                    337: .BI ( expr , expr,... ") in " array-name
                    338: .PP
                    339: where a relop is any of the six relational operators in C,
                    340: and a matchop is either
                    341: .B ~ 
                    342: (matches)
                    343: or
                    344: .B !~
                    345: (does not match).
                    346: A conditional is an arithmetic expression,
                    347: a relational expression,
                    348: or a Boolean combination
                    349: of these.
                    350: .PP
                    351: The special patterns
                    352: .B BEGIN
                    353: and
                    354: .B END
                    355: may be used to capture control before the first input line is read
                    356: and after the last.
                    357: .B BEGIN
                    358: and
                    359: .B END
                    360: do not combine with other patterns.
                    361: .PP
                    362: Variable names with special meanings:
                    363: .TF FILENAME
                    364: .TP
                    365: .B FS
                    366: regular expression used to separate fields; also settable
                    367: by option
                     368: .BI -F fs\fR.
                    369: .TP
                    370: .BR NF
                    371: number of fields in the current record
                    372: .TP
                    373: .B NR
                    374: ordinal number of the current record
                    375: .TP
                    376: .B FNR
                    377: ordinal number of the current record in the current file
                    378: .TP
                    379: .B FILENAME
                    380: the name of the current input file
                    381: .TP
                    382: .B RS
                    383: input record separator (default newline)
                    384: .TP
                    385: .B OFS
                    386: output field separator (default blank)
                    387: .TP
                    388: .B ORS
                    389: output record separator (default newline)
                    390: .TP
                    391: .B OFMT
                    392: output format for numbers (default
                    393: .BR "%.6g" )
                    394: .TP
                    395: .B SUBSEP
                    396: separates multiple subscripts (default 034)
                    397: .TP
                    398: .B ARGC
                    399: argument count, assignable
                    400: .TP
                    401: .B ARGV
                    402: argument array, assignable;
                    403: non-null members are taken as filenames
                    404: .TP
                    405: .B ENVIRON
                    406: array of environment variables; subscripts are names.
                    407: .PD
                    408: .PP
                    409: Functions may be defined (at the position of a pattern-action statement) thus:
                    410: .IP
                    411: .L
                    412: function foo(a, b, c) { ...; return x }
                    413: .PP
                    414: Parameters are passed by value if scalar and by reference if array name;
                    415: functions may be called recursively.
                    416: Parameters are local to the function; all other variables are global.
                    417: Thus local variables may be created by providing excess parameters in
                    418: the function definition.
                    419: .SH EXAMPLES
                    420: .TP
                    421: .L
                    422: length > 72
                    423: Print lines longer than 72 characters.
                    424: .TP
                    425: .L
                    426: { print $2, $1 }
                    427: Print first two fields in opposite order.
                    428: .PP
                    429: .EX
                    430: BEGIN { FS = ",[ \et]*|[ \et]+" }
                    431:       { print $2, $1 }
                    432: .EE
                    433: .ns
                    434: .IP
                    435: Same, with input fields separated by comma and/or blanks and tabs.
                    436: .PP
                    437: .EX
                    438:        { s += $1 }
                    439: END    { print "sum is", s, " average is", s/NR }
                    440: .EE
                    441: .ns
                    442: .IP
                    443: Add up first column, print sum and average.
                    444: .TP
                    445: .L
                    446: /start/, /stop/
                    447: Print all lines between start/stop pairs.
                    448: .PP
                    449: .EX
                    450: BEGIN  {       # Simulate echo(1)
                    451:        for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i]
                    452:        printf "\en"
                    453:        exit }
                    454: .EE
                    455: .SH SEE ALSO
                    456: .IR gre (1),
                    457: .IR lex (1), 
                    458: .IR sed (1)
                    459: .br
                    460: A. V. Aho, B. W. Kernighan, P. J. Weinberger,
                    461: .I
                    462: The AWK Programming Language,
                    463: Addison-Wesley, 1988.
                    464: .SH BUGS
                    465: There are no explicit conversions between numbers and strings.
                    466: To force an expression to be treated as a number add 0 to it;
                    467: to force it to be treated as a string concatenate
                    468: \&\f(CW""\fP to it.
                    469: .br
                    470: The scope rules for variables in functions are a botch;
                    471: the syntax is worse.

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.