Annotation of researchv10dc/man/adm/man3/re.3, revision 1.1.1.1

1.1       root        1: .TH RE 3
                      2: .CT 2 data_man
                      3: .SH NAME
                      4: re_bm, re_cw, re_re \(mi string and pattern matching
                      5: .SH SYNOPSIS
                      6: .nf
                      7: .2C
                      8: .B "#include <re.h>"
                      9: .PP
                     10: .B "re_bm *re_bmcomp(b, e, map)"
                     11: .B "char *b, *e;"
                     12: .B "unsigned char map[256];"
                     13: .PP
                     14: .B "int re_bmexec(pat, rdfn, matchfn)"
                     15: .B re_bm *pat;
                     16: .B int (*rdfn)(), (*matchfn)();
                     17: .PP
                     18: .B void re_bmfree(pat);
                     19: .B re_bm *pat;
                     20: .PP
                     21: .BR "re_cw *re_cwinit(map)"
                     22: .B unsigned char map[256];
                     23: .PP
                     24: .BR "void re_cwadd(pat, b, e)"
                     25: .B re_cw *pat;
                     26: .B char *b, *e;
                     27: .PP
                     28: .BR "void re_cwcomp(pat)"
                     29: .B re_cw *pat;
                     30: .PP
                     31: .B "int re_cwexec(pat, rdfn, matchfn)"
                     32: .B re_cw *pat;
                     33: .B int (*rdfn)(), (*matchfn)();
                     34: .PP
                     35: .B void re_cwfree(pat);
                     36: .B re_cw *pat;
                     37: .PP
                     38: .BR "re_re *re_recomp(b, e, map)"
                     39: .B char *b, *e;
                     40: .B unsigned char map[256];
                     41: .PP
                     42: .B "int re_reexec(pat, b, e, match)"
                     43: .B re_re *pat;
                     44: .B char *b, *e, *match[10][2];
                     45: .PP
                     46: .B void re_refree(pat);
                     47: .B re_re *pat;
                     48: .PP
                     49: .B void re_error(str);
                     50: .B char *str;
                     51: .1C
                     52: .fi
                     53: .SH DESCRIPTION
                     54: These routines search for patterns in strings.
                     55: The
                     56: .I re_re
                     57: routines search for general regular expressions (defined below)
                     58: using a lazily evaluated deterministic finite automaton.
                     59: The more specialized and faster
                     60: .I re_cw
                     61: routines search for multiple literal strings
                     62: using the Commentz-Walter algorithm.
                     63: The still more specialized and efficient
                     64: .I re_bm
                     65: routines search for a single string using the Boyer-Moore algorithm.
                     66: The routines handle strings designated by pointers to
                     67: the first character of the string
                     68: and to the character following the string.
                     69: .PP
                     70: To use the
                     71: .I re_bm
                     72: routines, first build a recognizer by calling
                     73: .IR re_bmcomp ,
                     74: which takes the search string and a character map;
                     75: all characters are compared after mapping.
                     76: The recognizer can be run (multiple times) by calling
                     77: .IR re_bmexec ,
                     78: which stops and returns the first non-positive return from either
                     79: .I rdfn
                     80: or
                     81: .IR matchfn .
                     82: The recognizer calls the supplied function
                     83: .I rdfn
                     84: to obtain input and
                     85: .I matchfn
                     86: to report text matching the search string.
                     87: .PP
                     88: .I Rdfn
                     89: should be declared as
                     90: .IP
                     91: .EX
                     92: int rdfn(pb, pe)
                     93: char **pb, **pe;
                     94: .EE
                     95: .LP
                     96: where
                     97: .B *pb
                     98: and
                     99: .B *pe
                    100: delimit an as yet unprocessed text fragment
                    101: (none if
                    102: .LR *pb==*pe )
                    103: to be saved across the call to
                    104: .IR rdfn .
                    105: On return,
                    106: .B *pb
                    107: and
                    108: .B *pe
                    109: point to the new text, including the saved fragment.
                    110: .I Rdfn
                    111: returns 0 for EOF, negative for error, and positive otherwise.
                    112: The first call to
                    113: .I rdfn
                    114: from each invocation of
                    115: .I re_bmexec
                    116: has
                    117: .BR *pb==0 .
                    118: .PP
                    119: .I Matchfn
                    120: should be declared as
                    121: .IP
                    122: .EX
                    123: int matchfn(pb, pe)
                    124: char **pb, **pe;
                    125: .EE
                    126: .LP
                    127: where
                    128: .B *pb
                    129: and
                    130: .B *pe
                    131: delimit the matched text.
                    132: .I Matchfn
                    133: sets
                    134: .BR *pb ,
                    135: .BR *pe ,
                    136: and returns a value in the same way as
                    137: .IR rdfn .
                    138: .PP
                    139: To use the
                    140: .I re_cw
                    141: routines, first build the recognizer by calling
                    142: .IR re_cwinit ,
                    143: then
                    144: .I re_cwadd
                    145: for each string, and finally
                    146: .IR re_cwcomp .
                    147: The recognizer is run by
                    148: .I re_cwexec
                    149: analogously to
                    150: .IR re_bmexec .
                    151: .PP
                    152: A full regular expression recognizer is compiled by
                    153: .I re_recomp
                    154: and executed by
                    155: .IR re_reexec ,
                    156: which returns 1 if there was a match and 0 if there wasn't.
                    157: The strings that match subexpressions are returned in array
                    158: .IR match .
                    159: .L match[0]
                    160: refers to the whole matched expression.
                    161: If
                    162: .I match
                    163: is zero, then no match delimiters are set.
                    164: .PP
                    165: The routine
                    166: .I re_error
                    167: prints its argument on standard error and exits.
                    168: You may supply your own version for specialized error handling.
                    169: .PP
                    170: The recognizers that these routines construct occupy storage
                    171: obtained from
                    172: .IR malloc (3).
                    173: The storage can be deallocated by
                    174: .IR free .
                    175: .SS Regular Expressions
                    176: The syntax for a regular expression
                    177: .B e0
                    178: is
                    179: .EX
                    180: e3:  literal | charclass | '.' | '^' | '$' | '\e'\fIn\fP | '(' e0 ')'
                    181: 
                    182: e2:  e3
                    183:   |  e2 REP
                    184: REP: '*' | '+' | '?'
                    185: 
                    186: e1:  e2
                    187:   |  e1 e2
                    188: 
                    189: e0:  e1
                    190:   |  e0 ALT e1
                    191: ALT: '|' | newline
                    192: .EE
                    193: .PP
                    194: A literal is any non-metacharacter or a metacharacter
                    195: (one of
                    196: .BR .*+?[]()|\e^$ )
                    197: preceded by 
                    198: .LR \e .
                    199: .PP
                    200: A charclass is a nonempty string
                    201: .I s
                    202: bracketed
                    203: .BI [ \|s\| ]
                    204: (or
                    205: .BI [^ s\| ]\fR);
                    206: it matches any character in (or not in)
                    207: .IR s .
                    208: In
                    209: .IR s ,
                    210: the metacharacters other than
                    211: .L ]
                    212: have no special meaning, and
                    213: .L ]
                    214: may only appear as
                    215: the first letter.
                    216: A substring 
                    217: .IB a - b ,
                    218: with
                    219: .I a
                    220: and
                    221: .I b
                    222: in ascending
                    223: .SM ASCII 
                    224: order, stands for the inclusive
                    225: range of
                    226: .SM ASCII 
                    227: characters between
                    228: .I a
                    229: and
                    230: .IR b .
                    231: .PP
                    232: A
                    233: .L \e
                    234: followed by a digit 
                    235: .I n
                    236: matches a copy of the string that the
                    237: parenthesized subexpression beginning with the
                    238: .IR n th
                    239: .LR ( ,
                    240: counting from 1, matched.
                    241: .PP
                    242: A 
                    243: .L .
                    244: matches any character.
                    245: .PP
                    246: A
                    247: .L ^
                    248: matches the beginning of the input string;
                    249: .L $
                    250: matches the end.
                    251: .PP
                    252: The 
                    253: .B REP
                    254: operators match zero or more
                    255: .RB ( * ),
                    256: one or more
                    257: .RB ( + ),
                    258: and zero or one
                    259: .RB ( ? )
                    260: instances respectively of the preceding regular expression 
                    261: .BR e2 .
                    262: .PP
                    263: A concatenated regular expression,
                    264: .BR "e1 e2" ,
                    265: matches a match to 
                    266: .B e1
                    267: followed by a match to
                    268: .BR e2 .
                    269: .PP
                    270: An alternative regular expression,
                    271: .BR "e0 ALT e1" ,
                    272: matches either a match to
                    273: .B e0
                    274: or a match to
                    275: .BR e1 .
                    276: .PP
                    277: A match to any part of a regular expression
                    278: extends as far as possible without preventing
                    279: a match to the remainder of the regular expression.
                    280: .SH SEE ALSO
                    281: .IR regexp (3),
                    282: .IR gre (1)
                    283: .SH DIAGNOSTICS
                    284: Routines that return pointers return 0 on error.
                    285: .SH BUGS
                    286: Between 
                    287: .IR re (3)
                    288: and
                    289: .IR regexp (3)
                    290: there are too many routines.

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.