Annotation of researchv10dc/man/man3/re.3, revision 1.1

1.1     ! root        1: .TH RE 3
        !             2: .CT 2 data_man
        !             3: .SH NAME
        !             4: re_bm, re_cw, re_re \(mi string and pattern matching
        !             5: .SH SYNOPSIS
        !             6: .nf
        !             7: .2C
        !             8: .B "#include <re.h>"
        !             9: .PP
        !            10: .B "re_bm *re_bmcomp(b, e, map)"
        !            11: .B "char *b, *e;"
        !            12: .B "unsigned char map[256];"
        !            13: .PP
        !            14: .B "int re_bmexec(pat, rdfn, matchfn)"
        !            15: .B re_bm *pat;
        !            16: .B int (*rdfn)(), (*matchfn)();
        !            17: .PP
        !            18: .B void re_bmfree(pat)
        !            19: .B re_bm *pat;
        !            20: .PP
        !            21: .BR "re_cw *re_cwinit(map)"
        !            22: .B unsigned char map[256];
        !            23: .PP
        !            24: .BR "void re_cwadd(pat, b, e)"
        !            25: .B re_cw *pat;
        !            26: .B char *b, *e;
        !            27: .PP
        !            28: .BR "void re_cwcomp(pat)"
        !            29: .B re_cw *pat;
        !            30: .PP
        !            31: .B "int re_cwexec(pat, rdfn, matchfn)"
        !            32: .B re_cw *pat;
        !            33: .B int (*rdfn)(), (*matchfn)();
        !            34: .PP
        !            35: .B void re_cwfree(pat)
        !            36: .B re_cw *pat;
        !            37: .PP
        !            38: .BR "re_re *re_recomp(b, e, map)"
        !            39: .B char *b, *e;
        !            40: .B unsigned char map[256];
        !            41: .PP
        !            42: .B "int re_reexec(pat, b, e, match)"
        !            43: .B re_re *pat;
        !            44: .B char *b, *e, *match[10][2];
        !            45: .PP
        !            46: .B void re_refree(pat)
        !            47: .B re_re *pat;
        !            48: .PP
        !            49: .B void re_error(str)
        !            50: .B char *str;
        !            51: .1C
        !            52: .fi
        !            53: .SH DESCRIPTION
        !            54: These routines search for patterns in strings.
        !            55: The
        !            56: .I re_re
        !            57: routines search for general regular expressions (defined below)
        !            58: using a lazily evaluated deterministic finite automaton.
        !            59: The more specialized and faster
        !            60: .I re_cw
        !            61: routines search for multiple literal strings
        !            62: using the Commentz-Walter algorithm.
        !            63: The still more specialized and efficient
        !            64: .I re_bm
        !            65: routines search for a single string using the Boyer-Moore algorithm.
        !            66: The routines handle strings designated by pointers to
        !            67: the first character of the string
        !            68: and to the character following the string.
        !            69: .PP
        !            70: To use the
        !            71: .I re_bm
        !            72: routines, first build a recognizer by calling
        !            73: .IR re_bmcomp ,
        !            74: which takes the search string and a character map;
        !            75: all characters are compared after mapping.
        !            76: Typically,
        !            77: .I map
        !            78: is initialized by a loop similar to
        !            79: .EX
        !            80: for(i = 0; i < 256; i++) map[i] = i;
        !            81: .EE
        !            82: and its value is no longer required after the call to
        !            83: .IR re_bmcomp .
        !            84: The recognizer can be run (multiple times) by calling
        !            85: .IR re_bmexec ,
        !            86: which stops and returns the first non-positive return from either
        !            87: .I rdfn
        !            88: or
        !            89: .IR matchfn .
        !            90: The recognizer calls the supplied function
        !            91: .I rdfn
        !            92: to obtain input and
        !            93: .I matchfn
        !            94: to report text matching the search string.
        !            95: .PP
        !            96: .I Rdfn
        !            97: should be declared as
        !            98: .IP
        !            99: .EX
        !           100: int rdfn(pb, pe)
        !           101: char **pb, **pe;
        !           102: .EE
        !           103: .LP
        !           104: where
        !           105: .B *pb
        !           106: and
        !           107: .B *pe
        !           108: delimit an as yet unprocessed text fragment
        !           109: (none if
        !           110: .LR *pb==*pe )
        !           111: to be saved across the call to
        !           112: .IR rdfn .
        !           113: On return,
        !           114: .B *pb
        !           115: and
        !           116: .B *pe
        !           117: point to the new text, including the saved fragment.
        !           118: .I Rdfn
        !           119: returns 0 for EOF, negative for error, and positive otherwise.
        !           120: The first call to
        !           121: .I rdfn
        !           122: from each invocation of
        !           123: .I re_bmexec
        !           124: has
        !           125: .BR *pb==0 .
        !           126: .PP
        !           127: .I Matchfn
        !           128: should be declared as
        !           129: .IP
        !           130: .EX
        !           131: int matchfn(pb, pe)
        !           132: char **pb, **pe;
        !           133: .EE
        !           134: .LP
        !           135: where
        !           136: .B *pb
        !           137: and
        !           138: .B *pe
        !           139: delimit the matched text.
        !           140: .I Matchfn
        !           141: sets
        !           142: .BR *pb ,
        !           143: .BR *pe ,
        !           144: and returns a value in the same way as
        !           145: .IR rdfn .
        !           146: .PP
        !           147: To use the
        !           148: .I re_cw
        !           149: routines, first build the recognizer by calling
        !           150: .IR re_cwinit ,
        !           151: then
        !           152: .I re_cwadd
        !           153: for each string, and finally
        !           154: .IR re_cwcomp .
        !           155: The recognizer is run by
        !           156: .I re_cwexec
        !           157: analogously to
        !           158: .IR re_bmexec .
        !           159: .PP
        !           160: A full regular expression recognizer is compiled by
        !           161: .I re_recomp
        !           162: and executed by
        !           163: .IR re_reexec ,
        !           164: which returns 1 if there was a match and 0 if there wasn't.
        !           165: The strings that match subexpressions are returned in array
        !           166: .I match
        !           167: using the above convention.
        !           168: .L match[0]
        !           169: refers to the whole matched expression.
        !           170: If
        !           171: .I match
        !           172: is zero, then no match delimiters are set.
        !           173: .PP
        !           174: The routine
        !           175: .I re_error
        !           176: prints its argument on standard error and exits.
        !           177: You may supply your own version for specialized error handling.
        !           178: If
        !           179: .I re_error
        !           180: returns rather than exits, the compiling routines (e.g.
        !           181: .IR re_bmcomp )
        !           182: will return 0.
        !           183: .PP
        !           184: The recognizers that these routines construct occupy storage
        !           185: obtained from
        !           186: .IR malloc (3).
        !           187: The storage can be deallocated by the corresponding routine,
        !           188: .IR re_bmfree ", " re_cwfree ", or " re_refree .
        !           189: .SS Regular Expressions
        !           190: The syntax for a regular expression
        !           191: .B e0
        !           192: is
        !           193: .EX
        !           194: e3:  literal | charclass | '.' | '^' | '$' | '\e'\fIn\fP | '(' e0 ')'
        !           195: 
        !           196: e2:  e3
        !           197:   |  e2 REP
        !           198: REP: '*' | '+' | '?' | '\e{' RANGE '\e}'
        !           199: RANGE: int | int ',' | int ',' int
        !           200: 
        !           201: e1:  e2
        !           202:   |  e1 e2
        !           203: 
        !           204: e0:  e1
        !           205:   |  e0 ALT e1
        !           206: ALT: '|' | newline
        !           207: .EE
        !           208: .PP
        !           209: A literal is any non-metacharacter or a metacharacter
        !           210: (one of
        !           211: .BR .*+?[]()|\e^$ )
        !           212: preceded by 
        !           213: .LR \e .
        !           214: .PP
        !           215: A charclass is a nonempty string
        !           216: .I s
        !           217: bracketed
        !           218: .BI [ \|s\| ]
        !           219: (or
        !           220: .BI [^ s\| ]\fR);
        !           221: it matches any character in (or not in)
        !           222: .IR s .
        !           223: In
        !           224: .IR s ,
        !           225: the metacharacters other than
        !           226: .L ]
        !           227: have no special meaning, and
        !           228: .L ]
        !           229: may only appear as
        !           230: the first character.
        !           231: A substring 
        !           232: .IB a - b ,
        !           233: with
        !           234: .I a
        !           235: and
        !           236: .I b
        !           237: in ascending
        !           238: .SM ASCII 
        !           239: order, stands for the inclusive
        !           240: range of
        !           241: .SM ASCII 
        !           242: characters between
        !           243: .I a
        !           244: and
        !           245: .IR b .
        !           246: .PP
        !           247: A
        !           248: .L \e
        !           249: followed by a digit 
        !           250: .I n
        !           251: matches a copy of the string that the
        !           252: parenthesized subexpression beginning with the
        !           253: .IR n th
        !           254: .LR ( ,
        !           255: counting from 1, matched.
        !           256: .PP
        !           257: A 
        !           258: .L .
        !           259: matches any character.
        !           260: .PP
        !           261: A
        !           262: .L ^
        !           263: matches the beginning of the input string;
        !           264: .L $
        !           265: matches the end.
        !           266: .PP
        !           267: The 
        !           268: .B REP
        !           269: operators match zero or more
        !           270: .RB ( * ),
        !           271: one or more
        !           272: .RB ( + ),
        !           273: zero or one
        !           274: .RB ( ? ),
        !           275: exactly
        !           276: .I m
        !           277: .BI \f1(\fP\e{ m \e}\f1),\fP
        !           278: .I m
        !           279: or more
        !           280: .BI \f1(\fP\e{ m ,\e}\f1),\fP
        !           281: and any number between
        !           282: .I m
        !           283: and
        !           284: .I n
        !           285: inclusive
        !           286: .BI \f1(\fP\e{ m , n \e}\f1),\fP
        !           287: instances respectively of the preceding regular expression 
        !           288: .BR e2 .
        !           289: .PP
        !           290: A concatenated regular expression,
        !           291: .BR "e1 e2" ,
        !           292: matches a match to 
        !           293: .B e1
        !           294: followed by a match to
        !           295: .BR e2 .
        !           296: .PP
        !           297: An alternative regular expression,
        !           298: .BR "e0 ALT e1" ,
        !           299: matches either a match to
        !           300: .B e0
        !           301: or a match to
        !           302: .BR e1 .
        !           303: .PP
        !           304: A match to any part of a regular expression
        !           305: extends as far as possible without preventing
        !           306: a match to the remainder of the regular expression.
        !           307: .SH SEE ALSO
        !           308: .IR regexp (3),
        !           309: .IR gre (1)
        !           310: .SH DIAGNOSTICS
        !           311: Routines that return pointers return 0 on error.
        !           312: .SH BUGS
        !           313: Between 
        !           314: .IR re (3)
        !           315: and
        !           316: .IR regexp (3)
        !           317: there are too many routines.

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.