/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson.
 *
 * The United States Government has rights in this work pursuant
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms are permitted provided
 * that: (1) source distributions retain this entire copyright notice and
 * comment, and (2) distributions including binaries display the following
 * acknowledgement: ``This product includes software developed by the
 * University of California, Berkeley and its contributors'' in the
 * documentation or other materials provided with the distribution and in
 * all advertising materials mentioning features or use of this software.
 * Neither the name of the University nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
26:
27: #ifndef lint
28: char copyright[] =
29: "@(#) Copyright (c) 1990 The Regents of the University of California.\n\
30: All rights reserved.\n";
31: #endif /* not lint */
32:
33: #ifndef lint
34: static char sccsid[] = "@(#)main.c 5.3 (Berkeley) 6/18/90";
35: #endif /* not lint */
36:
37: /* flex - tool to generate fast lexical analyzers */
38:
39: #include "flexdef.h"
40: #include "pathnames.h"
41:
42: static char flex_version[] = "2.3";
43:
44:
45: /* declare functions that have forward references */
46:
47: void flexinit PROTO((int, char**));
48: void readin PROTO(());
49: void set_up_initial_allocations PROTO(());
50:
51:
52: /* these globals are all defined and commented in flexdef.h */
53: int printstats, syntaxerror, eofseen, ddebug, trace, spprdflt;
54: int interactive, caseins, useecs, fulltbl, usemecs;
55: int fullspd, gen_line_dirs, performance_report, backtrack_report, csize;
56: int yymore_used, reject, real_reject, continued_action;
57: int yymore_really_used, reject_really_used;
58: int datapos, dataline, linenum;
59: FILE *skelfile = NULL;
60: char *infilename = NULL;
61: int onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE];
62: int onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp;
63: int current_mns, num_rules, current_max_rules, lastnfa;
64: int *firstst, *lastst, *finalst, *transchar, *trans1, *trans2;
65: int *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum;
66: int current_state_type;
67: int variable_trailing_context_rules;
68: int numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP];
69: int protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE];
70: int numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1];
71: int tecbck[CSIZE + 1];
72: int *xlation = (int *) 0;
73: int num_xlations;
74: int lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc;
75: char **scname;
76: int current_max_dfa_size, current_max_xpairs;
77: int current_max_template_xpairs, current_max_dfas;
78: int lastdfa, *nxt, *chk, *tnxt;
79: int *base, *def, *nultrans, NUL_ec, tblend, firstfree, **dss, *dfasiz;
80: union dfaacc_union *dfaacc;
81: int *accsiz, *dhash, numas;
82: int numsnpairs, jambase, jamstate;
83: int lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse;
84: int current_max_ccl_tbl_size;
85: Char *ccltbl;
86: char *starttime, *endtime, nmstr[MAXLINE];
87: int sectnum, nummt, hshcol, dfaeql, numeps, eps2, num_reallocs;
88: int tmpuses, totnst, peakpairs, numuniq, numdup, hshsave;
89: int num_backtracking, bol_needed;
90: FILE *temp_action_file;
91: FILE *backtrack_file;
92: int end_of_buffer_state;
93: char *action_file_name = NULL;
94: char **input_files;
95: int num_input_files;
96: char *program_name;
97:
98: #ifndef SHORT_FILE_NAMES
99: static char *outfile = "lex.yy.c";
100: #else
101: static char *outfile = "lexyy.c";
102: #endif
103: static int outfile_created = 0;
104: static int use_stdout;
105: static char *skelname = NULL;
106:
107:
108: int main( argc, argv )
109: int argc;
110: char **argv;
111:
112: {
113: flexinit( argc, argv );
114:
115: readin();
116:
117: if ( syntaxerror )
118: flexend( 1 );
119:
120: if ( yymore_really_used == REALLY_USED )
121: yymore_used = true;
122: else if ( yymore_really_used == REALLY_NOT_USED )
123: yymore_used = false;
124:
125: if ( reject_really_used == REALLY_USED )
126: reject = true;
127: else if ( reject_really_used == REALLY_NOT_USED )
128: reject = false;
129:
130: if ( performance_report )
131: {
132: if ( interactive )
133: fprintf( stderr,
134: "-I (interactive) entails a minor performance penalty\n" );
135:
136: if ( yymore_used )
137: fprintf( stderr, "yymore() entails a minor performance penalty\n" );
138:
139: if ( reject )
140: fprintf( stderr, "REJECT entails a large performance penalty\n" );
141:
142: if ( variable_trailing_context_rules )
143: fprintf( stderr,
144: "Variable trailing context rules entail a large performance penalty\n" );
145: }
146:
147: if ( reject )
148: real_reject = true;
149:
150: if ( variable_trailing_context_rules )
151: reject = true;
152:
153: if ( (fulltbl || fullspd) && reject )
154: {
155: if ( real_reject )
156: flexerror( "REJECT cannot be used with -f or -F" );
157: else
158: flexerror(
159: "variable trailing context rules cannot be used with -f or -F" );
160: }
161:
162: ntod();
163:
164: /* generate the C state transition tables from the DFA */
165: make_tables();
166:
167: /* note, flexend does not return. It exits with its argument as status. */
168:
169: flexend( 0 );
170:
171: /*NOTREACHED*/
172: }
173:
174:
175: /* flexend - terminate flex
176: *
177: * synopsis
178: * int status;
179: * flexend( status );
180: *
181: * status is exit status.
182: *
183: * note
184: * This routine does not return.
185: */
186:
187: void flexend( status )
188: int status;
189:
190: {
191: int tblsiz;
192: char *flex_gettime();
193:
194: if ( skelfile != NULL )
195: {
196: if ( ferror( skelfile ) )
197: flexfatal( "error occurred when writing skeleton file" );
198:
199: else if ( fclose( skelfile ) )
200: flexfatal( "error occurred when closing skeleton file" );
201: }
202:
203: if ( temp_action_file )
204: {
205: if ( ferror( temp_action_file ) )
206: flexfatal( "error occurred when writing temporary action file" );
207:
208: else if ( fclose( temp_action_file ) )
209: flexfatal( "error occurred when closing temporary action file" );
210:
211: else if ( unlink( action_file_name ) )
212: flexfatal( "error occurred when deleting temporary action file" );
213: }
214:
215: if ( status != 0 && outfile_created )
216: {
217: if ( ferror( stdout ) )
218: flexfatal( "error occurred when writing output file" );
219:
220: else if ( fclose( stdout ) )
221: flexfatal( "error occurred when closing output file" );
222:
223: else if ( unlink( outfile ) )
224: flexfatal( "error occurred when deleting output file" );
225: }
226:
227: if ( backtrack_report && backtrack_file )
228: {
229: if ( num_backtracking == 0 )
230: fprintf( backtrack_file, "No backtracking.\n" );
231: else if ( fullspd || fulltbl )
232: fprintf( backtrack_file,
233: "%d backtracking (non-accepting) states.\n",
234: num_backtracking );
235: else
236: fprintf( backtrack_file, "Compressed tables always backtrack.\n" );
237:
238: if ( ferror( backtrack_file ) )
239: flexfatal( "error occurred when writing backtracking file" );
240:
241: else if ( fclose( backtrack_file ) )
242: flexfatal( "error occurred when closing backtracking file" );
243: }
244:
245: if ( printstats )
246: {
247: endtime = flex_gettime();
248:
249: fprintf( stderr, "%s version %s usage statistics:\n", program_name,
250: flex_version );
251: fprintf( stderr, " started at %s, finished at %s\n",
252: starttime, endtime );
253:
254: fprintf( stderr, " scanner options: -" );
255:
256: if ( backtrack_report )
257: putc( 'b', stderr );
258: if ( ddebug )
259: putc( 'd', stderr );
260: if ( interactive )
261: putc( 'I', stderr );
262: if ( caseins )
263: putc( 'i', stderr );
264: if ( ! gen_line_dirs )
265: putc( 'L', stderr );
266: if ( performance_report )
267: putc( 'p', stderr );
268: if ( spprdflt )
269: putc( 's', stderr );
270: if ( use_stdout )
271: putc( 't', stderr );
272: if ( trace )
273: putc( 'T', stderr );
274: if ( printstats )
275: putc( 'v', stderr ); /* always true! */
276: if ( csize == 256 )
277: putc( '8', stderr );
278:
279: fprintf( stderr, " -C" );
280:
281: if ( fulltbl )
282: putc( 'f', stderr );
283: if ( fullspd )
284: putc( 'F', stderr );
285: if ( useecs )
286: putc( 'e', stderr );
287: if ( usemecs )
288: putc( 'm', stderr );
289:
290: if ( strcmp( skelname, _PATH_SKELETONFILE ) )
291: fprintf( stderr, " -S%s", skelname );
292:
293: putc( '\n', stderr );
294:
295: fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns );
296: fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa,
297: current_max_dfas, totnst );
298: fprintf( stderr,
299: " %d rules\n", num_rules - 1 /* - 1 for def. rule */ );
300:
301: if ( num_backtracking == 0 )
302: fprintf( stderr, " No backtracking\n" );
303: else if ( fullspd || fulltbl )
304: fprintf( stderr, " %d backtracking (non-accepting) states\n",
305: num_backtracking );
306: else
307: fprintf( stderr, " compressed tables always backtrack\n" );
308:
309: if ( bol_needed )
310: fprintf( stderr, " Beginning-of-line patterns used\n" );
311:
312: fprintf( stderr, " %d/%d start conditions\n", lastsc,
313: current_max_scs );
314: fprintf( stderr, " %d epsilon states, %d double epsilon states\n",
315: numeps, eps2 );
316:
317: if ( lastccl == 0 )
318: fprintf( stderr, " no character classes\n" );
319: else
320: fprintf( stderr,
321: " %d/%d character classes needed %d/%d words of storage, %d reused\n",
322: lastccl, current_maxccls,
323: cclmap[lastccl] + ccllen[lastccl],
324: current_max_ccl_tbl_size, cclreuse );
325:
326: fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs );
327: fprintf( stderr, " %d/%d unique/duplicate transitions\n",
328: numuniq, numdup );
329:
330: if ( fulltbl )
331: {
332: tblsiz = lastdfa * numecs;
333: fprintf( stderr, " %d table entries\n", tblsiz );
334: }
335:
336: else
337: {
338: tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend;
339:
340: fprintf( stderr, " %d/%d base-def entries created\n",
341: lastdfa + numtemps, current_max_dfas );
342: fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n",
343: tblend, current_max_xpairs, peakpairs );
344: fprintf( stderr,
345: " %d/%d (peak %d) template nxt-chk entries created\n",
346: numtemps * nummecs, current_max_template_xpairs,
347: numtemps * numecs );
348: fprintf( stderr, " %d empty table entries\n", nummt );
349: fprintf( stderr, " %d protos created\n", numprots );
350: fprintf( stderr, " %d templates created, %d uses\n",
351: numtemps, tmpuses );
352: }
353:
354: if ( useecs )
355: {
356: tblsiz = tblsiz + csize;
357: fprintf( stderr, " %d/%d equivalence classes created\n",
358: numecs, csize );
359: }
360:
361: if ( usemecs )
362: {
363: tblsiz = tblsiz + numecs;
364: fprintf( stderr, " %d/%d meta-equivalence classes created\n",
365: nummecs, csize );
366: }
367:
368: fprintf( stderr, " %d (%d saved) hash collisions, %d DFAs equal\n",
369: hshcol, hshsave, dfaeql );
370: fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs );
371: fprintf( stderr, " %d total table entries needed\n", tblsiz );
372: }
373:
374: #ifndef VMS
375: exit( status );
376: #else
377: exit( status + 1 );
378: #endif
379: }
380:
381:
382: /* flexinit - initialize flex
383: *
384: * synopsis
385: * int argc;
386: * char **argv;
387: * flexinit( argc, argv );
388: */
389:
390: void flexinit( argc, argv )
391: int argc;
392: char **argv;
393:
394: {
395: int i, sawcmpflag;
396: char *arg, *flex_gettime(), *mktemp();
397:
398: printstats = syntaxerror = trace = spprdflt = interactive = caseins = false;
399: backtrack_report = performance_report = ddebug = fulltbl = fullspd = false;
400: yymore_used = continued_action = reject = false;
401: yymore_really_used = reject_really_used = false;
402: gen_line_dirs = usemecs = useecs = true;
403:
404: sawcmpflag = false;
405: use_stdout = false;
406:
407: csize = DEFAULT_CSIZE;
408:
409: program_name = argv[0];
410:
411: /* read flags */
412: for ( --argc, ++argv; argc ; --argc, ++argv )
413: {
414: if ( argv[0][0] != '-' || argv[0][1] == '\0' )
415: break;
416:
417: arg = argv[0];
418:
419: for ( i = 1; arg[i] != '\0'; ++i )
420: switch ( arg[i] )
421: {
422: case 'b':
423: backtrack_report = true;
424: break;
425:
426: case 'c':
427: fprintf( stderr,
428: "%s: Assuming use of deprecated -c flag is really intended to be -C\n",
429: program_name );
430:
431: /* fall through */
432:
433: case 'C':
434: if ( i != 1 )
435: flexerror( "-C flag must be given separately" );
436:
437: if ( ! sawcmpflag )
438: {
439: useecs = false;
440: usemecs = false;
441: fulltbl = false;
442: sawcmpflag = true;
443: }
444:
445: for ( ++i; arg[i] != '\0'; ++i )
446: switch ( arg[i] )
447: {
448: case 'e':
449: useecs = true;
450: break;
451:
452: case 'F':
453: fullspd = true;
454: break;
455:
456: case 'f':
457: fulltbl = true;
458: break;
459:
460: case 'm':
461: usemecs = true;
462: break;
463:
464: default:
465: lerrif( "unknown -C option '%c'",
466: (int) arg[i] );
467: break;
468: }
469:
470: goto get_next_arg;
471:
472: case 'd':
473: ddebug = true;
474: break;
475:
476: case 'f':
477: useecs = usemecs = false;
478: fulltbl = true;
479: break;
480:
481: case 'F':
482: useecs = usemecs = false;
483: fullspd = true;
484: break;
485:
486: case 'I':
487: interactive = true;
488: break;
489:
490: case 'i':
491: caseins = true;
492: break;
493:
494: case 'L':
495: gen_line_dirs = false;
496: break;
497:
498: case 'n':
499: /* stupid do-nothing deprecated option */
500: break;
501:
502: case 'p':
503: performance_report = true;
504: break;
505:
506: case 'S':
507: if ( i != 1 )
508: flexerror( "-S flag must be given separately" );
509:
510: skelname = arg + i + 1;
511: goto get_next_arg;
512:
513: case 's':
514: spprdflt = true;
515: break;
516:
517: case 't':
518: use_stdout = true;
519: break;
520:
521: case 'T':
522: trace = true;
523: break;
524:
525: case 'v':
526: printstats = true;
527: break;
528:
529: case '8':
530: csize = CSIZE;
531: break;
532:
533: default:
534: lerrif( "unknown flag '%c'", (int) arg[i] );
535: break;
536: }
537:
538: get_next_arg: /* used by -C and -S flags in lieu of a "continue 2" control */
539: ;
540: }
541:
542: if ( (fulltbl || fullspd) && usemecs )
543: flexerror( "full table and -Cm don't make sense together" );
544:
545: if ( (fulltbl || fullspd) && interactive )
546: flexerror( "full table and -I are (currently) incompatible" );
547:
548: if ( fulltbl && fullspd )
549: flexerror( "full table and -F are mutually exclusive" );
550:
551: if ( ! skelname )
552: {
553: static char skeleton_name_storage[400];
554:
555: skelname = skeleton_name_storage;
556: (void) strcpy( skelname, _PATH_SKELETONFILE );
557: }
558:
559: if ( ! use_stdout )
560: {
561: FILE *prev_stdout = freopen( outfile, "w", stdout );
562:
563: if ( prev_stdout == NULL )
564: lerrsf( "could not create %s", outfile );
565:
566: outfile_created = 1;
567: }
568:
569: num_input_files = argc;
570: input_files = argv;
571: set_input_file( num_input_files > 0 ? input_files[0] : NULL );
572:
573: if ( backtrack_report )
574: {
575: #ifndef SHORT_FILE_NAMES
576: backtrack_file = fopen( "lex.backtrack", "w" );
577: #else
578: backtrack_file = fopen( "lex.bck", "w" );
579: #endif
580:
581: if ( backtrack_file == NULL )
582: flexerror( "could not create lex.backtrack" );
583: }
584:
585: else
586: backtrack_file = NULL;
587:
588:
589: lastccl = 0;
590: lastsc = 0;
591:
592: /* initialize the statistics */
593: starttime = flex_gettime();
594:
595: if ( (skelfile = fopen( skelname, "r" )) == NULL )
596: lerrsf( "can't open skeleton file %s", skelname );
597:
598: #ifdef SYS_V
599: action_file_name = tmpnam( NULL );
600: #endif
601:
602: if ( action_file_name == NULL )
603: {
604: static char temp_action_file_name[32];
605:
606: #ifndef SHORT_FILE_NAMES
607: (void) strcpy( temp_action_file_name, "/tmp/flexXXXXXX" );
608: #else
609: (void) strcpy( temp_action_file_name, "flexXXXXXX.tmp" );
610: #endif
611: (void) mktemp( temp_action_file_name );
612:
613: action_file_name = temp_action_file_name;
614: }
615:
616: if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL )
617: lerrsf( "can't open temporary action file %s", action_file_name );
618:
619: lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0;
620: numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0;
621: numuniq = numdup = hshsave = eofseen = datapos = dataline = 0;
622: num_backtracking = onesp = numprots = 0;
623: variable_trailing_context_rules = bol_needed = false;
624:
625: linenum = sectnum = 1;
626: firstprot = NIL;
627:
628: /* used in mkprot() so that the first proto goes in slot 1
629: * of the proto queue
630: */
631: lastprot = 1;
632:
633: if ( useecs )
634: { /* set up doubly-linked equivalence classes */
635: /* We loop all the way up to csize, since ecgroup[csize] is the
636: * position used for NUL characters
637: */
638: ecgroup[1] = NIL;
639:
640: for ( i = 2; i <= csize; ++i )
641: {
642: ecgroup[i] = i - 1;
643: nextecm[i - 1] = i;
644: }
645:
646: nextecm[csize] = NIL;
647: }
648:
649: else
650: { /* put everything in its own equivalence class */
651: for ( i = 1; i <= csize; ++i )
652: {
653: ecgroup[i] = i;
654: nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */
655: }
656: }
657:
658: set_up_initial_allocations();
659: }
660:
661:
662: /* readin - read in the rules section of the input file(s)
663: *
664: * synopsis
665: * readin();
666: */
667:
668: void readin()
669:
670: {
671: skelout();
672:
673: if ( ddebug )
674: puts( "#define FLEX_DEBUG" );
675:
676: if ( csize == 256 )
677: puts( "#define YY_CHAR unsigned char" );
678: else
679: puts( "#define YY_CHAR char" );
680:
681: line_directive_out( stdout );
682:
683: if ( yyparse() )
684: {
685: pinpoint_message( "fatal parse error" );
686: flexend( 1 );
687: }
688:
689: if ( xlation )
690: {
691: numecs = ecs_from_xlation( ecgroup );
692: useecs = true;
693: }
694:
695: else if ( useecs )
696: numecs = cre8ecs( nextecm, ecgroup, csize );
697:
698: else
699: numecs = csize;
700:
701: /* now map the equivalence class for NUL to its expected place */
702: ecgroup[0] = ecgroup[csize];
703: NUL_ec = abs( ecgroup[0] );
704:
705: if ( useecs )
706: ccl2ecl();
707: }
708:
709:
710:
711: /* set_up_initial_allocations - allocate memory for internal tables */
712:
713: void set_up_initial_allocations()
714:
715: {
716: current_mns = INITIAL_MNS;
717: firstst = allocate_integer_array( current_mns );
718: lastst = allocate_integer_array( current_mns );
719: finalst = allocate_integer_array( current_mns );
720: transchar = allocate_integer_array( current_mns );
721: trans1 = allocate_integer_array( current_mns );
722: trans2 = allocate_integer_array( current_mns );
723: accptnum = allocate_integer_array( current_mns );
724: assoc_rule = allocate_integer_array( current_mns );
725: state_type = allocate_integer_array( current_mns );
726:
727: current_max_rules = INITIAL_MAX_RULES;
728: rule_type = allocate_integer_array( current_max_rules );
729: rule_linenum = allocate_integer_array( current_max_rules );
730:
731: current_max_scs = INITIAL_MAX_SCS;
732: scset = allocate_integer_array( current_max_scs );
733: scbol = allocate_integer_array( current_max_scs );
734: scxclu = allocate_integer_array( current_max_scs );
735: sceof = allocate_integer_array( current_max_scs );
736: scname = allocate_char_ptr_array( current_max_scs );
737: actvsc = allocate_integer_array( current_max_scs );
738:
739: current_maxccls = INITIAL_MAX_CCLS;
740: cclmap = allocate_integer_array( current_maxccls );
741: ccllen = allocate_integer_array( current_maxccls );
742: cclng = allocate_integer_array( current_maxccls );
743:
744: current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
745: ccltbl = allocate_character_array( current_max_ccl_tbl_size );
746:
747: current_max_dfa_size = INITIAL_MAX_DFA_SIZE;
748:
749: current_max_xpairs = INITIAL_MAX_XPAIRS;
750: nxt = allocate_integer_array( current_max_xpairs );
751: chk = allocate_integer_array( current_max_xpairs );
752:
753: current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
754: tnxt = allocate_integer_array( current_max_template_xpairs );
755:
756: current_max_dfas = INITIAL_MAX_DFAS;
757: base = allocate_integer_array( current_max_dfas );
758: def = allocate_integer_array( current_max_dfas );
759: dfasiz = allocate_integer_array( current_max_dfas );
760: accsiz = allocate_integer_array( current_max_dfas );
761: dhash = allocate_integer_array( current_max_dfas );
762: dss = allocate_int_ptr_array( current_max_dfas );
763: dfaacc = allocate_dfaacc_union( current_max_dfas );
764:
765: nultrans = (int *) 0;
766: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.