|
|
1.1 root 1: /*
2: * RCS file input
3: */
4: #ifndef lint
5: static char rcsid[]= "$Id: rcslex.c,v 4.6 89/05/01 15:13:07 narten Exp $ Purdue CS";
6: #endif
7: /*********************************************************************************
8: * Lexical Analysis.
9: * Character mapping table,
10: * hashtable, Lexinit, nextlex, getlex, getkey,
11: * getid, getnum, readstring, printstring, savestring,
12: * checkid, serror, fatserror, error, faterror, warn, diagnose
13: * fflsbuf, fputs, fprintf
14: * Testprogram: define LEXDB
15: *********************************************************************************
16: */
17:
18: /* Copyright (C) 1982, 1988, 1989 Walter Tichy
19: * All rights reserved.
20: *
21: * Redistribution and use in source and binary forms are permitted
22: * provided that the above copyright notice and this paragraph are
23: * duplicated in all such forms and that any documentation,
24: * advertising materials, and other materials related to such
25: * distribution and use acknowledge that the software was developed
26: * by Walter Tichy.
27: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
28: * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
29: * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
30: *
31: * Report all problems and direct all questions to:
32: * [email protected]
33: *
34:
35:
36:
37:
38:
39:
40:
41: */
42:
43:
44:
45: /* $Log: rcslex.c,v $
46: * Revision 4.6 89/05/01 15:13:07 narten
47: * changed copyright header to reflect current distribution rules
48: *
49: * Revision 4.5 88/11/08 12:00:54 narten
50: * changes from [email protected] (Paul Eggert)
51: *
52: * Revision 4.5 88/08/28 15:01:12 eggert
53: * Don't loop when writing error messages to a full filesystem.
54: * Flush stderr/stdout when mixing output.
55: * Yield exit status compatible with diff(1).
56: * Shrink stdio code size; allow cc -R; remove lint.
57: *
58: * Revision 4.4 87/12/18 11:44:47 narten
59: * fixed to use "varargs" in "fprintf"; this is required if it is to
60: * work on a SPARC machine such as a Sun-4
61: *
62: * Revision 4.3 87/10/18 10:37:18 narten
63: * Updating version numbers. Changes relative to 1.1 actually relative
64: * to version 4.1
65: *
66: * Revision 1.3 87/09/24 14:00:17 narten
67: * Sources now pass through lint (if you ignore printf/sprintf/fprintf
68: * warnings)
69: *
70: * Revision 1.2 87/03/27 14:22:33 jenkins
71: * Port to suns
72: *
73: * Revision 1.1 84/01/23 14:50:33 kcs
74: * Initial revision
75: *
76: * Revision 4.1 83/03/25 18:12:51 wft
77: * Only changed $Header to $Id.
78: *
79: * Revision 3.3 82/12/10 16:22:37 wft
80: * Improved error messages, changed exit status on error to 1.
81: *
82: * Revision 3.2 82/11/28 21:27:10 wft
83: * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
84: * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
85: * properly in case there is an IO-error (e.g., file system full).
86: *
87: * Revision 3.1 82/10/11 19:43:56 wft
88: * removed unused label out:;
89: * made sure all calls to getc() return into an integer, not a char.
90: */
91:
92:
93: /*
94: #define LEXDB
95: /* version LEXDB is for testing the lexical analyzer. The testprogram
96: * reads a stream of lexemes, enters the revision numbers into the
97: * hashtable, and prints the recognized tokens. Keywords are recognized
98: * as identifiers.
99: */
100:
101:
102:
103: #include "rcsbase.h"
104: #include <varargs.h>
105:
106:
107:
108: /* character mapping table */
109: enum tokens map[] = {
110: EOFILE, /* this will end up at ctab[-1] */
111: UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
112: UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN,
113: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
114: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
115: SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE,
116: LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE,
117: DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
118: DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST,
119: AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
120: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
121: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
122: LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER,
123: ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
124: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
125: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
126: LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN
127: };
128:
129:
130:
131:
132: struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/
133:
134: enum tokens nexttok; /*next token, set by nextlex */
135:
136: int hshenter; /*if true, next suitable lexeme will be entered */
137: /*into the symbol table. Handle with care. */
138: int nextc; /*next input character, initialized by Lexinit */
139:
140: int eof; /*end-of-file indicator, set to >0 on end of file*/
141: int line; /*current line-number of input */
142: int nerror; /*counter for errors */
143: int nwarn; /*counter for warnings */
144: char * cmdid; /*command identification for error messages */
145: int quietflag; /*indicates quiet mode */
146: FILE * finptr; /*input file descriptor */
147:
148: FILE * frewrite; /*file descriptor for echoing input */
149:
150: int rewriteflag;/*indicates whether to echo to frewrite */
151:
152: char StringTab[strtsize]; /* string table and heap */
153:
154: char * NextString; /*pointer to next identifier in StringTab*/
155: char * Topchar; /*pointer to next free byte in StringTab*/
156: /*set by nextlex, lookup */
157: struct hshentry hshtab[hshsize]; /*hashtable */
158:
159:
160:
161:
162:
163: lookup() {
164:
165: /* Function: Looks up the character string pointed to by NextString in the
166: * hashtable. If the string is not present, a new entry for it is created.
167: * If the string is present, TopChar is moved back to save the space for
168: * the string, and NextString is set to point to the original string.
169: * In any case, the address of the corresponding hashtable entry is placed
170: * into nexthsh.
171: * Algorithm: Quadratic hash, covering all entries.
172: * Assumptions: NextString points at the first character of the string.
173: * Topchar points at the first empty byte after the string.
174: */
175:
176: register int ihash; /* index into hashtable */
177: register char * sp, * np;
178: int c, delta, final, FirstScan; /*loop control*/
179:
180: /* calculate hash code */
181: sp = NextString;
182: ihash = 0;
183: while (*sp) ihash += *sp++;
184:
185: /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */
186: c=0;delta=1;final=(hshsize-1)/2;
187: FirstScan=true; /*first loop */
188:
189: for (;;) {
190: ihash = (ihash+c)%hshsize; /*next index*/
191:
192: if (hshtab[ihash].num == nil) {
193: /*empty slot found*/
194: hshtab[ihash].num = NextString;
195: nexthsh= &hshtab[ihash];/*save hashtable address*/
196: # ifdef LEXDB
197: VOID printf("\nEntered: %s at %d ",nexthsh->num, ihash);
198: # endif
199: return;
200: }
201: /* compare strings */
202: sp=NextString;np=hshtab[ihash].num;
203: while (*sp == *np++) {
204: if (*sp == 0) {
205: /* match found */
206: nexthsh= &hshtab[ihash];
207: Topchar = NextString;
208: NextString = nexthsh->num;
209: return;
210: } else sp++;
211: }
212:
213: /* neither empty slot nor string found */
214: /* calculate next index and repeat */
215: if (c != final)
216: c += delta;
217: else {
218: if (FirstScan) {
219: /*set up second sweep*/
220: delta = -1; final = 1; FirstScan= false;
221: } else {
222: fatserror("Hashtable overflow");
223: }
224: }
225: }
226: };
227:
228:
229:
230:
231:
232:
233: Lexinit()
234: /* Function: Initialization of lexical analyzer:
235: * initializes the hastable,
236: * initializes nextc, nexttok if finptr != NULL
237: */
238: { register int c;
239:
240: for (c=hshsize-1; c>=0; c--) {
241: hshtab[c].num = nil;
242: }
243:
244: hshenter=true; eof=0; line=1; nerror=0; nwarn=0;
245: NextString=nil; Topchar = &StringTab[0];
246: if (finptr) {
247: nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/
248: nextlex(); /*initial token*/
249: } else {
250: nextc = '\0';
251: nexttok=EOFILE;
252: }
253: }
254:
255:
256:
257:
258:
259:
260:
261: nextlex()
262:
263: /* Function: Reads the next token and sets nexttok to the next token code.
264: * Only if the hshenter==true, a revision number is entered into the
265: * hashtable and a pointer to it is placed into nexthsh.
266: * This is useful for avoiding that dates are placed into the hashtable.
267: * For ID's and NUM's, NextString is set to the character string in the
268: * string table. Assumption: nextc contains the next character.
269: */
270: { register c;
271: register FILE * fin, * frew;
272: register char * sp;
273: register enum tokens d;
274:
275: if (eof) {
276: nexttok=EOFILE;
277: return;
278: }
279: fin=finptr; frew=frewrite;
280: loop:
281: switch(nexttok=ctab[nextc]) {
282:
283: case UNKN:
284: case IDCHAR:
285: case PERIOD:
286: serror("unknown Character: %c",nextc);
287: nextc=GETC(fin,frew,rewriteflag);
288: goto loop;
289:
290: case NEWLN:
291: line++;
292: # ifdef LEXDB
293: VOID putchar('\n');
294: # endif
295: /* Note: falls into next case */
296:
297: case SPACE:
298: nextc=GETC(fin,frew,rewriteflag);
299: goto loop;
300:
301: case EOFILE:
302: eof++;
303: nexttok=EOFILE;
304: return;
305:
306: case DIGIT:
307: NextString = sp = Topchar;
308: *sp++ = nextc;
309: while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==DIGIT ||
310: d==PERIOD) {
311: *sp++ = c; /* 1.2. and 1.2 are different */
312: }
313: *sp++ = '\0';
314: if (sp >= StringTab+strtsize) {
315: /*may have written outside stringtable already*/
316: fatserror("Stringtable overflow");
317: }
318: Topchar = sp;
319: nextc = c;
320: if (hshenter == true)
321: lookup(); /* lookup updates NextString, Topchar*/
322: nexttok = NUM;
323: return;
324:
325:
326: case LETTER:
327: NextString = sp = Topchar;
328: *sp++ = nextc;
329: while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==LETTER ||
330: d==DIGIT || d==IDCHAR) {
331: *sp++ = c;
332: }
333: *sp++ = '\0';
334: if (sp >= StringTab+strtsize) {
335: /*may have written outside stringtable already*/
336: fatserror("Stringtable overflow");
337: }
338: Topchar = sp;
339: nextc = c;
340: nexttok = ID; /* may be ID or keyword */
341: return;
342:
343: case SBEGIN: /* long string */
344: nexttok = STRING;
345: /* note: only the initial SBEGIN has been read*/
346: /* read the string, and reset nextc afterwards*/
347: return;
348:
349: default:
350: nextc=GETC(fin,frew,rewriteflag);
351: return;
352: }
353: }
354:
355:
356: int getlex(token)
357: enum tokens token;
358: /* Function: Checks if nexttok is the same as token. If so,
359: * advances the input by calling nextlex and returns true.
360: * otherwise returns false.
361: * Doesn't work for strings and keywords; loses the character string for ids.
362: */
363: {
364: if (nexttok==token) {
365: nextlex();
366: return(true);
367: } else return(false);
368: }
369:
370: int getkey (key)
371: char * key;
372: /* Function: If the current token is a keyword identical to key,
373: * getkey advances the input by calling nextlex and returns true;
374: * otherwise returns false.
375: */
376: {
377: register char *s1,*s2;
378:
379: if (nexttok==ID) {
380: s1=key; s2=NextString;
381: while(*s1 == *s2++)
382: if (*s1++ == '\0') {
383: /* match found */
384: Topchar = NextString; /*reset Topchar */
385: nextlex();
386: return(true);
387: }
388: }
389: return(false);
390: }
391:
392:
393:
394: char * getid()
395: /* Function: Checks if nexttok is an identifier. If so,
396: * advances the input by calling nextlex and returns a pointer
397: * to the identifier; otherwise returns nil.
398: * Treats keywords as identifiers.
399: */
400: {
401: register char * name;
402: if (nexttok==ID) {
403: name = NextString;
404: nextlex();
405: return name;
406: } else return nil;
407: }
408:
409:
410: struct hshentry * getnum()
411: /* Function: Checks if nexttok is a number. If so,
412: * advances the input by calling nextlex and returns a pointer
413: * to the hashtable entry. Otherwise returns nil.
414: * Doesn't work if hshenter is false.
415: */
416: {
417: register struct hshentry * num;
418: if (nexttok==NUM) {
419: num=nexthsh;
420: nextlex();
421: return num;
422: } else return nil;
423: }
424:
425:
426: readstring()
427: /* skip over characters until terminating single SDELIM */
428: /* if rewriteflag==true, copy every character read to frewrite.*/
429: /* Does not advance nextlex at the end. */
430: { register c;
431: register FILE * fin, * frew;
432: fin=finptr; frew=frewrite;
433: if (rewriteflag) {
434: /* copy string verbatim to frewrite */
435: while ((c=getc(fin)) != EOF) {
436: VOID putc(c,frew);
437: if (c==SDELIM) {
438: if ((c=getc(fin)) == EOF || putc(c,frew) != SDELIM) {
439: /* end of string */
440: nextc=c;
441: return;
442: }
443: }
444: }
445: } else {
446: /* skip string */
447: while ((c=getc(fin)) != EOF) {
448: if (c==SDELIM) {
449: if ((c=getc(fin)) != SDELIM) {
450: /* end of string */
451: nextc=c;
452: return;
453: }
454: }
455: }
456: }
457: nextc = c;
458: error("Unterminated string");
459: }
460:
461:
462: printstring()
463: /* Function: copy a string to stdout, until terminated with a single SDELIM.
464: * Does not advance nextlex at the end.
465: */
466: {
467: register c;
468: register FILE * fin;
469: fin=finptr;
470: while ((c=getc(fin)) != EOF) {
471: if (c==SDELIM) {
472: if ((c=getc(fin)) != SDELIM) {
473: /* end of string */
474: nextc=c;
475: return;
476: }
477: }
478: VOID putchar(c);
479: }
480: nextc = c;
481: error("Unterminated string");
482: }
483:
484:
485:
486: savestring(target,length)
487: char * target; int length;
488: /* copies a string terminated with SDELIM from file finptr to buffer target,
489: * but not more than length bytes. If the string is longer than length,
490: * the extra characters are skipped. The string may be empty, in which
491: * case a '\0' is placed into target.
492: * Double SDELIM is replaced with SDELIM.
493: * If rewriteflag==true, the string is also copied unchanged to frewrite.
494: * Returns the length of the saved string.
495: * Does not advance nextlex at the end.
496: */
497: {
498: register c;
499: register FILE * fin, * frew;
500: register char * tp, * max;
501:
502: fin=finptr; frew=frewrite;
503: tp=target; max= target+length; /*max is one too large*/
504: while ((c=GETC(fin,frew,rewriteflag))!=EOF) {
505: *tp++ =c;
506: if (c== SDELIM) {
507: if ((c=GETC(fin,frew,rewriteflag))!=SDELIM) {
508: /* end of string */
509: *(tp-1)='\0';
510: nextc=c;
511: return;
512: }
513: }
514: if (tp >= max) {
515: /* overflow */
516: error("string buffer overflow -- truncating string");
517: target[length-1]='\0';
518: /* skip rest of string */
519: while ((c=GETC(fin,frew,rewriteflag))!=EOF) {
520: if ((c==SDELIM) && ((c=GETC(fin,frew,rewriteflag))!=SDELIM)) {
521: /* end of string */
522: nextc=c;
523: return;
524: }
525: }
526: nextc = c;
527: error("Can't find %c to terminate string before end of file",SDELIM);
528: return;
529: }
530: }
531: nextc = c;
532: error("Can't find %c to terminate string before end of file",SDELIM);
533: }
534:
535:
536: char *checkid(id, delim)
537: char *id, delim;
538: /* Function: check whether the string starting at id is an */
539: /* identifier and return a pointer to the last char*/
540: /* of the identifer. White space, delim and '\0' */
541: /* are legal delimeters. Aborts the program if not */
542: /* a legal identifier. Useful for checking commands*/
543: {
544: register enum tokens d;
545: register char *temp;
546: register char c,tc;
547:
548: temp = id;
549: if ( ctab[*id] == LETTER ) {
550: while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ;
551: if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) {
552: /* append \0 to end of id before error message */
553: tc = c;
554: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
555: *id = '\0';
556: faterror("Invalid character %c in identifier %s",tc,temp);
557: return nil ;
558: } else
559: return id;
560: } else {
561: /* append \0 to end of id before error message */
562: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
563: *id = '\0';
564: faterror("Identifier %s does not start with letter",temp);
565: return nil;
566: }
567: }
568:
writeerror()
/* Function: Aborts the program after a failed write.
 * The first call reports "write error" through faterror().  If reporting
 * it fails in turn and ends up here again, exits at once with status 2
 * instead of looping.
 */
{
    static int looping = 0;     /* set once a write error is being reported */

    if (!looping) {
        looping = 1;
        faterror("write error");
    } else {
        exit(2);
    }
}
577:
nlflush(iop)
register FILE * iop;
/* Function: Writes a newline to iop and flushes it; calls writeerror()
 * if either step fails.
 */
{
    if (putc('\n',iop) != EOF && fflush(iop) != EOF)
        return;
    writeerror();
}
584:
585:
586: /*VARARGS1*/
587: serror(e,e1,e2,e3,e4,e5)
588: char * e, * e1, * e2, * e3, * e4, * e5;
589: /* non-fatal syntax error */
590: { nerror++;
591: VOID fprintf(stderr,"%s error, line %d: ", cmdid, line);
592: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
593: nlflush(stderr);
594: }
595:
596: /*VARARGS1*/
597: error(e,e1,e2,e3,e4,e5)
598: char * e, * e1, * e2, * e3, * e4, * e5;
599: /* non-fatal error */
600: { nerror++;
601: VOID fprintf(stderr,"%s error: ",cmdid);
602: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
603: nlflush(stderr);
604: }
605:
606: /*VARARGS1*/
607: fatserror(e,e1,e2,e3,e4,e5)
608: char * e, * e1, * e2, * e3, * e4, * e5;
609: /* fatal syntax error */
610: { nerror++;
611: VOID fprintf(stderr,"%s error, line %d: ", cmdid,line);
612: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
613: VOID fprintf(stderr,"\n%s aborted\n",cmdid);
614: VOID cleanup();
615: exit(2);
616: }
617:
618: /*VARARGS1*/
619: faterror(e,e1,e2,e3,e4,e5)
620: char * e, * e1, * e2, * e3, * e4, * e5;
621: /* fatal error, terminates program after cleanup */
622: { nerror++;
623: VOID fprintf(stderr,"%s error: ",cmdid);
624: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
625: VOID fprintf(stderr,"\n%s aborted\n",cmdid);
626: VOID cleanup();
627: exit(2);
628: }
629:
630: /*VARARGS1*/
631: warn(e,e1,e2,e3,e4,e5)
632: char * e, * e1, * e2, * e3, * e4, * e5;
633: /* prints a warning message */
634: { nwarn++;
635: VOID fprintf(stderr,"%s warning: ",cmdid);
636: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
637: nlflush(stderr);
638: }
639:
640:
641: /*VARARGS1*/
642: diagnose(e,e1,e2,e3,e4,e5)
643: char * e, * e1, * e2, * e3, * e4, * e5;
644: /* prints a diagnostic message */
645: {
646: if (!quietflag) {
647: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
648: nlflush(stderr);
649: }
650: }
651:
652:
653:
fflsbuf(c, iop)
unsigned c; register FILE * iop;
/* Function: Flush iop.
 * Hands c and iop to stdio's _flsbuf and returns its result, but calls
 * writeerror() (which aborts the program) if _flsbuf reports EOF.
 */
{
    register status;

    status = _flsbuf(c,iop);
    if (status == EOF)
        writeerror();
    return status;
}
664:
665:
666: fputs(s, iop)
667: register char *s;
668: register FILE *iop;
669: /* Function: Put string s on file iop, abort on error.
670: * Same as puts in stdio, but with different putc macro.
671: */
672: {
673: register r;
674: register c;
675:
676: while (c = *s++)
677: r = putc(c, iop);
678: return(r);
679: }
680:
681:
682:
683: fprintf(iop, fmt, va_alist)
684: FILE *iop;
685: char *fmt;
686: va_dcl
687: /* Function: formatted output. Same as fprintf in stdio,
688: * but aborts program on error
689: */
690: {
691: register int value;
692: va_list ap;
693:
694: va_start(ap);
695: #ifdef VFPRINTF
696: VOID vfprintf(iop, fmt, ap);
697: #else
698: _doprnt(fmt, ap, iop);
699: #endif
700: if (ferror(iop)) {
701: writeerror();
702: value = EOF;
703: } else value = 0;
704: va_end(ap);
705: return value;
706: }
707:
708:
709:
710: #ifdef LEXDB
711: /* test program reading a stream of lexems and printing the tokens.
712: */
713:
714:
715:
716: main(argc,argv)
717: int argc; char * argv[];
718: {
719: cmdid="lextest";
720: if (argc<2) {
721: VOID fputs("No input file\n",stderr);
722: exit(1);
723: }
724: if ((finptr=fopen(argv[1], "r")) == NULL) {
725: faterror("Can't open input file %s\n",argv[1]);
726: }
727: Lexinit();
728: rewriteflag=false;
729: while (nexttok != EOFILE) {
730: switch (nexttok) {
731:
732: case ID:
733: VOID printf("ID: %s",NextString);
734: break;
735:
736: case NUM:
737: if (hshenter==true)
738: VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab);
739: else
740: VOID printf("NUM, unentered: %s",NextString);
741: hshenter = !hshenter; /*alternate between dates and numbers*/
742: break;
743:
744: case COLON:
745: VOID printf("COLON"); break;
746:
747: case SEMI:
748: VOID printf("SEMI"); break;
749:
750: case STRING:
751: readstring();
752: VOID printf("STRING"); break;
753:
754: case UNKN:
755: VOID printf("UNKN"); break;
756:
757: default:
758: VOID printf("DEFAULT"); break;
759: }
760: VOID printf(" | ");
761: nextlex();
762: }
763: VOID printf("\nEnd of lexical analyzer test\n");
764: }
765:
cleanup()
/* Stand-in for the real cleanup routine, which the test program does not
 * need; does nothing.
 */
{
}
769:
770:
771: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.