|
|
1.1 root 1: /*
2: * RCS file input
3: */
4: #ifndef lint
5: static char rcsid[]= "$Id: rcslex.c,v 4.4 87/12/18 11:44:47 narten Exp $ Purdue CS";
6: #endif
7: /*********************************************************************************
8: * Lexical Analysis.
9: * Character mapping table,
10: * hashtable, Lexinit, nextlex, getlex, getkey,
11: * getid, getnum, readstring, printstring, savestring,
12: * checkid, serror, fatserror, error, faterror, warn, diagnose
13: * fflsbuf, puts, fprintf
14: * Testprogram: define LEXDB
15: *********************************************************************************
16: *
17: * Copyright (C) 1982 by Walter F. Tichy
18: * Purdue University
19: * Computer Science Department
20: * West Lafayette, IN 47907
21: *
22: * All rights reserved. No part of this software may be sold or distributed
23: * in any form or by any means without the prior written permission of the
24: * author.
25: * Report problems and direct all inquiries to Tichy@purdue (ARPA net).
26: */
27:
28: /* $Log: rcslex.c,v $
29: * Revision 4.4 87/12/18 11:44:47 narten
30: * fixed to use "varargs" in "fprintf"; this is required if it is to
31: * work on a SPARC machine such as a Sun-4
32: *
33: * Revision 4.3 87/10/18 10:37:18 narten
34: * Updating version numbers. Changes relative to 1.1 actually relative
35: * to version 4.1
36: *
37: * Revision 1.3 87/09/24 14:00:17 narten
38: * Sources now pass through lint (if you ignore printf/sprintf/fprintf
39: * warnings)
40: *
41: * Revision 1.2 87/03/27 14:22:33 jenkins
42: * Port to suns
43: *
44: * Revision 1.1 84/01/23 14:50:33 kcs
45: * Initial revision
46: *
47: * Revision 4.1 83/03/25 18:12:51 wft
48: * Only changed $Header to $Id.
49: *
50: * Revision 3.3 82/12/10 16:22:37 wft
51: * Improved error messages, changed exit status on error to 1.
52: *
53: * Revision 3.2 82/11/28 21:27:10 wft
54: * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
55: * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
56: * properly in case there is an IO-error (e.g., file system full).
57: *
58: * Revision 3.1 82/10/11 19:43:56 wft
59: * removed unused label out:;
60: * made sure all calls to getc() return into an integer, not a char.
61: */
62:
63:
64: /*
65: #define LEXDB
66: /* version LEXDB is for testing the lexical analyzer. The testprogram
67: * reads a stream of lexemes, enters the revision numbers into the
68: * hashtable, and prints the recognized tokens. Keywords are recognized
69: * as identifiers.
70: */
71:
72:
73:
74: #include "rcsbase.h"
75: #include <varargs.h>
76:
77:
78:
/* Character mapping table: the token class assigned to each input character.
 * The first entry, EOFILE, is the one that "ends up at ctab[-1]", so that an
 * end-of-file value of -1 can be classified like any other character.
 * The remaining 128 entries are laid out eight per row; the row comments
 * give the ctab index range of each row.
 * NOTE(review): ctab itself is a macro in rcsbase.h, and names such as
 * INSERT, EXCLA or AT are presumably defined there as well -- confirm.
 * The layout matches the 7-bit ASCII code chart.
 */
enum tokens map[] = {
        EOFILE,         /* this will end up at ctab[-1] */
        UNKN,   INSERT, UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   /*   0 -   7 */
        UNKN,   SPACE,  NEWLN,  UNKN,   SPACE,  UNKN,   UNKN,   UNKN,   /*   8 -  15 */
        UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   /*  16 -  23 */
        UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   UNKN,   /*  24 -  31 */
        SPACE,  EXCLA,  DQUOTE, HASH,   DOLLAR, PERCNT, AMPER,  SQUOTE, /*  32 -  39 */
        LPARN,  RPARN,  TIMES,  PLUS,   COMMA,  MINUS,  PERIOD, DIVIDE, /*  40 -  47 */
        DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  /*  48 -  55 */
        DIGIT,  DIGIT,  COLON,  SEMI,   LESS,   EQUAL,  GREAT,  QUEST,  /*  56 -  63 */
        AT,     LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /*  64 -  71 */
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /*  72 -  79 */
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /*  80 -  87 */
        LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR,  UNDER,  /*  88 -  95 */
        ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /*  96 - 103 */
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /* 104 - 111 */
        LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, /* 112 - 119 */
        LETTER, LETTER, LETTER, LBRACE, BAR,    RBRACE, TILDE,  UNKN    /* 120 - 127 */
};
99:
100:
101:
102:
103: struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/
104:
105: enum tokens nexttok; /*next token, set by nextlex */
106:
107: int hshenter /*if true, next suitable lexeme will be entered */
108: = true; /*into the symbol table. Handle with care. */
109: int nextc; /*next input character, initialized by Lexinit */
110:
111: int eof /*end-of-file indicator, set to >0 on end of file*/
112: = 0;
113: int line /*current line-number of input */
114: = 1;
115: int nerror /*counter for errors */
116: = 0;
117: int nwarn /*counter for warnings */
118: = 0;
119: char * cmdid /*command identification for error messages */
120: = nil;
121: int quietflag /*indicates quiet mode */
122: = false;
123: FILE * finptr; /*input file descriptor */
124:
125: FILE * frewrite; /*file descriptor for echoing input */
126:
127: int rewriteflag;/*indicates whether to echo to frewrite */
128:
129: char StringTab[strtsize]; /* string table and heap */
130:
131: char * NextString /*pointer to next identifier in StringTab*/
132: = nil;
133: char * Topchar /*pointer to next free byte in StringTab*/
134: = &StringTab[0]; /*set by nextlex, lookup */
135: struct hshentry hshtab[hshsize]; /*hashtable */
136:
137:
138:
139:
140:
141: lookup() {
142:
143: /* Function: Looks up the character string pointed to by NextString in the
144: * hashtable. If the string is not present, a new entry for it is created.
145: * If the string is present, TopChar is moved back to save the space for
146: * the string, and NextString is set to point to the original string.
147: * In any case, the address of the corresponding hashtable entry is placed
148: * into nexthsh.
149: * Algorithm: Quadratic hash, covering all entries.
150: * Assumptions: NextString points at the first character of the string.
151: * Topchar points at the first empty byte after the string.
152: */
153:
154: register int ihash; /* index into hashtable */
155: register char * sp, * np;
156: int c, delta, final, FirstScan; /*loop control*/
157:
158: /* calculate hash code */
159: sp = NextString;
160: ihash = 0;
161: while (*sp) ihash += *sp++;
162:
163: /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */
164: c=0;delta=1;final=(hshsize-1)/2;
165: FirstScan=true; /*first loop */
166:
167: for (;;) {
168: ihash = (ihash+c)%hshsize; /*next index*/
169:
170: if (hshtab[ihash].num == nil) {
171: /*empty slot found*/
172: hshtab[ihash].num = NextString;
173: nexthsh= &hshtab[ihash];/*save hashtable address*/
174: # ifdef LEXDB
175: VOID printf("\nEntered: %s at %d ",nexthsh->num, ihash);
176: # endif
177: return;
178: }
179: /* compare strings */
180: sp=NextString;np=hshtab[ihash].num;
181: while (*sp == *np++) {
182: if (*sp == 0) {
183: /* match found */
184: nexthsh= &hshtab[ihash];
185: Topchar = NextString;
186: NextString = nexthsh->num;
187: return;
188: } else sp++;
189: }
190:
191: /* neither empty slot nor string found */
192: /* calculate next index and repeat */
193: if (c != final)
194: c += delta;
195: else {
196: if (FirstScan) {
197: /*set up second sweep*/
198: delta = -1; final = 1; FirstScan= false;
199: } else {
200: fatserror("Hashtable overflow");
201: }
202: }
203: }
204: };
205:
206:
207:
208:
209:
210:
211: Lexinit()
212: /* Function: Initialization of lexical analyzer:
213: * initializes the hastable,
214: * initializes nextc, nexttok if finptr != NULL
215: */
216: { register int i;
217:
218: for (i=hshsize-1; i>=0; i--) {
219: hshtab[i].num = nil;
220: }
221:
222: hshenter=true; eof=0; line=1; nerror=0; nwarn=0;
223: NextString=nil; Topchar = &StringTab[0];
224: if (finptr) {
225: nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/
226: nextlex(); /*initial token*/
227: } else {
228: nextc = '\0';
229: nexttok=EOFILE;
230: }
231: }
232:
233:
234:
235:
236:
237:
238:
239: nextlex()
240:
241: /* Function: Reads the next token and sets nexttok to the next token code.
242: * Only if the hshenter==true, a revision number is entered into the
243: * hashtable and a pointer to it is placed into nexthsh.
244: * This is useful for avoiding that dates are placed into the hashtable.
245: * For ID's and NUM's, NextString is set to the character string in the
246: * string table. Assumption: nextc contains the next character.
247: */
248: { register c;
249: register char * sp;
250: register enum tokens d;
251:
252: if (eof) {
253: nexttok=EOFILE;
254: return;
255: }
256: loop:
257: switch(nexttok=ctab[nextc]) {
258:
259: case UNKN:
260: case IDCHAR:
261: case PERIOD:
262: serror("unknown Character: %c",nextc);
263: nextc=GETC(finptr,frewrite,rewriteflag);
264: goto loop;
265:
266: case NEWLN:
267: line++;
268: # ifdef LEXDB
269: VOID putchar('\n');
270: # endif
271: /* Note: falls into next case */
272:
273: case SPACE:
274: nextc=GETC(finptr,frewrite,rewriteflag);
275: goto loop;
276:
277: case EOFILE:
278: eof++;
279: nexttok=EOFILE;
280: return;
281:
282: case DIGIT:
283: NextString = sp = Topchar;
284: *sp++ = nextc;
285: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==DIGIT ||
286: d==PERIOD) {
287: *sp++ = c; /* 1.2. and 1.2 are different */
288: }
289: *sp++ = '\0';
290: if (sp >= StringTab+strtsize) {
291: /*may have written outside stringtable already*/
292: fatserror("Stringtable overflow");
293: }
294: Topchar = sp;
295: nextc = c;
296: if (hshenter == true)
297: lookup(); /* lookup updates NextString, Topchar*/
298: nexttok = NUM;
299: return;
300:
301:
302: case LETTER:
303: NextString = sp = Topchar;
304: *sp++ = nextc;
305: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==LETTER ||
306: d==DIGIT || d==IDCHAR) {
307: *sp++ = c;
308: }
309: *sp++ = '\0';
310: if (sp >= StringTab+strtsize) {
311: /*may have written outside stringtable already*/
312: fatserror("Stringtable overflow");
313: }
314: Topchar = sp;
315: nextc = c;
316: nexttok = ID; /* may be ID or keyword */
317: return;
318:
319: case SBEGIN: /* long string */
320: nexttok = STRING;
321: /* note: only the initial SBEGIN has been read*/
322: /* read the string, and reset nextc afterwards*/
323: return;
324:
325: default:
326: nextc=GETC(finptr,frewrite,rewriteflag);
327: return;
328: }
329: }
330:
331:
332: int getlex(token)
333: enum tokens token;
334: /* Function: Checks if nexttok is the same as token. If so,
335: * advances the input by calling nextlex and returns true.
336: * otherwise returns false.
337: * Doesn't work for strings and keywords; loses the character string for ids.
338: */
339: {
340: if (nexttok==token) {
341: nextlex();
342: return(true);
343: } else return(false);
344: }
345:
346: int getkey (key)
347: char * key;
348: /* Function: If the current token is a keyword identical to key,
349: * getkey advances the input by calling nextlex and returns true;
350: * otherwise returns false.
351: */
352: {
353: register char *s1,*s2;
354:
355: if (nexttok==ID) {
356: s1=key; s2=NextString;
357: while(*s1 == *s2++)
358: if (*s1++ == '\0') {
359: /* match found */
360: Topchar = NextString; /*reset Topchar */
361: nextlex();
362: return(true);
363: }
364: }
365: return(false);
366: }
367:
368:
369:
370: char * getid()
371: /* Function: Checks if nexttok is an identifier. If so,
372: * advances the input by calling nextlex and returns a pointer
373: * to the identifier; otherwise returns nil.
374: * Treats keywords as identifiers.
375: */
376: {
377: register char * name;
378: if (nexttok==ID) {
379: name = NextString;
380: nextlex();
381: return name;
382: } else return nil;
383: }
384:
385:
386: struct hshentry * getnum()
387: /* Function: Checks if nexttok is a number. If so,
388: * advances the input by calling nextlex and returns a pointer
389: * to the hashtable entry. Otherwise returns nil.
390: * Doesn't work if hshenter is false.
391: */
392: {
393: register struct hshentry * num;
394: if (nexttok==NUM) {
395: num=nexthsh;
396: nextlex();
397: return num;
398: } else return nil;
399: }
400:
401:
402: readstring()
403: /* skip over characters until terminating single SDELIM */
404: /* if rewriteflag==true, copy every character read to frewrite.*/
405: /* Does not advance nextlex at the end. */
406: { register c;
407: if (rewriteflag) {
408: /* copy string verbatim to frewrite */
409: while ((c=putc(getc(finptr),frewrite)) != EOF) {
410: if (c==SDELIM) {
411: if ((c=putc(getc(finptr),frewrite)) != SDELIM) {
412: /* end of string */
413: nextc=c;
414: return;
415: }
416: }
417: }
418: } else {
419: /* skip string */
420: while ((c=getc(finptr)) != EOF) {
421: if (c==SDELIM) {
422: if ((c=getc(finptr)) != SDELIM) {
423: /* end of string */
424: nextc=c;
425: return;
426: }
427: }
428: }
429: }
430: nextc = c;
431: error("Unterminated string");
432: }
433:
434:
435: printstring()
436: /* Function: copy a string to stdout, until terminated with a single SDELIM.
437: * Does not advance nextlex at the end.
438: */
439: {
440: register c;
441: while ((c=getc(finptr)) != EOF) {
442: if (c==SDELIM) {
443: if ((c=getc(finptr)) != SDELIM) {
444: /* end of string */
445: nextc=c;
446: return;
447: }
448: }
449: VOID putchar(c);
450: }
451: nextc = c;
452: error("Unterminated string");
453: }
454:
455:
456:
457: int savestring(target,length)
458: char * target; int length;
459: /* copies a string terminated with SDELIM from file finptr to buffer target,
460: * but not more than length bytes. If the string is longer than length,
461: * the extra characters are skipped. The string may be empty, in which
462: * case a '\0' is placed into target.
463: * Double SDELIM is replaced with SDELIM.
464: * If rewriteflag==true, the string is also copied unchanged to frewrite.
465: * Returns the length of the saved string.
466: * Does not advance nextlex at the end.
467: */
468: {
469: register char * tp, * max;
470: register c;
471:
472: tp=target; max= target+length; /*max is one too large*/
473: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) {
474: *tp++ =c;
475: if (c== SDELIM) {
476: if ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM) {
477: /* end of string */
478: *(tp-1)='\0';
479: nextc=c;
480: return tp-target;
481: }
482: }
483: if (tp >= max) {
484: /* overflow */
485: error("string buffer overflow -- truncating string");
486: target[length-1]='\0';
487: /* skip rest of string */
488: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) {
489: if ((c==SDELIM) && ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM)) {
490: /* end of string */
491: nextc=c;
492: return length;
493: }
494: }
495: nextc = c;
496: error("Can't find %c to terminate string before end of file",SDELIM);
497: return length;
498: }
499: }
500: nextc = c;
501: error("Can't find %c to terminate string before end of file",SDELIM);
502: return length;
503: }
504:
505:
506: char *checkid(id, delim)
507: char *id, delim;
508: /* Function: check whether the string starting at id is an */
509: /* identifier and return a pointer to the last char*/
510: /* of the identifer. White space, delim and '\0' */
511: /* are legal delimeters. Aborts the program if not */
512: /* a legal identifier. Useful for checking commands*/
513: {
514: register enum tokens d;
515: register char *temp;
516: register char c,tc;
517:
518: temp = id;
519: if ( ctab[*id] == LETTER ) {
520: while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ;
521: if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) {
522: /* append \0 to end of id before error message */
523: tc = c;
524: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
525: *id = '\0';
526: faterror("Invalid character %c in identifier %s",tc,temp);
527: return nil ;
528: } else
529: return id;
530: } else {
531: /* append \0 to end of id before error message */
532: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
533: *id = '\0';
534: faterror("Identifier %s does not start with letter",temp);
535: return nil;
536: }
537: }
538:
539:
540: /*VARARGS1*/
541: serror(e,e1,e2,e3,e4,e5)
542: char * e, * e1, * e2, * e3, * e4, * e5;
543: /* non-fatal syntax error */
544: { nerror++;
545: VOID fprintf(stderr,"%s error, line %d: ", cmdid, line);
546: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
547: VOID putc('\n',stderr);
548: }
549:
550: /*VARARGS1*/
551: error(e,e1,e2,e3,e4,e5)
552: char * e, * e1, * e2, * e3, * e4, * e5;
553: /* non-fatal error */
554: { nerror++;
555: VOID fprintf(stderr,"%s error: ",cmdid);
556: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
557: VOID putc('\n',stderr);
558: }
559:
560: /*VARARGS1*/
561: fatserror(e,e1,e2,e3,e4,e5)
562: char * e, * e1, * e2, * e3, * e4, * e5;
563: /* fatal syntax error */
564: { nerror++;
565: VOID fprintf(stderr,"%s error, line %d: ", cmdid,line);
566: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
567: VOID fprintf(stderr,"\n%s aborted\n",cmdid);
568: VOID cleanup();
569: exit(1);
570: }
571:
572: /*VARARGS1*/
573: faterror(e,e1,e2,e3,e4,e5)
574: char * e, * e1, * e2, * e3, * e4, * e5;
575: /* fatal error, terminates program after cleanup */
576: { nerror++;
577: VOID fprintf(stderr,"%s error: ",cmdid);
578: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
579: VOID fprintf(stderr,"\n%s aborted\n",cmdid);
580: VOID cleanup();
581: exit(1);
582: }
583:
584: /*VARARGS1*/
585: warn(e,e1,e2,e3,e4,e5)
586: char * e, * e1, * e2, * e3, * e4, * e5;
587: /* prints a warning message */
588: { nwarn++;
589: VOID fprintf(stderr,"%s warning: ",cmdid);
590: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
591: VOID putc('\n',stderr);
592: }
593:
594:
595: /*VARARGS1*/
596: diagnose(e,e1,e2,e3,e4,e5)
597: char * e, * e1, * e2, * e3, * e4, * e5;
598: /* prints a diagnostic message */
599: {
600: if (!quietflag) {
601: VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
602: VOID putc('\n',stderr);
603: }
604: }
605:
606:
607:
fflsbuf(c, iop)
unsigned c; register FILE * iop;
/* Function: Flush iop.
 * Hands c and iop to stdio's _flsbuf and returns its result, but calls
 * faterror("write error") if that result is EOF.
 */
{
        register int status;

        status = _flsbuf(c, iop);
        if (status == EOF)
                faterror("write error");
        return status;
}
618:
619:
620: fputs(s, iop)
621: register char *s;
622: register FILE *iop;
623: /* Function: Put string s on file iop, abort on error.
624: * Same as puts in stdio, but with different putc macro.
625: */
626: {
627: register r;
628: register c;
629:
630: while (c = *s++)
631: r = putc(c, iop);
632: return(r);
633: }
634:
635:
636:
637: fprintf(iop, fmt, va_alist)
638: FILE *iop;
639: char *fmt;
640: va_dcl
641: /* Function: formatted output. Same as fprintf in stdio,
642: * but aborts program on error
643: */
644: {
645: register int value;
646: va_list ap;
647:
648: va_start(ap);
649: #ifdef VFPRINTF
650: VOID vfprintf(iop, fmt, ap);
651: #else
652: _doprnt(fmt, ap, iop);
653: #endif VFPRINTF
654: if (ferror(iop)) {
655: faterror("write error");
656: value = EOF;
657: } else value = 0;
658: va_end(ap);
659: return value;
660: }
661:
662:
663:
664: #ifdef LEXDB
665: /* test program reading a stream of lexems and printing the tokens.
666: */
667:
668:
669:
670: main(argc,argv)
671: int argc; char * argv[];
672: {
673: cmdid="lextest";
674: if (argc<2) {
675: VOID fputs("No input file\n",stderr);
676: exit(1);
677: }
678: if ((finptr=fopen(argv[1], "r")) == NULL) {
679: faterror("Can't open input file %s\n",argv[1]);
680: }
681: Lexinit();
682: rewriteflag=false;
683: while (nexttok != EOFILE) {
684: switch (nexttok) {
685:
686: case ID:
687: VOID printf("ID: %s",NextString);
688: break;
689:
690: case NUM:
691: if (hshenter==true)
692: VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab);
693: else
694: VOID printf("NUM, unentered: %s",NextString);
695: hshenter = !hshenter; /*alternate between dates and numbers*/
696: break;
697:
698: case COLON:
699: VOID printf("COLON"); break;
700:
701: case SEMI:
702: VOID printf("SEMI"); break;
703:
704: case STRING:
705: readstring();
706: VOID printf("STRING"); break;
707:
708: case UNKN:
709: VOID printf("UNKN"); break;
710:
711: default:
712: VOID printf("DEFAULT"); break;
713: }
714: VOID printf(" | ");
715: nextlex();
716: }
717: VOID printf("\nEnd of lexical analyzer test\n");
718: }
719:
cleanup()
/* Stub for the test program: there is nothing to clean up. */
{
}
723:
724:
725: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.