|
|
1.1 root 1: /*
2: * RCS file input
3: */
4: static char rcsid[]=
5: "$Header: /usr/wft/RCS/SRC/RCS/rcslex.c,v 3.3 82/12/10 16:22:37 wft Exp $ Purdue CS";
6: /*********************************************************************************
7: * Lexical Analysis.
8: * Character mapping table,
9: * hashtable, Lexinit, nextlex, getlex, getkey,
10: * getid, getnum, readstring, printstring, savestring,
11: * checkid, serror, fatserror, error, faterror, warn, diagnose
12: * fflsbuf, puts, fprintf
13: * Testprogram: define LEXDB
14: *********************************************************************************
15: *
16: * Copyright (C) 1982 by Walter F. Tichy
17: * Purdue University
18: * Computer Science Department
19: * West Lafayette, IN 47907
20: *
21: * All rights reserved. No part of this software may be sold or distributed
22: * in any form or by any means without the prior written permission of the
23: * author.
24: * Report problems and direct all inquiries to Tichy@purdue (ARPA net).
25: */
26:
27: /* $Log: rcslex.c,v $
28: * Revision 3.3 82/12/10 16:22:37 wft
29: * Improved error messages, changed exit status on error to 1.
30: *
31: * Revision 3.2 82/11/28 21:27:10 wft
32: * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
33: * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
34: * properly in case there is an IO-error (e.g., file system full).
35: *
36: * Revision 3.1 82/10/11 19:43:56 wft
37: * removed unused label out:;
38: * made sure all calls to getc() return into an integer, not a char.
39: */
40:
41:
42: /*
43: #define LEXDB
44: /* version LEXDB is for testing the lexical analyzer. The testprogram
45: * reads a stream of lexemes, enters the revision numbers into the
46: * hashtable, and prints the recognized tokens. Keywords are recognized
47: * as identifiers.
48: */
49:
50:
51:
52: #include "rcsbase.h"
53:
54:
55:
56: /* character mapping table */
57: enum tokens map[] = {
58: EOFILE, /* this will end up at ctab[-1] */
59: UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
60: UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN,
61: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
62: UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
63: SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE,
64: LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE,
65: DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
66: DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST,
67: AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
68: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
69: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
70: LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER,
71: ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
72: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
73: LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
74: LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN
75: };
76:
77:
78:
79:
80: struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/
81:
82: enum tokens nexttok; /*next token, set by nextlex */
83:
84: int hshenter /*if true, next suitable lexeme will be entered */
85: = true; /*into the symbol table. Handle with care. */
86: int nextc; /*next input character, initialized by Lexinit */
87:
88: int eof /*end-of-file indicator, set to >0 on end of file*/
89: = 0;
90: int line /*current line-number of input */
91: = 1;
92: int nerror /*counter for errors */
93: = 0;
94: int nwarn /*counter for warnings */
95: = 0;
96: char * cmdid /*command identification for error messages */
97: = nil;
98: int quietflag /*indicates quiet mode */
99: = false;
100: FILE * finptr; /*input file descriptor */
101:
102: FILE * frewrite; /*file descriptor for echoing input */
103:
104: int rewriteflag;/*indicates whether to echo to frewrite */
105:
106: char StringTab[strtsize]; /* string table and heap */
107:
108: char * NextString /*pointer to next identifier in StringTab*/
109: = nil;
110: char * Topchar /*pointer to next free byte in StringTab*/
111: = &StringTab[0]; /*set by nextlex, lookup */
112: struct hshentry hshtab[hshsize]; /*hashtable */
113:
114:
115:
116:
117:
118: lookup() {
119:
120: /* Function: Looks up the character string pointed to by NextString in the
121: * hashtable. If the string is not present, a new entry for it is created.
122: * If the string is present, TopChar is moved back to save the space for
123: * the string, and NextString is set to point to the original string.
124: * In any case, the address of the corresponding hashtable entry is placed
125: * into nexthsh.
126: * Algorithm: Quadratic hash, covering all entries.
127: * Assumptions: NextString points at the first character of the string.
128: * Topchar points at the first empty byte after the string.
129: */
130:
131: register int ihash; /* index into hashtable */
132: register char * sp, * np;
133: int c, delta, final, FirstScan; /*loop control*/
134:
135: /* calculate hash code */
136: sp = NextString;
137: ihash = 0;
138: while (*sp) ihash += *sp++;
139:
140: /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */
141: c=0;delta=1;final=(hshsize-1)/2;
142: FirstScan=true; /*first loop */
143:
144: for (;;) {
145: ihash = (ihash+c)%hshsize; /*next index*/
146:
147: if (hshtab[ihash].num == nil) {
148: /*empty slot found*/
149: hshtab[ihash].num = NextString;
150: nexthsh= &hshtab[ihash];/*save hashtable address*/
151: # ifdef LEXDB
152: printf("\nEntered: %s at %d ",nexthsh->num, ihash);
153: # endif
154: return;
155: }
156: /* compare strings */
157: sp=NextString;np=hshtab[ihash].num;
158: while (*sp == *np++) {
159: if (*sp == 0) {
160: /* match found */
161: nexthsh= &hshtab[ihash];
162: Topchar = NextString;
163: NextString = nexthsh->num;
164: return;
165: } else sp++;
166: }
167:
168: /* neither empty slot nor string found */
169: /* calculate next index and repeat */
170: if (c != final)
171: c += delta;
172: else {
173: if (FirstScan) {
174: /*set up second sweep*/
175: delta = -1; final = 1; FirstScan= false;
176: } else {
177: fatserror("Hashtable overflow");
178: }
179: }
180: }
181: };
182:
183:
184:
185:
186:
187:
188: Lexinit()
189: /* Function: Initialization of lexical analyzer:
190: * initializes the hastable,
191: * initializes nextc, nexttok if finptr != NULL
192: */
193: { register int i;
194:
195: for (i=hshsize-1; i>=0; i--) {
196: hshtab[i].num = nil;
197: }
198:
199: hshenter=true; eof=0; line=1; nerror=0; nwarn=0;
200: NextString=nil; Topchar = &StringTab[0];
201: if (finptr) {
202: nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/
203: nextlex(); /*initial token*/
204: } else {
205: nextc = '\0';
206: nexttok=EOFILE;
207: }
208: }
209:
210:
211:
212:
213:
214:
215:
216: nextlex()
217:
218: /* Function: Reads the next token and sets nexttok to the next token code.
219: * Only if the hshenter==true, a revision number is entered into the
220: * hashtable and a pointer to it is placed into nexthsh.
221: * This is useful for avoiding that dates are placed into the hashtable.
222: * For ID's and NUM's, NextString is set to the character string in the
223: * string table. Assumption: nextc contains the next character.
224: */
225: { register c;
226: register char * sp;
227: register enum tokens d;
228:
229: if (eof) {
230: nexttok=EOFILE;
231: return;
232: }
233: loop:
234: switch(nexttok=ctab[nextc]) {
235:
236: case UNKN:
237: case IDCHAR:
238: case PERIOD:
239: serror("unknown Character: %c",nextc);
240: nextc=GETC(finptr,frewrite,rewriteflag);
241: goto loop;
242:
243: case NEWLN:
244: line++;
245: # ifdef LEXDB
246: putchar('\n');
247: # endif
248: /* Note: falls into next case */
249:
250: case SPACE:
251: nextc=GETC(finptr,frewrite,rewriteflag);
252: goto loop;
253:
254: case EOFILE:
255: eof++;
256: nexttok=EOFILE;
257: return;
258:
259: case DIGIT:
260: NextString = sp = Topchar;
261: *sp++ = nextc;
262: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==DIGIT ||
263: d==PERIOD) {
264: *sp++ = c; /* 1.2. and 1.2 are different */
265: }
266: *sp++ = '\0';
267: if (sp >= StringTab+strtsize) {
268: /*may have written outside stringtable already*/
269: fatserror("Stringtable overflow");
270: }
271: Topchar = sp;
272: nextc = c;
273: if (hshenter == true)
274: lookup(); /* lookup updates NextString, Topchar*/
275: nexttok = NUM;
276: return;
277:
278:
279: case LETTER:
280: NextString = sp = Topchar;
281: *sp++ = nextc;
282: while ((d=ctab[c=GETC(finptr,frewrite,rewriteflag)])==LETTER ||
283: d==DIGIT || d==IDCHAR) {
284: *sp++ = c;
285: }
286: *sp++ = '\0';
287: if (sp >= StringTab+strtsize) {
288: /*may have written outside stringtable already*/
289: fatserror("Stringtable overflow");
290: }
291: Topchar = sp;
292: nextc = c;
293: nexttok = ID; /* may be ID or keyword */
294: return;
295:
296: case SBEGIN: /* long string */
297: nexttok = STRING;
298: /* note: only the initial SBEGIN has been read*/
299: /* read the string, and reset nextc afterwards*/
300: return;
301:
302: default:
303: nextc=GETC(finptr,frewrite,rewriteflag);
304: return;
305: }
306: }
307:
308:
309: int getlex(token)
310: enum tokens token;
311: /* Function: Checks if nexttok is the same as token. If so,
312: * advances the input by calling nextlex and returns true.
313: * otherwise returns false.
314: * Doesn't work for strings and keywords; loses the character string for ids.
315: */
316: {
317: if (nexttok==token) {
318: nextlex();
319: return(true);
320: } else return(false);
321: }
322:
323: int getkey (key)
324: char * key;
325: /* Function: If the current token is a keyword identical to key,
326: * getkey advances the input by calling nextlex and returns true;
327: * otherwise returns false.
328: */
329: {
330: register char *s1,*s2;
331:
332: if (nexttok==ID) {
333: s1=key; s2=NextString;
334: while(*s1 == *s2++)
335: if (*s1++ == '\0') {
336: /* match found */
337: Topchar = NextString; /*reset Topchar */
338: nextlex();
339: return(true);
340: }
341: }
342: return(false);
343: }
344:
345:
346:
347: char * getid()
348: /* Function: Checks if nexttok is an identifier. If so,
349: * advances the input by calling nextlex and returns a pointer
350: * to the identifier; otherwise returns nil.
351: * Treats keywords as identifiers.
352: */
353: {
354: register char * name;
355: if (nexttok==ID) {
356: name = NextString;
357: nextlex();
358: return name;
359: } else return nil;
360: }
361:
362:
363: struct hshentry * getnum()
364: /* Function: Checks if nexttok is a number. If so,
365: * advances the input by calling nextlex and returns a pointer
366: * to the hashtable entry. Otherwise returns nil.
367: * Doesn't work if hshenter is false.
368: */
369: {
370: register struct hshentry * num;
371: if (nexttok==NUM) {
372: num=nexthsh;
373: nextlex();
374: return num;
375: } else return nil;
376: }
377:
378:
379: readstring()
380: /* skip over characters until terminating single SDELIM */
381: /* if rewriteflag==true, copy every character read to frewrite.*/
382: /* Does not advance nextlex at the end. */
383: { register c;
384: if (rewriteflag) {
385: /* copy string verbatim to frewrite */
386: while ((c=putc(getc(finptr),frewrite)) != EOF) {
387: if (c==SDELIM) {
388: if ((c=putc(getc(finptr),frewrite)) != SDELIM) {
389: /* end of string */
390: nextc=c;
391: return;
392: }
393: }
394: }
395: } else {
396: /* skip string */
397: while ((c=getc(finptr)) != EOF) {
398: if (c==SDELIM) {
399: if ((c=getc(finptr)) != SDELIM) {
400: /* end of string */
401: nextc=c;
402: return;
403: }
404: }
405: }
406: }
407: nextc = c;
408: error("Unterminated string");
409: }
410:
411:
412: printstring()
413: /* Function: copy a string to stdout, until terminated with a single SDELIM.
414: * Does not advance nextlex at the end.
415: */
416: {
417: register c;
418: while ((c=getc(finptr)) != EOF) {
419: if (c==SDELIM) {
420: if ((c=getc(finptr)) != SDELIM) {
421: /* end of string */
422: nextc=c;
423: return;
424: }
425: }
426: putchar(c);
427: }
428: nextc = c;
429: error("Unterminated string");
430: }
431:
432:
433:
434: int savestring(target,length)
435: char * target; int length;
436: /* copies a string terminated with SDELIM from file finptr to buffer target,
437: * but not more than length bytes. If the string is longer than length,
438: * the extra characters are skipped. The string may be empty, in which
439: * case a '\0' is placed into target.
440: * Double SDELIM is replaced with SDELIM.
441: * If rewriteflag==true, the string is also copied unchanged to frewrite.
442: * Returns the length of the saved string.
443: * Does not advance nextlex at the end.
444: */
445: {
446: register char * tp, * max;
447: register c;
448:
449: tp=target; max= target+length; /*max is one too large*/
450: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) {
451: *tp++ =c;
452: if (c== SDELIM) {
453: if ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM) {
454: /* end of string */
455: *(tp-1)='\0';
456: nextc=c;
457: return tp-target;
458: }
459: }
460: if (tp >= max) {
461: /* overflow */
462: error("string buffer overflow -- truncating string");
463: target[length-1]='\0';
464: /* skip rest of string */
465: while ((c=GETC(finptr,frewrite,rewriteflag))!=EOF) {
466: if ((c==SDELIM) && ((c=GETC(finptr,frewrite,rewriteflag))!=SDELIM)) {
467: /* end of string */
468: nextc=c;
469: return length;
470: }
471: }
472: nextc = c;
473: error("Can't find %c to terminate string before end of file",SDELIM);
474: return length;
475: }
476: }
477: nextc = c;
478: error("Can't find %c to terminate string before end of file",SDELIM);
479: return length;
480: }
481:
482:
483: char *checkid(id, delim)
484: char *id, delim;
485: /* Function: check whether the string starting at id is an */
486: /* identifier and return a pointer to the last char*/
487: /* of the identifer. White space, delim and '\0' */
488: /* are legal delimeters. Aborts the program if not */
489: /* a legal identifier. Useful for checking commands*/
490: {
491: register enum tokens d;
492: register char *temp;
493: register char c,tc;
494:
495: temp = id;
496: if ( ctab[*id] == LETTER ) {
497: while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ;
498: if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) {
499: /* append \0 to end of id before error message */
500: tc = c;
501: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
502: *id = '\0';
503: faterror("Invalid character %c in identifier %s",tc,temp);
504: return nil ;
505: } else
506: return id;
507: } else {
508: /* append \0 to end of id before error message */
509: while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
510: *id = '\0';
511: faterror("Identifier %s does not start with letter",temp);
512: return nil;
513: }
514: }
515:
516:
517: serror(e,e1,e2,e3,e4,e5)
518: char * e, * e1;
519: /* non-fatal syntax error */
520: { nerror++;
521: fprintf(stderr,"%s error, line %d: ", cmdid, line);
522: fprintf(stderr,e, e1, e2, e3, e4, e5);
523: putc('\n',stderr);
524: }
525:
526: error(e,e1,e2,e3,e4,e5)
527: char * e, * e1;
528: /* non-fatal error */
529: { nerror++;
530: fprintf(stderr,"%s error: ",cmdid);
531: fprintf(stderr,e, e1, e2, e3, e4, e5);
532: putc('\n',stderr);
533: }
534:
535: fatserror(e,e1,e2,e3,e4,e5)
536: char * e, * e1;
537: /* fatal syntax error */
538: { nerror++;
539: fprintf(stderr,"%s error, line %d: ", cmdid,line);
540: fprintf(stderr,e, e1, e2, e3, e4, e5);
541: fprintf(stderr,"\n%s aborted\n",cmdid);
542: cleanup();
543: exit(1);
544: }
545:
546: faterror(e,e1,e2,e3,e4,e5)
547: char * e, * e1;
548: /* fatal error, terminates program after cleanup */
549: { nerror++;
550: fprintf(stderr,"%s error: ",cmdid);
551: fprintf(stderr,e, e1, e2, e3, e4, e5);
552: fprintf(stderr,"\n%s aborted\n",cmdid);
553: cleanup();
554: exit(1);
555: }
556:
557: warn(e,e1,e2,e3,e4,e5)
558: char * e, * e1;
559: /* prints a warning message */
560: { nwarn++;
561: fprintf(stderr,"%s warning: ",cmdid);
562: fprintf(stderr,e, e1, e2, e3, e4, e5);
563: putc('\n',stderr);
564: }
565:
566:
567: diagnose(e,e1,e2,e3,e4,e5)
568: char * e, * e1;
569: /* prints a diagnostic message */
570: {
571: if (!quietflag) {
572: fprintf(stderr,e, e1, e2, e3, e4, e5);
573: putc('\n',stderr);
574: }
575: }
576:
577:
578:
579: fflsbuf(c, iop)
580: int c; register FILE * iop;
581: /* Function: Flush iop.
582: * Same routine as _flsbuf in stdio, but aborts program on error.
583: */
584: { register result;
585: if ((result=_flsbuf(c,iop))==EOF)
586: faterror("write error");
587: return result;
588: }
589:
590:
591: fputs(s, iop)
592: register char *s;
593: register FILE *iop;
594: /* Function: Put string s on file iop, abort on error.
595: * Same as puts in stdio, but with different putc macro.
596: */
597: {
598: register r;
599: register c;
600:
601: while (c = *s++)
602: r = putc(c, iop);
603: return(r);
604: }
605:
606:
607:
608: fprintf(iop, fmt, args)
609: FILE *iop;
610: char *fmt;
611: /* Function: formatted output. Same as fprintf in stdio,
612: * but aborts program on error
613: */
614: {
615: _doprnt(fmt, &args, iop);
616: if (ferror(iop)) {
617: faterror("write error");
618: return EOF;
619: } else return 0;
620: }
621:
622:
623:
624: #ifdef LEXDB
625: /* test program reading a stream of lexems and printing the tokens.
626: */
627:
628:
629:
630: main(argc,argv)
631: int argc; char * argv[];
632: {
633: cmdid="lextest";
634: if (argc<2) {
635: fputs("No input file\n",stderr);
636: exit(1);
637: }
638: if ((finptr=fopen(argv[1], "r")) == NULL) {
639: faterror("Can't open input file %s\n",argv[1]);
640: }
641: Lexinit();
642: rewriteflag=false;
643: while (nexttok != EOFILE) {
644: switch (nexttok) {
645:
646: case ID:
647: printf("ID: %s",NextString);
648: break;
649:
650: case NUM:
651: if (hshenter==true)
652: printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab);
653: else
654: printf("NUM, unentered: %s",NextString);
655: hshenter = !hshenter; /*alternate between dates and numbers*/
656: break;
657:
658: case COLON:
659: printf("COLON"); break;
660:
661: case SEMI:
662: printf("SEMI"); break;
663:
664: case STRING:
665: readstring();
666: printf("STRING"); break;
667:
668: case UNKN:
669: printf("UNKN"); break;
670:
671: default:
672: printf("DEFAULT"); break;
673: }
674: printf(" | ");
675: nextlex();
676: }
677: printf("\nEnd of lexical analyzer test\n");
678: }
679:
680: cleanup()
681: /* dummy */
682: {}
683:
684:
685: #endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.