/*
 * lexical analysis and source input
 */
4:
/* RCS identification string embedded in the object file */
static char *RCSid =
	"$Header: lex.c,v 3.1 88/11/03 09:16:43 egisin Exp $";
6:
7: #include <stddef.h>
8: #include <stdlib.h>
9: #include <stdio.h>
10: #include <string.h>
11: #include <errno.h>
12: #include <setjmp.h>
13: #include <unistd.h>
14: #include "sh.h"
15: #include "lex.h"
16: #include "tree.h"
17: #include "table.h"
18: #include "expand.h"
19:
20: int ttyfd = -1; /* tty fd for edit and jobs */
21: char *history[HISTORY]; /* saved commands */
22: char **histptr = history - 1; /* last history item */
23: int histpush; /* number of pushed fc commands */
24:
25: static int alias;
26: static int getsc_ ARGS((void));
27:
28: /* optimized getsc_() */
29: #define getsc() ((*source->str != 0) ? *source->str++ : getsc_())
30: #define ungetsc() ((source->str != null) ? (source->str--) : source->str)
31:
32: /*
33: * Lexical analyzer
34: *
35: * tokens are not regular expressions, they are LL(1).
36: * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
37: * hence the state stack.
38: */
39:
40: int
41: yylex(cf)
42: int cf;
43: {
44: register int c, state;
45: char states [64], *statep = states;
46: XString ws; /* expandable output word */
47: register char *wp; /* output word pointer */
48: register char *sp, *dp;
49: int istate;
50: int c2;
51:
52: Again:
53: Xinit(ws, wp, 256);
54: if (alias) { /* trailing ' ' in alias definition */
55: alias = 0;
56: cf |= ALIAS;
57: }
58:
59: if (cf&ONEWORD)
60: istate = SWORD;
61: else { /* normal lexing */
62: istate = SBASE;
63: while ((c = getsc()) == ' ' || c == '\t')
64: ;
65: if (c == '#')
66: while ((c = getsc()) != 0 && c != '\n')
67: ;
68: ungetsc();
69: }
70:
71: /* collect non-special or quoted characters to form word */
72: for (*statep = state = istate;
73: !((c = getsc()) == 0 || state == SBASE && ctype(c, C_LEX1)); ) {
74: Xcheck(ws, wp);
75: switch (state) {
76: case SBASE:
77: Sbase:
78: switch (c) {
79: case '\\':
80: c = getsc();
81: if (c != '\n')
82: *wp++ = QCHAR, *wp++ = c;
83: else
84: if (wp == Xstring(ws, wp))
85: goto Again;
86: break;
87: case '\'':
88: *++statep = state = SSQUOTE;
89: *wp++ = OQUOTE;
90: break;
91: case '"':
92: *++statep = state = SDQUOTE;
93: *wp++ = OQUOTE;
94: break;
95: default:
96: goto Subst;
97: }
98: break;
99:
100: Subst:
101: switch (c) {
102: case '\\':
103: c = getsc();
104: switch (c) {
105: case '\n':
106: break;
107: case '"': case '\\':
108: case '$': case '`':
109: *wp++ = QCHAR, *wp++ = c;
110: break;
111: default:
112: *wp++ = CHAR, *wp++ = '\\';
113: *wp++ = CHAR, *wp++ = c;
114: break;
115: }
116: break;
117: case '$':
118: c = getsc();
119: if (c == '(') {
120: *++statep = state = SPAREN;
121: *wp++ = COMSUB;
122: } else
123: if (c == '{') {
124: *++statep = state = SBRACE;
125: *wp++ = OSUBST;
126: c = getsc();
127: do {
128: Xcheck(ws, wp);
129: *wp++ = c;
130: c = getsc();
131: } while (ctype(c, C_ALPHA|C_DIGIT));
132: *wp++ = 0;
133: /* todo: more compile-time checking */
134: if (c == '}')
135: ungetsc();
136: else if (c == '#' || c == '%') {
137: /* Korn pattern trimming */
138: if (getsc() == c)
139: c |= 0x80;
140: else
141: ungetsc();
142: *wp++ = c;
143: } else if (c == ':')
144: *wp++ = 0x80|getsc();
145: else
146: *wp++ = c;
147: } else if (ctype(c, C_ALPHA)) {
148: *wp++ = OSUBST;
149: do {
150: *wp++ = c;
151: c = getsc();
152: } while (ctype(c, C_ALPHA|C_DIGIT));
153: *wp++ = 0;
154: *wp++ = CSUBST;
155: ungetsc();
156: } else if (ctype(c, C_DIGIT|C_VAR1)) {
157: *wp++ = OSUBST;
158: *wp++ = c;
159: *wp++ = 0;
160: *wp++ = CSUBST;
161: } else {
162: *wp++ = CHAR, *wp++ = '$';
163: *wp++ = CHAR, *wp++ = c;
164: }
165: break;
166: case '`':
167: *++statep = state = SBQUOTE;
168: *wp++ = COMSUB;
169: break;
170: default:
171: *wp++ = CHAR, *wp++ = c;
172: }
173: break;
174:
175: case SSQUOTE:
176: if (c == '\'') {
177: state = *--statep;
178: *wp++ = CQUOTE;
179: } else
180: *wp++ = QCHAR, *wp++ = c;
181: break;
182:
183: case SDQUOTE:
184: if (c == '"') {
185: state = *--statep;
186: *wp++ = CQUOTE;
187: } else
188: goto Subst;
189: break;
190:
191: case SPAREN:
192: if (c == '(')
193: *++statep = state;
194: else if (c == ')')
195: state = *--statep;
196: if (state == SPAREN)
197: *wp++ = c;
198: else
199: *wp++ = 0; /* end of COMSUB */
200: break;
201:
202: case SBRACE:
203: if (c == '}') {
204: state = *--statep;
205: *wp++ = CSUBST;
206: } else
207: goto Sbase;
208: break;
209:
210: case SBQUOTE:
211: if (c == '`') {
212: *wp++ = 0;
213: state = *--statep;
214: } else /* todo: handle silly `\`` escapes */
215: /* todo: both \" and \` in "`...`" */
216: *wp++ = c;
217: break;
218:
219: case SWORD: /* ONEWORD */
220: goto Subst;
221: }
222: }
223: if (state != istate)
224: yyerror("no closing quote");
225:
226: if (c == '<' || c == '>') {
227: char *cp = Xstring(ws, wp);
228: if (wp > cp && cp[0] == CHAR && digit(cp[1])) {
229: wp = cp; /* throw away word */
230: iounit = cp[1] - '0';
231: } else
232: iounit = -1; /* default */
233: }
234:
235: if (wp == Xstring(ws, wp) && state == SBASE) {
236: Xfree(ws, sp); /* free word */
237: /* no word, process LEX1 character */
238: switch (c) {
239: default:
240: return c;
241:
242: case '|':
243: case '&':
244: case ';':
245: if (getsc() == c)
246: c = (c == ';') ? BREAK :
247: (c == '|') ? LOGOR :
248: (c == '&') ? LOGAND :
249: YYERRCODE;
250: else
251: ungetsc();
252: return c;
253:
254: case '>':
255: case '<':
256: c2 = getsc();
257: if (c2 == '>' || c2 == '<') {
258: if (c2 != c)
259: yyerror("syntax error");
260: yylval.i = c == '>'? IOWRITE|IOCAT: IOHERE;
261: c2 = getsc();
262: } else
263: yylval.i = c == '>'? IOWRITE: IOREAD;
264: if (c2 != '&' || yylval.i == IOHERE)
265: ungetsc();
266: else
267: yylval.i |= IODUP;
268: return REDIR;
269:
270: case '\n':
271: gethere();
272: if (cf & CONTIN)
273: goto Again;
274: return c;
275:
276: case '(':
277: c2 = getsc();
278: if (c2 == ')')
279: c = MPAREN;
280: else if (c2 == '(')
281: yyerror("(( not supported");
282: else
283: ungetsc();
284: case ')':
285: return c;
286: }
287: }
288:
289: *wp++ = EOS; /* terminate word */
290: yylval.cp = Xclose(ws, wp);
291: if (state == SWORD) /* ONEWORD? */
292: return LWORD;
293: ungetsc(); /* unget terminator */
294:
295: /* copy word to unprefixed string ident */
296: for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
297: *dp++ = *sp++;
298: *dp = 0;
299: #if 0
300: if (*ident == '~' || (dp = strchr(ident, '=')) != NULL && dp[1] == '~')
301: "Tilde expansion";
302: #endif
303: if (c != EOS)
304: *ident = 0; /* word is not unquoted */
305:
306: if (*ident != 0 && (cf&(KEYWORD|ALIAS))) {
307: register struct tbl *p;
308:
309: p = tsearch(&lexicals, ident, hash(ident));
310: if (p != NULL && (p->flag&ISSET))
311: if (p->type == CKEYWD && (cf&KEYWORD)) {
312: afree(yylval.cp, ATEMP);
313: return p->val.i;
314: } else
315: if (p->type == CALIAS && (cf&ALIAS)) {
316: register Source *s;
317:
318: /* check for recursive aliasing */
319: for (s = source; s->type == SALIAS; s = s->next)
320: if (s->u.tblp == p)
321: return LWORD;
322: afree(yylval.cp, ATEMP);
323:
324: /* push alias expansion */
325: s = pushs(SALIAS);
326: s->str = p->val.s;
327: s->u.tblp = p;
328: s->next = source;
329: source = s;
330: goto Again;
331: }
332: }
333:
334: return LWORD;
335: }
336:
337: static void readhere();
338:
339: gethere()
340: {
341: register struct ioword **p;
342:
343: for (p = heres; p < herep; p++)
344: readhere(*p);
345: herep = heres;
346: }
347:
348: /*
349: * read "<<word" text into temp file
350: * todo: set up E_ERR to fclose(f) on unwind
351: */
352:
353: static void
354: readhere(iop)
355: register struct ioword *iop;
356: {
357: register FILE *f;
358: struct temp *h;
359: register int c;
360: char *eof;
361: register char *cp;
362: int strip;
363: char line [LINE+1];
364:
365: eof = evalstr(iop->name, 0);
366: strip = *eof == '-';
367: eof += strip; /* skip '-' */
368:
369: h = maketemp(ATEMP);
370: h->next = e.temps; e.temps = h;
371: iop->name = h->name;
372: f = fopen(h->name, "w");
373: if (f == NULL)
374: errorf("Cannot create temporary file\n");
375: setvbuf(f, (char *)NULL, _IOFBF, BUFSIZ);
376:
377: for (;;) {
378: cp = line;
379: while ((c = getsc()) != '\n') {
380: if (c == 0)
381: errorf("here document `%s' unclosed\n", eof);
382: if (cp >= line+LINE)
383: break;
384: *cp++ = c;
385: }
386: ungetsc();
387: *cp = 0;
388: for (cp = line; strip && *cp == '\t'; cp++)
389: ;
390: if (strcmp(eof, cp) == 0 || c == 0)
391: break;
392: while ((c = *cp++) != '\0')
393: putc(c, f);
394: while ((c = getsc()) != '\n') {
395: if (c == 0)
396: errorf("here document `%s' unclosed\n", eof);
397: putc(c, f);
398: }
399: putc(c, f);
400: }
401: fclose(f);
402: }
403:
404: void
405: yyerror(msg)
406: Const char *msg;
407: {
408: yynerrs++;
409: while (source->type == SALIAS) /* pop aliases */
410: source = source->next;
411: if (source->file != NULL)
412: shellf("%s[%d]: ", source->file, source->line);
413: source->str = null; /* zap pending input */
414: errorf("%s\n", msg);
415: }
416:
417: /*
418: * input for yylex with alias expansion
419: */
420:
421: Source *
422: pushs(type)
423: int type;
424: {
425: register Source *s;
426:
427: s = (Source *) alloc(sizeof(Source), ATEMP);
428: s->type = type;
429: s->str = null; /* "" */
430: s->line = 0;
431: s->file = NULL;
432: s->echo = 0;
433: s->next = NULL;
434: return s;
435: }
436:
437: int
438: getsc_()
439: {
440: register Source *s = source;
441: register int c;
442:
443: while ((c = *s->str++) == 0) {
444: s->str = NULL; /* return 0 for EOF by default */
445: switch (s->type) {
446: case SEOF:
447: s->str = null;
448: return 0;
449:
450: case STTY:
451: if (histpush < 0) { /* commands pushed by dofc */
452: s->type = SHIST;
453: s->str = null;
454: continue;
455: }
456: #if COHERENT
457: mail();
458: #endif
459: s->line++;
460: s->str = line;
461: line[0] = '\0';
462: pprompt(prompt);
463: flushshf(1); flushshf(2);
464: #if EDIT
465: if (flag[FEMACS])
466: c = x_read(ttyfd, line, LINE);
467: else
468: #endif
469: c = read(ttyfd, line, LINE);
470: if (c < 0) /* read error */
471: c = 0;
472: if (c == 0) /* EOF */
473: s->str = NULL; /* was NULL */
474: prompt = strval(global("PS2"));
475: line[c] = '\0';
476: if (line[0] != '\n')
477: histsave(line);
478: else
479: s->line--;
480: break;
481:
482: case SHIST:
483: if (histpush == 0) {
484: s->type = STTY;
485: s->str = null;
486: continue;
487: }
488: s->line++;
489: s->str = histptr[++histpush];
490: pprompt("!< "); /* todo: PS9 */
491: shellf("%s\n", s->str);
492: strcpy(line, s->str);
493: s->str = strchr(line, 0);
494: *s->str++ = '\n';
495: *s->str = 0;
496: s->str = line;
497: break;
498:
499: case SFILE:
500: s->line++;
501: s->str = fgets(line, LINE, s->u.file);
502: if (s->str == NULL)
503: if (s->u.file != stdin)
504: fclose(s->u.file);
505: break;
506:
507: case SWSTR:
508: break;
509:
510: case SSTRING:
511: s->str = "\n";
512: s->type = SEOF;
513: break;
514:
515: case SWORDS:
516: s->str = *s->u.strv++;
517: s->type = SWORDSEP;
518: break;
519:
520: case SWORDSEP:
521: if (*s->u.strv == NULL) {
522: s->str = "\n";
523: s->type = SEOF;
524: } else {
525: s->str = " ";
526: s->type = SWORDS;
527: }
528: break;
529:
530: case SALIAS:
531: s->str = s->u.tblp->val.s;
532: if (s->str[0] != 0 && strchr(s->str, 0)[-1] == ' ')
533: alias = 1; /* trailing ' ' */
534: source = s = s->next; /* pop source stack */
535: continue;
536: }
537: if (s->str == NULL) {
538: s->type = SEOF;
539: s->str = null; /* "" */
540: return 0;
541: }
542: if (s->echo)
543: fputs(s->str, shlout);
544: }
545: return c;
546: }
547:
548: pprompt(cp)
549: register char *cp;
550: {
551: while (*cp != 0)
552: if (*cp != '!')
553: putc(*cp++, shlout);
554: else
555: if (*++cp == '!')
556: putc(*cp++, shlout);
557: else
558: shellf("%d", source->line);
559: fflush(shlout);
560: }
561:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.