|
|
1.1 root 1: (* Lexical analyzer generator for Standard ML.
2: Version 1.1, February 1989
3:
4: Copyright (c) 1989 by Andrew W. Appel, James S. Mattson, David R. Tarditi
5:
6: This software comes with ABSOLUTELY NO WARRANTY.
7: This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
8: COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT",
9: distributed with this software). You may copy and distribute this software;
10: see the COPYRIGHT NOTICE for details and restrictions.
11:
12: Changes:
13: 7/25/89(drt): added %header declaration, code to place
14: user declarations at same level as makeLexer, etc.
15: This is needed for the parser generator.
16:
17: 10/89(awa): added %arg declaration (see lexgen.doc).
18: *)
19:
20: functor RedBlack(B : sig type key
21: val > : key*key->bool
22: end):
23: sig type tree
24: type key
25: val empty : tree
26: val insert : key * tree -> tree
27: val lookup : key * tree -> key
28: exception notfound of key
29: end =
30: struct
31: open B
32: datatype color = RED | BLACK
33: datatype tree = empty | tree of key * color * tree * tree
34: exception notfound of key
35:
36: fun insert (key,t) =
37: let fun f empty = tree(key,RED,empty,empty)
38: | f (tree(k,BLACK,l,r)) =
39: if key>k
40: then case f r
41: of r as tree(rk,RED, rl as tree(rlk,RED,rll,rlr),rr) =>
42: (case l
43: of tree(lk,RED,ll,lr) =>
44: tree(k,RED,tree(lk,BLACK,ll,lr),
45: tree(rk,BLACK,rl,rr))
46: | _ => tree(rlk,BLACK,tree(k,RED,l,rll),
47: tree(rk,RED,rlr,rr)))
48: | r as tree(rk,RED,rl, rr as tree(rrk,RED,rrl,rrr)) =>
49: (case l
50: of tree(lk,RED,ll,lr) =>
51: tree(k,RED,tree(lk,BLACK,ll,lr),
52: tree(rk,BLACK,rl,rr))
53: | _ => tree(rk,BLACK,tree(k,RED,l,rl),rr))
54: | r => tree(k,BLACK,l,r)
55: else if k>key
56: then case f l
57: of l as tree(lk,RED,ll, lr as tree(lrk,RED,lrl,lrr)) =>
58: (case r
59: of tree(rk,RED,rl,rr) =>
60: tree(k,RED,tree(lk,BLACK,ll,lr),
61: tree(rk,BLACK,rl,rr))
62: | _ => tree(lrk,BLACK,tree(lk,RED,ll,lrl),
63: tree(k,RED,lrr,r)))
64: | l as tree(lk,RED, ll as tree(llk,RED,lll,llr), lr) =>
65: (case r
66: of tree(rk,RED,rl,rr) =>
67: tree(k,RED,tree(lk,BLACK,ll,lr),
68: tree(rk,BLACK,rl,rr))
69: | _ => tree(lk,BLACK,ll,tree(k,RED,lr,r)))
70: | l => tree(k,BLACK,l,r)
71: else tree(key,BLACK,l,r)
72: | f (tree(k,RED,l,r)) =
73: if key>k then tree(k,RED,l, f r)
74: else if k>key then tree(k,RED, f l, r)
75: else tree(key,RED,l,r)
76: in case f t
77: of tree(k,RED, l as tree(_,RED,_,_), r) => tree(k,BLACK,l,r)
78: | tree(k,RED, l, r as tree(_,RED,_,_)) => tree(k,BLACK,l,r)
79: | t => t
80: end
81:
82:
83: fun lookup (key,t) =
84: let fun look empty = raise (notfound key)
85: | look (tree(k,_,l,r)) =
86: if k>key then look l
87: else if key>k then look r
88: else k
89: in look t
90: end
91:
92: end
93:
94: signature LEXGEN =
95: sig
96: val lexGen: string -> unit
97: end
98:
99: structure LexGen: LEXGEN =
100: struct
101:
102: datatype token = CHARS of bool array | QMARK | STAR | PLUS | BAR
103: | LP | RP | CARAT | DOLLAR | SLASH | STATE of string list
104: | REPS of int * int | ID of string | ACTION of string
105: | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES |
106: COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG
107:
108: datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp
109: | ALT of exp * exp | CAT of exp * exp | TRAIL of int
110: | END of int
111:
112: (* flags describing input Lex spec. - unnecessary code is omitted *)
113: (* if possible *)
114:
115: val CharFormat = ref false;
116: val UsesTrailingContext = ref false;
117: val UsesPrevNewLine = ref false;
118:
119: (* flags for various bells & whistles that Lex has. These slow the
120: lexer down and should be omitted from production lexers (if you
121: really want speed) *)
122:
123: val CountNewLines = ref false;
124: val HaveReject = ref false;
125:
126: (* Can increase size of character set *)
127:
128: val CharSetSize = ref 128;
129:
130: (* Can name structure or declare header code *)
131:
132: val StrName = ref "Mlex"
133: val HeaderCode = ref ""
134: val HeaderDecl = ref false
135: val ArgCode = ref (NONE: string option)
136: val StrDecl = ref false
137:
138: val ResetFlags = fn () => (CountNewLines := false; HaveReject := false;
139: CharSetSize := 128; StrName := "Mlex";
140: HeaderCode := ""; HeaderDecl:= false;
141: ArgCode := NONE;
142: StrDecl := false)
143:
144: val LexOut = ref(std_out);
145: val say = fn x => output (!LexOut) x
146:
147: (* Union: merge two sorted lists of integers *)
148:
149: fun union(a,b) = let val rec merge = fn
150: (nil,nil,z) => z
151: | (nil,el::more,z) => merge(nil,more,el::z)
152: | (el::more,nil,z) => merge(more,nil,el::z)
153: | (x::morex,y::morey,z) => if (x:int)=(y:int)
154: then merge(morex,morey,x::z)
155: else if x>y then merge(morex,y::morey,x::z)
156: else merge(x::morex,morey,y::z)
157: in merge(rev a,rev b,nil)
158: end
159:
160: (* Nullable: compute if a important expression parse tree node is nullable *)
161:
162: val rec nullable = fn
163: EPS => true
164: | CLASS(_) => false
165: | CLOSURE(_) => true
166: | ALT(n1,n2) => nullable(n1) orelse nullable(n2)
167: | CAT(n1,n2) => nullable(n1) andalso nullable(n2)
168: | TRAIL(_) => true
169: | END(_) => false
170:
171: (* FIRSTPOS: firstpos function for parse tree expressions *)
172:
173: and firstpos = fn
174: EPS => nil
175: | CLASS(_,i) => [i]
176: | CLOSURE(n) => firstpos(n)
177: | ALT(n1,n2) => union(firstpos(n1),firstpos(n2))
178: | CAT(n1,n2) => if nullable(n1) then union(firstpos(n1),firstpos(n2))
179: else firstpos(n1)
180: | TRAIL(i) => [i]
181: | END(i) => [i]
182:
183: (* LASTPOS: Lastpos function for parse tree expressions *)
184:
185: and lastpos = fn
186: EPS => nil
187: | CLASS(_,i) => [i]
188: | CLOSURE(n) => lastpos(n)
189: | ALT(n1,n2) => union(lastpos(n1),lastpos(n2))
190: | CAT(n1,n2) => if nullable(n2) then union(lastpos(n1),lastpos(n2))
191: else lastpos(n2)
192: | TRAIL(i) => [i]
193: | END(i) => [i]
194: ;
195:
196: (* ++: Increment an integer reference *)
197:
198: fun ++(x) : int = (x := !x + 1; !x);
199:
200: structure dict =
201: struct
202: type 'a relation = 'a * 'a -> bool
203: abstype ('b,'a) dictionary = DATA of { Table : ('b * 'a) list,
204: Leq : 'b * 'b -> bool }
205: with
206: exception LOOKUP
207: fun create Leqfunc = DATA { Table = nil, Leq = Leqfunc }
208: fun lookup (DATA { Table = entrylist, Leq = leq }) key =
209: let fun search [] = raise LOOKUP
210: | search((k,item)::entries) =
211: if leq(key,k)
212: then if leq(k,key) then item else raise LOOKUP
213: else search entries
214: in search entrylist
215: end
216: fun enter (DATA { Table = entrylist, Leq = leq })
217: (newentry as (key : 'b,item :'a)) : ('b,'a) dictionary =
218: let val gt = fn a => fn b => not (leq(a,b))
219: val eq = fn k => fn k' => (leq(k,k')) andalso (leq(k',k))
220: fun update nil = [ newentry ]
221: | update ((entry as (k,_))::entries) =
222: if (eq key k) then newentry::entries
223: else if gt k key then newentry::(entry::entries)
224: else entry::(update entries)
225: in DATA { Table = update entrylist, Leq = leq }
226: end
227: fun listofdict (DATA { Table = entrylist,Leq = leq}) =
228: let fun f (nil,r) = rev r
229: | f (a::b,r) = f (b,a::r)
230: in f(entrylist,nil)
231: end
232: end
233: end
234:
235: open dict;
236:
237: (* INPUT.ML : Input w/ one character push back capability *)
238:
239: val LineNum = ref 1;
240:
241: abstype ibuf =
242: BUF of instream * {b : string ref, p : int ref}
243: with
244: fun make_ibuf(s) = BUF (s, {b=ref"", p = ref 0})
245: fun close_ibuf (BUF (s,_)) = close_in(s)
246: exception eof
247: fun getch (a as (BUF(s,{b,p}))) =
248: if (!p = (size (!b)))
249: then (b := input s (max(1,min(1024,can_input s)));
250: p := 0;
251: if (size (!b))=0
252: then raise eof
253: else getch a)
254: else (let val ch = substring(!b,!p,1)
255: in (if ch = "\n"
256: then LineNum := !LineNum + 1
257: else ();
258: p := !p + 1;
259: ch)
260: end)
261: fun ungetch(BUF(s,{b,p})) = (
262: if substring(!b,!p,1) = "\n"
263: then LineNum := !LineNum - 1
264: else ();
265: p := !p - 1)
266: end;
267:
268: exception error
269:
270: val pr_err = fn x => (output std_out ("mlex: syntax error in line "^
271: (makestring (!LineNum))^
272: ": "^x^"\n"); raise error)
273:
274: exception syntax_error; (* error in user's input file *)
275:
276: exception lex_error; (* unexpected error in lexer *)
277:
278: val LexBuf = ref(make_ibuf(std_in));
279: val LexState = ref 0;
280: val NextTok = ref BOF;
281: val inquote = ref false;
282:
283: fun AdvanceTok () : unit =
284:
285: let fun isletter(x:string) = x>="a" andalso x<="z" orelse x>="A" andalso x<="Z";
286: fun isdigit(x:string) = x>="0" andalso x<="9";
287: (* check for valid (non-leading) identifier character (added by JHR) *)
288: fun isidentchr c = (
289: (isletter c) orelse (isdigit c) orelse (c = "_") orelse (c = "'"))
290: fun atoi(s:string) : int =
291: let val rec num = fn
292: (x::y,n) => if isdigit(x) then num(y,10*n+ord(x)-(ord "0")) else n
293: | (_,n) => n
294: in num(explode(s),0)
295: end;
296:
297: val rec skipws = fn () => case nextch() of
298: " " => skipws()
299: | "\t" => skipws()
300: | "\n" => skipws()
301: | x => x
302:
303: and nextch = fn () => getch(!LexBuf)
304:
305: and escaped = fn () => case nextch() of
306: "b" => "\008"
307: | "n" => "\n"
308: | "t" => "\t"
309: | x =>
310: let fun f(n,c,t) =
311: if c=3 then
312: if n>= (!CharSetSize) then
313: pr_err("illegal ascii escape '"^t^"'")
314: else chr n
315: else let val ch=nextch()
316: in if isdigit ch then
317: f(n*10+(ord ch)-(ord "0"),c+1,t^ch)
318: else pr_err("illegal ascii escape '"^t^"'")
319: end
320: in if isdigit x then
321: f((ord x)-ord("0"),1,x)
322: else x
323: end
324:
325: and onechar = fn x => let val c = array(!CharSetSize,false) in
326: update(c,ord(x),true); CHARS(c)
327: end
328:
329: in case !LexState of 0 => let val makeTok = fn () =>
330: case skipws() of
331: (* Lex % operators *)
332: "%" => (case nextch() of
333: "%" => LEXMARK
334: | a => let fun f s =
335: let val a = nextch()
336: in if isletter a then f(a::s)
337: else (ungetch(!LexBuf);
338: implode(rev s))
339: end
340: val command = f [a]
341: in if command = "reject" then REJECT
342: else if command = "count" then COUNT
343: else if command = "full" then FULLCHARSET
344: else if command = "s" then LEXSTATES
345: else if command = "S" then LEXSTATES
346: else if command = "structure" then STRUCT
347: else if command = "header" then HEADER
348: else if command = "arg" then ARG
349: else pr_err "unknown % operator "
350: end
351: )
352: (* semicolon (for end of LEXSTATES) *)
353: | ";" => SEMI
354: (* anything else *)
355: | ch => if isletter(ch) then
356: let fun getID matched =
357: let val x = nextch()
358: (**** fix by JHR
359: in if isletter(x) orelse isdigit(x) orelse
360: x = "_" orelse x = "'"
361: ****)
362: in if (isidentchr x)
363: then getID (x::matched)
364: else (ungetch(!LexBuf); implode(rev matched))
365: end
366: in ID(getID [ch])
367: end
368: else (pr_err ("bad character: " ^ ch))
369: in NextTok := makeTok()
370: end
371: | 1 => let val rec makeTok = fn () =>
372: if !inquote then case nextch() of
373: (* inside quoted string *)
374: "\\" => onechar(escaped())
375: | "\"" => (inquote := false; makeTok())
376: | x => onechar(x)
377: else case skipws() of
378: (* single character operators *)
379: "?" => QMARK
380: | "*" => STAR
381: | "+" => PLUS
382: | "|" => BAR
383: | "(" => LP
384: | ")" => RP
385: | "^" => CARAT
386: | "$" => DOLLAR
387: | "/" => SLASH
388: | ";" => SEMI
389: | "." => let val c = array(!CharSetSize,true) in
390: update(c,10,false); CHARS(c)
391: end
392: (* assign and arrow *)
393: | "=" => let val c = nextch() in
394: if c=">" then ARROW else (ungetch(!LexBuf); ASSIGN)
395: end
396: (* character set *)
397: | "[" => let val rec classch = fn () => let val x = skipws()
398: in if x="\\" then escaped() else x
399: end;
400: val first = classch();
401: val flag = (first<>"^");
402: val c = array(!CharSetSize,not flag);
403: val rec add = fn x => if x="" then ()
404: else update(c,ord(x),flag)
405: and range = fn (x,y) =>
406: if x>y then (pr_err "bad char. range")
407: else let val i = ref(ord(x)) and j = ord(y)
408: in while !i<=j do (add(chr(!i)); i := !i + 1)
409: end
410: and getClass = fn (last) => case classch() of
411: "]" => (add(last); c)
412: | "-" => if last<>"" then
413: let val x = classch() in
414: if x="]" then (add(last);add("-"); c)
415: else (range(last,x);getClass(""))
416: end
417: else getClass("-")
418: | x => (add(last); getClass(x))
419: in CHARS(getClass(if first="^" then "" else first))
420: end
421: (* Start States specification *)
422: | "<" => let val rec get_state = fn (prev,matched) =>
423: case nextch() of
424: ">" => matched::prev
425: | "," => get_state(matched::prev,"")
426: | x => if isletter(x) then get_state(prev,matched^x)
427: else (pr_err "bad start state list")
428: in STATE(get_state(nil,""))
429: end
430: (* {id} or repititions *)
431: | "{" => let val ch = nextch() in if isletter(ch) then
432: let val rec getID = fn (matched) =>
433: case nextch() of
434: "}" => matched
435: (**** fix by JHR
436: | x => if isletter(x) orelse isdigit(x) then
437: ****)
438: | x => if (isidentchr x) then
439: getID(matched^x)
440: else (pr_err "invalid char. class name")
441: in ID(getID(ch))
442: end
443: else if isdigit(ch) then
444: let val rec get_r = fn
445: (matched,r1) => case nextch() of
446: "}" => let val n = atoi(matched) in
447: if r1 = ~1 then (n,n) else (r1,n)
448: end
449: | "," => if r1 = ~1 then get_r("",atoi(matched))
450: else (pr_err "invalid repetitions spec.")
451: | x => if isdigit(x) then get_r(matched^x,r1)
452: else (pr_err "invalid char in repetitions spec")
453: in REPS(get_r(ch,~1))
454: end
455: else (pr_err "bad repetitions spec")
456: end
457: (* Lex % operators *)
458: | "%" => if nextch() = "%" then LEXMARK else
459: (ungetch(!LexBuf); onechar ("%"))
460: (* backslash escape *)
461: | "\\" => onechar(escaped())
462: (* start quoted string *)
463: | "\"" => (inquote := true; makeTok())
464: (* anything else *)
465: | ch => onechar(ch)
466: in NextTok := makeTok()
467: end
468: | 2 => NextTok :=
469: (case skipws()
470: of "(" => let fun GetAct (lpct,x) =
471: case getch(!LexBuf)
472: of "(" => GetAct (lpct+1,"("::x)
473: | ")" => if lpct = 0 then (implode (rev x))
474: else GetAct(lpct-1,")"::x)
475: | y => GetAct(lpct,y::x)
476: in ACTION (GetAct (0,nil))
477: end
478: | ";" => SEMI
479: | c => (pr_err ("invalid character "^c)))
480: | _ => raise lex_error
481: end
482: handle eof => NextTok := EOF ;
483:
484: fun GetTok (_:unit) : token =
485: let val t = !NextTok in AdvanceTok(); t
486: end;
487: val SymTab = ref (create String.<=) : (string,exp) dictionary ref
488:
489: fun GetExp () : exp =
490:
491: let val rec optional = fn e => ALT(EPS,e)
492:
493: and newline = fn () => let val c = array(!CharSetSize,false) in
494: update(c,10,true); c
495: end
496:
497: and endline = fn e => trail(e,CLASS(newline(),0))
498:
499: and trail = fn (e1,e2) => CAT(CAT(e1,TRAIL(0)),e2)
500:
501: and closure1 = fn e => CAT(e,CLOSURE(e))
502:
503: and repeat = fn (min,max,e) => let val rec rep = fn
504: (0,0) => EPS
505: | (0,1) => ALT(e,EPS)
506: | (0,i) => CAT(rep(0,1),rep(0,i-1))
507: | (i,j) => CAT(e,rep(i-1,j-1))
508: in rep(min,max)
509: end
510:
511: and exp0 = fn () => case GetTok() of
512: CHARS(c) => exp1(CLASS(c,0))
513: | LP => let val e = exp0() in
514: if !NextTok = RP then
515: (AdvanceTok(); exp1(e))
516: else (pr_err "missing '('") end
517: | ID(name) => exp1(lookup(!SymTab)(name))
518: | _ => raise syntax_error
519:
520: and exp1 = fn (e) => case !NextTok of
521: SEMI => e
522: | ARROW => e
523: | EOF => e
524: | LP => exp2(e,exp0())
525: | RP => e
526: | t => (AdvanceTok(); case t of
527: QMARK => exp1(optional(e))
528: | STAR => exp1(CLOSURE(e))
529: | PLUS => exp1(closure1(e))
530: | CHARS(c) => exp2(e,CLASS(c,0))
531: | BAR => ALT(e,exp0())
532: | DOLLAR => endline(e)
533: | SLASH => trail(e,exp0())
534: | REPS(i,j) => exp1(repeat(i,j,e))
535: | ID(name) => exp2(e,lookup(!SymTab)(name))
536: | _ => raise syntax_error)
537:
538: and exp2 = fn (e1,e2) => case !NextTok of
539: SEMI => CAT(e1,e2)
540: | ARROW => CAT(e1,e2)
541: | EOF => CAT(e1,e2)
542: | LP => exp2(CAT(e1,e2),exp0())
543: | RP => CAT(e1,e2)
544: | t => (AdvanceTok(); case t of
545: QMARK => exp1(CAT(e1,optional(e2)))
546: | STAR => exp1(CAT(e1,CLOSURE(e2)))
547: | PLUS => exp1(CAT(e1,closure1(e2)))
548: | CHARS(c) => exp2(CAT(e1,e2),CLASS(c,0))
549: | BAR => ALT(CAT(e1,e2),exp0())
550: | DOLLAR => endline(CAT(e1,e2))
551: | SLASH => trail(CAT(e1,e2),exp0())
552: | REPS(i,j) => exp1(CAT(e1,repeat(i,j,e2)))
553: | ID(name) => exp2(CAT(e1,e2),lookup(!SymTab)(name))
554: | _ => raise syntax_error)
555: in exp0()
556: end;
557: val StateTab = ref(create(String.<=)) : (string,int) dictionary ref
558:
559: val StateNum = ref 0;
560:
561: fun GetStates () : int list =
562:
563: let fun add nil sl = sl
564: | add (x::y) sl = add y (union ([lookup (!StateTab)(x)],sl))
565:
566: fun addall i sl =
567: if i <= !StateNum then addall (i+2) (union ([i],sl))
568: else sl
569:
570: fun incall (x::y) = (x+1)::incall y
571: | incall nil = nil
572:
573: fun addincs nil = nil
574: | addincs (x::y) = x::(x+1)::addincs y
575:
576: val state_list =
577: case !NextTok of
578: STATE s => (AdvanceTok(); LexState := 1; add s nil)
579: | _ => addall 1 nil
580:
581: in case !NextTok
582: of CARAT => (LexState := 1; AdvanceTok(); UsesPrevNewLine := true;
583: incall state_list)
584: | _ => addincs state_list
585: end
586:
587: val LeafNum = ref ~1;
588:
589: fun renum(e : exp) : exp =
590: let val rec label = fn
591: EPS => EPS
592: | CLASS(x,_) => CLASS(x,++LeafNum)
593: | CLOSURE(e) => CLOSURE(label(e))
594: | ALT(e1,e2) => ALT(label(e1),label(e2))
595: | CAT(e1,e2) => CAT(label(e1),label(e2))
596: | TRAIL(i) => TRAIL(++LeafNum)
597: | END(i) => END(++LeafNum)
598: in label(e)
599: end;
600:
601: exception parse_error;
602:
603: fun parse() : (string * (int list * exp) list * ((string,string) dictionary)) =
604: let val Accept = ref (create String.<=) : (string,string) dictionary ref
605: val rec ParseRtns = fn l => case getch(!LexBuf) of
606: "%" => let val c = getch(!LexBuf) in
607: if c="%" then (implode (rev l))
608: else ParseRtns(c::"%"::l)
609: end
610: | c => ParseRtns(c::l)
611: and ParseDefs = fn () =>
612: (LexState:=0; AdvanceTok(); case !NextTok of
613: LEXMARK => ()
614: | LEXSTATES =>
615: let fun f () = (case !NextTok of (ID i) =>
616: (StateTab := enter(!StateTab)(i,++StateNum);
617: ++StateNum; AdvanceTok(); f())
618: | _ => ())
619: in AdvanceTok(); f ();
620: if !NextTok=SEMI then ParseDefs() else
621: (pr_err "expected ';'")
622: end
623: | ID x => (LexState:=1; AdvanceTok(); if GetTok() = ASSIGN
624: then (SymTab := enter(!SymTab)(x,GetExp());
625: if !NextTok = SEMI then ParseDefs()
626: else (pr_err "expected ';'"))
627: else raise syntax_error)
628: | REJECT => (HaveReject := true; ParseDefs())
629: | COUNT => (CountNewLines := true; ParseDefs())
630: | FULLCHARSET => (CharSetSize := 256; ParseDefs())
631: | HEADER => (LexState := 2; AdvanceTok();
632: case GetTok()
633: of ACTION s =>
634: if (!StrDecl) then
635: (pr_err "cannot have both %s and %header \
636: \declarations")
637: else if (!HeaderDecl) then
638: (pr_err "duplicate %header declarations")
639: else
640: (HeaderCode := s; LexState := 0;
641: HeaderDecl := true; ParseDefs())
642: | _ => raise syntax_error)
643: | ARG => (LexState := 2; AdvanceTok();
644: case GetTok()
645: of ACTION s =>
646: (case !ArgCode
647: of SOME _ => pr_err "duplicate %arg declarations"
648: | NONE => ArgCode := SOME s;
649: LexState := 0;
650: ParseDefs())
651: | _ => raise syntax_error)
652: | STRUCT => (AdvanceTok();
653: case !NextTok of
654: (ID i) =>
655: if (!HeaderDecl) then
656: (pr_err "cannot have both %s and %header \
657: \declarations")
658: else if (!StrDecl) then
659: (pr_err "duplicate %s declarations")
660: else StrName := i
661: | _ => (pr_err "expected ID");
662: ParseDefs())
663: | _ => raise syntax_error)
664: and ParseRules =
665: fn rules => (LexState:=1; AdvanceTok(); case !NextTok of
666: LEXMARK => rules
667: | EOF => rules
668: | _ =>
669: let val s = GetStates()
670: val e = renum(CAT(GetExp(),END(0)))
671: in
672: if !NextTok = ARROW then
673: (LexState:=2; AdvanceTok();
674: case GetTok() of ACTION(act) =>
675: if !NextTok=SEMI then
676: (Accept:=enter(!Accept) (makestring (!LeafNum),act);
677: ParseRules((s,e)::rules))
678: else (pr_err "expected ';'")
679: | _ => raise syntax_error)
680: else (pr_err "expected '=>'")
681: end)
682: in let val usercode = ParseRtns nil
683: in (ParseDefs(); (usercode,ParseRules(nil),!Accept))
684: end
685: end handle syntax_error => (pr_err "")
686:
687: fun makebegin () : unit =
688: let fun make nil = ()
689: | make ((x,n:int)::y)=(say "val "; say x; say " = " ;
690: say "STARTSTATE ";
691: say (makestring n); say ";\n"; make y)
692: in say "\n(* start state definitions *)\n\n"; make(listofdict(!StateTab))
693: end
694:
695: structure L =
696: struct
697: nonfix >
698: type key = int list * string
699: fun > ((key,item:string),(key',item')) =
700: let fun f ((a:int)::a') (b::b') = if Integer.> (a,b) then true
701: else if a=b then f a' b'
702: else false
703: | f _ _ = false
704: in f key key'
705: end
706: end
707:
708: structure RB = RedBlack(L)
709:
710: fun maketable (fins:(int * (int list)) list,
711: tcs :(int * (int list)) list,
712: tcpairs: (int * int) list,
713: trans : (int*(int list)) list) : unit =
714:
715: (* Fins = (state #, list of final leaves for the state) list
716: tcs = (state #, list of trailing context leaves which begin in this state)
717: list
718: tcpairs = (trailing context leaf, end leaf) list
719: trans = (state #,list of transitions for state) list *)
720:
721: let datatype elem = N of int | T of int | D of int
722: val count = ref 0
723: val _ = (if length(trans)<256 then CharFormat := true
724: else CharFormat := false;
725: if length(tcpairs)> 0 then
726: (UsesTrailingContext := true;
727: say "\ndatatype yyfinstate = N of int | \
728: \ T of int | D of int\n")
729: else (UsesTrailingContext := false;
730: say "\ndatatype yyfinstate = N of int");
731: say "\ntype statedata = {fin : yyfinstate list, trans: ";
732: case !CharFormat of
733: true => say "string}"
734: | false => say "int array}";
735: say "\n(* transition & final state table *)\nval tab = let\n")
736: val newfins =
737: let fun IsEndLeaf t =
738: let fun f ((l,e)::r) = if (e=t) then true else f r
739: | f nil = false in f tcpairs end
740:
741: fun GetEndLeaf t =
742: let fun f ((tl,el)::r) = if (tl=t) then el else f r
743: in f tcpairs
744: end
745: fun GetTrConLeaves s =
746: let fun f ((s',l)::r) = if (s = s') then l else f r
747: | f nil = nil
748: in f tcs
749: end
750: fun sort_leaves s =
751: let fun insert (x:int) (a::b) =
752: if (x <= a) then x::(a::b)
753: else a::(insert x b)
754: | insert x nil = [x]
755: in fold (fn (x,r) => insert x r) s nil
756: end
757: fun conv a = if (IsEndLeaf a) then (D a) else (N a)
758: fun merge (a::a',b::b') =
759: if (a <= b) then (conv a)::merge(a',b::b')
760: else (T b)::(merge(a::a',b'))
761: | merge (a::a',nil) = (conv a)::(merge (a',nil))
762: | merge (nil,b::b') = (T b)::(merge (b',nil))
763: | merge (nil,nil) = nil
764:
765: in map (fn (x,l) =>
766: rev (merge (l,
767: sort_leaves (map (fn x => GetEndLeaf x) (GetTrConLeaves x)))))
768: fins
769: end
770:
771: val rs =
772: let open RB
773: fun makeItems x =
774: let fun MakeList(nil,i) = ()
775: | MakeList([(x:int)],i) = say (makestring x)
776: | MakeList(x::tl,16) =
777: (say "\n"; say (makestring x); say ","; MakeList(tl,1))
778: | MakeList(x::tl,i) =
779: (say (makestring x); say ","; MakeList(tl,i+1))
780: fun MakeString(nil,i) = ()
781: | MakeString(((x:int)::tl),i) =
782: let val x = (makestring x)
783: val x' = (case size x of
784: 1 => "00" ^ x | 2 => "0" ^ x | 3 => x)
785: in if i=16
786: then (say "\\\n\\\\"; say x'; MakeString(tl,1))
787: else (say "\\"; say x'; MakeString(tl,i+1))
788: end
789: in case !CharFormat of
790: true => (say " =\n\""; MakeString(x,0); say "\"\n")
791: | false => (say " = arrayoflist\n["; MakeList(x,0); say "]\n")
792: end
793: fun makeEntry(nil,rs,t) = rev rs
794: | makeEntry(((l:int,x)::y),rs,t) =
795: let val name = "s" ^ (makestring l)
796: in let val (r,n) = lookup ((x,name),t)
797: in makeEntry(y,(n::rs),t)
798: end handle notfound _ => (count := !count+1;
799: say "val "; say name; makeItems x;
800: makeEntry(y,(name::rs),(insert ((x,name),t))))
801: end
802: in (makeEntry(trans,nil,empty))
803: end
804:
805: fun makeTable(nil,nil) = ()
806: | makeTable(a::a',b::b') =
807: let fun makeItems nil = ()
808: | makeItems (hd::tl) =
809: let val (t,n) =
810: case hd of
811: (N i) => ("(N ",i)
812: | (T i) => ("(T ",i)
813: | (D i) => ("(D ",i)
814: in (say t; say (makestring n); say ")";
815: if null tl
816: then ()
817: else (say ","; makeItems tl))
818: end
819: in (say "{fin = ["; makeItems b;
820: say "], trans = "; say a; say "}";
821: if null a'
822: then ()
823: else (say ",\n"; makeTable(a',b')))
824: end
825:
826: fun msg x = output std_out x
827:
828: in (say "in arrayoflist\n["; makeTable(rs,newfins); say "]\nend\n";
829: msg ("\nNumber of states = " ^ (makestring (length trans)));
830: msg ("\nNumber of distinct rows = " ^ (makestring (!count)));
831: msg ("\nApprox. memory size of trans. table = " ^
832: (makestring (!count*(!CharSetSize)*(if !CharFormat then 1 else 8))));
833: msg " bytes\n")
834: end
835:
836: (* makeaccept: Takes a (string,string) dictionary, prints case statement for
837: accepting leaf actions. The key strings are the leaf #'s, the data strings
838: are the actions *)
839:
840: fun makeaccept ends =
841: let fun startline f = if f then say " " else say "| "
842: fun make(nil,f) = (startline f; say "_ => raise Internal.LexerError\n")
843: | make((x,a)::y,f) = (startline f; say x; say " => (";
844: say a; say ")\n"; make(y,false))
845: in make (listofdict(ends),true)
846: end
847:
848: fun leafdata(e:(int list * exp) list) =
849: let val fp = array(!LeafNum + 1,nil)
850: and leaf = array(!LeafNum + 1,EPS)
851: and tcpairs = ref nil
852: and trailmark = ref ~1;
853: val rec add = fn
854: (nil,x) => ()
855: | (hd::tl,x) => (update(fp,hd,union(fp sub hd,x));
856: add(tl,x))
857: and moredata = fn
858: CLOSURE(e1) =>
859: (moredata(e1); add(lastpos(e1),firstpos(e1)))
860: | ALT(e1,e2) => (moredata(e1); moredata(e2))
861: | CAT(e1,e2) => (moredata(e1); moredata(e2);
862: add(lastpos(e1),firstpos(e2)))
863: | CLASS(x,i) => update(leaf,i,CLASS(x,i))
864: | TRAIL(i) => (update(leaf,i,TRAIL(i)); if !trailmark = ~1
865: then trailmark := i else ())
866: | END(i) => (update(leaf,i,END(i)); if !trailmark <> ~1
867: then (tcpairs := (!trailmark,i)::(!tcpairs);
868: trailmark := ~1) else ())
869: | _ => ()
870: and makedata = fn
871: nil => ()
872: | (_,x)::tl => (moredata(x);makedata(tl))
873: in trailmark := ~1; makedata(e); (fp,leaf,!tcpairs)
874: end;
875:
876: fun makedfa(rules) =
877: let val StateTab = ref (create(String.<=)) : (string,int) dictionary ref
878: val fintab = ref (create(Integer.<=)) : (int,(int list)) dictionary ref
879: val transtab = ref (create(Integer.<=)) : (int,int list) dictionary ref
880: val tctab = ref (create(Integer.<=)) : (int,(int list)) dictionary ref
881: val (fp, leaf, tcpairs) = leafdata(rules);
882:
883: fun visit (state,statenum) =
884: let val transitions = gettrans(state) in
885: fintab := enter(!fintab)(statenum,getfin(state));
886: tctab := enter(!tctab)(statenum,gettc(state));
887: transtab := enter(!transtab)(statenum,transitions)
888: end
889:
890: and visitstarts (states) =
891: let fun vs nil i = ()
892: | vs (hd::tl) i = (visit (hd,i); vs tl (i+1))
893: in vs states 0
894: end
895:
896: and hashstate(s: int list) =
897: let val rec hs =
898: fn (nil,z) => z
899: | ((x:int)::y,z) => hs(y,z ^ " " ^ (makestring x))
900: in hs(s,"")
901: end
902:
903: and find(s) = lookup(!StateTab)(hashstate(s))
904:
905: and add(s,n) = StateTab := enter(!StateTab)(hashstate(s),n)
906:
907: and getstate (state) =
908: find(state)
909: handle LOOKUP => let val n = ++StateNum in
910: add(state,n); visit(state,n); n
911: end
912:
913: and getfin state =
914: let fun f nil fins = fins
915: | f (hd::tl) fins =
916: case (leaf sub hd)
917: of END _ => f tl (hd::fins)
918: | _ => f tl fins
919: in f state nil
920: end
921:
922: and gettc state =
923: let fun f nil fins = fins
924: | f (hd::tl) fins =
925: case (leaf sub hd)
926: of TRAIL _ => f tl (hd::fins)
927: | _ => f tl fins
928: in f state nil
929: end
930:
931: and gettrans (state) =
932: let fun loop c tlist =
933: let fun cktrans nil r = r
934: | cktrans (hd::tl) r =
935: case (leaf sub hd) of
936: CLASS(i,_)=>
937: (if (i sub c) then cktrans tl (union(r,fp sub hd))
938: else cktrans tl r handle Subscript =>
939: cktrans tl r
940: )
941: | _ => cktrans tl r
942: in if c >= 0 then
943: let val v=cktrans state nil
944: in loop (c-1) (if v=nil then 0::tlist else (getstate v)::tlist)
945: end
946: else tlist
947: end
948: in loop ((!CharSetSize) - 1) nil
949: end
950:
951: and startstates() =
952: let val startarray = array(!StateNum + 1, nil);
953: fun listofarray(a,n) =
954: let fun f i l = if i >= 0 then f (i-1) ((a sub i)::l) else l
955: in f (n-1) nil end
956: val rec makess = fn
957: nil => ()
958: | (startlist,e)::tl => (fix(startlist,firstpos(e));makess(tl))
959: and fix = fn
960: (nil,_) => ()
961: | (s::tl,firsts) => (update(startarray,s,
962: union(firsts,startarray sub s));
963: fix(tl,firsts))
964: in makess(rules);listofarray(startarray, !StateNum + 1)
965: end
966:
967: in visitstarts(startstates());
968: (listofdict(!fintab),listofdict(!transtab),listofdict(!tctab),tcpairs)
969: end
970:
971: val skel_hd =
972: " struct\n\
973: \ structure UserDeclarations =\n\
974: \ struct\n\
975: \"
976:
977:
978: val skel_mid2 =
979: " | Internal.D i =>\n\
980: \ let val newrs =\n\
981: \ if (List.exists (fn x => i=x) rs) then rs\n\
982: \ else i::rs\n\
983: \ in action (i,(acts::l),newrs)\n\
984: \ end\n\
985: \ | Internal.T k =>\n\
986: \ let fun f (a::b,r) =\n\
987: \ if a=k\n\
988: \ then action(i,(((Internal.N a)::acts)::l),(b@r))\n\
989: \ else f (b,a::r)\n\
990: \ | f (nil,r) = action(i,(acts::l),rs)\n\
991: \ in f (rs,nil)\n\
992: \ end\n\
993: \"
994:
995: fun lexGen(infile) =
996: let val outfile = infile ^ ".sml"
997: fun PrintLexer (ends) =
998: let val sayln = fn x => (say x; say "\n")
999: in case !ArgCode
1000: of NONE => (sayln "fun lex () : Internal.result =";
1001: sayln "let fun continue() = lex() in")
1002: | SOME s => (say "fun lex "; say "(yyarg as ("; say s; sayln ")) =";
1003: sayln "let fun continue() : Internal.result = ");
1004: say " let fun scan (s,AcceptingLeaves : Internal.yyfinstate";
1005: sayln " list list,l,i0) =";
1006: if !UsesTrailingContext
1007: then say "\tlet fun action (i,nil,rs)"
1008: else say "\tlet fun action (i,nil)";
1009: sayln " = raise LexError";
1010: if !UsesTrailingContext
1011: then sayln "\t| action (i,nil::l,rs) = action(i-1,l,rs)"
1012: else sayln "\t| action (i,nil::l) = action (i-1,l)";
1013: if !UsesTrailingContext
1014: then sayln "\t| action (i,(node::acts)::l,rs) ="
1015: else sayln "\t| action (i,(node::acts)::l) =";
1016: sayln "\t\tcase node of";
1017: sayln "\t\t Internal.N yyk => ";
1018: sayln "\t\t\t(let val yytext = substring(!yyb,i0,i-i0)";
1019: if !CountNewLines
1020: then (sayln "\t\t\tval _ = yylineno :=";
1021: sayln "(fold (fn (x,r) => if x =\"\\n\" then r+1 else r)";
1022: sayln "(explode yytext) (!yylineno))")
1023: else ();
1024: sayln "\t\t\topen UserDeclarations Internal.StartStates";
1025: sayln " in (yypos := i; case yyk of ";
1026: sayln "";
1027: sayln "\t\t\t(* Application actions *)\n";
1028: makeaccept(ends);
1029: say "\n\t\t) end ";
1030: if !HaveReject
1031: then (say "handle Reject => action(i,acts::l";
1032: if !UsesTrailingContext
1033: then say ",rs)"
1034: else say ")")
1035: else ();
1036: say ")\n\n";
1037: if (!UsesTrailingContext) then say skel_mid2 else ();
1038: sayln "\tval {fin,trans} = Internal.tab sub s";
1039: sayln "\tval NewAcceptingLeaves = fin::AcceptingLeaves";
1040: sayln "\tin if l = !yybl then";
1041: sayln "\t let val newchars= if !yydone then \"\" else yyinput 1024";
1042: sayln "\t in if (size newchars)=0";
1043: sayln "\t\t then (yydone := true;";
1044: say "\t\t if (l=i0) then UserDeclarations.eof ";
1045: sayln (case !ArgCode of NONE => "()" | SOME _ => "yyarg");
1046: say "\t\t else action(l,NewAcceptingLeaves";
1047: if !UsesTrailingContext then
1048: sayln ",nil))" else sayln "))";
1049: sayln "\t\t else (if i0=l then yyb := newchars";
1050: sayln "\t\t else yyb := substring(!yyb,i0,l-i0)^newchars;";
1051: sayln "\t\t yybl := size (!yyb);";
1052: sayln "\t\t scan (s,AcceptingLeaves,l-i0,0))";
1053: sayln "\t end";
1054: sayln "\t else let val NewChar = ordof(!yyb,l)";
1055: (say "\t\tval NewState = ";
1056: case (!CharFormat)
1057: of true => sayln "ordof(trans,NewChar)"
1058: | false => sayln "(trans sub NewChar)");
1059: say "\t\tin if NewState=0 then action(l,NewAcceptingLeaves";
1060: if !UsesTrailingContext then sayln ",nil)" else sayln ")";
1061: sayln "\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)";
1062: sayln "\tend";
1063: sayln "\tend";
1064: if !UsesPrevNewLine then () else sayln "(*";
1065: sayln "\tval start= if substring(!yyb,!yypos-1,1)=\"\\n\"";
1066: sayln "then !yybegin+1 else !yybegin";
1067: if !UsesPrevNewLine then () else sayln "*)";
1068: say "\tin scan(";
1069: if !UsesPrevNewLine then say "start"
1070: else say "!yybegin (* start *)";
1071: sayln ",nil,!yypos,!yypos)";
1072: sayln " end";
1073: sayln (case !ArgCode of NONE => "end" | SOME _ => "in continue end");
1074: sayln " in lex";
1075: sayln " end";
1076: sayln "end"
1077: end
1078:
1079: in (UsesPrevNewLine := false;
1080: ResetFlags();
1081: LexBuf := make_ibuf(open_in infile);
1082: LexOut := open_out(outfile);
1083: StateNum := 2;
1084: LineNum := 1;
1085: StateTab := enter(create(String.<=))("INITIAL",1);
1086: LeafNum := ~1;
1087: let
1088: val (user_code,rules,ends) = parse();
1089: val (fins,trans,tctab,tcpairs) = makedfa(rules)
1090: in
1091: if (!HeaderDecl)
1092: then say (!HeaderCode)
1093: else say ("structure " ^ (!StrName));
1094: say "=\n";
1095: say skel_hd;
1096: say user_code;
1097: say "end (* end of user routines *)\n";
1098: say "exception LexError (* raised if illegal leaf ";
1099: say "action tried *)\n";
1100: say "structure Internal =\n\tstruct\n";
1101: maketable(fins,tctab,tcpairs,trans);
1102: say "structure StartStates =\n\tstruct\n";
1103: say "\tdatatype yystartstate = STARTSTATE of int\n";
1104: makebegin();
1105: say "\nend\n";
1106: say "type result = UserDeclarations.lexresult\n";
1107: say "\texception LexerError (* raised if illegal leaf ";
1108: say "action tried *)\n";
1109: if !HaveReject
1110: then say "\texception Reject\t(* for implementing REJECT *)\n"
1111: else ();
1112: say "end\n\n";
1113: if !CountNewLines then say "val yylineno = ref 0\n\n" else ();
1114: say "fun makeLexer yyinput = \n";
1115: say "let \n";
1116: say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
1117: \\tval yybl = ref 1\t\t(*buffer length *)\n\
1118: \\tval yypos = ref 1\t\t(* location of next character to use *)\n\
1119: \\tval yydone = ref false\t\t(* eof found yet? *)\n\
1120: \\tval yybegin = ref 1\t\t(*Current 'start state' for lexer *)\n\
1121: \\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
1122: \\t\t yybegin := x\n\n";
1123: if !HaveReject
1124: then say "\tval REJECT = fn () => raise Internal.Reject\n\n"
1125: else ();
1126: PrintLexer(ends);
1127: close_ibuf(!LexBuf);
1128: close_out(!LexOut)
1129: end)
1130: end
1131: end
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.