|
|
1.1 root 1: (* April 1989, Jussi Rintanen, Helsinki University of Technology *)
2:
3: (* Parser is a structure containing a type definition for
4: specification elements and a function, which maps
5: a specification file to a ML-representation of the specification.
6:
7: The ML-representation is a tuple of prologue/insert code-fragments,
8: list of tree translation rules, default code fragment for cost,
9: a string containing the name for the resulting structure, a datatype
10: definition for the result type, and the list of symbols appearing in
11: the specification.
12:
13: *)
14:
15: signature PARSER =
16: sig
17:
18: exception ParserError of string
19:
20: datatype symbol = Node of string * int | Label of string
21:
22: datatype cost = Cost of string list | NoCost
23: and action = Action of string list
24: and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
25: and ruletype = Ordinary | Topdown | Rewrite
26: and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)
27:
28: datatype code_fragment = Prologue of string list | Insert of string list
29:
30: val specification : instream ->
31: (code_fragment list * rule list * cost * string * string * symbol list)
32:
33: end;
34:
35: (*
36: The parser is a straightforward predictive parser. It interfaces to the
37: lexer through the signature LEXER declared elsewhere. The symbol table
38: takes care of various book-keeping tasks of the parser and the
39: type checking of the symbols appearing in the rules of a specification.
40: *)
41:
42: (* The symbol table is represented by an object of type stable. Declare_label
43: and declare_node are used by the functions parsing node and label
44: declarations. They add a symbol to the symbol table. By check_symbol, the
45: functions parsing rules, the tree pattern in rules, assert a property
46: of a symbol. The symbol table checks whether it agrees, and if not,
47: it raises the exception SymbolConflict. Otherwise it returns the
48: type of the symbol. The function next_rule is used by the
49: parser for numbering the rules.
50: *)
51:
52: signature SYMBOLTABLE =
53: sig
54: exception SymbolConflict of string
55: datatype class = NonTerminal | Terminal of int | Unknown
56:
57: type stable
58:
59: val empty_symboltable : stable
60:
61: val check_symbol : stable * string * class -> class
62: val declare_label : stable * string * string -> stable
63: val declare_node : stable * string * int -> stable
64:
65: val get_labels : stable -> (string * string) list
66: val get_nodes' : stable -> (string * int) list
67:
68: val next_rule : stable -> int * stable
69:
70: end;
71:
72: structure Symboltable : SYMBOLTABLE =
73: struct
74:
75: exception SymbolConflict of string
76:
77: datatype class = NonTerminal | Terminal of int | Unknown
78:
79: type stable = (string -> class) * int * (string * string) list * (string * int) list
80:
81: fun conflict s = raise SymbolConflict s
82:
83: val empty_symboltable =
84: (fn s => conflict ("symbol "^s^" is not declared"),0,[],[])
85:
86: val int2str : int -> string = makestring
87:
88: (* Check checks, whether a symbol is of correct type. It impossible for the
89: parser to distinguish a label from a node symbol of arity 0 (a leaf symbol),
90: and therefore the parser may assert the property unknown for labels and
91: symbols of arity 0.
92: *)
93:
94: fun check_symbol ((symbols,rn,ls,ns),s,p') =
95: let val p = symbols s
96: in
97: (case p of
98: NonTerminal =>
99: (case p' of
100: Terminal _ => conflict ("symbol "^s^" was declared a node")
101: | _ => p)
102: | Terminal a =>
103: (case p' of
104: Unknown =>
105: if a=0 then p
106: else conflict ("symbol "^s^" is a node symboll with arity "^(int2str a))
107: | Terminal a' => if a=a'
108: then p
109: else conflict ("symbol "^s^" is of arity "^(int2str a))
110: | _ => conflict ("symbol "^s^" is a node symbol"))
111: | _ => conflict "internal error, unknown symbol in table")
112: end
113:
114: (* Declare_label and declare_node just extend the function. *)
115:
116: fun declare_label (st as (symbols,rn,ls,ns),s,ty) =
117: ((fn s' => if s' = s then NonTerminal else symbols s'),
118: rn,(s,ty)::ls,ns)
119:
120: fun declare_node (st as (symbols,rn,ls,ns),s,a) =
121: ((fn s' => if s' = s then Terminal a else symbols s'),
122: rn,ls,(s,a)::ns)
123:
124: fun next_rule (symbols, rn, ls, ns) = (rn,(symbols, rn+1, ls, ns))
125:
126: fun get_labels (_,_,ls,_) = ls
127: fun get_nodes' (_,_,_,ns) = ns
128: end;
129:
130:
131: functor MAKEparser (structure Symboltable : SYMBOLTABLE
132: and Lexer : LEXER) : PARSER =
133: struct
134:
135: open Symboltable Lexer
136:
137: exception ParserError of string
138:
139: datatype symbol = Node of string * int | Label of string
140:
141: datatype cost = Cost of string list | NoCost
142: and action = Action of string list
143: and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
144: and ruletype = Ordinary | Topdown | Rewrite
145: and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)
146:
147: datatype code_fragment = Prologue of string list | Insert of string list
148:
149: val lexf : (unit -> lexresult) ref = ref (fn () => COLON)
150:
151: val int2str : int -> string = makestring
152:
153: fun error s =
154: raise ParserError ("Error in Line "^(int2str (current_line()))^" : "^s)
155:
156: local
157: fun digit c = ("0" <= c) andalso (c <= "9")
158: fun str2int' (a,c::r) =
159: if digit c
160: then str2int' (a*10 + ord c - ord "0" ,r)
161: else (a,c::r)
162: | str2int' r = r
163: in
164: fun str2int s =
165: case str2int' (0,(explode s)) of
166: (i,[]) => i
167: | _ => error "parser internal"
168: end
169:
170: fun treeref_string' (s,[]) = s
171: | treeref_string' (s, h::t) =
172: treeref_string' ("(get_subtree (" ^ (int2str h) ^ "," ^ s ^ "))", t)
173:
174: fun treeref_string l = treeref_string' ("ir",l)
175:
176: fun parsespecification (s,st) =
177: case lexer() of
178: IDENTIFIER "prologue" => parse_prologue (s,st)
179: | IDENTIFIER "insert" => parse_insert (s,st)
180: | IDENTIFIER "node" => parse_node (s,st)
181: | IDENTIFIER "label" => parse_label (s,st)
182: | IDENTIFIER "default_cost" => parse_defaultcost (s,st)
183: | IDENTIFIER "structure_name" => parse_structurename (s,st)
184: | IDENTIFIER n =>
185: let val replacement = if n = "TOPDOWN" orelse n = "REWRITE"
186: then
187: case lexer() of
188: IDENTIFIER n' => n'
189: | _ => error ("identifier expected after rule kind specifier" ^n)
190: else n
191: val _ = check_symbol (st,replacement,NonTerminal)
192: in parse_rule (case n of
193: "TOPDOWN" => Topdown
194: | "REWRITE" => Rewrite
195: | _ => Ordinary,
196: Label replacement, s, st)
197: end
198: | EOF => (s,st)
199: | _ => error "Syntax Error"
200:
201: and parse_prologue (([],a,b,c), st) =
202: parsespecification (([Prologue (read_ml_semicolon ())],a,b,c), st)
203: | parse_prologue _ = error "duplicate prologue"
204:
205: and parse_insert(([],a,b,c), st) = error "insert may not precede prologue"
206: | parse_insert ((l,a,b,c), st) =
207: parsespecification ((l @ [Insert (read_ml_semicolon ())],a,b,c), st)
208:
209: and parse_defaultcost ((a,b,NoCost,d),st) =
210: parsespecification ((a,b,Cost (read_ml_semicolon ()),d), st)
211: | parse_defaultcost _ = error "duplicate default cost"
212:
213: and parse_node (s, st) =
214: case lexer() of
215: IDENTIFIER id =>
216: (case lexer() of
217: LPAREN =>
218: (case lexer() of
219: INT i =>
220: (case lexer() of
221: RPAREN =>
222: (case lexer() of
223: COMMA => parse_node (s,declare_node (st, id, str2int i))
224: | SEMICOLON => parsespecification (s, declare_node (st, id, str2int i))
225: | _ => error ", or ; expected")
226: | _ => error "')' expected")
227: | _ => error "integer expected")
228: | _ => error " ( expected")
229: | _ => error "node declaration"
230:
231: and readtype () =
232: case lexerS() of
233: OTHER "|" => ("",OTHER "|")
234: | SEMICOLON => ("",SEMICOLON)
235: | EOF => error "premature eof"
236: | t => let val (s,r) = readtype() in ((token2str t) ^ s,r) end
237:
238: and parse_label (s, st) =
239: case lexer() of
240: IDENTIFIER id =>
241: (case lexer() of
242: IDENTIFIER "of" =>
243: let val (tyexpr, token) = readtype ()
244: in
245: case token of
246: OTHER "|" => parse_label (s,declare_label(st,id,tyexpr))
247: | SEMICOLON => parsespecification(s,declare_label(st,id,tyexpr))
248: | _ => error "label list"
249: end
250: | _ => error "label list"
251: )
252: | _ => error "label list"
253:
254: and token2str t =
255: case t of
256: IDENTIFIER s => s
257: | INT s => s
258: | EQ => "="
259: | COLON => ":"
260: | SEMICOLON => ";"
261: | COMMA => ","
262: | LPAREN => "("
263: | RPAREN => ")"
264: | TREEREF _ => error "tree references not allowed here"
265: | OTHER s => s
266: | SPACE s => s
267: | EOF => error "cannot convert EOF"
268:
269: and token2strX t =
270: case t of
271: IDENTIFIER s => s
272: | INT s => s
273: | EQ => "="
274: | COLON => ":"
275: | SEMICOLON => ";"
276: | COMMA => ","
277: | TREEREF l => treeref_string l
278: | LPAREN => "("
279: | RPAREN => ")"
280: | OTHER s => s
281: | SPACE s => s
282: | EOF => error "cannot convert EOF"
283:
284: and read_ml_semicolon' () =
285: case lexerS() of
286: LPAREN => ("(" :: read_ml_semicolon'()) @ (")" :: (read_ml_semicolon'()))
287: | RPAREN => []
288: | EOF => error "premature EOF"
289: | t => (token2str t):: (read_ml_semicolon' ())
290:
291: and read_ml_semicolon () =
292: if lexer() <> LPAREN then error "'(' expected"
293: else
294: let val r = read_ml_semicolon' () in
295: if lexer() <> SEMICOLON then error "missing semicolon"
296: else r
297: end
298:
299: and rulecode' () =
300: case lexerS() of
301: LPAREN => ("(" :: rulecode'()) @ (rulecode'())
302: | RPAREN => [" )"]
303: | EOF => error "premature EOF"
304: | t => (token2strX t):: (rulecode' ())
305:
306: and rulecode () =
307: if lexer() <> LPAREN then error "'(' expected"
308: else "( " :: (rulecode'())
309:
310: and parse_restaction (rn, ty, nt, pattern, costcode, (a,l,b,c), st) =
311: let val mlcode = Action (rulecode ())
312: in
313: if lexer() <> SEMICOLON then error "missing semicolon"
314: else
315: let val ps = Rule(rn,ty,nt,pattern,costcode,mlcode)
316: in parsespecification ((a,ps::l,b,c), st)
317: end
318: end
319:
320: and parse_restcost (rn, ty, nt, pattern, s as (a,l,b,c), st) =
321: let val mlcode = Cost (rulecode ())
322: in
323: case lexer() of
324: EQ => parse_restaction(rn,ty,nt,pattern,mlcode, s, st)
325: | _ => error " = expected"
326: end
327:
328: and parse_rule (ty, nt, s as (a,l,b,c), st) =
329: let val (pattern, token) = parse_pattern st
330: val (rn,st') = next_rule st
331: in
332: case token of
333: COLON => parse_restcost (rn, ty, nt, pattern, s, st')
334: | EQ =>
335: (case b of
336: NoCost => error "Must specify cost, no default cost defined"
337: | _ => parse_restaction (rn, ty, nt, pattern, NoCost, s, st'))
338: | _ => error "one of : = expected"
339: end
340:
341: and parse_pattern st =
342: case lexer() of
343: IDENTIFIER id =>
344: (case lexer() of
345: LPAREN => let val subtrees = parse_subtrees st
346: val arity = length subtrees
347: val _ = check_symbol (st, id, Terminal arity)
348: in
349: (Tree (Node (id,arity), subtrees), lexer())
350: end
351: | token => let val p = check_symbol (st, id, Unknown)
352: in
353: (case p of
354: NonTerminal => Leaf (Label id)
355: | _ => Leaf (Node (id,0)),
356: token)
357: end)
358: | _ => error "ill-formed tree pattern"
359:
360: and parse_subtrees st =
361: let val (pat, token) = parse_pattern st
362: in
363: case token of
364: COMMA => let val rest = parse_subtrees st
365: in (pat :: rest)
366: end
367: | RPAREN => [pat]
368: | _ => error "one of , ) expected in tree pattern"
369: end
370:
371: and parse_structurename ((a,b,c,_),st) =
372: case lexer() of
373: IDENTIFIER s => if lexer() = SEMICOLON
374: then parsespecification ((a,b,c,s),st)
375: else error "missing ; after structure name"
376: | _ => error "structure name was not given"
377:
378: and construct_resulttype st =
379: let val first::rest = (rev o get_labels) st
380: and makeCon = (fn (n,t) => "_" ^ n ^ " of " ^ t)
381: in
382: fold (fn (a,b) => a ^ " | " ^ b)
383: (map makeCon rest)
384: (makeCon first)
385: end
386:
387: and specification instream =
388: let val dummy = (lexf := make_lexer instream)
389: val ((a,b,c,d), st) = parsespecification
390: (([],[],NoCost,"TreeProcessor"), empty_symboltable)
391: in
392: if b = nil orelse a = nil
393: then error "Prologue and rules are obligatory"
394: else
395: (a,b,c,d,construct_resulttype st,
396: (map (fn (s,_) => Label s) (get_labels st))@
397: (map (fn n => Node n) (get_nodes' st)))
398: end
399: handle SymbolConflict s => error ("Symbol Table : "^s)
400: | LexError => error ("Lexical Error")
401:
402: and lexerS () = (!lexf)()
403:
404: and lexer () =
405: case lexerS () of SPACE _ => lexer() | t => t
406:
407: end;
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.