|
|
1.1 ! root 1: (* April 1989, Jussi Rintanen, Helsinki University of Technology *) ! 2: ! 3: (* Parser is a structure containing a type definition for ! 4: specification elements and a function, which maps ! 5: a specification file to a ML-representation of the specification. ! 6: ! 7: The ML-representation is a tuple of prologue/insert code-fragments, ! 8: list of tree translation rules, default code fragment for cost, ! 9: a string containing the name for the resulting structure, a datatype ! 10: definition for the result type, and the list of symbols appearing in ! 11: the specification. ! 12: ! 13: *) ! 14: ! 15: signature PARSER = ! 16: sig ! 17: ! 18: exception ParserError of string ! 19: ! 20: datatype symbol = Node of string * int | Label of string ! 21: ! 22: datatype cost = Cost of string list | NoCost ! 23: and action = Action of string list ! 24: and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list) ! 25: and ruletype = Ordinary | Topdown | Rewrite ! 26: and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action) ! 27: ! 28: datatype code_fragment = Prologue of string list | Insert of string list ! 29: ! 30: val specification : instream -> ! 31: (code_fragment list * rule list * cost * string * string * symbol list) ! 32: ! 33: end; ! 34: ! 35: (* ! 36: The parser is a straightforward predictive parser. It interfaces to the ! 37: lexer through the signature LEXER declared elsewhere. The symbol table ! 38: takes care of various book-keeping tasks of the parser and the ! 39: type checking of the symbols appearing in the rules of a specification. ! 40: *) ! 41: ! 42: (* The symbol table is represented by an object of type stable. Declare_label ! 43: and declare_node are used by the functions parsing node and label ! 44: declarations. They add a symbol to the symbol table. By check_symbol, the ! 45: functions parsing rules, the tree pattern in rules, assert a property ! 46: of a symbol. The symbol table checks whether it agrees, and if not, ! 47: it raises the exception SymbolConflict. Otherwise it returns the ! 48: type of the symbol. The function next_rule is used by the ! 49: parser for numbering the rules. ! 50: *) ! 51: ! 52: signature SYMBOLTABLE = ! 53: sig ! 54: exception SymbolConflict of string ! 55: datatype class = NonTerminal | Terminal of int | Unknown ! 56: ! 57: type stable ! 58: ! 59: val empty_symboltable : stable ! 60: ! 61: val check_symbol : stable * string * class -> class ! 62: val declare_label : stable * string * string -> stable ! 63: val declare_node : stable * string * int -> stable ! 64: ! 65: val get_labels : stable -> (string * string) list ! 66: val get_nodes' : stable -> (string * int) list ! 67: ! 68: val next_rule : stable -> int * stable ! 69: ! 70: end; ! 71: ! 72: structure Symboltable : SYMBOLTABLE = ! 73: struct ! 74: ! 75: exception SymbolConflict of string ! 76: ! 77: datatype class = NonTerminal | Terminal of int | Unknown ! 78: ! 79: type stable = (string -> class) * int * (string * string) list * (string * int) list ! 80: ! 81: fun conflict s = raise SymbolConflict s ! 82: ! 83: val empty_symboltable = ! 84: (fn s => conflict ("symbol "^s^" is not declared"),0,[],[]) ! 85: ! 86: val int2str : int -> string = makestring ! 87: ! 88: (* Check checks, whether a symbol is of correct type. It impossible for the ! 89: parser to distinguish a label from a node symbol of arity 0 (a leaf symbol), ! 90: and therefore the parser may assert the property unknown for labels and ! 91: symbols of arity 0. ! 92: *) ! 93: ! 94: fun check_symbol ((symbols,rn,ls,ns),s,p') = ! 95: let val p = symbols s ! 96: in ! 97: (case p of ! 98: NonTerminal => ! 99: (case p' of ! 100: Terminal _ => conflict ("symbol "^s^" was declared a node") ! 101: | _ => p) ! 102: | Terminal a => ! 103: (case p' of ! 104: Unknown => ! 105: if a=0 then p ! 106: else conflict ("symbol "^s^" is a node symboll with arity "^(int2str a)) ! 107: | Terminal a' => if a=a' ! 108: then p ! 109: else conflict ("symbol "^s^" is of arity "^(int2str a)) ! 110: | _ => conflict ("symbol "^s^" is a node symbol")) ! 111: | _ => conflict "internal error, unknown symbol in table") ! 112: end ! 113: ! 114: (* Declare_label and declare_node just extend the function. *) ! 115: ! 116: fun declare_label (st as (symbols,rn,ls,ns),s,ty) = ! 117: ((fn s' => if s' = s then NonTerminal else symbols s'), ! 118: rn,(s,ty)::ls,ns) ! 119: ! 120: fun declare_node (st as (symbols,rn,ls,ns),s,a) = ! 121: ((fn s' => if s' = s then Terminal a else symbols s'), ! 122: rn,ls,(s,a)::ns) ! 123: ! 124: fun next_rule (symbols, rn, ls, ns) = (rn,(symbols, rn+1, ls, ns)) ! 125: ! 126: fun get_labels (_,_,ls,_) = ls ! 127: fun get_nodes' (_,_,_,ns) = ns ! 128: end; ! 129: ! 130: ! 131: functor MAKEparser (structure Symboltable : SYMBOLTABLE ! 132: and Lexer : LEXER) : PARSER = ! 133: struct ! 134: ! 135: open Symboltable Lexer ! 136: ! 137: exception ParserError of string ! 138: ! 139: datatype symbol = Node of string * int | Label of string ! 140: ! 141: datatype cost = Cost of string list | NoCost ! 142: and action = Action of string list ! 143: and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list) ! 144: and ruletype = Ordinary | Topdown | Rewrite ! 145: and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action) ! 146: ! 147: datatype code_fragment = Prologue of string list | Insert of string list ! 148: ! 149: val lexf : (unit -> lexresult) ref = ref (fn () => COLON) ! 150: ! 151: val int2str : int -> string = makestring ! 152: ! 153: fun error s = ! 154: raise ParserError ("Error in Line "^(int2str (current_line()))^" : "^s) ! 155: ! 156: local ! 157: fun digit c = ("0" <= c) andalso (c <= "9") ! 158: fun str2int' (a,c::r) = ! 159: if digit c ! 160: then str2int' (a*10 + ord c - ord "0" ,r) ! 161: else (a,c::r) ! 162: | str2int' r = r ! 163: in ! 164: fun str2int s = ! 165: case str2int' (0,(explode s)) of ! 166: (i,[]) => i ! 167: | _ => error "parser internal" ! 168: end ! 169: ! 170: fun treeref_string' (s,[]) = s ! 171: | treeref_string' (s, h::t) = ! 172: treeref_string' ("(get_subtree (" ^ (int2str h) ^ "," ^ s ^ "))", t) ! 173: ! 174: fun treeref_string l = treeref_string' ("ir",l) ! 175: ! 176: fun parsespecification (s,st) = ! 177: case lexer() of ! 178: IDENTIFIER "prologue" => parse_prologue (s,st) ! 179: | IDENTIFIER "insert" => parse_insert (s,st) ! 180: | IDENTIFIER "node" => parse_node (s,st) ! 181: | IDENTIFIER "label" => parse_label (s,st) ! 182: | IDENTIFIER "default_cost" => parse_defaultcost (s,st) ! 183: | IDENTIFIER "structure_name" => parse_structurename (s,st) ! 184: | IDENTIFIER n => ! 185: let val replacement = if n = "TOPDOWN" orelse n = "REWRITE" ! 186: then ! 187: case lexer() of ! 188: IDENTIFIER n' => n' ! 189: | _ => error ("identifier expected after rule kind specifier" ^n) ! 190: else n ! 191: val _ = check_symbol (st,replacement,NonTerminal) ! 192: in parse_rule (case n of ! 193: "TOPDOWN" => Topdown ! 194: | "REWRITE" => Rewrite ! 195: | _ => Ordinary, ! 196: Label replacement, s, st) ! 197: end ! 198: | EOF => (s,st) ! 199: | _ => error "Syntax Error" ! 200: ! 201: and parse_prologue (([],a,b,c), st) = ! 202: parsespecification (([Prologue (read_ml_semicolon ())],a,b,c), st) ! 203: | parse_prologue _ = error "duplicate prologue" ! 204: ! 205: and parse_insert(([],a,b,c), st) = error "insert may not precede prologue" ! 206: | parse_insert ((l,a,b,c), st) = ! 207: parsespecification ((l @ [Insert (read_ml_semicolon ())],a,b,c), st) ! 208: ! 209: and parse_defaultcost ((a,b,NoCost,d),st) = ! 210: parsespecification ((a,b,Cost (read_ml_semicolon ()),d), st) ! 211: | parse_defaultcost _ = error "duplicate default cost" ! 212: ! 213: and parse_node (s, st) = ! 214: case lexer() of ! 215: IDENTIFIER id => ! 216: (case lexer() of ! 217: LPAREN => ! 218: (case lexer() of ! 219: INT i => ! 220: (case lexer() of ! 221: RPAREN => ! 222: (case lexer() of ! 223: COMMA => parse_node (s,declare_node (st, id, str2int i)) ! 224: | SEMICOLON => parsespecification (s, declare_node (st, id, str2int i)) ! 225: | _ => error ", or ; expected") ! 226: | _ => error "')' expected") ! 227: | _ => error "integer expected") ! 228: | _ => error " ( expected") ! 229: | _ => error "node declaration" ! 230: ! 231: and readtype () = ! 232: case lexerS() of ! 233: OTHER "|" => ("",OTHER "|") ! 234: | SEMICOLON => ("",SEMICOLON) ! 235: | EOF => error "premature eof" ! 236: | t => let val (s,r) = readtype() in ((token2str t) ^ s,r) end ! 237: ! 238: and parse_label (s, st) = ! 239: case lexer() of ! 240: IDENTIFIER id => ! 241: (case lexer() of ! 242: IDENTIFIER "of" => ! 243: let val (tyexpr, token) = readtype () ! 244: in ! 245: case token of ! 246: OTHER "|" => parse_label (s,declare_label(st,id,tyexpr)) ! 247: | SEMICOLON => parsespecification(s,declare_label(st,id,tyexpr)) ! 248: | _ => error "label list" ! 249: end ! 250: | _ => error "label list" ! 251: ) ! 252: | _ => error "label list" ! 253: ! 254: and token2str t = ! 255: case t of ! 256: IDENTIFIER s => s ! 257: | INT s => s ! 258: | EQ => "=" ! 259: | COLON => ":" ! 260: | SEMICOLON => ";" ! 261: | COMMA => "," ! 262: | LPAREN => "(" ! 263: | RPAREN => ")" ! 264: | TREEREF _ => error "tree references not allowed here" ! 265: | OTHER s => s ! 266: | SPACE s => s ! 267: | EOF => error "cannot convert EOF" ! 268: ! 269: and token2strX t = ! 270: case t of ! 271: IDENTIFIER s => s ! 272: | INT s => s ! 273: | EQ => "=" ! 274: | COLON => ":" ! 275: | SEMICOLON => ";" ! 276: | COMMA => "," ! 277: | TREEREF l => treeref_string l ! 278: | LPAREN => "(" ! 279: | RPAREN => ")" ! 280: | OTHER s => s ! 281: | SPACE s => s ! 282: | EOF => error "cannot convert EOF" ! 283: ! 284: and read_ml_semicolon' () = ! 285: case lexerS() of ! 286: LPAREN => ("(" :: read_ml_semicolon'()) @ (")" :: (read_ml_semicolon'())) ! 287: | RPAREN => [] ! 288: | EOF => error "premature EOF" ! 289: | t => (token2str t):: (read_ml_semicolon' ()) ! 290: ! 291: and read_ml_semicolon () = ! 292: if lexer() <> LPAREN then error "'(' expected" ! 293: else ! 294: let val r = read_ml_semicolon' () in ! 295: if lexer() <> SEMICOLON then error "missing semicolon" ! 296: else r ! 297: end ! 298: ! 299: and rulecode' () = ! 300: case lexerS() of ! 301: LPAREN => ("(" :: rulecode'()) @ (rulecode'()) ! 302: | RPAREN => [" )"] ! 303: | EOF => error "premature EOF" ! 304: | t => (token2strX t):: (rulecode' ()) ! 305: ! 306: and rulecode () = ! 307: if lexer() <> LPAREN then error "'(' expected" ! 308: else "( " :: (rulecode'()) ! 309: ! 310: and parse_restaction (rn, ty, nt, pattern, costcode, (a,l,b,c), st) = ! 311: let val mlcode = Action (rulecode ()) ! 312: in ! 313: if lexer() <> SEMICOLON then error "missing semicolon" ! 314: else ! 315: let val ps = Rule(rn,ty,nt,pattern,costcode,mlcode) ! 316: in parsespecification ((a,ps::l,b,c), st) ! 317: end ! 318: end ! 319: ! 320: and parse_restcost (rn, ty, nt, pattern, s as (a,l,b,c), st) = ! 321: let val mlcode = Cost (rulecode ()) ! 322: in ! 323: case lexer() of ! 324: EQ => parse_restaction(rn,ty,nt,pattern,mlcode, s, st) ! 325: | _ => error " = expected" ! 326: end ! 327: ! 328: and parse_rule (ty, nt, s as (a,l,b,c), st) = ! 329: let val (pattern, token) = parse_pattern st ! 330: val (rn,st') = next_rule st ! 331: in ! 332: case token of ! 333: COLON => parse_restcost (rn, ty, nt, pattern, s, st') ! 334: | EQ => ! 335: (case b of ! 336: NoCost => error "Must specify cost, no default cost defined" ! 337: | _ => parse_restaction (rn, ty, nt, pattern, NoCost, s, st')) ! 338: | _ => error "one of : = expected" ! 339: end ! 340: ! 341: and parse_pattern st = ! 342: case lexer() of ! 343: IDENTIFIER id => ! 344: (case lexer() of ! 345: LPAREN => let val subtrees = parse_subtrees st ! 346: val arity = length subtrees ! 347: val _ = check_symbol (st, id, Terminal arity) ! 348: in ! 349: (Tree (Node (id,arity), subtrees), lexer()) ! 350: end ! 351: | token => let val p = check_symbol (st, id, Unknown) ! 352: in ! 353: (case p of ! 354: NonTerminal => Leaf (Label id) ! 355: | _ => Leaf (Node (id,0)), ! 356: token) ! 357: end) ! 358: | _ => error "ill-formed tree pattern" ! 359: ! 360: and parse_subtrees st = ! 361: let val (pat, token) = parse_pattern st ! 362: in ! 363: case token of ! 364: COMMA => let val rest = parse_subtrees st ! 365: in (pat :: rest) ! 366: end ! 367: | RPAREN => [pat] ! 368: | _ => error "one of , ) expected in tree pattern" ! 369: end ! 370: ! 371: and parse_structurename ((a,b,c,_),st) = ! 372: case lexer() of ! 373: IDENTIFIER s => if lexer() = SEMICOLON ! 374: then parsespecification ((a,b,c,s),st) ! 375: else error "missing ; after structure name" ! 376: | _ => error "structure name was not given" ! 377: ! 378: and construct_resulttype st = ! 379: let val first::rest = (rev o get_labels) st ! 380: and makeCon = (fn (n,t) => "_" ^ n ^ " of " ^ t) ! 381: in ! 382: fold (fn (a,b) => a ^ " | " ^ b) ! 383: (map makeCon rest) ! 384: (makeCon first) ! 385: end ! 386: ! 387: and specification instream = ! 388: let val dummy = (lexf := make_lexer instream) ! 389: val ((a,b,c,d), st) = parsespecification ! 390: (([],[],NoCost,"TreeProcessor"), empty_symboltable) ! 391: in ! 392: if b = nil orelse a = nil ! 393: then error "Prologue and rules are obligatory" ! 394: else ! 395: (a,b,c,d,construct_resulttype st, ! 396: (map (fn (s,_) => Label s) (get_labels st))@ ! 397: (map (fn n => Node n) (get_nodes' st))) ! 398: end ! 399: handle SymbolConflict s => error ("Symbol Table : "^s) ! 400: | LexError => error ("Lexical Error") ! 401: ! 402: and lexerS () = (!lexf)() ! 403: ! 404: and lexer () = ! 405: case lexerS () of SPACE _ => lexer() | t => t ! 406: ! 407: end;
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.