Annotation of researchv10no/cmd/sml/lib/twig/parser.sml, revision 1.1.1.1

1.1       root        1: (* April 1989, Jussi Rintanen, Helsinki University of Technology *)
                      2: 
                      3: (* Parser is a structure containing a type definition for
                      4:    specification elements and a function, which maps
                      5:    a specification file to a ML-representation of the specification.
                      6: 
                      7:    The ML-representation is a tuple of prologue/insert code-fragments,
                      8:    list of tree translation rules, default code fragment for cost,
                      9:    a string containing the name for the resulting structure, a datatype
                     10:    definition for the result type, and the list of symbols appearing in
                     11:    the specification.
                     12: 
                     13: *)
                     14: 
                     15: signature PARSER =
                     16:   sig
                     17: 
                     18:     exception ParserError of string
                     19: 
                     20:     datatype symbol = Node of string * int | Label of string
                     21: 
                     22:     datatype cost = Cost of string list | NoCost
                     23:     and action = Action of string list
                     24:     and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
                     25:     and ruletype = Ordinary | Topdown | Rewrite
                     26:     and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)
                     27: 
                     28:     datatype code_fragment = Prologue of string list | Insert of string list
                     29: 
                     30:     val specification :        instream ->
                     31:        (code_fragment list * rule list * cost * string * string * symbol list)
                     32: 
                     33: end;
                     34: 
                     35: (*
                     36:    The parser is a straightforward predictive parser. It interfaces to the
                     37:    lexer through the signature LEXER declared elsewhere. The symbol table
                     38:    takes care of various book-keeping tasks of the parser and the
                     39:    type checking of the symbols appearing in the rules of a specification.
                     40: *)
                     41: 
                     42: (* The symbol table is represented by an object of type stable. Declare_label
                     43:    and declare_node are used by the functions parsing node and label
                     44:    declarations. They add a symbol to the symbol table. By check_symbol, the
                     45:    functions parsing rules, the tree pattern in rules, assert a property
                     46:    of a symbol. The symbol table checks whether it agrees, and if not,
                     47:    it raises the exception SymbolConflict. Otherwise it returns the
                     48:    type of the symbol. The function next_rule is used by the
                     49:    parser for numbering the rules.
                     50: *)
                     51: 
                     52: signature SYMBOLTABLE =
                     53:   sig
                     54:     exception SymbolConflict of string
                     55:     datatype class = NonTerminal | Terminal of int | Unknown
                     56: 
                     57:     type stable
                     58: 
                     59:     val empty_symboltable : stable
                     60: 
                     61:     val check_symbol : stable * string * class -> class
                     62:     val declare_label : stable * string * string -> stable
                     63:     val declare_node : stable * string * int -> stable
                     64: 
                     65:     val get_labels : stable -> (string * string) list
                     66:     val get_nodes' : stable -> (string * int) list
                     67: 
                     68:     val next_rule : stable -> int * stable
                     69: 
                     70: end;
                     71: 
                     72: structure Symboltable : SYMBOLTABLE =
                     73:   struct
                     74: 
                     75:     exception SymbolConflict of string
                     76: 
                     77:     datatype class = NonTerminal | Terminal of int | Unknown
                     78: 
                     79:     type stable = (string -> class) * int * (string * string) list * (string * int) list
                     80: 
                     81:     fun conflict s = raise SymbolConflict s
                     82: 
                     83:     val empty_symboltable =
                     84:       (fn s => conflict ("symbol "^s^" is not declared"),0,[],[])
                     85: 
                     86:     val int2str : int -> string = makestring
                     87: 
                     88: (* Check checks, whether a symbol is of correct type. It impossible for the
                     89:    parser to distinguish a label from a node symbol of arity 0 (a leaf symbol),
                     90:    and therefore the parser may assert the property unknown for labels and
                     91:    symbols of arity 0.
                     92: *)
                     93: 
                     94:     fun check_symbol ((symbols,rn,ls,ns),s,p') =
                     95:       let val p = symbols s
                     96:       in
                     97:        (case p of
                     98:           NonTerminal =>
                     99:             (case p' of
                    100:                Terminal _ => conflict ("symbol "^s^" was declared a node")
                    101:              | _ => p)
                    102:         | Terminal a =>
                    103:             (case p' of
                    104:                Unknown =>
                    105:                  if a=0 then p
                    106:                  else conflict ("symbol "^s^" is a node symboll with arity "^(int2str a))
                    107:              | Terminal a' => if a=a'
                    108:                                 then p
                    109:                               else conflict ("symbol "^s^" is of arity "^(int2str a))
                    110:              | _ => conflict ("symbol "^s^" is a node symbol"))
                    111:         | _ => conflict "internal error, unknown symbol in table")
                    112:       end
                    113: 
                    114: (* Declare_label and declare_node just extend the function. *)
                    115: 
                    116:     fun declare_label (st as (symbols,rn,ls,ns),s,ty) =
                    117:       ((fn s' => if s' = s then NonTerminal else symbols s'),
                    118:        rn,(s,ty)::ls,ns)
                    119: 
                    120:     fun declare_node (st as (symbols,rn,ls,ns),s,a) =
                    121:       ((fn s' => if s' = s then Terminal a else symbols s'),
                    122:        rn,ls,(s,a)::ns)
                    123: 
                    124:     fun next_rule (symbols, rn, ls, ns) = (rn,(symbols, rn+1, ls, ns))
                    125: 
                    126:     fun get_labels (_,_,ls,_) = ls
                    127:     fun get_nodes' (_,_,_,ns) = ns
                    128:   end;
                    129: 
                    130: 
                    131: functor MAKEparser (structure Symboltable : SYMBOLTABLE
                    132:                    and Lexer : LEXER) : PARSER =
                    133:   struct
                    134:       
                    135:     open Symboltable Lexer
                    136:     
                    137:     exception ParserError of string
                    138: 
                    139:     datatype symbol = Node of string * int | Label of string
                    140: 
                    141:     datatype cost = Cost of string list | NoCost
                    142:     and action = Action of string list
                    143:     and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
                    144:     and ruletype = Ordinary | Topdown | Rewrite
                    145:     and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)
                    146: 
                    147:     datatype code_fragment = Prologue of string list | Insert of string list
                    148: 
                    149:     val lexf : (unit -> lexresult) ref = ref (fn () => COLON)
                    150: 
                    151:     val int2str : int -> string = makestring
                    152: 
                    153:     fun error s =
                    154:       raise ParserError ("Error in Line "^(int2str (current_line()))^" : "^s)
                    155: 
                    156:     local
                    157:       fun digit c = ("0" <= c) andalso (c <= "9")
                    158:       fun str2int' (a,c::r) =
                    159:        if digit c
                    160:          then str2int' (a*10 + ord c - ord "0" ,r)
                    161:        else (a,c::r)
                    162:        | str2int' r = r
                    163:     in
                    164:       fun str2int s =
                    165:        case str2int' (0,(explode s)) of
                    166:          (i,[]) => i
                    167:        | _ => error "parser internal"
                    168:     end
                    169: 
                    170:     fun treeref_string' (s,[]) = s
                    171:       | treeref_string' (s, h::t) =
                    172:        treeref_string' ("(get_subtree (" ^ (int2str h) ^ "," ^ s ^ "))", t)
                    173: 
                    174:     fun treeref_string l = treeref_string' ("ir",l)
                    175: 
                    176:     fun parsespecification (s,st) =
                    177:       case lexer() of
                    178:        IDENTIFIER "prologue" => parse_prologue (s,st)
                    179:       | IDENTIFIER "insert" => parse_insert (s,st)
                    180:       | IDENTIFIER "node" => parse_node (s,st)
                    181:       | IDENTIFIER "label" => parse_label (s,st)
                    182:       | IDENTIFIER "default_cost" => parse_defaultcost (s,st)
                    183:       | IDENTIFIER "structure_name" => parse_structurename (s,st)
                    184:       | IDENTIFIER n =>
                    185:          let val replacement = if n = "TOPDOWN" orelse n = "REWRITE"
                    186:                                  then
                    187:                                    case lexer() of
                    188:                                      IDENTIFIER n' => n'
                    189:                                    | _ => error ("identifier expected after rule kind specifier" ^n)
                    190:                                else n
                    191:              val _ = check_symbol (st,replacement,NonTerminal)
                    192:          in parse_rule (case n of
                    193:                         "TOPDOWN" => Topdown
                    194:                       | "REWRITE" => Rewrite
                    195:                       | _ => Ordinary,
                    196:                           Label replacement, s, st)
                    197:          end
                    198:       | EOF => (s,st)
                    199:       | _ => error "Syntax Error"
                    200: 
                    201:     and parse_prologue (([],a,b,c), st) =
                    202:       parsespecification (([Prologue (read_ml_semicolon ())],a,b,c), st)
                    203:       | parse_prologue _ = error "duplicate prologue"
                    204: 
                    205:     and parse_insert(([],a,b,c), st) = error "insert may not precede prologue"
                    206:       | parse_insert ((l,a,b,c), st) =
                    207:        parsespecification ((l @ [Insert (read_ml_semicolon ())],a,b,c), st)
                    208: 
                    209:     and parse_defaultcost ((a,b,NoCost,d),st) =
                    210:       parsespecification ((a,b,Cost (read_ml_semicolon ()),d), st)
                    211:       | parse_defaultcost _ = error "duplicate default cost"
                    212: 
                    213:     and parse_node (s, st) =
                    214:       case lexer() of
                    215:        IDENTIFIER id =>
                    216:          (case lexer() of
                    217:             LPAREN =>
                    218:               (case lexer() of
                    219:                  INT i =>
                    220:                    (case lexer() of
                    221:                       RPAREN =>
                    222:                         (case lexer() of
                    223:                            COMMA => parse_node (s,declare_node (st, id, str2int i))
                    224:                          | SEMICOLON => parsespecification (s, declare_node (st, id, str2int i))
                    225:                          | _ => error ", or ; expected")
                    226:                     | _ => error "')' expected")
                    227:                | _ => error "integer expected")
                    228:           | _ => error " ( expected")
                    229:       | _ => error "node declaration"
                    230: 
                    231:     and readtype () =
                    232:       case lexerS() of
                    233:        OTHER "|" => ("",OTHER "|")
                    234:       | SEMICOLON => ("",SEMICOLON)
                    235:       | EOF => error "premature eof"
                    236:       | t => let val (s,r) = readtype() in ((token2str t) ^ s,r) end
                    237:          
                    238:     and parse_label (s, st) =
                    239:       case lexer() of
                    240:        IDENTIFIER id =>
                    241:          (case lexer() of
                    242:             IDENTIFIER "of" =>
                    243:               let val (tyexpr, token) = readtype ()
                    244:               in
                    245:                 case token of
                    246:                   OTHER "|" => parse_label (s,declare_label(st,id,tyexpr))
                    247:                 | SEMICOLON => parsespecification(s,declare_label(st,id,tyexpr))
                    248:                 | _ => error "label list"
                    249:               end
                    250:           | _ => error "label list"
                    251:               )
                    252:       | _ => error "label list"
                    253:       
                    254:     and token2str t =
                    255:       case t of
                    256:        IDENTIFIER s => s
                    257:       | INT s => s
                    258:       | EQ => "="
                    259:       | COLON => ":"
                    260:       | SEMICOLON => ";"
                    261:       | COMMA => ","
                    262:       | LPAREN => "("
                    263:       | RPAREN => ")"
                    264:       | TREEREF _ => error "tree references not allowed here"
                    265:       | OTHER s => s
                    266:       | SPACE s => s
                    267:       | EOF => error "cannot convert EOF"
                    268: 
                    269:     and token2strX t =
                    270:       case t of
                    271:        IDENTIFIER s => s
                    272:       | INT s => s
                    273:       | EQ => "="
                    274:       | COLON => ":"
                    275:       | SEMICOLON => ";"
                    276:       | COMMA => ","
                    277:       | TREEREF l => treeref_string l
                    278:       | LPAREN => "("
                    279:       | RPAREN => ")"
                    280:       | OTHER s => s
                    281:       | SPACE s => s
                    282:       | EOF => error "cannot convert EOF"
                    283: 
                    284:     and read_ml_semicolon' () =
                    285:       case lexerS() of
                    286:       LPAREN => ("(" :: read_ml_semicolon'()) @ (")" :: (read_ml_semicolon'()))
                    287:       | RPAREN => []
                    288:       | EOF => error "premature EOF"
                    289:       | t => (token2str t):: (read_ml_semicolon' ())
                    290: 
                    291:     and read_ml_semicolon () =
                    292:       if lexer() <> LPAREN then error "'(' expected"
                    293:       else
                    294:        let val r = read_ml_semicolon' () in
                    295:          if lexer() <> SEMICOLON then error "missing semicolon"
                    296:          else r
                    297:        end
                    298: 
                    299:     and rulecode' () =
                    300:       case lexerS() of
                    301:       LPAREN => ("(" :: rulecode'()) @ (rulecode'())
                    302:       | RPAREN => [" )"]
                    303:       | EOF => error "premature EOF"
                    304:       | t => (token2strX t):: (rulecode' ())
                    305: 
                    306:     and rulecode () =
                    307:        if lexer() <> LPAREN then error "'(' expected"
                    308:        else "( " :: (rulecode'())
                    309: 
                    310:     and parse_restaction (rn, ty, nt, pattern, costcode, (a,l,b,c), st) =
                    311:       let val mlcode = Action (rulecode ())
                    312:       in
                    313:        if lexer() <> SEMICOLON then error "missing semicolon"
                    314:        else
                    315:          let val ps = Rule(rn,ty,nt,pattern,costcode,mlcode)
                    316:          in parsespecification ((a,ps::l,b,c), st)
                    317:          end
                    318:       end
                    319: 
                    320:     and parse_restcost (rn, ty, nt, pattern, s as (a,l,b,c), st) =
                    321:       let val mlcode = Cost (rulecode ())
                    322:       in
                    323:        case lexer() of
                    324:          EQ => parse_restaction(rn,ty,nt,pattern,mlcode, s, st)
                    325:        | _ => error " = expected"
                    326:       end
                    327: 
                    328:     and parse_rule (ty, nt, s as (a,l,b,c), st) =
                    329:       let val (pattern, token) = parse_pattern st
                    330:        val (rn,st') = next_rule st
                    331:       in
                    332:        case token of
                    333:          COLON => parse_restcost (rn, ty, nt, pattern, s, st')
                    334:        | EQ =>
                    335:            (case b of
                    336:               NoCost => error "Must specify cost, no default cost defined"
                    337:             | _ => parse_restaction (rn, ty, nt, pattern, NoCost, s, st'))
                    338:        | _ => error "one of : = expected"
                    339:       end
                    340: 
                    341:     and parse_pattern st =
                    342:       case lexer() of
                    343:        IDENTIFIER id =>
                    344:          (case lexer() of
                    345:             LPAREN => let val subtrees = parse_subtrees st
                    346:                           val arity = length subtrees
                    347:                           val _ = check_symbol (st, id, Terminal arity)
                    348:                       in
                    349:                         (Tree (Node (id,arity), subtrees), lexer())
                    350:                       end
                    351:           | token => let val p = check_symbol (st, id, Unknown)
                    352:                      in
                    353:                        (case p of
                    354:                           NonTerminal => Leaf (Label id)
                    355:                         | _ => Leaf (Node (id,0)),
                    356:                             token)
                    357:                      end)
                    358:       | _ => error "ill-formed tree pattern"
                    359: 
                    360:     and parse_subtrees st =
                    361:       let val (pat, token) = parse_pattern st
                    362:       in
                    363:        case token of
                    364:          COMMA => let val rest = parse_subtrees st
                    365:                   in (pat :: rest)
                    366:                   end
                    367:        | RPAREN => [pat]
                    368:        | _ => error "one of , ) expected in tree pattern"
                    369:       end
                    370: 
                    371:     and parse_structurename ((a,b,c,_),st) =
                    372:       case lexer() of
                    373:         IDENTIFIER s => if lexer() = SEMICOLON
                    374:                          then parsespecification ((a,b,c,s),st)
                    375:                        else error "missing ; after structure name"
                    376:       | _ => error "structure name was not given"
                    377: 
                    378:     and construct_resulttype st =
                    379:       let val first::rest = (rev o get_labels) st
                    380:          and makeCon = (fn (n,t) => "_" ^ n ^ " of " ^ t)
                    381:       in
                    382:        fold (fn (a,b) => a ^ " | " ^ b)
                    383:          (map makeCon rest)
                    384:           (makeCon first)
                    385:       end
                    386: 
                    387:     and specification instream =
                    388:       let val dummy = (lexf := make_lexer instream)
                    389:          val ((a,b,c,d), st) = parsespecification
                    390:            (([],[],NoCost,"TreeProcessor"), empty_symboltable)
                    391:       in
                    392:        if b = nil orelse a = nil
                    393:          then error "Prologue and rules are obligatory"
                    394:        else
                    395:          (a,b,c,d,construct_resulttype st,
                    396:           (map (fn (s,_) => Label s) (get_labels st))@
                    397:           (map (fn n => Node n) (get_nodes' st)))
                    398:       end
                    399:     handle SymbolConflict s => error ("Symbol Table : "^s)
                    400:         | LexError => error ("Lexical Error")
                    401: 
                    402:     and lexerS () = (!lexf)()
                    403:                   
                    404:     and lexer () =
                    405:       case lexerS () of        SPACE _ => lexer() | t => t
                    406: 
                    407:   end;

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.