Annotation of researchv10no/cmd/sml/lib/twig/parser.sml, revision 1.1

1.1     ! root        1: (* April 1989, Jussi Rintanen, Helsinki University of Technology *)
        !             2: 
        !             3: (* Parser is a structure containing a type definition for
        !             4:    specification elements and a function, which maps
        !             5:    a specification file to a ML-representation of the specification.
        !             6: 
        !             7:    The ML-representation is a tuple of prologue/insert code-fragments,
        !             8:    list of tree translation rules, default code fragment for cost,
        !             9:    a string containing the name for the resulting structure, a datatype
        !            10:    definition for the result type, and the list of symbols appearing in
        !            11:    the specification.
        !            12: 
        !            13: *)
        !            14: 
        !            15: signature PARSER =
        !            16:   sig
        !            17: 
        !            18:     exception ParserError of string
        !            19: 
        !            20:     datatype symbol = Node of string * int | Label of string
        !            21: 
        !            22:     datatype cost = Cost of string list | NoCost
        !            23:     and action = Action of string list
        !            24:     and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
        !            25:     and ruletype = Ordinary | Topdown | Rewrite
        !            26:     and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)
        !            27: 
        !            28:     datatype code_fragment = Prologue of string list | Insert of string list
        !            29: 
        !            30:     val specification :        instream ->
        !            31:        (code_fragment list * rule list * cost * string * string * symbol list)
        !            32: 
        !            33: end;
        !            34: 
        !            35: (*
        !            36:    The parser is a straightforward predictive parser. It interfaces to the
        !            37:    lexer through the signature LEXER declared elsewhere. The symbol table
        !            38:    takes care of various book-keeping tasks of the parser and the
        !            39:    type checking of the symbols appearing in the rules of a specification.
        !            40: *)
        !            41: 
        !            42: (* The symbol table is represented by an object of type stable. Declare_label
        !            43:    and declare_node are used by the functions parsing node and label
        !            44:    declarations. They add a symbol to the symbol table. By check_symbol, the
        !            45:    functions parsing rules, the tree pattern in rules, assert a property
        !            46:    of a symbol. The symbol table checks whether it agrees, and if not,
        !            47:    it raises the exception SymbolConflict. Otherwise it returns the
        !            48:    type of the symbol. The function next_rule is used by the
        !            49:    parser for numbering the rules.
        !            50: *)
        !            51: 
        !            52: signature SYMBOLTABLE =
        !            53:   sig
        !            54:     exception SymbolConflict of string
        !            55:     datatype class = NonTerminal | Terminal of int | Unknown
        !            56: 
        !            57:     type stable
        !            58: 
        !            59:     val empty_symboltable : stable
        !            60: 
        !            61:     val check_symbol : stable * string * class -> class
        !            62:     val declare_label : stable * string * string -> stable
        !            63:     val declare_node : stable * string * int -> stable
        !            64: 
        !            65:     val get_labels : stable -> (string * string) list
        !            66:     val get_nodes' : stable -> (string * int) list
        !            67: 
        !            68:     val next_rule : stable -> int * stable
        !            69: 
        !            70: end;
        !            71: 
        !            72: structure Symboltable : SYMBOLTABLE =
        !            73:   struct
        !            74: 
        !            75:     exception SymbolConflict of string
        !            76: 
        !            77:     datatype class = NonTerminal | Terminal of int | Unknown
        !            78: 
        !            79:     type stable = (string -> class) * int * (string * string) list * (string * int) list
        !            80: 
        !            81:     fun conflict s = raise SymbolConflict s
        !            82: 
        !            83:     val empty_symboltable =
        !            84:       (fn s => conflict ("symbol "^s^" is not declared"),0,[],[])
        !            85: 
        !            86:     val int2str : int -> string = makestring
        !            87: 
        !            88: (* Check checks, whether a symbol is of correct type. It impossible for the
        !            89:    parser to distinguish a label from a node symbol of arity 0 (a leaf symbol),
        !            90:    and therefore the parser may assert the property unknown for labels and
        !            91:    symbols of arity 0.
        !            92: *)
        !            93: 
        !            94:     fun check_symbol ((symbols,rn,ls,ns),s,p') =
        !            95:       let val p = symbols s
        !            96:       in
        !            97:        (case p of
        !            98:           NonTerminal =>
        !            99:             (case p' of
        !           100:                Terminal _ => conflict ("symbol "^s^" was declared a node")
        !           101:              | _ => p)
        !           102:         | Terminal a =>
        !           103:             (case p' of
        !           104:                Unknown =>
        !           105:                  if a=0 then p
        !           106:                  else conflict ("symbol "^s^" is a node symboll with arity "^(int2str a))
        !           107:              | Terminal a' => if a=a'
        !           108:                                 then p
        !           109:                               else conflict ("symbol "^s^" is of arity "^(int2str a))
        !           110:              | _ => conflict ("symbol "^s^" is a node symbol"))
        !           111:         | _ => conflict "internal error, unknown symbol in table")
        !           112:       end
        !           113: 
        !           114: (* Declare_label and declare_node just extend the function. *)
        !           115: 
        !           116:     fun declare_label (st as (symbols,rn,ls,ns),s,ty) =
        !           117:       ((fn s' => if s' = s then NonTerminal else symbols s'),
        !           118:        rn,(s,ty)::ls,ns)
        !           119: 
        !           120:     fun declare_node (st as (symbols,rn,ls,ns),s,a) =
        !           121:       ((fn s' => if s' = s then Terminal a else symbols s'),
        !           122:        rn,ls,(s,a)::ns)
        !           123: 
        !           124:     fun next_rule (symbols, rn, ls, ns) = (rn,(symbols, rn+1, ls, ns))
        !           125: 
        !           126:     fun get_labels (_,_,ls,_) = ls
        !           127:     fun get_nodes' (_,_,_,ns) = ns
        !           128:   end;
        !           129: 
        !           130: 
        !           131: functor MAKEparser (structure Symboltable : SYMBOLTABLE
        !           132:                    and Lexer : LEXER) : PARSER =
        !           133:   struct
        !           134:       
        !           135:     open Symboltable Lexer
        !           136:     
        !           137:     exception ParserError of string
        !           138: 
        !           139:     datatype symbol = Node of string * int | Label of string
        !           140: 
        !           141:     datatype cost = Cost of string list | NoCost
        !           142:     and action = Action of string list
        !           143:     and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
        !           144:     and ruletype = Ordinary | Topdown | Rewrite
        !           145:     and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)
        !           146: 
        !           147:     datatype code_fragment = Prologue of string list | Insert of string list
        !           148: 
        !           149:     val lexf : (unit -> lexresult) ref = ref (fn () => COLON)
        !           150: 
        !           151:     val int2str : int -> string = makestring
        !           152: 
        !           153:     fun error s =
        !           154:       raise ParserError ("Error in Line "^(int2str (current_line()))^" : "^s)
        !           155: 
        !           156:     local
        !           157:       fun digit c = ("0" <= c) andalso (c <= "9")
        !           158:       fun str2int' (a,c::r) =
        !           159:        if digit c
        !           160:          then str2int' (a*10 + ord c - ord "0" ,r)
        !           161:        else (a,c::r)
        !           162:        | str2int' r = r
        !           163:     in
        !           164:       fun str2int s =
        !           165:        case str2int' (0,(explode s)) of
        !           166:          (i,[]) => i
        !           167:        | _ => error "parser internal"
        !           168:     end
        !           169: 
        !           170:     fun treeref_string' (s,[]) = s
        !           171:       | treeref_string' (s, h::t) =
        !           172:        treeref_string' ("(get_subtree (" ^ (int2str h) ^ "," ^ s ^ "))", t)
        !           173: 
        !           174:     fun treeref_string l = treeref_string' ("ir",l)
        !           175: 
        !           176:     fun parsespecification (s,st) =
        !           177:       case lexer() of
        !           178:        IDENTIFIER "prologue" => parse_prologue (s,st)
        !           179:       | IDENTIFIER "insert" => parse_insert (s,st)
        !           180:       | IDENTIFIER "node" => parse_node (s,st)
        !           181:       | IDENTIFIER "label" => parse_label (s,st)
        !           182:       | IDENTIFIER "default_cost" => parse_defaultcost (s,st)
        !           183:       | IDENTIFIER "structure_name" => parse_structurename (s,st)
        !           184:       | IDENTIFIER n =>
        !           185:          let val replacement = if n = "TOPDOWN" orelse n = "REWRITE"
        !           186:                                  then
        !           187:                                    case lexer() of
        !           188:                                      IDENTIFIER n' => n'
        !           189:                                    | _ => error ("identifier expected after rule kind specifier" ^n)
        !           190:                                else n
        !           191:              val _ = check_symbol (st,replacement,NonTerminal)
        !           192:          in parse_rule (case n of
        !           193:                         "TOPDOWN" => Topdown
        !           194:                       | "REWRITE" => Rewrite
        !           195:                       | _ => Ordinary,
        !           196:                           Label replacement, s, st)
        !           197:          end
        !           198:       | EOF => (s,st)
        !           199:       | _ => error "Syntax Error"
        !           200: 
        !           201:     and parse_prologue (([],a,b,c), st) =
        !           202:       parsespecification (([Prologue (read_ml_semicolon ())],a,b,c), st)
        !           203:       | parse_prologue _ = error "duplicate prologue"
        !           204: 
        !           205:     and parse_insert(([],a,b,c), st) = error "insert may not precede prologue"
        !           206:       | parse_insert ((l,a,b,c), st) =
        !           207:        parsespecification ((l @ [Insert (read_ml_semicolon ())],a,b,c), st)
        !           208: 
        !           209:     and parse_defaultcost ((a,b,NoCost,d),st) =
        !           210:       parsespecification ((a,b,Cost (read_ml_semicolon ()),d), st)
        !           211:       | parse_defaultcost _ = error "duplicate default cost"
        !           212: 
        !           213:     and parse_node (s, st) =
        !           214:       case lexer() of
        !           215:        IDENTIFIER id =>
        !           216:          (case lexer() of
        !           217:             LPAREN =>
        !           218:               (case lexer() of
        !           219:                  INT i =>
        !           220:                    (case lexer() of
        !           221:                       RPAREN =>
        !           222:                         (case lexer() of
        !           223:                            COMMA => parse_node (s,declare_node (st, id, str2int i))
        !           224:                          | SEMICOLON => parsespecification (s, declare_node (st, id, str2int i))
        !           225:                          | _ => error ", or ; expected")
        !           226:                     | _ => error "')' expected")
        !           227:                | _ => error "integer expected")
        !           228:           | _ => error " ( expected")
        !           229:       | _ => error "node declaration"
        !           230: 
        !           231:     and readtype () =
        !           232:       case lexerS() of
        !           233:        OTHER "|" => ("",OTHER "|")
        !           234:       | SEMICOLON => ("",SEMICOLON)
        !           235:       | EOF => error "premature eof"
        !           236:       | t => let val (s,r) = readtype() in ((token2str t) ^ s,r) end
        !           237:          
        !           238:     and parse_label (s, st) =
        !           239:       case lexer() of
        !           240:        IDENTIFIER id =>
        !           241:          (case lexer() of
        !           242:             IDENTIFIER "of" =>
        !           243:               let val (tyexpr, token) = readtype ()
        !           244:               in
        !           245:                 case token of
        !           246:                   OTHER "|" => parse_label (s,declare_label(st,id,tyexpr))
        !           247:                 | SEMICOLON => parsespecification(s,declare_label(st,id,tyexpr))
        !           248:                 | _ => error "label list"
        !           249:               end
        !           250:           | _ => error "label list"
        !           251:               )
        !           252:       | _ => error "label list"
        !           253:       
        !           254:     and token2str t =
        !           255:       case t of
        !           256:        IDENTIFIER s => s
        !           257:       | INT s => s
        !           258:       | EQ => "="
        !           259:       | COLON => ":"
        !           260:       | SEMICOLON => ";"
        !           261:       | COMMA => ","
        !           262:       | LPAREN => "("
        !           263:       | RPAREN => ")"
        !           264:       | TREEREF _ => error "tree references not allowed here"
        !           265:       | OTHER s => s
        !           266:       | SPACE s => s
        !           267:       | EOF => error "cannot convert EOF"
        !           268: 
        !           269:     and token2strX t =
        !           270:       case t of
        !           271:        IDENTIFIER s => s
        !           272:       | INT s => s
        !           273:       | EQ => "="
        !           274:       | COLON => ":"
        !           275:       | SEMICOLON => ";"
        !           276:       | COMMA => ","
        !           277:       | TREEREF l => treeref_string l
        !           278:       | LPAREN => "("
        !           279:       | RPAREN => ")"
        !           280:       | OTHER s => s
        !           281:       | SPACE s => s
        !           282:       | EOF => error "cannot convert EOF"
        !           283: 
        !           284:     and read_ml_semicolon' () =
        !           285:       case lexerS() of
        !           286:       LPAREN => ("(" :: read_ml_semicolon'()) @ (")" :: (read_ml_semicolon'()))
        !           287:       | RPAREN => []
        !           288:       | EOF => error "premature EOF"
        !           289:       | t => (token2str t):: (read_ml_semicolon' ())
        !           290: 
        !           291:     and read_ml_semicolon () =
        !           292:       if lexer() <> LPAREN then error "'(' expected"
        !           293:       else
        !           294:        let val r = read_ml_semicolon' () in
        !           295:          if lexer() <> SEMICOLON then error "missing semicolon"
        !           296:          else r
        !           297:        end
        !           298: 
        !           299:     and rulecode' () =
        !           300:       case lexerS() of
        !           301:       LPAREN => ("(" :: rulecode'()) @ (rulecode'())
        !           302:       | RPAREN => [" )"]
        !           303:       | EOF => error "premature EOF"
        !           304:       | t => (token2strX t):: (rulecode' ())
        !           305: 
        !           306:     and rulecode () =
        !           307:        if lexer() <> LPAREN then error "'(' expected"
        !           308:        else "( " :: (rulecode'())
        !           309: 
        !           310:     and parse_restaction (rn, ty, nt, pattern, costcode, (a,l,b,c), st) =
        !           311:       let val mlcode = Action (rulecode ())
        !           312:       in
        !           313:        if lexer() <> SEMICOLON then error "missing semicolon"
        !           314:        else
        !           315:          let val ps = Rule(rn,ty,nt,pattern,costcode,mlcode)
        !           316:          in parsespecification ((a,ps::l,b,c), st)
        !           317:          end
        !           318:       end
        !           319: 
        !           320:     and parse_restcost (rn, ty, nt, pattern, s as (a,l,b,c), st) =
        !           321:       let val mlcode = Cost (rulecode ())
        !           322:       in
        !           323:        case lexer() of
        !           324:          EQ => parse_restaction(rn,ty,nt,pattern,mlcode, s, st)
        !           325:        | _ => error " = expected"
        !           326:       end
        !           327: 
        !           328:     and parse_rule (ty, nt, s as (a,l,b,c), st) =
        !           329:       let val (pattern, token) = parse_pattern st
        !           330:        val (rn,st') = next_rule st
        !           331:       in
        !           332:        case token of
        !           333:          COLON => parse_restcost (rn, ty, nt, pattern, s, st')
        !           334:        | EQ =>
        !           335:            (case b of
        !           336:               NoCost => error "Must specify cost, no default cost defined"
        !           337:             | _ => parse_restaction (rn, ty, nt, pattern, NoCost, s, st'))
        !           338:        | _ => error "one of : = expected"
        !           339:       end
        !           340: 
        !           341:     and parse_pattern st =
        !           342:       case lexer() of
        !           343:        IDENTIFIER id =>
        !           344:          (case lexer() of
        !           345:             LPAREN => let val subtrees = parse_subtrees st
        !           346:                           val arity = length subtrees
        !           347:                           val _ = check_symbol (st, id, Terminal arity)
        !           348:                       in
        !           349:                         (Tree (Node (id,arity), subtrees), lexer())
        !           350:                       end
        !           351:           | token => let val p = check_symbol (st, id, Unknown)
        !           352:                      in
        !           353:                        (case p of
        !           354:                           NonTerminal => Leaf (Label id)
        !           355:                         | _ => Leaf (Node (id,0)),
        !           356:                             token)
        !           357:                      end)
        !           358:       | _ => error "ill-formed tree pattern"
        !           359: 
        !           360:     and parse_subtrees st =
        !           361:       let val (pat, token) = parse_pattern st
        !           362:       in
        !           363:        case token of
        !           364:          COMMA => let val rest = parse_subtrees st
        !           365:                   in (pat :: rest)
        !           366:                   end
        !           367:        | RPAREN => [pat]
        !           368:        | _ => error "one of , ) expected in tree pattern"
        !           369:       end
        !           370: 
        !           371:     and parse_structurename ((a,b,c,_),st) =
        !           372:       case lexer() of
        !           373:         IDENTIFIER s => if lexer() = SEMICOLON
        !           374:                          then parsespecification ((a,b,c,s),st)
        !           375:                        else error "missing ; after structure name"
        !           376:       | _ => error "structure name was not given"
        !           377: 
        !           378:     and construct_resulttype st =
        !           379:       let val first::rest = (rev o get_labels) st
        !           380:          and makeCon = (fn (n,t) => "_" ^ n ^ " of " ^ t)
        !           381:       in
        !           382:        fold (fn (a,b) => a ^ " | " ^ b)
        !           383:          (map makeCon rest)
        !           384:           (makeCon first)
        !           385:       end
        !           386: 
        !           387:     and specification instream =
        !           388:       let val dummy = (lexf := make_lexer instream)
        !           389:          val ((a,b,c,d), st) = parsespecification
        !           390:            (([],[],NoCost,"TreeProcessor"), empty_symboltable)
        !           391:       in
        !           392:        if b = nil orelse a = nil
        !           393:          then error "Prologue and rules are obligatory"
        !           394:        else
        !           395:          (a,b,c,d,construct_resulttype st,
        !           396:           (map (fn (s,_) => Label s) (get_labels st))@
        !           397:           (map (fn n => Node n) (get_nodes' st)))
        !           398:       end
        !           399:     handle SymbolConflict s => error ("Symbol Table : "^s)
        !           400:         | LexError => error ("Lexical Error")
        !           401: 
        !           402:     and lexerS () = (!lexf)()
        !           403:                   
        !           404:     and lexer () =
        !           405:       case lexerS () of        SPACE _ => lexer() | t => t
        !           406: 
        !           407:   end;

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.