File:  [Research Unix] / researchv10no / cmd / sml / lib / twig / parser.sml
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs
Tue Apr 24 17:21:34 2018 UTC (8 years, 1 month ago) by root
Branches: belllabs, MAIN
CVS tags: researchv10, HEAD
researchv10 Norman

(* April 1989, Jussi Rintanen, Helsinki University of Technology *)

(* Parser is a structure containing a type definition for
   specification elements and a function, which maps
   a specification file to a ML-representation of the specification.

   The ML-representation is a tuple of prologue/insert code-fragments,
   list of tree translation rules, default code fragment for cost,
   a string containing the name for the resulting structure, a datatype
   definition for the result type, and the list of symbols appearing in
   the specification.

*)

signature PARSER =
  sig

    exception ParserError of string

    datatype symbol = Node of string * int | Label of string

    datatype cost = Cost of string list | NoCost
    and action = Action of string list
    and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
    and ruletype = Ordinary | Topdown | Rewrite
    and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)

    datatype code_fragment = Prologue of string list | Insert of string list

    val specification :	instream ->
	(code_fragment list * rule list * cost * string * string * symbol list)

end;

(*
   The parser is a straightforward predictive parser. It interfaces to the
   lexer through the signature LEXER declared elsewhere. The symbol table
   takes care of various book-keeping tasks of the parser and the
   type checking of the symbols appearing in the rules of a specification.
*)

(* The symbol table is represented by an object of type stable. Declare_label
   and declare_node are used by the functions parsing node and label
   declarations. They add a symbol to the symbol table. By check_symbol, the
   functions parsing rules, the tree pattern in rules, assert a property
   of a symbol. The symbol table checks whether it agrees, and if not,
   it raises the exception SymbolConflict. Otherwise it returns the
   type of the symbol. The function next_rule is used by the
   parser for numbering the rules.
*)

signature SYMBOLTABLE =
  sig
    exception SymbolConflict of string
    datatype class = NonTerminal | Terminal of int | Unknown

    type stable

    val empty_symboltable : stable

    val check_symbol : stable * string * class -> class
    val declare_label : stable * string * string -> stable
    val declare_node : stable * string * int -> stable

    val get_labels : stable -> (string * string) list
    val get_nodes' : stable -> (string * int) list

    val next_rule : stable -> int * stable

end;

structure Symboltable : SYMBOLTABLE =
  struct

    exception SymbolConflict of string

    datatype class = NonTerminal | Terminal of int | Unknown

    type stable = (string -> class) * int * (string * string) list * (string * int) list

    fun conflict s = raise SymbolConflict s

    val empty_symboltable =
      (fn s => conflict ("symbol "^s^" is not declared"),0,[],[])

    val int2str : int -> string = makestring

(* Check checks, whether a symbol is of correct type. It impossible for the
   parser to distinguish a label from a node symbol of arity 0 (a leaf symbol),
   and therefore the parser may assert the property unknown for labels and
   symbols of arity 0.
*)

    fun check_symbol ((symbols,rn,ls,ns),s,p') =
      let val p = symbols s
      in
	(case p of
	   NonTerminal =>
	     (case p' of
		Terminal _ => conflict ("symbol "^s^" was declared a node")
	      | _ => p)
	 | Terminal a =>
	     (case p' of
		Unknown =>
		  if a=0 then p
		  else conflict ("symbol "^s^" is a node symboll with arity "^(int2str a))
	      | Terminal a' => if a=a'
				 then p
			       else conflict ("symbol "^s^" is of arity "^(int2str a))
	      | _ => conflict ("symbol "^s^" is a node symbol"))
	 | _ => conflict "internal error, unknown symbol in table")
      end

(* Declare_label and declare_node just extend the function. *)

    fun declare_label (st as (symbols,rn,ls,ns),s,ty) =
      ((fn s' => if s' = s then NonTerminal else symbols s'),
       rn,(s,ty)::ls,ns)

    fun declare_node (st as (symbols,rn,ls,ns),s,a) =
      ((fn s' => if s' = s then Terminal a else symbols s'),
       rn,ls,(s,a)::ns)

    fun next_rule (symbols, rn, ls, ns) = (rn,(symbols, rn+1, ls, ns))

    fun get_labels (_,_,ls,_) = ls
    fun get_nodes' (_,_,_,ns) = ns
  end;


functor MAKEparser (structure Symboltable : SYMBOLTABLE
		    and Lexer : LEXER) : PARSER =
  struct
      
    open Symboltable Lexer
    
    exception ParserError of string

    datatype symbol = Node of string * int | Label of string

    datatype cost = Cost of string list | NoCost
    and action = Action of string list
    and tree_pattern = Leaf of symbol | Tree of (symbol * tree_pattern list)
    and ruletype = Ordinary | Topdown | Rewrite
    and rule = Rule of (int * ruletype * symbol * tree_pattern * cost * action)

    datatype code_fragment = Prologue of string list | Insert of string list

    val lexf : (unit -> lexresult) ref = ref (fn () => COLON)

    val int2str : int -> string = makestring

    fun error s =
      raise ParserError ("Error in Line "^(int2str (current_line()))^" : "^s)

    local
      fun digit c = ("0" <= c) andalso (c <= "9")
      fun str2int' (a,c::r) =
	if digit c
	  then str2int' (a*10 + ord c - ord "0" ,r)
	else (a,c::r)
	| str2int' r = r
    in
      fun str2int s =
	case str2int' (0,(explode s)) of
	  (i,[]) => i
	| _ => error "parser internal"
    end

    fun treeref_string' (s,[]) = s
      | treeref_string' (s, h::t) =
	treeref_string' ("(get_subtree (" ^ (int2str h) ^ "," ^ s ^ "))", t)

    fun treeref_string l = treeref_string' ("ir",l)

    fun parsespecification (s,st) =
      case lexer() of
	IDENTIFIER "prologue" => parse_prologue (s,st)
      | IDENTIFIER "insert" => parse_insert (s,st)
      | IDENTIFIER "node" => parse_node (s,st)
      | IDENTIFIER "label" => parse_label (s,st)
      | IDENTIFIER "default_cost" => parse_defaultcost (s,st)
      | IDENTIFIER "structure_name" => parse_structurename (s,st)
      | IDENTIFIER n =>
	  let val replacement = if n = "TOPDOWN" orelse n = "REWRITE"
				  then
				    case lexer() of
				      IDENTIFIER n' => n'
				    | _ => error ("identifier expected after rule kind specifier" ^n)
				else n
	      val _ = check_symbol (st,replacement,NonTerminal)
	  in parse_rule (case n of
			 "TOPDOWN" => Topdown
		       | "REWRITE" => Rewrite
		       | _ => Ordinary,
			   Label replacement, s, st)
	  end
      | EOF => (s,st)
      | _ => error "Syntax Error"

    and parse_prologue (([],a,b,c), st) =
      parsespecification (([Prologue (read_ml_semicolon ())],a,b,c), st)
      | parse_prologue _ = error "duplicate prologue"

    and parse_insert(([],a,b,c), st) = error "insert may not precede prologue"
      | parse_insert ((l,a,b,c), st) =
	parsespecification ((l @ [Insert (read_ml_semicolon ())],a,b,c), st)

    and parse_defaultcost ((a,b,NoCost,d),st) =
      parsespecification ((a,b,Cost (read_ml_semicolon ()),d), st)
      | parse_defaultcost _ = error "duplicate default cost"

    and parse_node (s, st) =
      case lexer() of
	IDENTIFIER id =>
	  (case lexer() of
	     LPAREN =>
	       (case lexer() of
		  INT i =>
		    (case lexer() of
		       RPAREN =>
			 (case lexer() of
			    COMMA => parse_node (s,declare_node (st, id, str2int i))
			  | SEMICOLON => parsespecification (s, declare_node (st, id, str2int i))
			  | _ => error ", or ; expected")
		     | _ => error "')' expected")
		| _ => error "integer expected")
	   | _ => error " ( expected")
      | _ => error "node declaration"

    and readtype () =
      case lexerS() of
	OTHER "|" => ("",OTHER "|")
      | SEMICOLON => ("",SEMICOLON)
      | EOF => error "premature eof"
      | t => let val (s,r) = readtype() in ((token2str t) ^ s,r) end
	  
    and parse_label (s, st) =
      case lexer() of
	IDENTIFIER id =>
	  (case lexer() of
	     IDENTIFIER "of" =>
	       let val (tyexpr, token) = readtype ()
	       in
		 case token of
		   OTHER "|" => parse_label (s,declare_label(st,id,tyexpr))
		 | SEMICOLON => parsespecification(s,declare_label(st,id,tyexpr))
		 | _ => error "label list"
	       end
	   | _ => error "label list"
	       )
      | _ => error "label list"
      
    and token2str t =
      case t of
	IDENTIFIER s => s
      | INT s => s
      | EQ => "="
      | COLON => ":"
      | SEMICOLON => ";"
      | COMMA => ","
      | LPAREN => "("
      | RPAREN => ")"
      | TREEREF _ => error "tree references not allowed here"
      | OTHER s => s
      | SPACE s => s
      | EOF => error "cannot convert EOF"

    and token2strX t =
      case t of
	IDENTIFIER s => s
      | INT s => s
      | EQ => "="
      | COLON => ":"
      | SEMICOLON => ";"
      | COMMA => ","
      | TREEREF l => treeref_string l
      | LPAREN => "("
      | RPAREN => ")"
      | OTHER s => s
      | SPACE s => s
      | EOF => error "cannot convert EOF"

    and read_ml_semicolon' () =
      case lexerS() of
      LPAREN => ("(" :: read_ml_semicolon'()) @ (")" :: (read_ml_semicolon'()))
      | RPAREN => []
      | EOF => error "premature EOF"
      | t => (token2str t):: (read_ml_semicolon' ())

    and read_ml_semicolon () =
      if lexer() <> LPAREN then error "'(' expected"
      else
	let val r = read_ml_semicolon' () in
	  if lexer() <> SEMICOLON then error "missing semicolon"
	  else r
	end

    and rulecode' () =
      case lexerS() of
      LPAREN => ("(" :: rulecode'()) @ (rulecode'())
      | RPAREN => [" )"]
      | EOF => error "premature EOF"
      | t => (token2strX t):: (rulecode' ())

    and rulecode () =
	if lexer() <> LPAREN then error "'(' expected"
	else "( " :: (rulecode'())

    and parse_restaction (rn, ty, nt, pattern, costcode, (a,l,b,c), st) =
      let val mlcode = Action (rulecode ())
      in
	if lexer() <> SEMICOLON then error "missing semicolon"
	else
	  let val ps = Rule(rn,ty,nt,pattern,costcode,mlcode)
	  in parsespecification ((a,ps::l,b,c), st)
	  end
      end

    and parse_restcost (rn, ty, nt, pattern, s as (a,l,b,c), st) =
      let val mlcode = Cost (rulecode ())
      in
	case lexer() of
	  EQ => parse_restaction(rn,ty,nt,pattern,mlcode, s, st)
	| _ => error " = expected"
      end

    and parse_rule (ty, nt, s as (a,l,b,c), st) =
      let val (pattern, token) = parse_pattern st
	val (rn,st') = next_rule st
      in
	case token of
	  COLON => parse_restcost (rn, ty, nt, pattern, s, st')
	| EQ =>
	    (case b of
	       NoCost => error "Must specify cost, no default cost defined"
	     | _ => parse_restaction (rn, ty, nt, pattern, NoCost, s, st'))
	| _ => error "one of : = expected"
      end

    and parse_pattern st =
      case lexer() of
	IDENTIFIER id =>
	  (case lexer() of
	     LPAREN => let val subtrees = parse_subtrees st
			   val arity = length subtrees
			   val _ = check_symbol (st, id, Terminal arity)
		       in
			 (Tree (Node (id,arity), subtrees), lexer())
		       end
	   | token => let val p = check_symbol (st, id, Unknown)
		      in
			(case p of
			   NonTerminal => Leaf (Label id)
			 | _ => Leaf (Node (id,0)),
			     token)
		      end)
      | _ => error "ill-formed tree pattern"

    and parse_subtrees st =
      let val (pat, token) = parse_pattern st
      in
	case token of
	  COMMA => let val rest = parse_subtrees st
		   in (pat :: rest)
		   end
	| RPAREN => [pat]
	| _ => error "one of , ) expected in tree pattern"
      end

    and parse_structurename ((a,b,c,_),st) =
      case lexer() of
        IDENTIFIER s => if lexer() = SEMICOLON
			  then parsespecification ((a,b,c,s),st)
			else error "missing ; after structure name"
      | _ => error "structure name was not given"

    and construct_resulttype st =
      let val first::rest = (rev o get_labels) st
	  and makeCon = (fn (n,t) => "_" ^ n ^ " of " ^ t)
      in
	fold (fn (a,b) => a ^ " | " ^ b)
	  (map makeCon rest)
	   (makeCon first)
      end

    and specification instream =
      let val dummy = (lexf := make_lexer instream)
	  val ((a,b,c,d), st) = parsespecification
	    (([],[],NoCost,"TreeProcessor"), empty_symboltable)
      in
	if b = nil orelse a = nil
	  then error "Prologue and rules are obligatory"
	else
	  (a,b,c,d,construct_resulttype st,
	   (map (fn (s,_) => Label s) (get_labels st))@
	   (map (fn n => Node n) (get_nodes' st)))
      end
    handle SymbolConflict s => error ("Symbol Table : "^s)
	 | LexError => error ("Lexical Error")

    and lexerS () = (!lexf)()
		   
    and lexer () =
      case lexerS () of	SPACE _ => lexer() | t => t

  end;

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.