Annotation of researchv10no/cmd/sml/lib/twig/automata.sml, revision 1.1.1.1

1.1       root        1: 
                      2: (* April 1989, Jussi Rintanen, Helsinki University of Technology *)
                      3: 
                      4: (* The tree pattern matching automata builder
                      5: 
                      6:   This is the final version of the machine builder of ML-Twig.
                      7:   The machine builder takes as input a list of rules, constructs
                      8:   a finite state automaton directly from the tree patterns. In the prototype
                      9:   version, a string was constructed and returned, but the complexity of
                     10:   constructing large strings from small constituent strings by catenating
                     11:   them is not very good, so we have a separate function for printing
                     12:   the automaton.
                     13: 
                     14: *)
                     15: 
                     16: signature AUTOMATA =
                     17:   sig
                     18:     exception AutomatonError of string
                     19:     structure Parser : PARSER
                     20:     val build_automaton: outstream * Parser.symbol list * Parser.rule list -> unit
                     21: end;
                     22: 
                     23: (* This is the second version of ML-Twig Automata Builder.
                     24: We note, that the first version was purely functional (without side-effects)
                     25: and was based on a general structure constructing Aho-Corasick automata
                     26: for string matching. However, the first version was considered too complex
                     27: and inefficient, and we decided to rewrite it from scratch.
                     28:   This revised version constructs a trie, which is represented by an array.
                     29: The trie is built directly from tree patterns, and explicit construction
                     30: of path strings is avoided.
                     31: For detailed description we refer to [Aho,Corasick] and [Hoffmann,O'Donnell].
                     32: *)
                     33: 
                     34: functor MAKEautomata (structure Parser : PARSER): AUTOMATA =
                     35:   struct
                     36:     exception AutomatonError of string
                     37: 
                     38:     fun fatal s = raise AutomatonError s
                     39:       
                     40: (* This structure represents an abstract trie with extensions for
                     41:    string pattern matching automaton construction. We have assumed,
                     42:    that the implementation has side-effects, and for efficiency
                     43:    an array is used. *)
                     44:       
                     45:     structure Implementation :
                     46:       sig
                     47:        structure Parser : PARSER
                     48:        datatype alpha = Sym of Parser.symbol | Child of int
                     49:        type automaton
                     50:        val empty_automaton : unit -> automaton
                     51:        val add_arc : automaton * int * alpha -> automaton * int
                     52:        val add_finals : automaton * int * (int * int * Parser.symbol) list -> automaton
                     53:        val set_failure : automaton * int * int -> automaton
                     54:        val get_failure : automaton * int -> int
                     55:        val get_finals : automaton * int -> (int * int * Parser.symbol) list
                     56:        val get_transitions : automaton * int -> (alpha * int) list
                     57:        val last_state : automaton -> int
                     58:       end
                     59:     =
                     60:     struct
                     61:       structure Parser : PARSER = Parser
                     62:       open Parser
                     63:       datatype alpha = Sym of symbol | Child of int
                     64:       type state = ((int * int * symbol) list * (alpha * int) list * int)
                     65:       type automaton = state array * int * int
                     66: 
                     67:       fun empty_automaton () = (array (400, ([],[],0)), 400, 1)
                     68: 
                     69:       fun add_arc (trie as (a,b,c),i,iota) =
                     70:        let val (fs,ts,f) = a sub i
                     71:          val rec go = (fn nil => ~1 | ((on,to) :: t) =>
                     72:                        if iota = on then to else go t)
                     73:          val destination = go ts
                     74:        in
                     75:          if destination <> ~1
                     76:            then (trie,destination)
                     77:          else
                     78:            if b = c
                     79:              then
                     80:                let val newsize = b*3 div 2
                     81:                  val newa = array (newsize, ([],[],0))
                     82:                  val rec copya = fn 0 => update(newa,0,a sub 0)
                     83:                                   | n => (update(newa,n,a sub n); copya (n-1))
                     84:                in
                     85:                  copya (b-1); ((newa,newsize,c+1),c)
                     86:                end
                     87:            else (update(a,i,(fs,(iota,c)::ts,f));((a,b,c+1),c))
                     88:        end
                     89:          
                     90:       fun set_failure (trie as (a,b,c),i,f) =
                     91:        let val (fs,ts,f') = a sub i
                     92:        in
                     93:          (update (a,i,(fs,ts,f));
                     94:           trie)
                     95:        end
                     96:       
                     97:       fun add_finals (trie as (a,b,c),i,f) =
                     98:        let val (fs,ts,s) = a sub i
                     99:        in
                    100:          (update (a,i,(f@fs,ts,s));
                    101:           trie)
                    102:        end
                    103:       
                    104:       fun get_finals ((a,b,c),i) = let val (fs,ts,s) = a sub i in fs end
                    105:       fun get_failure ((a,b,c),i) = let val (fs,ts,s) = a sub i in s end
                    106:       fun get_transitions ((a,b,c),i) = let val (fs,ts,s) = a sub i in ts end
                    107:       fun last_state (a,b,c) = c-1
                    108: 
                    109:     end
                    110: 
                    111:     structure Parser = Parser
                    112: 
                    113:     open Implementation Parser
                    114: 
                    115:     val int2str : int -> string = makestring
                    116: 
                    117:     val accum = revfold
                    118: 
                    119: (* This function traverses a tree pattern and concurrently adds arcs
                    120:    to the trie representation of a tree pattern matching automaton. *)
                    121: 
                    122:     fun add_pattern (autom, rule1, nont, Leaf n, state, len) =
                    123:       let val (autom', state') = add_arc (autom, state, Sym n)
                    124:       in add_finals (autom', state', [(len,rule1,nont)]) end
                    125:       | add_pattern (autom, rule1, nont, Tree (n, cs), state, len) =
                    126:        let val (autom', state') = add_arc (autom, state, Sym n)
                    127:          val (autom'''',_) =
                    128:            accum
                    129:            (fn (c,(autom'', cn)) =>
                    130:               let val (autom''', state'') = add_arc (autom'', state', Child cn)
                    131:               in
                    132:                 (add_pattern (autom''', rule1, nont, c, state'', len+1), cn + 1)
                    133:               end)
                    134:               cs (autom', 1)
                    135:        in
                    136:          autom''''
                    137:        end
                    138: 
                    139:     fun go (au, s, i) =
                    140:       let
                    141:        val ts = get_transitions(au, s)
                    142:        val rec g = fn nil => ~1 | ((p,q)::t) => if p=i then q else g t
                    143:       in g ts
                    144:       end
                    145: 
                    146:     fun oflevel1 au = let val ts = get_transitions(au, 0)
                    147:                      in map (fn (p,q) => q) ts
                    148:                      end
                    149:                        
                    150:     fun iterate (au, nil, nil) = au
                    151:       | iterate (au, nil, next) = iterate (au,next,nil)
                    152:       | iterate (au, h::t, next) =
                    153:        let val f = get_failure (au, h)
                    154:          val ts = get_transitions (au, h)
                    155:          val au' = accum (fn ((i,s),aut) =>
                    156:                           let val rec fail = fn state =>
                    157:                             if go (aut,state,i) <> ~1
                    158:                               then go (aut,state,i)
                    159:                             else if state=0
                    160:                                    then 0
                    161:                                  else fail (get_failure (aut, state))
                    162:                           in
                    163:                             add_finals(set_failure (aut, s, fail f),
                    164:                                        s,
                    165:                                        get_finals (aut, fail f))
                    166:                           end
                    167:                         ) ts au
                    168:        in
                    169:          iterate (au',t,(map (fn (p,q) => q) ts) @ next)
                    170:        end
                    171: 
                    172:     fun construct_failure au = iterate (au, oflevel1 au, [])
                    173:       
                    174:     fun construct_automaton rules =
                    175:       let val t1 = (* Trie & final state values *)
                    176:        accum
                    177:        (fn (Rule(n,_,r,p,_,_),a) => add_pattern (a,n,r,p,0,1))
                    178:           rules
                    179:           (empty_automaton ())
                    180:       in
                    181:        construct_failure t1 (* Failure & final state values *)
                    182:       end
                    183: 
                    184:     fun symbol2str (Label s) = "__"^s
                    185:       | symbol2str (Node (s,_)) = s
                    186: 
                    187:     fun arc2str (Sym s) = symbol2str s
                    188:       | arc2str (Child n) = "(ARC "^(int2str n)^")"
                    189: 
                    190:     fun output_symbols (out,symbols) =
                    191:       (out "datatype symbols = ARC of int";
                    192:        map
                    193:        (fn s => out (" | "^(symbol2str s)))
                    194:          symbols;
                    195:          out "\n")
                    196: 
                    197:     fun output_finals' (out,au,n) =
                    198:       if n <= last_state au
                    199:        then
                    200:          let val finals = get_finals (au,n)
                    201:            fun outfinal (i,j,s) =
                    202:              (out "(";
                    203:               out (int2str i);
                    204:               out ",";
                    205:               out (int2str j);
                    206:               out ",";
                    207:               out (symbol2str s);
                    208:               out ")")
                    209:          in
                    210:            out (int2str n);
                    211:            out " => [";
                    212:              case finals of
                    213:                nil => ()
                    214:              | [h] => outfinal h
                    215:              | (h::t) => (outfinal h;app (fn h => (out",";outfinal h)) t);
                    216:            out "]\n  | ";
                    217:            output_finals' (out,au,n+1)
                    218:          end
                    219:       else ()
                    220: 
                    221:     fun output_finals (out,au) =
                    222:       (out "fun get_finals s =\n";
                    223:        out "  case s of\n";
                    224:        output_finals' (out,au,0);
                    225:        out "_ => nil\n\n")
                    226: 
                    227:     fun output_goto' (out,au,n) =
                    228:       if n <= last_state au
                    229:        then
                    230:          let val transitions = get_transitions (au,n)
                    231:          in
                    232:            out (int2str n);
                    233:            out " => (case a of ";
                    234:            app
                    235:            (fn (i,s) =>
                    236:               (out (arc2str i);
                    237:               out " => ";
                    238:               out (int2str s);
                    239:               out " | "))
                    240:               transitions;
                    241:             out " _ => ";
                    242:             if n = 0 then (out "0")
                    243:             else (out "go ("; out (int2str (get_failure (au,n))); out ",a)");
                    244:            out ")\n  | ";
                    245:            output_goto' (out,au,n+1)
                    246:          end
                    247:       else ()
                    248: 
                    249:     fun output_goto (out,au)  =
                    250:       (out "fun go (s,a) =\n";
                    251:        out "  case s of\n";
                    252:        output_goto' (out,au,0);
                    253:        out "_ => 0\n\n")
                    254: 
                    255:     fun output_automaton (outstr,au,symbols) =
                    256:        let val out = output outstr
                    257:        in
                    258:         output_finals (out,au);
                    259:         output_goto (out,au);
                    260:         out "val go_f = get_finals o go\n";
                    261:         out "fun childsymbol s = ARC s\n";
                    262:         out "val initialstate = 0\n";
                    263:         out "type state = int\n"
                    264:        end
                    265:        
                    266:      fun build_automaton (outstr,symbols,rules) =
                    267:        output_automaton(outstr,construct_automaton rules,symbols)
                    268: 
                    269:   end;

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.