|
|
1.1 ! root 1: /* ! 2: * The machine used here is the same as the one used in fgrep. ! 3: * Logically, it is a trie of all the strings to be recognized. ! 4: * In this case, the strings are the paths of tree patterns. ! 5: * The nodes of the trie are the states of the machine and leaves ! 6: * are accepting states. The trie is augmented by failure transitions. ! 7: * ! 8: * Each machine state is a linked list of machine_node's and referred to ! 9: * by pointing to the head of the list. The machine_node represents a ! 10: * transition. The nst field points to the next state when the current ! 11: * input is MI_EQUAL to the inp field. If the input is not MI_EQUAL to any ! 12: * of the inp fields in the list, the next state is the one that fail ! 13: * points to. The fail field must be the same for all machine_node's in ! 14: * a state. Accepting states have inp.value equal to -1, and the match ! 15: * field points to a list of match structures. The match structures record ! 16: * which trees have been partially matched. ! 17: * ! 18: * The index is used by the machine generator to convert the machine into ! 19: * a list of integers. ! 20: * ! 21: * Further details about the data structure and algorithms can be ! 22: * found in: ! 23: * Knuth, Morris, Pratt in SIAM J Computing 6:3 ! 24: * Aho, Corasick in Comm ACM 18:6 ! 25: * Hoffman, O'Donnell in JACM 29:1 ! 26: */ ! 27: struct machine_input { ! 28: short value; ! 29: struct symbol_entry *sym; ! 30: }; ! 31: #define MI_EQUAL(x,y) ((x).value==(y).value) ! 32: ! 33: #include "machcomm.h" ! 34: ! 35: extern int path_getsym(); ! 36: ! 37: struct machine_node { ! 38: struct machine_input inp; ! 39: struct machine_node *nst; ! 40: struct machine_node *link; ! 41: struct machine_node *fail; ! 42: struct match *match; ! 43: short int index; ! 44: }; ! 45: ! 46: struct match { ! 47: struct match *next; ! 48: short int depth; ! 49: LabelData *tree; ! 50: }; ! 51: ! 52: extern struct machine_node *machine; ! 53: ! 54: extern void machine_build();
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.