|
|
1.1 root 1:
2: (* November 1988, Jussi Rintanen, Helsinki University of Technology *)
3:
4: (* This is the specification of the ML-Twig lexical analyzer.
5:
6: This lexer correctly recognizes all Standard ML tokens, as specified
7: in [Harper, MacQueen and Milner, 1986].
8: *)
9:
(* Token type produced by the lexer rules below.  Constructors that carry a
   string hold the matched source text unchanged. *)
datatype lexresult
  = IDENTIFIER of string   (* text matched by the alphanumeric identifier rule *)
  | INT of string          (* an unsigned run of decimal digits, kept as text *)
  | EQ                     (* the symbolic token consisting of a single equals sign *)
  | RPAREN                 (* closing parenthesis *)
  | LPAREN                 (* opening parenthesis *)
  | COLON                  (* the symbolic token consisting of a single colon *)
  | SEMICOLON
  | COMMA
  | TREEREF of int list    (* dollar-delimited tree reference, as the list of its dotted numbers *)
  | OTHER of string        (* any other recognised token, passed through as text *)
  | SPACE of string        (* blanks, newlines and comment text *)
  | EOF                    (* returned by eof at end of input *)
23:
local
    (* Character code of "0", used to turn a digit character into its value. *)
    val zero = ord "0"

    (* True when ch lies between "0" and "9" in string order. *)
    fun is_digit ch = not (ch < "0" orelse "9" < ch)

    (* accumulate (acc, chars) folds leading digit characters of chars into
       acc and returns the resulting number together with the unread rest. *)
    fun accumulate (acc, nil) = (acc, nil)
      | accumulate (acc, rest as ch :: more) =
          if is_digit ch
          then accumulate (acc * 10 + ord ch - zero, more)
          else (acc, rest)
in
    (* str0int chars reads an unsigned decimal number from the front of a list
       of single-character strings.  Returns the pair of the number, which is
       0 when no digit is present, and the remaining characters. *)
    fun str0int chars = accumulate (0, chars)
end
34:
local
    (* Converts the characters that follow the opening dollar sign into the
       list of numbers they spell.  After each number exactly one character,
       the separating dot or the closing dollar sign, is skipped. *)
    fun path chars =
        case chars of
            nil => nil
          | ["$"] => nil
          | _ =>
              let
                  val (index, remainder) = str0int chars
              in
                  index :: path (tl remainder)
              end
in
    (* parse_treeref text turns the text of a tree reference token, i.e.
       dotted numbers between two dollar signs, into the list of its numbers.
       The leading dollar sign is dropped before parsing. *)
    val parse_treeref = fn text => path (tl (explode text))
end
46:
(* Number of the line currently being scanned; starts at 1 and is advanced by
   the rules whenever they consume a newline. *)
val current_line_number = (ref 1 : int ref)

(* Nesting depth of the comment being scanned; raised by each comment opener
   and lowered by each comment closer in the rules. *)
val commentlevel = (ref 0 : int ref)

(* Returns the current value of the line counter. *)
fun current_line () =
    let
        val line = !current_line_number
    in
        line
    end

(* End-of-input hook called by the generated lexer; always yields EOF. *)
fun eof () = EOF
53:
54: %%
55:
56: %structure TwigLexer
57:
58: %s C;
59:
60: idbegin=[a-zA-Z'_];
61: idchar=[a-zA-Z0-9'_];
62:
63: sidbegin=[!%&$+/:<=>?@~'|*\\^-];
64: sidchar=[!%&$+/:<=>?@~'|#*\\^-];
65:
66: digit=[0-9];
67: whitespace=[\ \t];
68: newline=[\n];
69: controllable=[a-zA-Z];
70:
71: %%
72:
<INITIAL>"$" ({digit}+("."{digit}+)*)? "$" => (TREEREF (parse_treeref yytext));

<INITIAL>{digit}+ => (INT yytext);

<INITIAL>{digit}+("."{digit}+)?(E"~"?{digit}+)? => (OTHER yytext);

<INITIAL>{sidbegin}{sidchar}* => (
    (* Only a bare colon and a bare equals sign get tokens of their own;
       every other symbolic identifier is passed through as text. *)
    if yytext = ":" then COLON
    else if yytext = "=" then EQ
    else OTHER yytext);

<INITIAL>{idbegin}{idchar}* => (IDENTIFIER yytext);

<INITIAL>(({idbegin}{idchar}*) | ({sidbegin}{sidchar}*))
("."(({idbegin}{idchar}*) | ({sidchar}+)))+
=> (OTHER yytext);

<INITIAL>"(" => (LPAREN);
<INITIAL>")" => (RPAREN);
<INITIAL>";" => (SEMICOLON);
<INITIAL>"," => (COMMA);
<INITIAL>"]" => (OTHER yytext);
<INITIAL>"[" => (OTHER yytext);
<INITIAL>"..." => (OTHER yytext);
<INITIAL>"#"{digit}+ => (OTHER yytext);

<INITIAL>\"((\\([nt\\"]|([0-9]{3})|("^"[a-zA-Z])|([\ \t\n]+\\)))|[^\n\\"])*\"
=> (let
        (* A string literal may span lines through its gap escape, so every
           newline inside the matched text is added to the line counter. *)
        fun count_newline (ch, total) = if ch = "\n" then total + 1 else total
    in
        current_line_number :=
            fold count_newline (explode yytext) (!current_line_number);
        OTHER yytext
    end);

<INITIAL>"{"|"}"|"_" => (
    (* NOTE for review: a lone underscore also matches the identifier rule
       listed earlier, which wins on equal-length matches -- confirm which
       token is intended for it. *)
    OTHER yytext);

<INITIAL>{whitespace}+ => (SPACE yytext);

<INITIAL>{newline} => ( inc current_line_number; SPACE yytext);
112:
<INITIAL>"(*" => (
    (* Enter comment mode; the comment text is handed on as SPACE. *)
    YYBEGIN C; inc commentlevel; SPACE yytext );

<C>"(*" => (
    (* Nested comment opener: one level deeper. *)
    inc commentlevel; SPACE yytext );
<C>"*)" => (
    (* Comment closer: leave comment mode once the outermost level is closed. *)
    dec commentlevel;
    if !commentlevel = 0 then YYBEGIN INITIAL else ();
    SPACE yytext );
<C>{newline} => ( inc current_line_number; SPACE yytext );
<C>"*"{newline} => ( inc current_line_number; SPACE yytext );
<C>[^(*\n]+ => (
    (* A run of ordinary comment characters. *)
    SPACE yytext );
<C>"("|"*" => (
    (* A lone open-paren or star that is not part of a comment delimiter.
       It is consumed one character at a time so that the two-character
       delimiter rules above always win by longest match.  Consuming pairs
       here would swallow the first character of a delimiter, e.g. two stars
       followed by a close-paren would never close the comment. *)
    SPACE yytext );
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.