|
|
1.1 root 1: (* lex.sml *)
2:
3: (* two versions of rudimentary lexical analyzers that break a string (representing
4: an input line, say) into a list of "words" defined as sequences of non-white-space
5: characters *)
6:
(* LEX: interface shared by the lexical analyzers in this file.
   words s  splits the string s into a list of "words". *)
signature LEX =
  sig
    val words : string -> string list
  end
11:
(* Lex1: list-based lexer.
   The input string is exploded into a character list, which is then
   walked by two mutually recursive functions: one skips separators,
   the other accumulates the characters of the current word. *)
structure Lex1 : LEX =
  struct

    (* separator c  is true exactly for space, tab and newline.
       The patterns are char literals because explode yields a char list
       under the SML Basis Library; the original string-literal patterns
       do not type-check against it. *)
    fun separator #" "  = true
      | separator #"\t" = true
      | separator #"\n" = true
      | separator _     = false

    (* words s  returns the maximal runs of non-separator characters of s,
       in left-to-right order.  An empty or all-separator string gives []. *)
    fun words s =
      let
        (* getword (w, cs): w holds the characters of the current word in
           reverse order; cs is the input still to be read. *)
        fun getword (w, []) = [implode (rev w)]
          | getword (w, c :: rest) =
              if separator c
              then implode (rev w) :: skip rest
              else getword (c :: w, rest)
        (* skip cs: drop leading separators, then start a word at the
           first non-separator character. *)
        and skip [] = []
          | skip (c :: rest) =
              if separator c
              then skip rest
              else getword ([c], rest)
      in
        skip (explode s)
      end

  end (* Lex1 *)
35:
(* Lex2: index-based lexer.
   Scans the string in place using integer positions and extracts each
   word with substring, instead of exploding the string into a list. *)
structure Lex2 : LEX =
  struct

    (* separator c  is true exactly for the one-character strings
       space, tab and newline. *)
    fun separator " "  = true
      | separator "\t" = true
      | separator "\n" = true
      | separator _    = false

    (* words s  returns the maximal runs of non-separator characters of s,
       in left-to-right order.  An empty or all-separator string gives []. *)
    fun words (s : string) =
      let
        (* size, not length: length is the list function in the Basis
           Library, so applying it to a string is a type error. *)
        val len = size s

        (* skip n: look for the start of the next word at position n. *)
        fun skip n =
          let
            (* getword m: the current word starts at n; m is the next
               position to examine.  The word ends at the first separator
               or at the end of the string. *)
            fun getword m =
              if m >= len orelse separator (substring (s, m, 1))
              then substring (s, n, m - n) :: skip (m + 1)
              else getword (m + 1)
          in
            if n >= len
            then []
            else if separator (substring (s, n, 1))
            then skip (n + 1)
            else getword (n + 1)
          end
      in
        skip 0
      end

  end (* Lex2 *)
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.