|
|
1.1 root 1: /*
2: * Metaphone algorithm for translating a word to
3: * a short phonetic equivalent for lookup.
4: * char * metaphone(char * word)
5: *
6: * Original algorithm by Larry Phillips
 * Algorithm is in the public domain.
8: */
#include <ctype.h>
#include <string.h>
10:
/*
 * Letter classes used by the transcription rules.  vsvfn[] is indexed by
 * (letter - 'A'); each entry ORs together the class bits that the
 * VOWEL/SAME/VARSON/FRONTV/NOGHF macros below test for.
 */
static const char vsvfn[26] = {
  /* A   B   C   D   E   F   G   H   I   J   K   L   M */
     1, 16,  4, 16,  9,  2,  4, 16,  9,  2,  0,  2,  2,
  /* N   O   P   Q   R   S   T   U   V   W   X   Y   Z */
     2,  1,  4,  0,  2,  4,  4,  1,  0,  0,  0,  8,  0
};

/*
 * Return the class bits of an upper-case letter 'A'..'Z', or 0 for any
 * other value (including the NUL bytes that surround the word in the work
 * buffer), so the table is never indexed out of range.
 */
static int
letter_class(int ch)
{
    return (ch >= 'A' && ch <= 'Z') ? vsvfn[ch - 'A'] : 0;
}

#define FN(x)      letter_class(x)
#define VOWEL(x)   (FN(x) & 1)     /* AEIOU */
#define SAME(x)    (FN(x) & 2)     /* FJLMNR */
#define VARSON(x)  (FN(x) & 4)     /* CGPST */
#define FRONTV(x)  (FN(x) & 8)     /* EIY */
#define NOGHF(x)   (FN(x) & 16)    /* BDH */

#define MAXMET 4    /* maximum number of characters in a code */
#define TRANS  32   /* work buffer size; at most TRANS - 1 letters are read */

/*
 * Translate a word into its short phonetic (Metaphone) code.
 *
 * word: NUL-terminated text.  Only the leading run of alphabetic
 *       characters is used, and at most TRANS - 1 of them; case is
 *       ignored.
 *
 * Returns a pointer to a static buffer holding a NUL-terminated code of at
 * most MAXMET characters.  The code is empty when word does not start with
 * a letter.  The buffer is overwritten by the next call, so copy the result
 * if it must outlive that call.
 */
char *
metaphone(unsigned char *word)
{
    static char metaph[MAXMET + 1];     /* result, reused by every call */
    /*
     * Work copy of the word.  ntrans[0] stays NUL so that n[-1] is valid at
     * the first letter, and the buffer is NUL padded past the last letter so
     * that the n[1]..n[3] look-aheads below stay inside the array.
     */
    unsigned char ntrans[TRANS + 2];
    unsigned char *n, *n_start, *n_end;
    unsigned char ch;
    char *out, *out_end;

    memset(ntrans, '\0', sizeof ntrans);
    memset(metaph, '\0', sizeof metaph);

    /* Isolate the word and make it upper case. */
    n = ntrans + 1;
    n_end = ntrans + TRANS;
    while ((ch = *word++) != '\0' && n < n_end && isalpha(ch))
        *n++ = (unsigned char)toupper(ch);

    n_end = n;              /* one past the last letter */
    n = ntrans + 1;

    /* Special handling of the first letter(s). */
    switch (*n) {
    case '\0':
        /* No word found: hand back the (empty) static buffer, never a
         * pointer into the local work area. */
        return metaph;

    case 'P':
    case 'G':
    case 'K':
        if (n[1] == 'N')    /* PN-, GN-, KN-: skip the first letter */
            *n++ = '\0';
        break;

    case 'A':
        if (n[1] == 'E')    /* AE-: skip the A */
            *n++ = '\0';
        break;

    case 'W':
        switch (n[1]) {
        case 'H':
            /* WH-: start at a 'W' that is preceded by a planted 'E', so
             * the W rule below (which looks at n[-1]) transcribes it. */
            n[1] = *n;
            *n++ = 'E';
            break;
        case 'R':           /* WR-: skip the W */
            *n++ = '\0';
            break;
        default:
            break;
        }
        break;

    case 'X':               /* initial X sounds like S */
        *n = 'S';
        break;

    default:
        break;
    }

    /* Transcribe the rest of the word, one input letter per iteration. */
    out = metaph;
    out_end = metaph + MAXMET;
    n_start = n;
    for (; out < out_end && n < n_end; n++) {
        char code = '\0';   /* '\0' means the letter is silent */

        /* Drop double letters except CC. */
        if (n[-1] == *n && *n != 'C')
            continue;

        /* FJLMNR, or a vowel in first position, stand for themselves. */
        if (SAME(*n) || (n == n_start && VOWEL(*n))) {
            *out++ = (char)*n;
            continue;
        }

        switch (*n) {
        case 'B':
            /* Silent only in a word-final "MB". */
            if (n + 1 < n_end || n[-1] != 'M')
                code = 'B';
            break;

        case 'C':
            /* Silent in "SCE", "SCI", "SCY". */
            if (n[-1] != 'S' || !FRONTV(n[1])) {
                if (n[1] == 'I' && n[2] == 'A')
                    code = 'X';
                else if (FRONTV(n[1]))
                    code = 'S';
                else if (n[1] == 'H')
                    code = ((n == n_start && !VOWEL(n[2])) ||
                            n[-1] == 'S') ? 'K' : 'X';
                else
                    code = 'K';
            }
            break;

        case 'D':
            code = (n[1] == 'G' && FRONTV(n[2])) ? 'J' : 'T';
            break;

        case 'G':
            /*
             * Silent in "GH" not followed by a vowel, in a word-final
             * "GN", in "GNED", and in "DGE"/"DGI"/"DGY" (where the D
             * already produced the J).
             */
            if ((n[1] != 'H' || VOWEL(n[2])) &&
                (n[1] != 'N' || (n + 2 < n_end &&
                                 (n[2] != 'E' || n[3] != 'D'))) &&
                (n[-1] != 'D' || !FRONTV(n[1])))
                code = (FRONTV(n[1]) && n[2] != 'G') ? 'J' : 'K';
            break;

        case 'H':
            /* Silent after CGPST, and after a vowel unless a vowel follows. */
            if (!VARSON(n[-1]) && (!VOWEL(n[-1]) || VOWEL(n[1])))
                code = 'H';
            break;

        case 'K':
            if (n[-1] != 'C')   /* "CK": the C already produced the K */
                code = 'K';
            break;

        case 'P':
            code = (n[1] == 'H') ? 'F' : 'P';
            break;

        case 'Q':
            code = 'K';
            break;

        case 'S':
            code = (n[1] == 'H' ||
                    (n[1] == 'I' && (n[2] == 'O' || n[2] == 'A')))
                   ? 'X' : 'S';
            break;

        case 'T':
            if (n[1] == 'I' && (n[2] == 'O' || n[2] == 'A'))
                code = 'X';
            else if (n[1] == 'H')
                code = 'O';     /* "TH" is coded as the letter O */
            /* NOTE(review): this keeps T in "TCH" and drops it in "TC"
             * followed by anything else; published Metaphone rules drop
             * T in "TCH" -- confirm which is intended. */
            else if (n[1] != 'C' || n[2] == 'H')
                code = 'T';
            break;

        case 'V':
            code = 'F';
            break;

        case 'W':
            /* NOTE(review): tests the preceding letter; published rules
             * test the following one -- confirm. */
            if (VOWEL(n[-1]))
                code = 'W';
            break;

        case 'Y':
            /* NOTE(review): kept when NOT followed by a vowel; published
             * rules keep Y only before a vowel -- confirm. */
            if (!VOWEL(n[1]))
                code = 'Y';
            break;

        case 'X':
            /* X sounds like "KS".  (An initial X was already rewritten to
             * S above, so it never reaches this case.) */
            *out++ = 'K';
            if (out < out_end)
                code = 'S';
            break;

        case 'Z':
            code = 'S';
            break;

        default:                /* remaining vowels are silent */
            break;
        }

        if (code != '\0')
            *out++ = code;
    }
    return metaph;
}
197:
#ifdef TEST
#include <stdio.h>
#include <misc.h>

/*
 * Interactive test driver, built only when TEST is defined: repeatedly
 * obtains a string through ask() and prints its Metaphone code, one per
 * line, until ask() returns NULL.
 *
 * NOTE(review): ask() comes from the project header <misc.h>; presumably it
 * prompts with "string" and stores the reply in buf.  It is not told the
 * size of buf here -- confirm it cannot write more than 80 bytes.
 */
int
main(void)
{
    char buf[80];

    while (NULL != ask(buf, "string"))
        printf("%s\n", metaphone((unsigned char *)buf));
    return 0;
}
#endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.