|
|
/*
 * Metaphone algorithm for translating a word to
 * a short phonetic equivalent for lookup.
 *	char * metaphone(unsigned char * word)
 *
 * Original algorithm by Larry Phillips
 * Algorithm is in public domain.
 */
#include <ctype.h>
#include <string.h>

/*
 * Per-letter class bits, indexed by (letter - 'A').
 *	1 = vowel, 2 = "same" (spoken as written), 4 = varson,
 *	8 = front vowel, 16 = noghf.
 */
static const char vsvfn[26] = {
	/* A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
	   1,16, 4,16, 9, 2, 4,16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
};

/*
 * Return the class bits for an upper-case ASCII letter and 0 for anything
 * else (including the NUL padding around the work buffer).  The range check
 * keeps letters that a non-"C" locale lets through isalpha()/toupper() from
 * indexing outside the table.
 */
static int
letter_flags(int ch)
{
	if (ch < 'A' || ch > 'Z')
		return 0;
	return vsvfn[ch - 'A'];
}

#define FN(x)		letter_flags(x)
#define VOWEL(x)	(FN(x) & 1)	/* AEIOU */
#define SAME(x)		(FN(x) & 2)	/* FJLMNR */
#define VARSON(x)	(FN(x) & 4)	/* CGPST */
#define FRONTV(x)	(FN(x) & 8)	/* EIY */
#define NOGHF(x)	(FN(x) & 16)	/* BDH */

/* Append one code letter and move on to the next input letter. */
#define SAY(x)		{ *Metaph++ = (x); continue; }

#define MAXMET	4	/* size of thing created */
#define TRANS	32

/*
 * Translate the leading alphabetic run of `word` into its metaphone code.
 *
 * word:    NUL-terminated string; only the letters before the first
 *          non-alphabetic character are used, and at most TRANS - 1 of them.
 *          A NULL pointer is treated as an empty word.
 * returns: pointer to a static, NUL-terminated buffer holding at most MAXMET
 *          code letters (empty when no word was found).  The buffer is
 *          overwritten by the next call.
 */
char *
metaphone(unsigned char *word)
{
	unsigned char *n, *n_start, *n_end, c;
	unsigned char *Metaph, *metaph_end;
	unsigned char ntrans[TRANS + 2];
	static unsigned char metaph[MAXMET + 1];

	/* clear work areas; the zero padding makes n[-1], n[1].. safe to read */
	memset(ntrans, '\0', sizeof ntrans);
	memset(metaph, '\0', sizeof metaph);

	if (word == NULL)
		return (char *)metaph;

	/* Isolate word and make upper case */
	n = ntrans + 1;
	n_end = ntrans + TRANS;
	while (n < n_end && (c = *word++) != '\0' && isalpha(c))
		*n++ = (unsigned char)toupper(c);

	n_end = n;
	n = ntrans + 1;

	/* process first character */
	switch (*n) {
	case 0:
		/* no word found: hand back the (empty) static result buffer */
		return (char *)metaph;

	case 'P':
	case 'G':
	case 'K':
		/* PN, GN, KN: drop the silent first letter */
		if ('N' == n[1])
			*n++ = '\0';
		break;

	case 'A':
		/* AE: drop the A */
		if ('E' == n[1])
			*n++ = '\0';
		break;

	case 'W':
		switch (n[1]) {
		case 'H':
			/* WH: keep one W and plant a vowel in front of it
			 * so the W rule below speaks it */
			n[1] = *n;
			*n++ = 'E';
			break;
		case 'R':
			/* WR: drop the W */
			*n++ = 0;
			break;
		default:
			break;
		}
		break;

	case 'X':
		/* initial X sounds like S */
		*n = 'S';
		break;

	default:
		break;
	}

	/* Process rest of word; SAY does continue */
	Metaph = metaph;
	metaph_end = metaph + MAXMET;
	n_start = n;
	for (; Metaph < metaph_end; n++) {
		if (n >= n_end)
			break;

		/* Drop double letters except CC */
		if (n[-1] == *n && *n != 'C')
			continue;

		/* check for FJLMNR or first letter vowel */
		if (SAME(*n) || ((n == n_start) && VOWEL(*n)))
			SAY(*n)

		switch (*n) {
		case 'B':
			/* silent only as the last letter after M ("dumb") */
			if ((n + 1) < n_end || n[-1] != 'M')
				SAY(*n)
			break;

		case 'C':
			/* SCE, SCI, SCY: the C is silent */
			if (n[-1] != 'S' || !FRONTV(n[1])) {
				if (n[1] == 'I' && n[2] == 'A')
					SAY('X')
				if (FRONTV(n[1]))
					SAY('S')
				if (n[1] == 'H') {
					if ((n == n_start && !VOWEL(n[2])) ||
					    n[-1] == 'S')
						SAY('K')
					else
						SAY('X')
				} else {
					SAY('K')
				}
			}
			break;

		case 'D':
			SAY((n[1] == 'G' && FRONTV(n[2])) ? 'J' : 'T')

		case 'G':
			if ((n[1] != 'H' || VOWEL(n[2])) &&
			    (n[1] != 'N' || ((n + 1) < n_end &&
			     (n[2] != 'E' || n[3] != 'D'))) &&
			    (n[-1] != 'D' || !FRONTV(n[1])))
				SAY((FRONTV(n[1]) && n[2] != 'G') ? 'J' : 'K')
			break;

		case 'H':
			if (!VARSON(n[-1]) && (!VOWEL(n[-1]) || VOWEL(n[1])))
				SAY('H')
			break;

		case 'K':
			if (n[-1] != 'C')
				SAY('K')
			break;

		case 'P':
			SAY(n[1] == 'H' ? 'F' : 'P')

		case 'Q':
			SAY('K')

		case 'S':
			if (n[1] == 'H' || (n[1] == 'I' &&
			    (n[2] == 'O' || n[2] == 'A')))
				SAY('X')
			else
				SAY('S')

		case 'T':
			if (n[1] == 'I' &&
			    (n[2] == 'O' || n[2] == 'A'))
				SAY('X')
			if (n[1] == 'H')
				SAY('O')
			if (n[1] != 'C' || n[2] == 'H')
				SAY('T')
			break;

		case 'V':
			SAY('F')

		/*
		 * NOTE(review): W is spoken only after a vowel and Y only when
		 * NOT followed by a vowel.  Common Metaphone descriptions test
		 * the following letter for both; kept as found - confirm intent.
		 */
		case 'W':
			if (VOWEL(n[-1]))
				SAY(*n)
			break;

		case 'Y':
			if (!VOWEL(n[1]))
				SAY(*n)
			break;

		case 'X':
			if (n == n_start)
				SAY('S')
			/* X sounds like KS; leave an S behind so the rules for
			 * the following letter see it as their predecessor */
			*n = 'S';
			*Metaph++ = 'K';
			if (Metaph < metaph_end)
				*Metaph++ = 'S';
			continue;

		case 'Z':
			SAY('S')

		default:
			/* remaining letters (non-initial vowels etc.) are silent */
			break;
		}
	}
	return (char *)metaph;
}

#ifdef TEST
#include <misc.h>
main()
{
	char buf[80];

	while (NULL != ask(buf, "string"))
		printf("%s\n", metaphone((unsigned char *)buf));
}
#endif
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.