|
|
1.1 ! root 1: /* ! 2: * ! 3: * Rune library routines for supporting UTF encoding. ! 4: * ! 5: */ ! 6: ! 7: #include "rune.h" ! 8: ! 9: #ifdef RUNELIB ! 10: enum ! 11: { ! 12: Char1 = Runeself, Rune1 = Runeself, ! 13: Char21 = 0xA1, Rune21 = 0x0100, ! 14: Char22 = 0xF6, Rune22 = 0x4016, ! 15: Char3 = 0xFC, Rune3 = 0x10000, /* really 0x38E2E */ ! 16: Esc = 0xBE, Bad = Runeerror, ! 17: }; ! 18: ! 19: static unsigned char U[256]; ! 20: static unsigned char T[256]; ! 21: ! 22: static ! 23: void ! 24: mktable() ! 25: { ! 26: int i, u; ! 27: ! 28: for(i=0; i<256; i++) { ! 29: u = i + (0x5E-0xA0); ! 30: if(i < 0xA0) ! 31: u = i + (0xDF-0x7F); ! 32: if(i < 0x7F) ! 33: u = i + (0x00-0x21); ! 34: if(i < 0x21) ! 35: u = i + (0xBE-0x00); ! 36: U[i] = u; ! 37: T[u] = i; ! 38: } ! 39: } ! 40: ! 41: int ! 42: chartorune(rune, str) ! 43: Rune *rune; ! 44: char *str; ! 45: { ! 46: int c, c1, c2; ! 47: long l; ! 48: ! 49: if(U[0] == 0) ! 50: mktable(); ! 51: ! 52: /* ! 53: * one character sequence ! 54: * 00000-0009F => 00-9F ! 55: */ ! 56: c = *(unsigned char*)str; ! 57: if(c < Char1) { ! 58: *rune = c; ! 59: return 1; ! 60: } ! 61: ! 62: /* ! 63: * two character sequence ! 64: * 000A0-000FF => A0; A0-FF ! 65: */ ! 66: c1 = *(unsigned char*)(str+1); ! 67: if(c < Char21) { ! 68: if(c1 >= Rune1 && c1 < Rune21) { ! 69: *rune = c1; ! 70: return 2; ! 71: } ! 72: goto bad; ! 73: } ! 74: ! 75: /* ! 76: * two character sequence ! 77: * 00100-04015 => A1-F5; 21-7E/A0-FF ! 78: */ ! 79: c1 = U[c1]; ! 80: if(c1 >= Esc) ! 81: goto bad; ! 82: if(c < Char22) { ! 83: *rune = (c-Char21)*Esc + c1 + Rune21; ! 84: return 2; ! 85: } ! 86: ! 87: /* ! 88: * three character sequence ! 89: * 04016-38E2D => A6-FB; 21-7E/A0-FF ! 90: */ ! 91: c2 = U[*(unsigned char*)(str+2)]; ! 92: if(c2 >= Esc) ! 93: goto bad; ! 94: if(c < Char3) { ! 95: l = (c-Char22)*Esc*Esc + c1*Esc + c2 + Rune22; ! 96: if(l >= Rune3) ! 97: goto bad; ! 98: *rune = l; ! 99: return 3; ! 100: } ! 101: ! 102: /* ! 103: * bad decoding ! 104: */ ! 105: bad: ! 106: *rune = Bad; ! 107: return 1; ! 108: } ! 109: ! 110: int ! 111: runetochar(str, rune) ! 112: char *str; ! 113: Rune *rune; ! 114: { ! 115: long c; ! 116: ! 117: if(T[0] == 0) ! 118: mktable(); ! 119: ! 120: /* ! 121: * one character sequence ! 122: * 00000-0009F => 00-9F ! 123: */ ! 124: c = *rune; ! 125: if(c < Rune1) { ! 126: str[0] = c; ! 127: return 1; ! 128: } ! 129: ! 130: /* ! 131: * two character sequence ! 132: * 000A0-000FF => A0; A0-FF ! 133: */ ! 134: if(c < Rune21) { ! 135: str[0] = Char1; ! 136: str[1] = c; ! 137: return 2; ! 138: } ! 139: ! 140: /* ! 141: * two character sequence ! 142: * 00100-04015 => A1-F5; 21-7E/A0-FF ! 143: */ ! 144: if(c < Rune22) { ! 145: c -= Rune21; ! 146: str[0] = c/Esc + Char21; ! 147: str[1] = T[c%Esc]; ! 148: return 2; ! 149: } ! 150: ! 151: /* ! 152: * three character sequence ! 153: * 04016-38E2D => A6-FB; 21-7E/A0-FF ! 154: */ ! 155: c -= Rune22; ! 156: str[0] = c/(Esc*Esc) + Char22; ! 157: str[1] = T[c/Esc%Esc]; ! 158: str[2] = T[c%Esc]; ! 159: return 3; ! 160: } ! 161: ! 162: int ! 163: runelen(c) ! 164: long c; ! 165: { ! 166: Rune rune; ! 167: char str[10]; ! 168: ! 169: rune = c; ! 170: return runetochar(str, &rune); ! 171: } ! 172: ! 173: int ! 174: fullrune(str, n) ! 175: char *str; ! 176: int n; ! 177: { ! 178: int c; ! 179: ! 180: if(n > 0) { ! 181: c = *(unsigned char*)str; ! 182: if(c < Char1) ! 183: return 1; ! 184: if(n > 1) ! 185: if(c < Char22 || n > 2) ! 186: return 1; ! 187: } ! 188: return 0; ! 189: } ! 190: ! 191: #endif RUNELIB
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.