|
|
1.1 root 1: /*
2: *
3: * Rune library routines for supporting UTF encoding.
4: *
5: */
6:
7: #include "rune.h"
8:
9: #ifdef RUNELIB
10: enum
11: {
12: Char1 = Runeself, Rune1 = Runeself,
13: Char21 = 0xA1, Rune21 = 0x0100,
14: Char22 = 0xF6, Rune22 = 0x4016,
15: Char3 = 0xFC, Rune3 = 0x10000, /* really 0x38E2E */
16: Esc = 0xBE, Bad = Runeerror,
17: };
18:
/*
 * Translation tables for trailing bytes, filled in by mktable().
 * U maps a raw byte to its table value and T maps a table value back
 * to the raw byte.  Once built, neither U[0] (0xBE) nor T[0] (0x21)
 * is zero, which is what the callers test to see whether the tables
 * still need building.
 */
static unsigned char U[256];
static unsigned char T[256];

/*
 * Fill in U and its inverse T.
 * The bytes 21-7E and A0-FF receive the values 00-BD; every other
 * byte receives a value of BE or more.
 */
static
void
mktable()
{
	int byte, val;

	byte = 0;
	while(byte < 256) {
		if(byte < 0x21)
			val = byte + 0xBE;		/* 00-20 => BE-DE */
		else if(byte < 0x7F)
			val = byte - 0x21;		/* 21-7E => 00-5D */
		else if(byte < 0xA0)
			val = byte + (0xDF-0x7F);	/* 7F-9F => DF-FF */
		else
			val = byte - (0xA0-0x5E);	/* A0-FF => 5E-BD */
		U[byte] = val;
		T[val] = byte;
		byte++;
	}
}
40:
41: int
42: chartorune(rune, str)
43: Rune *rune;
44: char *str;
45: {
46: int c, c1, c2;
47: long l;
48:
49: if(U[0] == 0)
50: mktable();
51:
52: /*
53: * one character sequence
54: * 00000-0009F => 00-9F
55: */
56: c = *(unsigned char*)str;
57: if(c < Char1) {
58: *rune = c;
59: return 1;
60: }
61:
62: /*
63: * two character sequence
64: * 000A0-000FF => A0; A0-FF
65: */
66: c1 = *(unsigned char*)(str+1);
67: if(c < Char21) {
68: if(c1 >= Rune1 && c1 < Rune21) {
69: *rune = c1;
70: return 2;
71: }
72: goto bad;
73: }
74:
75: /*
76: * two character sequence
77: * 00100-04015 => A1-F5; 21-7E/A0-FF
78: */
79: c1 = U[c1];
80: if(c1 >= Esc)
81: goto bad;
82: if(c < Char22) {
83: *rune = (c-Char21)*Esc + c1 + Rune21;
84: return 2;
85: }
86:
87: /*
88: * three character sequence
89: * 04016-38E2D => A6-FB; 21-7E/A0-FF
90: */
91: c2 = U[*(unsigned char*)(str+2)];
92: if(c2 >= Esc)
93: goto bad;
94: if(c < Char3) {
95: l = (c-Char22)*Esc*Esc + c1*Esc + c2 + Rune22;
96: if(l >= Rune3)
97: goto bad;
98: *rune = l;
99: return 3;
100: }
101:
102: /*
103: * bad decoding
104: */
105: bad:
106: *rune = Bad;
107: return 1;
108: }
109:
110: int
111: runetochar(str, rune)
112: char *str;
113: Rune *rune;
114: {
115: long c;
116:
117: if(T[0] == 0)
118: mktable();
119:
120: /*
121: * one character sequence
122: * 00000-0009F => 00-9F
123: */
124: c = *rune;
125: if(c < Rune1) {
126: str[0] = c;
127: return 1;
128: }
129:
130: /*
131: * two character sequence
132: * 000A0-000FF => A0; A0-FF
133: */
134: if(c < Rune21) {
135: str[0] = Char1;
136: str[1] = c;
137: return 2;
138: }
139:
140: /*
141: * two character sequence
142: * 00100-04015 => A1-F5; 21-7E/A0-FF
143: */
144: if(c < Rune22) {
145: c -= Rune21;
146: str[0] = c/Esc + Char21;
147: str[1] = T[c%Esc];
148: return 2;
149: }
150:
151: /*
152: * three character sequence
153: * 04016-38E2D => A6-FB; 21-7E/A0-FF
154: */
155: c -= Rune22;
156: str[0] = c/(Esc*Esc) + Char22;
157: str[1] = T[c/Esc%Esc];
158: str[2] = T[c%Esc];
159: return 3;
160: }
161:
162: int
163: runelen(c)
164: long c;
165: {
166: Rune rune;
167: char str[10];
168:
169: rune = c;
170: return runetochar(str, &rune);
171: }
172:
173: int
174: fullrune(str, n)
175: char *str;
176: int n;
177: {
178: int c;
179:
180: if(n > 0) {
181: c = *(unsigned char*)str;
182: if(c < Char1)
183: return 1;
184: if(n > 1)
185: if(c < Char22 || n > 2)
186: return 1;
187: }
188: return 0;
189: }
190:
191: #endif RUNELIB
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.