|
|
1.1 root 1: #include <ctype.h>
2: #include "tdef.h"
3: #include "ext.h"
4: #define HY_BIT 0200 /* stuff in here only works for ascii */
5:
6: /*
7: * troff8.c
8: *
9: * hyphenation
10: */
11:
/* exception word list: NUL-terminated entries appended by casehw() and scanned by exword(); an empty entry ends the list */
char hbuf[NHEX];
/* next free position in hbuf; advanced by casehw() after each stored word */
char *nexth = hbuf;
/* right-hand limit of the part of the word still being examined; set by hyphen(), moved left by suffix() and digram() */
tchar *hyend;
#define THRESH 160 /*digram goodness threshold*/
/* threshold currently in force; caseht() resets or replaces it, digram() compares against it */
int thresh = THRESH;
17:
18: hyphen(wp)
19: tchar *wp;
20: {
21: register j;
22: register tchar *i;
23:
24: i = wp;
25: while (punct(cbits(*i++)))
26: ;
27: if (!alph(cbits(*--i)))
28: return;
29: wdstart = i++;
30: while (alph(cbits(*i++)))
31: ;
32: hyend = wdend = --i - 1;
33: while (punct(cbits(*i++)))
34: ;
35: if (*--i)
36: return;
37: if (wdend - wdstart - 4 < 0)
38: return;
39: hyp = hyptr;
40: *hyp = 0;
41: hyoff = 2;
42: if (!exword() && !suffix())
43: digram();
44: *hyp++ = 0;
45: if (*hyptr)
46: for (j = 1; j; ) {
47: j = 0;
48: for (hyp = hyptr + 1; *hyp != 0; hyp++) {
49: if (*(hyp - 1) > *hyp) {
50: j++;
51: i = *hyp;
52: *hyp = *(hyp - 1);
53: *(hyp - 1) = i;
54: }
55: }
56: }
57: }
58:
59:
/*
 * punct - return 1 if character code i is non-zero and not a letter
 * (as decided by alph()), otherwise 0.
 */
punct(i)
{
	return(i != 0 && !alph(i));
}
67:
68:
/*
 * alph - return 1 if character code i is an unaccented letter
 * 'a'..'z' or 'A'..'Z', otherwise 0.
 */
alph(i)
{
	if (i >= 'a' && i <= 'z')
		return(1);
	if (i >= 'A' && i <= 'Z')
		return(1);
	return(0);
}
76:
77:
78: caseht()
79: {
80: thresh = THRESH;
81: if (skip())
82: return;
83: noscale++;
84: thresh = atoi();
85: noscale = 0;
86: }
87:
88:
89: casehw()
90: {
91: register i, k;
92: register char *j;
93: tchar t;
94:
95: k = 0;
96: while (!skip()) {
97: if ((j = nexth) >= hbuf + NHEX - 2)
98: goto full;
99: for (; ; ) {
100: if (ismot(t = getch()))
101: continue;
102: i = cbits(t);
103: if (i == ' ' || i == '\n') {
104: *j++ = 0;
105: nexth = j;
106: *j = 0;
107: if (i == ' ')
108: break;
109: else
110: return;
111: }
112: if (i == '-') {
113: k = HY_BIT;
114: continue;
115: }
116: *j++ = maplow(i) | k;
117: k = 0;
118: if (j >= (hbuf + NHEX - 2))
119: goto full;
120: }
121: }
122: return;
123: full:
124: errprint("exception word list full.");
125: *nexth = 0;
126: }
127:
128:
129: exword()
130: {
131: register tchar *w;
132: register char *e;
133: char *save;
134:
135: e = hbuf;
136: while (1) {
137: save = e;
138: if (*e == 0)
139: return(0);
140: w = wdstart;
141: while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
142: e++;
143: w++;
144: };
145: if (!*e) {
146: if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
147: w = wdstart;
148: for (e = save; *e; e++) {
149: if (*e & HY_BIT)
150: *hyp++ = w;
151: if (hyp > (hyptr + NHYP - 1))
152: hyp = hyptr + NHYP - 1;
153: w++;
154: }
155: return(1);
156: } else {
157: e++;
158: continue;
159: }
160: } else
161: while (*e++)
162: ;
163: }
164: }
165:
166:
167: suffix()
168: {
169: register tchar *w;
170: register char *s, *s0;
171: tchar i;
172: extern char *suftab[];
173: extern tchar *chkvow();
174:
175: again:
176: if (!alph(i = cbits(*hyend)))
177: return(0);
178: if (i < 'a')
179: i -= 'A' - 'a';
180: if ((s0 = suftab[i-'a']) == 0)
181: return(0);
182: for (;;) {
183: if ((i = *s0 & 017) == 0)
184: return(0);
185: s = s0 + i - 1;
186: w = hyend - 1;
187: while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
188: s--;
189: w--;
190: }
191: if (s == s0)
192: break;
193: s0 += i;
194: }
195: s = s0 + i - 1;
196: w = hyend;
197: if (*s0 & HY_BIT)
198: goto mark;
199: while (s > s0) {
200: w--;
201: if (*s-- & HY_BIT) {
202: mark:
203: hyend = w - 1;
204: if (*s0 & 0100)
205: continue;
206: if (!chkvow(w))
207: return(0);
208: *hyp++ = w;
209: }
210: }
211: if (*s0 & 040)
212: return(0);
213: if (exword())
214: return(1);
215: goto again;
216: }
217:
218:
/*
 * maplow - fold an upper-case ASCII letter to lower case.
 *
 * i is an arbitrary character code.  Codes 'A'..'Z' are returned as
 * 'a'..'z'; every other value, including zero, negative values and
 * codes above 255, is returned unchanged.
 *
 * The range test is done by hand rather than with isupper()/tolower():
 * those are undefined for arguments that are neither EOF nor
 * representable as unsigned char, and their result depends on the
 * locale, whereas the tables in this file are ASCII only.
 */
int maplow(i)
register int i;
{
	if (i >= 'A' && i <= 'Z')
		i += 'a' - 'A';
	return(i);
}
226:
227:
/*
 * vowel - return 1 if character code i is one of a, e, i, o, u or y
 * in either case, otherwise 0.
 */
vowel(i)
int i;
{
	static char vowels[] = "aAeEiIoOuUyY";
	register char *v;

	for (v = vowels; *v != 0; v++)
		if (i == *v)
			return(1);
	return(0);
}
243:
244:
245: tchar *chkvow(w)
246: tchar *w;
247: {
248: while (--w >= wdstart)
249: if (vowel(cbits(*w)))
250: return(w);
251: return(0);
252: }
253:
254:
255: digram()
256: {
257: register tchar *w;
258: register val;
259: tchar * nhyend, *maxw;
260: int maxval;
261: extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
262:
263: again:
264: if (!(w = chkvow(hyend + 1)))
265: return;
266: hyend = w;
267: if (!(w = chkvow(hyend)))
268: return;
269: nhyend = w;
270: maxval = 0;
271: w--;
272: while (++w < hyend && w < wdend - 1) {
273: val = 1;
274: if (w == wdstart)
275: val *= dilook('a', cbits(*w), bxh);
276: else if (w == wdstart + 1)
277: val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
278: else
279: val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
280: val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
281: val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
282: if (val > maxval) {
283: maxval = val;
284: maxw = w + 1;
285: }
286: }
287: hyend = nhyend;
288: if (maxval > thresh)
289: *hyp++ = maxw;
290: goto again;
291: }
292:
293:
/*
 * dilook - fetch the 4-bit weight for the letter pair (a, b) from the
 * packed table t.
 *
 * The row is selected by a and the column by b, both lower-cased and
 * taken relative to 'a'.  Each byte of a row holds two weights: the
 * high nibble belongs to the even column, the low nibble to the odd
 * one.  Returns a value in 0..15.
 */
dilook(a, b, t)
int a, b;
char t[26][13];
{
	register int row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	packed = t[row][col / 2];
	if ((col & 01) == 0)
		packed >>= 4;
	return(packed & 017);
}
305:
306:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.