|
|
1.1 root 1: #include <ctype.h>
2: #include "tdef.h"
3: #define HY_BIT 0200 /* stuff in here only works for ascii */
4:
5: /*
6: troff8.c
7:
8: hyphenation
9: */
10:
11: #include <sgtty.h>
12: #include "ext.h"
13: char hbuf[NHEX];
14: char *nexth = hbuf;
15: tchar *hyend;
16: #define THRESH 160 /*digram goodness threshold*/
17: int thresh = THRESH;
18:
19: hyphen(wp)
20: tchar *wp;
21: {
22: register j;
23: register tchar *i;
24:
25: i = wp;
26: while (punct(cbits(*i++)))
27: ;
28: if (!alph(cbits(*--i)))
29: return;
30: wdstart = i++;
31: while (alph(cbits(*i++)))
32: ;
33: hyend = wdend = --i - 1;
34: while (punct(cbits(*i++)))
35: ;
36: if (*--i)
37: return;
38: if ((wdend - wdstart - 4) < 0)
39: return;
40: hyp = hyptr;
41: *hyp = 0;
42: hyoff = 2;
43: if (!exword() && !suffix())
44: digram();
45: *hyp++ = 0;
46: if (*hyptr)
47: for (j = 1; j; ) {
48: j = 0;
49: for (hyp = hyptr + 1; *hyp != 0; hyp++) {
50: if (*(hyp - 1) > *hyp) {
51: j++;
52: i = *hyp;
53: *hyp = *(hyp - 1);
54: *(hyp - 1) = i;
55: }
56: }
57: }
58: }
59:
60:
/*
 * punct - return 1 if i is neither zero nor a letter (per alph()),
 * otherwise 0.
 */
punct(i)
{
	return(i != 0 && !alph(i));
}
68:
69:
/*
 * alph - return 1 if i is an ASCII letter ('a'..'z' or 'A'..'Z'), else 0.
 */
alph(i)
{
	if (i >= 'A' && i <= 'Z')
		return(1);
	if (i >= 'a' && i <= 'z')
		return(1);
	return(0);
}
77:
78:
79: caseht()
80: {
81: thresh = THRESH;
82: if (skip())
83: return;
84: noscale++;
85: thresh = atoi();
86: noscale = 0;
87: }
88:
89:
90: casehw()
91: {
92: register i, k;
93: register char *j;
94: tchar t;
95:
96: k = 0;
97: while (!skip()) {
98: if ((j = nexth) >= (hbuf + NHEX - 2))
99: goto full;
100: for (; ; ) {
101: if (ismot(t = getch()))
102: continue;
103: i = cbits(t);
104: if (i == ' ' || i == '\n') {
105: *j++ = 0;
106: nexth = j;
107: *j = 0;
108: if (i == ' ')
109: break;
110: else
111: return;
112: }
113: if (i == '-') {
114: k = HY_BIT;
115: continue;
116: }
117: *j++ = maplow(i) | k;
118: k = 0;
119: if (j >= (hbuf + NHEX - 2))
120: goto full;
121: }
122: }
123: return;
124: full:
125: fprintf(stderr, "troff: exception word list full.\n");
126: *nexth = 0;
127: }
128:
129:
130: exword()
131: {
132: register tchar *w;
133: register char *e;
134: char *save;
135:
136: e = hbuf;
137: while (1) {
138: save = e;
139: if (*e == 0)
140: return(0);
141: w = wdstart;
142: while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
143: e++;
144: w++;
145: };
146: if (!*e) {
147: if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
148: w = wdstart;
149: for (e = save; *e; e++) {
150: if (*e & HY_BIT)
151: *hyp++ = w;
152: if (hyp > (hyptr + NHYP - 1))
153: hyp = hyptr + NHYP - 1;
154: w++;
155: }
156: return(1);
157: } else {
158: e++;
159: continue;
160: }
161: } else
162: while (*e++)
163: ;
164: }
165: }
166:
167:
168: suffix()
169: {
170: register tchar *w;
171: register char *s, *s0;
172: tchar i;
173: extern char *suftab[];
174: extern tchar *chkvow();
175:
176: again:
177: if (!alph(cbits(i = cbits(*hyend))))
178: return(0);
179: if (i < 'a')
180: i -= 'A' - 'a';
181: if ((s0 = suftab[i-'a']) == 0)
182: return(0);
183: for (; ; ) {
184: if ((i = *s0 & 017) == 0)
185: return(0);
186: s = s0 + i - 1;
187: w = hyend - 1;
188: while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
189: s--;
190: w--;
191: }
192: if (s == s0)
193: break;
194: s0 += i;
195: }
196: s = s0 + i - 1;
197: w = hyend;
198: if (*s0 & HY_BIT)
199: goto mark;
200: while (s > s0) {
201: w--;
202: if (*s-- & HY_BIT) {
203: mark:
204: hyend = w - 1;
205: if (*s0 & 0100)
206: continue;
207: if (!chkvow(w))
208: return(0);
209: *hyp++ = w;
210: }
211: }
212: if (*s0 & 040)
213: return(0);
214: if (exword())
215: return(1);
216: goto again;
217: }
218:
219:
/*
 * maplow - map an ASCII upper-case letter to lower case.
 *
 * Any other value, including codes outside the unsigned char range, is
 * returned unchanged.  The range test is done by hand, as in alph(),
 * because callers pass arbitrary character codes and the <ctype.h>
 * classification functions are undefined for arguments that are neither
 * EOF nor representable as unsigned char.
 */
int
maplow(i)
int i;
{
	if (i >= 'A' && i <= 'Z')
		i += 'a' - 'A';
	return(i);
}
227:
228:
/*
 * vowel - return 1 if i, lower-cased by maplow(), is one of
 * a e i o u y; otherwise 0.
 */
vowel(i)
int i;
{
	register c;

	c = maplow(i);
	if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u' || c == 'y')
		return(1);
	return(0);
}
244:
245:
246: tchar *chkvow(w)
247: tchar *w;
248: {
249: while (--w >= wdstart)
250: if (vowel(cbits(*w)))
251: return(w);
252: return(0);
253: }
254:
255:
256: digram()
257: {
258: register tchar *w;
259: register val;
260: tchar * nhyend, *maxw;
261: int maxval;
262: extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
263:
264: again:
265: if (!(w = chkvow(hyend + 1)))
266: return;
267: hyend = w;
268: if (!(w = chkvow(hyend)))
269: return;
270: nhyend = w;
271: maxval = 0;
272: w--;
273: while ((++w < hyend) && (w < (wdend - 1))) {
274: val = 1;
275: if (w == wdstart)
276: val *= dilook('a', cbits(*w), bxh);
277: else if (w == wdstart + 1)
278: val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
279: else
280: val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
281: val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
282: val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
283: if (val > maxval) {
284: maxval = val;
285: maxw = w + 1;
286: }
287: }
288: hyend = nhyend;
289: if (maxval > thresh)
290: *hyp++ = maxw;
291: goto again;
292: }
293:
294:
/*
 * dilook - fetch the 4-bit entry for the letter pair (a, b) from table t.
 *
 * The row is selected by a and the byte within the row by b, both
 * lower-cased by maplow() and taken relative to 'a'.  Each byte holds two
 * entries: the high four bits serve an even column, the low four bits an
 * odd one.  Returns a value in 0..15.
 */
dilook(a, b, t)
int a, b;
char t[26][13];
{
	register row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	packed = t[row][col / 2];
	if ((col & 01) == 0)
		packed >>= 4;
	return(packed & 017);
}
306:
307:
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.