|
|
1.1 root 1: #include "tdef.hd"
2: #include "strs.hd"
3: #ifndef INCORE
4: #include "uns.hd"
5: #endif
6:
7: /*
8: troff8.c
9:
10: hyphenation
11: */
12:
13: char hbuf[NHEX];
14: char *nexth = hbuf;
15: int *hyend;
16:
17: #ifndef INCORE
18: extern struct envblock eblock;
19: #else
20: extern struct envblock eblock[NEV];
21: extern int ev;
22: extern char *sufind[];
23: #endif
24:
25: extern long atoi0();
26: extern int **hyp;
27: extern int suffid;
28: unsigned sufoff = -1;
29: extern int noscale;
30: #define THRESH 160 /*digram goodness threshold*/
31: int thresh = THRESH;
32: hyphen(wp)
33: int *wp;
34: {
35: register *i, found;
36:
37: hyp = hyptr;
38: *hyp = 0;
39: i = wp;
40: while(punct(*i++));
41: if(!alph(*--i))return;
42: wdstart = i++;
43: while(alph(*i++));
44: hyend = wdend = --i-1;
45: while(punct(*i++));
46: if(*--i)return;
47: if((wdend-wdstart-4) < 0)return;
48: hyoff = 2;
49: if(!exword() && !suffix()) digram();
50: *hyp++ = 0;
51: if(*hyptr)
52: do {
53: found = 0;
54: for(hyp = hyptr+1; *hyp != 0; hyp++)
55: if(*(hyp-1) > *hyp){
56: found++;
57: i = *hyp;
58: *hyp = *(hyp-1);
59: *(hyp-1) = i; }}
60: while (found);
61: }
/*
 * punct - return 1 if i is neither zero nor a letter (see alph()),
 * otherwise 0.
 */
punct(i)
int i;
{
	return((i && !alph(i)) ? 1 : 0);
}
68: alph(i)
69: int i;
70: {
71: register j;
72:
73: j = i & CMASK;
74: if(((j >= 'A') && (j <= 'Z')) || ((j >= 'a') && (j <= 'z')))
75: return(1);
76: else return(0);
77: }
78: caseht(){
79:
80: thresh = THRESH;
81: if(skip())return;
82: noscale++;
83: thresh = atoi();
84: noscale = 0;
85: }
86: casehw(){
87: register i, k;
88: register char *j;
89:
90: k = 0;
91: while(!skip()){
92: if((j = nexth) >= (hbuf + NHEX - 2)){
93: full:
94: prstr("Exception word list full.\n");
95: *nexth = 0;
96: return;
97: }
98: while(1){
99: if((i = getch()) & MOT)continue;
100: if(((i &= CMASK) == ' ') || (i == '\n')){
101: *j++ = 0;
102: nexth = j;
103: *j = 0;
104: if(i == ' ')break;
105: else return;
106: }
107: if(i == '-'){
108: k = 0200;
109: continue;
110: }
111: *j++ = maplow(i) | k;
112: k = 0;
113: if(j >= (hbuf + NHEX - 2))goto full;
114: }
115: }
116: }
117: exword(){
118: register int *w;
119: register char *e;
120: char *save;
121:
122: e = hbuf;
123: while(1){
124: save = e;
125: if(*e == 0)return(0);
126: w = wdstart;
127: while((*e && (w <= hyend)) &&
128: ((*e & 0177) == maplow(*w & CMASK))){e++; w++;};
129: if(!*e){
130: if(((w-1) == hyend) ||
131: ((w == wdend) && (maplow(*w & CMASK) == 's'))){
132: w = wdstart;
133: for(e = save; *e; e++){
134: if(*e & 0200)*hyp++ = w;
135: if(hyp > (hyptr+NHYP-1))
136: hyp = hyptr+NHYP-1;
137: w++;
138: }
139: return(1);
140: }else{e++; continue;}
141: }else while(*e++);
142: }
143: }
144: suffix(){
145: register int *w;
146: register char *s, *s0;
147: int i;
148: #ifndef INCORE
149: unsigned off;
150: #else
151: char *off;
152: #endif
153: extern int *chkvow();
154: #ifdef INCORE
155: #define getsuf(adr) adr
156: #else
157: extern char *getsuf();
158: #endif
159:
160: again:
161: if(!alph(i = *hyend & CMASK))return(0);
162: if(i < 'a')i -= 'A'; else i -= 'a';
163: #ifndef INCORE
164: if(!(off = sufind.uns[i]))return(0);
165: #else
166: if (!(off = sufind[i])) return(0);
167: #endif
168: while(1){
169: if((i = *(s0 = getsuf(off)) & 017) == 0)return(0);
170: off += i;
171: s = s0 + i - 1;
172: w = hyend - 1;
173: while(((s > s0) && (w >= wdstart)) &&
174: ((*s & 0177) == maplow(*w))){s--; w--;};
175: if(s == s0)break;
176: }
177: s = s0 + i - 1;
178: w = hyend;
179: if(*s0 & 0200)goto mark;
180: while(s > s0){
181: w--;
182: if(*s-- & 0200){
183: mark:
184: hyend = w - 1;
185: if(*s0 & 0100)continue;
186: if(!chkvow(w))return(0);
187: *hyp++ = w;
188: }
189: }
190: if(*s0 & 040)return(0);
191: if(exword())return(1);
192: goto again;
193: }
/*
 * vowel - return 1 if maplow(i) is one of a, e, i, o, u or y,
 * otherwise 0.
 */
vowel(i)
int i;
{
	register c;

	c = maplow(i);
	if ((c == 'a') || (c == 'e') || (c == 'i') ||
	    (c == 'o') || (c == 'u') || (c == 'y'))
		return(1);
	return(0);
}
209: int *chkvow(w)
210: int *w;
211: {
212: while(--w >= wdstart)if(vowel(*w & CMASK))return(w);
213: return(0);
214: }
215: #ifndef INCORE
/*
 * getsuf - fetch the suffix entry at offset x of the suffix file.
 *
 * The low four bits of the first byte give the entry length.  That
 * many bytes are copied through rdsufb() into a static buffer and a
 * zero byte is stored after them.  The returned pointer refers to
 * that buffer, which the next call overwrites.
 */
char *getsuf(x)
unsigned x;
{
	register char *p;
	register n;
	static char suff[20];

	suff[0] = rdsufb(x++);
	n = suff[0] & 017;
	p = &suff[1];
	while (n > 1) {
		*p++ = rdsufb(x++);
		n--;
	}
	suff[suff[0] & 017] = 0;
	return(suff);
}
229: #define SBSZ 128 /*suffix file buffer size*/
230: rdsufb(i)
231: unsigned i;
232: {
233: register unsigned j;
234: static char sufbuf[SBSZ];
235:
236: if((j = i & ~(SBSZ-1)) != sufoff){
237: lseek(suffid, (long)(sufoff = j), 0);
238: read(suffid, sufbuf, SBSZ);
239: }
240: return(sufbuf[i & (SBSZ-1)]);
241: }
242: #endif
243: digram(){
244: register *w, val;
245: int *nhyend, *maxw, maxval;
246: extern char bxh[26][13],bxxh[26][13],xxh[26][13],xhx[26][13],
247: hxx[26][13];
248:
249: again:
250: if(!(w=chkvow(hyend+1)))return;
251: hyend = w;
252: if(!(w=chkvow(hyend)))return;
253: nhyend = w;
254: maxval = 0;
255: w--;
256: while((++w < hyend) && (w < (wdend-1))){
257: val = 1;
258: if(w == wdstart)val *= dilook('a',*w,bxh);
259: else if(w == wdstart+1)val *= dilook(*(w-1),*w,bxxh);
260: else val *= dilook(*(w-1),*w,xxh);
261: val *= dilook(*w, *(w+1), xhx);
262: val *= dilook(*(w+1), *(w+2), hxx);
263: if(val > maxval){
264: maxval = val;
265: maxw = w + 1;
266: }
267: }
268: hyend = nhyend;
269: if(maxval > thresh)*hyp++ = maxw;
270: goto again;
271: }
/*
 * dilook - look up the four-bit value stored in table t for the
 * letter pair (a, b).
 *
 * The row is selected by a and the byte within the row by b; each
 * byte packs the values of two adjacent letters, the even numbered
 * letter in the high four bits and the odd numbered one in the low
 * four bits.  Both letters go through maplow() first.
 */
dilook(a,b,t)
int a, b;
char t[26][13];
{
	register row, col;
	register packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	packed = t[row][col / 2];
	if ((col & 01) == 0)
		packed >>= 4;
	return(packed & 017);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.