|
|
1.1 root 1: /*-
2: * Copyright (c) 1990 The Regents of the University of California.
3: * All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Vern Paxson.
7: *
8: * The United States Government has rights in this work pursuant
9: * to contract no. DE-AC03-76SF00098 between the United States
10: * Department of Energy and the University of California.
11: *
12: * Redistribution and use in source and binary forms are permitted provided
13: * that: (1) source distributions retain this entire copyright notice and
14: * comment, and (2) distributions including binaries display the following
15: * acknowledgement: ``This product includes software developed by the
16: * University of California, Berkeley and its contributors'' in the
17: * documentation or other materials provided with the distribution and in
18: * all advertising materials mentioning features or use of this software.
19: * Neither the name of the University nor the names of its contributors may
20: * be used to endorse or promote products derived from this software without
21: * specific prior written permission.
22: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
23: * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
24: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25: */
26:
27: #ifndef lint
28: static char sccsid[] = "@(#)ecs.c 5.2 (Berkeley) 6/18/90";
29: #endif /* not lint */
30:
31: /* ecs - equivalence class routines */
32:
33: #include "flexdef.h"
34:
35: /* ccl2ecl - convert character classes to set of equivalence classes
36: *
37: * synopsis
38: * ccl2ecl();
39: */
40:
41: void ccl2ecl()
42:
43: {
44: int i, ich, newlen, cclp, ccls, cclmec;
45:
46: for ( i = 1; i <= lastccl; ++i )
47: {
48: /* we loop through each character class, and for each character
49: * in the class, add the character's equivalence class to the
50: * new "character" class we are creating. Thus when we are all
51: * done, character classes will really consist of collections
52: * of equivalence classes
53: */
54:
55: newlen = 0;
56: cclp = cclmap[i];
57:
58: for ( ccls = 0; ccls < ccllen[i]; ++ccls )
59: {
60: ich = ccltbl[cclp + ccls];
61: cclmec = ecgroup[ich];
62:
63: if ( xlation && cclmec < 0 )
64: {
65: /* special hack--if we're doing %t tables then it's
66: * possible that no representative of this character's
67: * equivalence class is in the ccl. So waiting till
68: * we see the representative would be disastrous. Instead,
69: * we add this character's equivalence class anyway, if it's
70: * not already present.
71: */
72: int j;
73:
74: /* this loop makes this whole process n^2; but we don't
75: * really care about %t performance anyway
76: */
77: for ( j = 0; j < newlen; ++j )
78: if ( ccltbl[cclp + j] == -cclmec )
79: break;
80:
81: if ( j >= newlen )
82: { /* no representative yet, add this one in */
83: ccltbl[cclp + newlen] = -cclmec;
84: ++newlen;
85: }
86: }
87:
88: else if ( cclmec > 0 )
89: {
90: ccltbl[cclp + newlen] = cclmec;
91: ++newlen;
92: }
93: }
94:
95: ccllen[i] = newlen;
96: }
97: }
98:
99:
100: /* cre8ecs - associate equivalence class numbers with class members
101: *
102: * synopsis
103: * int cre8ecs();
104: * number of classes = cre8ecs( fwd, bck, num );
105: *
106: * fwd is the forward linked-list of equivalence class members. bck
107: * is the backward linked-list, and num is the number of class members.
108: *
109: * Returned is the number of classes.
110: */
111:
112: int cre8ecs( fwd, bck, num )
113: int fwd[], bck[], num;
114:
115: {
116: int i, j, numcl;
117:
118: numcl = 0;
119:
120: /* create equivalence class numbers. From now on, abs( bck(x) )
121: * is the equivalence class number for object x. If bck(x)
122: * is positive, then x is the representative of its equivalence
123: * class.
124: */
125: for ( i = 1; i <= num; ++i )
126: if ( bck[i] == NIL )
127: {
128: bck[i] = ++numcl;
129: for ( j = fwd[i]; j != NIL; j = fwd[j] )
130: bck[j] = -numcl;
131: }
132:
133: return ( numcl );
134: }
135:
136:
137: /* ecs_from_xlation - associate equivalence class numbers using %t table
138: *
139: * synopsis
140: * numecs = ecs_from_xlation( ecmap );
141: *
142: * Upon return, ecmap will map each character code to its equivalence
143: * class. The mapping will be positive if the character is the representative
144: * of its class, negative otherwise.
145: *
146: * Returns the number of equivalence classes used.
147: */
148:
149: int ecs_from_xlation( ecmap )
150: int ecmap[];
151:
152: {
153: int i;
154: int nul_is_alone = false;
155: int did_default_xlation_class = false;
156:
157: if ( xlation[0] != 0 )
158: {
159: /* if NUL shares its translation with other characters, choose one
160: * of the other characters as the representative for the equivalence
161: * class. This allows a cheap test later to see whether we can
162: * do away with NUL's equivalence class.
163: */
164: for ( i = 1; i < csize; ++i )
165: if ( xlation[i] == -xlation[0] )
166: {
167: xlation[i] = xlation[0];
168: ecmap[0] = -xlation[0];
169: break;
170: }
171:
172: if ( i >= csize )
173: /* didn't find a companion character--remember this fact */
174: nul_is_alone = true;
175: }
176:
177: for ( i = 1; i < csize; ++i )
178: if ( xlation[i] == 0 )
179: {
180: if ( did_default_xlation_class )
181: ecmap[i] = -num_xlations;
182:
183: else
184: {
185: /* make an equivalence class for those characters not
186: * specified in the %t table
187: */
188: ++num_xlations;
189: ecmap[i] = num_xlations;
190: did_default_xlation_class = true;
191: }
192: }
193:
194: else
195: ecmap[i] = xlation[i];
196:
197: if ( nul_is_alone )
198: /* force NUL's equivalence class to be the last one */
199: {
200: ++num_xlations;
201: ecmap[0] = num_xlations;
202:
203: /* there's actually a bug here: if someone is fanatic enough to
204: * put every character in its own translation class, then right
205: * now we just promoted NUL's equivalence class to be csize + 1;
206: * we can handle NUL's class number being == csize (by instead
207: * putting it in its own table), but we can't handle some *other*
208: * character having to be put in its own table, too. So in
209: * this case we bail out.
210: */
211: if ( num_xlations > csize )
212: flexfatal( "too many %t classes!" );
213: }
214:
215: return num_xlations;
216: }
217:
218:
219: /* mkeccl - update equivalence classes based on character class xtions
220: *
221: * synopsis
222: * Char ccls[];
223: * int lenccl, fwd[llsiz], bck[llsiz], llsiz, NUL_mapping;
224: * mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping );
225: *
226: * where ccls contains the elements of the character class, lenccl is the
227: * number of elements in the ccl, fwd is the forward link-list of equivalent
228: * characters, bck is the backward link-list, and llsiz size of the link-list
229: *
230: * NUL_mapping is the value which NUL (0) should be mapped to.
231: */
232:
233: void mkeccl( ccls, lenccl, fwd, bck, llsiz, NUL_mapping )
234: Char ccls[];
235: int lenccl, fwd[], bck[], llsiz, NUL_mapping;
236:
237: {
238: int cclp, oldec, newec;
239: int cclm, i, j;
240: static unsigned char cclflags[CSIZE]; /* initialized to all '\0' */
241:
242: /* note that it doesn't matter whether or not the character class is
243: * negated. The same results will be obtained in either case.
244: */
245:
246: cclp = 0;
247:
248: while ( cclp < lenccl )
249: {
250: cclm = ccls[cclp];
251:
252: if ( NUL_mapping && cclm == 0 )
253: cclm = NUL_mapping;
254:
255: oldec = bck[cclm];
256: newec = cclm;
257:
258: j = cclp + 1;
259:
260: for ( i = fwd[cclm]; i != NIL && i <= llsiz; i = fwd[i] )
261: { /* look for the symbol in the character class */
262: for ( ; j < lenccl; ++j )
263: {
264: register int ccl_char;
265:
266: if ( NUL_mapping && ccls[j] == 0 )
267: ccl_char = NUL_mapping;
268: else
269: ccl_char = ccls[j];
270:
271: if ( ccl_char > i )
272: break;
273:
274: if ( ccl_char == i && ! cclflags[j] )
275: {
276: /* we found an old companion of cclm in the ccl.
277: * link it into the new equivalence class and flag it as
278: * having been processed
279: */
280:
281: bck[i] = newec;
282: fwd[newec] = i;
283: newec = i;
284: cclflags[j] = 1; /* set flag so we don't reprocess */
285:
286: /* get next equivalence class member */
287: /* continue 2 */
288: goto next_pt;
289: }
290: }
291:
292: /* symbol isn't in character class. Put it in the old equivalence
293: * class
294: */
295:
296: bck[i] = oldec;
297:
298: if ( oldec != NIL )
299: fwd[oldec] = i;
300:
301: oldec = i;
302: next_pt:
303: ;
304: }
305:
306: if ( bck[cclm] != NIL || oldec != bck[cclm] )
307: {
308: bck[cclm] = NIL;
309: fwd[oldec] = NIL;
310: }
311:
312: fwd[newec] = NIL;
313:
314: /* find next ccl member to process */
315:
316: for ( ++cclp; cclflags[cclp] && cclp < lenccl; ++cclp )
317: {
318: /* reset "doesn't need processing" flag */
319: cclflags[cclp] = 0;
320: }
321: }
322: }
323:
324:
325: /* mkechar - create equivalence class for single character
326: *
327: * synopsis
328: * int tch, fwd[], bck[];
329: * mkechar( tch, fwd, bck );
330: */
331:
332: void mkechar( tch, fwd, bck )
333: int tch, fwd[], bck[];
334:
335: {
336: /* if until now the character has been a proper subset of
337: * an equivalence class, break it away to create a new ec
338: */
339:
340: if ( fwd[tch] != NIL )
341: bck[fwd[tch]] = bck[tch];
342:
343: if ( bck[tch] != NIL )
344: fwd[bck[tch]] = fwd[tch];
345:
346: fwd[tch] = NIL;
347: bck[tch] = NIL;
348: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.