|
|
1.1 root 1: # include <stdio.h>
2: # include <ingres.h>
3: # include <aux.h>
4: # include <symbol.h>
5: # include <access.h>
6: # include <func.h>
7: # include <sccs.h>
8:
9: SCCSID(@(#)ksort.c 7.3 8/24/83)
10:
11: # define N 7
12: # define MEM (32768 - 2)
13: # define BUCKETSIZE 4
14: # define ENDKEY MAXDOM + 1
15:
16:
17:
18: /*
19: ** Parameters:
20: **
21: ** argv[1]: Fileset
22: ** argv[2]: trace info (see below)
23: ** argv[3]: file from which Desc is read
24: ** argv[4]: Infile
25: ** argv[5]: Outfile
26: **
27: **
28: ** Trace info comes from trace flag Z37 passed as the
29: ** second parameter. The bits used are:
30: **
31: ** 0001 main trace info
32: ** 0002 secondary trace info
33: ** 0004 terciary trace info
34: ** 0010 don't truncate temps
35: ** 0020 don't unlink temps
36: ** 0040 print am page refs
37: ** 0100 print am tuple gets
38: ** 0200 print tuples as output
39: */
40:
41: char *Infile;
42: char *Outfile;
43: DESC Desc;
44: char Descsort[MAXDOM+1];
45: FILE *Oiop;
46: int Trace;
47: int Tupsize;
48: int Bucket;
49: char File[15];
50: char *Fileset;
51: char *Filep;
52: int Nfiles = 1;
53: int Nlines;
54: long Ccount;
55: char **Lspace;
56: char *Tspace;
57: extern int cmpa();
58: long Tupsout;
59: short Tr[100];
60:
61: main(argc, argv)
62: int argc;
63: char **argv;
64: {
65: extern char *Proc_name;
66: register int i;
67: register int j;
68: unsigned int mem;
69: char *start;
70: int maxkey, rev;
71: extern char *malloc();
72:
73:
74: Proc_name = "KSORT";
75: tT = Tr;
76: if (argc != 6)
77: syserr("argc");
78: Fileset = argv[1];
79: Trace = atoi(argv[2]);
80: if ((i = open(argv[3], 0)) < 0)
81: cant(argv[3]);
82: if (read(i, &Desc, sizeof Desc) < sizeof Desc)
83: syserr("read(Desc)");
84: close(i);
85:
86: /* set up Descsort to indicate the sort order for tuple */
87: /* if domain zero is given prepare to generate "hash bucket"
88: ** value for tuple */
89:
90: maxkey = 0;
91: for (i = 0; i <= Desc.reldum.relatts; i++)
92: if (j = Desc.relgiven[i])
93: {
94: if ((rev = j) < 0)
95: j = -j;
96: if (maxkey < j)
97: maxkey = j;
98: Descsort[--j] = rev < 0 ? -i : i;
99: }
100:
101: Descsort[maxkey] = ENDKEY; /* mark end of list */
102:
103: Tupsize = Desc.reldum.relwid;
104:
105: if (Bucket = (Descsort[0] == 0))
106: {
107: /* we will be generating hash bucket */
108: Tupsize += BUCKETSIZE;
109: Desc.relfrml[0] = BUCKETSIZE;
110: Desc.relfrmt[0] = INT;
111: Desc.reloff[0] = Desc.reldum.relwid;
112: }
113:
114: if (Trace & 01)
115: {
116: printf("ksort: reldum.relatts is %d\n", Desc.reldum.relatts);
117: printf("Bucket is %d,Sort is:\n", Bucket);
118: for (i = 0; (j = Descsort[i]) != ENDKEY; i++)
119: printf("Descsort[%d]=%d\n", i, j);
120: }
121: if (i = (maxkey - Bucket - Desc.reldum.relatts))
122: syserr("%d domains missing\n", -i);
123: Infile = argv[4];
124: Outfile = argv[5];
125:
126: /* get up to 2**15 - 1 bytes of memory for buffers */
127: /* note that mem must end up positive so that Nlines computation is right */
128: mem = MEM; /* take at most 2**15 - 1 bytes */
129: while ((Lspace = (char **) malloc(mem)) == NULL)
130: mem -= 1024;
131:
132: /* compute pointers and sizes into buffer memory */
133: Nlines = mem / (Tupsize + sizeof(char *));
134: Tspace = (char *) (Lspace + Nlines);
135: if (Trace & 01)
136: printf("Tspace=%x,Lspace=%x,Nlines=%x,mem=%d\n",
137: Tspace, Lspace, Nlines, mem);
138:
139: /* set up temp files */
140: concat(ztack("_SYSS", Fileset), "Xaa", File);
141: Filep = File;
142: while (*Filep != 'X')
143: Filep++;
144: Filep++;
145:
146: /* sort stage -- create a bunch of temporaries */
147: Ccount = 0;
148: if (Trace & 01)
149: printf("sorting\n");
150: sort();
151: close(0);
152: if (Trace & 01)
153: {
154: printf("done sorting\n%ld tuples written to %d files\n", Tupsout, Nfiles - 1);
155: printf("sort required %ld compares\n", Ccount);
156: }
157:
158: /* merge stage -- merge up to N temps into a new temp */
159: Ccount = 0;
160: for (i = 1; i + N < Nfiles; i += N)
161: {
162: newfile();
163: merge(i, i + N);
164: }
165:
166: /* merge last set of temps into target file */
167: if (i != Nfiles)
168: {
169: oldfile();
170: merge(i, Nfiles);
171: }
172: if (Trace & 01)
173: {
174: printf("%ld tuples in out file\n", Tupsout);
175: printf("merge required %ld compares\n", Ccount);
176: }
177: term(0);
178: }
179: /*
180: ** SORT
181: */
182:
183: sort()
184: {
185: register char *cp;
186: register char **lp;
187: register int i;
188: int done;
189: long ntups;
190: struct tup_id tid, ltid;
191: char *xp;
192: long pageid;
193: long rhash();
194:
195: done = 0;
196: ntups = 0;
197: Tupsout = 0;
198: if ((Desc.relfp = open(Infile, 0)) < 0)
199: cant(Infile);
200: Desc.relopn = (Desc.relfp + 1) * 5;
201:
202: /* initialize tids for full scan */
203: pageid = 0;
204: tid.line_id = -1;
205: stuff_page(&tid, &pageid);
206: pageid = -1;
207: ltid.line_id = -1;
208: stuff_page(<id, &pageid);
209:
210: do
211: {
212: cp = Tspace;
213: lp = Lspace;
214: while (lp < Lspace + Nlines)
215: {
216: if ((i = get(&Desc, &tid, <id, cp, TRUE)) != 0)
217: {
218: if (i < 0)
219: syserr("get %d", i);
220: close(Desc.relfp);
221: Desc.relopn = 0;
222: done++;
223: break;
224: }
225: if (Trace & 0100)
226: printup(&Desc, cp);
227: if (Bucket)
228: {
229: /* compute hash bucket and insert at end */
230: pageid = rhash(&Desc, cp);
231: bmove(&pageid, cp + Desc.reldum.relwid, BUCKETSIZE);
232: }
233: *lp++ = cp;
234: cp += Tupsize;
235: ntups++;
236: }
237: qsort(Lspace, lp - Lspace, sizeof(char *), cmpa);
238: if (done == 0 || Nfiles != 1)
239: newfile();
240: else
241: oldfile();
242: while (lp > Lspace)
243: {
244: cp = *--lp;
245: xp = cp;
246: if ((lp == Lspace) || (cmpa(&xp, &lp[-1]) != 0))
247: {
248: if (Trace & 0200)
249: {
250: printf("writing ");
251: printup(&Desc, cp);
252: }
253: if ((i = fwrite(cp, 1, Tupsize, Oiop)) != Tupsize)
254: syserr("cant write outfile %d (%d)", i, Nfiles);
255: Tupsout++;
256: }
257: }
258: fclose(Oiop);
259: } while (done == 0);
260: if (Trace & 01)
261: printf("%ld tuples in\n", ntups);
262: }
263: /*
264: ** MERGE
265: */
266:
267: struct merg
268: {
269: char tup[MAXTUP+BUCKETSIZE];
270: int filedes;
271: FILE *fiop;
272: };
273:
274: merge(a, b)
275: int a;
276: int b;
277: {
278: register struct merg *merg;
279: register int i, j;
280: char *f, *yesno;
281: struct merg *mbuf[N + 1];
282: char *setfil();
283:
284: if (Trace & 02)
285: printf("merge %d to %d\n", a, b);
286: merg = (struct merg *) Lspace;
287: j = 0;
288: for (i = a; i < b; i++)
289: {
290: f = setfil(i);
291: mbuf[j] = merg;
292: merg->filedes = i;
293: if ((merg->fiop = fopen(f, "r")) == NULL)
294: cant(f);
295: if (!rline(merg))
296: j++;
297: merg++;
298: }
299:
300: i = j - 1;
301: if (Trace & 04)
302: printf("start merg with %d\n", i);
303: while (i >= 0)
304: {
305: if (Trace & 04)
306: printf("mintup %d\n", i);
307: if (mintup(mbuf, i, cmpa))
308: {
309: if (fwrite(mbuf[i]->tup, 1, Tupsize, Oiop) != Tupsize)
310: syserr("cant write merge output");
311: Tupsout++;
312: }
313: merg = mbuf[i];
314: if (rline(merg))
315: {
316: yesno = "not ";
317: if (!(Trace & 010))
318: {
319: /* truncate temporary files to zero length */
320: yesno = "";
321: close(creat(setfil(merg->filedes), 0600));
322: }
323: if (Trace & 02 || Trace & 010)
324: printf("dropping and %struncating %s\n", yesno, setfil(merg->filedes));
325: i--;
326: }
327: }
328:
329: fclose(Oiop);
330: }
331: /*
332: ** Mintup puts the smallest tuple in mbuf[cnt-1].
333: ** If the tuple is a duplicate of another then
334: ** mintup returns 0, else 1.
335: **
336: ** Cnt is the number of compares to make; i.e.
337: ** mbuf[cnt] is the last element.
338: */
339:
340: mintup(mbuf, cnt, cmpfunc)
341: struct merg *mbuf[];
342: int cnt;
343: int (*cmpfunc)();
344: {
345: register struct merg **next, **last;
346: struct merg *temp;
347: register int nodup;
348: int j;
349:
350: nodup = TRUE;
351: next = mbuf;
352: last = &next[cnt];
353:
354: while (cnt--)
355: {
356: if (j = (*cmpfunc)(last, next))
357: {
358: /* tuples not equal. keep smallest */
359: if (j < 0)
360: {
361: /* exchange */
362: temp = *last;
363: *last = *next;
364: *next = temp;
365: nodup = TRUE;
366: }
367: }
368: else
369: nodup = FALSE;
370:
371: next++;
372: }
373: return (nodup);
374: }
375:
376:
377: rline(mp)
378: struct merg *mp;
379: {
380: register struct merg *merg;
381: register int i;
382:
383: merg = mp;
384: if ((i = fread(merg->tup, 1, Tupsize, merg->fiop)) != Tupsize)
385: {
386: if (i == 0)
387: {
388: fclose(merg->fiop);
389: return (1);
390: }
391: syserr("rd err %d on %s", i, setfil(merg->filedes));
392: }
393: return (0);
394: }
395:
396: newfile()
397: {
398: char *setfil();
399:
400: makfile(setfil(Nfiles));
401: Nfiles++;
402: }
403: /*
404: ** Convert the number i to a char
405: ** sequence aa, ab, ..., az, ba, etc.
406: */
407:
408: char *
409: setfil(i)
410: int i;
411: {
412: register int j;
413:
414: j = i;
415: j--;
416: Filep[0] = j/26 + 'a';
417: Filep[1] = j%26 + 'a';
418: return (File);
419: }
420:
421: oldfile()
422: {
423: makfile(Outfile);
424: Tupsout = 0;
425: }
426: /*
427: ** Create a file by the name "name"
428: ** and place its fio pointer in Oiop
429: */
430:
431: makfile(name)
432: char *name;
433: {
434: if ((Oiop = fopen(name, "w")) == NULL)
435: cant(name);
436: }
437:
438: cant(f)
439: char *f;
440: {
441: syserr("open %s", f);
442: }
443:
444: term(error)
445: int error;
446: {
447: register int i;
448:
449: if (Nfiles == 1)
450: Nfiles++;
451: if (Trace & 020)
452: printf("temp files not removed\n");
453: else
454: for (i = 1; i < Nfiles; i++)
455: {
456: unlink(setfil(i));
457: }
458: exit(error);
459: }
460: /*
461: ** CMPA -- compare tuples
462: */
463:
464: cmpa(a, b)
465: char **a;
466: char **b;
467: {
468: int af[4];
469: int bf[4];
470: char *pa, *pb;
471: register union anytype *tupa, *tupb;
472: int dom;
473: register int frml;
474: int frmt;
475: int off;
476: int temp;
477: int rt;
478: char *dp;
479:
480: pa = *a;
481: pb = *b;
482: Ccount++;
483: dp = Descsort;
484: while ((temp = *dp++) != ENDKEY)
485: {
486: if ((dom = temp) < 0)
487: dom = -temp;
488: frml = Desc.relfrml[dom];
489: frmt = Desc.relfrmt[dom];
490: off = Desc.reloff[dom];
491: tupa = (union anytype *) &pa[off];
492: tupb = (union anytype *) &pb[off];
493: if (temp < 0)
494: {
495: tupb = tupa;
496: tupa = (union anytype *) &pb[off];
497: }
498: if (frmt == CHAR)
499: {
500: frml &= 0377;
501: if (rt = scompare(tupb, frml, tupa, frml))
502: return (rt);
503: continue;
504: }
505:
506: /* domain is a numeric type */
507: if (bequal(tupa, tupb, frml))
508: continue;
509: /* copy to even word boundary */
510: bmove(tupa, af, frml);
511: bmove(tupb, bf, frml);
512: tupa = (union anytype *) af;
513: tupb = (union anytype *) bf;
514:
515: switch (frmt)
516: {
517:
518: case INT:
519: switch (frml)
520: {
521:
522: case 1:
523: return (tupa->i1type > tupb->i1type ? -1 : 1);
524:
525: case 2:
526: return (tupa->i2type > tupb->i2type ? -1 : 1);
527:
528: case 4:
529: return (tupa->i4type > tupb->i4type ? -1 : 1);
530: }
531:
532: case FLOAT:
533: switch (frml)
534: {
535:
536: case 4:
537: return (tupa->f4type > tupb->f4type ? -1 : 1);
538:
539: case 8:
540: return (tupa->f8type > tupb->f8type ? -1 : 1);
541: }
542: }
543: }
544: return (0);
545: }
546: /*
547: ** Replacement for access method routine get_page();
548: ** and associated globals and routines.
549: */
550:
551: struct accbuf *Acc_head, Accbuf;
552: long Accuread, Accuwrite;
553:
554: get_page(d, tid)
555: register DESC *d;
556: struct tup_id *tid;
557: {
558: register int i;
559: long pageid;
560: register struct accbuf *b;
561:
562: if (Trace & 0100)
563: {
564: printf("get_page: %.14s,", d->reldum.relid);
565: dumptid(tid);
566: }
567: b = Acc_head;
568: if (b == 0)
569: {
570: /* initialize buffer */
571: Acc_head = &Accbuf;
572: b = &Accbuf;
573: b->thispage = -1;
574: }
575: pluck_page(tid, &pageid);
576: i = 0;
577: if (b->thispage != pageid)
578: {
579: if (Trace & 040)
580: printf("get_page: rdg pg %ld\n", pageid);
581: b->thispage = pageid;
582: if ((lseek(d->relfp, pageid * PGSIZE, 0) < 0) ||
583: ((read(d->relfp, b, PGSIZE)) != PGSIZE))
584: {
585: i = AMREAD_ERR;
586: }
587: Accuread++;
588: }
589: return (i);
590: }
591:
592: resetacc(buf)
593: struct accbuf *buf;
594: {
595: return (0);
596: }
597:
598: acc_err(err)
599: int err;
600: {
601: return (err);
602:
603: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.