|
|
1.1 root 1: # include <ingres.h>
2: # include <catalog.h>
3: # include <aux.h>
4: # include <tree.h>
5: # include <symbol.h>
6: # include <pv.h>
7: # include "globs.h"
8: # include <access.h>
9: # include <sccs.h>
10:
11: SCCSID(@(#)reformat.c 8.3 1/16/85)
12:
13: /*
14: ** REFORMAT.C -- "reformat" module of DECOMPOSITION
15: **
16: ** reformat() examines each relation which will be
17: ** involved in a one variable subquery to see if it
18: ** is cost effective to modify the relation to a more
19: ** usefull structure. Included in this file are all the
20: ** routines associated with reformat:
21: **
22: ** reformat -- reformats each relation if cost effective
23: **
24: ** findlinks -- determines what one variable clauses (if any)
25: ** are associated with the current variable
26: ** and the substitution variable.
27: **
28: ** rangrf -- decides whether to actually reformat and does it.
29: **
30: ** primrf -- performs a projection on a user relation
31: **
32: ** ckpkey -- determines if relation is already structured usefully.
33: **
34: ** findwid -- determines width of target list.
35: **
36: ** rel_tup -- returns how many tuples fit on one page
37: */
38: /*
39: ** Reformat -- Examine each variable to see if it should be reformated.
40: **
41: ** The algorithm is:
42: ** (1) Find all variables for which, after tuple substitution,
43: ** will have one variable equality clauses on them.
44: ** (2) Estimate how much it will cost to execute them.
45: ** (3) Ignore those relations which are already keyed usefully.
46: ** (4) If it is a user relation, determine if it will be less costly
47: ** to first project (and possibly) modify the relation.
48: ** (5) If it is a _SYS relation, determine if it will be less costly
49: ** to modify the relation to hash on the linking domains.
50: */
51:
52: reformat(var, sqlist, locrang, buf, tree)
53: int var;
54: QTREE *sqlist[];
55: int locrang[];
56: char buf[];
57: QTREE *tree;
58: {
59: register QTREE *sq;
60: register char *lmap;
61: register int mainvar;
62: int i, j;
63: char linkmap[MAXDOM];
64:
65: # ifdef xDTR1
66: if (tTf(39, -1))
67: printf("REFORMAT: subvar=%d\n", var);
68: # endif
69:
70: /* if the main tree is single var then put it in sq list */
71: mainvar = -1;
72: if (tree->sym.value.sym_root.tvarc == 1)
73: {
74: mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm);
75: if (sqlist[mainvar] != 0)
76: syserr("help reformat");
77: sqlist[mainvar] = tree;
78: # ifdef xDTR1
79: if (tTf(39, 0))
80: printf("including var %d\n", mainvar);
81: # endif
82: }
83: for(i = 0; i < MAXRANGE; i++)
84: if (sq = sqlist[i])
85: {
86: # ifdef xDTR1
87: if (tTf(39, 0))
88: printf("Sqlist[%d]:\n", i);
89: # endif
90: lmap = linkmap;
91: for (j = 0; j < MAXDOM; j++)
92: *lmap++ = 0;
93: if (findlinks(sq->right, var, i, linkmap))
94: {
95: # ifdef xDTR1
96: if (tTf(39, 1))
97: prlinks("Findlinks found", linkmap);
98: # endif
99: rangrf(i, var, sq, buf, linkmap, tree, locrang);
100: }
101: }
102: if (mainvar >= 0)
103: sqlist[mainvar] = 0;
104: }
105: /*
106: ** Findlinks -- Determine whether there are any equalify clauses
107: ** between the "linkv" and the variable selected for tuple
108: ** substitution "selv".
109: **
110: ** Return:
111: ** TRUE if there is a linking variable
112: ** FALSE if not
113: **
114: ** Side Effects:
115: ** The linkmap is set to 1 for each linking domains.
116: ** ie. if domains 3 is a linking domains then
117: ** linkmap[3] = 1.
118: */
119:
120: findlinks(node, selv, linkv, linkmap)
121: QTREE *node;
122: int selv, linkv;
123: char linkmap[];
124: {
125: register QTREE *b, *a;
126: register int s;
127: QTREE *temp;
128: extern QTREE *ckvar();
129:
130: a = node;
131: # ifdef xDTR1
132: if (tTf(39, 2))
133: {
134: printf("FINDLINKS:");
135: nodepr(a);
136: }
137: # endif
138: if (a->sym.type == QLEND)
139: return (0);
140: s = findlinks(a->right, selv, linkv, linkmap);
141:
142: /*
143: ** This mess is looking for a clause of the form:
144: ** EQ
145: ** Var Var
146: ** linkv selv
147: ** Where the VARS can be in either order
148: */
149: if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ ||
150: b->left->sym.type!=VAR || b->right->sym.type!=VAR)
151: return (s);
152:
153: a = ckvar(b->left);
154: b = ckvar(b->right);
155: if (b->sym.value.sym_var.varno == linkv)
156: {
157: temp = a;
158: a = b;
159: b = temp;
160: }
161: if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv))
162: return (s);
163:
164: linkmap[a->sym.value.sym_var.attno] = 1;
165: # ifdef xDTR1
166: if (tTf(39, 3))
167: printf("found:attno=%d\n", a->sym.value.sym_var.attno);
168: # endif
169: return (1);
170: }
171: /*
172: ** Rangrf -- reformat the variable "var" if it is cost effective.
173: **
174: ** Rangrf does the actual work of reformat. If the relation is
175: ** already keyed usefully then rangrf does nothing.
176: ** Otherwise rangrf is split into two decisions:
177: ** A user relation must first be projected and then modified;
178: ** a _SYS relation can be modified directly.
179: **
180: ** It may be cost effective to just project a user relation.
181: */
182:
183: /*ARGSUSED*/
184: rangrf(var, substvar, sq, buf, linkmap, tree, locrang)
185: int var, substvar;
186: QTREE *sq;
187: char buf[];
188: char linkmap[];
189: QTREE *tree;
190: int locrang[];
191: {
192: register struct rang_tab *r, *rs;
193: register int j;
194: char nums[2];
195: int i, newwid;
196: QTREE *pq;
197: long npages, newpages;
198: long origcost, modcost, projcost;
199: extern char *rangename();
200: extern long rel_pages(), hashcost();
201: extern QTREE *makroot();
202: extern DESC *openr1();
203: extern QTREE **mksqlist();
204:
205: r = &De.de_rangev[var];
206: rs = &De.de_rangev[substvar];
207: npages = rel_pages(r->rtcnt, r->rtwid);
208:
209: /* calculate original cost of substitution */
210:
211: origcost = rs->rtcnt * npages;
212:
213: # ifdef xDTR1
214: if (tTf(39, 5))
215: printf("eval of mod for var %d. orig cost=%ld\n", var, origcost);
216: # endif
217:
218: /* if relation is already keyed usefully, just return */
219: if (i = ckpkey(linkmap, var))
220: {
221: # ifdef xDTR1
222: if (tTf(39, 4))
223: printf("already keyed ok %d\n", i);
224: # endif
225: return;
226: }
227:
228: /* if this is a primary relation, a projection must be done
229: ** before any modify can be performed */
230:
231: if (!rnum_temp(r->relnum))
232: {
233: /* evaluation for primary, user relation */
234:
235: /* to save some time, don't evaluate if origcost is very small */
236: if (origcost < OVHDP + 1 + npages)
237: return;
238:
239: j = markbuf(buf);
240:
241: /* build a projection tree but don't actually create the projection */
242: pq = makroot(buf);
243: dfind(sq, buf, mksqlist(pq, var));
244:
245: newwid = findwid(pq);
246: newpages = rel_pages(r->rtcnt, newwid);
247:
248: /*
249: ** Calculate cost of projection + new cost of substitution
250: ** projcost = readoldpages + writenewpages + runquery + overhead
251: */
252:
253: projcost = npages + newpages + rs->rtcnt * newpages + OVHDP;
254:
255:
256: /* CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY
257: * AND NEW COST OF SUBSTITUTION AFTER MODIFY */
258:
259: modcost = (npages + newpages) +
260: hashcost(newpages) +
261: rs->rtcnt +
262: OVHDP + OVHDM;
263:
264: # ifdef xDTR1
265: if (tTf(39, 5))
266: {
267: printf("primary rel: proj cost=%ld\t", projcost);
268: printf("mod cost=%ld\n", modcost);
269: }
270: # endif
271:
272: if (origcost <= modcost)
273: if (origcost <= projcost)
274: {
275: freebuf(buf, j);
276: return;
277: }
278:
279: # ifdef xDTR1
280: if (tTf(39, 5))
281: printf("doing projection\n");
282: # endif
283:
284: /* i will be TRUE if the proj was aborted */
285: i = primrf(var, pq, locrang);
286: freebuf(buf, j);
287: if ((projcost <= modcost) || i)
288: return;
289: }
290:
291: /* IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY */
292:
293: else
294: {
295:
296: /* CALCULATE MODIFY COST (which does not include a projection)
297: * AND NEW COST OF SUBSTITUTION */
298:
299: modcost = hashcost(npages)
300: + rs->rtcnt + OVHDM;
301:
302: # ifdef xDTR1
303: if (tTf(39, 5))
304: printf("temp rel: mod cost=%ld\n", modcost);
305: # endif
306:
307: if (origcost <= modcost)
308: return;
309: }
310:
311: # ifdef xDTR1
312: if (tTf(39, 5))
313: printf("doing modify\n");
314: # endif
315:
316: initp();
317: setp(PV_STR, rangename(var));
318: setp(PV_STR, "hash"); /* modify to hash */
319: setp(PV_STR, "num"); /* passing domain numbers - not names */
320:
321: nums[1] = '\0';
322: for (j = 0; j < MAXDOM; j++)
323: if (linkmap[j])
324: {
325: *nums = j;
326: setp(PV_STR, nums);
327: }
328:
329: /* fill relation completely */
330: setp(PV_STR, "");
331: setp(PV_STR, "fillfactor");
332: setp(PV_STR, "100");
333: setp(PV_STR, "minpages");
334: setp(PV_STR, "1");
335: closer1(var);
336: call_dbu(mdMODIFY, FALSE);
337:
338: /* re-open modified range to get accurate descriptor */
339: openr1(var);
340: }
341: /*
342: ** Primrf -- Replace a user relation with a projection of the needed domains.
343: **
344: ** Primrf retrieves into a temporary relation, the domains
345: ** specified by the "pq" tree. The range table is updated to
346: ** reflect the new range.
347: **
348: ** In fact a projection is not an accurate way to describe what
349: ** happens. In order to avoid changing any attribute numbers in
350: ** the query, the needed domains are projected and the domains
351: ** inbetween are created as type "c0". This way they occupy
352: ** no space and the attribute numbering is unchanged. Of course,
353: ** any attributes beyond the last one needed are simply discarded.
354: **
355: ** In previous versions attempts were made to project only the needed
356: ** domains and to renumber the query tree. This proved to be very
357: ** expensive and difficult and was not always as optimal as this
358: ** method.
359: **
360: ** The routines newquery() and endquery() will undo the effects
361: ** of primrf and restore the range table back to its original state.
362: */
363:
364: primrf(var, pq, locrang)
365: int var;
366: QTREE *pq;
367: int locrang[];
368: {
369: register QTREE *q, **np;
370: register int i;
371: int maxresno, rnum;
372: QTREE *node1[MAXDOM+1], *node2[MAXDOM+1];
373: static char czero[QT_HDR_SIZ + sizeof (struct resdomnode)]; /* a dummy resdom */
374: extern DESC *openr1();
375: extern char *rnum_convert();
376:
377: # ifdef xDTR1
378: if (tTf(39, 6))
379: printf("PRIMRF:\n");
380: # endif
381:
382: /* renumber RESDOMs to match their VARs */
383: for (q = pq->left; q->sym.type != TREE; q = q->left)
384: q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno;
385:
386: /* form list of RESDOMs from outermost inward */
387: node1[lnode(pq->left, node1, 0)] = 0;
388:
389: /* form a dummy RESDOM with type CHAR and length 0 */
390: q = (QTREE *) czero;
391: q->sym.value.sym_resdom.resfrmt = CHAR;
392: q->sym.value.sym_resdom.resfrml = 0;
393:
394: /* zero node2 */
395: for (np = node2, i = 0; i < MAXDOM + 1; i++)
396: *np++ = 0;
397:
398: /* sort RESDOMs into node2 */
399: maxresno = 0;
400: for (np = node1; q = *np++; )
401: {
402: if ((i = q->sym.value.sym_resdom.resno) == 0)
403: return (1); /* abort. Tid is referenced */
404: if (i > maxresno)
405: maxresno = i;
406: node2[i-1] = q;
407: }
408:
409: /* fill missing RESDOMs with czero */
410: for (np = node2, i = 0; i < maxresno; i++, np++)
411: if (*np == 0)
412: *np = (QTREE *) czero;
413:
414:
415: /* set up params for the create */
416: initp();
417: setp(PV_STR, "0"); /* initial relstat field */
418: rnum = rnum_alloc();
419: setp(PV_STR, rnum_convert(rnum));
420: domnam(node2, "f");
421: call_dbu(mdCREATE, FALSE);
422:
423: /* now run projection */
424: i = var;
425: execsq1(pq, i, rnum);
426: new_range(i, rnum);
427: /* save the name of the new relation */
428: savrang(locrang, i);
429: openr1(i);
430: return (0);
431: }
432: /*
433: ** Ckpkey -- determine if a relation is already keyed usefully.
434: **
435: ** Ckpkey gets the key structure from paramd() and compares
436: ** it to the linkmap. If the relation is ISAM and the first keyed
437: ** domain is in linkmap, or if it is HASH and all keyed domains
438: ** are in the linkmap, then ckpkey() returns >0, else
439: ** ckpkey looks for secondary indexes which are usable.
440: ** if none, then ckpkey() returns 0.
441: **
442: ** The value returned is an estimate of the number of
443: ** pages which must be read to satisfy a query given
444: ** equality clauses on the linkmap domains and the
445: ** structure of the relation. The (crude) estimates are:
446: ** hash 1 page
447: ** isam 2 pages
448: ** hash sec index 2 pages
449: ** isam sec index 3 pages
450: */
451:
452: ckpkey(linkmap, var)
453: char linkmap[];
454: int var;
455: {
456: register DESC *d;
457: register int i;
458: struct index itup;
459: struct accessparam ap;
460: TID lotid, hitid;
461: extern DESC *readopen(), *openr1(), Inddes;
462:
463:
464: # ifdef xDTR1
465: if (tTf(39, 11))
466: printf("CKPKEY:%s\n", rangename(var));
467: # endif
468:
469: /* if relation is an unindexed heap, then it cannot be keyed usefully */
470: d = openr1(var);
471: if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0)
472: {
473: d = readopen(var); /* open relation if it hasn't been already */
474: paramd(d, &ap);
475: if (ckpkey1(linkmap, &ap) == 0)
476: return (ap.mode == EXACTKEY ? 1 : 2); /* success */
477:
478: if (d->reldum.relindxd > 0)
479: {
480: opencatalog("indexes", OR_READ);
481: setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP);
482: setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP);
483: if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup))
484: syserr("ckpkey:find %d", i);
485:
486: while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0)
487: {
488: if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2))
489: continue;
490:
491: parami(&itup, &ap);
492: if (ckpkey1(linkmap, &ap) == 0)
493: return (ap.mode == EXACTKEY ? 2 : 3); /* success */
494: }
495: }
496: }
497: return (0); /* failure. no useful structure */
498: }
499:
500:
501:
502: ckpkey1(linkmap, ap)
503: char linkmap[];
504: struct accessparam *ap;
505: {
506: register int i, k, anykey;
507:
508: if (ap->mode == NOKEY)
509: return (1);
510: anykey = 0;
511: for (i = 0; k = ap->keydno[i]; i++)
512: {
513: if (linkmap[k] == 0)
514: {
515: if (ap->mode == EXACTKEY)
516: return (1);
517: else
518: break;
519: }
520: anykey++;
521: }
522: return (!anykey);
523: }
524: /*
525: ** Findwid -- scan the target list of the projection tree
526: ** to determine the resultant tuple size.
527: **
528: ** Return:
529: ** Size of projected tuple.
530: */
531:
532: findwid(tree)
533: QTREE *tree;
534: {
535: register QTREE *nod, *t;
536: register int wid;
537:
538: wid = 0;
539: t = tree;
540:
541: for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left)
542: {
543: wid += nod->sym.value.sym_var.varfrml & I1MASK;
544: }
545:
546: return (wid);
547: }
548: /*
549: ** HASHCOST -- determine cost to modify to hash
550: **
551: ** Estimates the cost to modify the relation to hash.
552: ** The estimate is crude since it assumes that there
553: ** are no duplicates and that a "unix" page will be
554: ** the same size as an "ingres" page.
555: **
556: ** The cost is based on the parameters which signify
557: ** the size of the in-core sort buffer and the n-way
558: ** merge sort plus the cost to read the sorted file
559: ** and write the new relation twice (that's the was it works!).
560: **
561: ** Parameters:
562: ** npages - the number of pages (estimate) which the
563: ** relation currently occupies.
564: **
565: ** Returns:
566: ** the cost to hash the relation.
567: **
568: ** Side Effects:
569: ** none
570: **
571: ** Called By:
572: ** rangrf
573: */
574:
575: # define COREBUFSIZE 32767 / PGSIZE
576: # define MERGESIZE 7
577:
578: long
579: hashcost(npages)
580: long npages;
581: {
582: long sortpages, total;
583: register int nfiles;
584:
585: nfiles = npages / COREBUFSIZE;
586: sortpages = 2 * npages;
587:
588: while (nfiles > 1)
589: {
590: nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE;
591: sortpages += 2 * npages;
592: }
593:
594: total = sortpages + npages + npages + npages;
595: # ifdef xDTR1
596: if (tTf(39, 5))
597: printf("hashcost is %ld\n", total);
598: # endif
599:
600: return (total);
601: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.