|
|
1.1 ! root 1: # include <ingres.h> ! 2: # include <catalog.h> ! 3: # include <aux.h> ! 4: # include <tree.h> ! 5: # include <symbol.h> ! 6: # include <pv.h> ! 7: # include "globs.h" ! 8: # include <access.h> ! 9: # include <sccs.h> ! 10: ! 11: SCCSID(@(#)reformat.c 7.1 2/5/81) ! 12: ! 13: /* ! 14: ** REFORMAT.C -- "reformat" module of DECOMPOSITION ! 15: ** ! 16: ** reformat() examines each relation which will be ! 17: ** involved in a one variable subquery to see if it ! 18: ** is cost effective to modify the relation to a more ! 19: ** usefull structure. Included in this file are all the ! 20: ** routines associated with reformat: ! 21: ** ! 22: ** reformat -- reformats each relation if cost effective ! 23: ** ! 24: ** findlinks -- determines what one variable clauses (if any) ! 25: ** are associated with the current variable ! 26: ** and the substitution variable. ! 27: ** ! 28: ** rangrf -- decides whether to actually reformat and does it. ! 29: ** ! 30: ** primrf -- performs a projection on a user relation ! 31: ** ! 32: ** ckpkey -- determines if relation is already structured usefully. ! 33: ** ! 34: ** findwid -- determines width of target list. ! 35: ** ! 36: ** rel_tup -- returns how many tuples fit on one page ! 37: */ ! 38: /* ! 39: ** Reformat -- Examine each variable to see if it should be reformated. ! 40: ** ! 41: ** The algorithm is: ! 42: ** (1) Find all variables for which, after tuple substitution, ! 43: ** will have one variable equality clauses on them. ! 44: ** (2) Estimate how much it will cost to execute them. ! 45: ** (3) Ignore those relations which are already keyed usefully. ! 46: ** (4) If it is a user relation, determine if it will be less costly ! 47: ** to first project (and possibly) modify the relation. ! 48: ** (5) If it is a _SYS relation, determine if it will be less costly ! 49: ** to modify the relation to hash on the linking domains. ! 50: */ ! 51: ! 52: reformat(var, sqlist, locrang, buf, tree) ! 53: int var; ! 54: QTREE *sqlist[]; ! 55: int locrang[]; ! 56: char buf[]; ! 57: QTREE *tree; ! 58: { ! 59: register QTREE *sq; ! 60: register char *lmap; ! 61: register int mainvar; ! 62: int i, j; ! 63: char linkmap[MAXDOM]; ! 64: ! 65: # ifdef xDTR1 ! 66: if (tTf(39, -1)) ! 67: printf("REFORMAT: subvar=%d\n", var); ! 68: # endif ! 69: ! 70: /* if the main tree is single var then put it in sq list */ ! 71: mainvar = -1; ! 72: if (tree->sym.value.sym_root.tvarc == 1) ! 73: { ! 74: mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm); ! 75: if (sqlist[mainvar] != 0) ! 76: syserr("help reformat"); ! 77: sqlist[mainvar] = tree; ! 78: # ifdef xDTR1 ! 79: if (tTf(39, 0)) ! 80: printf("including var %d\n", mainvar); ! 81: # endif ! 82: } ! 83: for(i = 0; i < MAXRANGE; i++) ! 84: if (sq = sqlist[i]) ! 85: { ! 86: # ifdef xDTR1 ! 87: if (tTf(39, 0)) ! 88: printf("Sqlist[%d]:\n", i); ! 89: # endif ! 90: lmap = linkmap; ! 91: for (j = 0; j < MAXDOM; j++) ! 92: *lmap++ = 0; ! 93: if (findlinks(sq->right, var, i, linkmap)) ! 94: { ! 95: # ifdef xDTR1 ! 96: if (tTf(39, 1)) ! 97: prlinks("Findlinks found", linkmap); ! 98: # endif ! 99: rangrf(i, var, sq, buf, linkmap, tree, locrang); ! 100: } ! 101: } ! 102: if (mainvar >= 0) ! 103: sqlist[mainvar] = 0; ! 104: } ! 105: /* ! 106: ** Findlinks -- Determine whether there are any equalify clauses ! 107: ** between the "linkv" and the variable selected for tuple ! 108: ** substitution "selv". ! 109: ** ! 110: ** Return: ! 111: ** TRUE if there is a linking variable ! 112: ** FALSE if not ! 113: ** ! 114: ** Side Effects: ! 115: ** The linkmap is set to 1 for each linking domains. ! 116: ** ie. if domains 3 is a linking domains then ! 117: ** linkmap[3] = 1. ! 118: */ ! 119: ! 120: findlinks(node, selv, linkv, linkmap) ! 121: QTREE *node; ! 122: int selv, linkv; ! 123: char linkmap[]; ! 124: { ! 125: register QTREE *b, *a; ! 126: register int s; ! 127: QTREE *temp; ! 128: extern QTREE *ckvar(); ! 129: ! 130: a = node; ! 131: # ifdef xDTR1 ! 132: if (tTf(39, 2)) ! 133: { ! 134: printf("FINDLINKS:"); ! 135: nodepr(a); ! 136: } ! 137: # endif ! 138: if (a->sym.type == QLEND) ! 139: return (0); ! 140: s = findlinks(a->right, selv, linkv, linkmap); ! 141: ! 142: /* ! 143: ** This mess is looking for a clause of the form: ! 144: ** EQ ! 145: ** Var Var ! 146: ** linkv selv ! 147: ** Where the VARS can be in either order ! 148: */ ! 149: if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ || ! 150: b->left->sym.type!=VAR || b->right->sym.type!=VAR) ! 151: return (s); ! 152: ! 153: a = ckvar(b->left); ! 154: b = ckvar(b->right); ! 155: if (b->sym.value.sym_var.varno == linkv) ! 156: { ! 157: temp = a; ! 158: a = b; ! 159: b = temp; ! 160: } ! 161: if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv)) ! 162: return (s); ! 163: ! 164: linkmap[a->sym.value.sym_var.attno] = 1; ! 165: # ifdef xDTR1 ! 166: if (tTf(39, 3)) ! 167: printf("found:attno=%d\n", a->sym.value.sym_var.attno); ! 168: # endif ! 169: return (1); ! 170: } ! 171: /* ! 172: ** Rangrf -- reformat the variable "var" if it is cost effective. ! 173: ** ! 174: ** Rangrf does the actual work of reformat. If the relation is ! 175: ** already keyed usefully then rangrf does nothing. ! 176: ** Otherwise rangrf is split into two decisions: ! 177: ** A user relation must first be projected and then modified; ! 178: ** a _SYS relation can be modified directly. ! 179: ** ! 180: ** It may be cost effective to just project a user relation. ! 181: */ ! 182: ! 183: /*ARGSUSED*/ ! 184: rangrf(var, substvar, sq, buf, linkmap, tree, locrang) ! 185: int var, substvar; ! 186: QTREE *sq; ! 187: char buf[]; ! 188: char linkmap[]; ! 189: QTREE *tree; ! 190: int locrang[]; ! 191: { ! 192: register struct rang_tab *r, *rs; ! 193: register int j; ! 194: char nums[2]; ! 195: int i, newwid; ! 196: QTREE *pq; ! 197: long npages, newpages; ! 198: long origcost, modcost, projcost; ! 199: extern char *rangename(); ! 200: extern long rel_pages(), hashcost(); ! 201: extern QTREE *makroot(); ! 202: extern DESC *openr1(); ! 203: extern QTREE **mksqlist(); ! 204: ! 205: r = &De.de_rangev[var]; ! 206: rs = &De.de_rangev[substvar]; ! 207: npages = rel_pages(r->rtcnt, r->rtwid); ! 208: ! 209: /* calculate original cost of substitution */ ! 210: ! 211: origcost = rs->rtcnt * npages; ! 212: ! 213: # ifdef xDTR1 ! 214: if (tTf(39, 5)) ! 215: printf("eval of mod for var %d. orig cost=%ld\n", var, origcost); ! 216: # endif ! 217: ! 218: /* if relation is already keyed usefully, just return */ ! 219: if (i = ckpkey(linkmap, var)) ! 220: { ! 221: # ifdef xDTR1 ! 222: if (tTf(39, 4)) ! 223: printf("already keyed ok %d\n", i); ! 224: # endif ! 225: return; ! 226: } ! 227: ! 228: /* if this is a primary relation, a projection must be done ! 229: ** before any modify can be performed */ ! 230: ! 231: if (!rnum_temp(r->relnum)) ! 232: { ! 233: /* evaluation for primary, user relation */ ! 234: ! 235: /* to save some time, don't evaluate if origcost is very small */ ! 236: if (origcost < OVHDP + 1 + npages) ! 237: return; ! 238: ! 239: j = markbuf(buf); ! 240: ! 241: /* build a projection tree but don't actually create the projection */ ! 242: pq = makroot(buf); ! 243: dfind(sq, buf, mksqlist(pq, var)); ! 244: ! 245: newwid = findwid(pq); ! 246: newpages = rel_pages(r->rtcnt, newwid); ! 247: ! 248: /* ! 249: ** Calculate cost of projection + new cost of substitution ! 250: ** projcost = readoldpages + writenewpages + runquery + overhead ! 251: */ ! 252: ! 253: projcost = npages + newpages + rs->rtcnt * newpages + OVHDP; ! 254: ! 255: ! 256: /* CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY ! 257: * AND NEW COST OF SUBSTITUTION AFTER MODIFY */ ! 258: ! 259: modcost = (npages + newpages) + ! 260: hashcost(newpages) + ! 261: rs->rtcnt + ! 262: OVHDP + OVHDM; ! 263: ! 264: # ifdef xDTR1 ! 265: if (tTf(39, 5)) ! 266: { ! 267: printf("primary rel: proj cost=%ld\t", projcost); ! 268: printf("mod cost=%ld\n", modcost); ! 269: } ! 270: # endif ! 271: ! 272: if (origcost <= modcost) ! 273: if (origcost <= projcost) ! 274: { ! 275: freebuf(buf, j); ! 276: return; ! 277: } ! 278: ! 279: # ifdef xDTR1 ! 280: if (tTf(39, 5)) ! 281: printf("doing projection\n"); ! 282: # endif ! 283: ! 284: /* i will be TRUE if the proj was aborted */ ! 285: i = primrf(var, pq, locrang); ! 286: freebuf(buf, j); ! 287: if ((projcost <= modcost) || i) ! 288: return; ! 289: } ! 290: ! 291: /* IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY */ ! 292: ! 293: else ! 294: { ! 295: ! 296: /* CALCULATE MODIFY COST (which does not include a projection) ! 297: * AND NEW COST OF SUBSTITUTION */ ! 298: ! 299: modcost = hashcost(npages) ! 300: + rs->rtcnt + OVHDM; ! 301: ! 302: # ifdef xDTR1 ! 303: if (tTf(39, 5)) ! 304: printf("temp rel: mod cost=%ld\n", modcost); ! 305: # endif ! 306: ! 307: if (origcost <= modcost) ! 308: return; ! 309: } ! 310: ! 311: # ifdef xDTR1 ! 312: if (tTf(39, 5)) ! 313: printf("doing modify\n"); ! 314: # endif ! 315: ! 316: initp(); ! 317: setp(PV_STR, rangename(var)); ! 318: setp(PV_STR, "hash"); /* modify to hash */ ! 319: setp(PV_STR, "num"); /* passing domain numbers - not names */ ! 320: ! 321: nums[1] = '\0'; ! 322: for (j = 0; j < MAXDOM; j++) ! 323: if (linkmap[j]) ! 324: { ! 325: *nums = j; ! 326: setp(PV_STR, nums); ! 327: } ! 328: ! 329: /* fill relation completely */ ! 330: setp(PV_STR, ""); ! 331: setp(PV_STR, "fillfactor"); ! 332: setp(PV_STR, "100"); ! 333: setp(PV_STR, "minpages"); ! 334: setp(PV_STR, "1"); ! 335: closer1(var); ! 336: call_dbu(mdMODIFY, FALSE); ! 337: ! 338: /* re-open modified range to get accurate descriptor */ ! 339: openr1(var); ! 340: } ! 341: /* ! 342: ** Primrf -- Replace a user relation with a projection of the needed domains. ! 343: ** ! 344: ** Primrf retrieves into a temporary relation, the domains ! 345: ** specified by the "pq" tree. The range table is updated to ! 346: ** reflect the new range. ! 347: ** ! 348: ** In fact a projection is not an accurate way to describe what ! 349: ** happens. In order to avoid changing any attribute numbers in ! 350: ** the query, the needed domains are projected and the domains ! 351: ** inbetween are created as type "c0". This way they occupy ! 352: ** no space and the attribute numbering is unchanged. Of course, ! 353: ** any attributes beyond the last one needed are simply discarded. ! 354: ** ! 355: ** In previous versions attempts were made to project only the needed ! 356: ** domains and to renumber the query tree. This proved to be very ! 357: ** expensive and difficult and was not always as optimal as this ! 358: ** method. ! 359: ** ! 360: ** The routines newquery() and endquery() will undo the effects ! 361: ** of primrf and restore the range table back to its original state. ! 362: */ ! 363: ! 364: primrf(var, pq, locrang) ! 365: int var; ! 366: QTREE *pq; ! 367: int locrang[]; ! 368: { ! 369: register QTREE *q, **np; ! 370: register int i; ! 371: int maxresno, rnum; ! 372: QTREE *node1[MAXDOM+1], *node2[MAXDOM+1]; ! 373: static char czero[QT_HDR_SIZ + sizeof (struct resdomnode)]; /* a dummy resdom */ ! 374: extern DESC *openr1(); ! 375: extern char *rnum_convert(); ! 376: ! 377: # ifdef xDTR1 ! 378: if (tTf(39, 6)) ! 379: printf("PRIMRF:\n"); ! 380: # endif ! 381: ! 382: /* renumber RESDOMs to match their VARs */ ! 383: for (q = pq->left; q->sym.type != TREE; q = q->left) ! 384: q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno; ! 385: ! 386: /* form list of RESDOMs from outermost inward */ ! 387: node1[lnode(pq->left, node1, 0)] = 0; ! 388: ! 389: /* form a dummy RESDOM with type CHAR and length 0 */ ! 390: q = (QTREE *) czero; ! 391: q->sym.value.sym_resdom.resfrmt = CHAR; ! 392: q->sym.value.sym_resdom.resfrml = 0; ! 393: ! 394: /* zero node2 */ ! 395: for (np = node2, i = 0; i < MAXDOM + 1; i++) ! 396: *np++ = 0; ! 397: ! 398: /* sort RESDOMs into node2 */ ! 399: maxresno = 0; ! 400: for (np = node1; q = *np++; ) ! 401: { ! 402: if ((i = q->sym.value.sym_resdom.resno) == 0) ! 403: return (1); /* abort. Tid is referenced */ ! 404: if (i > maxresno) ! 405: maxresno = i; ! 406: node2[i-1] = q; ! 407: } ! 408: ! 409: /* fill missing RESDOMs with czero */ ! 410: for (np = node2, i = 0; i < maxresno; i++, np++) ! 411: if (*np == 0) ! 412: *np = (QTREE *) czero; ! 413: ! 414: ! 415: /* set up params for the create */ ! 416: initp(); ! 417: setp(PV_STR, "0"); /* initial relstat field */ ! 418: rnum = rnum_alloc(); ! 419: setp(PV_STR, rnum_convert(rnum)); ! 420: domnam(node2, "f"); ! 421: call_dbu(mdCREATE, FALSE); ! 422: ! 423: /* now run projection */ ! 424: i = var; ! 425: execsq1(pq, i, rnum); ! 426: new_range(i, rnum); ! 427: /* save the name of the new relation */ ! 428: savrang(locrang, i); ! 429: openr1(i); ! 430: return (0); ! 431: } ! 432: /* ! 433: ** Ckpkey -- determine if a relation is already keyed usefully. ! 434: ** ! 435: ** Ckpkey gets the key structure from paramd() and compares ! 436: ** it to the linkmap. If the relation is ISAM and the first keyed ! 437: ** domain is in linkmap, or if it is HASH and all keyed domains ! 438: ** are in the linkmap, then ckpkey() returns >0, else ! 439: ** ckpkey looks for secondary indexes which are usable. ! 440: ** if none, then ckpkey() returns 0. ! 441: ** ! 442: ** The value returned is an estimate of the number of ! 443: ** pages which must be read to satisfy a query given ! 444: ** equality clauses on the linkmap domains and the ! 445: ** structure of the relation. The (crude) estimates are: ! 446: ** hash 1 page ! 447: ** isam 2 pages ! 448: ** hash sec index 2 pages ! 449: ** isam sec index 3 pages ! 450: */ ! 451: ! 452: ckpkey(linkmap, var) ! 453: char linkmap[]; ! 454: int var; ! 455: { ! 456: register DESC *d; ! 457: register int i; ! 458: struct index itup; ! 459: struct accessparam ap; ! 460: TID lotid, hitid; ! 461: extern DESC *readopen(), *openr1(), Inddes; ! 462: ! 463: ! 464: # ifdef xDTR1 ! 465: if (tTf(39, 11)) ! 466: printf("CKPKEY:%s\n", rangename(var)); ! 467: # endif ! 468: ! 469: /* if relation is an unindexed heap, then it cannot be keyed usefully */ ! 470: d = openr1(var); ! 471: if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0) ! 472: { ! 473: d = readopen(var); /* open relation if it hasn't been already */ ! 474: paramd(d, &ap); ! 475: if (ckpkey1(linkmap, &ap) == 0) ! 476: return (ap.mode == EXACTKEY ? 1 : 2); /* success */ ! 477: ! 478: if (d->reldum.relindxd > 0) ! 479: { ! 480: opencatalog("indexes", 0); ! 481: setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP); ! 482: setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP); ! 483: if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup)) ! 484: syserr("ckpkey:find %d", i); ! 485: ! 486: while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0) ! 487: { ! 488: if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2)) ! 489: continue; ! 490: ! 491: parami(&itup, &ap); ! 492: if (ckpkey1(linkmap, &ap) == 0) ! 493: return (ap.mode == EXACTKEY ? 2 : 3); /* success */ ! 494: } ! 495: } ! 496: } ! 497: return (0); /* failure. no useful structure */ ! 498: } ! 499: ! 500: ! 501: ! 502: ckpkey1(linkmap, ap) ! 503: char linkmap[]; ! 504: struct accessparam *ap; ! 505: { ! 506: register int i, k, anykey; ! 507: ! 508: if (ap->mode == NOKEY) ! 509: return (1); ! 510: anykey = 0; ! 511: for (i = 0; k = ap->keydno[i]; i++) ! 512: { ! 513: if (linkmap[k] == 0) ! 514: { ! 515: if (ap->mode == EXACTKEY) ! 516: return (1); ! 517: else ! 518: break; ! 519: } ! 520: anykey++; ! 521: } ! 522: return (!anykey); ! 523: } ! 524: /* ! 525: ** Findwid -- scan the target list of the projection tree ! 526: ** to determine the resultant tuple size. ! 527: ** ! 528: ** Return: ! 529: ** Size of projected tuple. ! 530: */ ! 531: ! 532: findwid(tree) ! 533: QTREE *tree; ! 534: { ! 535: register QTREE *nod, *t; ! 536: register int wid; ! 537: ! 538: wid = 0; ! 539: t = tree; ! 540: ! 541: for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left) ! 542: { ! 543: wid += nod->sym.value.sym_var.varfrml & 0377; ! 544: } ! 545: ! 546: return (wid); ! 547: } ! 548: /* ! 549: ** HASHCOST -- determine cost to modify to hash ! 550: ** ! 551: ** Estimates the cost to modify the relation to hash. ! 552: ** The estimate is crude since it assumes that there ! 553: ** are no duplicates and that a "unix" page will be ! 554: ** the same size as an "ingres" page. ! 555: ** ! 556: ** The cost is based on the parameters which signify ! 557: ** the size of the in-core sort buffer and the n-way ! 558: ** merge sort plus the cost to read the sorted file ! 559: ** and write the new relation twice (that's the was it works!). ! 560: ** ! 561: ** Parameters: ! 562: ** npages - the number of pages (estimate) which the ! 563: ** relation currently occupies. ! 564: ** ! 565: ** Returns: ! 566: ** the cost to hash the relation. ! 567: ** ! 568: ** Side Effects: ! 569: ** none ! 570: ** ! 571: ** Called By: ! 572: ** rangrf ! 573: */ ! 574: ! 575: # define COREBUFSIZE 32767 / PGSIZE ! 576: # define MERGESIZE 7 ! 577: ! 578: long ! 579: hashcost(npages) ! 580: long npages; ! 581: { ! 582: long sortpages, total; ! 583: register int nfiles; ! 584: ! 585: nfiles = npages / COREBUFSIZE; ! 586: sortpages = 2 * npages; ! 587: ! 588: while (nfiles > 1) ! 589: { ! 590: nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE; ! 591: sortpages += 2 * npages; ! 592: } ! 593: ! 594: total = sortpages + npages + npages + npages; ! 595: # ifdef xDTR1 ! 596: if (tTf(39, 5)) ! 597: printf("hashcost is %ld\n", total); ! 598: # endif ! 599: ! 600: return (total); ! 601: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.