Annotation of 42BSD/ingres/source/decomp/reformat.c, revision 1.1.1.1

1.1       root        1: # include      <ingres.h>
                      2: # include      <catalog.h>
                      3: # include      <aux.h>
                      4: # include      <tree.h>
                      5: # include      <symbol.h>
                      6: # include      <pv.h>
                      7: # include      "globs.h"
                      8: # include      <access.h>
                      9: # include      <sccs.h>
                     10: 
                     11: SCCSID(@(#)reformat.c  7.1     2/5/81)
                     12: 
                     13: /*
                     14: **     REFORMAT.C -- "reformat" module of DECOMPOSITION
                     15: **
                     16: **     reformat() examines each relation which will be
                     17: **     involved in a one variable subquery to see if it
                     18: **     is cost effective to modify the relation to a more
                     19: **     usefull structure. Included in this file are all the
                     20: **     routines associated with reformat:
                     21: **
                     22: **             reformat -- reformats each relation if cost effective
                     23: **
                     24: **             findlinks -- determines what one variable clauses (if any)
                     25: **                             are associated with the current variable
                     26: **                             and the substitution variable.
                     27: **
                     28: **             rangrf    -- decides whether to actually reformat and does it.
                     29: **
                     30: **             primrf    -- performs a projection on a user relation
                     31: **
                     32: **             ckpkey    -- determines if relation is already structured usefully.
                     33: **
                     34: **             findwid   -- determines width of target list.
                     35: **
                     36: **             rel_tup   -- returns how many tuples fit on one page
                     37: */
                     38: /*
                     39: ** Reformat -- Examine each variable to see if it should be reformated.
                     40: **
                     41: **     The algorithm is:
                     42: **     (1) Find all variables for which, after tuple substitution,
                     43: **         will have one variable equality clauses on them.
                     44: **     (2) Estimate how much it will cost to execute them.
                     45: **     (3) Ignore those relations which are already keyed usefully.
                     46: **     (4) If it is a user relation, determine if it will be less costly
                     47: **         to first project (and possibly) modify the relation.
                     48: **     (5) If it is a _SYS relation, determine if it will be less costly
                     49: **         to modify the relation to hash on the linking domains.
                     50: */
                     51: 
                     52: reformat(var, sqlist, locrang, buf, tree)
                     53: int    var;
                     54: QTREE  *sqlist[];
                     55: int    locrang[];
                     56: char   buf[];
                     57: QTREE  *tree;
                     58: {
                     59:        register QTREE  *sq;
                     60:        register char   *lmap;
                     61:        register int    mainvar;
                     62:        int             i, j;
                     63:        char            linkmap[MAXDOM];
                     64: 
                     65: #      ifdef xDTR1
                     66:        if (tTf(39, -1))
                     67:                printf("REFORMAT: subvar=%d\n", var);
                     68: #      endif
                     69: 
                     70:        /* if the main tree is single var then put it in sq list */
                     71:        mainvar = -1;
                     72:        if (tree->sym.value.sym_root.tvarc == 1)
                     73:        {
                     74:                mainvar = bitpos(tree->sym.value.sym_root.lvarm | tree->sym.value.sym_root.rvarm);
                     75:                if (sqlist[mainvar] != 0)
                     76:                        syserr("help reformat");
                     77:                sqlist[mainvar] = tree;
                     78: #              ifdef xDTR1
                     79:                if (tTf(39, 0))
                     80:                        printf("including var %d\n", mainvar);
                     81: #              endif
                     82:        }
                     83:        for(i = 0; i < MAXRANGE; i++)
                     84:                if (sq = sqlist[i])
                     85:                {
                     86: #                      ifdef xDTR1
                     87:                        if (tTf(39, 0))
                     88:                                printf("Sqlist[%d]:\n", i);
                     89: #                      endif
                     90:                        lmap = linkmap;
                     91:                        for (j = 0; j < MAXDOM; j++)
                     92:                                *lmap++ = 0;
                     93:                        if (findlinks(sq->right, var, i, linkmap))
                     94:                        {
                     95: #                              ifdef xDTR1
                     96:                                if (tTf(39, 1))
                     97:                                        prlinks("Findlinks found", linkmap);
                     98: #                              endif
                     99:                                rangrf(i, var, sq, buf, linkmap, tree, locrang);
                    100:                        }
                    101:                }
                    102:        if (mainvar >= 0)
                    103:                sqlist[mainvar] = 0;
                    104: }
                    105: /*
                    106: ** Findlinks -- Determine whether there are any equalify clauses
                    107: **     between the "linkv" and the variable selected for tuple
                    108: **     substitution "selv".
                    109: **
                    110: **     Return:
                    111: **             TRUE if there is a linking variable
                    112: **             FALSE if not
                    113: **
                    114: **     Side Effects:
                    115: **             The linkmap is set to 1 for each linking domains.
                    116: **             ie. if domains 3 is a linking domains then
                    117: **             linkmap[3] = 1.
                    118: */
                    119: 
                    120: findlinks(node, selv, linkv, linkmap)
                    121: QTREE  *node;
                    122: int    selv, linkv;
                    123: char   linkmap[];
                    124: {
                    125:        register QTREE  *b, *a;
                    126:        register int    s;
                    127:        QTREE           *temp;
                    128:        extern QTREE    *ckvar();
                    129: 
                    130:        a = node;
                    131: #      ifdef xDTR1
                    132:        if (tTf(39, 2))
                    133:        {
                    134:                printf("FINDLINKS:");
                    135:                nodepr(a);
                    136:        }
                    137: #      endif
                    138:        if (a->sym.type == QLEND) 
                    139:                return (0);
                    140:        s = findlinks(a->right, selv, linkv, linkmap);
                    141: 
                    142:        /*
                    143:        ** This mess is looking for a clause of the form:
                    144:        **              EQ
                    145:        **      Var             Var
                    146:        **      linkv           selv
                    147:        ** Where the VARS can be in either order
                    148:        */
                    149:        if ((b = a->left)->sym.type != BOP || b->sym.value.sym_op.opno!= opEQ ||
                    150:                b->left->sym.type!=VAR || b->right->sym.type!=VAR)
                    151:                        return (s);
                    152: 
                    153:        a = ckvar(b->left);
                    154:        b = ckvar(b->right);
                    155:        if (b->sym.value.sym_var.varno == linkv) 
                    156:        {
                    157:                temp = a;
                    158:                a = b;
                    159:                b = temp;
                    160:        }
                    161:        if (a->sym.value.sym_var.varno != linkv || (selv >= 0 && b->sym.value.sym_var.varno != selv))
                    162:                return (s);
                    163: 
                    164:        linkmap[a->sym.value.sym_var.attno] = 1;
                    165: #      ifdef xDTR1
                    166:        if (tTf(39, 3))
                    167:                printf("found:attno=%d\n", a->sym.value.sym_var.attno);
                    168: #      endif
                    169:        return (1);
                    170: }
                    171: /*
                    172: ** Rangrf -- reformat the variable "var" if it is cost effective.
                    173: **
                    174: **     Rangrf does the actual work of reformat. If the relation is
                    175: **     already keyed usefully then rangrf does nothing.
                    176: **     Otherwise rangrf is split into two decisions:
                    177: **     A user relation must first be projected and then modified;
                    178: **     a _SYS relation can be modified directly.
                    179: **
                    180: **     It may be cost effective to just project a user relation.
                    181: */
                    182: 
                    183: /*ARGSUSED*/
                    184: rangrf(var, substvar, sq, buf, linkmap, tree, locrang)
                    185: int    var, substvar;
                    186: QTREE  *sq;
                    187: char   buf[];
                    188: char   linkmap[];
                    189: QTREE  *tree;
                    190: int    locrang[];
                    191: {
                    192:        register struct rang_tab        *r, *rs;
                    193:        register int                    j;
                    194:        char                            nums[2];
                    195:        int                             i, newwid;
                    196:        QTREE                           *pq;
                    197:        long                            npages, newpages;
                    198:        long                            origcost, modcost, projcost;
                    199:        extern char                     *rangename();
                    200:        extern long                     rel_pages(), hashcost();
                    201:        extern QTREE                    *makroot();
                    202:        extern DESC                     *openr1();
                    203:        extern QTREE                    **mksqlist();
                    204: 
                    205:        r = &De.de_rangev[var];
                    206:        rs = &De.de_rangev[substvar];
                    207:        npages = rel_pages(r->rtcnt, r->rtwid);
                    208: 
                    209:        /* calculate original cost of substitution */
                    210: 
                    211:        origcost = rs->rtcnt * npages;
                    212: 
                    213: #      ifdef xDTR1
                    214:        if (tTf(39, 5))
                    215:                printf("eval of mod for var %d. orig cost=%ld\n", var, origcost);
                    216: #      endif
                    217: 
                    218:        /* if relation is already keyed usefully, just return */
                    219:        if (i = ckpkey(linkmap, var))
                    220:        {
                    221: #              ifdef xDTR1
                    222:                if (tTf(39, 4))
                    223:                        printf("already keyed ok %d\n", i);
                    224: #              endif
                    225:                return;
                    226:        }
                    227: 
                    228:        /* if this is a primary relation, a projection must be done
                    229:        ** before any modify can be performed */
                    230: 
                    231:        if (!rnum_temp(r->relnum))
                    232:        {
                    233:                /* evaluation for primary, user relation */
                    234: 
                    235:                /* to save some time, don't evaluate if origcost is very small */
                    236:                if (origcost < OVHDP + 1 + npages)
                    237:                        return;
                    238: 
                    239:                j = markbuf(buf);
                    240: 
                    241:                /* build a projection tree but don't actually create the projection */
                    242:                pq = makroot(buf);
                    243:                dfind(sq, buf, mksqlist(pq, var));
                    244: 
                    245:                newwid = findwid(pq);
                    246:                newpages = rel_pages(r->rtcnt, newwid);
                    247: 
                    248:                /*
                    249:                ** Calculate cost of projection + new cost of substitution
                    250:                ** projcost = readoldpages + writenewpages + runquery + overhead
                    251:                */
                    252: 
                    253:                projcost = npages + newpages + rs->rtcnt * newpages + OVHDP;
                    254: 
                    255: 
                    256:                /*  CALCULATE COST OF MODIFY = COST OF PROJECTION + COST OF MODIFY
                    257:                 *      AND NEW COST OF SUBSTITUTION AFTER MODIFY    */
                    258: 
                    259:                modcost = (npages + newpages) +
                    260:                                hashcost(newpages) +
                    261:                                rs->rtcnt +
                    262:                                OVHDP + OVHDM;
                    263: 
                    264: #              ifdef xDTR1
                    265:                if (tTf(39, 5))
                    266:                {
                    267:                        printf("primary rel: proj cost=%ld\t", projcost);
                    268:                        printf("mod cost=%ld\n", modcost);
                    269:                }
                    270: #              endif
                    271: 
                    272:                if (origcost <= modcost)
                    273:                        if (origcost <= projcost)
                    274:                        {
                    275:                                freebuf(buf, j);
                    276:                                return;
                    277:                        }
                    278: 
                    279: #              ifdef xDTR1
                    280:                if (tTf(39, 5))
                    281:                        printf("doing projection\n");
                    282: #              endif
                    283: 
                    284:                /* i will be TRUE if the proj was aborted */
                    285:                i = primrf(var, pq, locrang);
                    286:                freebuf(buf, j);
                    287:                if ((projcost <= modcost) || i)
                    288:                        return;
                    289:        }
                    290: 
                    291:        /*  IF THIS IS A TEMPORARY RELATION, A MODIFY CAN BE DONE DIRECTLY  */
                    292: 
                    293:        else
                    294:        {
                    295: 
                    296:                /*  CALCULATE MODIFY COST (which does not include a projection)
                    297:                 *  AND NEW COST OF SUBSTITUTION                                  */
                    298: 
                    299:                modcost = hashcost(npages)
                    300:                          + rs->rtcnt + OVHDM;
                    301: 
                    302: #              ifdef xDTR1
                    303:                if (tTf(39, 5))
                    304:                        printf("temp rel: mod cost=%ld\n", modcost);
                    305: #              endif
                    306: 
                    307:                if (origcost <= modcost)
                    308:                        return;
                    309:        }
                    310: 
                    311: #      ifdef xDTR1
                    312:        if (tTf(39, 5))
                    313:                printf("doing modify\n");
                    314: #      endif
                    315: 
                    316:        initp();
                    317:        setp(PV_STR, rangename(var));
                    318:        setp(PV_STR, "hash");   /* modify to hash */
                    319:        setp(PV_STR, "num");    /* passing domain numbers - not names */
                    320: 
                    321:        nums[1] = '\0';
                    322:        for (j = 0; j < MAXDOM; j++)
                    323:                if (linkmap[j])
                    324:                {
                    325:                        *nums = j;
                    326:                        setp(PV_STR, nums);
                    327:                }
                    328: 
                    329:        /* fill relation completely */
                    330:        setp(PV_STR, "");
                    331:        setp(PV_STR, "fillfactor");
                    332:        setp(PV_STR, "100");
                    333:        setp(PV_STR, "minpages");
                    334:        setp(PV_STR, "1");
                    335:        closer1(var);
                    336:        call_dbu(mdMODIFY, FALSE);
                    337: 
                    338:        /* re-open modified range to get accurate descriptor */
                    339:        openr1(var);
                    340: }
                    341: /*
                    342: ** Primrf -- Replace a user relation with a projection of the needed domains.
                    343: **
                    344: **     Primrf retrieves into a temporary relation, the domains
                    345: **     specified by the "pq" tree. The range table is updated to
                    346: **     reflect the new range.
                    347: **
                    348: **     In fact a projection is not an accurate way to describe what
                    349: **     happens. In order to avoid changing any attribute numbers in
                    350: **     the query, the needed domains are projected and the domains
                    351: **     inbetween are created as type "c0". This way they occupy
                    352: **     no space and the attribute numbering is unchanged. Of course,
                    353: **     any attributes beyond the last one needed are simply discarded.
                    354: **
                    355: **     In previous versions attempts were made to project only the needed
                    356: **     domains and to renumber the query tree. This proved to be very
                    357: **     expensive and difficult and was not always as optimal as this
                    358: **     method.
                    359: **
                    360: **     The routines newquery() and endquery() will undo the effects
                    361: **     of primrf and restore the range table back to its original state.
                    362: */
                    363: 
                    364: primrf(var, pq, locrang)
                    365: int    var;
                    366: QTREE  *pq;
                    367: int    locrang[];
                    368: {
                    369:        register QTREE  *q, **np;
                    370:        register int    i;
                    371:        int             maxresno, rnum;
                    372:        QTREE           *node1[MAXDOM+1], *node2[MAXDOM+1];
                    373:        static char     czero[QT_HDR_SIZ + sizeof (struct resdomnode)]; /* a dummy resdom */
                    374:        extern DESC     *openr1();
                    375:        extern char     *rnum_convert();
                    376: 
                    377: #      ifdef xDTR1
                    378:        if (tTf(39, 6))
                    379:                printf("PRIMRF:\n");
                    380: #      endif
                    381: 
                    382:        /* renumber RESDOMs to match their VARs */
                    383:        for (q = pq->left; q->sym.type != TREE; q = q->left)
                    384:                q->sym.value.sym_resdom.resno = q->right->sym.value.sym_var.attno;
                    385: 
                    386:        /* form list of RESDOMs from outermost inward */
                    387:        node1[lnode(pq->left, node1, 0)] = 0;
                    388: 
                    389:        /* form a dummy RESDOM with type CHAR and length 0 */
                    390:        q = (QTREE *) czero;
                    391:        q->sym.value.sym_resdom.resfrmt = CHAR;
                    392:        q->sym.value.sym_resdom.resfrml = 0;
                    393: 
                    394:        /* zero node2 */
                    395:        for (np = node2, i = 0; i < MAXDOM + 1; i++)
                    396:                *np++ = 0;
                    397: 
                    398:        /* sort RESDOMs into node2 */
                    399:        maxresno = 0;
                    400:        for (np = node1; q = *np++; )
                    401:        {
                    402:                if ((i = q->sym.value.sym_resdom.resno) == 0)
                    403:                        return (1);     /* abort. Tid is referenced */
                    404:                if (i > maxresno)
                    405:                        maxresno = i;
                    406:                node2[i-1] = q;
                    407:        }
                    408: 
                    409:        /* fill missing RESDOMs with czero */
                    410:        for (np = node2, i = 0; i < maxresno; i++, np++)
                    411:                if (*np == 0)
                    412:                        *np = (QTREE *) czero;
                    413: 
                    414: 
                    415:        /* set up params for the create */
                    416:        initp();
                    417:        setp(PV_STR, "0");      /* initial relstat field */
                    418:        rnum = rnum_alloc();
                    419:        setp(PV_STR, rnum_convert(rnum));
                    420:        domnam(node2, "f");
                    421:        call_dbu(mdCREATE, FALSE);
                    422: 
                    423:        /* now run projection */
                    424:        i = var;
                    425:        execsq1(pq, i, rnum);
                    426:        new_range(i, rnum);
                    427:        /* save the name of the new relation */
                    428:        savrang(locrang, i);
                    429:        openr1(i);
                    430:        return (0);
                    431: }
                    432: /*
                    433: ** Ckpkey -- determine if a relation is already keyed usefully.
                    434: **
                    435: **     Ckpkey gets the key structure from paramd() and compares
                    436: **     it to the linkmap. If the relation is ISAM and the first keyed
                    437: **     domain is in linkmap, or if it is HASH and all keyed domains
                    438: **     are in the linkmap, then ckpkey() returns >0, else
                    439: **     ckpkey looks for secondary indexes which are usable.
                    440: **     if none, then ckpkey() returns 0.
                    441: **
                    442: **     The value returned is an estimate of the number of
                    443: **     pages which must be read to satisfy a query given
                    444: **     equality clauses on the linkmap domains and the
                    445: **     structure of the relation. The (crude) estimates are:
                    446: **             hash    1 page
                    447: **             isam    2 pages
                    448: **             hash sec index  2 pages
                    449: **             isam sec index  3 pages
                    450: */
                    451: 
                    452: ckpkey(linkmap, var)
                    453: char   linkmap[];
                    454: int    var;
                    455: {
                    456:        register DESC           *d;
                    457:        register int            i;
                    458:        struct index            itup;
                    459:        struct accessparam      ap;
                    460:        TID                     lotid, hitid;
                    461:        extern DESC             *readopen(), *openr1(), Inddes;
                    462: 
                    463: 
                    464: #      ifdef  xDTR1
                    465:        if (tTf(39, 11))
                    466:                printf("CKPKEY:%s\n", rangename(var));
                    467: #      endif
                    468: 
                    469:        /* if relation is an unindexed heap, then it cannot be keyed usefully */
                    470:        d = openr1(var);
                    471:        if (d->reldum.relspec != M_HEAP || d->reldum.relindxd > 0)
                    472:        {
                    473:                d = readopen(var);      /* open relation if it hasn't been already */
                    474:                paramd(d, &ap);
                    475:                if (ckpkey1(linkmap, &ap) == 0)
                    476:                        return (ap.mode == EXACTKEY ? 1 : 2);   /* success */
                    477:                
                    478:                if (d->reldum.relindxd > 0)
                    479:                {
                    480:                        opencatalog("indexes", 0);
                    481:                        setkey(&Inddes, (char *)&itup, d->reldum.relid, IRELIDP);
                    482:                        setkey(&Inddes, (char *)&itup, d->reldum.relowner, IOWNERP);
                    483:                        if (i = find(&Inddes, EXACTKEY, &lotid, &hitid, (char *)&itup))
                    484:                                syserr("ckpkey:find %d", i);
                    485: 
                    486:                        while ((i = get(&Inddes, &lotid, &hitid, (char *)&itup, TRUE)) == 0)
                    487:                        {
                    488:                                if (!bequal(itup.irelidp, d->reldum.relid, MAXNAME + 2))
                    489:                                        continue;
                    490:                                
                    491:                                parami(&itup, &ap);
                    492:                                if (ckpkey1(linkmap, &ap) == 0)
                    493:                                        return (ap.mode == EXACTKEY ? 2 : 3);   /* success */
                    494:                        }
                    495:                }
                    496:        }
                    497:        return (0);     /* failure. no useful structure */
                    498: }
                    499: 
                    500: 
                    501: 
                    502: ckpkey1(linkmap, ap)
                    503: char                   linkmap[];
                    504: struct accessparam     *ap;
                    505: {
                    506:        register int    i, k, anykey;
                    507: 
                    508:        if (ap->mode == NOKEY)
                    509:                return (1);
                    510:        anykey = 0;
                    511:        for (i = 0; k = ap->keydno[i]; i++)
                    512:        {
                    513:                if (linkmap[k] == 0)
                    514:                {
                    515:                        if (ap->mode == EXACTKEY) 
                    516:                                return (1);
                    517:                        else 
                    518:                                break;
                    519:                }
                    520:                anykey++;
                    521:        }
                    522:        return (!anykey);
                    523: }
                    524: /*
                    525: ** Findwid -- scan the target list of the projection tree
                    526: **     to determine the resultant tuple size.
                    527: **
                    528: **     Return:
                    529: **             Size of projected tuple.
                    530: */
                    531: 
                    532: findwid(tree)
                    533: QTREE  *tree;
                    534: {
                    535:        register QTREE  *nod, *t;
                    536:        register int    wid;
                    537: 
                    538:        wid = 0;
                    539:        t = tree;
                    540: 
                    541:        for (nod = t->left; nod && nod->sym.type != TREE; nod = nod->left)
                    542:        {
                    543:                wid += nod->sym.value.sym_var.varfrml & 0377;
                    544:        }
                    545: 
                    546:        return (wid);
                    547: }
                    548: /*
                    549: **  HASHCOST -- determine cost to modify to hash
                    550: **
                    551: **     Estimates the cost to modify the relation to hash.
                    552: **     The estimate is crude since it assumes that there
                    553: **     are no duplicates and that a "unix" page will be
                    554: **     the same size as an "ingres" page.
                    555: **
                    556: **     The cost is based on the parameters which signify
                    557: **     the size of the in-core sort buffer and the n-way
                    558: **     merge sort plus the cost to read the sorted file
                    559: **     and write the new relation twice (that's the was it works!).
                    560: **
                    561: **     Parameters:
                    562: **             npages - the number of pages (estimate) which the
                    563: **                             relation currently occupies.
                    564: **
                    565: **     Returns:
                    566: **             the cost to hash the relation.
                    567: **
                    568: **     Side Effects:
                    569: **             none
                    570: **
                    571: **     Called By:
                    572: **             rangrf
                    573: */
                    574: 
                    575: # define       COREBUFSIZE     32767 / PGSIZE
                    576: # define       MERGESIZE       7
                    577: 
                    578: long
                    579: hashcost(npages)
                    580: long   npages;
                    581: {
                    582:        long            sortpages, total;
                    583:        register int    nfiles;
                    584: 
                    585:        nfiles = npages / COREBUFSIZE;
                    586:        sortpages = 2 * npages;
                    587: 
                    588:        while (nfiles > 1)
                    589:        {
                    590:                nfiles = (nfiles + MERGESIZE - 1) / MERGESIZE;
                    591:                sortpages += 2 * npages;
                    592:        }
                    593: 
                    594:        total = sortpages + npages + npages + npages;
                    595: #      ifdef xDTR1
                    596:        if (tTf(39, 5))
                    597:                printf("hashcost is %ld\n", total);
                    598: #      endif
                    599: 
                    600:        return (total);
                    601: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.