Annotation of GNUtools/bison/reduce.c, revision 1.1.1.1

1.1       root        1: /* Grammar reduction for Bison.
                      2:    Copyright (C) 1988, 1989 Free Software Foundation, Inc.
                      3: 
                      4: This file is part of Bison, the GNU Compiler Compiler.
                      5: 
                      6: Bison is free software; you can redistribute it and/or modify
                      7: it under the terms of the GNU General Public License as published by
                      8: the Free Software Foundation; either version 2, or (at your option)
                      9: any later version.
                     10: 
                     11: Bison is distributed in the hope that it will be useful,
                     12: but WITHOUT ANY WARRANTY; without even the implied warranty of
                     13: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     14: GNU General Public License for more details.
                     15: 
                     16: You should have received a copy of the GNU General Public License
                     17: along with Bison; see the file COPYING.  If not, write to
                     18: the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
                     19: 
                     20: 
                     21: /*
                     22:  * Reduce the grammar:  Find and eliminate unreachable terminals,
                     23:  * nonterminals, and productions.  David S. Bakin.
                     24:  */
                     25: 
                     26: /*
                     27:  * Don't eliminate unreachable terminals:  They may be used by the user's
                     28:  * parser.
                     29:  */
                     30: 
                     31: #include <stdio.h>
                     32: #include "system.h"
                     33: #include "files.h"
                     34: #include "gram.h"
                     35: #include "machine.h"
                     36: #include "new.h"
                     37: 
                     38: 
                     39: extern char   **tags;          /* reader.c */
                     40: extern int      verboseflag;   /* getargs.c */
                     41: static int      statisticsflag;        /* XXXXXXX */
                     42: 
                     43: #ifndef TRUE
                     44: #define TRUE   (1)
                     45: #define FALSE  (0)
                     46: #endif
                     47: typedef int bool;
                     48: typedef unsigned *BSet;
                     49: typedef short  *rule;
                     50: 
                     51: 
                     52: /*
                     53:  * N is set of all nonterminals which are not useless.  P is set of all rules
                     54:  * which have no useless nonterminals in their RHS.  V is the set of all
                     55:  * accessible symbols.
                     56:  */
                     57: 
                     58: static BSet     N, P, V, V1;
                     59: 
                     60: static int      nuseful_productions, nuseless_productions,
                     61:                 nuseful_nonterminals, nuseless_nonterminals;
                     62: 
                     63: 
                     64: static void useless_nonterminals();
                     65: static void inaccessable_symbols();
                     66: static void reduce_grammar_tables();
                     67: static void print_results();
                     68: static void print_notices();
                     69: void dump_grammar();
                     70: 
                     71: extern void fatals ();
                     72: 
                     73: 
                     74: bool
                     75: bits_equal (L, R, n)
                     76: BSet L;
                     77: BSet R;
                     78: int n;
                     79: {
                     80:   int i;
                     81: 
                     82:   for (i = n - 1; i >= 0; i--)
                     83:     if (L[i] != R[i])
                     84:       return FALSE;
                     85:   return TRUE;
                     86: }
                     87: 
                     88: 
                     89: int
                     90: nbits (i)
                     91: unsigned i;
                     92: {
                     93:   int count = 0;
                     94: 
                     95:   while (i != 0) {
                     96:     i ^= (i & -i);
                     97:     ++count;
                     98:   }
                     99:   return count;
                    100: }
                    101: 
                    102: 
                    103: int
                    104: bits_size (S, n)
                    105: BSet S;
                    106: int n;
                    107: {
                    108:   int i, count = 0;
                    109: 
                    110:   for (i = n - 1; i >= 0; i--)
                    111:     count += nbits(S[i]);
                    112:   return count;
                    113: }
                    114: 
                    115: void
                    116: reduce_grammar ()
                    117: {
                    118:   bool reduced;
                    119: 
                    120:   /* Allocate the global sets used to compute the reduced grammar */
                    121: 
                    122:   N = NEW2(WORDSIZE(nvars), unsigned);
                    123:   P = NEW2(WORDSIZE(nrules + 1), unsigned);
                    124:   V = NEW2(WORDSIZE(nsyms), unsigned);
                    125:   V1 = NEW2(WORDSIZE(nsyms), unsigned);
                    126: 
                    127:   useless_nonterminals();
                    128:   inaccessable_symbols();
                    129: 
                    130:   reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
                    131: 
                    132:   if (verboseflag)
                    133:     print_results();
                    134: 
                    135:   if (reduced == FALSE)
                    136:     goto done_reducing;
                    137: 
                    138:   print_notices();
                    139: 
                    140:   if (!BITISSET(N, start_symbol - ntokens))
                    141:     fatals("Start symbol %s does not derive any sentence.",
                    142:           tags[start_symbol]);
                    143: 
                    144:   reduce_grammar_tables();
                    145:   /* if (verboseflag) {
                    146:      fprintf(foutput, "REDUCED GRAMMAR\n\n");
                    147:      dump_grammar();
                    148:      }
                    149:      */
                    150: 
                    151:   /**/ statisticsflag = FALSE; /* someday getopts should handle this */
                    152:   if (statisticsflag == TRUE)
                    153:     fprintf(stderr,
                    154:            "reduced %s defines %d terminal%s, %d nonterminal%s\
                    155: , and %d production%s.\n", infile,
                    156:            ntokens, (ntokens == 1 ? "" : "s"),
                    157:            nvars,   (nvars   == 1 ? "" : "s"),
                    158:            nrules,  (nrules  == 1 ? "" : "s"));
                    159: 
                    160:  done_reducing:
                    161: 
                    162:   /* Free the global sets used to compute the reduced grammar */
                    163: 
                    164:   FREE(N);
                    165:   FREE(V);
                    166:   FREE(P);
                    167: 
                    168: }
                    169: 
                    170: /*
                    171:  * Another way to do this would be with a set for each production and then do
                    172:  * subset tests against N, but even for the C grammar the whole reducing
                    173:  * process takes only 2 seconds on my 8Mhz AT.
                    174:  */
                    175: 
                    176: static bool 
                    177: useful_production (i, N)
                    178: int  i;
                    179: BSet N;
                    180: {
                    181:   rule  r;
                    182:   short n;
                    183: 
                    184:   /*
                    185:    * A production is useful if all of the nonterminals in its RHS
                    186:    * appear in the set of useful nonterminals.
                    187:    */
                    188: 
                    189:   for (r = &ritem[rrhs[i]]; *r > 0; r++)
                    190:     if (ISVAR(n = *r))
                    191:       if (!BITISSET(N, n - ntokens))
                    192:        return FALSE;
                    193:   return TRUE;
                    194: }
                    195: 
                    196: 
                    197: /* Remember that rules are 1-origin, symbols are 0-origin. */
                    198: 
                    199: static void 
                    200: useless_nonterminals ()
                    201: {
                    202:   BSet Np, Ns;
                    203:   int  i, n;
                    204: 
                    205:   /*
                    206:    * N is set as built.  Np is set being built this iteration. P is set
                    207:    * of all productions which have a RHS all in N.
                    208:    */
                    209: 
                    210:   Np = NEW2(WORDSIZE(nvars), unsigned);
                    211: 
                    212:   /*
                    213:    * The set being computed is a set of nonterminals which can derive
                    214:    * the empty string or strings consisting of all terminals. At each
                    215:    * iteration a nonterminal is added to the set if there is a
                    216:    * production with that nonterminal as its LHS for which all the
                    217:    * nonterminals in its RHS are already in the set.  Iterate until the
                    218:    * set being computed remains unchanged.  Any nonterminals not in the
                    219:    * set at that point are useless in that they will never be used in
                    220:    * deriving a sentence of the language.
                    221:    * 
                    222:    * This iteration doesn't use any special traversal over the
                    223:    * productions.  A set is kept of all productions for which all the
                    224:    * nonterminals in the RHS are in useful.  Only productions not in
                    225:    * this set are scanned on each iteration.  At the end, this set is
                    226:    * saved to be used when finding useful productions: only productions
                    227:    * in this set will appear in the final grammar.
                    228:    */
                    229: 
                    230:   n = 0;
                    231:   while (1)
                    232:     {
                    233:       for (i = WORDSIZE(nvars) - 1; i >= 0; i--)
                    234:        Np[i] = N[i];
                    235:       for (i = 1; i <= nrules; i++)
                    236:        {
                    237:          if (!BITISSET(P, i))
                    238:            {
                    239:              if (useful_production(i, N))
                    240:                {
                    241:                  SETBIT(Np, rlhs[i] - ntokens);
                    242:                  SETBIT(P, i);
                    243:                }
                    244:            }
                    245:        }
                    246:       if (bits_equal(N, Np, WORDSIZE(nvars)))
                    247:        break;
                    248:       Ns = Np;
                    249:       Np = N;
                    250:       N = Ns;
                    251:     }
                    252:   FREE(N);
                    253:   N = Np;
                    254: }
                    255: 
                    256: static void 
                    257: inaccessable_symbols ()
                    258: {
                    259:   BSet  Vp, Vs, Pp;
                    260:   int   i, n;
                    261:   short t;
                    262:   rule  r;
                    263: 
                    264:   /*
                    265:    * Find out which productions are reachable and which symbols are
                    266:    * used.  Starting with an empty set of productions and a set of
                    267:    * symbols which only has the start symbol in it, iterate over all
                    268:    * productions until the set of productions remains unchanged for an
                    269:    * iteration.  For each production which has a LHS in the set of
                    270:    * reachable symbols, add the production to the set of reachable
                    271:    * productions, and add all of the nonterminals in the RHS of the
                    272:    * production to the set of reachable symbols.
                    273:    * 
                    274:    * Consider only the (partially) reduced grammar which has only
                    275:    * nonterminals in N and productions in P.
                    276:    * 
                    277:    * The result is the set P of productions in the reduced grammar, and
                    278:    * the set V of symbols in the reduced grammar.
                    279:    * 
                    280:    * Although this algorithm also computes the set of terminals which are
                    281:    * reachable, no terminal will be deleted from the grammar. Some
                    282:    * terminals might not be in the grammar but might be generated by
                    283:    * semantic routines, and so the user might want them available with
                    284:    * specified numbers.  (Is this true?)  However, the nonreachable
                    285:    * terminals are printed (if running in verbose mode) so that the user
                    286:    * can know.
                    287:    */
                    288: 
                    289:   Vp = NEW2(WORDSIZE(nsyms), unsigned);
                    290:   Pp = NEW2(WORDSIZE(nrules + 1), unsigned);
                    291: 
                    292:   /* If the start symbol isn't useful, then nothing will be useful. */
                    293:   if (!BITISSET(N, start_symbol - ntokens))
                    294:     goto end_iteration;
                    295: 
                    296:   SETBIT(V, start_symbol);
                    297: 
                    298:   n = 0;
                    299:   while (1)
                    300:     {
                    301:       for (i = WORDSIZE(nsyms) - 1; i >= 0; i--)
                    302:        Vp[i] = V[i];
                    303:       for (i = 1; i <= nrules; i++)
                    304:        {
                    305:          if (!BITISSET(Pp, i) && BITISSET(P, i) && 
                    306:              BITISSET(V, rlhs[i]))
                    307:            {
                    308:              for (r = &ritem[rrhs[i]]; *r >= 0; r++)
                    309:                {
                    310:                  if (ISTOKEN(t = *r)
                    311:                      || BITISSET(N, t - ntokens))
                    312:                    {
                    313:                      SETBIT(Vp, t);
                    314:                    }
                    315:                }
                    316:              SETBIT(Pp, i);
                    317:            }
                    318:        }
                    319:       if (bits_equal(V, Vp, WORDSIZE(nsyms)))
                    320:        {
                    321:          break;
                    322:        }
                    323:       Vs = Vp;
                    324:       Vp = V;
                    325:       V = Vs;
                    326:     }
                    327:  end_iteration:
                    328: 
                    329:   FREE(V);
                    330:   V = Vp;
                    331: 
                    332:   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
                    333:   SETBIT(V, 0);                        /* end-of-input token */
                    334:   SETBIT(V, 1);                        /* error token */
                    335:   SETBIT(V, 2);                        /* illegal token */
                    336: 
                    337:   FREE(P);
                    338:   P = Pp;
                    339: 
                    340:   nuseful_productions = bits_size(P, WORDSIZE(nrules + 1));
                    341:   nuseless_productions = nrules - nuseful_productions;
                    342: 
                    343:   nuseful_nonterminals = 0;
                    344:   for (i = ntokens; i < nsyms; i++)
                    345:     if (BITISSET(V, i))
                    346:       nuseful_nonterminals++;
                    347:   nuseless_nonterminals = nvars - nuseful_nonterminals;
                    348: 
                    349:   /* A token that was used in %prec should not be warned about.  */
                    350:   for (i = 1; i < nrules; i++)
                    351:     if (rprecsym[i] != 0)
                    352:       SETBIT(V1, rprecsym[i]);
                    353: }
                    354: 
                    355: static void 
                    356: reduce_grammar_tables ()
                    357: {
                    358: /* This is turned off because we would need to change the numbers
                    359:    in the case statements in the actions file.  */
                    360: #if 0
                    361:   /* remove useless productions */
                    362:   if (nuseless_productions > 0)
                    363:     {
                    364:       short np, pn, ni, pi;
                    365: 
                    366:       np = 0;
                    367:       ni = 0;
                    368:       for (pn = 1; pn <= nrules; pn++)
                    369:        {
                    370:          if (BITISSET(P, pn))
                    371:            {
                    372:              np++;
                    373:              if (pn != np)
                    374:                {
                    375:                  rlhs[np] = rlhs[pn];
                    376:                  rline[np] = rline[pn];
                    377:                  rprec[np] = rprec[pn];
                    378:                  rassoc[np] = rassoc[pn];
                    379:                  rrhs[np] = rrhs[pn];
                    380:                  if (rrhs[np] != ni)
                    381:                    {
                    382:                      pi = rrhs[np];
                    383:                      rrhs[np] = ni;
                    384:                      while (ritem[pi] >= 0)
                    385:                        ritem[ni++] = ritem[pi++];
                    386:                      ritem[ni++] = -np;
                    387:                    }
                    388:                } else {
                    389:                  while (ritem[ni++] >= 0);
                    390:                }
                    391:            }
                    392:        }
                    393:       ritem[ni] = 0;
                    394:       nrules -= nuseless_productions;
                    395:       nitems = ni;
                    396: 
                    397:       /*
                    398:        * Is it worth it to reduce the amount of memory for the
                    399:        * grammar? Probably not.
                    400:        */
                    401: 
                    402:     }
                    403: #endif /* 0 */
                    404:   /* Disable useless productions,
                    405:      since they may contain useless nonterms
                    406:      that would get mapped below to -1 and confuse everyone.  */
                    407:   if (nuseless_productions > 0)
                    408:     {
                    409:       int pn;
                    410: 
                    411:       for (pn = 1; pn <= nrules; pn++)
                    412:        {
                    413:          if (!BITISSET(P, pn))
                    414:            {
                    415:              rlhs[pn] = -1;
                    416:            }
                    417:        }
                    418:     }
                    419: 
                    420:   /* remove useless symbols */
                    421:   if (nuseless_nonterminals > 0)
                    422:     {
                    423: 
                    424:       int    i, n;
                    425: /*      short  j; JF unused */
                    426:       short *nontermmap;
                    427:       rule   r;
                    428: 
                    429:       /*
                    430:        * create a map of nonterminal number to new nonterminal
                    431:        * number. -1 in the map means it was useless and is being
                    432:        * eliminated.
                    433:        */
                    434: 
                    435:       nontermmap = NEW2(nvars, short) - ntokens;
                    436:       for (i = ntokens; i < nsyms; i++)
                    437:        nontermmap[i] = -1;
                    438: 
                    439:       n = ntokens;
                    440:       for (i = ntokens; i < nsyms; i++)
                    441:        if (BITISSET(V, i))
                    442:          nontermmap[i] = n++;
                    443: 
                    444:       /* Shuffle elements of tables indexed by symbol number.  */
                    445: 
                    446:       for (i = ntokens; i < nsyms; i++)
                    447:        {
                    448:          n = nontermmap[i];
                    449:          if (n >= 0)
                    450:            {
                    451:              sassoc[n] = sassoc[i];
                    452:              sprec[n] = sprec[i];
                    453:              tags[n] = tags[i];
                    454:            } else {
                    455:              free(tags[i]);
                    456:            }
                    457:        }
                    458: 
                    459:       /* Replace all symbol numbers in valid data structures.  */
                    460: 
                    461:       for (i = 1; i <= nrules; i++)
                    462:        {
                    463:          /* Ignore the rules disabled above.  */
                    464:          if (rlhs[i] >= 0)
                    465:            rlhs[i] = nontermmap[rlhs[i]];
                    466:          if (ISVAR (rprecsym[i]))
                    467:            /* Can this happen?  */
                    468:            rprecsym[i] = nontermmap[rprecsym[i]];
                    469:        }
                    470: 
                    471:       for (r = ritem; *r; r++)
                    472:        if (ISVAR(*r))
                    473:          *r = nontermmap[*r];
                    474: 
                    475:       start_symbol = nontermmap[start_symbol];
                    476: 
                    477:       nsyms -= nuseless_nonterminals;
                    478:       nvars -= nuseless_nonterminals;
                    479: 
                    480:       free(&nontermmap[ntokens]);
                    481:     }
                    482: }
                    483: 
                    484: static void 
                    485: print_results ()
                    486: {
                    487:   int   i;
                    488: /*  short j; JF unused */
                    489:   rule  r;
                    490:   bool  b;
                    491: 
                    492:   if (nuseless_nonterminals > 0)
                    493:     {
                    494:       fprintf(foutput, "Useless nonterminals:\n\n");
                    495:       for (i = ntokens; i < nsyms; i++)
                    496:        if (!BITISSET(V, i))
                    497:          fprintf(foutput, "   %s\n", tags[i]);
                    498:     }
                    499:   b = FALSE;
                    500:   for (i = 0; i < ntokens; i++)
                    501:     {
                    502:       if (!BITISSET(V, i) && !BITISSET(V1, i))
                    503:        {
                    504:          if (!b)
                    505:            {
                    506:              fprintf(foutput, "\n\nTerminals which are not used:\n\n");
                    507:              b = TRUE;
                    508:            }
                    509:          fprintf(foutput, "   %s\n", tags[i]);
                    510:        }
                    511:     }
                    512: 
                    513:   if (nuseless_productions > 0)
                    514:     {
                    515:       fprintf(foutput, "\n\nUseless rules:\n\n");
                    516:       for (i = 1; i <= nrules; i++)
                    517:        {
                    518:          if (!BITISSET(P, i))
                    519:            {
                    520:              fprintf(foutput, "#%-4d  ", i);
                    521:              fprintf(foutput, "%s :\t", tags[rlhs[i]]);
                    522:              for (r = &ritem[rrhs[i]]; *r >= 0; r++)
                    523:                {
                    524:                  fprintf(foutput, " %s", tags[*r]);
                    525:                }
                    526:              fprintf(foutput, ";\n");
                    527:            }
                    528:        }
                    529:     }
                    530:   if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
                    531:     fprintf(foutput, "\n\n");
                    532: }
                    533: 
                    534: void 
                    535: dump_grammar ()
                    536: {
                    537:   int i;
                    538:   rule r;
                    539: 
                    540:   fprintf(foutput,
                    541:          "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
                    542:          ntokens, nvars, nsyms, nrules, nitems);
                    543:   fprintf(foutput, "Variables\n---------\n\n");
                    544:   fprintf(foutput, "Value  Sprec    Sassoc    Tag\n");
                    545:   for (i = ntokens; i < nsyms; i++)
                    546:     fprintf(foutput, "%5d  %5d  %5d  %s\n",
                    547:            i, sprec[i], sassoc[i], tags[i]);
                    548:   fprintf(foutput, "\n\n");
                    549:   fprintf(foutput, "Rules\n-----\n\n");
                    550:   for (i = 1; i <= nrules; i++)
                    551:     {
                    552:       fprintf(foutput, "%-5d(%5d%5d)%5d : (@%-5d)", 
                    553:              i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
                    554:       for (r = &ritem[rrhs[i]]; *r > 0; r++)
                    555:        fprintf(foutput, "%5d", *r);
                    556:       fprintf(foutput, " [%d]\n", -(*r));
                    557:     }
                    558:   fprintf(foutput, "\n\n");
                    559:   fprintf(foutput, "Rules interpreted\n-----------------\n\n");
                    560:   for (i = 1; i <= nrules; i++)
                    561:     {
                    562:       fprintf(foutput, "%-5d  %s :", i, tags[rlhs[i]]);
                    563:       for (r = &ritem[rrhs[i]]; *r > 0; r++)
                    564:        fprintf(foutput, " %s", tags[*r]);
                    565:       fprintf(foutput, "\n");
                    566:     }
                    567:   fprintf(foutput, "\n\n");
                    568: }
                    569: 
                    570: 
                    571: static void 
                    572: print_notices ()
                    573: {
                    574:   extern int fixed_outfiles;
                    575: 
                    576:   if (fixed_outfiles && nuseless_productions)
                    577:     fprintf(stderr, "%d rules never reduced\n", nuseless_productions);
                    578: 
                    579:   fprintf(stderr, "%s contains ", infile);
                    580: 
                    581:   if (nuseless_nonterminals > 0)
                    582:     {
                    583:       fprintf(stderr, "%d useless nonterminal%s",
                    584:              nuseless_nonterminals,
                    585:              (nuseless_nonterminals == 1 ? "" : "s"));
                    586:     }
                    587:   if (nuseless_nonterminals > 0 && nuseless_productions > 0)
                    588:     fprintf(stderr, " and ");
                    589: 
                    590:   if (nuseless_productions > 0)
                    591:     {
                    592:       fprintf(stderr, "%d useless rule%s",
                    593:              nuseless_productions,
                    594:              (nuseless_productions == 1 ? "" : "s"));
                    595:     }
                    596:   fprintf(stderr, ".\n");
                    597:   fflush(stderr);
                    598: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.