Annotation of GNUtools/emacs/man/texindex.c, revision 1.1

1.1     ! root        1: /* Prepare Tex index dribble output into an actual index.
        !             2:    Copyright (C) 1987 Free Software Foundation, Inc.
        !             3: 
        !             4:     This program is free software; you can redistribute it and/or modify
        !             5:     it under the terms of the GNU General Public License as published by
        !             6:     the Free Software Foundation; either version 1, or (at your option)
        !             7:     any later version.
        !             8: 
        !             9:     This program is distributed in the hope that it will be useful,
        !            10:     but WITHOUT ANY WARRANTY; without even the implied warranty of
        !            11:     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
        !            12:     GNU General Public License for more details.
        !            13: 
        !            14:     You should have received a copy of the GNU General Public License
        !            15:     along with this program; if not, write to the Free Software
        !            16:     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
        !            17: 
        !            18:  In other words, you are welcome to use, share and improve this program.
        !            19:  You are forbidden to forbid anyone else to use, share and improve
        !            20:  what you give them.   Help stamp out software-hoarding!  */
        !            21: 
        !            22: 
        !            23: #include <stdio.h>
        !            24: #include <ctype.h>
        !            25: 
        !            26: #ifdef VMS
        !            27: #include <file.h>
        !            28: 
        !            29: #define EXIT_SUCCESS ((1 << 28) | 1)
        !            30: #define EXIT_FATAL ((1 << 28) | 4)
        !            31: #define unlink delete
        !            32: #define tell(fd) lseek(fd, 0L, 1)
        !            33: #else
        !            34: #include <sys/types.h>
        !            35: #include <sys/fcntl.h>
        !            36: #include <sys/file.h>
        !            37: 
        !            38: #define EXIT_SUCCESS 0
        !            39: #define EXIT_FATAL 1
        !            40: #endif
        !            41: 
        !            42: 
        !            43: #ifndef L_XTND
        !            44: #define L_XTND 2
        !            45: #endif
        !            46: 
        !            47: /* When sorting in core, this structure describes one line
        !            48:  and the position and length of its first keyfield.  */
        !            49: 
        !            50: struct lineinfo
        !            51:   {
        !            52:     char *text;                /* The actual text of the line */
        !            53:     union
        !            54:       {                        /* The start of the key (for textual comparison) */
        !            55:        char *text;
        !            56:        long number;    /* or the numeric value (for numeric comparison) */
        !            57:       } key;
        !            58:     long keylen;       /* Length of key field */
        !            59:   };
        !            60: 
        !            61: /* This structure describes a field to use as a sort key */
        !            62: 
        !            63: struct keyfield
        !            64:   {
        !            65:     int startwords;            /* # words to skip  */
        !            66:     int startchars;            /*  and # additional chars to skip, to start of field */
        !            67:     int endwords;              /* similar, from beg (or end) of line, to find end of field */
        !            68:     int endchars;
        !            69:     char ignore_blanks;                /* Ignore spaces and tabs within the field */
        !            70:     char fold_case;            /* Convert upper case to lower before comparing */
        !            71:     char reverse;              /* Compare in reverse order */
        !            72:     char numeric;              /* Parse text as an integer and compare the integers */
        !            73:     char positional;           /* Sort according to position within the file */
        !            74:     char braced;               /* Count balanced-braced groupings as fields */
        !            75:   };
        !            76: 
        !            77: /* Vector of keyfields to use */
        !            78: 
        !            79: struct keyfield keyfields[3];
        !            80: 
        !            81: /* Number of keyfields stored in that vector.  */
        !            82: 
        !            83: int num_keyfields = 3;
        !            84: 
        !            85: /* Vector of input file names, terminated with a zero (null pointer) */
        !            86: 
        !            87: char **infiles;
        !            88: 
        !            89: /* Vector of corresponding output file names, or zero meaning default it */
        !            90: 
        !            91: char **outfiles;
        !            92: 
        !            93: /* Length of `infiles' */
        !            94: 
        !            95: int num_infiles;
        !            96: 
        !            97: /* Pointer to the array of pointers to lines being sorted */
        !            98: 
        !            99: char **linearray;
        !           100: 
        !           101: /* The allocated length of `linearray'. */
        !           102: 
        !           103: long lines;
        !           104: 
        !           105: /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
        !           106: 
        !           107: char *tempdir;
        !           108: 
        !           109: /* Start of filename to use for temporary files.  */
        !           110: 
        !           111: char *tempbase;
        !           112: 
        !           113: /* Number of last temporary file.  */
        !           114: 
        !           115: int tempcount;
        !           116: 
        !           117: /* Number of last temporary file already deleted.
        !           118:  Temporary files are deleted by `flush_tempfiles' in order of creation.  */
        !           119: 
        !           120: int last_deleted_tempcount;
        !           121: 
        !           122: /* During in-core sort, this points to the base of the data block
        !           123:  which contains all the lines of data.  */
        !           124: 
        !           125: char *text_base;
        !           126: 
        !           127: /* Additional command switches */
        !           128: 
        !           129: int keep_tempfiles;    /* Nonzero means do not delete tempfiles -- for debugging */
        !           130: 
        !           131: /* Forward declarations of functions in this file */
        !           132: 
        !           133: void decode_command ();
        !           134: void sort_in_core ();
        !           135: void sort_offline ();
        !           136: char **parsefile ();
        !           137: char *find_field ();
        !           138: char *find_pos ();
        !           139: long find_value ();
        !           140: char *find_braced_pos ();
        !           141: char *find_braced_end ();
        !           142: void writelines ();
        !           143: int compare_full ();
        !           144: long readline ();
        !           145: int merge_files ();
        !           146: int merge_direct ();
        !           147: char *concat ();
        !           148: char *maketempname ();
        !           149: void flush_tempfiles ();
        !           150: char *tempcopy ();
        !           151: 
        !           152: extern char *mktemp ();
        !           153: 
        !           154: #define MAX_IN_CORE_SORT 500000
        !           155: 
        !           156: int
        !           157: main (argc, argv)
        !           158:      int argc;
        !           159:      char **argv;
        !           160: {
        !           161:   int i;
        !           162: 
        !           163:   tempcount = 0;
        !           164:   last_deleted_tempcount = 0;
        !           165: 
        !           166:   /* Describe the kind of sorting to do. */
        !           167:   /* The first keyfield uses the first braced field and folds case */
        !           168:   keyfields[0].braced = 1;
        !           169:   keyfields[0].fold_case = 1;
        !           170:   keyfields[0].endwords = -1;
        !           171:   keyfields[0].endchars = -1;
        !           172:   /* The second keyfield uses the second braced field, numerically */
        !           173:   keyfields[1].braced = 1;
        !           174:   keyfields[1].numeric = 1;
        !           175:   keyfields[1].startwords = 1;
        !           176:   keyfields[1].endwords = -1;
        !           177:   keyfields[1].endchars = -1;
        !           178:   /* The third keyfield (which is ignored while discarding duplicates)
        !           179:      compares the whole line */
        !           180:   keyfields[2].endwords = -1;
        !           181:   keyfields[2].endchars = -1;
        !           182: 
        !           183:   decode_command (argc, argv);
        !           184: 
        !           185:   tempbase = mktemp (concat ("txiXXXXXX", "", ""));
        !           186: 
        !           187:   /* Process input files completely, one by one.  */
        !           188: 
        !           189:   for (i = 0; i < num_infiles; i++)
        !           190:     {
        !           191:       int desc;
        !           192:       long ptr;
        !           193:       char *outfile;
        !           194:       char *p;
        !           195: 
        !           196:       desc = open (infiles[i], 0, 0);
        !           197:       if (desc < 0) pfatal_with_name (infiles[i]);
        !           198:       lseek (desc, 0, L_XTND);
        !           199:       ptr = tell (desc);
        !           200:       close (desc);
        !           201: 
        !           202:       outfile = outfiles[i];
        !           203:       if (!outfile)
        !           204:        {
        !           205:          outfile = concat (infiles[i], "s", "");
        !           206:        }
        !           207: 
        !           208:       if (ptr < MAX_IN_CORE_SORT)
        !           209:         /* Sort a small amount of data */
        !           210:         sort_in_core (infiles[i], ptr, outfile);
        !           211:       else
        !           212:         sort_offline (infiles[i], ptr, outfile);
        !           213:     }
        !           214: 
        !           215:   flush_tempfiles (tempcount);
        !           216:   exit (EXIT_SUCCESS);
        !           217: }
        !           218: 
        !           219: /* This page decodes the command line arguments to set the parameter variables
        !           220:  and set up the vector of keyfields and the vector of input files */
        !           221: 
        !           222: void
        !           223: decode_command (argc, argv)
        !           224:      int argc;
        !           225:      char **argv;
        !           226: {
        !           227:   int i;
        !           228:   char **ip;
        !           229:   char **op;
        !           230: 
        !           231:   /* Store default values into parameter variables */
        !           232: 
        !           233: #ifdef VMS
        !           234:   tempdir = "sys$scratch:";
        !           235: #else
        !           236:   tempdir = "/tmp/";
        !           237: #endif
        !           238: 
        !           239:   keep_tempfiles = 0;
        !           240: 
        !           241:   /* Allocate argc input files, which must be enough.  */
        !           242: 
        !           243:   infiles = (char **) xmalloc (argc * sizeof (char *));
        !           244:   outfiles = (char **) xmalloc (argc * sizeof (char *));
        !           245:   ip = infiles;
        !           246:   op = outfiles;
        !           247: 
        !           248:   /* First find all switches that control the default kind-of-sort */
        !           249: 
        !           250:   for (i = 1; i < argc; i++)
        !           251:     {
        !           252:       int tem = classify_arg (argv[i]);
        !           253:       char c;
        !           254:       char *p;
        !           255: 
        !           256:       if (tem <= 0)
        !           257:        {
        !           258:          *ip++ = argv[i];
        !           259:          *op++ = 0;
        !           260:          continue;
        !           261:        }
        !           262:       if (tem > 1)
        !           263:        {
        !           264:          if (i + 1 == argc)
        !           265:            fatal ("switch %s given with no argument following it", argv[i]);
        !           266:          else if (!strcmp (argv[i], "-T"))
        !           267:            tempdir = argv[i + 1];
        !           268:          else if (!strcmp (argv[i], "-o"))
        !           269:            *(op - 1) = argv[i + 1];
        !           270:          i += tem - 1;
        !           271:          continue;
        !           272:        }
        !           273: 
        !           274:       p = &argv[i][1];
        !           275:       while (c = *p++)
        !           276:        switch (c)
        !           277:          {
        !           278:          case 'k':
        !           279:            keep_tempfiles = 1;
        !           280:            break;
        !           281: 
        !           282:          default:
        !           283:            fatal ("invalid command switch %c", c);
        !           284:          }
        !           285:     switchdone: ;
        !           286:     }
        !           287: 
        !           288:   /* Record number of keyfields, terminate list of filenames */
        !           289: 
        !           290:   num_infiles = ip - infiles;
        !           291:   *ip = 0;
        !           292: }
        !           293: 
        !           294: /* Return 0 for an argument that is not a switch;
        !           295:  for a switch, return 1 plus the number of following arguments that the switch swallows.
        !           296: */
        !           297: 
        !           298: int
        !           299: classify_arg (arg)
        !           300:      char *arg;
        !           301: {
        !           302:   if (!strcmp (arg, "-T") || !strcmp (arg, "-o"))
        !           303:     return 2;
        !           304:   if (arg[0] == '-')
        !           305:     return 1;
        !           306:   return 0;
        !           307: }
        !           308: 
        !           309: /* Create a name for a temporary file */
        !           310: 
        !           311: char *
        !           312: maketempname (count)
        !           313:      int count;
        !           314: {
        !           315:   char tempsuffix[10];
        !           316:   sprintf (tempsuffix, "%d", count);
        !           317:   return concat (tempdir, tempbase, tempsuffix);
        !           318: }
        !           319: 
        !           320: /* Delete all temporary files up to the specified count */
        !           321: 
        !           322: void
        !           323: flush_tempfiles (to_count)
        !           324:      int to_count;
        !           325: {
        !           326:   if (keep_tempfiles) return;
        !           327:   while (last_deleted_tempcount < to_count)
        !           328:     unlink (maketempname (++last_deleted_tempcount));
        !           329: }
        !           330: 
        !           331: /* Copy an input file into a temporary file, and return the temporary file name */
        !           332: 
        !           333: #define BUFSIZE 1024
        !           334: 
        !           335: char *
        !           336: tempcopy (idesc)
        !           337:      int idesc;
        !           338: {
        !           339:   char *outfile = maketempname (++tempcount);
        !           340:   int odesc;
        !           341:   char buffer[BUFSIZE];
        !           342: 
        !           343:   odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
        !           344: 
        !           345:   if (odesc < 0) pfatal_with_name (outfile);
        !           346: 
        !           347:   while (1)
        !           348:     {
        !           349:       int nread = read (idesc, buffer, BUFSIZE);
        !           350:       write (odesc, buffer, nread);
        !           351:       if (!nread) break;
        !           352:     }
        !           353: 
        !           354:   close (odesc);
        !           355: 
        !           356:   return outfile;
        !           357: }
        !           358: 
        !           359: /* Compare two lines, provided as pointers to pointers to text,
        !           360:  according to the specified set of keyfields */
        !           361: 
        !           362: int
        !           363: compare_full (line1, line2)
        !           364:      char **line1, **line2;
        !           365: {
        !           366:   int i;
        !           367: 
        !           368:   /* Compare using the first keyfield;
        !           369:      if that does not distinguish the lines, try the second keyfield; and so on. */
        !           370: 
        !           371:   for (i = 0; i < num_keyfields; i++)
        !           372:     {
        !           373:       long length1, length2;
        !           374:       char *start1 = find_field (&keyfields[i], *line1, &length1);
        !           375:       char *start2 = find_field (&keyfields[i], *line2, &length2);
        !           376:       int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
        !           377:                                              start2, length2, *line2 - text_base);
        !           378:       if (tem)
        !           379:        {
        !           380:          if (keyfields[i].reverse)
        !           381:            return - tem;
        !           382:           return tem;
        !           383:        }
        !           384:     }
        !           385: 
        !           386:   return 0;    /* Lines match exactly */
        !           387: }
        !           388: 
        !           389: /* Compare two lines described by structures
        !           390:  in which the first keyfield is identified in advance.
        !           391:  For positional sorting, assumes that the order of the lines in core
        !           392:  reflects their nominal order.  */
        !           393: 
        !           394: int
        !           395: compare_prepared (line1, line2)
        !           396:      struct lineinfo *line1, *line2;
        !           397: {
        !           398:   int i;
        !           399:   int tem;
        !           400:   char *text1, *text2;
        !           401: 
        !           402:   /* Compare using the first keyfield, which has been found for us already */
        !           403:   if (keyfields->positional)
        !           404:     {
        !           405:       if (line1->text - text_base > line2->text - text_base)
        !           406:        tem = 1;
        !           407:       else
        !           408:        tem = -1;
        !           409:     }
        !           410:   else if (keyfields->numeric)
        !           411:     tem = line1->key.number - line2->key.number;
        !           412:   else
        !           413:     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0);
        !           414:   if (tem)
        !           415:     {
        !           416:       if (keyfields->reverse)
        !           417:        return - tem;
        !           418:       return tem;
        !           419:     }
        !           420: 
        !           421:   text1 = line1->text;
        !           422:   text2 = line2->text;
        !           423: 
        !           424:   /* Compare using the second keyfield;
        !           425:      if that does not distinguish the lines, try the third keyfield; and so on. */
        !           426: 
        !           427:   for (i = 1; i < num_keyfields; i++)
        !           428:     {
        !           429:       long length1, length2;
        !           430:       char *start1 = find_field (&keyfields[i], text1, &length1);
        !           431:       char *start2 = find_field (&keyfields[i], text2, &length2);
        !           432:       int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
        !           433:                                              start2, length2, text2 - text_base);
        !           434:       if (tem)
        !           435:        {
        !           436:          if (keyfields[i].reverse)
        !           437:            return - tem;
        !           438:           return tem;
        !           439:        }
        !           440:     }
        !           441: 
        !           442:   return 0;    /* Lines match exactly */
        !           443: }
        !           444: 
        !           445: /* Like compare_full but more general.
        !           446:  You can pass any strings, and you can say how many keyfields to use.
        !           447:  `pos1' and `pos2' should indicate the nominal positional ordering of
        !           448:  the two lines in the input.  */
        !           449: 
        !           450: int
        !           451: compare_general (str1, str2, pos1, pos2, use_keyfields)
        !           452:      char *str1, *str2;
        !           453:      long pos1, pos2;
        !           454:      int use_keyfields;
        !           455: {
        !           456:   int i;
        !           457: 
        !           458:   /* Compare using the first keyfield;
        !           459:      if that does not distinguish the lines, try the second keyfield; and so on. */
        !           460: 
        !           461:   for (i = 0; i < use_keyfields; i++)
        !           462:     {
        !           463:       long length1, length2;
        !           464:       char *start1 = find_field (&keyfields[i], str1, &length1);
        !           465:       char *start2 = find_field (&keyfields[i], str2, &length2);
        !           466:       int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
        !           467:       if (tem)
        !           468:        {
        !           469:          if (keyfields[i].reverse)
        !           470:            return - tem;
        !           471:           return tem;
        !           472:        }
        !           473:     }
        !           474: 
        !           475:   return 0;    /* Lines match exactly */
        !           476: }
        !           477: 
        !           478: /* Find the start and length of a field in `str' according to `keyfield'.
        !           479:  A pointer to the starting character is returned, and the length
        !           480:  is stored into the int that `lengthptr' points to.  */
        !           481: 
        !           482: char *
        !           483: find_field (keyfield, str, lengthptr)
        !           484:      struct keyfield *keyfield;
        !           485:      char *str;
        !           486:      long *lengthptr;
        !           487: {
        !           488:   char *start;
        !           489:   char *end;
        !           490:   char *(*fun) ();
        !           491: 
        !           492:   if (keyfield->braced) fun = find_braced_pos;
        !           493:   else fun = find_pos;
        !           494: 
        !           495:   start = ( *fun )(str, keyfield->startwords, keyfield->startchars,
        !           496:               keyfield->ignore_blanks);
        !           497:   if (keyfield->endwords < 0)
        !           498:     {
        !           499:       if (keyfield->braced)
        !           500:        end = find_braced_end (start);
        !           501:       else
        !           502:        {
        !           503:          end = start;
        !           504:          while (*end && *end != '\n') end++;
        !           505:        }
        !           506:     }
        !           507:   else
        !           508:     {
        !           509:       end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0);
        !           510:       if (end - str < start - str) end = start;
        !           511:     }
        !           512:   *lengthptr = end - start;
        !           513:   return start;
        !           514: }
        !           515: 
        !           516: /* Find a pointer to a specified place within `str',
        !           517:  skipping (from the beginning) `words' words and then `chars' chars.
        !           518:  If `ignore_blanks' is nonzero, we skip all blanks
        !           519:  after finding the specified word.  */
        !           520: 
        !           521: char *
        !           522: find_pos (str, words, chars, ignore_blanks)
        !           523:      char *str;
        !           524:      int words, chars;
        !           525:      int ignore_blanks;
        !           526: {
        !           527:   int i;
        !           528:   char *p = str;
        !           529: 
        !           530:   for (i = 0; i < words; i++)
        !           531:     {
        !           532:       char c;
        !           533:       /* Find next bunch of nonblanks and skip them. */
        !           534:       while ((c = *p) == ' ' || c == '\t') p++;
        !           535:       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
        !           536:       if (!*p || *p == '\n') return p;
        !           537:     }
        !           538: 
        !           539:   while (*p == ' ' || *p == '\t') p++;
        !           540: 
        !           541:   for (i = 0; i < chars; i++)
        !           542:     {
        !           543:       if (!*p  || *p == '\n') break;
        !           544:       p++;
        !           545:     }
        !           546:   return p;
        !           547: }
        !           548: 
        !           549: /* Like find_pos but assumes that each field is surrounded by braces
        !           550:  and that braces within fields are balanced. */
        !           551: 
        !           552: char *
        !           553: find_braced_pos (str, words, chars, ignore_blanks)
        !           554:      char *str;
        !           555:      int words, chars;
        !           556:      int ignore_blanks;
        !           557: {
        !           558:   int i;
        !           559:   int bracelevel;
        !           560:   char *p = str;
        !           561:   char c;
        !           562: 
        !           563:   for (i = 0; i < words; i++)
        !           564:     {
        !           565:       bracelevel = 1;
        !           566:       while ((c = *p++) != '{' && c != '\n' && c);
        !           567:       if (c != '{')
        !           568:        return p - 1;
        !           569:       while (bracelevel)
        !           570:        {
        !           571:          c = *p++;
        !           572:          if (c == '{') bracelevel++;
        !           573:          if (c == '}') bracelevel--;
        !           574:          if (c == '\\') c = *p++;      /* \ quotes braces and \ */
        !           575:          if (c == 0 || c == '\n') return p-1;
        !           576:        }
        !           577:     }
        !           578: 
        !           579:   while ((c = *p++) != '{' && c != '\n' && c);
        !           580: 
        !           581:   if (c != '{')
        !           582:     return p-1;
        !           583: 
        !           584:   if (ignore_blanks)
        !           585:     while ((c = *p) == ' ' || c == '\t') p++;
        !           586:   
        !           587:   for (i = 0; i < chars; i++)
        !           588:     {
        !           589:       if (!*p  || *p == '\n') break;
        !           590:       p++;
        !           591:     }
        !           592:   return p;
        !           593: }
        !           594: 
        !           595: /* Find the end of the balanced-brace field which starts at `str'.
        !           596:   The position returned is just before the closing brace. */
        !           597: 
        !           598: char *
        !           599: find_braced_end (str)
        !           600:      char *str;
        !           601: {
        !           602:   int bracelevel;
        !           603:   char *p = str;
        !           604:   char c;
        !           605: 
        !           606:   bracelevel = 1;
        !           607:   while (bracelevel)
        !           608:     {
        !           609:       c = *p++;
        !           610:       if (c == '{') bracelevel++;
        !           611:       if (c == '}') bracelevel--;
        !           612:       if (c == '\\') c = *p++;
        !           613:       if (c == 0 || c == '\n') return p-1;
        !           614:     }
        !           615:   return p - 1;
        !           616: }
        !           617: 
        !           618: long
        !           619: find_value (start, length)
        !           620:      char *start;
        !           621:      long length;
        !           622: {
        !           623:   while (length != 0L) {
        !           624:     if (isdigit(*start))
        !           625:       return atol(start);
        !           626:     length--;
        !           627:     start++;
        !           628:   }
        !           629:   return 0l;
        !           630: }
        !           631: 
        !           632: /* Vector used to translate characters for comparison.
        !           633:    This is how we make all alphanumerics follow all else,
        !           634:    and ignore case in the first sorting.  */
        !           635: int char_order[256];
        !           636: 
        !           637: init_char_order ()
        !           638: {
        !           639:   int i;
        !           640:   for (i = 1; i < 256; i++)
        !           641:     char_order[i] = i;
        !           642: 
        !           643:   for (i = '0'; i <= '9'; i++)
        !           644:     char_order[i] += 512;
        !           645: 
        !           646:   for (i = 'a'; i <= 'z'; i++) {
        !           647:     char_order[i] = 512 + i;
        !           648:     char_order[i + 'A' - 'a'] = 512 + i;
        !           649:   }
        !           650: }
        !           651: 
        !           652: /* Compare two fields (each specified as a start pointer and a character count)
        !           653:  according to `keyfield'.  The sign of the value reports the relation between the fields */
        !           654: 
        !           655: int
        !           656: compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
        !           657:      struct keyfield *keyfield;
        !           658:      char *start1;
        !           659:      long length1;
        !           660:      long pos1;
        !           661:      char *start2;
        !           662:      long length2;
        !           663:      long pos2;
        !           664: {
        !           665:   if (keyfields->positional)
        !           666:     {
        !           667:       if (pos1 > pos2)
        !           668:        return 1;
        !           669:       else
        !           670:        return -1;
        !           671:     }
        !           672:   if (keyfield->numeric)
        !           673:     {
        !           674:        long value = find_value (start1, length1) - find_value (start2, length2);
        !           675:       if (value > 0) return 1;
        !           676:       if (value < 0) return -1;
        !           677:       return 0;
        !           678:     }
        !           679:   else
        !           680:     {
        !           681:       char *p1 = start1;
        !           682:       char *p2 = start2;
        !           683:       char *e1 = start1 + length1;
        !           684:       char *e2 = start2 + length2;
        !           685: 
        !           686:       int fold_case = keyfield->fold_case;
        !           687: 
        !           688:       while (1)
        !           689:        {
        !           690:          int c1, c2;
        !           691: 
        !           692:          if (p1 == e1) c1 = 0;
        !           693:          else c1 = *p1++;
        !           694:          if (p2 == e2) c2 = 0;
        !           695:          else c2 = *p2++;
        !           696: 
        !           697:          if (char_order[c1] != char_order[c2])
        !           698:            return char_order[c1] - char_order[c2];
        !           699:          if (!c1) break;
        !           700:        }
        !           701: 
        !           702:       /* Strings are equal except possibly for case.  */
        !           703:       p1 = start1;
        !           704:       p2 = start2;
        !           705:       while (1)
        !           706:        {
        !           707:          int c1, c2;
        !           708: 
        !           709:          if (p1 == e1) c1 = 0;
        !           710:          else c1 = *p1++;
        !           711:          if (p2 == e2) c2 = 0;
        !           712:          else c2 = *p2++;
        !           713: 
        !           714:          if (c1 != c2)
        !           715:            /* Reverse sign here so upper case comes out last.  */
        !           716:            return c2 - c1;
        !           717:          if (!c1) break;
        !           718:        }
        !           719: 
        !           720:       return 0;
        !           721:     }
        !           722: }
        !           723: 
        !           724: /* A `struct linebuffer' is a structure which holds a line of text.
        !           725:  `readline' reads a line from a stream into a linebuffer
        !           726:  and works regardless of the length of the line.  */
        !           727: 
        !           728: struct linebuffer
        !           729:   {
        !           730:     long size;
        !           731:     char *buffer;
        !           732:   };
        !           733: 
        !           734: /* Initialize a linebuffer for use */
        !           735: 
        !           736: void
        !           737: initbuffer (linebuffer)
        !           738:      struct linebuffer *linebuffer;
        !           739: {
        !           740:   linebuffer->size = 200;
        !           741:   linebuffer->buffer = (char *) xmalloc (200);
        !           742: }
        !           743: 
        !           744: /* Read a line of text from `stream' into `linebuffer'.
        !           745:  Return the length of the line.  */
        !           746: 
        !           747: long
        !           748: readline (linebuffer, stream)
        !           749:      struct linebuffer *linebuffer;
        !           750:      FILE *stream;
        !           751: {
        !           752:   char *buffer = linebuffer->buffer;
        !           753:   char *p = linebuffer->buffer;
        !           754:   char *end = p + linebuffer->size;
        !           755: 
        !           756:   while (1)
        !           757:     {
        !           758:       int c = getc (stream);
        !           759:       if (p == end)
        !           760:        {
        !           761:          buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
        !           762:          p += buffer - linebuffer->buffer;
        !           763:          end += buffer - linebuffer->buffer;
        !           764:          linebuffer->buffer = buffer;
        !           765:        }
        !           766:       if (c < 0 || c == '\n')
        !           767:        {
        !           768:          *p = 0;
        !           769:          break;
        !           770:        }
        !           771:       *p++ = c;
        !           772:     }
        !           773: 
        !           774:   return p - buffer;
        !           775: }
        !           776: 
        !           777: /* Sort an input file too big to sort in core.  */
        !           778: 
        !           779: void
        !           780: sort_offline (infile, nfiles, total, outfile)
        !           781:      char *infile;
        !           782:      long total;
        !           783:      char *outfile;
        !           784: {
        !           785:   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;  /* More than enough */
        !           786:   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
        !           787:   FILE *istream = fopen (infile, "r");
        !           788:   int i;
        !           789:   struct linebuffer lb;
        !           790:   long linelength;
        !           791:   int failure = 0;
        !           792: 
        !           793:   initbuffer (&lb);
        !           794: 
        !           795:   /* Read in one line of input data.  */
        !           796: 
        !           797:   linelength = readline (&lb, istream);
        !           798: 
        !           799:   if (lb.buffer[0] != '\\')
        !           800:     {
        !           801:       error ("%s: not a texinfo index file", infile);
        !           802:       return;
        !           803:     }
        !           804: 
        !           805:   /* Split up the input into `ntemps' temporary files, or maybe fewer,
        !           806:     and put the new files' names into `tempfiles' */
        !           807: 
        !           808:   for (i = 0; i < ntemps; i++)
        !           809:     {
        !           810:       char *outname = maketempname (++tempcount);
        !           811:       FILE *ostream = fopen (outname, "w");
        !           812:       long tempsize = 0;
        !           813: 
        !           814:       if (!ostream) pfatal_with_name (outname);
        !           815:       tempfiles[i] = outname;
        !           816: 
        !           817:       /* Copy lines into this temp file as long as it does not make file "too big"
        !           818:        or until there are no more lines.  */
        !           819: 
        !           820:       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
        !           821:        {
        !           822:          tempsize += linelength + 1;
        !           823:          fputs (lb.buffer, ostream);
        !           824:          putc ('\n', ostream);
        !           825: 
        !           826:          /* Read another line of input data.  */
        !           827: 
        !           828:          linelength = readline (&lb, istream);
        !           829:          if (!linelength && feof (istream)) break;
        !           830: 
        !           831:          if (lb.buffer[0] != '\\')
        !           832:            {
        !           833:              error ("%s: not a texinfo index file", infile);
        !           834:              failure = 1;
        !           835:              goto fail;
        !           836:            }
        !           837:        }
        !           838:       fclose (ostream);
        !           839:       if (feof (istream)) break;
        !           840:     }
        !           841: 
        !           842:   free (lb.buffer);
        !           843: 
        !           844:  fail:
        !           845:   /* Record number of temp files we actually needed.  */
        !           846: 
        !           847:   ntemps = i;
        !           848: 
        !           849:   /* Sort each tempfile into another tempfile.
        !           850:     Delete the first set of tempfiles and put the names of the second into `tempfiles' */
        !           851: 
        !           852:   for (i = 0; i < ntemps; i++)
        !           853:     {
        !           854:       char *newtemp = maketempname (++tempcount);
        !           855:       sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
        !           856:       if (!keep_tempfiles)
        !           857:         unlink (tempfiles[i]);
        !           858:       tempfiles[i] = newtemp;
        !           859:     }
        !           860: 
        !           861:   if (failure)
        !           862:     return;
        !           863: 
        !           864:   /* Merge the tempfiles together and indexify */
        !           865: 
        !           866:   merge_files (tempfiles, ntemps, outfile);
        !           867: }
        !           868: 
        !           869: /* Sort `infile', whose size is `total',
        !           870:  assuming that is small enough to be done in-core,
        !           871:  then indexify it and send the output to `outfile' (or to stdout).  */
        !           872: 
        !           873: void
        !           874: sort_in_core (infile, total, outfile)
        !           875:      char *infile;
        !           876:      long total;
        !           877:      char *outfile;
        !           878: {
        !           879:   char **nextline;
        !           880:   char *data = (char *) xmalloc (total + 1);
        !           881:   char *file_data;
        !           882:   long file_size;
        !           883:   int i;
        !           884:   FILE *ostream = stdout;
        !           885:   struct lineinfo *lineinfo;
        !           886: 
        !           887:   /* Read the contents of the file into the moby array `data' */
        !           888: 
        !           889:   int desc = open (infile, 0, 0);
        !           890: 
        !           891:   if (desc < 0)
        !           892:     fatal ("failure reopening %s", infile);
        !           893:   for (file_size = 0; ; )
        !           894:     {
        !           895:       if ((i = read (desc, data + file_size, total - file_size)) <= 0)
        !           896:        break;
        !           897:       file_size += i;
        !           898:     }
        !           899:   file_data = data;
        !           900:   data[file_size] = 0;
        !           901: 
        !           902:   close (desc);
        !           903: 
        !           904:   if (file_size > 0 && data[0] != '\\')
        !           905:     {
        !           906:       error ("%s: not a texinfo index file", infile);
        !           907:       return;
        !           908:     }
        !           909: 
        !           910:   init_char_order ();
        !           911: 
        !           912:   /* Sort routines want to know this address */
        !           913: 
        !           914:   text_base = data;
        !           915: 
        !           916:   /* Create the array of pointers to lines, with a default size frequently enough.  */
        !           917: 
        !           918:   lines = total / 50;
        !           919:   if (!lines) lines = 2;
        !           920:   linearray = (char **) xmalloc (lines * sizeof (char *));
        !           921: 
        !           922:   /* `nextline' points to the next free slot in this array.
        !           923:      `lines' is the allocated size.  */
        !           924: 
        !           925:   nextline = linearray;
        !           926: 
        !           927:   /* Parse the input file's data, and make entries for the lines.  */
        !           928: 
        !           929:   nextline = parsefile (infile, nextline, file_data, file_size);
        !           930:   if (nextline == 0)
        !           931:     {
        !           932:       error ("%s: not a texinfo index file", infile);
        !           933:       return;
        !           934:     }
        !           935: 
        !           936:   /* Sort the lines */
        !           937: 
        !           938:   /* If we have enough space, find the first keyfield of each line in advance.
        !           939:     Make a `struct lineinfo' for each line, which records the keyfield
        !           940:     as well as the line, and sort them.  */
        !           941: 
        !           942:   lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
        !           943: 
        !           944:   if (lineinfo)
        !           945:     {
        !           946:       struct lineinfo *lp;
        !           947:       char **p;
        !           948: 
        !           949:       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
        !           950:        {
        !           951:          lp->text = *p;
        !           952:          lp->key.text = find_field (keyfields, *p, &lp->keylen);
        !           953:          if (keyfields->numeric)
        !           954:            lp->key.number = find_value (lp->key.text, lp->keylen);
        !           955:        }
        !           956: 
        !           957:       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
        !           958: 
        !           959:       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
        !           960:        *p = lp->text;
        !           961: 
        !           962:       free (lineinfo);
        !           963:     }
        !           964:   else
        !           965:     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
        !           966: 
        !           967:   /* Open the output file */
        !           968: 
        !           969:   if (outfile)
        !           970:     {
        !           971:       ostream = fopen (outfile, "w");
        !           972:       if (!ostream)
        !           973:        pfatal_with_name (outfile);
        !           974:     }
        !           975: 
        !           976:   writelines (linearray, nextline - linearray, ostream);
        !           977:   if (outfile) fclose (ostream);
        !           978: 
        !           979:   free (linearray);
        !           980:   free (data);
        !           981: }
        !           982: 
        !           983: /* Parse an input string in core into lines.
        !           984:    DATA is the input string, and SIZE is its length.
        !           985:    Data goes in LINEARRAY starting at NEXTLINE.
        !           986:    The value returned is the first entry in LINEARRAY still unused.
        !           987:    Value 0 means input file contents are invalid.  */
        !           988: 
        !           989: char **
        !           990: parsefile (filename, nextline, data, size)
        !           991:      char *filename;
        !           992:      char **nextline;
        !           993:      char *data;
        !           994:      long size;
        !           995: {
        !           996:   char *p, *end;
        !           997:   char **line = nextline;
        !           998: 
        !           999:   p = data;
        !          1000:   end = p + size;
        !          1001:   *end = 0;
        !          1002: 
        !          1003:   while (p != end)
        !          1004:     {
        !          1005:       if (p[0] != '\\')
        !          1006:        return 0;
        !          1007: 
        !          1008:       *line = p;
        !          1009:       while (*p && *p != '\n') p++;
        !          1010:       if (p != end) p++;
        !          1011: 
        !          1012:       line++;
        !          1013:       if (line == linearray + lines)
        !          1014:        {
        !          1015:          char **old = linearray;
        !          1016:          linearray = (char **) xrealloc (linearray, sizeof (char *) * (lines *= 4));
        !          1017:          line += linearray - old;
        !          1018:        }
        !          1019:     }
        !          1020: 
        !          1021:   return line;
        !          1022: }
        !          1023: 
        !          1024: /* Indexification is a filter applied to the sorted lines
        !          1025:  as they are being written to the output file.
        !          1026:  Multiple entries for the same name, with different page numbers,
        !          1027:  get combined into a single entry with multiple page numbers.
        !          1028:  The first braced field, which is used for sorting, is discarded.
        !          1029:  However, its first character is examined, folded to lower case,
        !          1030:  and if it is different from that in the previous line fed to us
        !          1031:  a \initial line is written with one argument, the new initial.
        !          1032: 
        !          1033:  If an entry has four braced fields, then the second and third
        !          1034:  constitute primary and secondary names.
        !          1035:  In this case, each change of primary name
        !          1036:  generates a \primary line which contains only the primary name,
        !          1037:  and in between these are \secondary lines which contain
        !          1038:  just a secondary name and page numbers.
        !          1039: */
        !          1040: 
        !          1041: /* The last primary name we wrote a \primary entry for.
        !          1042:  If only one level of indexing is being done, this is the last name seen */
        !          1043: char *lastprimary;
        !          1044: int lastprimarylength;  /* Length of storage allocated for lastprimary */
        !          1045: 
        !          1046: /* Similar, for the secondary name. */
        !          1047: char *lastsecondary;
        !          1048: int lastsecondarylength;
        !          1049: 
        !          1050: /* Zero if we are not in the middle of writing an entry.
        !          1051:  One if we have written the beginning of an entry but have not
        !          1052:   yet written any page numbers into it.
        !          1053:  Greater than one if we have written the beginning of an entry
        !          1054:   plus at least one page number. */
        !          1055: int pending;
        !          1056: 
        !          1057: /* The initial (for sorting purposes) of the last primary entry written.
        !          1058:  When this changes, a \initial {c} line is written */
        !          1059: 
        !          1060: char * lastinitial;
        !          1061: 
        !          1062: int lastinitiallength;
        !          1063: 
        !          1064: /* When we need a string of length 1 for the value of lastinitial,
        !          1065:    store it here.  */
        !          1066: 
        !          1067: char lastinitial1[2];
        !          1068: 
        !          1069: /* Initialize static storage for writing an index */
        !          1070: 
        !          1071: void
        !          1072: init_index ()
        !          1073: {
        !          1074:   pending = 0;
        !          1075:   lastinitial = lastinitial1;
        !          1076:   lastinitial1[0] = 0;
        !          1077:   lastinitial1[1] = 0;
        !          1078:   lastinitiallength = 0;
        !          1079:   lastprimarylength = 100;
        !          1080:   lastprimary = (char *) xmalloc (lastprimarylength + 1);
        !          1081:   bzero (lastprimary, lastprimarylength + 1);
        !          1082:   lastsecondarylength = 100;
        !          1083:   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
        !          1084:   bzero (lastsecondary, lastsecondarylength + 1);
        !          1085: }
        !          1086: 
        !          1087: /* Indexify.  Merge entries for the same name,
        !          1088:  insert headers for each initial character, etc.  */
        !          1089: 
        !          1090: indexify (line, ostream)
        !          1091:      char *line;
        !          1092:      FILE *ostream;
        !          1093: {
        !          1094:   char *primary, *secondary, *pagenumber;
        !          1095:   int primarylength, secondarylength, pagelength;
        !          1096:   int len = strlen (line);
        !          1097:   int nosecondary;
        !          1098:   int initiallength;
        !          1099:   char *initial;
        !          1100:   char initial1[2];
        !          1101:   register char *p;
        !          1102: 
        !          1103:   /* First, analyze the parts of the entry fed to us this time */
        !          1104: 
        !          1105:   p = find_braced_pos (line, 0, 0, 0);
        !          1106:   if (*p == '{')
        !          1107:     {
        !          1108:       initial = p;
        !          1109:       /* Get length of inner pair of braces starting at p,
        !          1110:         including that inner pair of braces.  */
        !          1111:       initiallength = find_braced_end (p + 1) + 1 - p;
        !          1112:     }
        !          1113:   else
        !          1114:     {
        !          1115:       initial = initial1;
        !          1116:       initial1[0] = *p;
        !          1117:       initial1[1] = 0;
        !          1118:       initiallength = 1;
        !          1119: 
        !          1120:       if (initial1[0] >= 'a' && initial1[0] <= 'z')
        !          1121:        initial1[0] -= 040;
        !          1122:     }
        !          1123: 
        !          1124:   pagenumber = find_braced_pos (line, 1, 0, 0);
        !          1125:   pagelength = find_braced_end (pagenumber) - pagenumber;
        !          1126:   if (pagelength == 0)
        !          1127:     abort ();
        !          1128: 
        !          1129:   primary = find_braced_pos (line, 2, 0, 0);
        !          1130:   primarylength = find_braced_end (primary) - primary;
        !          1131: 
        !          1132:   secondary = find_braced_pos (line, 3, 0, 0);
        !          1133:   nosecondary = !*secondary;
        !          1134:   if (!nosecondary)
        !          1135:     secondarylength = find_braced_end (secondary) - secondary;
        !          1136: 
        !          1137:   /* If the primary is different from before, make a new primary entry */
        !          1138:   if (strncmp (primary, lastprimary, primarylength))
        !          1139:     {
        !          1140:       /* Close off current secondary entry first, if one is open */
        !          1141:       if (pending)
        !          1142:        {
        !          1143:          fputs ("}\n", ostream);
        !          1144:          pending = 0;
        !          1145:        }
        !          1146: 
        !          1147:       /* If this primary has a different initial, include an entry for the initial */
        !          1148:       if (initiallength != lastinitiallength ||
        !          1149:          strncmp (initial, lastinitial, initiallength))
        !          1150:        {
        !          1151:          fprintf (ostream, "\\initial {");
        !          1152:          fwrite (initial, 1, initiallength, ostream);
        !          1153:          fprintf (ostream, "}\n", initial);
        !          1154:          if (initial == initial1)
        !          1155:            {
        !          1156:              lastinitial = lastinitial1;
        !          1157:              *lastinitial1 = *initial1;
        !          1158:            }
        !          1159:          else
        !          1160:            {
        !          1161:              lastinitial = initial;
        !          1162:            }
        !          1163:          lastinitiallength = initiallength;
        !          1164:        }
        !          1165: 
        !          1166:       /* Make the entry for the primary.  */
        !          1167:       if (nosecondary)
        !          1168:        fputs ("\\entry {", ostream);
        !          1169:       else
        !          1170:        fputs ("\\primary {", ostream);
        !          1171:       fwrite (primary, primarylength, 1, ostream);
        !          1172:       if (nosecondary)
        !          1173:        {
        !          1174:          fputs ("}{", ostream);
        !          1175:          pending = 1;
        !          1176:        }
        !          1177:       else
        !          1178:        fputs ("}\n", ostream);
        !          1179: 
        !          1180:       /* Record name of most recent primary */
        !          1181:       if (lastprimarylength < primarylength)
        !          1182:        {
        !          1183:           lastprimarylength = primarylength + 100;
        !          1184:          lastprimary = (char *) xrealloc (lastprimary,
        !          1185:                                           1 + lastprimarylength);
        !          1186:        }
        !          1187:       strncpy (lastprimary, primary, primarylength);
        !          1188:       lastprimary[primarylength] = 0;
        !          1189: 
        !          1190:       /* There is no current secondary within this primary, now */
        !          1191:       lastsecondary[0] = 0;
        !          1192:     }
        !          1193: 
        !          1194:   /* Should not have an entry with no subtopic following one with a subtopic */
        !          1195: 
        !          1196:   if (nosecondary && *lastsecondary)
        !          1197:     error ("entry %s follows an entry with a secondary name", line);
        !          1198: 
        !          1199:   /* Start a new secondary entry if necessary */
        !          1200:   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
        !          1201:     {
        !          1202:       if (pending)
        !          1203:        {
        !          1204:          fputs ("}\n", ostream);
        !          1205:          pending = 0;
        !          1206:        }
        !          1207: 
        !          1208:       /* Write the entry for the secondary.  */
        !          1209:       fputs ("\\secondary {", ostream);
        !          1210:       fwrite (secondary, secondarylength, 1, ostream);
        !          1211:       fputs ("}{", ostream);
        !          1212:       pending = 1;
        !          1213: 
        !          1214:       /* Record name of most recent secondary */
        !          1215:       if (lastsecondarylength < secondarylength)
        !          1216:        {
        !          1217:           lastsecondarylength = secondarylength + 100;
        !          1218:          lastsecondary = (char *) xrealloc (lastsecondary,
        !          1219:                                           1 + lastsecondarylength);
        !          1220:        }
        !          1221:       strncpy (lastsecondary, secondary, secondarylength);
        !          1222:       lastsecondary[secondarylength] = 0;
        !          1223:     }
        !          1224: 
        !          1225:   /* Here to add one more page number to the current entry */
        !          1226:   if (pending++ != 1)
        !          1227:     fputs (", ", ostream);     /* Punctuate first, if this is not the first */
        !          1228:   fwrite (pagenumber, pagelength, 1, ostream);
        !          1229: }
        !          1230: 
        !          1231: /* Close out any unfinished output entry */
        !          1232: 
        !          1233: void
        !          1234: finish_index (ostream)
        !          1235:      FILE *ostream;
        !          1236: {
        !          1237:   if (pending)
        !          1238:     fputs ("}\n", ostream);
        !          1239:   free (lastprimary);
        !          1240:   free (lastsecondary);
        !          1241: }
        !          1242: 
        !          1243: /* Copy the lines in the sorted order.
        !          1244:  Each line is copied out of the input file it was found in. */
        !          1245: 
        !          1246: void
        !          1247: writelines (linearray, nlines, ostream)
        !          1248:      char **linearray;
        !          1249:      int nlines;
        !          1250:      FILE *ostream;
        !          1251: {
        !          1252:   char **stop_line = linearray + nlines;
        !          1253:   char **next_line;
        !          1254: 
        !          1255:   init_index ();
        !          1256: 
        !          1257:   /* Output the text of the lines, and free the buffer space */
        !          1258: 
        !          1259:   for (next_line = linearray; next_line != stop_line; next_line++)
        !          1260:     {
        !          1261:       /* If -u was specified, output the line only if distinct from previous one.  */
        !          1262:       if (next_line == linearray
        !          1263:          /* Compare previous line with this one, using only the explicitly specd keyfields */
        !          1264:          || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
        !          1265:        {
        !          1266:          char *p = *next_line;
        !          1267:          char c;
        !          1268:          while ((c = *p++) && c != '\n');
        !          1269:          *(p-1) = 0;
        !          1270:          indexify (*next_line, ostream);
        !          1271:        }
        !          1272:     }
        !          1273: 
        !          1274:   finish_index (ostream);
        !          1275: }
        !          1276: 
        !          1277: /* Assume (and optionally verify) that each input file is sorted;
        !          1278:  merge them and output the result.
        !          1279:  Returns nonzero if any input file fails to be sorted.
        !          1280: 
        !          1281:  This is the high-level interface that can handle an unlimited number of files.  */
        !          1282: 
        !          1283: #define MAX_DIRECT_MERGE 10
        !          1284: 
        !          1285: int
        !          1286: merge_files (infiles, nfiles, outfile)
        !          1287:      char **infiles;
        !          1288:      int nfiles;
        !          1289:      char *outfile;
        !          1290: {
        !          1291:   char **tempfiles;
        !          1292:   int ntemps;
        !          1293:   int i;
        !          1294:   int value = 0;
        !          1295:   int start_tempcount = tempcount;
        !          1296: 
        !          1297:   if (nfiles <= MAX_DIRECT_MERGE)
        !          1298:     return merge_direct (infiles, nfiles, outfile);
        !          1299: 
        !          1300:   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
        !          1301:      making a temporary file to hold each group's result.  */
        !          1302: 
        !          1303:   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
        !          1304:   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
        !          1305:   for (i = 0; i < ntemps; i++)
        !          1306:     {
        !          1307:       int nf = MAX_DIRECT_MERGE;
        !          1308:       if (i + 1 == ntemps)
        !          1309:        nf = nfiles - i * MAX_DIRECT_MERGE;
        !          1310:       tempfiles[i] = maketempname (++tempcount);
        !          1311:       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
        !          1312:     }
        !          1313: 
        !          1314:   /* All temporary files that existed before are no longer needed
        !          1315:      since their contents have been merged into our new tempfiles.
        !          1316:      So delete them.  */
        !          1317:   flush_tempfiles (start_tempcount);
        !          1318: 
        !          1319:   /* Now merge the temporary files we created.  */
        !          1320: 
        !          1321:   merge_files (tempfiles, ntemps, outfile);
        !          1322: 
        !          1323:   free (tempfiles);
        !          1324: 
        !          1325:   return value;
        !          1326: }  
        !          1327: 
        !          1328: /* Assume (and optionally verify) that each input file is sorted;
        !          1329:  merge them and output the result.
        !          1330:  Returns nonzero if any input file fails to be sorted.
        !          1331: 
        !          1332:  This version of merging will not work if the number of
        !          1333:  input files gets too high.  Higher level functions
        !          1334:  use it only with a bounded number of input files.  */
        !          1335: 
        !          1336: int
        !          1337: merge_direct (infiles, nfiles, outfile)
        !          1338:      char **infiles;
        !          1339:      int nfiles;
        !          1340:      char *outfile;
        !          1341: {
        !          1342:   char **ip = infiles;
        !          1343:   struct linebuffer *lb1, *lb2;
        !          1344:   struct linebuffer **thisline, **prevline;
        !          1345:   FILE **streams;
        !          1346:   int i;
        !          1347:   int nleft;
        !          1348:   int lossage = 0;
        !          1349:   int *file_lossage;
        !          1350:   struct linebuffer *prev_out = 0;
        !          1351:   FILE *ostream = stdout;
        !          1352: 
        !          1353:   if (outfile)
        !          1354:     {
        !          1355:       ostream = fopen (outfile, "w");
        !          1356:     }
        !          1357:   if (!ostream) pfatal_with_name (outfile);
        !          1358: 
        !          1359:   init_index ();
        !          1360: 
        !          1361:   if (nfiles == 0)
        !          1362:     {
        !          1363:       if (outfile)
        !          1364:         fclose (ostream);
        !          1365:       return 0;
        !          1366:     }
        !          1367: 
        !          1368:   /* For each file, make two line buffers.
        !          1369:      Also, for each file, there is an element of `thisline'
        !          1370:      which points at any time to one of the file's two buffers,
        !          1371:      and an element of `prevline' which points to the other buffer.
        !          1372:      `thisline' is supposed to point to the next available line from the file,
        !          1373:      while `prevline' holds the last file line used,
        !          1374:      which is remembered so that we can verify that the file is properly sorted. */
        !          1375: 
        !          1376:   /* lb1 and lb2 contain one buffer each per file */
        !          1377:   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
        !          1378:   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
        !          1379: 
        !          1380:   /* thisline[i] points to the linebuffer holding the next available line in file i,
        !          1381:      or is zero if there are no lines left in that file.  */
        !          1382:   thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
        !          1383:   /* prevline[i] points to the linebuffer holding the last used line from file i.
        !          1384:      This is just for verifying that file i is properly sorted.  */
        !          1385:   prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
        !          1386:   /* streams[i] holds the input stream for file i.  */
        !          1387:   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
        !          1388:   /* file_lossage[i] is nonzero if we already know file i is not properly sorted.  */
        !          1389:   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
        !          1390: 
        !          1391:   /* Allocate and initialize all that storage */
        !          1392: 
        !          1393:   for (i = 0; i < nfiles; i++)
        !          1394:     {
        !          1395:       initbuffer (&lb1[i]);
        !          1396:       initbuffer (&lb2[i]);
        !          1397:       thisline[i] = &lb1[i];
        !          1398:       prevline[i] = &lb2[i];
        !          1399:       file_lossage[i] = 0;
        !          1400:       streams[i] = fopen (infiles[i], "r");
        !          1401:       if (!streams[i])
        !          1402:        pfatal_with_name (infiles[i]);
        !          1403: 
        !          1404:       readline (thisline[i], streams[i]);
        !          1405:     }
        !          1406: 
        !          1407:   /* Keep count of number of files not at eof */
        !          1408:   nleft = nfiles;
        !          1409: 
        !          1410:   while (nleft)
        !          1411:     {
        !          1412:       struct linebuffer *best = 0;
        !          1413:       struct linebuffer *exch;
        !          1414:       int bestfile = -1;
        !          1415:       int i;
        !          1416: 
        !          1417:       /* Look at the next avail line of each file; choose the least one.  */
        !          1418: 
        !          1419:       for (i = 0; i < nfiles; i++)
        !          1420:        {
        !          1421:          if (thisline[i] &&
        !          1422:              (!best ||
        !          1423:               0 < compare_general (best->buffer, thisline[i]->buffer,
        !          1424:                                    (long) bestfile, (long) i, num_keyfields)))
        !          1425:            {
        !          1426:              best = thisline[i];
        !          1427:              bestfile = i;
        !          1428:            }
        !          1429:        }
        !          1430: 
        !          1431:       /* Output that line, unless it matches the previous one and we don't want duplicates */
        !          1432: 
        !          1433:       if (!(prev_out &&
        !          1434:            !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
        !          1435:        indexify (best->buffer, ostream);
        !          1436:       prev_out = best;
        !          1437: 
        !          1438:       /* Now make the line the previous of its file, and fetch a new line from that file */
        !          1439: 
        !          1440:       exch = prevline[bestfile];
        !          1441:       prevline[bestfile] = thisline[bestfile];
        !          1442:       thisline[bestfile] = exch;
        !          1443: 
        !          1444:       while (1)
        !          1445:        {
        !          1446:          /* If the file has no more, mark it empty */
        !          1447: 
        !          1448:          if (feof (streams[bestfile]))
        !          1449:            {
        !          1450:              thisline[bestfile] = 0;
        !          1451:              nleft--;          /* Update the number of files still not empty */
        !          1452:              break;
        !          1453:            }
        !          1454:          readline (thisline[bestfile], streams[bestfile]);
        !          1455:          if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
        !          1456:        }
        !          1457:     }
        !          1458: 
        !          1459:   finish_index (ostream);
        !          1460: 
        !          1461:   /* Free all storage and close all input streams */
        !          1462: 
        !          1463:   for (i = 0; i < nfiles; i++)
        !          1464:     {
        !          1465:       fclose (streams[i]);
        !          1466:       free (lb1[i].buffer);
        !          1467:       free (lb2[i].buffer);
        !          1468:     }
        !          1469:   free (file_lossage);
        !          1470:   free (lb1);
        !          1471:   free (lb2);
        !          1472:   free (thisline);
        !          1473:   free (prevline);
        !          1474:   free (streams);
        !          1475: 
        !          1476:   if (outfile)
        !          1477:     fclose (ostream);
        !          1478: 
        !          1479:   return lossage;
        !          1480: }
        !          1481: 
        !          1482: /* Print error message and exit.  */
        !          1483: 
        !          1484: fatal (s1, s2)
        !          1485:      char *s1, *s2;
        !          1486: {
        !          1487:   error (s1, s2);
        !          1488:   exit (EXIT_FATAL);
        !          1489: }
        !          1490: 
        !          1491: /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
        !          1492: 
        !          1493: error (s1, s2)
        !          1494:      char *s1, *s2;
        !          1495: {
        !          1496:   printf ("texindex: ");
        !          1497:   printf (s1, s2);
        !          1498:   printf ("\n");
        !          1499: }
        !          1500: 
        !          1501: perror_with_name (name)
        !          1502:      char *name;
        !          1503: {
        !          1504: #ifdef VMS
        !          1505: #include <errno.h>
        !          1506:   extern noshare int sys_nerr;
        !          1507:   extern noshare char *sys_errlist[];
        !          1508: #else
        !          1509:   extern int errno, sys_nerr;
        !          1510:   extern char *sys_errlist[];
        !          1511: #endif
        !          1512:   char *s;
        !          1513: 
        !          1514:   if (errno < sys_nerr)
        !          1515:     s = concat ("", sys_errlist[errno], " for %s");
        !          1516:   else
        !          1517:     s = "cannot open %s";
        !          1518:   error (s, name);
        !          1519: }
        !          1520: 
        !          1521: pfatal_with_name (name)
        !          1522:      char *name;
        !          1523: {
        !          1524:   extern int errno, sys_nerr;
        !          1525:   extern char *sys_errlist[];
        !          1526:   char *s;
        !          1527: 
        !          1528:   if (errno < sys_nerr)
        !          1529:     s = concat ("", sys_errlist[errno], " for %s");
        !          1530:   else
        !          1531:     s = "cannot open %s";
        !          1532:   fatal (s, name);
        !          1533: }
        !          1534: 
        !          1535: /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3.  */
        !          1536: 
        !          1537: char *
        !          1538: concat (s1, s2, s3)
        !          1539:      char *s1, *s2, *s3;
        !          1540: {
        !          1541:   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
        !          1542:   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
        !          1543: 
        !          1544:   strcpy (result, s1);
        !          1545:   strcpy (result + len1, s2);
        !          1546:   strcpy (result + len1 + len2, s3);
        !          1547:   *(result + len1 + len2 + len3) = 0;
        !          1548: 
        !          1549:   return result;
        !          1550: }
        !          1551: 
        !          1552: /* Like malloc but get fatal error if memory is exhausted.  */
        !          1553: 
        !          1554: int
        !          1555: xmalloc (size)
        !          1556:      int size;
        !          1557: {
        !          1558:   int result = malloc (size);
        !          1559:   if (!result)
        !          1560:     fatal ("virtual memory exhausted", 0);
        !          1561:   return result;
        !          1562: }
        !          1563: 
        !          1564: 
        !          1565: int
        !          1566: xrealloc (ptr, size)
        !          1567:      char *ptr;
        !          1568:      int size;
        !          1569: {
        !          1570:   int result = realloc (ptr, size);
        !          1571:   if (!result)
        !          1572:     fatal ("virtual memory exhausted");
        !          1573:   return result;
        !          1574: }
        !          1575: 
        !          1576: bzero (b, length)
        !          1577:      register char *b;
        !          1578:      register int length;
        !          1579: {
        !          1580: #ifdef VMS
        !          1581:   short zero = 0;
        !          1582:   long max_str = 65535;
        !          1583: 
        !          1584:   while (length > max_str) {
        !          1585:     (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b);
        !          1586:     length -= max_str;
        !          1587:     b += max_str;
        !          1588:   }
        !          1589:   (void) LIB$MOVC5 (&zero, &zero, &zero, &length, b);
        !          1590: #else
        !          1591:   while (length-- > 0)
        !          1592:     *b++ = 0;
        !          1593: #endif /* not VMS */
        !          1594: }

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.