|
|
1.1 ! root 1: /* Prepare Tex index dribble output into an actual index. ! 2: Copyright (C) 1987 Free Software Foundation, Inc. ! 3: ! 4: This program is free software; you can redistribute it and/or modify ! 5: it under the terms of the GNU General Public License as published by ! 6: the Free Software Foundation; either version 1, or (at your option) ! 7: any later version. ! 8: ! 9: This program is distributed in the hope that it will be useful, ! 10: but WITHOUT ANY WARRANTY; without even the implied warranty of ! 11: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! 12: GNU General Public License for more details. ! 13: ! 14: You should have received a copy of the GNU General Public License ! 15: along with this program; if not, write to the Free Software ! 16: Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ! 17: ! 18: In other words, you are welcome to use, share and improve this program. ! 19: You are forbidden to forbid anyone else to use, share and improve ! 20: what you give them. Help stamp out software-hoarding! */ ! 21: ! 22: ! 23: #include <stdio.h> ! 24: #include <ctype.h> ! 25: ! 26: #ifdef VMS ! 27: #include <file.h> ! 28: ! 29: #define EXIT_SUCCESS ((1 << 28) | 1) ! 30: #define EXIT_FATAL ((1 << 28) | 4) ! 31: #define unlink delete ! 32: #define tell(fd) lseek(fd, 0L, 1) ! 33: #else ! 34: #include <sys/types.h> ! 35: #include <sys/fcntl.h> ! 36: #include <sys/file.h> ! 37: ! 38: #define EXIT_SUCCESS 0 ! 39: #define EXIT_FATAL 1 ! 40: #endif ! 41: ! 42: ! 43: #ifndef L_XTND ! 44: #define L_XTND 2 ! 45: #endif ! 46: ! 47: /* When sorting in core, this structure describes one line ! 48: and the position and length of its first keyfield. */ ! 49: ! 50: struct lineinfo ! 51: { ! 52: char *text; /* The actual text of the line */ ! 53: union ! 54: { /* The start of the key (for textual comparison) */ ! 55: char *text; ! 56: long number; /* or the numeric value (for numeric comparison) */ ! 57: } key; ! 58: long keylen; /* Length of key field */ ! 59: }; ! 60: ! 61: /* This structure describes a field to use as a sort key */ ! 62: ! 63: struct keyfield ! 64: { ! 65: int startwords; /* # words to skip */ ! 66: int startchars; /* and # additional chars to skip, to start of field */ ! 67: int endwords; /* similar, from beg (or end) of line, to find end of field */ ! 68: int endchars; ! 69: char ignore_blanks; /* Ignore spaces and tabs within the field */ ! 70: char fold_case; /* Convert upper case to lower before comparing */ ! 71: char reverse; /* Compare in reverse order */ ! 72: char numeric; /* Parse text as an integer and compare the integers */ ! 73: char positional; /* Sort according to position within the file */ ! 74: char braced; /* Count balanced-braced groupings as fields */ ! 75: }; ! 76: ! 77: /* Vector of keyfields to use */ ! 78: ! 79: struct keyfield keyfields[3]; ! 80: ! 81: /* Number of keyfields stored in that vector. */ ! 82: ! 83: int num_keyfields = 3; ! 84: ! 85: /* Vector of input file names, terminated with a zero (null pointer) */ ! 86: ! 87: char **infiles; ! 88: ! 89: /* Vector of corresponding output file names, or zero meaning default it */ ! 90: ! 91: char **outfiles; ! 92: ! 93: /* Length of `infiles' */ ! 94: ! 95: int num_infiles; ! 96: ! 97: /* Pointer to the array of pointers to lines being sorted */ ! 98: ! 99: char **linearray; ! 100: ! 101: /* The allocated length of `linearray'. */ ! 102: ! 103: long lines; ! 104: ! 105: /* Directory to use for temporary files. On Unix, it ends with a slash. */ ! 106: ! 107: char *tempdir; ! 108: ! 109: /* Start of filename to use for temporary files. */ ! 110: ! 111: char *tempbase; ! 112: ! 113: /* Number of last temporary file. */ ! 114: ! 115: int tempcount; ! 116: ! 117: /* Number of last temporary file already deleted. ! 118: Temporary files are deleted by `flush_tempfiles' in order of creation. */ ! 119: ! 120: int last_deleted_tempcount; ! 121: ! 122: /* During in-core sort, this points to the base of the data block ! 123: which contains all the lines of data. */ ! 124: ! 125: char *text_base; ! 126: ! 127: /* Additional command switches */ ! 128: ! 129: int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */ ! 130: ! 131: /* Forward declarations of functions in this file */ ! 132: ! 133: void decode_command (); ! 134: void sort_in_core (); ! 135: void sort_offline (); ! 136: char **parsefile (); ! 137: char *find_field (); ! 138: char *find_pos (); ! 139: long find_value (); ! 140: char *find_braced_pos (); ! 141: char *find_braced_end (); ! 142: void writelines (); ! 143: int compare_full (); ! 144: long readline (); ! 145: int merge_files (); ! 146: int merge_direct (); ! 147: char *concat (); ! 148: char *maketempname (); ! 149: void flush_tempfiles (); ! 150: char *tempcopy (); ! 151: ! 152: extern char *mktemp (); ! 153: ! 154: #define MAX_IN_CORE_SORT 500000 ! 155: ! 156: int ! 157: main (argc, argv) ! 158: int argc; ! 159: char **argv; ! 160: { ! 161: int i; ! 162: ! 163: tempcount = 0; ! 164: last_deleted_tempcount = 0; ! 165: ! 166: /* Describe the kind of sorting to do. */ ! 167: /* The first keyfield uses the first braced field and folds case */ ! 168: keyfields[0].braced = 1; ! 169: keyfields[0].fold_case = 1; ! 170: keyfields[0].endwords = -1; ! 171: keyfields[0].endchars = -1; ! 172: /* The second keyfield uses the second braced field, numerically */ ! 173: keyfields[1].braced = 1; ! 174: keyfields[1].numeric = 1; ! 175: keyfields[1].startwords = 1; ! 176: keyfields[1].endwords = -1; ! 177: keyfields[1].endchars = -1; ! 178: /* The third keyfield (which is ignored while discarding duplicates) ! 179: compares the whole line */ ! 180: keyfields[2].endwords = -1; ! 181: keyfields[2].endchars = -1; ! 182: ! 183: decode_command (argc, argv); ! 184: ! 185: tempbase = mktemp (concat ("txiXXXXXX", "", "")); ! 186: ! 187: /* Process input files completely, one by one. */ ! 188: ! 189: for (i = 0; i < num_infiles; i++) ! 190: { ! 191: int desc; ! 192: long ptr; ! 193: char *outfile; ! 194: char *p; ! 195: ! 196: desc = open (infiles[i], 0, 0); ! 197: if (desc < 0) pfatal_with_name (infiles[i]); ! 198: lseek (desc, 0, L_XTND); ! 199: ptr = tell (desc); ! 200: close (desc); ! 201: ! 202: outfile = outfiles[i]; ! 203: if (!outfile) ! 204: { ! 205: outfile = concat (infiles[i], "s", ""); ! 206: } ! 207: ! 208: if (ptr < MAX_IN_CORE_SORT) ! 209: /* Sort a small amount of data */ ! 210: sort_in_core (infiles[i], ptr, outfile); ! 211: else ! 212: sort_offline (infiles[i], ptr, outfile); ! 213: } ! 214: ! 215: flush_tempfiles (tempcount); ! 216: exit (EXIT_SUCCESS); ! 217: } ! 218: ! 219: /* This page decodes the command line arguments to set the parameter variables ! 220: and set up the vector of keyfields and the vector of input files */ ! 221: ! 222: void ! 223: decode_command (argc, argv) ! 224: int argc; ! 225: char **argv; ! 226: { ! 227: int i; ! 228: char **ip; ! 229: char **op; ! 230: ! 231: /* Store default values into parameter variables */ ! 232: ! 233: #ifdef VMS ! 234: tempdir = "sys$scratch:"; ! 235: #else ! 236: tempdir = "/tmp/"; ! 237: #endif ! 238: ! 239: keep_tempfiles = 0; ! 240: ! 241: /* Allocate argc input files, which must be enough. */ ! 242: ! 243: infiles = (char **) xmalloc (argc * sizeof (char *)); ! 244: outfiles = (char **) xmalloc (argc * sizeof (char *)); ! 245: ip = infiles; ! 246: op = outfiles; ! 247: ! 248: /* First find all switches that control the default kind-of-sort */ ! 249: ! 250: for (i = 1; i < argc; i++) ! 251: { ! 252: int tem = classify_arg (argv[i]); ! 253: char c; ! 254: char *p; ! 255: ! 256: if (tem <= 0) ! 257: { ! 258: *ip++ = argv[i]; ! 259: *op++ = 0; ! 260: continue; ! 261: } ! 262: if (tem > 1) ! 263: { ! 264: if (i + 1 == argc) ! 265: fatal ("switch %s given with no argument following it", argv[i]); ! 266: else if (!strcmp (argv[i], "-T")) ! 267: tempdir = argv[i + 1]; ! 268: else if (!strcmp (argv[i], "-o")) ! 269: *(op - 1) = argv[i + 1]; ! 270: i += tem - 1; ! 271: continue; ! 272: } ! 273: ! 274: p = &argv[i][1]; ! 275: while (c = *p++) ! 276: switch (c) ! 277: { ! 278: case 'k': ! 279: keep_tempfiles = 1; ! 280: break; ! 281: ! 282: default: ! 283: fatal ("invalid command switch %c", c); ! 284: } ! 285: switchdone: ; ! 286: } ! 287: ! 288: /* Record number of keyfields, terminate list of filenames */ ! 289: ! 290: num_infiles = ip - infiles; ! 291: *ip = 0; ! 292: } ! 293: ! 294: /* Return 0 for an argument that is not a switch; ! 295: for a switch, return 1 plus the number of following arguments that the switch swallows. ! 296: */ ! 297: ! 298: int ! 299: classify_arg (arg) ! 300: char *arg; ! 301: { ! 302: if (!strcmp (arg, "-T") || !strcmp (arg, "-o")) ! 303: return 2; ! 304: if (arg[0] == '-') ! 305: return 1; ! 306: return 0; ! 307: } ! 308: ! 309: /* Create a name for a temporary file */ ! 310: ! 311: char * ! 312: maketempname (count) ! 313: int count; ! 314: { ! 315: char tempsuffix[10]; ! 316: sprintf (tempsuffix, "%d", count); ! 317: return concat (tempdir, tempbase, tempsuffix); ! 318: } ! 319: ! 320: /* Delete all temporary files up to the specified count */ ! 321: ! 322: void ! 323: flush_tempfiles (to_count) ! 324: int to_count; ! 325: { ! 326: if (keep_tempfiles) return; ! 327: while (last_deleted_tempcount < to_count) ! 328: unlink (maketempname (++last_deleted_tempcount)); ! 329: } ! 330: ! 331: /* Copy an input file into a temporary file, and return the temporary file name */ ! 332: ! 333: #define BUFSIZE 1024 ! 334: ! 335: char * ! 336: tempcopy (idesc) ! 337: int idesc; ! 338: { ! 339: char *outfile = maketempname (++tempcount); ! 340: int odesc; ! 341: char buffer[BUFSIZE]; ! 342: ! 343: odesc = open (outfile, O_WRONLY | O_CREAT, 0666); ! 344: ! 345: if (odesc < 0) pfatal_with_name (outfile); ! 346: ! 347: while (1) ! 348: { ! 349: int nread = read (idesc, buffer, BUFSIZE); ! 350: write (odesc, buffer, nread); ! 351: if (!nread) break; ! 352: } ! 353: ! 354: close (odesc); ! 355: ! 356: return outfile; ! 357: } ! 358: ! 359: /* Compare two lines, provided as pointers to pointers to text, ! 360: according to the specified set of keyfields */ ! 361: ! 362: int ! 363: compare_full (line1, line2) ! 364: char **line1, **line2; ! 365: { ! 366: int i; ! 367: ! 368: /* Compare using the first keyfield; ! 369: if that does not distinguish the lines, try the second keyfield; and so on. */ ! 370: ! 371: for (i = 0; i < num_keyfields; i++) ! 372: { ! 373: long length1, length2; ! 374: char *start1 = find_field (&keyfields[i], *line1, &length1); ! 375: char *start2 = find_field (&keyfields[i], *line2, &length2); ! 376: int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base, ! 377: start2, length2, *line2 - text_base); ! 378: if (tem) ! 379: { ! 380: if (keyfields[i].reverse) ! 381: return - tem; ! 382: return tem; ! 383: } ! 384: } ! 385: ! 386: return 0; /* Lines match exactly */ ! 387: } ! 388: ! 389: /* Compare two lines described by structures ! 390: in which the first keyfield is identified in advance. ! 391: For positional sorting, assumes that the order of the lines in core ! 392: reflects their nominal order. */ ! 393: ! 394: int ! 395: compare_prepared (line1, line2) ! 396: struct lineinfo *line1, *line2; ! 397: { ! 398: int i; ! 399: int tem; ! 400: char *text1, *text2; ! 401: ! 402: /* Compare using the first keyfield, which has been found for us already */ ! 403: if (keyfields->positional) ! 404: { ! 405: if (line1->text - text_base > line2->text - text_base) ! 406: tem = 1; ! 407: else ! 408: tem = -1; ! 409: } ! 410: else if (keyfields->numeric) ! 411: tem = line1->key.number - line2->key.number; ! 412: else ! 413: tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0); ! 414: if (tem) ! 415: { ! 416: if (keyfields->reverse) ! 417: return - tem; ! 418: return tem; ! 419: } ! 420: ! 421: text1 = line1->text; ! 422: text2 = line2->text; ! 423: ! 424: /* Compare using the second keyfield; ! 425: if that does not distinguish the lines, try the third keyfield; and so on. */ ! 426: ! 427: for (i = 1; i < num_keyfields; i++) ! 428: { ! 429: long length1, length2; ! 430: char *start1 = find_field (&keyfields[i], text1, &length1); ! 431: char *start2 = find_field (&keyfields[i], text2, &length2); ! 432: int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base, ! 433: start2, length2, text2 - text_base); ! 434: if (tem) ! 435: { ! 436: if (keyfields[i].reverse) ! 437: return - tem; ! 438: return tem; ! 439: } ! 440: } ! 441: ! 442: return 0; /* Lines match exactly */ ! 443: } ! 444: ! 445: /* Like compare_full but more general. ! 446: You can pass any strings, and you can say how many keyfields to use. ! 447: `pos1' and `pos2' should indicate the nominal positional ordering of ! 448: the two lines in the input. */ ! 449: ! 450: int ! 451: compare_general (str1, str2, pos1, pos2, use_keyfields) ! 452: char *str1, *str2; ! 453: long pos1, pos2; ! 454: int use_keyfields; ! 455: { ! 456: int i; ! 457: ! 458: /* Compare using the first keyfield; ! 459: if that does not distinguish the lines, try the second keyfield; and so on. */ ! 460: ! 461: for (i = 0; i < use_keyfields; i++) ! 462: { ! 463: long length1, length2; ! 464: char *start1 = find_field (&keyfields[i], str1, &length1); ! 465: char *start2 = find_field (&keyfields[i], str2, &length2); ! 466: int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2); ! 467: if (tem) ! 468: { ! 469: if (keyfields[i].reverse) ! 470: return - tem; ! 471: return tem; ! 472: } ! 473: } ! 474: ! 475: return 0; /* Lines match exactly */ ! 476: } ! 477: ! 478: /* Find the start and length of a field in `str' according to `keyfield'. ! 479: A pointer to the starting character is returned, and the length ! 480: is stored into the int that `lengthptr' points to. */ ! 481: ! 482: char * ! 483: find_field (keyfield, str, lengthptr) ! 484: struct keyfield *keyfield; ! 485: char *str; ! 486: long *lengthptr; ! 487: { ! 488: char *start; ! 489: char *end; ! 490: char *(*fun) (); ! 491: ! 492: if (keyfield->braced) fun = find_braced_pos; ! 493: else fun = find_pos; ! 494: ! 495: start = ( *fun )(str, keyfield->startwords, keyfield->startchars, ! 496: keyfield->ignore_blanks); ! 497: if (keyfield->endwords < 0) ! 498: { ! 499: if (keyfield->braced) ! 500: end = find_braced_end (start); ! 501: else ! 502: { ! 503: end = start; ! 504: while (*end && *end != '\n') end++; ! 505: } ! 506: } ! 507: else ! 508: { ! 509: end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0); ! 510: if (end - str < start - str) end = start; ! 511: } ! 512: *lengthptr = end - start; ! 513: return start; ! 514: } ! 515: ! 516: /* Find a pointer to a specified place within `str', ! 517: skipping (from the beginning) `words' words and then `chars' chars. ! 518: If `ignore_blanks' is nonzero, we skip all blanks ! 519: after finding the specified word. */ ! 520: ! 521: char * ! 522: find_pos (str, words, chars, ignore_blanks) ! 523: char *str; ! 524: int words, chars; ! 525: int ignore_blanks; ! 526: { ! 527: int i; ! 528: char *p = str; ! 529: ! 530: for (i = 0; i < words; i++) ! 531: { ! 532: char c; ! 533: /* Find next bunch of nonblanks and skip them. */ ! 534: while ((c = *p) == ' ' || c == '\t') p++; ! 535: while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++; ! 536: if (!*p || *p == '\n') return p; ! 537: } ! 538: ! 539: while (*p == ' ' || *p == '\t') p++; ! 540: ! 541: for (i = 0; i < chars; i++) ! 542: { ! 543: if (!*p || *p == '\n') break; ! 544: p++; ! 545: } ! 546: return p; ! 547: } ! 548: ! 549: /* Like find_pos but assumes that each field is surrounded by braces ! 550: and that braces within fields are balanced. */ ! 551: ! 552: char * ! 553: find_braced_pos (str, words, chars, ignore_blanks) ! 554: char *str; ! 555: int words, chars; ! 556: int ignore_blanks; ! 557: { ! 558: int i; ! 559: int bracelevel; ! 560: char *p = str; ! 561: char c; ! 562: ! 563: for (i = 0; i < words; i++) ! 564: { ! 565: bracelevel = 1; ! 566: while ((c = *p++) != '{' && c != '\n' && c); ! 567: if (c != '{') ! 568: return p - 1; ! 569: while (bracelevel) ! 570: { ! 571: c = *p++; ! 572: if (c == '{') bracelevel++; ! 573: if (c == '}') bracelevel--; ! 574: if (c == '\\') c = *p++; /* \ quotes braces and \ */ ! 575: if (c == 0 || c == '\n') return p-1; ! 576: } ! 577: } ! 578: ! 579: while ((c = *p++) != '{' && c != '\n' && c); ! 580: ! 581: if (c != '{') ! 582: return p-1; ! 583: ! 584: if (ignore_blanks) ! 585: while ((c = *p) == ' ' || c == '\t') p++; ! 586: ! 587: for (i = 0; i < chars; i++) ! 588: { ! 589: if (!*p || *p == '\n') break; ! 590: p++; ! 591: } ! 592: return p; ! 593: } ! 594: ! 595: /* Find the end of the balanced-brace field which starts at `str'. ! 596: The position returned is just before the closing brace. */ ! 597: ! 598: char * ! 599: find_braced_end (str) ! 600: char *str; ! 601: { ! 602: int bracelevel; ! 603: char *p = str; ! 604: char c; ! 605: ! 606: bracelevel = 1; ! 607: while (bracelevel) ! 608: { ! 609: c = *p++; ! 610: if (c == '{') bracelevel++; ! 611: if (c == '}') bracelevel--; ! 612: if (c == '\\') c = *p++; ! 613: if (c == 0 || c == '\n') return p-1; ! 614: } ! 615: return p - 1; ! 616: } ! 617: ! 618: long ! 619: find_value (start, length) ! 620: char *start; ! 621: long length; ! 622: { ! 623: while (length != 0L) { ! 624: if (isdigit(*start)) ! 625: return atol(start); ! 626: length--; ! 627: start++; ! 628: } ! 629: return 0l; ! 630: } ! 631: ! 632: /* Vector used to translate characters for comparison. ! 633: This is how we make all alphanumerics follow all else, ! 634: and ignore case in the first sorting. */ ! 635: int char_order[256]; ! 636: ! 637: init_char_order () ! 638: { ! 639: int i; ! 640: for (i = 1; i < 256; i++) ! 641: char_order[i] = i; ! 642: ! 643: for (i = '0'; i <= '9'; i++) ! 644: char_order[i] += 512; ! 645: ! 646: for (i = 'a'; i <= 'z'; i++) { ! 647: char_order[i] = 512 + i; ! 648: char_order[i + 'A' - 'a'] = 512 + i; ! 649: } ! 650: } ! 651: ! 652: /* Compare two fields (each specified as a start pointer and a character count) ! 653: according to `keyfield'. The sign of the value reports the relation between the fields */ ! 654: ! 655: int ! 656: compare_field (keyfield, start1, length1, pos1, start2, length2, pos2) ! 657: struct keyfield *keyfield; ! 658: char *start1; ! 659: long length1; ! 660: long pos1; ! 661: char *start2; ! 662: long length2; ! 663: long pos2; ! 664: { ! 665: if (keyfields->positional) ! 666: { ! 667: if (pos1 > pos2) ! 668: return 1; ! 669: else ! 670: return -1; ! 671: } ! 672: if (keyfield->numeric) ! 673: { ! 674: long value = find_value (start1, length1) - find_value (start2, length2); ! 675: if (value > 0) return 1; ! 676: if (value < 0) return -1; ! 677: return 0; ! 678: } ! 679: else ! 680: { ! 681: char *p1 = start1; ! 682: char *p2 = start2; ! 683: char *e1 = start1 + length1; ! 684: char *e2 = start2 + length2; ! 685: ! 686: int fold_case = keyfield->fold_case; ! 687: ! 688: while (1) ! 689: { ! 690: int c1, c2; ! 691: ! 692: if (p1 == e1) c1 = 0; ! 693: else c1 = *p1++; ! 694: if (p2 == e2) c2 = 0; ! 695: else c2 = *p2++; ! 696: ! 697: if (char_order[c1] != char_order[c2]) ! 698: return char_order[c1] - char_order[c2]; ! 699: if (!c1) break; ! 700: } ! 701: ! 702: /* Strings are equal except possibly for case. */ ! 703: p1 = start1; ! 704: p2 = start2; ! 705: while (1) ! 706: { ! 707: int c1, c2; ! 708: ! 709: if (p1 == e1) c1 = 0; ! 710: else c1 = *p1++; ! 711: if (p2 == e2) c2 = 0; ! 712: else c2 = *p2++; ! 713: ! 714: if (c1 != c2) ! 715: /* Reverse sign here so upper case comes out last. */ ! 716: return c2 - c1; ! 717: if (!c1) break; ! 718: } ! 719: ! 720: return 0; ! 721: } ! 722: } ! 723: ! 724: /* A `struct linebuffer' is a structure which holds a line of text. ! 725: `readline' reads a line from a stream into a linebuffer ! 726: and works regardless of the length of the line. */ ! 727: ! 728: struct linebuffer ! 729: { ! 730: long size; ! 731: char *buffer; ! 732: }; ! 733: ! 734: /* Initialize a linebuffer for use */ ! 735: ! 736: void ! 737: initbuffer (linebuffer) ! 738: struct linebuffer *linebuffer; ! 739: { ! 740: linebuffer->size = 200; ! 741: linebuffer->buffer = (char *) xmalloc (200); ! 742: } ! 743: ! 744: /* Read a line of text from `stream' into `linebuffer'. ! 745: Return the length of the line. */ ! 746: ! 747: long ! 748: readline (linebuffer, stream) ! 749: struct linebuffer *linebuffer; ! 750: FILE *stream; ! 751: { ! 752: char *buffer = linebuffer->buffer; ! 753: char *p = linebuffer->buffer; ! 754: char *end = p + linebuffer->size; ! 755: ! 756: while (1) ! 757: { ! 758: int c = getc (stream); ! 759: if (p == end) ! 760: { ! 761: buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); ! 762: p += buffer - linebuffer->buffer; ! 763: end += buffer - linebuffer->buffer; ! 764: linebuffer->buffer = buffer; ! 765: } ! 766: if (c < 0 || c == '\n') ! 767: { ! 768: *p = 0; ! 769: break; ! 770: } ! 771: *p++ = c; ! 772: } ! 773: ! 774: return p - buffer; ! 775: } ! 776: ! 777: /* Sort an input file too big to sort in core. */ ! 778: ! 779: void ! 780: sort_offline (infile, nfiles, total, outfile) ! 781: char *infile; ! 782: long total; ! 783: char *outfile; ! 784: { ! 785: int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */ ! 786: char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); ! 787: FILE *istream = fopen (infile, "r"); ! 788: int i; ! 789: struct linebuffer lb; ! 790: long linelength; ! 791: int failure = 0; ! 792: ! 793: initbuffer (&lb); ! 794: ! 795: /* Read in one line of input data. */ ! 796: ! 797: linelength = readline (&lb, istream); ! 798: ! 799: if (lb.buffer[0] != '\\') ! 800: { ! 801: error ("%s: not a texinfo index file", infile); ! 802: return; ! 803: } ! 804: ! 805: /* Split up the input into `ntemps' temporary files, or maybe fewer, ! 806: and put the new files' names into `tempfiles' */ ! 807: ! 808: for (i = 0; i < ntemps; i++) ! 809: { ! 810: char *outname = maketempname (++tempcount); ! 811: FILE *ostream = fopen (outname, "w"); ! 812: long tempsize = 0; ! 813: ! 814: if (!ostream) pfatal_with_name (outname); ! 815: tempfiles[i] = outname; ! 816: ! 817: /* Copy lines into this temp file as long as it does not make file "too big" ! 818: or until there are no more lines. */ ! 819: ! 820: while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) ! 821: { ! 822: tempsize += linelength + 1; ! 823: fputs (lb.buffer, ostream); ! 824: putc ('\n', ostream); ! 825: ! 826: /* Read another line of input data. */ ! 827: ! 828: linelength = readline (&lb, istream); ! 829: if (!linelength && feof (istream)) break; ! 830: ! 831: if (lb.buffer[0] != '\\') ! 832: { ! 833: error ("%s: not a texinfo index file", infile); ! 834: failure = 1; ! 835: goto fail; ! 836: } ! 837: } ! 838: fclose (ostream); ! 839: if (feof (istream)) break; ! 840: } ! 841: ! 842: free (lb.buffer); ! 843: ! 844: fail: ! 845: /* Record number of temp files we actually needed. */ ! 846: ! 847: ntemps = i; ! 848: ! 849: /* Sort each tempfile into another tempfile. ! 850: Delete the first set of tempfiles and put the names of the second into `tempfiles' */ ! 851: ! 852: for (i = 0; i < ntemps; i++) ! 853: { ! 854: char *newtemp = maketempname (++tempcount); ! 855: sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp); ! 856: if (!keep_tempfiles) ! 857: unlink (tempfiles[i]); ! 858: tempfiles[i] = newtemp; ! 859: } ! 860: ! 861: if (failure) ! 862: return; ! 863: ! 864: /* Merge the tempfiles together and indexify */ ! 865: ! 866: merge_files (tempfiles, ntemps, outfile); ! 867: } ! 868: ! 869: /* Sort `infile', whose size is `total', ! 870: assuming that is small enough to be done in-core, ! 871: then indexify it and send the output to `outfile' (or to stdout). */ ! 872: ! 873: void ! 874: sort_in_core (infile, total, outfile) ! 875: char *infile; ! 876: long total; ! 877: char *outfile; ! 878: { ! 879: char **nextline; ! 880: char *data = (char *) xmalloc (total + 1); ! 881: char *file_data; ! 882: long file_size; ! 883: int i; ! 884: FILE *ostream = stdout; ! 885: struct lineinfo *lineinfo; ! 886: ! 887: /* Read the contents of the file into the moby array `data' */ ! 888: ! 889: int desc = open (infile, 0, 0); ! 890: ! 891: if (desc < 0) ! 892: fatal ("failure reopening %s", infile); ! 893: for (file_size = 0; ; ) ! 894: { ! 895: if ((i = read (desc, data + file_size, total - file_size)) <= 0) ! 896: break; ! 897: file_size += i; ! 898: } ! 899: file_data = data; ! 900: data[file_size] = 0; ! 901: ! 902: close (desc); ! 903: ! 904: if (file_size > 0 && data[0] != '\\') ! 905: { ! 906: error ("%s: not a texinfo index file", infile); ! 907: return; ! 908: } ! 909: ! 910: init_char_order (); ! 911: ! 912: /* Sort routines want to know this address */ ! 913: ! 914: text_base = data; ! 915: ! 916: /* Create the array of pointers to lines, with a default size frequently enough. */ ! 917: ! 918: lines = total / 50; ! 919: if (!lines) lines = 2; ! 920: linearray = (char **) xmalloc (lines * sizeof (char *)); ! 921: ! 922: /* `nextline' points to the next free slot in this array. ! 923: `lines' is the allocated size. */ ! 924: ! 925: nextline = linearray; ! 926: ! 927: /* Parse the input file's data, and make entries for the lines. */ ! 928: ! 929: nextline = parsefile (infile, nextline, file_data, file_size); ! 930: if (nextline == 0) ! 931: { ! 932: error ("%s: not a texinfo index file", infile); ! 933: return; ! 934: } ! 935: ! 936: /* Sort the lines */ ! 937: ! 938: /* If we have enough space, find the first keyfield of each line in advance. ! 939: Make a `struct lineinfo' for each line, which records the keyfield ! 940: as well as the line, and sort them. */ ! 941: ! 942: lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo)); ! 943: ! 944: if (lineinfo) ! 945: { ! 946: struct lineinfo *lp; ! 947: char **p; ! 948: ! 949: for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) ! 950: { ! 951: lp->text = *p; ! 952: lp->key.text = find_field (keyfields, *p, &lp->keylen); ! 953: if (keyfields->numeric) ! 954: lp->key.number = find_value (lp->key.text, lp->keylen); ! 955: } ! 956: ! 957: qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared); ! 958: ! 959: for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) ! 960: *p = lp->text; ! 961: ! 962: free (lineinfo); ! 963: } ! 964: else ! 965: qsort (linearray, nextline - linearray, sizeof (char *), compare_full); ! 966: ! 967: /* Open the output file */ ! 968: ! 969: if (outfile) ! 970: { ! 971: ostream = fopen (outfile, "w"); ! 972: if (!ostream) ! 973: pfatal_with_name (outfile); ! 974: } ! 975: ! 976: writelines (linearray, nextline - linearray, ostream); ! 977: if (outfile) fclose (ostream); ! 978: ! 979: free (linearray); ! 980: free (data); ! 981: } ! 982: ! 983: /* Parse an input string in core into lines. ! 984: DATA is the input string, and SIZE is its length. ! 985: Data goes in LINEARRAY starting at NEXTLINE. ! 986: The value returned is the first entry in LINEARRAY still unused. ! 987: Value 0 means input file contents are invalid. */ ! 988: ! 989: char ** ! 990: parsefile (filename, nextline, data, size) ! 991: char *filename; ! 992: char **nextline; ! 993: char *data; ! 994: long size; ! 995: { ! 996: char *p, *end; ! 997: char **line = nextline; ! 998: ! 999: p = data; ! 1000: end = p + size; ! 1001: *end = 0; ! 1002: ! 1003: while (p != end) ! 1004: { ! 1005: if (p[0] != '\\') ! 1006: return 0; ! 1007: ! 1008: *line = p; ! 1009: while (*p && *p != '\n') p++; ! 1010: if (p != end) p++; ! 1011: ! 1012: line++; ! 1013: if (line == linearray + lines) ! 1014: { ! 1015: char **old = linearray; ! 1016: linearray = (char **) xrealloc (linearray, sizeof (char *) * (lines *= 4)); ! 1017: line += linearray - old; ! 1018: } ! 1019: } ! 1020: ! 1021: return line; ! 1022: } ! 1023: ! 1024: /* Indexification is a filter applied to the sorted lines ! 1025: as they are being written to the output file. ! 1026: Multiple entries for the same name, with different page numbers, ! 1027: get combined into a single entry with multiple page numbers. ! 1028: The first braced field, which is used for sorting, is discarded. ! 1029: However, its first character is examined, folded to lower case, ! 1030: and if it is different from that in the previous line fed to us ! 1031: a \initial line is written with one argument, the new initial. ! 1032: ! 1033: If an entry has four braced fields, then the second and third ! 1034: constitute primary and secondary names. ! 1035: In this case, each change of primary name ! 1036: generates a \primary line which contains only the primary name, ! 1037: and in between these are \secondary lines which contain ! 1038: just a secondary name and page numbers. ! 1039: */ ! 1040: ! 1041: /* The last primary name we wrote a \primary entry for. ! 1042: If only one level of indexing is being done, this is the last name seen */ ! 1043: char *lastprimary; ! 1044: int lastprimarylength; /* Length of storage allocated for lastprimary */ ! 1045: ! 1046: /* Similar, for the secondary name. */ ! 1047: char *lastsecondary; ! 1048: int lastsecondarylength; ! 1049: ! 1050: /* Zero if we are not in the middle of writing an entry. ! 1051: One if we have written the beginning of an entry but have not ! 1052: yet written any page numbers into it. ! 1053: Greater than one if we have written the beginning of an entry ! 1054: plus at least one page number. */ ! 1055: int pending; ! 1056: ! 1057: /* The initial (for sorting purposes) of the last primary entry written. ! 1058: When this changes, a \initial {c} line is written */ ! 1059: ! 1060: char * lastinitial; ! 1061: ! 1062: int lastinitiallength; ! 1063: ! 1064: /* When we need a string of length 1 for the value of lastinitial, ! 1065: store it here. */ ! 1066: ! 1067: char lastinitial1[2]; ! 1068: ! 1069: /* Initialize static storage for writing an index */ ! 1070: ! 1071: void ! 1072: init_index () ! 1073: { ! 1074: pending = 0; ! 1075: lastinitial = lastinitial1; ! 1076: lastinitial1[0] = 0; ! 1077: lastinitial1[1] = 0; ! 1078: lastinitiallength = 0; ! 1079: lastprimarylength = 100; ! 1080: lastprimary = (char *) xmalloc (lastprimarylength + 1); ! 1081: bzero (lastprimary, lastprimarylength + 1); ! 1082: lastsecondarylength = 100; ! 1083: lastsecondary = (char *) xmalloc (lastsecondarylength + 1); ! 1084: bzero (lastsecondary, lastsecondarylength + 1); ! 1085: } ! 1086: ! 1087: /* Indexify. Merge entries for the same name, ! 1088: insert headers for each initial character, etc. */ ! 1089: ! 1090: indexify (line, ostream) ! 1091: char *line; ! 1092: FILE *ostream; ! 1093: { ! 1094: char *primary, *secondary, *pagenumber; ! 1095: int primarylength, secondarylength, pagelength; ! 1096: int len = strlen (line); ! 1097: int nosecondary; ! 1098: int initiallength; ! 1099: char *initial; ! 1100: char initial1[2]; ! 1101: register char *p; ! 1102: ! 1103: /* First, analyze the parts of the entry fed to us this time */ ! 1104: ! 1105: p = find_braced_pos (line, 0, 0, 0); ! 1106: if (*p == '{') ! 1107: { ! 1108: initial = p; ! 1109: /* Get length of inner pair of braces starting at p, ! 1110: including that inner pair of braces. */ ! 1111: initiallength = find_braced_end (p + 1) + 1 - p; ! 1112: } ! 1113: else ! 1114: { ! 1115: initial = initial1; ! 1116: initial1[0] = *p; ! 1117: initial1[1] = 0; ! 1118: initiallength = 1; ! 1119: ! 1120: if (initial1[0] >= 'a' && initial1[0] <= 'z') ! 1121: initial1[0] -= 040; ! 1122: } ! 1123: ! 1124: pagenumber = find_braced_pos (line, 1, 0, 0); ! 1125: pagelength = find_braced_end (pagenumber) - pagenumber; ! 1126: if (pagelength == 0) ! 1127: abort (); ! 1128: ! 1129: primary = find_braced_pos (line, 2, 0, 0); ! 1130: primarylength = find_braced_end (primary) - primary; ! 1131: ! 1132: secondary = find_braced_pos (line, 3, 0, 0); ! 1133: nosecondary = !*secondary; ! 1134: if (!nosecondary) ! 1135: secondarylength = find_braced_end (secondary) - secondary; ! 1136: ! 1137: /* If the primary is different from before, make a new primary entry */ ! 1138: if (strncmp (primary, lastprimary, primarylength)) ! 1139: { ! 1140: /* Close off current secondary entry first, if one is open */ ! 1141: if (pending) ! 1142: { ! 1143: fputs ("}\n", ostream); ! 1144: pending = 0; ! 1145: } ! 1146: ! 1147: /* If this primary has a different initial, include an entry for the initial */ ! 1148: if (initiallength != lastinitiallength || ! 1149: strncmp (initial, lastinitial, initiallength)) ! 1150: { ! 1151: fprintf (ostream, "\\initial {"); ! 1152: fwrite (initial, 1, initiallength, ostream); ! 1153: fprintf (ostream, "}\n", initial); ! 1154: if (initial == initial1) ! 1155: { ! 1156: lastinitial = lastinitial1; ! 1157: *lastinitial1 = *initial1; ! 1158: } ! 1159: else ! 1160: { ! 1161: lastinitial = initial; ! 1162: } ! 1163: lastinitiallength = initiallength; ! 1164: } ! 1165: ! 1166: /* Make the entry for the primary. */ ! 1167: if (nosecondary) ! 1168: fputs ("\\entry {", ostream); ! 1169: else ! 1170: fputs ("\\primary {", ostream); ! 1171: fwrite (primary, primarylength, 1, ostream); ! 1172: if (nosecondary) ! 1173: { ! 1174: fputs ("}{", ostream); ! 1175: pending = 1; ! 1176: } ! 1177: else ! 1178: fputs ("}\n", ostream); ! 1179: ! 1180: /* Record name of most recent primary */ ! 1181: if (lastprimarylength < primarylength) ! 1182: { ! 1183: lastprimarylength = primarylength + 100; ! 1184: lastprimary = (char *) xrealloc (lastprimary, ! 1185: 1 + lastprimarylength); ! 1186: } ! 1187: strncpy (lastprimary, primary, primarylength); ! 1188: lastprimary[primarylength] = 0; ! 1189: ! 1190: /* There is no current secondary within this primary, now */ ! 1191: lastsecondary[0] = 0; ! 1192: } ! 1193: ! 1194: /* Should not have an entry with no subtopic following one with a subtopic */ ! 1195: ! 1196: if (nosecondary && *lastsecondary) ! 1197: error ("entry %s follows an entry with a secondary name", line); ! 1198: ! 1199: /* Start a new secondary entry if necessary */ ! 1200: if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) ! 1201: { ! 1202: if (pending) ! 1203: { ! 1204: fputs ("}\n", ostream); ! 1205: pending = 0; ! 1206: } ! 1207: ! 1208: /* Write the entry for the secondary. */ ! 1209: fputs ("\\secondary {", ostream); ! 1210: fwrite (secondary, secondarylength, 1, ostream); ! 1211: fputs ("}{", ostream); ! 1212: pending = 1; ! 1213: ! 1214: /* Record name of most recent secondary */ ! 1215: if (lastsecondarylength < secondarylength) ! 1216: { ! 1217: lastsecondarylength = secondarylength + 100; ! 1218: lastsecondary = (char *) xrealloc (lastsecondary, ! 1219: 1 + lastsecondarylength); ! 1220: } ! 1221: strncpy (lastsecondary, secondary, secondarylength); ! 1222: lastsecondary[secondarylength] = 0; ! 1223: } ! 1224: ! 1225: /* Here to add one more page number to the current entry */ ! 1226: if (pending++ != 1) ! 1227: fputs (", ", ostream); /* Punctuate first, if this is not the first */ ! 1228: fwrite (pagenumber, pagelength, 1, ostream); ! 1229: } ! 1230: ! 1231: /* Close out any unfinished output entry */ ! 1232: ! 1233: void ! 1234: finish_index (ostream) ! 1235: FILE *ostream; ! 1236: { ! 1237: if (pending) ! 1238: fputs ("}\n", ostream); ! 1239: free (lastprimary); ! 1240: free (lastsecondary); ! 1241: } ! 1242: ! 1243: /* Copy the lines in the sorted order. ! 1244: Each line is copied out of the input file it was found in. */ ! 1245: ! 1246: void ! 1247: writelines (linearray, nlines, ostream) ! 1248: char **linearray; ! 1249: int nlines; ! 1250: FILE *ostream; ! 1251: { ! 1252: char **stop_line = linearray + nlines; ! 1253: char **next_line; ! 1254: ! 1255: init_index (); ! 1256: ! 1257: /* Output the text of the lines, and free the buffer space */ ! 1258: ! 1259: for (next_line = linearray; next_line != stop_line; next_line++) ! 1260: { ! 1261: /* If -u was specified, output the line only if distinct from previous one. */ ! 1262: if (next_line == linearray ! 1263: /* Compare previous line with this one, using only the explicitly specd keyfields */ ! 1264: || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1)) ! 1265: { ! 1266: char *p = *next_line; ! 1267: char c; ! 1268: while ((c = *p++) && c != '\n'); ! 1269: *(p-1) = 0; ! 1270: indexify (*next_line, ostream); ! 1271: } ! 1272: } ! 1273: ! 1274: finish_index (ostream); ! 1275: } ! 1276: ! 1277: /* Assume (and optionally verify) that each input file is sorted; ! 1278: merge them and output the result. ! 1279: Returns nonzero if any input file fails to be sorted. ! 1280: ! 1281: This is the high-level interface that can handle an unlimited number of files. */ ! 1282: ! 1283: #define MAX_DIRECT_MERGE 10 ! 1284: ! 1285: int ! 1286: merge_files (infiles, nfiles, outfile) ! 1287: char **infiles; ! 1288: int nfiles; ! 1289: char *outfile; ! 1290: { ! 1291: char **tempfiles; ! 1292: int ntemps; ! 1293: int i; ! 1294: int value = 0; ! 1295: int start_tempcount = tempcount; ! 1296: ! 1297: if (nfiles <= MAX_DIRECT_MERGE) ! 1298: return merge_direct (infiles, nfiles, outfile); ! 1299: ! 1300: /* Merge groups of MAX_DIRECT_MERGE input files at a time, ! 1301: making a temporary file to hold each group's result. */ ! 1302: ! 1303: ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; ! 1304: tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); ! 1305: for (i = 0; i < ntemps; i++) ! 1306: { ! 1307: int nf = MAX_DIRECT_MERGE; ! 1308: if (i + 1 == ntemps) ! 1309: nf = nfiles - i * MAX_DIRECT_MERGE; ! 1310: tempfiles[i] = maketempname (++tempcount); ! 1311: value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); ! 1312: } ! 1313: ! 1314: /* All temporary files that existed before are no longer needed ! 1315: since their contents have been merged into our new tempfiles. ! 1316: So delete them. */ ! 1317: flush_tempfiles (start_tempcount); ! 1318: ! 1319: /* Now merge the temporary files we created. */ ! 1320: ! 1321: merge_files (tempfiles, ntemps, outfile); ! 1322: ! 1323: free (tempfiles); ! 1324: ! 1325: return value; ! 1326: } ! 1327: ! 1328: /* Assume (and optionally verify) that each input file is sorted; ! 1329: merge them and output the result. ! 1330: Returns nonzero if any input file fails to be sorted. ! 1331: ! 1332: This version of merging will not work if the number of ! 1333: input files gets too high. Higher level functions ! 1334: use it only with a bounded number of input files. */ ! 1335: ! 1336: int ! 1337: merge_direct (infiles, nfiles, outfile) ! 1338: char **infiles; ! 1339: int nfiles; ! 1340: char *outfile; ! 1341: { ! 1342: char **ip = infiles; ! 1343: struct linebuffer *lb1, *lb2; ! 1344: struct linebuffer **thisline, **prevline; ! 1345: FILE **streams; ! 1346: int i; ! 1347: int nleft; ! 1348: int lossage = 0; ! 1349: int *file_lossage; ! 1350: struct linebuffer *prev_out = 0; ! 1351: FILE *ostream = stdout; ! 1352: ! 1353: if (outfile) ! 1354: { ! 1355: ostream = fopen (outfile, "w"); ! 1356: } ! 1357: if (!ostream) pfatal_with_name (outfile); ! 1358: ! 1359: init_index (); ! 1360: ! 1361: if (nfiles == 0) ! 1362: { ! 1363: if (outfile) ! 1364: fclose (ostream); ! 1365: return 0; ! 1366: } ! 1367: ! 1368: /* For each file, make two line buffers. ! 1369: Also, for each file, there is an element of `thisline' ! 1370: which points at any time to one of the file's two buffers, ! 1371: and an element of `prevline' which points to the other buffer. ! 1372: `thisline' is supposed to point to the next available line from the file, ! 1373: while `prevline' holds the last file line used, ! 1374: which is remembered so that we can verify that the file is properly sorted. */ ! 1375: ! 1376: /* lb1 and lb2 contain one buffer each per file */ ! 1377: lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); ! 1378: lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); ! 1379: ! 1380: /* thisline[i] points to the linebuffer holding the next available line in file i, ! 1381: or is zero if there are no lines left in that file. */ ! 1382: thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *)); ! 1383: /* prevline[i] points to the linebuffer holding the last used line from file i. ! 1384: This is just for verifying that file i is properly sorted. */ ! 1385: prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *)); ! 1386: /* streams[i] holds the input stream for file i. */ ! 1387: streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); ! 1388: /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */ ! 1389: file_lossage = (int *) xmalloc (nfiles * sizeof (int)); ! 1390: ! 1391: /* Allocate and initialize all that storage */ ! 1392: ! 1393: for (i = 0; i < nfiles; i++) ! 1394: { ! 1395: initbuffer (&lb1[i]); ! 1396: initbuffer (&lb2[i]); ! 1397: thisline[i] = &lb1[i]; ! 1398: prevline[i] = &lb2[i]; ! 1399: file_lossage[i] = 0; ! 1400: streams[i] = fopen (infiles[i], "r"); ! 1401: if (!streams[i]) ! 1402: pfatal_with_name (infiles[i]); ! 1403: ! 1404: readline (thisline[i], streams[i]); ! 1405: } ! 1406: ! 1407: /* Keep count of number of files not at eof */ ! 1408: nleft = nfiles; ! 1409: ! 1410: while (nleft) ! 1411: { ! 1412: struct linebuffer *best = 0; ! 1413: struct linebuffer *exch; ! 1414: int bestfile = -1; ! 1415: int i; ! 1416: ! 1417: /* Look at the next avail line of each file; choose the least one. */ ! 1418: ! 1419: for (i = 0; i < nfiles; i++) ! 1420: { ! 1421: if (thisline[i] && ! 1422: (!best || ! 1423: 0 < compare_general (best->buffer, thisline[i]->buffer, ! 1424: (long) bestfile, (long) i, num_keyfields))) ! 1425: { ! 1426: best = thisline[i]; ! 1427: bestfile = i; ! 1428: } ! 1429: } ! 1430: ! 1431: /* Output that line, unless it matches the previous one and we don't want duplicates */ ! 1432: ! 1433: if (!(prev_out && ! 1434: !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1))) ! 1435: indexify (best->buffer, ostream); ! 1436: prev_out = best; ! 1437: ! 1438: /* Now make the line the previous of its file, and fetch a new line from that file */ ! 1439: ! 1440: exch = prevline[bestfile]; ! 1441: prevline[bestfile] = thisline[bestfile]; ! 1442: thisline[bestfile] = exch; ! 1443: ! 1444: while (1) ! 1445: { ! 1446: /* If the file has no more, mark it empty */ ! 1447: ! 1448: if (feof (streams[bestfile])) ! 1449: { ! 1450: thisline[bestfile] = 0; ! 1451: nleft--; /* Update the number of files still not empty */ ! 1452: break; ! 1453: } ! 1454: readline (thisline[bestfile], streams[bestfile]); ! 1455: if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break; ! 1456: } ! 1457: } ! 1458: ! 1459: finish_index (ostream); ! 1460: ! 1461: /* Free all storage and close all input streams */ ! 1462: ! 1463: for (i = 0; i < nfiles; i++) ! 1464: { ! 1465: fclose (streams[i]); ! 1466: free (lb1[i].buffer); ! 1467: free (lb2[i].buffer); ! 1468: } ! 1469: free (file_lossage); ! 1470: free (lb1); ! 1471: free (lb2); ! 1472: free (thisline); ! 1473: free (prevline); ! 1474: free (streams); ! 1475: ! 1476: if (outfile) ! 1477: fclose (ostream); ! 1478: ! 1479: return lossage; ! 1480: } ! 1481: ! 1482: /* Print error message and exit. */ ! 1483: ! 1484: fatal (s1, s2) ! 1485: char *s1, *s2; ! 1486: { ! 1487: error (s1, s2); ! 1488: exit (EXIT_FATAL); ! 1489: } ! 1490: ! 1491: /* Print error message. `s1' is printf control string, `s2' is arg for it. */ ! 1492: ! 1493: error (s1, s2) ! 1494: char *s1, *s2; ! 1495: { ! 1496: printf ("texindex: "); ! 1497: printf (s1, s2); ! 1498: printf ("\n"); ! 1499: } ! 1500: ! 1501: perror_with_name (name) ! 1502: char *name; ! 1503: { ! 1504: #ifdef VMS ! 1505: #include <errno.h> ! 1506: extern noshare int sys_nerr; ! 1507: extern noshare char *sys_errlist[]; ! 1508: #else ! 1509: extern int errno, sys_nerr; ! 1510: extern char *sys_errlist[]; ! 1511: #endif ! 1512: char *s; ! 1513: ! 1514: if (errno < sys_nerr) ! 1515: s = concat ("", sys_errlist[errno], " for %s"); ! 1516: else ! 1517: s = "cannot open %s"; ! 1518: error (s, name); ! 1519: } ! 1520: ! 1521: pfatal_with_name (name) ! 1522: char *name; ! 1523: { ! 1524: extern int errno, sys_nerr; ! 1525: extern char *sys_errlist[]; ! 1526: char *s; ! 1527: ! 1528: if (errno < sys_nerr) ! 1529: s = concat ("", sys_errlist[errno], " for %s"); ! 1530: else ! 1531: s = "cannot open %s"; ! 1532: fatal (s, name); ! 1533: } ! 1534: ! 1535: /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */ ! 1536: ! 1537: char * ! 1538: concat (s1, s2, s3) ! 1539: char *s1, *s2, *s3; ! 1540: { ! 1541: int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); ! 1542: char *result = (char *) xmalloc (len1 + len2 + len3 + 1); ! 1543: ! 1544: strcpy (result, s1); ! 1545: strcpy (result + len1, s2); ! 1546: strcpy (result + len1 + len2, s3); ! 1547: *(result + len1 + len2 + len3) = 0; ! 1548: ! 1549: return result; ! 1550: } ! 1551: ! 1552: /* Like malloc but get fatal error if memory is exhausted. */ ! 1553: ! 1554: int ! 1555: xmalloc (size) ! 1556: int size; ! 1557: { ! 1558: int result = malloc (size); ! 1559: if (!result) ! 1560: fatal ("virtual memory exhausted", 0); ! 1561: return result; ! 1562: } ! 1563: ! 1564: ! 1565: int ! 1566: xrealloc (ptr, size) ! 1567: char *ptr; ! 1568: int size; ! 1569: { ! 1570: int result = realloc (ptr, size); ! 1571: if (!result) ! 1572: fatal ("virtual memory exhausted"); ! 1573: return result; ! 1574: } ! 1575: ! 1576: bzero (b, length) ! 1577: register char *b; ! 1578: register int length; ! 1579: { ! 1580: #ifdef VMS ! 1581: short zero = 0; ! 1582: long max_str = 65535; ! 1583: ! 1584: while (length > max_str) { ! 1585: (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b); ! 1586: length -= max_str; ! 1587: b += max_str; ! 1588: } ! 1589: (void) LIB$MOVC5 (&zero, &zero, &zero, &length, b); ! 1590: #else ! 1591: while (length-- > 0) ! 1592: *b++ = 0; ! 1593: #endif /* not VMS */ ! 1594: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.