|
|
1.1 ! root 1: /* Prepare Tex index dribble output into an actual index. ! 2: Copyright (C) 1987 Free Software Foundation, Inc. ! 3: ! 4: NO WARRANTY ! 5: ! 6: BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY ! 7: NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT ! 8: WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC, ! 9: RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS" ! 10: WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, ! 11: BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND ! 12: FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY ! 13: AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE ! 14: DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR ! 15: CORRECTION. ! 16: ! 17: IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M. ! 18: STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY ! 19: WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE ! 20: LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR ! 21: OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE ! 22: USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR ! 23: DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR ! 24: A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS ! 25: PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH ! 26: DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY. ! 27: ! 28: GENERAL PUBLIC LICENSE TO COPY ! 29: ! 30: 1. You may copy and distribute verbatim copies of this source file ! 31: as you receive it, in any medium, provided that you conspicuously ! 32: and appropriately publish on each copy a valid copyright notice ! 33: "Copyright (C) 1987 Free Software Foundation, Inc.", and include ! 34: following the copyright notice a verbatim copy of the above disclaimer ! 35: of warranty and of this License. ! 36: ! 37: 2. You may modify your copy or copies of this source file or ! 38: any portion of it, and copy and distribute such modifications under ! 39: the terms of Paragraph 1 above, provided that you also do the following: ! 40: ! 41: a) cause the modified files to carry prominent notices stating ! 42: that you changed the files and the date of any change; and ! 43: ! 44: b) cause the whole of any work that you distribute or publish, ! 45: that in whole or in part contains or is a derivative of this ! 46: program or any part thereof, to be licensed at no charge to all ! 47: third parties on terms identical to those contained in this ! 48: License Agreement (except that you may choose to grant more extensive ! 49: warranty protection to some or all third parties, at your option). ! 50: ! 51: c) You may charge a distribution fee for the physical act of ! 52: transferring a copy, and you may at your option offer warranty ! 53: protection in exchange for a fee. ! 54: ! 55: Mere aggregation of another unrelated program with this program (or its ! 56: derivative) on a volume of a storage or distribution medium does not bring ! 57: the other program under the scope of these terms. ! 58: ! 59: 3. You may copy and distribute this program (or a portion or derivative ! 60: of it, under Paragraph 2) in object code or executable form under the terms ! 61: of Paragraphs 1 and 2 above provided that you also do one of the following: ! 62: ! 63: a) accompany it with the complete corresponding machine-readable ! 64: source code, which must be distributed under the terms of ! 65: Paragraphs 1 and 2 above; or, ! 66: ! 67: b) accompany it with a written offer, valid for at least three ! 68: years, to give any third party free (except for a nominal ! 69: shipping charge) a complete machine-readable copy of the ! 70: corresponding source code, to be distributed under the terms of ! 71: Paragraphs 1 and 2 above; or, ! 72: ! 73: c) accompany it with the information you received as to where the ! 74: corresponding source code may be obtained. (This alternative is ! 75: allowed only for noncommercial distribution and only if you ! 76: received the program in object code or executable form alone.) ! 77: ! 78: For an executable file, complete source code means all the source code for ! 79: all modules it contains; but, as a special exception, it need not include ! 80: source code for modules which are standard libraries that accompany the ! 81: operating system on which the executable file runs. ! 82: ! 83: 4. You may not copy, sublicense, distribute or transfer this program ! 84: except as expressly provided under this License Agreement. Any attempt ! 85: otherwise to copy, sublicense, distribute or transfer this program is void and ! 86: your rights to use the program under this License agreement shall be ! 87: automatically terminated. However, parties who have received computer ! 88: software programs from you with this License Agreement will not have ! 89: their licenses terminated so long as such parties remain in full compliance. ! 90: ! 91: 5. If you wish to incorporate parts of this program into other free ! 92: programs whose distribution conditions are different, write to the Free ! 93: Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet ! 94: worked out a simple rule that can be stated here, but we will often permit ! 95: this. We will be guided by the two goals of preserving the free status of ! 96: all derivatives of our free software and of promoting the sharing and reuse of ! 97: software. ! 98: ! 99: In other words, you are welcome to use, share and improve this program. ! 100: You are forbidden to forbid anyone else to use, share and improve ! 101: what you give them. Help stamp out software-hoarding! */ ! 102: ! 103: ! 104: #include <stdio.h> ! 105: #include <ctype.h> ! 106: ! 107: #ifdef VMS ! 108: #include <file.h> ! 109: ! 110: #define EXIT_SUCCESS ((1 << 28) | 1) ! 111: #define EXIT_FATAL ((1 << 28) | 4) ! 112: #define unlink delete ! 113: #define tell(fd) lseek(fd, 0L, 1) ! 114: #else ! 115: #include <sys/file.h> ! 116: ! 117: #define EXIT_SUCCESS 0 ! 118: #define EXIT_FATAL 1 ! 119: #endif ! 120: ! 121: ! 122: #ifndef L_XTND ! 123: #define L_XTND 2 ! 124: #endif ! 125: ! 126: /* When sorting in core, this structure describes one line ! 127: and the position and length of its first keyfield. */ ! 128: ! 129: struct lineinfo ! 130: { ! 131: char *text; /* The actual text of the line */ ! 132: union ! 133: { /* The start of the key (for textual comparison) */ ! 134: char *text; ! 135: long number; /* or the numeric value (for numeric comparison) */ ! 136: } key; ! 137: long keylen; /* Length of key field */ ! 138: }; ! 139: ! 140: /* This structure describes a field to use as a sort key */ ! 141: ! 142: struct keyfield ! 143: { ! 144: int startwords; /* # words to skip */ ! 145: int startchars; /* and # additional chars to skip, to start of field */ ! 146: int endwords; /* similar, from beg (or end) of line, to find end of field */ ! 147: int endchars; ! 148: char ignore_blanks; /* Ignore spaces and tabs within the field */ ! 149: char fold_case; /* Convert upper case to lower before comparing */ ! 150: char reverse; /* Compare in reverse order */ ! 151: char numeric; /* Parse text as an integer and compare the integers */ ! 152: char positional; /* Sort according to position within the file */ ! 153: char braced; /* Count balanced-braced groupings as fields */ ! 154: }; ! 155: ! 156: /* Vector of keyfields to use */ ! 157: ! 158: struct keyfield keyfields[3]; ! 159: ! 160: /* Number of keyfields stored in that vector. */ ! 161: ! 162: int num_keyfields = 3; ! 163: ! 164: /* Vector of input file names, terminated with a zero (null pointer) */ ! 165: ! 166: char **infiles; ! 167: ! 168: /* Vector of corresponding output file names, or zero meaning default it */ ! 169: ! 170: char **outfiles; ! 171: ! 172: /* Length of `infiles' */ ! 173: ! 174: int num_infiles; ! 175: ! 176: /* Pointer to the array of pointers to lines being sorted */ ! 177: ! 178: char **linearray; ! 179: ! 180: /* The allocated length of `linearray'. */ ! 181: ! 182: long lines; ! 183: ! 184: /* Directory to use for temporary files. On Unix, it ends with a slash. */ ! 185: ! 186: char *tempdir; ! 187: ! 188: /* Start of filename to use for temporary files. */ ! 189: ! 190: char *tempbase; ! 191: ! 192: /* Number of last temporary file. */ ! 193: ! 194: int tempcount; ! 195: ! 196: /* Number of last temporary file already deleted. ! 197: Temporary files are deleted by `flush_tempfiles' in order of creation. */ ! 198: ! 199: int last_deleted_tempcount; ! 200: ! 201: /* During in-core sort, this points to the base of the data block ! 202: which contains all the lines of data. */ ! 203: ! 204: char *text_base; ! 205: ! 206: /* Additional command switches */ ! 207: ! 208: int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */ ! 209: ! 210: /* Forward declarations of functions in this file */ ! 211: ! 212: void decode_command (); ! 213: void sort_in_core (); ! 214: void sort_offline (); ! 215: char **parsefile (); ! 216: char *find_field (); ! 217: char *find_pos (); ! 218: long find_value (); ! 219: char *find_braced_pos (); ! 220: char *find_braced_end (); ! 221: void writelines (); ! 222: int compare_full (); ! 223: long readline (); ! 224: int merge_files (); ! 225: int merge_direct (); ! 226: char *concat (); ! 227: char *maketempname (); ! 228: void flush_tempfiles (); ! 229: char *tempcopy (); ! 230: ! 231: extern char *mktemp (); ! 232: ! 233: #define MAX_IN_CORE_SORT 500000 ! 234: ! 235: int ! 236: main (argc, argv) ! 237: int argc; ! 238: char **argv; ! 239: { ! 240: int i; ! 241: ! 242: tempcount = 0; ! 243: last_deleted_tempcount = 0; ! 244: ! 245: /* Describe the kind of sorting to do. */ ! 246: /* The first keyfield uses the first braced field and folds case */ ! 247: keyfields[0].braced = 1; ! 248: keyfields[0].fold_case = 1; ! 249: keyfields[0].endwords = -1; ! 250: keyfields[0].endchars = -1; ! 251: /* The second keyfield uses the second braced field, numerically */ ! 252: keyfields[1].braced = 1; ! 253: keyfields[1].numeric = 1; ! 254: keyfields[1].startwords = 1; ! 255: keyfields[1].endwords = -1; ! 256: keyfields[1].endchars = -1; ! 257: /* The third keyfield (which is ignored while discarding duplicates) ! 258: compares the whole line */ ! 259: keyfields[2].endwords = -1; ! 260: keyfields[2].endchars = -1; ! 261: ! 262: decode_command (argc, argv); ! 263: ! 264: tempbase = mktemp (concat ("txiXXXXXX", "", "")); ! 265: ! 266: /* Process input files completely, one by one. */ ! 267: ! 268: for (i = 0; i < num_infiles; i++) ! 269: { ! 270: int desc; ! 271: long ptr; ! 272: char *outfile; ! 273: char *p; ! 274: ! 275: desc = open (infiles[i], 0, 0); ! 276: if (desc < 0) pfatal_with_name (infiles[i]); ! 277: lseek (desc, 0, L_XTND); ! 278: ptr = tell (desc); ! 279: close (desc); ! 280: ! 281: outfile = outfiles[i]; ! 282: if (!outfile) ! 283: { ! 284: outfile = concat (infiles[i], "s", ""); ! 285: } ! 286: ! 287: if (ptr < MAX_IN_CORE_SORT) ! 288: /* Sort a small amount of data */ ! 289: sort_in_core (infiles[i], ptr, outfile); ! 290: else ! 291: sort_offline (infiles[i], ptr, outfile); ! 292: } ! 293: ! 294: flush_tempfiles (tempcount); ! 295: exit (EXIT_SUCCESS); ! 296: } ! 297: ! 298: /* This page decodes the command line arguments to set the parameter variables ! 299: and set up the vector of keyfields and the vector of input files */ ! 300: ! 301: void ! 302: decode_command (argc, argv) ! 303: int argc; ! 304: char **argv; ! 305: { ! 306: int i; ! 307: char **ip; ! 308: char **op; ! 309: ! 310: /* Store default values into parameter variables */ ! 311: ! 312: #ifdef VMS ! 313: tempdir = "sys$scratch:"; ! 314: #else ! 315: tempdir = "/tmp/"; ! 316: #endif ! 317: ! 318: keep_tempfiles = 0; ! 319: ! 320: /* Allocate argc input files, which must be enough. */ ! 321: ! 322: infiles = (char **) xmalloc (argc * sizeof (char *)); ! 323: outfiles = (char **) xmalloc (argc * sizeof (char *)); ! 324: ip = infiles; ! 325: op = outfiles; ! 326: ! 327: /* First find all switches that control the default kind-of-sort */ ! 328: ! 329: for (i = 1; i < argc; i++) ! 330: { ! 331: int tem = classify_arg (argv[i]); ! 332: char c; ! 333: char *p; ! 334: ! 335: if (tem <= 0) ! 336: { ! 337: *ip++ = argv[i]; ! 338: *op++ = 0; ! 339: continue; ! 340: } ! 341: if (tem > 1) ! 342: { ! 343: if (i + 1 == argc) ! 344: fatal ("switch %s given with no argument following it", argv[i]); ! 345: else if (!strcmp (argv[i], "-T")) ! 346: tempdir = argv[i + 1]; ! 347: else if (!strcmp (argv[i], "-o")) ! 348: *(op - 1) = argv[i + 1]; ! 349: i += tem - 1; ! 350: continue; ! 351: } ! 352: ! 353: p = &argv[i][1]; ! 354: while (c = *p++) ! 355: switch (c) ! 356: { ! 357: case 'k': ! 358: keep_tempfiles = 1; ! 359: break; ! 360: ! 361: default: ! 362: fatal ("invalid command switch %c", c); ! 363: } ! 364: switchdone: ; ! 365: } ! 366: ! 367: /* Record number of keyfields, terminate list of filenames */ ! 368: ! 369: num_infiles = ip - infiles; ! 370: *ip = 0; ! 371: } ! 372: ! 373: /* Return 0 for an argument that is not a switch; ! 374: for a switch, return 1 plus the number of following arguments that the switch swallows. ! 375: */ ! 376: ! 377: int ! 378: classify_arg (arg) ! 379: char *arg; ! 380: { ! 381: if (!strcmp (arg, "-T") || !strcmp (arg, "-o")) ! 382: return 2; ! 383: if (arg[0] == '-') ! 384: return 1; ! 385: return 0; ! 386: } ! 387: ! 388: /* Create a name for a temporary file */ ! 389: ! 390: char * ! 391: maketempname (count) ! 392: int count; ! 393: { ! 394: char tempsuffix[10]; ! 395: sprintf (tempsuffix, "%d", count); ! 396: return concat (tempdir, tempbase, tempsuffix); ! 397: } ! 398: ! 399: /* Delete all temporary files up to the specified count */ ! 400: ! 401: void ! 402: flush_tempfiles (to_count) ! 403: int to_count; ! 404: { ! 405: if (keep_tempfiles) return; ! 406: while (last_deleted_tempcount < to_count) ! 407: unlink (maketempname (++last_deleted_tempcount)); ! 408: } ! 409: ! 410: /* Copy an input file into a temporary file, and return the temporary file name */ ! 411: ! 412: #define BUFSIZE 1024 ! 413: ! 414: char * ! 415: tempcopy (idesc) ! 416: int idesc; ! 417: { ! 418: char *outfile = maketempname (++tempcount); ! 419: int odesc; ! 420: char buffer[BUFSIZE]; ! 421: ! 422: odesc = open (outfile, O_WRONLY | O_CREAT, 0666); ! 423: ! 424: if (odesc < 0) pfatal_with_name (outfile); ! 425: ! 426: while (1) ! 427: { ! 428: int nread = read (idesc, buffer, BUFSIZE); ! 429: write (odesc, buffer, nread); ! 430: if (!nread) break; ! 431: } ! 432: ! 433: close (odesc); ! 434: ! 435: return outfile; ! 436: } ! 437: ! 438: /* Compare two lines, provided as pointers to pointers to text, ! 439: according to the specified set of keyfields */ ! 440: ! 441: int ! 442: compare_full (line1, line2) ! 443: char **line1, **line2; ! 444: { ! 445: int i; ! 446: ! 447: /* Compare using the first keyfield; ! 448: if that does not distinguish the lines, try the second keyfield; and so on. */ ! 449: ! 450: for (i = 0; i < num_keyfields; i++) ! 451: { ! 452: long length1, length2; ! 453: char *start1 = find_field (&keyfields[i], *line1, &length1); ! 454: char *start2 = find_field (&keyfields[i], *line2, &length2); ! 455: int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base, ! 456: start2, length2, *line2 - text_base); ! 457: if (tem) ! 458: { ! 459: if (keyfields[i].reverse) ! 460: return - tem; ! 461: return tem; ! 462: } ! 463: } ! 464: ! 465: return 0; /* Lines match exactly */ ! 466: } ! 467: ! 468: /* Compare two lines described by structures ! 469: in which the first keyfield is identified in advance. ! 470: For positional sorting, assumes that the order of the lines in core ! 471: reflects their nominal order. */ ! 472: ! 473: int ! 474: compare_prepared (line1, line2) ! 475: struct lineinfo *line1, *line2; ! 476: { ! 477: int i; ! 478: int tem; ! 479: char *text1, *text2; ! 480: ! 481: /* Compare using the first keyfield, which has been found for us already */ ! 482: if (keyfields->positional) ! 483: { ! 484: if (line1->text - text_base > line2->text - text_base) ! 485: tem = 1; ! 486: else ! 487: tem = -1; ! 488: } ! 489: else if (keyfields->numeric) ! 490: tem = line1->key.number - line2->key.number; ! 491: else ! 492: tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0); ! 493: if (tem) ! 494: { ! 495: if (keyfields->reverse) ! 496: return - tem; ! 497: return tem; ! 498: } ! 499: ! 500: text1 = line1->text; ! 501: text2 = line2->text; ! 502: ! 503: /* Compare using the second keyfield; ! 504: if that does not distinguish the lines, try the third keyfield; and so on. */ ! 505: ! 506: for (i = 1; i < num_keyfields; i++) ! 507: { ! 508: long length1, length2; ! 509: char *start1 = find_field (&keyfields[i], text1, &length1); ! 510: char *start2 = find_field (&keyfields[i], text2, &length2); ! 511: int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base, ! 512: start2, length2, text2 - text_base); ! 513: if (tem) ! 514: { ! 515: if (keyfields[i].reverse) ! 516: return - tem; ! 517: return tem; ! 518: } ! 519: } ! 520: ! 521: return 0; /* Lines match exactly */ ! 522: } ! 523: ! 524: /* Like compare_full but more general. ! 525: You can pass any strings, and you can say how many keyfields to use. ! 526: `pos1' and `pos2' should indicate the nominal positional ordering of ! 527: the two lines in the input. */ ! 528: ! 529: int ! 530: compare_general (str1, str2, pos1, pos2, use_keyfields) ! 531: char *str1, *str2; ! 532: long pos1, pos2; ! 533: int use_keyfields; ! 534: { ! 535: int i; ! 536: ! 537: /* Compare using the first keyfield; ! 538: if that does not distinguish the lines, try the second keyfield; and so on. */ ! 539: ! 540: for (i = 0; i < use_keyfields; i++) ! 541: { ! 542: long length1, length2; ! 543: char *start1 = find_field (&keyfields[i], str1, &length1); ! 544: char *start2 = find_field (&keyfields[i], str2, &length2); ! 545: int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2); ! 546: if (tem) ! 547: { ! 548: if (keyfields[i].reverse) ! 549: return - tem; ! 550: return tem; ! 551: } ! 552: } ! 553: ! 554: return 0; /* Lines match exactly */ ! 555: } ! 556: ! 557: /* Find the start and length of a field in `str' according to `keyfield'. ! 558: A pointer to the starting character is returned, and the length ! 559: is stored into the int that `lengthptr' points to. */ ! 560: ! 561: char * ! 562: find_field (keyfield, str, lengthptr) ! 563: struct keyfield *keyfield; ! 564: char *str; ! 565: long *lengthptr; ! 566: { ! 567: char *start; ! 568: char *end; ! 569: char *(*fun) (); ! 570: ! 571: if (keyfield->braced) fun = find_braced_pos; ! 572: else fun = find_pos; ! 573: ! 574: start = ( *fun )(str, keyfield->startwords, keyfield->startchars, ! 575: keyfield->ignore_blanks); ! 576: if (keyfield->endwords < 0) ! 577: { ! 578: if (keyfield->braced) ! 579: end = find_braced_end (start); ! 580: else ! 581: { ! 582: end = start; ! 583: while (*end && *end != '\n') end++; ! 584: } ! 585: } ! 586: else ! 587: { ! 588: end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0); ! 589: if (end - str < start - str) end = start; ! 590: } ! 591: *lengthptr = end - start; ! 592: return start; ! 593: } ! 594: ! 595: /* Find a pointer to a specified place within `str', ! 596: skipping (from the beginning) `words' words and then `chars' chars. ! 597: If `ignore_blanks' is nonzero, we skip all blanks ! 598: after finding the specified word. */ ! 599: ! 600: char * ! 601: find_pos (str, words, chars, ignore_blanks) ! 602: char *str; ! 603: int words, chars; ! 604: int ignore_blanks; ! 605: { ! 606: int i; ! 607: char *p = str; ! 608: ! 609: for (i = 0; i < words; i++) ! 610: { ! 611: char c; ! 612: /* Find next bunch of nonblanks and skip them. */ ! 613: while ((c = *p) == ' ' || c == '\t') p++; ! 614: while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++; ! 615: if (!*p || *p == '\n') return p; ! 616: } ! 617: ! 618: while (*p == ' ' || *p == '\t') p++; ! 619: ! 620: for (i = 0; i < chars; i++) ! 621: { ! 622: if (!*p || *p == '\n') break; ! 623: p++; ! 624: } ! 625: return p; ! 626: } ! 627: ! 628: /* Like find_pos but assumes that each field is surrounded by braces ! 629: and that braces within fields are balanced. */ ! 630: ! 631: char * ! 632: find_braced_pos (str, words, chars, ignore_blanks) ! 633: char *str; ! 634: int words, chars; ! 635: int ignore_blanks; ! 636: { ! 637: int i; ! 638: int bracelevel; ! 639: char *p = str; ! 640: char c; ! 641: ! 642: for (i = 0; i < words; i++) ! 643: { ! 644: bracelevel = 1; ! 645: while ((c = *p++) != '{' && c != '\n' && c); ! 646: if (c != '{') ! 647: return p - 1; ! 648: while (bracelevel) ! 649: { ! 650: c = *p++; ! 651: if (c == '{') bracelevel++; ! 652: if (c == '}') bracelevel--; ! 653: if (c == '\\') c = *p++; /* \ quotes braces and \ */ ! 654: if (c == 0 || c == '\n') return p-1; ! 655: } ! 656: } ! 657: ! 658: while ((c = *p++) != '{' && c != '\n' && c); ! 659: ! 660: if (c != '{') ! 661: return p-1; ! 662: ! 663: if (ignore_blanks) ! 664: while ((c = *p) == ' ' || c == '\t') p++; ! 665: ! 666: for (i = 0; i < chars; i++) ! 667: { ! 668: if (!*p || *p == '\n') break; ! 669: p++; ! 670: } ! 671: return p; ! 672: } ! 673: ! 674: /* Find the end of the balanced-brace field which starts at `str'. ! 675: The position returned is just before the closing brace. */ ! 676: ! 677: char * ! 678: find_braced_end (str) ! 679: char *str; ! 680: { ! 681: int bracelevel; ! 682: char *p = str; ! 683: char c; ! 684: ! 685: bracelevel = 1; ! 686: while (bracelevel) ! 687: { ! 688: c = *p++; ! 689: if (c == '{') bracelevel++; ! 690: if (c == '}') bracelevel--; ! 691: if (c == '\\') c = *p++; ! 692: if (c == 0 || c == '\n') return p-1; ! 693: } ! 694: return p - 1; ! 695: } ! 696: ! 697: long ! 698: find_value (start, length) ! 699: char *start; ! 700: long length; ! 701: { ! 702: while (length != 0L) { ! 703: if (isdigit(*start)) ! 704: return atol(start); ! 705: length--; ! 706: start++; ! 707: } ! 708: return 0l; ! 709: } ! 710: ! 711: /* Vector used to translate characters for comparison. ! 712: This is how we make all alphanumerics follow all else, ! 713: and ignore case in the first sorting. */ ! 714: int char_order[256]; ! 715: ! 716: init_char_order () ! 717: { ! 718: int i; ! 719: for (i = 1; i < 256; i++) ! 720: char_order[i] = i; ! 721: ! 722: for (i = '0'; i <= '9'; i++) ! 723: char_order[i] += 512; ! 724: ! 725: for (i = 'a'; i <= 'z'; i++) { ! 726: char_order[i] = 512 + i; ! 727: char_order[i + 'A' - 'a'] = 512 + i; ! 728: } ! 729: } ! 730: ! 731: /* Compare two fields (each specified as a start pointer and a character count) ! 732: according to `keyfield'. The sign of the value reports the relation between the fields */ ! 733: ! 734: int ! 735: compare_field (keyfield, start1, length1, pos1, start2, length2, pos2) ! 736: struct keyfield *keyfield; ! 737: char *start1; ! 738: long length1; ! 739: long pos1; ! 740: char *start2; ! 741: long length2; ! 742: long pos2; ! 743: { ! 744: if (keyfields->positional) ! 745: { ! 746: if (pos1 > pos2) ! 747: return 1; ! 748: else ! 749: return -1; ! 750: } ! 751: if (keyfield->numeric) ! 752: { ! 753: long value = find_value (start1, length1) - find_value (start2, length2); ! 754: if (value > 0) return 1; ! 755: if (value < 0) return -1; ! 756: return 0; ! 757: } ! 758: else ! 759: { ! 760: char *p1 = start1; ! 761: char *p2 = start2; ! 762: char *e1 = start1 + length1; ! 763: char *e2 = start2 + length2; ! 764: ! 765: int fold_case = keyfield->fold_case; ! 766: ! 767: while (1) ! 768: { ! 769: int c1, c2; ! 770: ! 771: if (p1 == e1) c1 = 0; ! 772: else c1 = *p1++; ! 773: if (p2 == e2) c2 = 0; ! 774: else c2 = *p2++; ! 775: ! 776: if (char_order[c1] != char_order[c2]) ! 777: return char_order[c1] - char_order[c2]; ! 778: if (!c1) break; ! 779: } ! 780: ! 781: /* Strings are equal except possibly for case. */ ! 782: p1 = start1; ! 783: p2 = start2; ! 784: while (1) ! 785: { ! 786: int c1, c2; ! 787: ! 788: if (p1 == e1) c1 = 0; ! 789: else c1 = *p1++; ! 790: if (p2 == e2) c2 = 0; ! 791: else c2 = *p2++; ! 792: ! 793: if (c1 != c2) ! 794: /* Reverse sign here so upper case comes out last. */ ! 795: return c2 - c1; ! 796: if (!c1) break; ! 797: } ! 798: ! 799: return 0; ! 800: } ! 801: } ! 802: ! 803: /* A `struct linebuffer' is a structure which holds a line of text. ! 804: `readline' reads a line from a stream into a linebuffer ! 805: and works regardless of the length of the line. */ ! 806: ! 807: struct linebuffer ! 808: { ! 809: long size; ! 810: char *buffer; ! 811: }; ! 812: ! 813: /* Initialize a linebuffer for use */ ! 814: ! 815: void ! 816: initbuffer (linebuffer) ! 817: struct linebuffer *linebuffer; ! 818: { ! 819: linebuffer->size = 200; ! 820: linebuffer->buffer = (char *) xmalloc (200); ! 821: } ! 822: ! 823: /* Read a line of text from `stream' into `linebuffer'. ! 824: Return the length of the line. */ ! 825: ! 826: long ! 827: readline (linebuffer, stream) ! 828: struct linebuffer *linebuffer; ! 829: FILE *stream; ! 830: { ! 831: char *buffer = linebuffer->buffer; ! 832: char *p = linebuffer->buffer; ! 833: char *end = p + linebuffer->size; ! 834: ! 835: while (1) ! 836: { ! 837: int c = getc (stream); ! 838: if (p == end) ! 839: { ! 840: buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); ! 841: p += buffer - linebuffer->buffer; ! 842: end += buffer - linebuffer->buffer; ! 843: linebuffer->buffer = buffer; ! 844: } ! 845: if (c < 0 || c == '\n') ! 846: { ! 847: *p = 0; ! 848: break; ! 849: } ! 850: *p++ = c; ! 851: } ! 852: ! 853: return p - buffer; ! 854: } ! 855: ! 856: /* Sort an input file too big to sort in core. */ ! 857: ! 858: void ! 859: sort_offline (infile, nfiles, total, outfile) ! 860: char *infile; ! 861: long total; ! 862: char *outfile; ! 863: { ! 864: int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */ ! 865: char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); ! 866: FILE *istream = fopen (infile, "r"); ! 867: int i; ! 868: struct linebuffer lb; ! 869: long linelength; ! 870: int failure = 0; ! 871: ! 872: initbuffer (&lb); ! 873: ! 874: /* Read in one line of input data. */ ! 875: ! 876: linelength = readline (&lb, istream); ! 877: ! 878: if (lb.buffer[0] != '\\') ! 879: { ! 880: error ("%s: not a texinfo index file", infile); ! 881: return; ! 882: } ! 883: ! 884: /* Split up the input into `ntemps' temporary files, or maybe fewer, ! 885: and put the new files' names into `tempfiles' */ ! 886: ! 887: for (i = 0; i < ntemps; i++) ! 888: { ! 889: char *outname = maketempname (++tempcount); ! 890: FILE *ostream = fopen (outname, "w"); ! 891: long tempsize = 0; ! 892: ! 893: if (!ostream) pfatal_with_name (outname); ! 894: tempfiles[i] = outname; ! 895: ! 896: /* Copy lines into this temp file as long as it does not make file "too big" ! 897: or until there are no more lines. */ ! 898: ! 899: while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) ! 900: { ! 901: tempsize += linelength + 1; ! 902: fputs (lb.buffer, ostream); ! 903: putc ('\n', ostream); ! 904: ! 905: /* Read another line of input data. */ ! 906: ! 907: linelength = readline (&lb, istream); ! 908: if (!linelength && feof (istream)) break; ! 909: ! 910: if (lb.buffer[0] != '\\') ! 911: { ! 912: error ("%s: not a texinfo index file", infile); ! 913: failure = 1; ! 914: goto fail; ! 915: } ! 916: } ! 917: fclose (ostream); ! 918: if (feof (istream)) break; ! 919: } ! 920: ! 921: free (lb.buffer); ! 922: ! 923: fail: ! 924: /* Record number of temp files we actually needed. */ ! 925: ! 926: ntemps = i; ! 927: ! 928: /* Sort each tempfile into another tempfile. ! 929: Delete the first set of tempfiles and put the names of the second into `tempfiles' */ ! 930: ! 931: for (i = 0; i < ntemps; i++) ! 932: { ! 933: char *newtemp = maketempname (++tempcount); ! 934: sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp); ! 935: if (!keep_tempfiles) ! 936: unlink (tempfiles[i]); ! 937: tempfiles[i] = newtemp; ! 938: } ! 939: ! 940: if (failure) ! 941: return; ! 942: ! 943: /* Merge the tempfiles together and indexify */ ! 944: ! 945: merge_files (tempfiles, ntemps, outfile); ! 946: } ! 947: ! 948: /* Sort `infile', whose size is `total', ! 949: assuming that is small enough to be done in-core, ! 950: then indexify it and send the output to `outfile' (or to stdout). */ ! 951: ! 952: void ! 953: sort_in_core (infile, total, outfile) ! 954: char *infile; ! 955: long total; ! 956: char *outfile; ! 957: { ! 958: char **nextline; ! 959: char *data = (char *) xmalloc (total + 1); ! 960: char *file_data; ! 961: long file_size; ! 962: int i; ! 963: FILE *ostream = stdout; ! 964: struct lineinfo *lineinfo; ! 965: ! 966: /* Read the contents of the file into the moby array `data' */ ! 967: ! 968: int desc = open (infile, 0, 0); ! 969: ! 970: if (desc < 0) ! 971: fatal ("failure reopening %s", infile); ! 972: for (file_size = 0; ; ) ! 973: { ! 974: if ((i = read (desc, data + file_size, total - file_size)) <= 0) ! 975: break; ! 976: file_size += i; ! 977: } ! 978: file_data = data; ! 979: data[file_size] = 0; ! 980: ! 981: close (desc); ! 982: ! 983: if (file_size > 0 && data[0] != '\\') ! 984: { ! 985: error ("%s: not a texinfo index file", infile); ! 986: return; ! 987: } ! 988: ! 989: init_char_order (); ! 990: ! 991: /* Sort routines want to know this address */ ! 992: ! 993: text_base = data; ! 994: ! 995: /* Create the array of pointers to lines, with a default size frequently enough. */ ! 996: ! 997: lines = total / 50; ! 998: if (!lines) lines = 2; ! 999: linearray = (char **) xmalloc (lines * sizeof (char *)); ! 1000: ! 1001: /* `nextline' points to the next free slot in this array. ! 1002: `lines' is the allocated size. */ ! 1003: ! 1004: nextline = linearray; ! 1005: ! 1006: /* Parse the input file's data, and make entries for the lines. */ ! 1007: ! 1008: nextline = parsefile (infile, nextline, file_data, file_size); ! 1009: if (nextline == 0) ! 1010: { ! 1011: error ("%s: not a texinfo index file", infile); ! 1012: return; ! 1013: } ! 1014: ! 1015: /* Sort the lines */ ! 1016: ! 1017: /* If we have enough space, find the first keyfield of each line in advance. ! 1018: Make a `struct lineinfo' for each line, which records the keyfield ! 1019: as well as the line, and sort them. */ ! 1020: ! 1021: lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo)); ! 1022: ! 1023: if (lineinfo) ! 1024: { ! 1025: struct lineinfo *lp; ! 1026: char **p; ! 1027: ! 1028: for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) ! 1029: { ! 1030: lp->text = *p; ! 1031: lp->key.text = find_field (keyfields, *p, &lp->keylen); ! 1032: if (keyfields->numeric) ! 1033: lp->key.number = find_value (lp->key.text, lp->keylen); ! 1034: } ! 1035: ! 1036: qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared); ! 1037: ! 1038: for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) ! 1039: *p = lp->text; ! 1040: ! 1041: free (lineinfo); ! 1042: } ! 1043: else ! 1044: qsort (linearray, nextline - linearray, sizeof (char *), compare_full); ! 1045: ! 1046: /* Open the output file */ ! 1047: ! 1048: if (outfile) ! 1049: { ! 1050: ostream = fopen (outfile, "w"); ! 1051: if (!ostream) ! 1052: pfatal_with_name (outfile); ! 1053: } ! 1054: ! 1055: writelines (linearray, nextline - linearray, ostream); ! 1056: if (outfile) fclose (ostream); ! 1057: ! 1058: free (linearray); ! 1059: free (data); ! 1060: } ! 1061: ! 1062: /* Parse an input string in core into lines. ! 1063: DATA is the input string, and SIZE is its length. ! 1064: Data goes in LINEARRAY starting at NEXTLINE. ! 1065: The value returned is the first entry in LINEARRAY still unused. ! 1066: Value 0 means input file contents are invalid. */ ! 1067: ! 1068: char ** ! 1069: parsefile (filename, nextline, data, size) ! 1070: char *filename; ! 1071: char **nextline; ! 1072: char *data; ! 1073: long size; ! 1074: { ! 1075: char *p, *end; ! 1076: char **line = nextline; ! 1077: ! 1078: p = data; ! 1079: end = p + size; ! 1080: *end = 0; ! 1081: ! 1082: while (p != end) ! 1083: { ! 1084: if (p[0] != '\\') ! 1085: return 0; ! 1086: ! 1087: *line = p; ! 1088: while (*p && *p != '\n') p++; ! 1089: if (p != end) p++; ! 1090: ! 1091: line++; ! 1092: if (line == linearray + lines) ! 1093: { ! 1094: char **old = linearray; ! 1095: linearray = (char **) xrealloc (linearray, sizeof (char *) * (lines *= 4)); ! 1096: line += linearray - old; ! 1097: } ! 1098: } ! 1099: ! 1100: return line; ! 1101: } ! 1102: ! 1103: /* Indexification is a filter applied to the sorted lines ! 1104: as they are being written to the output file. ! 1105: Multiple entries for the same name, with different page numbers, ! 1106: get combined into a single entry with multiple page numbers. ! 1107: The first braced field, which is used for sorting, is discarded. ! 1108: However, its first character is examined, folded to lower case, ! 1109: and if it is different from that in the previous line fed to us ! 1110: a \initial line is written with one argument, the new initial. ! 1111: ! 1112: If an entry has four braced fields, then the second and third ! 1113: constitute primary and secondary names. ! 1114: In this case, each change of primary name ! 1115: generates a \primary line which contains only the primary name, ! 1116: and in between these are \secondary lines which contain ! 1117: just a secondary name and page numbers. ! 1118: */ ! 1119: ! 1120: /* The last primary name we wrote a \primary entry for. ! 1121: If only one level of indexing is being done, this is the last name seen */ ! 1122: char *lastprimary; ! 1123: int lastprimarylength; /* Length of storage allocated for lastprimary */ ! 1124: ! 1125: /* Similar, for the secondary name. */ ! 1126: char *lastsecondary; ! 1127: int lastsecondarylength; ! 1128: ! 1129: /* Zero if we are not in the middle of writing an entry. ! 1130: One if we have written the beginning of an entry but have not ! 1131: yet written any page numbers into it. ! 1132: Greater than one if we have written the beginning of an entry ! 1133: plus at least one page number. */ ! 1134: int pending; ! 1135: ! 1136: /* The initial (for sorting purposes) of the last primary entry written. ! 1137: When this changes, a \initial {c} line is written */ ! 1138: ! 1139: char * lastinitial; ! 1140: ! 1141: int lastinitiallength; ! 1142: ! 1143: /* When we need a string of length 1 for the value of lastinitial, ! 1144: store it here. */ ! 1145: ! 1146: char lastinitial1[2]; ! 1147: ! 1148: /* Initialize static storage for writing an index */ ! 1149: ! 1150: void ! 1151: init_index () ! 1152: { ! 1153: pending = 0; ! 1154: lastinitial = lastinitial1; ! 1155: lastinitial1[0] = 0; ! 1156: lastinitial1[1] = 0; ! 1157: lastinitiallength = 0; ! 1158: lastprimarylength = 100; ! 1159: lastprimary = (char *) xmalloc (lastprimarylength + 1); ! 1160: bzero (lastprimary, lastprimarylength + 1); ! 1161: lastsecondarylength = 100; ! 1162: lastsecondary = (char *) xmalloc (lastsecondarylength + 1); ! 1163: bzero (lastsecondary, lastsecondarylength + 1); ! 1164: } ! 1165: ! 1166: /* Indexify. Merge entries for the same name, ! 1167: insert headers for each initial character, etc. */ ! 1168: ! 1169: indexify (line, ostream) ! 1170: char *line; ! 1171: FILE *ostream; ! 1172: { ! 1173: char *primary, *secondary, *pagenumber; ! 1174: int primarylength, secondarylength, pagelength; ! 1175: int len = strlen (line); ! 1176: int nosecondary; ! 1177: int initiallength; ! 1178: char *initial; ! 1179: char initial1[2]; ! 1180: register char *p; ! 1181: ! 1182: /* First, analyze the parts of the entry fed to us this time */ ! 1183: ! 1184: p = find_braced_pos (line, 0, 0, 0); ! 1185: if (*p == '{') ! 1186: { ! 1187: initial = p; ! 1188: /* Get length of inner pair of braces starting at p, ! 1189: including that inner pair of braces. */ ! 1190: initiallength = find_braced_end (p + 1) + 1 - p; ! 1191: } ! 1192: else ! 1193: { ! 1194: initial = initial1; ! 1195: initial1[0] = *p; ! 1196: initial1[1] = 0; ! 1197: initiallength = 1; ! 1198: ! 1199: if (initial1[0] >= 'a' && initial1[0] <= 'z') ! 1200: initial1[0] -= 040; ! 1201: } ! 1202: ! 1203: pagenumber = find_braced_pos (line, 1, 0, 0); ! 1204: pagelength = find_braced_end (pagenumber) - pagenumber; ! 1205: if (pagelength == 0) ! 1206: abort (); ! 1207: ! 1208: primary = find_braced_pos (line, 2, 0, 0); ! 1209: primarylength = find_braced_end (primary) - primary; ! 1210: ! 1211: secondary = find_braced_pos (line, 3, 0, 0); ! 1212: nosecondary = !*secondary; ! 1213: if (!nosecondary) ! 1214: secondarylength = find_braced_end (secondary) - secondary; ! 1215: ! 1216: /* If the primary is different from before, make a new primary entry */ ! 1217: if (strncmp (primary, lastprimary, primarylength)) ! 1218: { ! 1219: /* Close off current secondary entry first, if one is open */ ! 1220: if (pending) ! 1221: { ! 1222: fputs ("}\n", ostream); ! 1223: pending = 0; ! 1224: } ! 1225: ! 1226: /* If this primary has a different initial, include an entry for the initial */ ! 1227: if (initiallength != lastinitiallength || ! 1228: strncmp (initial, lastinitial, initiallength)) ! 1229: { ! 1230: fprintf (ostream, "\\initial {"); ! 1231: fwrite (initial, 1, initiallength, ostream); ! 1232: fprintf (ostream, "}\n", initial); ! 1233: if (initial == initial1) ! 1234: { ! 1235: lastinitial = lastinitial1; ! 1236: *lastinitial1 = *initial1; ! 1237: } ! 1238: else ! 1239: { ! 1240: lastinitial = initial; ! 1241: } ! 1242: lastinitiallength = initiallength; ! 1243: } ! 1244: ! 1245: /* Make the entry for the primary. */ ! 1246: if (nosecondary) ! 1247: fputs ("\\entry {", ostream); ! 1248: else ! 1249: fputs ("\\primary {", ostream); ! 1250: fwrite (primary, primarylength, 1, ostream); ! 1251: if (nosecondary) ! 1252: { ! 1253: fputs ("}{", ostream); ! 1254: pending = 1; ! 1255: } ! 1256: else ! 1257: fputs ("}\n", ostream); ! 1258: ! 1259: /* Record name of most recent primary */ ! 1260: if (lastprimarylength < primarylength) ! 1261: { ! 1262: lastprimarylength = primarylength + 100; ! 1263: lastprimary = (char *) xrealloc (lastprimary, ! 1264: 1 + lastprimarylength); ! 1265: } ! 1266: strncpy (lastprimary, primary, primarylength); ! 1267: lastprimary[primarylength] = 0; ! 1268: ! 1269: /* There is no current secondary within this primary, now */ ! 1270: lastsecondary[0] = 0; ! 1271: } ! 1272: ! 1273: /* Should not have an entry with no subtopic following one with a subtopic */ ! 1274: ! 1275: if (nosecondary && *lastsecondary) ! 1276: error ("entry %s follows an entry with a secondary name", line); ! 1277: ! 1278: /* Start a new secondary entry if necessary */ ! 1279: if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) ! 1280: { ! 1281: if (pending) ! 1282: { ! 1283: fputs ("}\n", ostream); ! 1284: pending = 0; ! 1285: } ! 1286: ! 1287: /* Write the entry for the secondary. */ ! 1288: fputs ("\\secondary {", ostream); ! 1289: fwrite (secondary, secondarylength, 1, ostream); ! 1290: fputs ("}{", ostream); ! 1291: pending = 1; ! 1292: ! 1293: /* Record name of most recent secondary */ ! 1294: if (lastsecondarylength < secondarylength) ! 1295: { ! 1296: lastsecondarylength = secondarylength + 100; ! 1297: lastsecondary = (char *) xrealloc (lastsecondary, ! 1298: 1 + lastsecondarylength); ! 1299: } ! 1300: strncpy (lastsecondary, secondary, secondarylength); ! 1301: lastsecondary[secondarylength] = 0; ! 1302: } ! 1303: ! 1304: /* Here to add one more page number to the current entry */ ! 1305: if (pending++ != 1) ! 1306: fputs (", ", ostream); /* Punctuate first, if this is not the first */ ! 1307: fwrite (pagenumber, pagelength, 1, ostream); ! 1308: } ! 1309: ! 1310: /* Close out any unfinished output entry */ ! 1311: ! 1312: void ! 1313: finish_index (ostream) ! 1314: FILE *ostream; ! 1315: { ! 1316: if (pending) ! 1317: fputs ("}\n", ostream); ! 1318: free (lastprimary); ! 1319: free (lastsecondary); ! 1320: } ! 1321: ! 1322: /* Copy the lines in the sorted order. ! 1323: Each line is copied out of the input file it was found in. */ ! 1324: ! 1325: void ! 1326: writelines (linearray, nlines, ostream) ! 1327: char **linearray; ! 1328: int nlines; ! 1329: FILE *ostream; ! 1330: { ! 1331: char **stop_line = linearray + nlines; ! 1332: char **next_line; ! 1333: ! 1334: init_index (); ! 1335: ! 1336: /* Output the text of the lines, and free the buffer space */ ! 1337: ! 1338: for (next_line = linearray; next_line != stop_line; next_line++) ! 1339: { ! 1340: /* If -u was specified, output the line only if distinct from previous one. */ ! 1341: if (next_line == linearray ! 1342: /* Compare previous line with this one, using only the explicitly specd keyfields */ ! 1343: || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1)) ! 1344: { ! 1345: char *p = *next_line; ! 1346: char c; ! 1347: while ((c = *p++) && c != '\n'); ! 1348: *(p-1) = 0; ! 1349: indexify (*next_line, ostream); ! 1350: } ! 1351: } ! 1352: ! 1353: finish_index (ostream); ! 1354: } ! 1355: ! 1356: /* Assume (and optionally verify) that each input file is sorted; ! 1357: merge them and output the result. ! 1358: Returns nonzero if any input file fails to be sorted. ! 1359: ! 1360: This is the high-level interface that can handle an unlimited number of files. */ ! 1361: ! 1362: #define MAX_DIRECT_MERGE 10 ! 1363: ! 1364: int ! 1365: merge_files (infiles, nfiles, outfile) ! 1366: char **infiles; ! 1367: int nfiles; ! 1368: char *outfile; ! 1369: { ! 1370: char **tempfiles; ! 1371: int ntemps; ! 1372: int i; ! 1373: int value = 0; ! 1374: int start_tempcount = tempcount; ! 1375: ! 1376: if (nfiles <= MAX_DIRECT_MERGE) ! 1377: return merge_direct (infiles, nfiles, outfile); ! 1378: ! 1379: /* Merge groups of MAX_DIRECT_MERGE input files at a time, ! 1380: making a temporary file to hold each group's result. */ ! 1381: ! 1382: ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; ! 1383: tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); ! 1384: for (i = 0; i < ntemps; i++) ! 1385: { ! 1386: int nf = MAX_DIRECT_MERGE; ! 1387: if (i + 1 == ntemps) ! 1388: nf = nfiles - i * MAX_DIRECT_MERGE; ! 1389: tempfiles[i] = maketempname (++tempcount); ! 1390: value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); ! 1391: } ! 1392: ! 1393: /* All temporary files that existed before are no longer needed ! 1394: since their contents have been merged into our new tempfiles. ! 1395: So delete them. */ ! 1396: flush_tempfiles (start_tempcount); ! 1397: ! 1398: /* Now merge the temporary files we created. */ ! 1399: ! 1400: merge_files (tempfiles, ntemps, outfile); ! 1401: ! 1402: free (tempfiles); ! 1403: ! 1404: return value; ! 1405: } ! 1406: ! 1407: /* Assume (and optionally verify) that each input file is sorted; ! 1408: merge them and output the result. ! 1409: Returns nonzero if any input file fails to be sorted. ! 1410: ! 1411: This version of merging will not work if the number of ! 1412: input files gets too high. Higher level functions ! 1413: use it only with a bounded number of input files. */ ! 1414: ! 1415: int ! 1416: merge_direct (infiles, nfiles, outfile) ! 1417: char **infiles; ! 1418: int nfiles; ! 1419: char *outfile; ! 1420: { ! 1421: char **ip = infiles; ! 1422: struct linebuffer *lb1, *lb2; ! 1423: struct linebuffer **thisline, **prevline; ! 1424: FILE **streams; ! 1425: int i; ! 1426: int nleft; ! 1427: int lossage = 0; ! 1428: int *file_lossage; ! 1429: struct linebuffer *prev_out = 0; ! 1430: FILE *ostream = stdout; ! 1431: ! 1432: if (outfile) ! 1433: { ! 1434: ostream = fopen (outfile, "w"); ! 1435: } ! 1436: if (!ostream) pfatal_with_name (outfile); ! 1437: ! 1438: init_index (); ! 1439: ! 1440: if (nfiles == 0) ! 1441: { ! 1442: if (outfile) ! 1443: fclose (ostream); ! 1444: return 0; ! 1445: } ! 1446: ! 1447: /* For each file, make two line buffers. ! 1448: Also, for each file, there is an element of `thisline' ! 1449: which points at any time to one of the file's two buffers, ! 1450: and an element of `prevline' which points to the other buffer. ! 1451: `thisline' is supposed to point to the next available line from the file, ! 1452: while `prevline' holds the last file line used, ! 1453: which is remembered so that we can verify that the file is properly sorted. */ ! 1454: ! 1455: /* lb1 and lb2 contain one buffer each per file */ ! 1456: lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); ! 1457: lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); ! 1458: ! 1459: /* thisline[i] points to the linebuffer holding the next available line in file i, ! 1460: or is zero if there are no lines left in that file. */ ! 1461: thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *)); ! 1462: /* prevline[i] points to the linebuffer holding the last used line from file i. ! 1463: This is just for verifying that file i is properly sorted. */ ! 1464: prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *)); ! 1465: /* streams[i] holds the input stream for file i. */ ! 1466: streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); ! 1467: /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */ ! 1468: file_lossage = (int *) xmalloc (nfiles * sizeof (int)); ! 1469: ! 1470: /* Allocate and initialize all that storage */ ! 1471: ! 1472: for (i = 0; i < nfiles; i++) ! 1473: { ! 1474: initbuffer (&lb1[i]); ! 1475: initbuffer (&lb2[i]); ! 1476: thisline[i] = &lb1[i]; ! 1477: prevline[i] = &lb2[i]; ! 1478: file_lossage[i] = 0; ! 1479: streams[i] = fopen (infiles[i], "r"); ! 1480: if (!streams[i]) ! 1481: pfatal_with_name (infiles[i]); ! 1482: ! 1483: readline (thisline[i], streams[i]); ! 1484: } ! 1485: ! 1486: /* Keep count of number of files not at eof */ ! 1487: nleft = nfiles; ! 1488: ! 1489: while (nleft) ! 1490: { ! 1491: struct linebuffer *best = 0; ! 1492: struct linebuffer *exch; ! 1493: int bestfile = -1; ! 1494: int i; ! 1495: ! 1496: /* Look at the next avail line of each file; choose the least one. */ ! 1497: ! 1498: for (i = 0; i < nfiles; i++) ! 1499: { ! 1500: if (thisline[i] && ! 1501: (!best || ! 1502: 0 < compare_general (best->buffer, thisline[i]->buffer, ! 1503: (long) bestfile, (long) i, num_keyfields))) ! 1504: { ! 1505: best = thisline[i]; ! 1506: bestfile = i; ! 1507: } ! 1508: } ! 1509: ! 1510: /* Output that line, unless it matches the previous one and we don't want duplicates */ ! 1511: ! 1512: if (!(prev_out && ! 1513: !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1))) ! 1514: indexify (best->buffer, ostream); ! 1515: prev_out = best; ! 1516: ! 1517: /* Now make the line the previous of its file, and fetch a new line from that file */ ! 1518: ! 1519: exch = prevline[bestfile]; ! 1520: prevline[bestfile] = thisline[bestfile]; ! 1521: thisline[bestfile] = exch; ! 1522: ! 1523: while (1) ! 1524: { ! 1525: /* If the file has no more, mark it empty */ ! 1526: ! 1527: if (feof (streams[bestfile])) ! 1528: { ! 1529: thisline[bestfile] = 0; ! 1530: nleft--; /* Update the number of files still not empty */ ! 1531: break; ! 1532: } ! 1533: readline (thisline[bestfile], streams[bestfile]); ! 1534: if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break; ! 1535: } ! 1536: } ! 1537: ! 1538: finish_index (ostream); ! 1539: ! 1540: /* Free all storage and close all input streams */ ! 1541: ! 1542: for (i = 0; i < nfiles; i++) ! 1543: { ! 1544: fclose (streams[i]); ! 1545: free (lb1[i].buffer); ! 1546: free (lb2[i].buffer); ! 1547: } ! 1548: free (file_lossage); ! 1549: free (lb1); ! 1550: free (lb2); ! 1551: free (thisline); ! 1552: free (prevline); ! 1553: free (streams); ! 1554: ! 1555: if (outfile) ! 1556: fclose (ostream); ! 1557: ! 1558: return lossage; ! 1559: } ! 1560: ! 1561: /* Print error message and exit. */ ! 1562: ! 1563: fatal (s1, s2) ! 1564: char *s1, *s2; ! 1565: { ! 1566: error (s1, s2); ! 1567: exit (EXIT_FATAL); ! 1568: } ! 1569: ! 1570: /* Print error message. `s1' is printf control string, `s2' is arg for it. */ ! 1571: ! 1572: error (s1, s2) ! 1573: char *s1, *s2; ! 1574: { ! 1575: printf ("texindex: "); ! 1576: printf (s1, s2); ! 1577: printf ("\n"); ! 1578: } ! 1579: ! 1580: perror_with_name (name) ! 1581: char *name; ! 1582: { ! 1583: #ifdef VMS ! 1584: #include <errno.h> ! 1585: extern noshare int sys_nerr; ! 1586: extern noshare char *sys_errlist[]; ! 1587: #else ! 1588: extern int errno, sys_nerr; ! 1589: extern char *sys_errlist[]; ! 1590: #endif ! 1591: char *s; ! 1592: ! 1593: if (errno < sys_nerr) ! 1594: s = concat ("", sys_errlist[errno], " for %s"); ! 1595: else ! 1596: s = "cannot open %s"; ! 1597: error (s, name); ! 1598: } ! 1599: ! 1600: pfatal_with_name (name) ! 1601: char *name; ! 1602: { ! 1603: extern int errno, sys_nerr; ! 1604: extern char *sys_errlist[]; ! 1605: char *s; ! 1606: ! 1607: if (errno < sys_nerr) ! 1608: s = concat ("", sys_errlist[errno], " for %s"); ! 1609: else ! 1610: s = "cannot open %s"; ! 1611: fatal (s, name); ! 1612: } ! 1613: ! 1614: /* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */ ! 1615: ! 1616: char * ! 1617: concat (s1, s2, s3) ! 1618: char *s1, *s2, *s3; ! 1619: { ! 1620: int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); ! 1621: char *result = (char *) xmalloc (len1 + len2 + len3 + 1); ! 1622: ! 1623: strcpy (result, s1); ! 1624: strcpy (result + len1, s2); ! 1625: strcpy (result + len1 + len2, s3); ! 1626: *(result + len1 + len2 + len3) = 0; ! 1627: ! 1628: return result; ! 1629: } ! 1630: ! 1631: /* Like malloc but get fatal error if memory is exhausted. */ ! 1632: ! 1633: int ! 1634: xmalloc (size) ! 1635: int size; ! 1636: { ! 1637: int result = malloc (size); ! 1638: if (!result) ! 1639: fatal ("virtual memory exhausted", 0); ! 1640: return result; ! 1641: } ! 1642: ! 1643: ! 1644: int ! 1645: xrealloc (ptr, size) ! 1646: char *ptr; ! 1647: int size; ! 1648: { ! 1649: int result = realloc (ptr, size); ! 1650: if (!result) ! 1651: fatal ("virtual memory exhausted"); ! 1652: return result; ! 1653: } ! 1654: ! 1655: bzero (b, length) ! 1656: register char *b; ! 1657: register int length; ! 1658: { ! 1659: #ifdef VMS ! 1660: short zero = 0; ! 1661: long max_str = 65535; ! 1662: ! 1663: while (length > max_str) { ! 1664: (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b); ! 1665: length -= max_str; ! 1666: b += max_str; ! 1667: } ! 1668: (void) LIB$MOVC5 (&zero, &zero, &zero, &length, b); ! 1669: #else ! 1670: while (length-- > 0) ! 1671: *b++ = 0; ! 1672: #endif /* not VMS */ ! 1673: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.