|
|
1.1 root 1: #ifndef lint
2: static char *sccsid = "@(#)sortbib.c 4.3 (Berkeley) 5/11/89";
3: #endif
4:
5: #include <stdio.h>
6: #include <signal.h>
7: #include "pathnames.h"
8:
#define BUF BUFSIZ	/* size of one input line and of each sort-field buffer */
#define MXFILES 16	/* most databases main() will open in a single run */

char *tempfile;		/* name of the temporary file holding the sort keys */
char *keystr = "AD";	/* field letters to sort on; default is author, then date */
int multauth = 0;	/* 0: sort on senior author only; eval() sets 1 for "A+" */
int oneauth;		/* set by parse() once the current record has had an author */
16:
17: main(argc, argv) /* sortbib: sort bibliographic database in place */
18: int argc;
19: char *argv[];
20: {
21: FILE *fp[MXFILES], *tfp, *fopen();
22: int i, onintr();
23: char *mktemp();
24:
25: if (argc == 1) /* can't use stdin for seeking anyway */
26: {
27: puts("Usage: sortbib [-sKEYS] database [...]");
28: puts("\t-s: sort by fields in KEYS (default is AD)");
29: exit(1);
30: }
31: if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
32: {
33: keystr = argv[1]+2;
34: eval(keystr); /* evaluate A+ for multiple authors */
35: argv++; argc--;
36: }
37: if (argc > MXFILES+1) /* too many open file streams */
38: {
39: fprintf(stderr,
40: "sortbib: More than %d databases specified\n", MXFILES);
41: exit(1);
42: }
43: for (i = 1; i < argc; i++) /* open files in arg list */
44: if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
45: error(argv[i]);
46: tempfile = _PATH_TMPS; /* tempfile for sorting keys */
47: mktemp(tempfile);
48: if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */
49: signal(SIGINT, onintr);
50: if ((tfp = fopen(tempfile, "w")) == NULL)
51: error(tempfile);
52: for (i = 0; i < argc-1; i++) /* read keys from bib files */
53: sortbib(fp[i], tfp, i);
54: fclose(tfp);
55: deliver(fp, tfp); /* do disk seeks and read from biblio files */
56: unlink(tempfile);
57: exit(0);
58: }
59:
/*
 * Record separator convention, fixed by the first separator sortbib() sees:
 * 0 = not yet determined, 1 = null line, 2 = bracket (a line starting ".[").
 * deliver() consults it when writing the records back out.
 */
int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */
61:
62: sortbib(fp, tfp, i) /* read records, prepare list for sorting */
63: FILE *fp, *tfp;
64: int i;
65: {
66: long offset, lastoffset = 0, ftell(); /* byte offsets in file */
67: int length, newrec, recno = 0; /* reclen, new rec'd?, number */
68: char line[BUF], fld[4][BUF]; /* one line, the sort fields */
69:
70: /* measure byte offset, then get new line */
71: while (offset = ftell(fp), fgets(line, BUF, fp))
72: {
73: if (recno == 0) /* accept record w/o initial newline */
74: newrec = 1;
75: if (line[0] == '\n') /* accept null line record separator */
76: {
77: if (!rsmode)
78: rsmode = 1; /* null line mode */
79: if (rsmode == 1)
80: newrec = 1;
81: }
82: if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */
83: {
84: if (!rsmode)
85: rsmode = 2; /* bracket pair mode */
86: if (rsmode == 2)
87: newrec = 1;
88: }
89: if (newrec) /* by whatever means above */
90: {
91: newrec = 0;
92: length = offset - lastoffset; /* measure rec len */
93: if (length > BUF*8)
94: {
95: fprintf(stderr,
96: "sortbib: record %d longer than %d (%d)\n",
97: recno, BUF*8, length);
98: exit(1);
99: }
100: if (recno++) /* info for sorting */
101: {
102: fprintf(tfp, "%d %D %d : %s %s %s %s\n",
103: i, lastoffset, length,
104: fld[0], fld[1], fld[2], fld[3]);
105: if (ferror(tfp))
106: error(tempfile);
107: }
108: *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
109: oneauth = 0; /* reset number of authors */
110: lastoffset = offset; /* save for next time */
111: }
112: if (line[0] == '%') /* parse out fields to be sorted */
113: parse(line, fld);
114: }
115: offset = ftell(fp); /* measure byte offset at EOF */
116: length = offset - lastoffset; /* measure final record length */
117: if (length > BUF*8)
118: {
119: fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
120: recno, BUF*8, length);
121: exit(1);
122: }
123: if (line[0] != '\n') /* ignore null line just before EOF */
124: {
125: fprintf(tfp, "%d %D %d : %s %s %s %s\n",
126: i, lastoffset, length,
127: fld[0], fld[1], fld[2], fld[3]);
128: if (ferror(tfp))
129: error(tempfile); /* disk error in /tmp */
130: }
131: }
132:
133: deliver(fp, tfp) /* deliver sorted entries out of database(s) */
134: FILE *fp[], *tfp;
135: {
136: char str[BUF], buff[BUF*8]; /* for tempfile & databases */
137: char cmd[80]; /* for using system sort command */
138: long int offset;
139: int i, length;
140:
141: /* when sorting, ignore case distinctions; tab char is ':' */
142: sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
143: if (system(cmd) == 127)
144: error("sortbib");
145: tfp = fopen(tempfile, "r");
146: while (fgets(str, sizeof(str), tfp))
147: {
148: /* get file pointer, record offset, and length */
149: if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
150: error("sortbib: sorting error");
151: /* seek to proper disk location in proper file */
152: if (fseek(fp[i], offset, 0) == -1)
153: error("sortbib");
154: /* read exactly one record from bibliography */
155: if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
156: error("sortbib");
157: /* add newline between unseparated records */
158: if (buff[0] != '\n' && rsmode == 1)
159: putchar('\n');
160: /* write record buffer to standard output */
161: if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
162: error("sortbib");
163: }
164: }
165:
166: parse(line, fld) /* get fields out of line, prepare for sorting */
167: char line[];
168: char fld[][BUF];
169: {
170: char wd[8][BUF/4], *strcat();
171: int n, i, j;
172:
173: for (i = 0; i < 8; i++) /* zap out old strings */
174: *wd[i] = NULL;
175: n = sscanf(line, "%s %s %s %s %s %s %s %s",
176: wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
177: for (i = 0; i < 4; i++)
178: {
179: if (wd[0][1] == keystr[i])
180: {
181: if (wd[0][1] == 'A')
182: {
183: if (oneauth && !multauth) /* no repeat */
184: break;
185: else if (oneauth) /* mult auths */
186: strcat(fld[i], "~~");
187: if (!endcomma(wd[n-2])) /* surname */
188: strcat(fld[i], wd[n-1]);
189: else { /* jr. or ed. */
190: strcat(fld[i], wd[n-2]);
191: n--;
192: }
193: strcat(fld[i], " ");
194: for (j = 1; j < n-1; j++)
195: strcat(fld[i], wd[j]);
196: oneauth = 1;
197: }
198: else if (wd[0][1] == 'D')
199: {
200: strcat(fld[i], wd[n-1]); /* year */
201: if (n > 2)
202: strcat(fld[i], wd[1]); /* month */
203: }
204: else if (wd[0][1] == 'T' || wd[0][1] == 'J')
205: {
206: j = 1;
207: if (article(wd[1])) /* skip article */
208: j++;
209: for (; j < n; j++)
210: strcat(fld[i], wd[j]);
211: }
212: else /* any other field */
213: for (j = 1; j < n; j++)
214: strcat(fld[i], wd[j]);
215: }
216: /* %Q quorporate or queer author - unreversed %A */
217: else if (wd[0][1] == 'Q' && keystr[i] == 'A')
218: for (j = 1; j < n; j++)
219: strcat(fld[i], wd[j]);
220: }
221: }
222:
/*
 * article: report whether str is exactly one of the known leading articles.
 * The comparison is case-sensitive.  Returns 1 on a match, 0 otherwise.
 */
int
article(str)
char *str;
{
	static char *known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den",			/* Scandinavian */
		(char *)0		/* end of table */
	};
	char **ap;

	for (ap = known; *ap != (char *)0; ap++)
		if (strcmp(*ap, str) == 0)
			return (1);
	return (0);
}
248:
249: eval(keystr) /* evaluate key string for A+ marking */
250: char keystr[];
251: {
252: int i, j;
253:
254: for (i = 0, j = 0; keystr[i]; i++, j++)
255: {
256: if (keystr[i] == '+')
257: {
258: multauth = 1;
259: i++;
260: }
261: keystr[j] = keystr[i];
262: }
263: keystr[j] = NULL;
264: }
265:
/*
 * error: exit in case of various system errors.
 *
 * s - text printed by perror() ahead of the system error message
 *
 * Removes the key file, if one has been named, and exits with status 1.
 * Does not return.
 */
int
error(s)
char *s;
{
	extern char *tempfile;

	perror(s);		/* report first: unlink() may change errno */
	if (tempfile != NULL)	/* don't leave the key file behind */
		unlink(tempfile);
	exit(1);
}
272:
273: onintr() /* remove tempfile in case of interrupt */
274: {
275: fprintf(stderr, "\nInterrupt\n");
276: unlink(tempfile);
277: exit(1);
278: }
279:
/*
 * endcomma: strip one trailing comma from str.
 *
 * str - NUL-terminated string; modified in place when it ends in ','
 *
 * Returns 1 if a comma was removed, 0 otherwise.  An empty string is left
 * alone and yields 0.
 */
int
endcomma(str)
char *str;
{
	char *last;

	if (*str == '\0')		/* nothing to look at */
		return (0);
	for (last = str; last[1] != '\0'; last++)
		;
	if (*last == ',')
	{
		*last = '\0';
		return (1);
	}
	return (0);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.