|
|
1.1 root 1: #ifndef lint
2: static char *sccsid = "@(#)sortbib.c 4.1 (Berkeley) 5/6/83";
3: #endif
4:
5: #include <stdio.h>
6: #include <signal.h>
#define BUF	BUFSIZ		/* size of one input line / one sort field */
#define MXFILES	16		/* most databases that may be named at once */

/*
 * File-scope state shared by the routines below.
 */
char *tempfile = NULL;		/* name of the scratch file of sort keys */
char *keystr = "AD";		/* fields to sort on: author, then date */
int multauth = 0;		/* 0: senior author only; 1: all authors */
int oneauth = 0;		/* set once the current record has an author */
14:
15: main(argc, argv) /* sortbib: sort bibliographic database in place */
16: int argc;
17: char *argv[];
18: {
19: FILE *fp[MXFILES], *tfp, *fopen();
20: int i, onintr();
21: char *mktemp();
22:
23: if (argc == 1) /* can't use stdin for seeking anyway */
24: {
25: puts("Usage: sortbib [-sKEYS] database [...]");
26: puts("\t-s: sort by fields in KEYS (default is AD)");
27: exit(1);
28: }
29: if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
30: {
31: keystr = argv[1]+2;
32: eval(keystr); /* evaluate A+ for multiple authors */
33: argv++; argc--;
34: }
35: if (argc > MXFILES+1) /* too many open file streams */
36: {
37: fprintf(stderr,
38: "sortbib: More than %d databases specified\n", MXFILES);
39: exit(1);
40: }
41: for (i = 1; i < argc; i++) /* open files in arg list */
42: if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
43: error(argv[i]);
44: tempfile = "/tmp/SbibXXXXX"; /* tempfile for sorting keys */
45: mktemp(tempfile);
46: if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */
47: signal(SIGINT, onintr);
48: if ((tfp = fopen(tempfile, "w")) == NULL)
49: error(tempfile);
50: for (i = 0; i < argc-1; i++) /* read keys from bib files */
51: sortbib(fp[i], tfp, i);
52: fclose(tfp);
53: deliver(fp, tfp); /* do disk seeks and read from biblio files */
54: unlink(tempfile);
55: exit(0);
56: }
57:
58: int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */
59:
60: sortbib(fp, tfp, i) /* read records, prepare list for sorting */
61: FILE *fp, *tfp;
62: int i;
63: {
64: long offset, lastoffset = 0, ftell(); /* byte offsets in file */
65: int length, newrec, recno = 0; /* reclen, new rec'd?, number */
66: char line[BUF], fld[4][BUF]; /* one line, the sort fields */
67:
68: /* measure byte offset, then get new line */
69: while (offset = ftell(fp), fgets(line, BUF, fp))
70: {
71: if (recno == 0) /* accept record w/o initial newline */
72: newrec = 1;
73: if (line[0] == '\n') /* accept null line record separator */
74: {
75: if (!rsmode)
76: rsmode = 1; /* null line mode */
77: if (rsmode == 1)
78: newrec = 1;
79: }
80: if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */
81: {
82: if (!rsmode)
83: rsmode = 2; /* bracket pair mode */
84: if (rsmode == 2)
85: newrec = 1;
86: }
87: if (newrec) /* by whatever means above */
88: {
89: newrec = 0;
90: length = offset - lastoffset; /* measure rec len */
91: if (length > BUF*8)
92: {
93: fprintf(stderr,
94: "sortbib: record %d longer than %d (%d)\n",
95: recno, BUF*8, length);
96: exit(1);
97: }
98: if (recno++) /* info for sorting */
99: {
100: fprintf(tfp, "%d %D %d : %s %s %s %s\n",
101: i, lastoffset, length,
102: fld[0], fld[1], fld[2], fld[3]);
103: if (ferror(tfp))
104: error(tempfile);
105: }
106: *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
107: oneauth = 0; /* reset number of authors */
108: lastoffset = offset; /* save for next time */
109: }
110: if (line[0] == '%') /* parse out fields to be sorted */
111: parse(line, fld);
112: }
113: offset = ftell(fp); /* measure byte offset at EOF */
114: length = offset - lastoffset; /* measure final record length */
115: if (length > BUF*8)
116: {
117: fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
118: recno, BUF*8, length);
119: exit(1);
120: }
121: if (line[0] != '\n') /* ignore null line just before EOF */
122: {
123: fprintf(tfp, "%d %D %d : %s %s %s %s\n",
124: i, lastoffset, length,
125: fld[0], fld[1], fld[2], fld[3]);
126: if (ferror(tfp))
127: error(tempfile); /* disk error in /tmp */
128: }
129: }
130:
131: deliver(fp, tfp) /* deliver sorted entries out of database(s) */
132: FILE *fp[], *tfp;
133: {
134: char str[BUF], buff[BUF*8]; /* for tempfile & databases */
135: char cmd[80], *sprintf(); /* for using system sort command */
136: long int offset;
137: int i, length;
138:
139: /* when sorting, ignore case distinctions; tab char is ':' */
140: sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
141: if (system(cmd) == 127)
142: error("sortbib");
143: tfp = fopen(tempfile, "r");
144: while (fgets(str, sizeof(str), tfp))
145: {
146: /* get file pointer, record offset, and length */
147: if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
148: error("sortbib: sorting error");
149: /* seek to proper disk location in proper file */
150: if (fseek(fp[i], offset, 0) == -1)
151: error("sortbib");
152: /* read exactly one record from bibliography */
153: if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
154: error("sortbib");
155: /* add newline between unseparated records */
156: if (buff[0] != '\n' && rsmode == 1)
157: putchar('\n');
158: /* write record buffer to standard output */
159: if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
160: error("sortbib");
161: }
162: }
163:
164: parse(line, fld) /* get fields out of line, prepare for sorting */
165: char line[];
166: char fld[][BUF];
167: {
168: char wd[8][BUF/4], *strcat();
169: int n, i, j;
170:
171: for (i = 0; i < 8; i++) /* zap out old strings */
172: *wd[i] = NULL;
173: n = sscanf(line, "%s %s %s %s %s %s %s %s",
174: wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
175: for (i = 0; i < 4; i++)
176: {
177: if (wd[0][1] == keystr[i])
178: {
179: if (wd[0][1] == 'A')
180: {
181: if (oneauth && !multauth) /* no repeat */
182: break;
183: else if (oneauth) /* mult auths */
184: strcat(fld[i], "~~");
185: if (!endcomma(wd[n-2])) /* surname */
186: strcat(fld[i], wd[n-1]);
187: else { /* jr. or ed. */
188: strcat(fld[i], wd[n-2]);
189: n--;
190: }
191: strcat(fld[i], " ");
192: for (j = 1; j < n-1; j++)
193: strcat(fld[i], wd[j]);
194: oneauth = 1;
195: }
196: else if (wd[0][1] == 'D')
197: {
198: strcat(fld[i], wd[n-1]); /* year */
199: if (n > 2)
200: strcat(fld[i], wd[1]); /* month */
201: }
202: else if (wd[0][1] == 'T' || wd[0][1] == 'J')
203: {
204: j = 1;
205: if (article(wd[1])) /* skip article */
206: j++;
207: for (; j < n; j++)
208: strcat(fld[i], wd[j]);
209: }
210: else /* any other field */
211: for (j = 1; j < n; j++)
212: strcat(fld[i], wd[j]);
213: }
214: /* %Q quorporate or queer author - unreversed %A */
215: else if (wd[0][1] == 'Q' && keystr[i] == 'A')
216: for (j = 1; j < n; j++)
217: strcat(fld[i], wd[j]);
218: }
219: }
220:
/*
 * article: return 1 if str is exactly one of the recognized leading
 * articles (capitalized, compared case-sensitively), otherwise 0.
 */
int
article(str)
char *str;
{
	static char *known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	int k;

	for (k = 0; k < (int)(sizeof(known) / sizeof(known[0])); k++)
		if (strcmp(known[k], str) == 0)
			return(1);
	return(0);
}
246:
247: eval(keystr) /* evaluate key string for A+ marking */
248: char keystr[];
249: {
250: int i, j;
251:
252: for (i = 0, j = 0; keystr[i]; i++, j++)
253: {
254: if (keystr[i] == '+')
255: {
256: multauth = 1;
257: i++;
258: }
259: keystr[j] = keystr[i];
260: }
261: keystr[j] = NULL;
262: }
263:
/*
 * error: report a system error and give up.
 *
 * s	text handed to perror() as the message prefix
 *
 * Does not return; the program exits with status 1.
 */
int
error(s)
char *s;
{
	perror(s);
	exit(1);
}
270:
271: onintr() /* remove tempfile in case of interrupt */
272: {
273: fprintf(stderr, "\nInterrupt\n");
274: unlink(tempfile);
275: exit(1);
276: }
277:
/*
 * endcomma: test whether a word ends in a comma and, if so, strip it.
 *
 * str	NUL-terminated word; modified in place when it ends in ','
 *
 * Returns 1 if a trailing comma was found and removed, otherwise 0.
 * An empty string has no last character and yields 0.
 */
int
endcomma(str)
char *str;
{
	int last;

	last = (int)strlen(str) - 1;	/* index of last char, -1 if empty */
	if (last >= 0 && str[last] == ',')
	{
		str[last] = '\0';
		return(1);
	}
	return(0);
}
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.