1 /*-
2 * %sccs.include.proprietary.c%
3 */
4
5 #ifndef lint
6 static char sccsid[] = "@(#)sortbib.c 4.6 (Berkeley) 04/18/91";
7 #endif /* not lint */
8
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "pathnames.h"
13
#define BUF	BUFSIZ		/* size of one input line and of one sort field */
#define MXFILES	16		/* most databases accepted on the command line */

char *tempfile;			/* name of the temporary file holding sort keys */
char *keystr = "AD";		/* field letters to sort on; default author, date */
int multauth = 0;		/* nonzero: sort on all authors, not only the first */
int oneauth;			/* nonzero once an author was seen in this record */
21
main(argc,argv)22 main(argc, argv) /* sortbib: sort bibliographic database in place */
23 int argc;
24 char *argv[];
25 {
26 FILE *fp[MXFILES], *tfp, *fopen();
27 int i;
28 void onintr();
29 char *mktemp();
30
31 if (argc == 1) /* can't use stdin for seeking anyway */
32 {
33 puts("Usage: sortbib [-sKEYS] database [...]");
34 puts("\t-s: sort by fields in KEYS (default is AD)");
35 exit(1);
36 }
37 if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
38 {
39 keystr = argv[1]+2;
40 eval(keystr); /* evaluate A+ for multiple authors */
41 argv++; argc--;
42 }
43 if (argc > MXFILES+1) /* too many open file streams */
44 {
45 fprintf(stderr,
46 "sortbib: More than %d databases specified\n", MXFILES);
47 exit(1);
48 }
49 for (i = 1; i < argc; i++) /* open files in arg list */
50 if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
51 error(argv[i]);
52 tempfile = mktemp(strdup(_PATH_TMPS)); /* tempfile for sorting keys */
53 if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */
54 signal(SIGINT, onintr);
55 if ((tfp = fopen(tempfile, "w")) == NULL)
56 error(tempfile);
57 for (i = 0; i < argc-1; i++) /* read keys from bib files */
58 sortbib(fp[i], tfp, i);
59 fclose(tfp);
60 deliver(fp, tfp); /* do disk seeks and read from biblio files */
61 unlink(tempfile);
62 exit(0);
63 }
64
int rsmode = 0;	/* record separator: 0 = not yet seen, 1 = null line, 2 = bracket */
66
sortbib(fp,tfp,i)67 sortbib(fp, tfp, i) /* read records, prepare list for sorting */
68 FILE *fp, *tfp;
69 int i;
70 {
71 long offset, lastoffset = 0, ftell(); /* byte offsets in file */
72 int length, newrec, recno = 0; /* reclen, new rec'd?, number */
73 char line[BUF], fld[4][BUF]; /* one line, the sort fields */
74
75 /* measure byte offset, then get new line */
76 while (offset = ftell(fp), fgets(line, BUF, fp))
77 {
78 if (recno == 0) /* accept record w/o initial newline */
79 newrec = 1;
80 if (line[0] == '\n') /* accept null line record separator */
81 {
82 if (!rsmode)
83 rsmode = 1; /* null line mode */
84 if (rsmode == 1)
85 newrec = 1;
86 }
87 if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */
88 {
89 if (!rsmode)
90 rsmode = 2; /* bracket pair mode */
91 if (rsmode == 2)
92 newrec = 1;
93 }
94 if (newrec) /* by whatever means above */
95 {
96 newrec = 0;
97 length = offset - lastoffset; /* measure rec len */
98 if (length > BUF*8)
99 {
100 fprintf(stderr,
101 "sortbib: record %d longer than %d (%d)\n",
102 recno, BUF*8, length);
103 exit(1);
104 }
105 if (recno++) /* info for sorting */
106 {
107 fprintf(tfp, "%d %D %d : %s %s %s %s\n",
108 i, lastoffset, length,
109 fld[0], fld[1], fld[2], fld[3]);
110 if (ferror(tfp))
111 error(tempfile);
112 }
113 *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
114 oneauth = 0; /* reset number of authors */
115 lastoffset = offset; /* save for next time */
116 }
117 if (line[0] == '%') /* parse out fields to be sorted */
118 parse(line, fld);
119 }
120 offset = ftell(fp); /* measure byte offset at EOF */
121 length = offset - lastoffset; /* measure final record length */
122 if (length > BUF*8)
123 {
124 fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
125 recno, BUF*8, length);
126 exit(1);
127 }
128 if (line[0] != '\n') /* ignore null line just before EOF */
129 {
130 fprintf(tfp, "%d %D %d : %s %s %s %s\n",
131 i, lastoffset, length,
132 fld[0], fld[1], fld[2], fld[3]);
133 if (ferror(tfp))
134 error(tempfile); /* disk error in /tmp */
135 }
136 }
137
deliver(fp,tfp)138 deliver(fp, tfp) /* deliver sorted entries out of database(s) */
139 FILE *fp[], *tfp;
140 {
141 char str[BUF], buff[BUF*8]; /* for tempfile & databases */
142 char cmd[80]; /* for using system sort command */
143 long int offset;
144 int i, length;
145
146 /* when sorting, ignore case distinctions; tab char is ':' */
147 sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
148 if (system(cmd) == 127)
149 error("sortbib");
150 tfp = fopen(tempfile, "r");
151 while (fgets(str, sizeof(str), tfp))
152 {
153 /* get file pointer, record offset, and length */
154 if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
155 error("sortbib: sorting error");
156 /* seek to proper disk location in proper file */
157 if (fseek(fp[i], offset, 0) == -1)
158 error("sortbib");
159 /* read exactly one record from bibliography */
160 if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
161 error("sortbib");
162 /* add newline between unseparated records */
163 if (buff[0] != '\n' && rsmode == 1)
164 putchar('\n');
165 /* write record buffer to standard output */
166 if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
167 error("sortbib");
168 }
169 }
170
parse(line,fld)171 parse(line, fld) /* get fields out of line, prepare for sorting */
172 char line[];
173 char fld[][BUF];
174 {
175 char wd[8][BUF/4], *strcat();
176 int n, i, j;
177
178 for (i = 0; i < 8; i++) /* zap out old strings */
179 *wd[i] = NULL;
180 n = sscanf(line, "%s %s %s %s %s %s %s %s",
181 wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
182 for (i = 0; i < 4; i++)
183 {
184 if (wd[0][1] == keystr[i])
185 {
186 if (wd[0][1] == 'A')
187 {
188 if (oneauth && !multauth) /* no repeat */
189 break;
190 else if (oneauth) /* mult auths */
191 strcat(fld[i], "~~");
192 if (!endcomma(wd[n-2])) /* surname */
193 strcat(fld[i], wd[n-1]);
194 else { /* jr. or ed. */
195 strcat(fld[i], wd[n-2]);
196 n--;
197 }
198 strcat(fld[i], " ");
199 for (j = 1; j < n-1; j++)
200 strcat(fld[i], wd[j]);
201 oneauth = 1;
202 }
203 else if (wd[0][1] == 'D')
204 {
205 strcat(fld[i], wd[n-1]); /* year */
206 if (n > 2)
207 strcat(fld[i], wd[1]); /* month */
208 }
209 else if (wd[0][1] == 'T' || wd[0][1] == 'J')
210 {
211 j = 1;
212 if (article(wd[1])) /* skip article */
213 j++;
214 for (; j < n; j++)
215 strcat(fld[i], wd[j]);
216 }
217 else /* any other field */
218 for (j = 1; j < n; j++)
219 strcat(fld[i], wd[j]);
220 }
221 /* %Q quorporate or queer author - unreversed %A */
222 else if (wd[0][1] == 'Q' && keystr[i] == 'A')
223 for (j = 1; j < n; j++)
224 strcat(fld[i], wd[j]);
225 }
226 }
227
/*
 * article: see if the string is an article.
 *
 * str	word to test; compared exactly, so case matters
 *
 * Returns 1 if str is one of the listed articles, 0 otherwise.
 */
int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",		/* English */
		"Le", "La",			/* French */
		"Der", "Die", "Das",		/* German */
		"El",				/* Spanish */
		"Den"				/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof articles / sizeof articles[0]; k++)
		if (strcmp(articles[k], str) == 0)
			return 1;
	return 0;
}
253
eval(keystr)254 eval(keystr) /* evaluate key string for A+ marking */
255 char keystr[];
256 {
257 int i, j;
258
259 for (i = 0, j = 0; keystr[i]; i++, j++)
260 {
261 if (keystr[i] == '+')
262 {
263 multauth = 1;
264 i++;
265 }
266 keystr[j] = keystr[i];
267 }
268 keystr[j] = NULL;
269 }
270
error(s)271 error(s) /* exit in case of various system errors */
272 char *s;
273 {
274 perror(s);
275 exit(1);
276 }
277
278 void
onintr()279 onintr() /* remove tempfile in case of interrupt */
280 {
281 fprintf(stderr, "\nInterrupt\n");
282 unlink(tempfile);
283 exit(1);
284 }
285
/*
 * endcomma: strip a trailing comma.
 *
 * str	NUL-terminated string, modified in place
 *
 * If the last character of str is a comma it is removed and 1 is
 * returned; otherwise str is left alone and 0 is returned.  An empty
 * string has no last character and yields 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return 1;
	}
	return 0;
}
299