xref: /original-bsd/old/refer/sortbib/sortbib.c (revision 51dd4909)
1 #ifndef lint
2 static char *sccsid = "@(#)sortbib.c	4.5 (Berkeley) 03/07/91";
3 #endif
4 
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "pathnames.h"
9 
10 #define BUF BUFSIZ
11 #define MXFILES 16
12 
/* state shared by all of the routines in this file */
char *tempfile;		/* name of the scratch file holding the sort keys */
char *keystr = "AD";	/* fields to sort on; default: author, then date */
int multauth = 0;	/* nonzero: use every author, not only the first */
int oneauth;		/* nonzero once this record has supplied an author */
17 
18 main(argc, argv)	/* sortbib: sort bibliographic database in place */
19 int argc;
20 char *argv[];
21 {
22 	FILE *fp[MXFILES], *tfp, *fopen();
23 	int i;
24 	void onintr();
25 	char *mktemp();
26 
27 	if (argc == 1)		/* can't use stdin for seeking anyway */
28 	{
29 		puts("Usage:  sortbib [-sKEYS] database [...]");
30 		puts("\t-s: sort by fields in KEYS (default is AD)");
31 		exit(1);
32 	}
33 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
34 	{
35 		keystr = argv[1]+2;
36 		eval(keystr);		/* evaluate A+ for multiple authors */
37 		argv++; argc--;
38 	}
39 	if (argc > MXFILES+1)	/* too many open file streams */
40 	{
41 		fprintf(stderr,
42 		"sortbib: More than %d databases specified\n", MXFILES);
43 		exit(1);
44 	}
45 	for (i = 1; i < argc; i++)		/* open files in arg list */
46 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
47 			error(argv[i]);
48 	tempfile = mktemp(strdup(_PATH_TMPS));	/* tempfile for sorting keys */
49 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
50 		signal(SIGINT, onintr);
51 	if ((tfp = fopen(tempfile, "w")) == NULL)
52 		error(tempfile);
53 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
54 		sortbib(fp[i], tfp, i);
55 	fclose(tfp);
56 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
57 	unlink(tempfile);
58 	exit(0);
59 }
60 
61 int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
62 
63 sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
64 FILE *fp, *tfp;
65 int i;
66 {
67 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
68 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
69 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
70 
71 	/* measure byte offset, then get new line */
72 	while (offset = ftell(fp), fgets(line, BUF, fp))
73 	{
74 		if (recno == 0)		/* accept record w/o initial newline */
75 			newrec = 1;
76 		if (line[0] == '\n')	/* accept null line record separator */
77 		{
78 			if (!rsmode)
79 				rsmode = 1;	/* null line mode */
80 			if (rsmode == 1)
81 				newrec = 1;
82 		}
83 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
84 		{
85 			if (!rsmode)
86 				rsmode = 2;	/* bracket pair mode */
87 			if (rsmode == 2)
88 				newrec = 1;
89 		}
90 		if (newrec)		/* by whatever means above */
91 		{
92 			newrec = 0;
93 			length = offset - lastoffset;	/* measure rec len */
94 			if (length > BUF*8)
95 			{
96 				fprintf(stderr,
97 				"sortbib: record %d longer than %d (%d)\n",
98 					recno, BUF*8, length);
99 				exit(1);
100 			}
101 			if (recno++)			/* info for sorting */
102 			{
103 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
104 					i, lastoffset, length,
105 					fld[0], fld[1], fld[2], fld[3]);
106 				if (ferror(tfp))
107 					error(tempfile);
108 			}
109 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
110 			oneauth = 0;		/* reset number of authors */
111 			lastoffset = offset;	/* save for next time */
112 		}
113 		if (line[0] == '%')	/* parse out fields to be sorted */
114 			parse(line, fld);
115 	}
116 	offset = ftell(fp);		/* measure byte offset at EOF */
117 	length = offset - lastoffset;	/* measure final record length */
118 	if (length > BUF*8)
119 	{
120 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
121 			recno, BUF*8, length);
122 		exit(1);
123 	}
124 	if (line[0] != '\n')		/* ignore null line just before EOF */
125 	{
126 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
127 			i, lastoffset, length,
128 			fld[0], fld[1], fld[2], fld[3]);
129 		if (ferror(tfp))
130 			error(tempfile);	/* disk error in /tmp */
131 	}
132 }
133 
134 deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
135 FILE *fp[], *tfp;
136 {
137 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
138 	char cmd[80];			/* for using system sort command */
139 	long int offset;
140 	int i, length;
141 
142 	/* when sorting, ignore case distinctions; tab char is ':' */
143 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
144 	if (system(cmd) == 127)
145 		error("sortbib");
146 	tfp = fopen(tempfile, "r");
147 	while (fgets(str, sizeof(str), tfp))
148 	{
149 		/* get file pointer, record offset, and length */
150 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
151 			error("sortbib: sorting error");
152 		/* seek to proper disk location in proper file */
153 		if (fseek(fp[i], offset, 0) == -1)
154 			error("sortbib");
155 		/* read exactly one record from bibliography */
156 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
157 			error("sortbib");
158 		/* add newline between unseparated records */
159 		if (buff[0] != '\n' && rsmode == 1)
160 			putchar('\n');
161 		/* write record buffer to standard output */
162 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
163 			error("sortbib");
164 	}
165 }
166 
167 parse(line, fld)	/* get fields out of line, prepare for sorting */
168 char line[];
169 char fld[][BUF];
170 {
171 	char wd[8][BUF/4], *strcat();
172 	int n, i, j;
173 
174 	for (i = 0; i < 8; i++)		/* zap out old strings */
175 		*wd[i] = NULL;
176 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
177 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
178 	for (i = 0; i < 4; i++)
179 	{
180 		if (wd[0][1] == keystr[i])
181 		{
182 			if (wd[0][1] == 'A')
183 			{
184 				if (oneauth && !multauth)	/* no repeat */
185 					break;
186 				else if (oneauth)		/* mult auths */
187 					strcat(fld[i], "~~");
188 				if (!endcomma(wd[n-2]))		/* surname */
189 					strcat(fld[i], wd[n-1]);
190 				else {				/* jr. or ed. */
191 					strcat(fld[i], wd[n-2]);
192 					n--;
193 				}
194 				strcat(fld[i], " ");
195 				for (j = 1; j < n-1; j++)
196 					strcat(fld[i], wd[j]);
197 				oneauth = 1;
198 			}
199 			else if (wd[0][1] == 'D')
200 			{
201 				strcat(fld[i], wd[n-1]);	/* year */
202 				if (n > 2)
203 					strcat(fld[i], wd[1]);	/* month */
204 			}
205 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
206 			{
207 				j = 1;
208 				if (article(wd[1]))	/* skip article */
209 					j++;
210 				for (; j < n; j++)
211 					strcat(fld[i], wd[j]);
212 			}
213 			else  /* any other field */
214 				for (j = 1; j < n; j++)
215 					strcat(fld[i], wd[j]);
216 		}
217 		/* %Q quorporate or queer author - unreversed %A */
218 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
219 			for (j = 1; j < n; j++)
220 				strcat(fld[i], wd[j]);
221 	}
222 }
223 
/*
 * article: see if string is an article
 *
 * Returns 1 when str is exactly one of the capitalised articles listed
 * below (the comparison is case-sensitive), otherwise 0.
 */
int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof(articles) / sizeof(articles[0]); k++)
		if (strcmp(articles[k], str) == 0)
			return(1);
	return(0);
}
249 
250 eval(keystr)		/* evaluate key string for A+ marking */
251 char keystr[];
252 {
253 	int i, j;
254 
255 	for (i = 0, j = 0; keystr[i]; i++, j++)
256 	{
257 		if (keystr[i] == '+')
258 		{
259 			multauth = 1;
260 			i++;
261 		}
262 		keystr[j] = keystr[i];
263 	}
264 	keystr[j] = NULL;
265 }
266 
/*
 * error: exit in case of various system errors
 *
 * Prints s followed by the message for the current errno via perror(),
 * then exits with status 1.  Never returns; the int return type is kept
 * only so that existing callers without a prototype still match.
 */
int
error(char *s)
{
	perror(s);
	exit(1);
}
273 
274 void
275 onintr()		/* remove tempfile in case of interrupt */
276 {
277 	fprintf(stderr, "\nInterrupt\n");
278 	unlink(tempfile);
279 	exit(1);
280 }
281 
/*
 * endcomma: strip a trailing comma from str
 *
 * If the last character of str is ',', it is overwritten with '\0' and
 * 1 is returned; otherwise str is left alone and 0 is returned.  An
 * empty string has no last character and yields 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len-1] == ',')
	{
		str[len-1] = '\0';
		return(1);
	}
	return(0);
}
295