xref: /original-bsd/old/refer/sortbib/sortbib.c (revision 16bc4816)
1 /*-
2  * %sccs.include.proprietary.c%
3  */
4 
5 #ifndef lint
6 static char sccsid[] = "@(#)sortbib.c	4.6 (Berkeley) 04/18/91";
7 #endif /* not lint */
8 
9 #include <stdio.h>
10 #include <signal.h>
11 #include <string.h>
12 #include "pathnames.h"
13 
/*
 * BUF is the size of one input line and of one sort field; a whole
 * record may be at most BUF*8 bytes (see sortbib and deliver).
 */
#define BUF BUFSIZ
/* MXFILES is the most databases that may be named on the command line. */
#define MXFILES 16
16 
/*
 * State shared by the routines below.
 */
char *tempfile;			/* name of the temporary sort-key file; set in main from _PATH_TMPS */
char *keystr = "AD";		/* sort field letters; default is author, then date (replaced by -s) */
int multauth = 0;		/* 0: sort on senior author only; eval sets 1 when the keys contain '+' */
int oneauth;			/* set by parse once a record has an author; reset by sortbib per record */
21 
22 main(argc, argv)	/* sortbib: sort bibliographic database in place */
23 int argc;
24 char *argv[];
25 {
26 	FILE *fp[MXFILES], *tfp, *fopen();
27 	int i;
28 	void onintr();
29 	char *mktemp();
30 
31 	if (argc == 1)		/* can't use stdin for seeking anyway */
32 	{
33 		puts("Usage:  sortbib [-sKEYS] database [...]");
34 		puts("\t-s: sort by fields in KEYS (default is AD)");
35 		exit(1);
36 	}
37 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
38 	{
39 		keystr = argv[1]+2;
40 		eval(keystr);		/* evaluate A+ for multiple authors */
41 		argv++; argc--;
42 	}
43 	if (argc > MXFILES+1)	/* too many open file streams */
44 	{
45 		fprintf(stderr,
46 		"sortbib: More than %d databases specified\n", MXFILES);
47 		exit(1);
48 	}
49 	for (i = 1; i < argc; i++)		/* open files in arg list */
50 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
51 			error(argv[i]);
52 	tempfile = mktemp(strdup(_PATH_TMPS));	/* tempfile for sorting keys */
53 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
54 		signal(SIGINT, onintr);
55 	if ((tfp = fopen(tempfile, "w")) == NULL)
56 		error(tempfile);
57 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
58 		sortbib(fp[i], tfp, i);
59 	fclose(tfp);
60 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
61 	unlink(tempfile);
62 	exit(0);
63 }
64 
/*
 * Record separator style.  It starts at 0 (not yet known); sortbib sets
 * it the first time it sees a separator and deliver consults it when
 * writing records out.
 */
int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
66 
67 sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
68 FILE *fp, *tfp;
69 int i;
70 {
71 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
72 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
73 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
74 
75 	/* measure byte offset, then get new line */
76 	while (offset = ftell(fp), fgets(line, BUF, fp))
77 	{
78 		if (recno == 0)		/* accept record w/o initial newline */
79 			newrec = 1;
80 		if (line[0] == '\n')	/* accept null line record separator */
81 		{
82 			if (!rsmode)
83 				rsmode = 1;	/* null line mode */
84 			if (rsmode == 1)
85 				newrec = 1;
86 		}
87 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
88 		{
89 			if (!rsmode)
90 				rsmode = 2;	/* bracket pair mode */
91 			if (rsmode == 2)
92 				newrec = 1;
93 		}
94 		if (newrec)		/* by whatever means above */
95 		{
96 			newrec = 0;
97 			length = offset - lastoffset;	/* measure rec len */
98 			if (length > BUF*8)
99 			{
100 				fprintf(stderr,
101 				"sortbib: record %d longer than %d (%d)\n",
102 					recno, BUF*8, length);
103 				exit(1);
104 			}
105 			if (recno++)			/* info for sorting */
106 			{
107 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
108 					i, lastoffset, length,
109 					fld[0], fld[1], fld[2], fld[3]);
110 				if (ferror(tfp))
111 					error(tempfile);
112 			}
113 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
114 			oneauth = 0;		/* reset number of authors */
115 			lastoffset = offset;	/* save for next time */
116 		}
117 		if (line[0] == '%')	/* parse out fields to be sorted */
118 			parse(line, fld);
119 	}
120 	offset = ftell(fp);		/* measure byte offset at EOF */
121 	length = offset - lastoffset;	/* measure final record length */
122 	if (length > BUF*8)
123 	{
124 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
125 			recno, BUF*8, length);
126 		exit(1);
127 	}
128 	if (line[0] != '\n')		/* ignore null line just before EOF */
129 	{
130 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
131 			i, lastoffset, length,
132 			fld[0], fld[1], fld[2], fld[3]);
133 		if (ferror(tfp))
134 			error(tempfile);	/* disk error in /tmp */
135 	}
136 }
137 
138 deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
139 FILE *fp[], *tfp;
140 {
141 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
142 	char cmd[80];			/* for using system sort command */
143 	long int offset;
144 	int i, length;
145 
146 	/* when sorting, ignore case distinctions; tab char is ':' */
147 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
148 	if (system(cmd) == 127)
149 		error("sortbib");
150 	tfp = fopen(tempfile, "r");
151 	while (fgets(str, sizeof(str), tfp))
152 	{
153 		/* get file pointer, record offset, and length */
154 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
155 			error("sortbib: sorting error");
156 		/* seek to proper disk location in proper file */
157 		if (fseek(fp[i], offset, 0) == -1)
158 			error("sortbib");
159 		/* read exactly one record from bibliography */
160 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
161 			error("sortbib");
162 		/* add newline between unseparated records */
163 		if (buff[0] != '\n' && rsmode == 1)
164 			putchar('\n');
165 		/* write record buffer to standard output */
166 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
167 			error("sortbib");
168 	}
169 }
170 
171 parse(line, fld)	/* get fields out of line, prepare for sorting */
172 char line[];
173 char fld[][BUF];
174 {
175 	char wd[8][BUF/4], *strcat();
176 	int n, i, j;
177 
178 	for (i = 0; i < 8; i++)		/* zap out old strings */
179 		*wd[i] = NULL;
180 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
181 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
182 	for (i = 0; i < 4; i++)
183 	{
184 		if (wd[0][1] == keystr[i])
185 		{
186 			if (wd[0][1] == 'A')
187 			{
188 				if (oneauth && !multauth)	/* no repeat */
189 					break;
190 				else if (oneauth)		/* mult auths */
191 					strcat(fld[i], "~~");
192 				if (!endcomma(wd[n-2]))		/* surname */
193 					strcat(fld[i], wd[n-1]);
194 				else {				/* jr. or ed. */
195 					strcat(fld[i], wd[n-2]);
196 					n--;
197 				}
198 				strcat(fld[i], " ");
199 				for (j = 1; j < n-1; j++)
200 					strcat(fld[i], wd[j]);
201 				oneauth = 1;
202 			}
203 			else if (wd[0][1] == 'D')
204 			{
205 				strcat(fld[i], wd[n-1]);	/* year */
206 				if (n > 2)
207 					strcat(fld[i], wd[1]);	/* month */
208 			}
209 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
210 			{
211 				j = 1;
212 				if (article(wd[1]))	/* skip article */
213 					j++;
214 				for (; j < n; j++)
215 					strcat(fld[i], wd[j]);
216 			}
217 			else  /* any other field */
218 				for (j = 1; j < n; j++)
219 					strcat(fld[i], wd[j]);
220 		}
221 		/* %Q quorporate or queer author - unreversed %A */
222 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
223 			for (j = 1; j < n; j++)
224 				strcat(fld[i], wd[j]);
225 	}
226 }
227 
/*
 * article: report whether str is, exactly and case-sensitively, one of
 * the leading articles skipped when sorting titles.
 * Returns 1 if it is, 0 otherwise.
 */
article(str)		/* see if string contains an article */
char *str;
{
	static char *known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	int k;

	for (k = 0; k < (int)(sizeof(known) / sizeof(known[0])); k++)
		if (strcmp(known[k], str) == 0)
			return(1);
	return(0);
}
253 
254 eval(keystr)		/* evaluate key string for A+ marking */
255 char keystr[];
256 {
257 	int i, j;
258 
259 	for (i = 0, j = 0; keystr[i]; i++, j++)
260 	{
261 		if (keystr[i] == '+')
262 		{
263 			multauth = 1;
264 			i++;
265 		}
266 		keystr[j] = keystr[i];
267 	}
268 	keystr[j] = NULL;
269 }
270 
/*
 * error: report a system error and give up.
 *
 * Prints s followed by the message for the current errno, removes the
 * temporary key file if its name has been set, and exits with status 1,
 * so an error exit no longer leaves the key file behind.
 */
int
error(s)		/* exit in case of various system errors */
char *s;
{
	extern char *tempfile;

	perror(s);		/* first, before unlink can disturb errno */
	if (tempfile != NULL)
		unlink(tempfile);
	exit(1);
	/* NOTREACHED */
}
277 
278 void
279 onintr()		/* remove tempfile in case of interrupt */
280 {
281 	fprintf(stderr, "\nInterrupt\n");
282 	unlink(tempfile);
283 	exit(1);
284 }
285 
/*
 * endcomma: if str ends in a comma, remove the comma in place.
 * Returns 1 when a comma was removed and 0 otherwise; an empty string
 * is left alone rather than being read at index -1.
 */
int
endcomma(str)
char *str;
{
	int len;

	len = strlen(str);
	if (len > 0 && str[len-1] == ',')
	{
		str[len-1] = '\0';
		return(1);
	}
	return(0);
}
299