xref: /original-bsd/old/refer/sortbib/sortbib.c (revision 7e95f7c4)
1 #ifndef lint
2 static char *sccsid = "@(#)sortbib.c	4.3 (Berkeley) 05/11/89";
3 #endif
4 
5 #include <stdio.h>
6 #include <signal.h>
7 #include "pathnames.h"
8 
#define BUF BUFSIZ		/* size of the line and sort-field buffers */
#define MXFILES 16		/* most databases main() will open in one run */

char *tempfile;			/* name of the temporary file of sorting keys;
				   set by main(), removed by onintr() */
char *keystr = "AD";		/* field letters to sort on; the default is
				   author and date, -s replaces it */
int multauth = 0;		/* by default sort on senior author only;
				   eval() sets it when the keys contain '+' */
int oneauth;			/* has there been author in the record?
				   set by parse(), reset by sortbib() */
16 
17 main(argc, argv)	/* sortbib: sort bibliographic database in place */
18 int argc;
19 char *argv[];
20 {
21 	FILE *fp[MXFILES], *tfp, *fopen();
22 	int i, onintr();
23 	char *mktemp();
24 
25 	if (argc == 1)		/* can't use stdin for seeking anyway */
26 	{
27 		puts("Usage:  sortbib [-sKEYS] database [...]");
28 		puts("\t-s: sort by fields in KEYS (default is AD)");
29 		exit(1);
30 	}
31 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
32 	{
33 		keystr = argv[1]+2;
34 		eval(keystr);		/* evaluate A+ for multiple authors */
35 		argv++; argc--;
36 	}
37 	if (argc > MXFILES+1)	/* too many open file streams */
38 	{
39 		fprintf(stderr,
40 		"sortbib: More than %d databases specified\n", MXFILES);
41 		exit(1);
42 	}
43 	for (i = 1; i < argc; i++)		/* open files in arg list */
44 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
45 			error(argv[i]);
46 	tempfile = _PATH_TMPS;			/* tempfile for sorting keys */
47 	mktemp(tempfile);
48 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
49 		signal(SIGINT, onintr);
50 	if ((tfp = fopen(tempfile, "w")) == NULL)
51 		error(tempfile);
52 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
53 		sortbib(fp[i], tfp, i);
54 	fclose(tfp);
55 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
56 	unlink(tempfile);
57 	exit(0);
58 }
59 
int rsmode = 0;		/* record separator: 0 = none seen yet,
			   1 = null line, 2 = bracket */
61 
62 sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
63 FILE *fp, *tfp;
64 int i;
65 {
66 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
67 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
68 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
69 
70 	/* measure byte offset, then get new line */
71 	while (offset = ftell(fp), fgets(line, BUF, fp))
72 	{
73 		if (recno == 0)		/* accept record w/o initial newline */
74 			newrec = 1;
75 		if (line[0] == '\n')	/* accept null line record separator */
76 		{
77 			if (!rsmode)
78 				rsmode = 1;	/* null line mode */
79 			if (rsmode == 1)
80 				newrec = 1;
81 		}
82 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
83 		{
84 			if (!rsmode)
85 				rsmode = 2;	/* bracket pair mode */
86 			if (rsmode == 2)
87 				newrec = 1;
88 		}
89 		if (newrec)		/* by whatever means above */
90 		{
91 			newrec = 0;
92 			length = offset - lastoffset;	/* measure rec len */
93 			if (length > BUF*8)
94 			{
95 				fprintf(stderr,
96 				"sortbib: record %d longer than %d (%d)\n",
97 					recno, BUF*8, length);
98 				exit(1);
99 			}
100 			if (recno++)			/* info for sorting */
101 			{
102 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
103 					i, lastoffset, length,
104 					fld[0], fld[1], fld[2], fld[3]);
105 				if (ferror(tfp))
106 					error(tempfile);
107 			}
108 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
109 			oneauth = 0;		/* reset number of authors */
110 			lastoffset = offset;	/* save for next time */
111 		}
112 		if (line[0] == '%')	/* parse out fields to be sorted */
113 			parse(line, fld);
114 	}
115 	offset = ftell(fp);		/* measure byte offset at EOF */
116 	length = offset - lastoffset;	/* measure final record length */
117 	if (length > BUF*8)
118 	{
119 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
120 			recno, BUF*8, length);
121 		exit(1);
122 	}
123 	if (line[0] != '\n')		/* ignore null line just before EOF */
124 	{
125 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
126 			i, lastoffset, length,
127 			fld[0], fld[1], fld[2], fld[3]);
128 		if (ferror(tfp))
129 			error(tempfile);	/* disk error in /tmp */
130 	}
131 }
132 
133 deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
134 FILE *fp[], *tfp;
135 {
136 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
137 	char cmd[80];			/* for using system sort command */
138 	long int offset;
139 	int i, length;
140 
141 	/* when sorting, ignore case distinctions; tab char is ':' */
142 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
143 	if (system(cmd) == 127)
144 		error("sortbib");
145 	tfp = fopen(tempfile, "r");
146 	while (fgets(str, sizeof(str), tfp))
147 	{
148 		/* get file pointer, record offset, and length */
149 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
150 			error("sortbib: sorting error");
151 		/* seek to proper disk location in proper file */
152 		if (fseek(fp[i], offset, 0) == -1)
153 			error("sortbib");
154 		/* read exactly one record from bibliography */
155 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
156 			error("sortbib");
157 		/* add newline between unseparated records */
158 		if (buff[0] != '\n' && rsmode == 1)
159 			putchar('\n');
160 		/* write record buffer to standard output */
161 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
162 			error("sortbib");
163 	}
164 }
165 
166 parse(line, fld)	/* get fields out of line, prepare for sorting */
167 char line[];
168 char fld[][BUF];
169 {
170 	char wd[8][BUF/4], *strcat();
171 	int n, i, j;
172 
173 	for (i = 0; i < 8; i++)		/* zap out old strings */
174 		*wd[i] = NULL;
175 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
176 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
177 	for (i = 0; i < 4; i++)
178 	{
179 		if (wd[0][1] == keystr[i])
180 		{
181 			if (wd[0][1] == 'A')
182 			{
183 				if (oneauth && !multauth)	/* no repeat */
184 					break;
185 				else if (oneauth)		/* mult auths */
186 					strcat(fld[i], "~~");
187 				if (!endcomma(wd[n-2]))		/* surname */
188 					strcat(fld[i], wd[n-1]);
189 				else {				/* jr. or ed. */
190 					strcat(fld[i], wd[n-2]);
191 					n--;
192 				}
193 				strcat(fld[i], " ");
194 				for (j = 1; j < n-1; j++)
195 					strcat(fld[i], wd[j]);
196 				oneauth = 1;
197 			}
198 			else if (wd[0][1] == 'D')
199 			{
200 				strcat(fld[i], wd[n-1]);	/* year */
201 				if (n > 2)
202 					strcat(fld[i], wd[1]);	/* month */
203 			}
204 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
205 			{
206 				j = 1;
207 				if (article(wd[1]))	/* skip article */
208 					j++;
209 				for (; j < n; j++)
210 					strcat(fld[i], wd[j]);
211 			}
212 			else  /* any other field */
213 				for (j = 1; j < n; j++)
214 					strcat(fld[i], wd[j]);
215 		}
216 		/* %Q quorporate or queer author - unreversed %A */
217 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
218 			for (j = 1; j < n; j++)
219 				strcat(fld[i], wd[j]);
220 	}
221 }
222 
/*
 * article: see if a word is a leading article.
 *
 * str	the word to test; the comparison is exact and case-sensitive
 *
 * Returns 1 if str is one of the articles listed below, 0 otherwise.
 */
int
article(str)
char *str;
{
	static char *arts[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den",			/* Scandinavian */
		NULL
	};
	char **ap;

	for (ap = arts; *ap != NULL; ap++)
		if (strcmp(*ap, str) == 0)
			return(1);
	return(0);
}
248 
249 eval(keystr)		/* evaluate key string for A+ marking */
250 char keystr[];
251 {
252 	int i, j;
253 
254 	for (i = 0, j = 0; keystr[i]; i++, j++)
255 	{
256 		if (keystr[i] == '+')
257 		{
258 			multauth = 1;
259 			i++;
260 		}
261 		keystr[j] = keystr[i];
262 	}
263 	keystr[j] = NULL;
264 }
265 
/*
 * error: exit in case of various system errors.
 *
 * s	prefix for the message, normally the name of the file at fault
 *
 * Prints s and the text for the current errno with perror(), removes the
 * temporary key file if one has been named, and exits with status 1.
 * Does not return.
 */
int
error(s)
char *s;
{
	extern char *tempfile;		/* defined at file scope */

	perror(s);			/* first: unlink() may change errno */
	if (tempfile != NULL)		/* as onintr() does on interrupt */
		unlink(tempfile);
	exit(1);
}
272 
273 onintr()		/* remove tempfile in case of interrupt */
274 {
275 	fprintf(stderr, "\nInterrupt\n");
276 	unlink(tempfile);
277 	exit(1);
278 }
279 
/*
 * endcomma: see if a word ends in a comma, and remove the comma if so.
 *
 * str	the word to test; modified in place when it ends in ','
 *
 * Returns 1 if a trailing comma was found and removed, 0 otherwise.
 * An empty string is left alone and yields 0.
 */
int
endcomma(str)
char *str;
{
	int n;

	n = strlen(str);
	if (n > 0 && str[n-1] == ',')	/* n > 0: never look before str */
	{
		str[n-1] = '\0';
		return(1);
	}
	return(0);
}
293