xref: /original-bsd/old/refer/sortbib/sortbib.c (revision 5848352c)
1 #ifndef lint
2 static char *sccsid = "@(#)sortbib.c	4.2 (Berkeley) 10/22/87";
3 #endif
4 
5 #include <stdio.h>
6 #include <signal.h>
7 #define BUF BUFSIZ
8 #define MXFILES 16
9 
/*
 * Program-wide state.
 *   keystr   - letters of the % fields to sort on; author then date
 *              unless main() replaces it from a -s argument
 *   tempfile - name of the scratch file holding the sort keys; main()
 *              assigns it and onintr() removes it
 *   oneauth  - nonzero once an author has been stored for the current
 *              record; cleared by sortbib(), set by parse()
 *   multauth - set by eval() when the key string contains '+', meaning
 *              every author of a record is sorted on, not just the first
 */
char *keystr = "AD";
char *tempfile;
int oneauth, multauth = 0;
14 
15 main(argc, argv)	/* sortbib: sort bibliographic database in place */
16 int argc;
17 char *argv[];
18 {
19 	FILE *fp[MXFILES], *tfp, *fopen();
20 	int i, onintr();
21 	char *mktemp();
22 
23 	if (argc == 1)		/* can't use stdin for seeking anyway */
24 	{
25 		puts("Usage:  sortbib [-sKEYS] database [...]");
26 		puts("\t-s: sort by fields in KEYS (default is AD)");
27 		exit(1);
28 	}
29 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
30 	{
31 		keystr = argv[1]+2;
32 		eval(keystr);		/* evaluate A+ for multiple authors */
33 		argv++; argc--;
34 	}
35 	if (argc > MXFILES+1)	/* too many open file streams */
36 	{
37 		fprintf(stderr,
38 		"sortbib: More than %d databases specified\n", MXFILES);
39 		exit(1);
40 	}
41 	for (i = 1; i < argc; i++)		/* open files in arg list */
42 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
43 			error(argv[i]);
44 	tempfile = "/tmp/SbibXXXXX";		/* tempfile for sorting keys */
45 	mktemp(tempfile);
46 	if (signal(SIGINT,SIG_IGN) != SIG_IGN)	/* remove if interrupted */
47 		signal(SIGINT, onintr);
48 	if ((tfp = fopen(tempfile, "w")) == NULL)
49 		error(tempfile);
50 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
51 		sortbib(fp[i], tfp, i);
52 	fclose(tfp);
53 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
54 	unlink(tempfile);
55 	exit(0);
56 }
57 
58 int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
59 
60 sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
61 FILE *fp, *tfp;
62 int i;
63 {
64 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
65 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
66 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
67 
68 	/* measure byte offset, then get new line */
69 	while (offset = ftell(fp), fgets(line, BUF, fp))
70 	{
71 		if (recno == 0)		/* accept record w/o initial newline */
72 			newrec = 1;
73 		if (line[0] == '\n')	/* accept null line record separator */
74 		{
75 			if (!rsmode)
76 				rsmode = 1;	/* null line mode */
77 			if (rsmode == 1)
78 				newrec = 1;
79 		}
80 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
81 		{
82 			if (!rsmode)
83 				rsmode = 2;	/* bracket pair mode */
84 			if (rsmode == 2)
85 				newrec = 1;
86 		}
87 		if (newrec)		/* by whatever means above */
88 		{
89 			newrec = 0;
90 			length = offset - lastoffset;	/* measure rec len */
91 			if (length > BUF*8)
92 			{
93 				fprintf(stderr,
94 				"sortbib: record %d longer than %d (%d)\n",
95 					recno, BUF*8, length);
96 				exit(1);
97 			}
98 			if (recno++)			/* info for sorting */
99 			{
100 				fprintf(tfp, "%d %D %d : %s %s %s %s\n",
101 					i, lastoffset, length,
102 					fld[0], fld[1], fld[2], fld[3]);
103 				if (ferror(tfp))
104 					error(tempfile);
105 			}
106 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
107 			oneauth = 0;		/* reset number of authors */
108 			lastoffset = offset;	/* save for next time */
109 		}
110 		if (line[0] == '%')	/* parse out fields to be sorted */
111 			parse(line, fld);
112 	}
113 	offset = ftell(fp);		/* measure byte offset at EOF */
114 	length = offset - lastoffset;	/* measure final record length */
115 	if (length > BUF*8)
116 	{
117 		fprintf(stderr, "sortbib: record %d longer than %d (%d)\n",
118 			recno, BUF*8, length);
119 		exit(1);
120 	}
121 	if (line[0] != '\n')		/* ignore null line just before EOF */
122 	{
123 		fprintf(tfp, "%d %D %d : %s %s %s %s\n",
124 			i, lastoffset, length,
125 			fld[0], fld[1], fld[2], fld[3]);
126 		if (ferror(tfp))
127 			error(tempfile);	/* disk error in /tmp */
128 	}
129 }
130 
131 deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
132 FILE *fp[], *tfp;
133 {
134 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
135 	char cmd[80];			/* for using system sort command */
136 	long int offset;
137 	int i, length;
138 
139 	/* when sorting, ignore case distinctions; tab char is ':' */
140 	sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile);
141 	if (system(cmd) == 127)
142 		error("sortbib");
143 	tfp = fopen(tempfile, "r");
144 	while (fgets(str, sizeof(str), tfp))
145 	{
146 		/* get file pointer, record offset, and length */
147 		if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3)
148 			error("sortbib: sorting error");
149 		/* seek to proper disk location in proper file */
150 		if (fseek(fp[i], offset, 0) == -1)
151 			error("sortbib");
152 		/* read exactly one record from bibliography */
153 		if (fread(buff, sizeof(*buff), length, fp[i]) == 0)
154 			error("sortbib");
155 		/* add newline between unseparated records */
156 		if (buff[0] != '\n' && rsmode == 1)
157 			putchar('\n');
158 		/* write record buffer to standard output */
159 		if (fwrite(buff, sizeof(*buff), length, stdout) == 0)
160 			error("sortbib");
161 	}
162 }
163 
164 parse(line, fld)	/* get fields out of line, prepare for sorting */
165 char line[];
166 char fld[][BUF];
167 {
168 	char wd[8][BUF/4], *strcat();
169 	int n, i, j;
170 
171 	for (i = 0; i < 8; i++)		/* zap out old strings */
172 		*wd[i] = NULL;
173 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
174 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
175 	for (i = 0; i < 4; i++)
176 	{
177 		if (wd[0][1] == keystr[i])
178 		{
179 			if (wd[0][1] == 'A')
180 			{
181 				if (oneauth && !multauth)	/* no repeat */
182 					break;
183 				else if (oneauth)		/* mult auths */
184 					strcat(fld[i], "~~");
185 				if (!endcomma(wd[n-2]))		/* surname */
186 					strcat(fld[i], wd[n-1]);
187 				else {				/* jr. or ed. */
188 					strcat(fld[i], wd[n-2]);
189 					n--;
190 				}
191 				strcat(fld[i], " ");
192 				for (j = 1; j < n-1; j++)
193 					strcat(fld[i], wd[j]);
194 				oneauth = 1;
195 			}
196 			else if (wd[0][1] == 'D')
197 			{
198 				strcat(fld[i], wd[n-1]);	/* year */
199 				if (n > 2)
200 					strcat(fld[i], wd[1]);	/* month */
201 			}
202 			else if (wd[0][1] == 'T' || wd[0][1] == 'J')
203 			{
204 				j = 1;
205 				if (article(wd[1]))	/* skip article */
206 					j++;
207 				for (; j < n; j++)
208 					strcat(fld[i], wd[j]);
209 			}
210 			else  /* any other field */
211 				for (j = 1; j < n; j++)
212 					strcat(fld[i], wd[j]);
213 		}
214 		/* %Q quorporate or queer author - unreversed %A */
215 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
216 			for (j = 1; j < n; j++)
217 				strcat(fld[i], wd[j]);
218 	}
219 }
220 
/*
 * article: tell whether str is exactly one of the articles that are
 * skipped at the start of a title.  The comparison is case-sensitive.
 * Returns 1 for an article and 0 for anything else.
 */
int
article(str)
char *str;
{
	static char *known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den",			/* Scandinavian */
		0			/* end of list */
	};
	char **p;

	for (p = known; *p != 0; p++)
		if (strcmp(*p, str) == 0)
			return(1);
	return(0);
}
246 
247 eval(keystr)		/* evaluate key string for A+ marking */
248 char keystr[];
249 {
250 	int i, j;
251 
252 	for (i = 0, j = 0; keystr[i]; i++, j++)
253 	{
254 		if (keystr[i] == '+')
255 		{
256 			multauth = 1;
257 			i++;
258 		}
259 		keystr[j] = keystr[i];
260 	}
261 	keystr[j] = NULL;
262 }
263 
/*
 * error: give up after a failed system operation.
 * Hands s to perror() as the message prefix, then exits with status 1;
 * control never returns to the caller.
 */
int
error(s)
char *s;
{
	perror(s);
	exit(1);
}
270 
271 onintr()		/* remove tempfile in case of interrupt */
272 {
273 	fprintf(stderr, "\nInterrupt\n");
274 	unlink(tempfile);
275 	exit(1);
276 }
277 
/*
 * endcomma: if str ends in a comma, remove that comma.
 *
 *   str - writable, NUL-terminated string; modified in place
 *
 * Returns 1 when a trailing comma was removed, 0 otherwise.  An empty
 * string is left alone and yields 0; nothing before str[0] is touched.
 */
int
endcomma(str)
char *str;
{
	char *last;

	if (*str == '\0')		/* no last character to look at */
		return(0);
	for (last = str; last[1] != '\0'; last++)
		;			/* advance to the final character */
	if (*last == ',')
	{
		*last = '\0';
		return(1);
	}
	return(0);
}
291