1 /*- 2 * %sccs.include.proprietary.c% 3 */ 4 5 #ifndef lint 6 static char sccsid[] = "@(#)sortbib.c 4.6 (Berkeley) 04/18/91"; 7 #endif /* not lint */ 8 9 #include <stdio.h> 10 #include <signal.h> 11 #include <string.h> 12 #include "pathnames.h" 13 14 #define BUF BUFSIZ 15 #define MXFILES 16 16 17 char *tempfile; /* temporary file for sorting keys */ 18 char *keystr = "AD"; /* default sorting on author and date */ 19 int multauth = 0; /* by default sort on senior author only */ 20 int oneauth; /* has there been author in the record? */ 21 22 main(argc, argv) /* sortbib: sort bibliographic database in place */ 23 int argc; 24 char *argv[]; 25 { 26 FILE *fp[MXFILES], *tfp, *fopen(); 27 int i; 28 void onintr(); 29 char *mktemp(); 30 31 if (argc == 1) /* can't use stdin for seeking anyway */ 32 { 33 puts("Usage: sortbib [-sKEYS] database [...]"); 34 puts("\t-s: sort by fields in KEYS (default is AD)"); 35 exit(1); 36 } 37 if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') 38 { 39 keystr = argv[1]+2; 40 eval(keystr); /* evaluate A+ for multiple authors */ 41 argv++; argc--; 42 } 43 if (argc > MXFILES+1) /* too many open file streams */ 44 { 45 fprintf(stderr, 46 "sortbib: More than %d databases specified\n", MXFILES); 47 exit(1); 48 } 49 for (i = 1; i < argc; i++) /* open files in arg list */ 50 if ((fp[i-1] = fopen(argv[i], "r")) == NULL) 51 error(argv[i]); 52 tempfile = mktemp(strdup(_PATH_TMPS)); /* tempfile for sorting keys */ 53 if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */ 54 signal(SIGINT, onintr); 55 if ((tfp = fopen(tempfile, "w")) == NULL) 56 error(tempfile); 57 for (i = 0; i < argc-1; i++) /* read keys from bib files */ 58 sortbib(fp[i], tfp, i); 59 fclose(tfp); 60 deliver(fp, tfp); /* do disk seeks and read from biblio files */ 61 unlink(tempfile); 62 exit(0); 63 } 64 65 int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */ 66 67 sortbib(fp, tfp, i) /* read records, prepare list for sorting */ 68 FILE *fp, *tfp; 69 int i; 70 { 71 long offset, lastoffset = 0, ftell(); /* byte offsets in file */ 72 int length, newrec, recno = 0; /* reclen, new rec'd?, number */ 73 char line[BUF], fld[4][BUF]; /* one line, the sort fields */ 74 75 /* measure byte offset, then get new line */ 76 while (offset = ftell(fp), fgets(line, BUF, fp)) 77 { 78 if (recno == 0) /* accept record w/o initial newline */ 79 newrec = 1; 80 if (line[0] == '\n') /* accept null line record separator */ 81 { 82 if (!rsmode) 83 rsmode = 1; /* null line mode */ 84 if (rsmode == 1) 85 newrec = 1; 86 } 87 if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */ 88 { 89 if (!rsmode) 90 rsmode = 2; /* bracket pair mode */ 91 if (rsmode == 2) 92 newrec = 1; 93 } 94 if (newrec) /* by whatever means above */ 95 { 96 newrec = 0; 97 length = offset - lastoffset; /* measure rec len */ 98 if (length > BUF*8) 99 { 100 fprintf(stderr, 101 "sortbib: record %d longer than %d (%d)\n", 102 recno, BUF*8, length); 103 exit(1); 104 } 105 if (recno++) /* info for sorting */ 106 { 107 fprintf(tfp, "%d %D %d : %s %s %s %s\n", 108 i, lastoffset, length, 109 fld[0], fld[1], fld[2], fld[3]); 110 if (ferror(tfp)) 111 error(tempfile); 112 } 113 *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL; 114 oneauth = 0; /* reset number of authors */ 115 lastoffset = offset; /* save for next time */ 116 } 117 if (line[0] == '%') /* parse out fields to be sorted */ 118 parse(line, fld); 119 } 120 offset = ftell(fp); /* measure byte offset at EOF */ 121 length = offset - lastoffset; /* measure final record length */ 122 if (length > BUF*8) 123 { 124 fprintf(stderr, "sortbib: record %d longer than %d (%d)\n", 125 recno, BUF*8, length); 126 exit(1); 127 } 128 if (line[0] != '\n') /* ignore null line just before EOF */ 129 { 130 fprintf(tfp, "%d %D %d : %s %s %s %s\n", 131 i, lastoffset, length, 132 fld[0], fld[1], fld[2], fld[3]); 133 if (ferror(tfp)) 134 error(tempfile); /* disk error in /tmp */ 135 } 136 } 137 138 deliver(fp, tfp) /* deliver sorted entries out of database(s) */ 139 FILE *fp[], *tfp; 140 { 141 char str[BUF], buff[BUF*8]; /* for tempfile & databases */ 142 char cmd[80]; /* for using system sort command */ 143 long int offset; 144 int i, length; 145 146 /* when sorting, ignore case distinctions; tab char is ':' */ 147 sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile); 148 if (system(cmd) == 127) 149 error("sortbib"); 150 tfp = fopen(tempfile, "r"); 151 while (fgets(str, sizeof(str), tfp)) 152 { 153 /* get file pointer, record offset, and length */ 154 if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3) 155 error("sortbib: sorting error"); 156 /* seek to proper disk location in proper file */ 157 if (fseek(fp[i], offset, 0) == -1) 158 error("sortbib"); 159 /* read exactly one record from bibliography */ 160 if (fread(buff, sizeof(*buff), length, fp[i]) == 0) 161 error("sortbib"); 162 /* add newline between unseparated records */ 163 if (buff[0] != '\n' && rsmode == 1) 164 putchar('\n'); 165 /* write record buffer to standard output */ 166 if (fwrite(buff, sizeof(*buff), length, stdout) == 0) 167 error("sortbib"); 168 } 169 } 170 171 parse(line, fld) /* get fields out of line, prepare for sorting */ 172 char line[]; 173 char fld[][BUF]; 174 { 175 char wd[8][BUF/4], *strcat(); 176 int n, i, j; 177 178 for (i = 0; i < 8; i++) /* zap out old strings */ 179 *wd[i] = NULL; 180 n = sscanf(line, "%s %s %s %s %s %s %s %s", 181 wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]); 182 for (i = 0; i < 4; i++) 183 { 184 if (wd[0][1] == keystr[i]) 185 { 186 if (wd[0][1] == 'A') 187 { 188 if (oneauth && !multauth) /* no repeat */ 189 break; 190 else if (oneauth) /* mult auths */ 191 strcat(fld[i], "~~"); 192 if (!endcomma(wd[n-2])) /* surname */ 193 strcat(fld[i], wd[n-1]); 194 else { /* jr. or ed. */ 195 strcat(fld[i], wd[n-2]); 196 n--; 197 } 198 strcat(fld[i], " "); 199 for (j = 1; j < n-1; j++) 200 strcat(fld[i], wd[j]); 201 oneauth = 1; 202 } 203 else if (wd[0][1] == 'D') 204 { 205 strcat(fld[i], wd[n-1]); /* year */ 206 if (n > 2) 207 strcat(fld[i], wd[1]); /* month */ 208 } 209 else if (wd[0][1] == 'T' || wd[0][1] == 'J') 210 { 211 j = 1; 212 if (article(wd[1])) /* skip article */ 213 j++; 214 for (; j < n; j++) 215 strcat(fld[i], wd[j]); 216 } 217 else /* any other field */ 218 for (j = 1; j < n; j++) 219 strcat(fld[i], wd[j]); 220 } 221 /* %Q quorporate or queer author - unreversed %A */ 222 else if (wd[0][1] == 'Q' && keystr[i] == 'A') 223 for (j = 1; j < n; j++) 224 strcat(fld[i], wd[j]); 225 } 226 } 227 228 article(str) /* see if string contains an article */ 229 char *str; 230 { 231 if (strcmp("The", str) == 0) /* English */ 232 return(1); 233 if (strcmp("A", str) == 0) 234 return(1); 235 if (strcmp("An", str) == 0) 236 return(1); 237 if (strcmp("Le", str) == 0) /* French */ 238 return(1); 239 if (strcmp("La", str) == 0) 240 return(1); 241 if (strcmp("Der", str) == 0) /* German */ 242 return(1); 243 if (strcmp("Die", str) == 0) 244 return(1); 245 if (strcmp("Das", str) == 0) 246 return(1); 247 if (strcmp("El", str) == 0) /* Spanish */ 248 return(1); 249 if (strcmp("Den", str) == 0) /* Scandinavian */ 250 return(1); 251 return(0); 252 } 253 254 eval(keystr) /* evaluate key string for A+ marking */ 255 char keystr[]; 256 { 257 int i, j; 258 259 for (i = 0, j = 0; keystr[i]; i++, j++) 260 { 261 if (keystr[i] == '+') 262 { 263 multauth = 1; 264 i++; 265 } 266 keystr[j] = keystr[i]; 267 } 268 keystr[j] = NULL; 269 } 270 271 error(s) /* exit in case of various system errors */ 272 char *s; 273 { 274 perror(s); 275 exit(1); 276 } 277 278 void 279 onintr() /* remove tempfile in case of interrupt */ 280 { 281 fprintf(stderr, "\nInterrupt\n"); 282 unlink(tempfile); 283 exit(1); 284 } 285 286 endcomma(str) 287 char *str; 288 { 289 int n; 290 291 n = strlen(str) - 1; 292 if (str[n] == ',') 293 { 294 str[n] = NULL; 295 return(1); 296 } 297 return(0); 298 } 299