1 #ifndef lint 2 static char *sccsid = "@(#)sortbib.c 4.5 (Berkeley) 03/07/91"; 3 #endif 4 5 #include <stdio.h> 6 #include <signal.h> 7 #include <string.h> 8 #include "pathnames.h" 9 10 #define BUF BUFSIZ 11 #define MXFILES 16 12 13 char *tempfile; /* temporary file for sorting keys */ 14 char *keystr = "AD"; /* default sorting on author and date */ 15 int multauth = 0; /* by default sort on senior author only */ 16 int oneauth; /* has there been author in the record? */ 17 18 main(argc, argv) /* sortbib: sort bibliographic database in place */ 19 int argc; 20 char *argv[]; 21 { 22 FILE *fp[MXFILES], *tfp, *fopen(); 23 int i; 24 void onintr(); 25 char *mktemp(); 26 27 if (argc == 1) /* can't use stdin for seeking anyway */ 28 { 29 puts("Usage: sortbib [-sKEYS] database [...]"); 30 puts("\t-s: sort by fields in KEYS (default is AD)"); 31 exit(1); 32 } 33 if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') 34 { 35 keystr = argv[1]+2; 36 eval(keystr); /* evaluate A+ for multiple authors */ 37 argv++; argc--; 38 } 39 if (argc > MXFILES+1) /* too many open file streams */ 40 { 41 fprintf(stderr, 42 "sortbib: More than %d databases specified\n", MXFILES); 43 exit(1); 44 } 45 for (i = 1; i < argc; i++) /* open files in arg list */ 46 if ((fp[i-1] = fopen(argv[i], "r")) == NULL) 47 error(argv[i]); 48 tempfile = mktemp(strdup(_PATH_TMPS)); /* tempfile for sorting keys */ 49 if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */ 50 signal(SIGINT, onintr); 51 if ((tfp = fopen(tempfile, "w")) == NULL) 52 error(tempfile); 53 for (i = 0; i < argc-1; i++) /* read keys from bib files */ 54 sortbib(fp[i], tfp, i); 55 fclose(tfp); 56 deliver(fp, tfp); /* do disk seeks and read from biblio files */ 57 unlink(tempfile); 58 exit(0); 59 } 60 61 int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */ 62 63 sortbib(fp, tfp, i) /* read records, prepare list for sorting */ 64 FILE *fp, *tfp; 65 int i; 66 { 67 long offset, lastoffset = 0, ftell(); /* byte offsets in file */ 68 int length, newrec, recno = 0; /* reclen, new rec'd?, number */ 69 char line[BUF], fld[4][BUF]; /* one line, the sort fields */ 70 71 /* measure byte offset, then get new line */ 72 while (offset = ftell(fp), fgets(line, BUF, fp)) 73 { 74 if (recno == 0) /* accept record w/o initial newline */ 75 newrec = 1; 76 if (line[0] == '\n') /* accept null line record separator */ 77 { 78 if (!rsmode) 79 rsmode = 1; /* null line mode */ 80 if (rsmode == 1) 81 newrec = 1; 82 } 83 if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */ 84 { 85 if (!rsmode) 86 rsmode = 2; /* bracket pair mode */ 87 if (rsmode == 2) 88 newrec = 1; 89 } 90 if (newrec) /* by whatever means above */ 91 { 92 newrec = 0; 93 length = offset - lastoffset; /* measure rec len */ 94 if (length > BUF*8) 95 { 96 fprintf(stderr, 97 "sortbib: record %d longer than %d (%d)\n", 98 recno, BUF*8, length); 99 exit(1); 100 } 101 if (recno++) /* info for sorting */ 102 { 103 fprintf(tfp, "%d %D %d : %s %s %s %s\n", 104 i, lastoffset, length, 105 fld[0], fld[1], fld[2], fld[3]); 106 if (ferror(tfp)) 107 error(tempfile); 108 } 109 *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL; 110 oneauth = 0; /* reset number of authors */ 111 lastoffset = offset; /* save for next time */ 112 } 113 if (line[0] == '%') /* parse out fields to be sorted */ 114 parse(line, fld); 115 } 116 offset = ftell(fp); /* measure byte offset at EOF */ 117 length = offset - lastoffset; /* measure final record length */ 118 if (length > BUF*8) 119 { 120 fprintf(stderr, "sortbib: record %d longer than %d (%d)\n", 121 recno, BUF*8, length); 122 exit(1); 123 } 124 if (line[0] != '\n') /* ignore null line just before EOF */ 125 { 126 fprintf(tfp, "%d %D %d : %s %s %s %s\n", 127 i, lastoffset, length, 128 fld[0], fld[1], fld[2], fld[3]); 129 if (ferror(tfp)) 130 error(tempfile); /* disk error in /tmp */ 131 } 132 } 133 134 deliver(fp, tfp) /* deliver sorted entries out of database(s) */ 135 FILE *fp[], *tfp; 136 { 137 char str[BUF], buff[BUF*8]; /* for tempfile & databases */ 138 char cmd[80]; /* for using system sort command */ 139 long int offset; 140 int i, length; 141 142 /* when sorting, ignore case distinctions; tab char is ':' */ 143 sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile); 144 if (system(cmd) == 127) 145 error("sortbib"); 146 tfp = fopen(tempfile, "r"); 147 while (fgets(str, sizeof(str), tfp)) 148 { 149 /* get file pointer, record offset, and length */ 150 if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3) 151 error("sortbib: sorting error"); 152 /* seek to proper disk location in proper file */ 153 if (fseek(fp[i], offset, 0) == -1) 154 error("sortbib"); 155 /* read exactly one record from bibliography */ 156 if (fread(buff, sizeof(*buff), length, fp[i]) == 0) 157 error("sortbib"); 158 /* add newline between unseparated records */ 159 if (buff[0] != '\n' && rsmode == 1) 160 putchar('\n'); 161 /* write record buffer to standard output */ 162 if (fwrite(buff, sizeof(*buff), length, stdout) == 0) 163 error("sortbib"); 164 } 165 } 166 167 parse(line, fld) /* get fields out of line, prepare for sorting */ 168 char line[]; 169 char fld[][BUF]; 170 { 171 char wd[8][BUF/4], *strcat(); 172 int n, i, j; 173 174 for (i = 0; i < 8; i++) /* zap out old strings */ 175 *wd[i] = NULL; 176 n = sscanf(line, "%s %s %s %s %s %s %s %s", 177 wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]); 178 for (i = 0; i < 4; i++) 179 { 180 if (wd[0][1] == keystr[i]) 181 { 182 if (wd[0][1] == 'A') 183 { 184 if (oneauth && !multauth) /* no repeat */ 185 break; 186 else if (oneauth) /* mult auths */ 187 strcat(fld[i], "~~"); 188 if (!endcomma(wd[n-2])) /* surname */ 189 strcat(fld[i], wd[n-1]); 190 else { /* jr. or ed. */ 191 strcat(fld[i], wd[n-2]); 192 n--; 193 } 194 strcat(fld[i], " "); 195 for (j = 1; j < n-1; j++) 196 strcat(fld[i], wd[j]); 197 oneauth = 1; 198 } 199 else if (wd[0][1] == 'D') 200 { 201 strcat(fld[i], wd[n-1]); /* year */ 202 if (n > 2) 203 strcat(fld[i], wd[1]); /* month */ 204 } 205 else if (wd[0][1] == 'T' || wd[0][1] == 'J') 206 { 207 j = 1; 208 if (article(wd[1])) /* skip article */ 209 j++; 210 for (; j < n; j++) 211 strcat(fld[i], wd[j]); 212 } 213 else /* any other field */ 214 for (j = 1; j < n; j++) 215 strcat(fld[i], wd[j]); 216 } 217 /* %Q quorporate or queer author - unreversed %A */ 218 else if (wd[0][1] == 'Q' && keystr[i] == 'A') 219 for (j = 1; j < n; j++) 220 strcat(fld[i], wd[j]); 221 } 222 } 223 224 article(str) /* see if string contains an article */ 225 char *str; 226 { 227 if (strcmp("The", str) == 0) /* English */ 228 return(1); 229 if (strcmp("A", str) == 0) 230 return(1); 231 if (strcmp("An", str) == 0) 232 return(1); 233 if (strcmp("Le", str) == 0) /* French */ 234 return(1); 235 if (strcmp("La", str) == 0) 236 return(1); 237 if (strcmp("Der", str) == 0) /* German */ 238 return(1); 239 if (strcmp("Die", str) == 0) 240 return(1); 241 if (strcmp("Das", str) == 0) 242 return(1); 243 if (strcmp("El", str) == 0) /* Spanish */ 244 return(1); 245 if (strcmp("Den", str) == 0) /* Scandinavian */ 246 return(1); 247 return(0); 248 } 249 250 eval(keystr) /* evaluate key string for A+ marking */ 251 char keystr[]; 252 { 253 int i, j; 254 255 for (i = 0, j = 0; keystr[i]; i++, j++) 256 { 257 if (keystr[i] == '+') 258 { 259 multauth = 1; 260 i++; 261 } 262 keystr[j] = keystr[i]; 263 } 264 keystr[j] = NULL; 265 } 266 267 error(s) /* exit in case of various system errors */ 268 char *s; 269 { 270 perror(s); 271 exit(1); 272 } 273 274 void 275 onintr() /* remove tempfile in case of interrupt */ 276 { 277 fprintf(stderr, "\nInterrupt\n"); 278 unlink(tempfile); 279 exit(1); 280 } 281 282 endcomma(str) 283 char *str; 284 { 285 int n; 286 287 n = strlen(str) - 1; 288 if (str[n] == ',') 289 { 290 str[n] = NULL; 291 return(1); 292 } 293 return(0); 294 } 295