1 #ifndef lint 2 static char *sccsid = "@(#)sortbib.c 4.3 (Berkeley) 05/11/89"; 3 #endif 4 5 #include <stdio.h> 6 #include <signal.h> 7 #include "pathnames.h" 8 9 #define BUF BUFSIZ 10 #define MXFILES 16 11 12 char *tempfile; /* temporary file for sorting keys */ 13 char *keystr = "AD"; /* default sorting on author and date */ 14 int multauth = 0; /* by default sort on senior author only */ 15 int oneauth; /* has there been author in the record? */ 16 17 main(argc, argv) /* sortbib: sort bibliographic database in place */ 18 int argc; 19 char *argv[]; 20 { 21 FILE *fp[MXFILES], *tfp, *fopen(); 22 int i, onintr(); 23 char *mktemp(); 24 25 if (argc == 1) /* can't use stdin for seeking anyway */ 26 { 27 puts("Usage: sortbib [-sKEYS] database [...]"); 28 puts("\t-s: sort by fields in KEYS (default is AD)"); 29 exit(1); 30 } 31 if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') 32 { 33 keystr = argv[1]+2; 34 eval(keystr); /* evaluate A+ for multiple authors */ 35 argv++; argc--; 36 } 37 if (argc > MXFILES+1) /* too many open file streams */ 38 { 39 fprintf(stderr, 40 "sortbib: More than %d databases specified\n", MXFILES); 41 exit(1); 42 } 43 for (i = 1; i < argc; i++) /* open files in arg list */ 44 if ((fp[i-1] = fopen(argv[i], "r")) == NULL) 45 error(argv[i]); 46 tempfile = _PATH_TMPS; /* tempfile for sorting keys */ 47 mktemp(tempfile); 48 if (signal(SIGINT,SIG_IGN) != SIG_IGN) /* remove if interrupted */ 49 signal(SIGINT, onintr); 50 if ((tfp = fopen(tempfile, "w")) == NULL) 51 error(tempfile); 52 for (i = 0; i < argc-1; i++) /* read keys from bib files */ 53 sortbib(fp[i], tfp, i); 54 fclose(tfp); 55 deliver(fp, tfp); /* do disk seeks and read from biblio files */ 56 unlink(tempfile); 57 exit(0); 58 } 59 60 int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */ 61 62 sortbib(fp, tfp, i) /* read records, prepare list for sorting */ 63 FILE *fp, *tfp; 64 int i; 65 { 66 long offset, lastoffset = 0, ftell(); /* byte offsets in file */ 67 int length, newrec, recno = 0; /* reclen, new rec'd?, number */ 68 char line[BUF], fld[4][BUF]; /* one line, the sort fields */ 69 70 /* measure byte offset, then get new line */ 71 while (offset = ftell(fp), fgets(line, BUF, fp)) 72 { 73 if (recno == 0) /* accept record w/o initial newline */ 74 newrec = 1; 75 if (line[0] == '\n') /* accept null line record separator */ 76 { 77 if (!rsmode) 78 rsmode = 1; /* null line mode */ 79 if (rsmode == 1) 80 newrec = 1; 81 } 82 if (line[0] == '.' && line[1] == '[') /* also accept .[ .] */ 83 { 84 if (!rsmode) 85 rsmode = 2; /* bracket pair mode */ 86 if (rsmode == 2) 87 newrec = 1; 88 } 89 if (newrec) /* by whatever means above */ 90 { 91 newrec = 0; 92 length = offset - lastoffset; /* measure rec len */ 93 if (length > BUF*8) 94 { 95 fprintf(stderr, 96 "sortbib: record %d longer than %d (%d)\n", 97 recno, BUF*8, length); 98 exit(1); 99 } 100 if (recno++) /* info for sorting */ 101 { 102 fprintf(tfp, "%d %D %d : %s %s %s %s\n", 103 i, lastoffset, length, 104 fld[0], fld[1], fld[2], fld[3]); 105 if (ferror(tfp)) 106 error(tempfile); 107 } 108 *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL; 109 oneauth = 0; /* reset number of authors */ 110 lastoffset = offset; /* save for next time */ 111 } 112 if (line[0] == '%') /* parse out fields to be sorted */ 113 parse(line, fld); 114 } 115 offset = ftell(fp); /* measure byte offset at EOF */ 116 length = offset - lastoffset; /* measure final record length */ 117 if (length > BUF*8) 118 { 119 fprintf(stderr, "sortbib: record %d longer than %d (%d)\n", 120 recno, BUF*8, length); 121 exit(1); 122 } 123 if (line[0] != '\n') /* ignore null line just before EOF */ 124 { 125 fprintf(tfp, "%d %D %d : %s %s %s %s\n", 126 i, lastoffset, length, 127 fld[0], fld[1], fld[2], fld[3]); 128 if (ferror(tfp)) 129 error(tempfile); /* disk error in /tmp */ 130 } 131 } 132 133 deliver(fp, tfp) /* deliver sorted entries out of database(s) */ 134 FILE *fp[], *tfp; 135 { 136 char str[BUF], buff[BUF*8]; /* for tempfile & databases */ 137 char cmd[80]; /* for using system sort command */ 138 long int offset; 139 int i, length; 140 141 /* when sorting, ignore case distinctions; tab char is ':' */ 142 sprintf(cmd, "sort -ft: +1 %s -o %s", tempfile, tempfile); 143 if (system(cmd) == 127) 144 error("sortbib"); 145 tfp = fopen(tempfile, "r"); 146 while (fgets(str, sizeof(str), tfp)) 147 { 148 /* get file pointer, record offset, and length */ 149 if (sscanf(str, "%d %D %d :", &i, &offset, &length) != 3) 150 error("sortbib: sorting error"); 151 /* seek to proper disk location in proper file */ 152 if (fseek(fp[i], offset, 0) == -1) 153 error("sortbib"); 154 /* read exactly one record from bibliography */ 155 if (fread(buff, sizeof(*buff), length, fp[i]) == 0) 156 error("sortbib"); 157 /* add newline between unseparated records */ 158 if (buff[0] != '\n' && rsmode == 1) 159 putchar('\n'); 160 /* write record buffer to standard output */ 161 if (fwrite(buff, sizeof(*buff), length, stdout) == 0) 162 error("sortbib"); 163 } 164 } 165 166 parse(line, fld) /* get fields out of line, prepare for sorting */ 167 char line[]; 168 char fld[][BUF]; 169 { 170 char wd[8][BUF/4], *strcat(); 171 int n, i, j; 172 173 for (i = 0; i < 8; i++) /* zap out old strings */ 174 *wd[i] = NULL; 175 n = sscanf(line, "%s %s %s %s %s %s %s %s", 176 wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]); 177 for (i = 0; i < 4; i++) 178 { 179 if (wd[0][1] == keystr[i]) 180 { 181 if (wd[0][1] == 'A') 182 { 183 if (oneauth && !multauth) /* no repeat */ 184 break; 185 else if (oneauth) /* mult auths */ 186 strcat(fld[i], "~~"); 187 if (!endcomma(wd[n-2])) /* surname */ 188 strcat(fld[i], wd[n-1]); 189 else { /* jr. or ed. */ 190 strcat(fld[i], wd[n-2]); 191 n--; 192 } 193 strcat(fld[i], " "); 194 for (j = 1; j < n-1; j++) 195 strcat(fld[i], wd[j]); 196 oneauth = 1; 197 } 198 else if (wd[0][1] == 'D') 199 { 200 strcat(fld[i], wd[n-1]); /* year */ 201 if (n > 2) 202 strcat(fld[i], wd[1]); /* month */ 203 } 204 else if (wd[0][1] == 'T' || wd[0][1] == 'J') 205 { 206 j = 1; 207 if (article(wd[1])) /* skip article */ 208 j++; 209 for (; j < n; j++) 210 strcat(fld[i], wd[j]); 211 } 212 else /* any other field */ 213 for (j = 1; j < n; j++) 214 strcat(fld[i], wd[j]); 215 } 216 /* %Q quorporate or queer author - unreversed %A */ 217 else if (wd[0][1] == 'Q' && keystr[i] == 'A') 218 for (j = 1; j < n; j++) 219 strcat(fld[i], wd[j]); 220 } 221 } 222 223 article(str) /* see if string contains an article */ 224 char *str; 225 { 226 if (strcmp("The", str) == 0) /* English */ 227 return(1); 228 if (strcmp("A", str) == 0) 229 return(1); 230 if (strcmp("An", str) == 0) 231 return(1); 232 if (strcmp("Le", str) == 0) /* French */ 233 return(1); 234 if (strcmp("La", str) == 0) 235 return(1); 236 if (strcmp("Der", str) == 0) /* German */ 237 return(1); 238 if (strcmp("Die", str) == 0) 239 return(1); 240 if (strcmp("Das", str) == 0) 241 return(1); 242 if (strcmp("El", str) == 0) /* Spanish */ 243 return(1); 244 if (strcmp("Den", str) == 0) /* Scandinavian */ 245 return(1); 246 return(0); 247 } 248 249 eval(keystr) /* evaluate key string for A+ marking */ 250 char keystr[]; 251 { 252 int i, j; 253 254 for (i = 0, j = 0; keystr[i]; i++, j++) 255 { 256 if (keystr[i] == '+') 257 { 258 multauth = 1; 259 i++; 260 } 261 keystr[j] = keystr[i]; 262 } 263 keystr[j] = NULL; 264 } 265 266 error(s) /* exit in case of various system errors */ 267 char *s; 268 { 269 perror(s); 270 exit(1); 271 } 272 273 onintr() /* remove tempfile in case of interrupt */ 274 { 275 fprintf(stderr, "\nInterrupt\n"); 276 unlink(tempfile); 277 exit(1); 278 } 279 280 endcomma(str) 281 char *str; 282 { 283 int n; 284 285 n = strlen(str) - 1; 286 if (str[n] == ',') 287 { 288 str[n] = NULL; 289 return(1); 290 } 291 return(0); 292 } 293