/*
 * sortbib: sort one or more bibliographic databases.
 *
 * Each database is scanned once; for every record a line of the form
 *	<file index> <byte offset> <length> : <key> <key> <key> <key>
 * is written to a temporary file.  That file is sorted with sort(1) on
 * everything after the ':' and the records are then copied to standard
 * output in sorted order by seeking back into the original databases.
 */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L	/* expose mkstemp() in <stdlib.h> */
#endif

#ifndef lint
static char *sccsid = "@(#)sortbib.c 4.2 (Berkeley) 10/22/87";
#endif

#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BUF	BUFSIZ
#define MXFILES	16
#define NFIELDS	4		/* number of sort keys kept per record */
#define WORDS	8		/* words examined on one '%' line */
#define WORDLEN	(BUF / 4)	/* capacity of one word, including NUL */
#define MAXREC	(BUF * 8)	/* longest record that can be delivered */

char	*tempfile;		/* temporary file for sorting keys */
char	*keystr = "AD";		/* default sorting on author and date */
int	multauth = 0;		/* by default sort on senior author only */
int	oneauth;		/* has there been author in the record? */
int	rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */

/* writable template for mkstemp(); must end in exactly six X's */
static char tempname[] = "/tmp/SbibXXXXXX";

void	sortbib(FILE *fp, FILE *tfp, int i);
void	deliver(FILE *fp[], FILE *tfp);
void	parse(char line[], char fld[][BUF]);
int	article(const char *str);
void	eval(char keystr[]);
void	error(const char *s);
void	onintr(int sig);
int	endcomma(char *str);

/* atexit hook: remove the key file on every exit() path */
static void
rmtemp(void)
{
	if (tempfile != NULL)
		unlink(tempfile);
}

/* append src to dst, which holds BUF bytes; silently truncate on overflow */
static void
append(char *dst, const char *src)
{
	size_t used = strlen(dst);
	size_t room = (size_t)(BUF - 1) - used;
	size_t n = strlen(src);

	if (n > room)
		n = room;
	memcpy(dst + used, src, n);
	dst[used + n] = '\0';
}

/* abort if a record is too long for deliver()'s buffer */
static void
checklen(int recno, long length)
{
	if (length > MAXREC) {
		fprintf(stderr, "sortbib: record %d longer than %d (%ld)\n",
		    recno, MAXREC, length);
		exit(1);
	}
}

/* write one key line for the record at [offset, offset+length) of file i */
static void
putkey(FILE *tfp, int i, long offset, long length, char fld[][BUF])
{
	fprintf(tfp, "%d %ld %ld : %s %s %s %s\n",
	    i, offset, length, fld[0], fld[1], fld[2], fld[3]);
	if (ferror(tfp))
		error(tempfile);	/* disk error in /tmp */
}

/*
 * Split line into at most WORDS whitespace-separated words.  Words that
 * do not fit in WORDLEN bytes are truncated; unused slots are set to "".
 * Returns the number of words found.
 */
static int
splitwords(const char *line, char wd[][WORDLEN])
{
	int n = 0, k;
	size_t len;

	while (n < WORDS) {
		while (isspace((unsigned char)*line))
			line++;
		if (*line == '\0')
			break;
		len = 0;
		while (*line != '\0' && !isspace((unsigned char)*line)) {
			if (len < WORDLEN - 1)
				wd[n][len++] = *line;
			line++;
		}
		wd[n][len] = '\0';
		n++;
	}
	for (k = n; k < WORDS; k++)
		wd[k][0] = '\0';
	return (n);
}

/*
 * sortbib [-sKEYS] database [...]
 * Standard input cannot be used because the databases are re-read by
 * seeking.  Exits 0 on success, 1 on any error.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[MXFILES], *tfp;
	int i, fd;

	if (argc == 1) {	/* can't use stdin for seeking anyway */
		puts("Usage: sortbib [-sKEYS] database [...]");
		puts("\t-s: sort by fields in KEYS (default is AD)");
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		keystr = argv[1] + 2;
		eval(keystr);	/* evaluate A+ for multiple authors */
		argv++;
		argc--;
	}
	if (argc > MXFILES + 1) {	/* too many open file streams */
		fprintf(stderr,
		    "sortbib: More than %d databases specified\n", MXFILES);
		exit(1);
	}
	for (i = 0; i < MXFILES; i++)	/* lets deliver() reject bad indices */
		fp[i] = NULL;
	for (i = 1; i < argc; i++)	/* open files in arg list */
		if ((fp[i - 1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);

	/* create the key file atomically; the name is filled in in place */
	if ((fd = mkstemp(tempname)) == -1)
		error(tempname);
	close(fd);
	tempfile = tempname;
	atexit(rmtemp);
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		signal(SIGINT, onintr);

	if ((tfp = fopen(tempfile, "w")) == NULL)
		error(tempfile);
	for (i = 0; i < argc - 1; i++)	/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	if (fclose(tfp) == EOF)		/* buffered keys may fail to flush */
		error(tempfile);
	deliver(fp, NULL);	/* do disk seeks and read from biblio files */
	unlink(tempfile);
	exit(0);
}

/*
 * Read the records of database fp (index i in the argument list) and
 * write one key line per record to tfp.  A record starts at the first
 * line, at a null line (rsmode 1) or at a ".[" line (rsmode 2); the
 * first kind of separator seen fixes rsmode for the rest of the run.
 */
void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0;	/* byte offsets in file */
	long length;			/* record length */
	int newrec = 0, recno = 0;	/* new record seen?, record number */
	int k;
	char line[BUF], fld[NFIELDS][BUF];	/* one line, the sort fields */

	/* an empty database must not leave these uninitialised */
	line[0] = '\0';
	for (k = 0; k < NFIELDS; k++)
		fld[k][0] = '\0';

	/* measure byte offset, then get new line */
	while (offset = ftell(fp), fgets(line, BUF, fp) != NULL) {
		if (recno == 0)		/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* accept null line record separator */
			if (!rsmode)
				rsmode = 1;
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
			if (!rsmode)
				rsmode = 2;
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {		/* by whatever means above */
			newrec = 0;
			length = offset - lastoffset;
			checklen(recno, length);
			if (recno++)	/* nothing precedes the first record */
				putkey(tfp, i, lastoffset, length, fld);
			for (k = 0; k < NFIELDS; k++)
				fld[k][0] = '\0';
			oneauth = 0;		/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}
	offset = ftell(fp);		/* measure byte offset at EOF */
	length = offset - lastoffset;	/* measure final record length */
	checklen(recno, length);
	/*
	 * Skip an empty tail, and in null-line mode skip a final record
	 * that is only the null line just before EOF.  In bracket mode a
	 * trailing null line belongs to the last record, which is kept.
	 */
	if (length > 0 && !(rsmode == 1 && line[0] == '\n'))
		putkey(tfp, i, lastoffset, length, fld);
}

/*
 * Sort the key file and copy the records it names, in sorted order, from
 * the open databases fp[] to standard output.  The incoming value of tfp
 * is ignored; the key file is reopened from the global tempfile.
 */
void
deliver(FILE *fp[], FILE *tfp)
{
	char str[BUF], buff[MAXREC];	/* for tempfile & databases */
	char cmd[128];			/* for using system sort command */
	long offset, length;
	int i, c;

	/*
	 * When sorting, ignore case distinctions; field separator is ':'
	 * and the key is everything from the second field on.  tempfile is
	 * the fixed-length name built in main(), so cmd cannot overflow.
	 */
	sprintf(cmd, "sort -f -t : -k 2 -o %s %s", tempfile, tempfile);
	if (system(cmd) != 0) {
		fprintf(stderr, "sortbib: sort command failed\n");
		exit(1);
	}
	if ((tfp = fopen(tempfile, "r")) == NULL)
		error(tempfile);
	while (fgets(str, sizeof(str), tfp) != NULL) {
		/* keys too long for str: drop the tail of the line so it is
		 * not mistaken for the next entry */
		if (strchr(str, '\n') == NULL)
			while ((c = getc(tfp)) != EOF && c != '\n')
				;
		/* get file index, record offset, and length */
		if (sscanf(str, "%d %ld %ld :", &i, &offset, &length) != 3)
			error("sortbib: sorting error");
		if (i < 0 || i >= MXFILES || fp[i] == NULL ||
		    length <= 0 || length > MAXREC)
			error("sortbib: sorting error");
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, SEEK_SET) == -1)
			error("sortbib");
		/* read exactly one record from bibliography */
		if (fread(buff, sizeof(*buff), (size_t)length, fp[i]) !=
		    (size_t)length)
			error("sortbib");
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			putchar('\n');
		/* write record buffer to standard output */
		if (fwrite(buff, sizeof(*buff), (size_t)length, stdout) !=
		    (size_t)length)
			error("sortbib");
	}
	fclose(tfp);
}

/*
 * Extract sort-key text from one '%' line and append it to the fields
 * in fld[] whose position matches the line's tag in keystr.  Only the
 * first NFIELDS keys and the first WORDS words of the line are used.
 * Lines that carry a tag but no data are ignored.
 */
void
parse(char line[], char fld[][BUF])
{
	char wd[WORDS][WORDLEN];
	char tag;
	int n, i, j;

	n = splitwords(line, wd);
	if (n < 2)		/* tag only: nothing to sort on */
		return;
	tag = wd[0][1];
	for (i = 0; i < NFIELDS && keystr[i] != '\0'; i++) {
		if (tag == keystr[i]) {
			if (tag == 'A') {
				if (oneauth && !multauth)	/* no repeat */
					break;
				if (oneauth)			/* mult auths */
					append(fld[i], "~~");
				/* "Surname, Jr." - the word before the last
				 * ends in a comma and is the surname */
				if (n > 2 && endcomma(wd[n - 2])) {
					append(fld[i], wd[n - 2]);
					n--;
				} else
					append(fld[i], wd[n - 1]);
				append(fld[i], " ");
				for (j = 1; j < n - 1; j++)
					append(fld[i], wd[j]);
				oneauth = 1;
			} else if (tag == 'D') {
				append(fld[i], wd[n - 1]);	/* year */
				if (n > 2)
					append(fld[i], wd[1]);	/* month */
			} else if (tag == 'T' || tag == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					append(fld[i], wd[j]);
			} else {			/* any other field */
				for (j = 1; j < n; j++)
					append(fld[i], wd[j]);
			}
		}
		/* %Q corporate author: sorts as an unreversed %A */
		else if (tag == 'Q' && keystr[i] == 'A') {
			for (j = 1; j < n; j++)
				append(fld[i], wd[j]);
		}
	}
}

/* return 1 if str is exactly one of the known leading articles, else 0 */
int
article(const char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof(articles) / sizeof(articles[0]); k++)
		if (strcmp(articles[k], str) == 0)
			return (1);
	return (0);
}

/*
 * Evaluate the key string for A+ marking: remove every '+' from keystr
 * in place and set multauth if one was present.
 */
void
eval(char keystr[])
{
	int i, j = 0;

	for (i = 0; keystr[i] != '\0'; i++) {
		if (keystr[i] == '+') {
			multauth = 1;
			continue;
		}
		keystr[j++] = keystr[i];
	}
	keystr[j] = '\0';
}

/* report a system error for s via perror() and exit with status 1 */
void
error(const char *s)
{
	perror(s);
	exit(1);
}

/*
 * SIGINT handler: remove tempfile and terminate.  Only async-signal-safe
 * calls are made, so _exit() is used and the atexit hook does not run.
 */
void
onintr(int sig)
{
	static const char msg[] = "\nInterrupt\n";

	(void)sig;
	if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
		/* nothing useful can be done about it here */
	}
	unlink(tempfile);
	_exit(1);
}

/*
 * If str ends in a comma, strip the comma and return 1; otherwise leave
 * str alone and return 0.  An empty string returns 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}