1 /*- 2 * Copyright (c) 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Peter McIlroy. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 06/06/93"; 13 #endif /* not lint */ 14 15 /* 16 * Read in the next bin. If it fits in one segment sort it; 17 * otherwise refine it by segment deeper by one character, 18 * and try again on smaller bins. Sort the final bin at this level 19 * of recursion to keep the head of fstack at 0. 20 * After PANIC passes, abort to merge sort. 21 */ 22 #include "sort.h" 23 #include "fsort.h" 24 25 #include <stdlib.h> 26 #include <string.h> 27 28 u_char **keylist = 0, *buffer = 0, *linebuf = 0; 29 struct tempfile fstack[MAXFCT]; 30 extern char *toutpath; 31 #define FSORTMAX 4 32 int PANIC = FSORTMAX; 33 34 void 35 fsort(binno, depth, infiles, nfiles, outfd, ftbl) 36 register int binno, depth, nfiles; 37 register union f_handle infiles; 38 FILE *outfd; 39 register struct field *ftbl; 40 { 41 register u_char *bufend, **keypos, *tmpbuf; 42 u_char *weights; 43 int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; 44 register int c, nelem; 45 long sizes [NBINS+1]; 46 union f_handle tfiles, mstart = {MAXFCT-16}; 47 register int (*get)(int, union f_handle, int, RECHEADER *, 48 u_char *, struct field *); 49 register struct recheader *crec; 50 struct field tfield[2]; 51 FILE *prevfd, *tailfd[FSORTMAX+1]; 52 53 memset(tailfd, 0, sizeof(tailfd)); 54 prevfd = outfd; 55 memset(tfield, 0, sizeof(tfield)); 56 if (ftbl[0].flags & R) 57 tfield[0].weights = Rascii; 58 else 59 tfield[0].weights = ascii; 60 tfield[0].icol.num = 1; 61 weights = ftbl[0].weights; 62 if (!buffer) { 63 buffer = malloc(BUFSIZE); 64 keylist = malloc(MAXNUM * sizeof(u_char *)); 65 if (!SINGL_FLD) 66 linebuf = malloc(MAXLLEN); 67 } 68 bufend = buffer + BUFSIZE; 69 if (binno >= 0) { 70 tfiles.top = infiles.top + nfiles; 71 get = getnext; 72 } else { 73 tfiles.top = 0; 74 if (SINGL_FLD) 75 get = makeline; 76 else 77 get = makekey; 78 } 79 for (;;) { 80 memset(sizes, 0, sizeof(sizes)); 81 c = ntfiles = 0; 82 if (binno == weights[REC_D] && 83 !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ 84 rd_append(weights[REC_D], 85 infiles, nfiles, prevfd, buffer, bufend); 86 break; 87 } else if (binno == weights[REC_D]) { 88 depth = 0; /* start over on flat weights */ 89 ftbl = tfield; 90 weights = ftbl[0].weights; 91 } 92 while (c != EOF) { 93 keypos = keylist; 94 nelem = 0; 95 crec = (RECHEADER *) buffer; 96 while((c = get(binno, infiles, nfiles, crec, bufend, 97 ftbl)) == 0) { 98 *keypos++ = crec->data + depth; 99 if (++nelem == MAXNUM) { 100 c = BUFFEND; 101 break; 102 } 103 crec =(RECHEADER *) ((char *) crec + 104 SALIGN(crec->length) + sizeof(TRECHEADER)); 105 } 106 if (c == BUFFEND || ntfiles || mfct) { /* push */ 107 if (panic >= PANIC) { 108 fstack[MAXFCT-16+mfct].fd = ftmp(); 109 if (radixsort(keylist, nelem, weights, 110 REC_D)) 111 err(2, NULL); 112 append(keylist, nelem, depth, fstack[ 113 MAXFCT-16+mfct].fd, putrec, ftbl); 114 mfct++; 115 /* reduce number of open files */ 116 if (mfct == 16 ||(c == EOF && ntfiles)) { 117 tmpbuf = malloc(bufend - 118 crec->data); 119 memmove(tmpbuf, crec->data, 120 bufend - crec->data); 121 fstack[tfiles.top + ntfiles].fd 122 = ftmp(); 123 fmerge(0, mstart, mfct, geteasy, 124 fstack[tfiles.top+ntfiles].fd, 125 putrec, ftbl); 126 ++ntfiles; 127 mfct = 0; 128 memmove(crec->data, tmpbuf, 129 bufend - crec->data); 130 free(tmpbuf); 131 } 132 } else { 133 fstack[tfiles.top + ntfiles].fd= ftmp(); 134 onepass(keylist, depth, nelem, sizes, 135 weights, fstack[tfiles.top+ntfiles].fd); 136 ++ntfiles; 137 } 138 } 139 } 140 get = getnext; 141 if (!ntfiles && !mfct) { /* everything in memory--pop */ 142 if (nelem > 1) 143 if (radixsort(keylist, nelem, weights, REC_D)) 144 err(2, NULL); 145 append(keylist, nelem, depth, outfd, putline, ftbl); 146 break; /* pop */ 147 } 148 if (panic >= PANIC) { 149 if (!ntfiles) 150 fmerge(0, mstart, mfct, geteasy, 151 outfd, putline, ftbl); 152 else 153 fmerge(0, tfiles, ntfiles, geteasy, 154 outfd, putline, ftbl); 155 break; 156 157 } 158 total = maxb = lastb = 0; /* find if one bin dominates */ 159 for (i = 0; i < NBINS; i++) 160 if (sizes[i]) { 161 if (sizes[i] > sizes[maxb]) 162 maxb = i; 163 lastb = i; 164 total += sizes[i]; 165 } 166 if (sizes[maxb] < max((total / 2) , BUFSIZE)) 167 maxb = lastb; /* otherwise pop after last bin */ 168 fstack[tfiles.top].lastb = lastb; 169 fstack[tfiles.top].maxb = maxb; 170 171 /* start refining next level. */ 172 get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */ 173 for (i = 0; i < maxb; i++) { 174 if (!sizes[i]) /* bin empty; step ahead file offset */ 175 get(i, tfiles, ntfiles, crec, bufend, 0); 176 else 177 fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl); 178 } 179 if (lastb != maxb) { 180 if (prevfd != outfd) 181 tailfd[panic] = prevfd; 182 prevfd = ftmp(); 183 for (i = maxb+1; i <= lastb; i++) 184 if (!sizes[i]) 185 get(i, tfiles, ntfiles, crec, bufend,0); 186 else 187 fsort(i, depth+1, tfiles, ntfiles, 188 prevfd, ftbl); 189 } 190 191 /* sort biggest (or last) bin at this level */ 192 depth++; 193 panic++; 194 binno = maxb; 195 infiles.top = tfiles.top; /* getnext will free tfiles, */ 196 nfiles = ntfiles; /* so overwrite them */ 197 } 198 if (prevfd != outfd) { 199 concat(outfd, prevfd); 200 fclose(prevfd); 201 } 202 for (i = panic; i >= 0; --i) 203 if (tailfd[i]) { 204 concat(outfd, tailfd[i]); 205 fclose(tailfd[i]); 206 } 207 } 208 209 /* 210 This is one pass of radix exchange, dumping the bins to disk. 211 */ 212 #define swap(a, b, t) t = a, a = b, b = t 213 void 214 onepass(a, depth, n, sizes, tr, fd) 215 u_char **a; 216 int depth; 217 long n, sizes[]; 218 u_char *tr; 219 FILE *fd; 220 { 221 long tsizes[NBINS+1]; 222 u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp; 223 static histo[256]; 224 int *hp; 225 register int c; 226 u_char **an, *t, **aj; 227 register u_char **ak, *r; 228 229 memset(tsizes, 0, sizeof(tsizes)); 230 depth += sizeof(TRECHEADER); 231 an = a + n; 232 for (ak = a; ak < an; ak++) { 233 histo[c = tr[**ak]]++; 234 tsizes[c] += ((RECHEADER *) (*ak -= depth))->length; 235 } 236 237 bin[0] = a; 238 bpmax = bin + 256; 239 tp = top, hp = histo; 240 for (bp = bin; bp < bpmax; bp++) { 241 *tp++ = *(bp+1) = *bp + (c = *hp); 242 *hp++ = 0; 243 if (c <= 1) 244 continue; 245 } 246 for(aj = a; aj < an; *aj = r, aj = bin[c+1]) 247 for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;) 248 swap(*ak, r, t); 249 250 for (ak = a, c = 0; c < 256; c++) { 251 an = bin[c+1]; 252 n = an - ak; 253 tsizes[c] += n * sizeof(TRECHEADER); 254 /* tell getnext how many elements in this bin, this segment. */ 255 EWRITE(tsizes+c, sizeof(long), 1, fd); 256 sizes[c] += tsizes[c]; 257 for (; ak < an; ++ak) 258 putrec((RECHEADER *) *ak, fd); 259 } 260 } 261