1 /*-
2 * Copyright (c) 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Peter McIlroy.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #ifndef lint
12 static char sccsid[] = "@(#)fsort.c 8.1 (Berkeley) 06/06/93";
13 #endif /* not lint */
14
15 /*
16 * Read in the next bin. If it fits in one segment sort it;
17 * otherwise refine it by segment deeper by one character,
18 * and try again on smaller bins. Sort the final bin at this level
19 * of recursion to keep the head of fstack at 0.
20 * After PANIC passes, abort to merge sort.
21 */
22 #include "sort.h"
23 #include "fsort.h"
24
25 #include <stdlib.h>
26 #include <string.h>
27
28 u_char **keylist = 0, *buffer = 0, *linebuf = 0;
29 struct tempfile fstack[MAXFCT];
30 extern char *toutpath;
31 #define FSORTMAX 4
32 int PANIC = FSORTMAX;
33
34 void
fsort(binno,depth,infiles,nfiles,outfd,ftbl)35 fsort(binno, depth, infiles, nfiles, outfd, ftbl)
36 register int binno, depth, nfiles;
37 register union f_handle infiles;
38 FILE *outfd;
39 register struct field *ftbl;
40 {
41 register u_char *bufend, **keypos, *tmpbuf;
42 u_char *weights;
43 int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
44 register int c, nelem;
45 long sizes [NBINS+1];
46 union f_handle tfiles, mstart = {MAXFCT-16};
47 register int (*get)(int, union f_handle, int, RECHEADER *,
48 u_char *, struct field *);
49 register struct recheader *crec;
50 struct field tfield[2];
51 FILE *prevfd, *tailfd[FSORTMAX+1];
52
53 memset(tailfd, 0, sizeof(tailfd));
54 prevfd = outfd;
55 memset(tfield, 0, sizeof(tfield));
56 if (ftbl[0].flags & R)
57 tfield[0].weights = Rascii;
58 else
59 tfield[0].weights = ascii;
60 tfield[0].icol.num = 1;
61 weights = ftbl[0].weights;
62 if (!buffer) {
63 buffer = malloc(BUFSIZE);
64 keylist = malloc(MAXNUM * sizeof(u_char *));
65 if (!SINGL_FLD)
66 linebuf = malloc(MAXLLEN);
67 }
68 bufend = buffer + BUFSIZE;
69 if (binno >= 0) {
70 tfiles.top = infiles.top + nfiles;
71 get = getnext;
72 } else {
73 tfiles.top = 0;
74 if (SINGL_FLD)
75 get = makeline;
76 else
77 get = makekey;
78 }
79 for (;;) {
80 memset(sizes, 0, sizeof(sizes));
81 c = ntfiles = 0;
82 if (binno == weights[REC_D] &&
83 !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */
84 rd_append(weights[REC_D],
85 infiles, nfiles, prevfd, buffer, bufend);
86 break;
87 } else if (binno == weights[REC_D]) {
88 depth = 0; /* start over on flat weights */
89 ftbl = tfield;
90 weights = ftbl[0].weights;
91 }
92 while (c != EOF) {
93 keypos = keylist;
94 nelem = 0;
95 crec = (RECHEADER *) buffer;
96 while((c = get(binno, infiles, nfiles, crec, bufend,
97 ftbl)) == 0) {
98 *keypos++ = crec->data + depth;
99 if (++nelem == MAXNUM) {
100 c = BUFFEND;
101 break;
102 }
103 crec =(RECHEADER *) ((char *) crec +
104 SALIGN(crec->length) + sizeof(TRECHEADER));
105 }
106 if (c == BUFFEND || ntfiles || mfct) { /* push */
107 if (panic >= PANIC) {
108 fstack[MAXFCT-16+mfct].fd = ftmp();
109 if (radixsort(keylist, nelem, weights,
110 REC_D))
111 err(2, NULL);
112 append(keylist, nelem, depth, fstack[
113 MAXFCT-16+mfct].fd, putrec, ftbl);
114 mfct++;
115 /* reduce number of open files */
116 if (mfct == 16 ||(c == EOF && ntfiles)) {
117 tmpbuf = malloc(bufend -
118 crec->data);
119 memmove(tmpbuf, crec->data,
120 bufend - crec->data);
121 fstack[tfiles.top + ntfiles].fd
122 = ftmp();
123 fmerge(0, mstart, mfct, geteasy,
124 fstack[tfiles.top+ntfiles].fd,
125 putrec, ftbl);
126 ++ntfiles;
127 mfct = 0;
128 memmove(crec->data, tmpbuf,
129 bufend - crec->data);
130 free(tmpbuf);
131 }
132 } else {
133 fstack[tfiles.top + ntfiles].fd= ftmp();
134 onepass(keylist, depth, nelem, sizes,
135 weights, fstack[tfiles.top+ntfiles].fd);
136 ++ntfiles;
137 }
138 }
139 }
140 get = getnext;
141 if (!ntfiles && !mfct) { /* everything in memory--pop */
142 if (nelem > 1)
143 if (radixsort(keylist, nelem, weights, REC_D))
144 err(2, NULL);
145 append(keylist, nelem, depth, outfd, putline, ftbl);
146 break; /* pop */
147 }
148 if (panic >= PANIC) {
149 if (!ntfiles)
150 fmerge(0, mstart, mfct, geteasy,
151 outfd, putline, ftbl);
152 else
153 fmerge(0, tfiles, ntfiles, geteasy,
154 outfd, putline, ftbl);
155 break;
156
157 }
158 total = maxb = lastb = 0; /* find if one bin dominates */
159 for (i = 0; i < NBINS; i++)
160 if (sizes[i]) {
161 if (sizes[i] > sizes[maxb])
162 maxb = i;
163 lastb = i;
164 total += sizes[i];
165 }
166 if (sizes[maxb] < max((total / 2) , BUFSIZE))
167 maxb = lastb; /* otherwise pop after last bin */
168 fstack[tfiles.top].lastb = lastb;
169 fstack[tfiles.top].maxb = maxb;
170
171 /* start refining next level. */
172 get(-1, tfiles, ntfiles, crec, bufend, 0); /* rewind */
173 for (i = 0; i < maxb; i++) {
174 if (!sizes[i]) /* bin empty; step ahead file offset */
175 get(i, tfiles, ntfiles, crec, bufend, 0);
176 else
177 fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
178 }
179 if (lastb != maxb) {
180 if (prevfd != outfd)
181 tailfd[panic] = prevfd;
182 prevfd = ftmp();
183 for (i = maxb+1; i <= lastb; i++)
184 if (!sizes[i])
185 get(i, tfiles, ntfiles, crec, bufend,0);
186 else
187 fsort(i, depth+1, tfiles, ntfiles,
188 prevfd, ftbl);
189 }
190
191 /* sort biggest (or last) bin at this level */
192 depth++;
193 panic++;
194 binno = maxb;
195 infiles.top = tfiles.top; /* getnext will free tfiles, */
196 nfiles = ntfiles; /* so overwrite them */
197 }
198 if (prevfd != outfd) {
199 concat(outfd, prevfd);
200 fclose(prevfd);
201 }
202 for (i = panic; i >= 0; --i)
203 if (tailfd[i]) {
204 concat(outfd, tailfd[i]);
205 fclose(tailfd[i]);
206 }
207 }
208
209 /*
210 This is one pass of radix exchange, dumping the bins to disk.
211 */
212 #define swap(a, b, t) t = a, a = b, b = t
213 void
onepass(a,depth,n,sizes,tr,fd)214 onepass(a, depth, n, sizes, tr, fd)
215 u_char **a;
216 int depth;
217 long n, sizes[];
218 u_char *tr;
219 FILE *fd;
220 {
221 long tsizes[NBINS+1];
222 u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
223 static histo[256];
224 int *hp;
225 register int c;
226 u_char **an, *t, **aj;
227 register u_char **ak, *r;
228
229 memset(tsizes, 0, sizeof(tsizes));
230 depth += sizeof(TRECHEADER);
231 an = a + n;
232 for (ak = a; ak < an; ak++) {
233 histo[c = tr[**ak]]++;
234 tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
235 }
236
237 bin[0] = a;
238 bpmax = bin + 256;
239 tp = top, hp = histo;
240 for (bp = bin; bp < bpmax; bp++) {
241 *tp++ = *(bp+1) = *bp + (c = *hp);
242 *hp++ = 0;
243 if (c <= 1)
244 continue;
245 }
246 for(aj = a; aj < an; *aj = r, aj = bin[c+1])
247 for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
248 swap(*ak, r, t);
249
250 for (ak = a, c = 0; c < 256; c++) {
251 an = bin[c+1];
252 n = an - ak;
253 tsizes[c] += n * sizeof(TRECHEADER);
254 /* tell getnext how many elements in this bin, this segment. */
255 EWRITE(tsizes+c, sizeof(long), 1, fd);
256 sizes[c] += tsizes[c];
257 for (; ak < an; ++ak)
258 putrec((RECHEADER *) *ak, fd);
259 }
260 }
261