xref: /original-bsd/contrib/sort/fsort.c (revision 6c633850)
1 /*-
2  * Copyright (c) 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Peter McIlroy.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)fsort.c	8.1 (Berkeley) 06/06/93";
13 #endif /* not lint */
14 
15 /*
16  * Read in the next bin.  If it fits in one segment sort it;
17  * otherwise refine it by segment deeper by one character,
18  * and try again on smaller bins.  Sort the final bin at this level
19  * of recursion to keep the head of fstack at 0.
20  * After PANIC passes, abort to merge sort.
21 */
22 #include "sort.h"
23 #include "fsort.h"
24 
25 #include <stdlib.h>
26 #include <string.h>
27 
28 u_char **keylist = 0, *buffer = 0, *linebuf = 0;
29 struct tempfile fstack[MAXFCT];
30 extern char *toutpath;
31 #define FSORTMAX 4
32 int PANIC = FSORTMAX;
33 
34 void
fsort(binno,depth,infiles,nfiles,outfd,ftbl)35 fsort(binno, depth, infiles, nfiles, outfd, ftbl)
36 	register int binno, depth, nfiles;
37 	register union f_handle infiles;
38 	FILE *outfd;
39 	register struct field *ftbl;
40 {
41 	register u_char *bufend, **keypos, *tmpbuf;
42 	u_char *weights;
43 	int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0;
44 	register int c, nelem;
45 	long sizes [NBINS+1];
46 	union f_handle tfiles, mstart = {MAXFCT-16};
47 	register int (*get)(int, union f_handle, int, RECHEADER *,
48 		u_char *, struct field *);
49 	register struct recheader *crec;
50 	struct field tfield[2];
51 	FILE *prevfd, *tailfd[FSORTMAX+1];
52 
53 	memset(tailfd, 0, sizeof(tailfd));
54 	prevfd = outfd;
55 	memset(tfield, 0, sizeof(tfield));
56 	if (ftbl[0].flags & R)
57 		tfield[0].weights = Rascii;
58 	else
59 		tfield[0].weights = ascii;
60 	tfield[0].icol.num = 1;
61 	weights = ftbl[0].weights;
62 	if (!buffer) {
63 		buffer = malloc(BUFSIZE);
64 		keylist = malloc(MAXNUM * sizeof(u_char *));
65 		if (!SINGL_FLD)
66 			linebuf = malloc(MAXLLEN);
67 	}
68 	bufend = buffer + BUFSIZE;
69 	if (binno >= 0) {
70 		tfiles.top = infiles.top + nfiles;
71 		get = getnext;
72 	} else {
73 		tfiles.top = 0;
74 		if (SINGL_FLD)
75 			get = makeline;
76 		else
77 			get = makekey;
78 	}
79 	for (;;) {
80 		memset(sizes, 0, sizeof(sizes));
81 		c = ntfiles = 0;
82 		if (binno == weights[REC_D] &&
83 		    !(SINGL_FLD && ftbl[0].flags & F)) {	/* pop */
84 			rd_append(weights[REC_D],
85 			    infiles, nfiles, prevfd, buffer, bufend);
86 			break;
87 		} else if (binno == weights[REC_D]) {
88 			depth = 0;		/* start over on flat weights */
89 			ftbl = tfield;
90 			weights = ftbl[0].weights;
91 		}
92 		while (c != EOF) {
93 			keypos = keylist;
94 			nelem = 0;
95 			crec = (RECHEADER *) buffer;
96 			while((c = get(binno, infiles, nfiles, crec, bufend,
97 			    ftbl)) == 0) {
98 				*keypos++ = crec->data + depth;
99 				if (++nelem == MAXNUM) {
100 					c = BUFFEND;
101 					break;
102 				}
103 				crec =(RECHEADER *)	((char *) crec +
104 				SALIGN(crec->length) + sizeof(TRECHEADER));
105 			}
106 			if (c == BUFFEND || ntfiles || mfct) {	/* push */
107 				if (panic >= PANIC) {
108 					fstack[MAXFCT-16+mfct].fd = ftmp();
109 					if (radixsort(keylist, nelem, weights,
110 					    REC_D))
111 						err(2, NULL);
112 					append(keylist, nelem, depth, fstack[
113 					 MAXFCT-16+mfct].fd, putrec, ftbl);
114 					mfct++;
115 					/* reduce number of open files */
116 					if (mfct == 16 ||(c == EOF && ntfiles)) {
117 						tmpbuf = malloc(bufend -
118 						    crec->data);
119 						memmove(tmpbuf, crec->data,
120 						    bufend - crec->data);
121 						fstack[tfiles.top + ntfiles].fd
122 						    = ftmp();
123 						fmerge(0, mstart, mfct, geteasy,
124 						  fstack[tfiles.top+ntfiles].fd,
125 						  putrec, ftbl);
126 						++ntfiles;
127 						mfct = 0;
128 						memmove(crec->data, tmpbuf,
129 						    bufend - crec->data);
130 						free(tmpbuf);
131 					}
132 				} else {
133 					fstack[tfiles.top + ntfiles].fd= ftmp();
134 					onepass(keylist, depth, nelem, sizes,
135 					weights, fstack[tfiles.top+ntfiles].fd);
136 					++ntfiles;
137 				}
138 			}
139 		}
140 		get = getnext;
141 		if (!ntfiles && !mfct) {	/* everything in memory--pop */
142 			if (nelem > 1)
143 			   if (radixsort(keylist, nelem, weights, REC_D))
144 				err(2, NULL);
145 			append(keylist, nelem, depth, outfd, putline, ftbl);
146 			break;					/* pop */
147 		}
148 		if (panic >= PANIC) {
149 			if (!ntfiles)
150 				fmerge(0, mstart, mfct, geteasy,
151 				    outfd, putline, ftbl);
152 			else
153 				fmerge(0, tfiles, ntfiles, geteasy,
154 				    outfd, putline, ftbl);
155 			break;
156 
157 		}
158 		total = maxb = lastb = 0;	/* find if one bin dominates */
159 		for (i = 0; i < NBINS; i++)
160 		  if (sizes[i]) {
161 			if (sizes[i] > sizes[maxb])
162 				maxb = i;
163 			lastb = i;
164 			total += sizes[i];
165 		}
166 		if (sizes[maxb] < max((total / 2) , BUFSIZE))
167 			maxb = lastb;	/* otherwise pop after last bin */
168 		fstack[tfiles.top].lastb = lastb;
169 		fstack[tfiles.top].maxb = maxb;
170 
171 			/* start refining next level. */
172 		get(-1, tfiles, ntfiles, crec, bufend, 0);	/* rewind */
173 		for (i = 0; i < maxb; i++) {
174 			if (!sizes[i])	/* bin empty; step ahead file offset */
175 				get(i, tfiles, ntfiles, crec, bufend, 0);
176 			else
177 				fsort(i, depth+1, tfiles, ntfiles, outfd, ftbl);
178 		}
179 		if (lastb != maxb) {
180 			if (prevfd != outfd)
181 				tailfd[panic] = prevfd;
182 			prevfd = ftmp();
183 			for (i = maxb+1; i <= lastb; i++)
184 				if (!sizes[i])
185 					get(i, tfiles, ntfiles, crec, bufend,0);
186 				else
187 					fsort(i, depth+1, tfiles, ntfiles,
188 					    prevfd, ftbl);
189 		}
190 
191 		/* sort biggest (or last) bin at this level */
192 		depth++;
193 		panic++;
194 		binno = maxb;
195 		infiles.top = tfiles.top;	/* getnext will free tfiles, */
196 		nfiles = ntfiles;		/* so overwrite them */
197 	}
198 	if (prevfd != outfd) {
199 		concat(outfd, prevfd);
200 		fclose(prevfd);
201 	}
202 	for (i = panic; i >= 0; --i)
203 		if (tailfd[i]) {
204 			concat(outfd, tailfd[i]);
205 			fclose(tailfd[i]);
206 		}
207 }
208 
209 /*
210  This is one pass of radix exchange, dumping the bins to disk.
211  */
212 #define swap(a, b, t) t = a, a = b, b = t
213 void
onepass(a,depth,n,sizes,tr,fd)214 onepass(a, depth, n, sizes, tr, fd)
215 	u_char **a;
216 	int depth;
217 	long n, sizes[];
218 	u_char *tr;
219 	FILE *fd;
220 {
221 	long tsizes[NBINS+1];
222 	u_char **bin[257], **top[256], ***bp, ***bpmax, ***tp;
223 	static histo[256];
224 	int *hp;
225 	register int c;
226 	u_char **an, *t, **aj;
227 	register u_char **ak, *r;
228 
229 	memset(tsizes, 0, sizeof(tsizes));
230 	depth += sizeof(TRECHEADER);
231 	an = a + n;
232 	for (ak = a; ak < an; ak++) {
233 		histo[c = tr[**ak]]++;
234 		tsizes[c] += ((RECHEADER *) (*ak -= depth))->length;
235 	}
236 
237 	bin[0] = a;
238 	bpmax = bin + 256;
239 	tp = top, hp = histo;
240 	for (bp = bin; bp < bpmax; bp++) {
241 		*tp++ = *(bp+1) = *bp + (c = *hp);
242 		*hp++ = 0;
243 		if (c <= 1)
244 			continue;
245 	}
246 	for(aj = a; aj < an; *aj = r, aj = bin[c+1])
247 		for(r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;)
248 			swap(*ak, r, t);
249 
250 	for (ak = a, c = 0; c < 256; c++) {
251 		an = bin[c+1];
252 		n = an - ak;
253 		tsizes[c] += n * sizeof(TRECHEADER);
254 		/* tell getnext how many elements in this bin, this segment. */
255 		EWRITE(tsizes+c, sizeof(long), 1, fd);
256 		sizes[c] += tsizes[c];
257 		for (; ak < an; ++ak)
258 			putrec((RECHEADER *) *ak, fd);
259 	}
260 }
261