xref: /original-bsd/contrib/sort/msort.c (revision c3e32dec)
1 /*-
2  * Copyright (c) 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Peter McIlroy.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)msort.c	8.1 (Berkeley) 06/06/93";
13 #endif /* not lint */
14 
15 #include "sort.h"
16 #include "fsort.h"
17 
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21 
22 /* Subroutines using comparisons: merge sort and check order */
23 #define DELETE (1)
24 #define LALIGN(n) ((n+3) & ~3)
25 
26 typedef struct mfile {
27 	u_char *end;
28 	short flno;
29 	struct recheader rec[1];
30 } MFILE;
31 typedef struct tmfile {
32 	u_char *end;
33 	short flno;
34 	struct trecheader rec[1];
35 } TMFILE;
36 u_char *wts, *wts1 = 0;
37 struct mfile *cfilebuf;
38 
39 static int cmp __P((struct recheader *, struct recheader *));
40 static int insert __P((struct mfile **, struct mfile **, int, int));
41 
42 void
43 fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
44 	union f_handle files;
45 	int binno, nfiles;
46 	int (*get)();
47 	FILE *outfd;
48 	void (*fput)();
49 	struct field *ftbl;
50 {
51 	FILE *tout;
52 	int i, j, last;
53 	void (*put)(struct recheader *, FILE *);
54 	extern int geteasy();
55 	struct tempfile *l_fstack;
56 
57 	wts = ftbl->weights;
58 	if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
59 		wts1 = (ftbl->flags & R) ? Rascii : ascii;
60 	if (!cfilebuf)
61 		cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
62 
63 	i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
64 	if (!buffer || i > BUFSIZE) {
65 		buffer = buffer ? realloc(buffer, i) : malloc(i);
66 		if (!buffer)
67 			err(2, NULL);
68 		if (!SINGL_FLD)
69 			linebuf = malloc(MAXLLEN);
70 	}
71 
72 	if (binno >= 0)
73 		l_fstack = fstack + files.top;
74 	else
75 		l_fstack = fstack;
76 	while (nfiles) {
77 		put = putrec;
78 		for (j = 0; j < nfiles; j += 16) {
79 			if (nfiles <= 16) {
80 				tout = outfd;
81 				put = fput;
82 			}
83 			else
84 				tout = ftmp();
85 			last = min(16, nfiles - j);
86 			if (binno < 0) {
87 				for (i = 0; i < last; i++)
88 					if (!(l_fstack[i+MAXFCT-1-16].fd =
89 					    fopen(files.names[j + i], "r")))
90 						err(2, "%s", files.names[j+i]);
91 				merge(MAXFCT-1-16, last, get, tout, put, ftbl);
92 			}
93 			else {
94 				for (i = 0; i< last; i++)
95 					rewind(l_fstack[i+j].fd);
96 				merge(files.top+j, last, get, tout, put, ftbl);
97 			}
98 			if (nfiles > 16) l_fstack[j/16].fd = tout;
99 		}
100 		nfiles = (nfiles + 15) / 16;
101 		if (nfiles == 1)
102 			nfiles = 0;
103 		if (binno < 0) {
104 			binno = 0;
105 			get = geteasy;
106 			files.top = 0;
107 		}
108 	}
109 }
110 
111 void
112 merge(infl0, nfiles, get, outfd, put, ftbl)
113 	int infl0, nfiles;
114 	int (*get)();
115 	void (*put)(struct recheader *, FILE *);
116 	FILE *outfd;
117 	struct field *ftbl;
118 {
119 	int c, i, j;
120 	union f_handle dummy = {0};
121 	struct mfile *flist[16], *cfile;
122 	for (i = j = 0; i < nfiles; i++) {
123 		cfile = (MFILE *) (buffer +
124 		    i * LALIGN(MAXLLEN + sizeof(TMFILE)));
125 		cfile->flno = j + infl0;
126 		cfile->end = cfile->rec->data + MAXLLEN;
127 		for (c = 1; c == 1;) {
128 			if (EOF == (c = get(j+infl0, dummy, nfiles,
129 			   cfile->rec, cfile->end, ftbl))) {
130 				--i;
131 				--nfiles;
132 				break;
133 			}
134 			if (i)
135 				c = insert(flist, &cfile, i, !DELETE);
136 			else
137 				flist[0] = cfile;
138 		}
139 		j++;
140 	}
141 	cfile = cfilebuf;
142 	cfile->flno = flist[0]->flno;
143 	cfile->end = cfile->rec->data + MAXLLEN;
144 	while (nfiles) {
145 		for (c = 1; c == 1;) {
146 			if (EOF == (c = get(cfile->flno, dummy, nfiles,
147 			   cfile->rec, cfile->end, ftbl))) {
148 				put(flist[0]->rec, outfd);
149 				memmove(flist, flist + 1,
150 				    sizeof(MFILE *) * (--nfiles));
151 				cfile->flno = flist[0]->flno;
152 				break;
153 			}
154 			if (!(c = insert(flist, &cfile, nfiles, DELETE)))
155 				put(cfile->rec, outfd);
156 		}
157 	}
158 }
159 
160 /*
161  * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
162  * otherwise just inserts *rec in flist.
163 */
164 static int
165 insert(flist, rec, ttop, delete)
166 	struct mfile **flist, **rec;
167 	int delete, ttop;			/* delete = 0 or 1 */
168 {
169 	register struct mfile *tmprec;
170 	register int top, mid, bot = 0, cmpv = 1;
171 	tmprec = *rec;
172 	top = ttop;
173 	for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
174 		cmpv = cmp(tmprec->rec, flist[mid]->rec);
175 		if (cmpv < 0)
176 			top = mid;
177 		else if (cmpv > 0)
178 			bot = mid;
179 		else {
180 			if (!UNIQUE)
181 				bot = mid - 1;
182 			break;
183 		}
184 	}
185 	if (delete) {
186 		if (UNIQUE) {
187 			if (!bot && cmpv)
188 				cmpv = cmp(tmprec->rec, flist[0]->rec);
189 			if (!cmpv)
190 				return(1);
191 		}
192 		tmprec = flist[0];
193 		if (bot)
194 			memmove(flist, flist+1, bot * sizeof(MFILE **));
195 		flist[bot] = *rec;
196 		*rec = tmprec;
197 		(*rec)->flno = (*flist)->flno;
198 		return (0);
199 	}
200 	else {
201 		if (!bot && !(UNIQUE && !cmpv)) {
202 			cmpv = cmp(tmprec->rec, flist[0]->rec);
203 			if (cmpv < 0)
204 				bot = -1;
205 		}
206 		if (UNIQUE && !cmpv)
207 			return (1);
208 		bot++;
209 		memmove(flist + bot+1, flist + bot,
210 		    (ttop - bot) * sizeof(MFILE **));
211 		flist[bot] = *rec;
212 		return (0);
213 	}
214 }
215 
216 /*
217  * check order on one file
218  */
219 void
220 order(infile, get, ftbl)
221 	union f_handle infile;
222 	int (*get)();
223 	struct field *ftbl;
224 {
225 	u_char *end;
226 	int c;
227 	struct recheader *crec, *prec, *trec;
228 
229 	if (!SINGL_FLD)
230 		linebuf = malloc(MAXLLEN);
231 	buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
232 	end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
233 	crec = (RECHEADER *) buffer;
234 	prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
235 	wts = ftbl->weights;
236 	if (SINGL_FLD && ftbl->flags & F)
237 		wts1 = ftbl->flags & R ? Rascii : ascii;
238 	else
239 		wts1 = 0;
240 	if (0 == get(-1, infile, 1, prec, end, ftbl))
241 	while (0 == get(-1, infile, 1, crec, end, ftbl)) {
242 		if (0 < (c = cmp(prec, crec))) {
243 			crec->data[crec->length-1] = 0;
244 			errx(1, "found disorder: %s", crec->data+crec->offset);
245 		}
246 		if (UNIQUE && !c) {
247 			crec->data[crec->length-1] = 0;
248 			errx(1, "found non-uniqueness: %s",
249 			    crec->data+crec->offset);
250 		}
251 		trec = prec;
252 		prec = crec;
253 		crec = trec;
254 	}
255 	exit(0);
256 }
257 
258 static int
259 cmp(rec1, rec2)
260 	struct recheader *rec1, *rec2;
261 {
262 	register r;
263 	register u_char *pos1, *pos2, *end;
264 	register u_char *cwts;
265 	for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
266 		pos1 = rec1->data;
267 		pos2 = rec2->data;
268 		if (!SINGL_FLD && UNIQUE)
269 			end = pos1 + min(rec1->offset, rec2->offset);
270 		else
271 			end = pos1 + min(rec1->length, rec2->length);
272 		for (; pos1 < end; ) {
273 			if (r = cwts[*pos1++] - cwts[*pos2++])
274 				return (r);
275 		}
276 	}
277 	return (0);
278 }
279