1 /*-
2 * Copyright (c) 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Peter McIlroy.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #ifndef lint
12 static char sccsid[] = "@(#)msort.c 8.1 (Berkeley) 06/06/93";
13 #endif /* not lint */
14
15 #include "sort.h"
16 #include "fsort.h"
17
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21
22 /* Subroutines using comparisons: merge sort and check order */
23 #define DELETE (1)
24 #define LALIGN(n) ((n+3) & ~3)
25
26 typedef struct mfile {
27 u_char *end;
28 short flno;
29 struct recheader rec[1];
30 } MFILE;
31 typedef struct tmfile {
32 u_char *end;
33 short flno;
34 struct trecheader rec[1];
35 } TMFILE;
36 u_char *wts, *wts1 = 0;
37 struct mfile *cfilebuf;
38
39 static int cmp __P((struct recheader *, struct recheader *));
40 static int insert __P((struct mfile **, struct mfile **, int, int));
41
42 void
fmerge(binno,files,nfiles,get,outfd,fput,ftbl)43 fmerge(binno, files, nfiles, get, outfd, fput, ftbl)
44 union f_handle files;
45 int binno, nfiles;
46 int (*get)();
47 FILE *outfd;
48 void (*fput)();
49 struct field *ftbl;
50 {
51 FILE *tout;
52 int i, j, last;
53 void (*put)(struct recheader *, FILE *);
54 extern int geteasy();
55 struct tempfile *l_fstack;
56
57 wts = ftbl->weights;
58 if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
59 wts1 = (ftbl->flags & R) ? Rascii : ascii;
60 if (!cfilebuf)
61 cfilebuf = malloc(MAXLLEN + sizeof(TMFILE));
62
63 i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE));
64 if (!buffer || i > BUFSIZE) {
65 buffer = buffer ? realloc(buffer, i) : malloc(i);
66 if (!buffer)
67 err(2, NULL);
68 if (!SINGL_FLD)
69 linebuf = malloc(MAXLLEN);
70 }
71
72 if (binno >= 0)
73 l_fstack = fstack + files.top;
74 else
75 l_fstack = fstack;
76 while (nfiles) {
77 put = putrec;
78 for (j = 0; j < nfiles; j += 16) {
79 if (nfiles <= 16) {
80 tout = outfd;
81 put = fput;
82 }
83 else
84 tout = ftmp();
85 last = min(16, nfiles - j);
86 if (binno < 0) {
87 for (i = 0; i < last; i++)
88 if (!(l_fstack[i+MAXFCT-1-16].fd =
89 fopen(files.names[j + i], "r")))
90 err(2, "%s", files.names[j+i]);
91 merge(MAXFCT-1-16, last, get, tout, put, ftbl);
92 }
93 else {
94 for (i = 0; i< last; i++)
95 rewind(l_fstack[i+j].fd);
96 merge(files.top+j, last, get, tout, put, ftbl);
97 }
98 if (nfiles > 16) l_fstack[j/16].fd = tout;
99 }
100 nfiles = (nfiles + 15) / 16;
101 if (nfiles == 1)
102 nfiles = 0;
103 if (binno < 0) {
104 binno = 0;
105 get = geteasy;
106 files.top = 0;
107 }
108 }
109 }
110
111 void
merge(infl0,nfiles,get,outfd,put,ftbl)112 merge(infl0, nfiles, get, outfd, put, ftbl)
113 int infl0, nfiles;
114 int (*get)();
115 void (*put)(struct recheader *, FILE *);
116 FILE *outfd;
117 struct field *ftbl;
118 {
119 int c, i, j;
120 union f_handle dummy = {0};
121 struct mfile *flist[16], *cfile;
122 for (i = j = 0; i < nfiles; i++) {
123 cfile = (MFILE *) (buffer +
124 i * LALIGN(MAXLLEN + sizeof(TMFILE)));
125 cfile->flno = j + infl0;
126 cfile->end = cfile->rec->data + MAXLLEN;
127 for (c = 1; c == 1;) {
128 if (EOF == (c = get(j+infl0, dummy, nfiles,
129 cfile->rec, cfile->end, ftbl))) {
130 --i;
131 --nfiles;
132 break;
133 }
134 if (i)
135 c = insert(flist, &cfile, i, !DELETE);
136 else
137 flist[0] = cfile;
138 }
139 j++;
140 }
141 cfile = cfilebuf;
142 cfile->flno = flist[0]->flno;
143 cfile->end = cfile->rec->data + MAXLLEN;
144 while (nfiles) {
145 for (c = 1; c == 1;) {
146 if (EOF == (c = get(cfile->flno, dummy, nfiles,
147 cfile->rec, cfile->end, ftbl))) {
148 put(flist[0]->rec, outfd);
149 memmove(flist, flist + 1,
150 sizeof(MFILE *) * (--nfiles));
151 cfile->flno = flist[0]->flno;
152 break;
153 }
154 if (!(c = insert(flist, &cfile, nfiles, DELETE)))
155 put(cfile->rec, outfd);
156 }
157 }
158 }
159
160 /*
161 * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
162 * otherwise just inserts *rec in flist.
163 */
164 static int
insert(flist,rec,ttop,delete)165 insert(flist, rec, ttop, delete)
166 struct mfile **flist, **rec;
167 int delete, ttop; /* delete = 0 or 1 */
168 {
169 register struct mfile *tmprec;
170 register int top, mid, bot = 0, cmpv = 1;
171 tmprec = *rec;
172 top = ttop;
173 for (mid = top/2; bot +1 != top; mid = (bot+top)/2) {
174 cmpv = cmp(tmprec->rec, flist[mid]->rec);
175 if (cmpv < 0)
176 top = mid;
177 else if (cmpv > 0)
178 bot = mid;
179 else {
180 if (!UNIQUE)
181 bot = mid - 1;
182 break;
183 }
184 }
185 if (delete) {
186 if (UNIQUE) {
187 if (!bot && cmpv)
188 cmpv = cmp(tmprec->rec, flist[0]->rec);
189 if (!cmpv)
190 return(1);
191 }
192 tmprec = flist[0];
193 if (bot)
194 memmove(flist, flist+1, bot * sizeof(MFILE **));
195 flist[bot] = *rec;
196 *rec = tmprec;
197 (*rec)->flno = (*flist)->flno;
198 return (0);
199 }
200 else {
201 if (!bot && !(UNIQUE && !cmpv)) {
202 cmpv = cmp(tmprec->rec, flist[0]->rec);
203 if (cmpv < 0)
204 bot = -1;
205 }
206 if (UNIQUE && !cmpv)
207 return (1);
208 bot++;
209 memmove(flist + bot+1, flist + bot,
210 (ttop - bot) * sizeof(MFILE **));
211 flist[bot] = *rec;
212 return (0);
213 }
214 }
215
216 /*
217 * check order on one file
218 */
219 void
order(infile,get,ftbl)220 order(infile, get, ftbl)
221 union f_handle infile;
222 int (*get)();
223 struct field *ftbl;
224 {
225 u_char *end;
226 int c;
227 struct recheader *crec, *prec, *trec;
228
229 if (!SINGL_FLD)
230 linebuf = malloc(MAXLLEN);
231 buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER)));
232 end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER));
233 crec = (RECHEADER *) buffer;
234 prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER));
235 wts = ftbl->weights;
236 if (SINGL_FLD && ftbl->flags & F)
237 wts1 = ftbl->flags & R ? Rascii : ascii;
238 else
239 wts1 = 0;
240 if (0 == get(-1, infile, 1, prec, end, ftbl))
241 while (0 == get(-1, infile, 1, crec, end, ftbl)) {
242 if (0 < (c = cmp(prec, crec))) {
243 crec->data[crec->length-1] = 0;
244 errx(1, "found disorder: %s", crec->data+crec->offset);
245 }
246 if (UNIQUE && !c) {
247 crec->data[crec->length-1] = 0;
248 errx(1, "found non-uniqueness: %s",
249 crec->data+crec->offset);
250 }
251 trec = prec;
252 prec = crec;
253 crec = trec;
254 }
255 exit(0);
256 }
257
258 static int
cmp(rec1,rec2)259 cmp(rec1, rec2)
260 struct recheader *rec1, *rec2;
261 {
262 register r;
263 register u_char *pos1, *pos2, *end;
264 register u_char *cwts;
265 for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) {
266 pos1 = rec1->data;
267 pos2 = rec2->data;
268 if (!SINGL_FLD && UNIQUE)
269 end = pos1 + min(rec1->offset, rec2->offset);
270 else
271 end = pos1 + min(rec1->length, rec2->length);
272 for (; pos1 < end; ) {
273 if (r = cwts[*pos1++] - cwts[*pos2++])
274 return (r);
275 }
276 }
277 return (0);
278 }
279