1 /*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Peter McIlroy. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char sccsid[] = "@(#)msort.c 5.1 (Berkeley) 06/01/93"; 13 #endif /* not lint */ 14 15 #include "sort.h" 16 #include "fsort.h" 17 18 #include <stdlib.h> 19 #include <string.h> 20 #include <unistd.h> 21 22 /* Subroutines using comparisons: merge sort and check order */ 23 #define DELETE (1) 24 #define LALIGN(n) ((n+3) & ~3) 25 26 typedef struct mfile { 27 u_char *end; 28 short flno; 29 struct recheader rec[1]; 30 } MFILE; 31 typedef struct tmfile { 32 u_char *end; 33 short flno; 34 struct trecheader rec[1]; 35 } TMFILE; 36 u_char *wts, *wts1 = 0; 37 struct mfile *cfilebuf; 38 39 static int cmp __P((struct recheader *, struct recheader *)); 40 static int insert __P((struct mfile **, struct mfile **, int, int)); 41 42 void 43 fmerge(binno, files, nfiles, get, outfd, fput, ftbl) 44 union f_handle files; 45 int binno, nfiles; 46 int (*get)(); 47 FILE *outfd; 48 void (*fput)(); 49 struct field *ftbl; 50 { 51 FILE *tout; 52 int i, j, last; 53 void (*put)(struct recheader *, FILE *); 54 extern int geteasy(); 55 struct tempfile *l_fstack; 56 57 wts = ftbl->weights; 58 if (!UNIQUE && SINGL_FLD && ftbl->flags & F) 59 wts1 = (ftbl->flags & R) ? Rascii : ascii; 60 if (!cfilebuf) 61 cfilebuf = malloc(MAXLLEN + sizeof(TMFILE)); 62 63 i = min(16, nfiles) * LALIGN(MAXLLEN+sizeof(TMFILE)); 64 if (!buffer || i > BUFSIZE) { 65 buffer = buffer ? realloc(buffer, i) : malloc(i); 66 if (!buffer) 67 err(2, NULL); 68 if (!SINGL_FLD) 69 linebuf = malloc(MAXLLEN); 70 } 71 72 if (binno >= 0) 73 l_fstack = fstack + files.top; 74 else 75 l_fstack = fstack; 76 while (nfiles) { 77 put = putrec; 78 for (j = 0; j < nfiles; j += 16) { 79 if (nfiles <= 16) { 80 tout = outfd; 81 put = fput; 82 } 83 else 84 tout = ftmp(); 85 last = min(16, nfiles - j); 86 if (binno < 0) { 87 for (i = 0; i < last; i++) 88 if (!(l_fstack[i+MAXFCT-1-16].fd = 89 fopen(files.names[j + i], "r"))) 90 err(2, "%s", files.names[j+i]); 91 merge(MAXFCT-1-16, last, get, tout, put, ftbl); 92 } 93 else { 94 for (i = 0; i< last; i++) 95 rewind(l_fstack[i+j].fd); 96 merge(files.top+j, last, get, tout, put, ftbl); 97 } 98 if (nfiles > 16) l_fstack[j/16].fd = tout; 99 } 100 nfiles = (nfiles + 15) / 16; 101 if (nfiles == 1) 102 nfiles = 0; 103 if (binno < 0) { 104 binno = 0; 105 get = geteasy; 106 files.top = 0; 107 } 108 } 109 } 110 111 void 112 merge(infl0, nfiles, get, outfd, put, ftbl) 113 int infl0, nfiles; 114 int (*get)(); 115 void (*put)(struct recheader *, FILE *); 116 FILE *outfd; 117 struct field *ftbl; 118 { 119 int c, i, j; 120 union f_handle dummy = {0}; 121 struct mfile *flist[16], *cfile; 122 for (i = j = 0; i < nfiles; i++) { 123 cfile = (MFILE *) (buffer + 124 i * LALIGN(MAXLLEN + sizeof(TMFILE))); 125 cfile->flno = j + infl0; 126 cfile->end = cfile->rec->data + MAXLLEN; 127 for (c = 1; c == 1;) { 128 if (EOF == (c = get(j+infl0, dummy, nfiles, 129 cfile->rec, cfile->end, ftbl))) { 130 --i; 131 --nfiles; 132 break; 133 } 134 if (i) 135 c = insert(flist, &cfile, i, !DELETE); 136 else 137 flist[0] = cfile; 138 } 139 j++; 140 } 141 cfile = cfilebuf; 142 cfile->flno = flist[0]->flno; 143 cfile->end = cfile->rec->data + MAXLLEN; 144 while (nfiles) { 145 for (c = 1; c == 1;) { 146 if (EOF == (c = get(cfile->flno, dummy, nfiles, 147 cfile->rec, cfile->end, ftbl))) { 148 put(flist[0]->rec, outfd); 149 memmove(flist, flist + 1, 150 sizeof(MFILE *) * (--nfiles)); 151 cfile->flno = flist[0]->flno; 152 break; 153 } 154 if (!(c = insert(flist, &cfile, nfiles, DELETE))) 155 put(cfile->rec, outfd); 156 } 157 } 158 } 159 160 /* 161 * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec; 162 * otherwise just inserts *rec in flist. 163 */ 164 static int 165 insert(flist, rec, ttop, delete) 166 struct mfile **flist, **rec; 167 int delete, ttop; /* delete = 0 or 1 */ 168 { 169 register struct mfile *tmprec; 170 register int top, mid, bot = 0, cmpv = 1; 171 tmprec = *rec; 172 top = ttop; 173 for (mid = top/2; bot +1 != top; mid = (bot+top)/2) { 174 cmpv = cmp(tmprec->rec, flist[mid]->rec); 175 if (cmpv < 0) 176 top = mid; 177 else if (cmpv > 0) 178 bot = mid; 179 else { 180 if (!UNIQUE) 181 bot = mid - 1; 182 break; 183 } 184 } 185 if (delete) { 186 if (UNIQUE) { 187 if (!bot && cmpv) 188 cmpv = cmp(tmprec->rec, flist[0]->rec); 189 if (!cmpv) 190 return(1); 191 } 192 tmprec = flist[0]; 193 if (bot) 194 memmove(flist, flist+1, bot * sizeof(MFILE **)); 195 flist[bot] = *rec; 196 *rec = tmprec; 197 (*rec)->flno = (*flist)->flno; 198 return (0); 199 } 200 else { 201 if (!bot && !(UNIQUE && !cmpv)) { 202 cmpv = cmp(tmprec->rec, flist[0]->rec); 203 if (cmpv < 0) 204 bot = -1; 205 } 206 if (UNIQUE && !cmpv) 207 return (1); 208 bot++; 209 memmove(flist + bot+1, flist + bot, 210 (ttop - bot) * sizeof(MFILE **)); 211 flist[bot] = *rec; 212 return (0); 213 } 214 } 215 216 /* 217 * check order on one file 218 */ 219 void 220 order(infile, get, ftbl) 221 union f_handle infile; 222 int (*get)(); 223 struct field *ftbl; 224 { 225 u_char *end; 226 int c; 227 struct recheader *crec, *prec, *trec; 228 229 if (!SINGL_FLD) 230 linebuf = malloc(MAXLLEN); 231 buffer = malloc(2 * (MAXLLEN + sizeof(TRECHEADER))); 232 end = buffer + 2 * (MAXLLEN + sizeof(TRECHEADER)); 233 crec = (RECHEADER *) buffer; 234 prec = (RECHEADER *) (buffer + MAXLLEN + sizeof(TRECHEADER)); 235 wts = ftbl->weights; 236 if (SINGL_FLD && ftbl->flags & F) 237 wts1 = ftbl->flags & R ? Rascii : ascii; 238 else 239 wts1 = 0; 240 if (0 == get(-1, infile, 1, prec, end, ftbl)) 241 while (0 == get(-1, infile, 1, crec, end, ftbl)) { 242 if (0 < (c = cmp(prec, crec))) { 243 crec->data[crec->length-1] = 0; 244 errx(1, "found disorder: %s", crec->data+crec->offset); 245 } 246 if (UNIQUE && !c) { 247 crec->data[crec->length-1] = 0; 248 errx(1, "found non-uniqueness: %s", 249 crec->data+crec->offset); 250 } 251 trec = prec; 252 prec = crec; 253 crec = trec; 254 } 255 exit(0); 256 } 257 258 static int 259 cmp(rec1, rec2) 260 struct recheader *rec1, *rec2; 261 { 262 register r; 263 register u_char *pos1, *pos2, *end; 264 register u_char *cwts; 265 for (cwts = wts; cwts; cwts = (cwts == wts1 ? 0 : wts1)) { 266 pos1 = rec1->data; 267 pos2 = rec2->data; 268 if (!SINGL_FLD && UNIQUE) 269 end = pos1 + min(rec1->offset, rec2->offset); 270 else 271 end = pos1 + min(rec1->length, rec2->length); 272 for (; pos1 < end; ) { 273 if (r = cwts[*pos1++] - cwts[*pos2++]) 274 return (r); 275 } 276 } 277 return (0); 278 } 279