1 /* $NetBSD: files.c,v 1.17 2001/05/15 11:18:23 jdolecek Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include "sort.h" 40 #include "fsort.h" 41 42 #ifndef lint 43 __RCSID("$NetBSD: files.c,v 1.17 2001/05/15 11:18:23 jdolecek Exp $"); 44 __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); 45 #endif /* not lint */ 46 47 #include <string.h> 48 49 static int seq __P((FILE *, DBT *, DBT *)); 50 51 /* 52 * this is the subroutine for file management for fsort(). 53 * It keeps the buffers for all temporary files. 54 */ 55 int 56 getnext(binno, infl0, filelist, nfiles, pos, end, dummy) 57 int binno, infl0; 58 struct filelist *filelist; 59 int nfiles; 60 RECHEADER *pos; 61 u_char *end; 62 struct field *dummy; 63 { 64 int i; 65 u_char *hp; 66 static size_t nleft = 0; 67 static int cnt = 0, flag = -1; 68 static u_char maxb = 0; 69 static FILE *fp; 70 71 if (nleft == 0) { 72 if (binno < 0) /* reset files. */ { 73 for (i = 0; i < nfiles; i++) { 74 rewind(fstack[infl0 + i].fp); 75 fstack[infl0 + i].max_o = 0; 76 } 77 flag = -1; 78 nleft = cnt = 0; 79 return (-1); 80 } 81 maxb = fstack[infl0].maxb; 82 for (; nleft == 0; cnt++) { 83 if (cnt >= nfiles) { 84 cnt = 0; 85 return (EOF); 86 } 87 fp = fstack[infl0 + cnt].fp; 88 fread(&nleft, sizeof(nleft), 1, fp); 89 if (binno < maxb) 90 fstack[infl0+cnt].max_o 91 += sizeof(nleft) + nleft; 92 else if (binno == maxb) { 93 if (binno != fstack[infl0].lastb) { 94 fseek(fp, fstack[infl0+ 95 cnt].max_o, SEEK_SET); 96 fread(&nleft, sizeof(nleft), 1, fp); 97 } 98 if (nleft == 0) 99 fclose(fp); 100 } else if (binno == maxb + 1) { /* skip a bin */ 101 fseek(fp, nleft, SEEK_CUR); 102 fread(&nleft, sizeof(nleft), 1, fp); 103 flag = cnt; 104 } 105 } 106 } 107 if ((u_char *) pos > end - sizeof(TRECHEADER)) 108 return (BUFFEND); 109 fread(pos, sizeof(TRECHEADER), 1, fp); 110 if (end - pos->data < pos->length) { 111 hp = ((u_char *)pos) + sizeof(TRECHEADER); 112 for (i = sizeof(TRECHEADER); i ; i--) 113 ungetc(*--hp, fp); 114 return (BUFFEND); 115 } 116 fread(pos->data, pos->length, 1, fp); 117 nleft -= pos->length + sizeof(TRECHEADER); 118 if (nleft == 0 && binno == fstack[infl0].maxb) 119 fclose(fp); 120 return (0); 121 } 122 123 /* 124 * this is called when there is no special key. It's only called 125 * in the first fsort pass. 126 */ 127 int 128 makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) 129 int flno, top; 130 struct filelist *filelist; 131 int nfiles; 132 RECHEADER *recbuf; 133 u_char *bufend; 134 struct field *dummy2; 135 { 136 static u_char *obufend; 137 static size_t osz; 138 char *pos; 139 static int filenum = 0, overflow = 0; 140 static FILE *fp = 0; 141 int c; 142 143 pos = (char *) recbuf->data; 144 if (overflow) { 145 /* 146 * Buffer shortage is solved by either of two ways: 147 * o flush previous buffered data and start using the 148 * buffer from start (see fsort()) 149 * o realloc buffer and bump bufend 150 * 151 * The former is preferred, realloc is only done when 152 * there is exactly one item in buffer which does not fit. 153 */ 154 if (bufend == obufend) 155 memmove(pos, bufend - osz, osz); 156 157 pos += osz; 158 overflow = 0; 159 } 160 for (;;) { 161 if (flno >= 0 && (fp = fstack[flno].fp) == NULL) 162 return (EOF); 163 else if (fp == NULL) { 164 if (filenum >= nfiles) 165 return (EOF); 166 if (!(fp = fopen(filelist->names[filenum], "r"))) 167 err(2, "%s", filelist->names[filenum]); 168 filenum++; 169 } 170 while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { 171 if ((*pos++ = c) == REC_D) { 172 recbuf->offset = 0; 173 recbuf->length = pos - (char *) recbuf->data; 174 return (0); 175 } 176 } 177 if (pos >= (char *)bufend) { 178 if (recbuf->data < bufend) { 179 overflow = 1; 180 obufend = bufend; 181 osz = (pos - (char *) recbuf->data); 182 } 183 return (BUFFEND); 184 } else if (c == EOF) { 185 if (recbuf->data != (u_char *) pos) { 186 *pos++ = REC_D; 187 recbuf->offset = 0; 188 recbuf->length = pos - (char *) recbuf->data; 189 return (0); 190 } 191 FCLOSE(fp); 192 fp = 0; 193 if (flno >= 0) 194 fstack[flno].fp = 0; 195 } else { 196 197 warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); 198 199 /* Consume the rest of line from input */ 200 while((c = getc(fp)) != REC_D && c != EOF) 201 ; 202 203 recbuf->offset = 0; 204 recbuf->length = 0; 205 206 return (BUFFEND); 207 } 208 } 209 } 210 211 /* 212 * This generates keys. It's only called in the first fsort pass 213 */ 214 int 215 makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) 216 int flno, top; 217 struct filelist *filelist; 218 int nfiles; 219 RECHEADER *recbuf; 220 u_char *bufend; 221 struct field *ftbl; 222 { 223 static int filenum = 0; 224 static FILE *dbdesc = 0; 225 static DBT dbkey[1], line[1]; 226 static int overflow = 0; 227 int c; 228 229 if (overflow) { 230 overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, 231 ftbl); 232 if (overflow) 233 return (BUFFEND); 234 else 235 return (0); 236 } 237 238 for (;;) { 239 if (flno >= 0) { 240 if (!(dbdesc = fstack[flno].fp)) 241 return (EOF); 242 } else if (!dbdesc) { 243 if (filenum >= nfiles) 244 return (EOF); 245 dbdesc = fopen(filelist->names[filenum], "r"); 246 if (!dbdesc) 247 err(2, "%s", filelist->names[filenum]); 248 filenum++; 249 } 250 if (!(c = seq(dbdesc, line, dbkey))) { 251 if ((signed)line->size > bufend - recbuf->data) { 252 overflow = 1; 253 } else { 254 overflow = enterkey(recbuf, line, 255 bufend - (u_char *) recbuf, ftbl); 256 } 257 if (overflow) 258 return (BUFFEND); 259 else 260 return (0); 261 } 262 if (c == EOF) { 263 FCLOSE(dbdesc); 264 dbdesc = 0; 265 if (flno >= 0) 266 fstack[flno].fp = 0; 267 } else { 268 ((char *) line->data)[60] = '\000'; 269 warnx("makekey: line too long: ignoring %.100s...", 270 (char *)line->data); 271 } 272 } 273 } 274 275 /* 276 * get a key/line pair from fp 277 */ 278 static int 279 seq(fp, line, key) 280 FILE *fp; 281 DBT *key, *line; 282 { 283 static char *buf, flag = 1; 284 char *end, *pos; 285 int c; 286 287 if (flag) { 288 flag = 0; 289 buf = (char *) linebuf; 290 end = buf + linebuf_size; 291 line->data = buf; 292 } 293 pos = buf; 294 while ((c = getc(fp)) != EOF) { 295 if ((*pos++ = c) == REC_D) { 296 line->size = pos - buf; 297 return (0); 298 } 299 if (pos == end) { 300 linebuf_size *= 2; 301 linebuf = realloc(linebuf, linebuf_size); 302 if (!linebuf) 303 err(2, "realloc of linebuf to %lu bytes failed", 304 (unsigned long)linebuf_size); 305 306 end = linebuf + linebuf_size; 307 pos = linebuf + (pos - buf); 308 line->data = buf = (char *)linebuf; 309 continue; 310 } 311 } 312 if (pos != buf) { 313 *pos++ = REC_D; 314 line->size = pos - buf; 315 return (0); 316 } else 317 return (EOF); 318 } 319 320 /* 321 * write a key/line pair to a temporary file 322 */ 323 void 324 putrec(rec, fp) 325 const RECHEADER *rec; 326 FILE *fp; 327 { 328 EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); 329 } 330 331 /* 332 * write a line to output 333 */ 334 void 335 putline(rec, fp) 336 const RECHEADER *rec; 337 FILE *fp; 338 { 339 EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); 340 } 341 342 /* 343 * get a record from a temporary file. (Used by merge sort.) 344 */ 345 int 346 geteasy(flno, top, filelist, nfiles, rec, end, dummy2) 347 int flno, top; 348 struct filelist *filelist; 349 int nfiles; 350 RECHEADER *rec; 351 u_char *end; 352 struct field *dummy2; 353 { 354 int i; 355 FILE *fp; 356 357 fp = fstack[flno].fp; 358 if ((u_char *) rec > end - sizeof(TRECHEADER)) 359 return (BUFFEND); 360 if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { 361 fclose(fp); 362 fstack[flno].fp = 0; 363 return (EOF); 364 } 365 if (end - rec->data < rec->length) { 366 for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) 367 ungetc(*((char *) rec + i), fp); 368 return (BUFFEND); 369 } 370 fread(rec->data, rec->length, 1, fp); 371 return (0); 372 } 373