xref: /original-bsd/contrib/sort/files.c (revision c3e32dec)
1 /*-
2  * Copyright (c) 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Peter McIlroy.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)files.c	8.1 (Berkeley) 06/06/93";
13 #endif /* not lint */
14 
15 #include "sort.h"
16 #include "fsort.h"
17 
18 #include <string.h>
19 
20 /*
21  * this is the subroutine for file management for fsort().
22  * It keeps the buffers for all temporary files.
23  */
24 int
25 getnext(binno, infl0, nfiles, pos, end, dummy)
26 	int binno, nfiles;
27 	union f_handle infl0;
28 	register struct recheader *pos;
29 	register u_char *end;
30 	struct field *dummy;
31 {
32 	register int i;
33 	register u_char *hp;
34 	static long nleft = 0;
35 	static int cnt = 0, flag = -1;
36 	static u_char maxb = 0;
37 	static FILE *fd;
38 
39 	if (nleft == 0) {
40 		if (binno < 0)	/* reset files. */ {
41 			for (i = 0; i < nfiles; i++) {
42 				rewind(fstack[infl0.top + i].fd);
43 				fstack[infl0.top + i].max_o = 0;
44 			}
45 			flag = -1;
46 			nleft = cnt = 0;
47 			return(-1);
48 		}
49 		maxb = fstack[infl0.top].maxb;
50 		for (; nleft == 0; cnt++) {
51 			if (cnt >= nfiles) {
52 				cnt = 0;
53 				return (EOF);
54 			}
55 			fd = fstack[infl0.top + cnt].fd;
56 			hp = (u_char *) &nleft;
57 			for (i = sizeof(TRECHEADER); i; --i)
58 				*hp++ = getc(fd);
59 			if (binno < maxb)
60 				fstack[infl0.top+cnt].max_o
61 					+= sizeof(nleft) + nleft;
62 			else if (binno == maxb) {
63 				if (binno != fstack[infl0.top].lastb) {
64 					fseek(fd, fstack[infl0.top+
65 						cnt].max_o, SEEK_SET);
66 					fread(&nleft, sizeof(nleft), 1, fd);
67 				}
68 				if (nleft == 0)
69 					fclose(fd);
70 			} else if (binno == maxb + 1) {		/* skip a bin */
71 				fseek(fd, nleft, SEEK_CUR);
72 				fread(&nleft, sizeof(nleft), 1, fd);
73 				flag = cnt;
74 			}
75 		}
76 	}
77 	if ((u_char *) pos > end - sizeof(TRECHEADER))
78 		return (BUFFEND);
79 	hp = (u_char *) pos;
80 	for (i = sizeof(TRECHEADER); i ; --i)
81 		*hp++ = (u_char) getc(fd);
82 	if (end - pos->data < pos->length) {
83 		for (i = sizeof(TRECHEADER); i ;  i--)
84 			ungetc(*--hp, fd);
85 		return (BUFFEND);
86 	}
87 	fread(pos->data, pos->length, 1, fd);
88 	nleft -= pos->length + sizeof(TRECHEADER);
89 	if (nleft == 0 && binno == fstack[infl0.top].maxb)
90 		fclose(fd);
91 	return (0);
92 }
93 
94 /*
95  * this is called when there is no special key. It's only called
96  * in the first fsort pass.
97  */
98 int
99 makeline(flno, filelist, nfiles, buffer, bufend, dummy2)
100 	int flno, nfiles;
101 	union f_handle filelist;
102 	struct recheader *buffer;
103 	u_char *bufend;
104 	struct field *dummy2;
105 {
106 	static char *opos;
107 	register char *end, *pos;
108 	static int fileno = 0, overflow = 0;
109 	static FILE *fd = 0;
110 	register int c;
111 
112 	pos = (char *) buffer->data;
113 	end = min((char *) bufend, pos + MAXLLEN);
114 	if (overflow) {
115 		memmove(pos, opos, bufend - (u_char *) opos);
116 		pos += ((char *) bufend - opos);
117 		overflow = 0;
118 	}
119 	for (;;) {
120 		if (flno >= 0) {
121 			if (!(fd = fstack[flno].fd))
122 				return (EOF);
123 		} else if (!fd) {
124 			if (fileno  >= nfiles) return(EOF);
125 			if (!(fd = fopen(filelist.names[fileno], "r")))
126 				err(2, "%s", filelist.names[fileno]);
127 			++fileno;
128 		}
129 		while ((pos < end) && ((c = getc(fd)) != EOF)) {
130 			if ((*pos++ = c) == REC_D) {
131 				buffer->offset = 0;
132 				buffer->length = pos - (char *) buffer->data;
133 				return (0);
134 			}
135 		}
136 		if (pos >= end && end == (char *) bufend) {
137 			if ((char *) buffer->data < end) {
138 				overflow = 1;
139 				opos = (char *) buffer->data;
140 			}
141 			return (BUFFEND);
142 		} else if (c == EOF) {
143 			if (buffer->data != (u_char *) pos) {
144 				warnx("last character not record delimiter");
145 				*pos++ = REC_D;
146 				buffer->offset = 0;
147 				buffer->length = pos - (char *) buffer->data;
148 				return(0);
149 			}
150 			FCLOSE(fd);
151 			fd = 0;
152 			if(flno >= 0) fstack[flno].fd = 0;
153 		} else {
154 			buffer->data[100] = '\000';
155 			warnx("line too long:ignoring %s...", buffer->data);
156 		}
157 	}
158 }
159 
160 /*
161  * This generates keys. It's only called in the first fsort pass
162  */
163 int
164 makekey(flno, filelist, nfiles, buffer, bufend, ftbl)
165 	int flno, nfiles;
166 	union f_handle filelist;
167 	struct recheader *buffer;
168 	u_char *bufend;
169 	struct field *ftbl;
170 {
171 	static int (*get)();
172 	static int fileno = 0;
173 	static FILE *dbdesc = 0;
174 	static DBT dbkey[1], line[1];
175 	static int overflow = 0;
176 	int c;
177 	if (overflow) {
178 		overflow = 0;
179 		enterkey(buffer, line, bufend - (u_char *) buffer, ftbl);
180 		return (0);
181 	}
182 	for (;;) {
183 		if (flno >= 0) {
184 			get = seq;
185 			if (!(dbdesc = fstack[flno].fd))
186 				return(EOF);
187 		} else if (!dbdesc) {
188 			if (fileno  >= nfiles)
189 				return (EOF);
190 			dbdesc = fopen(filelist.names[fileno], "r");
191 			if (!dbdesc)
192 				err(2, "%s", filelist.names[fileno]);
193 			++fileno;
194 			get = seq;
195 		}
196 		if (!(c = get(dbdesc, line, dbkey))) {
197 			if ((signed)line->size > bufend - buffer->data)
198 				overflow = 1;
199 			else
200 				overflow = enterkey(buffer, line,
201 				    bufend - (u_char *) buffer, ftbl);
202 			if (overflow)
203 				return (BUFFEND);
204 			else
205 				return (0);
206 		}
207 		if (c == EOF) {
208 			FCLOSE(dbdesc);
209 			dbdesc = 0;
210 			if (flno >= 0) fstack[flno].fd = 0;
211 		} else {
212 
213 			((char *) line->data)[60] = '\000';
214 			warnx("line too long: ignoring %.100s...",
215 			    (char *)line->data);
216 		}
217 
218 	}
219 }
220 
221 /*
222  * get a key/line pair from fd
223  */
224 int
225 seq(fd, line, key)
226 	FILE *fd;
227 	DBT *key, *line;
228 {
229 	static char *buf, flag = 1;
230 	register char *end, *pos;
231 	register int c;
232 	if (flag) {
233 		flag = 0;
234 		buf = (char *) linebuf;
235 		end = buf + MAXLLEN;
236 		line->data = buf;
237 	}
238 	pos = buf;
239 	while ((c = getc(fd)) != EOF) {
240 		if ((*pos++ = c) == REC_D) {
241 			line->size = pos - buf;
242 			return (0);
243 		}
244 		if (pos == end) {
245 			line->size = MAXLLEN;
246 			*--pos = REC_D;
247 			while ((c = getc(fd)) != EOF) {
248 				if (c == REC_D)
249 					return (BUFFEND);
250 			}
251 		}
252 	}
253 	if (pos != buf) {
254 		warnx("last character not record delimiter");
255 		*pos++ = REC_D;
256 		line->size = pos - buf;
257 		return (0);
258 	} else
259 		return (EOF);
260 }
261 
262 /*
263  * write a key/line pair to a temporary file
264  */
265 void
266 putrec(rec, fd)
267 	register struct recheader *rec;
268 	register FILE *fd;
269 {
270 	EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fd);
271 }
272 
273 /*
274  * write a line to output
275  */
276 void
277 putline(rec, fd)
278 	register struct recheader *rec;
279 	register FILE *fd;
280 {
281 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fd);
282 }
283 
284 /*
285  * get a record from a temporary file. (Used by merge sort.)
286  */
287 int
288 geteasy(flno, filelist, nfiles, rec, end, dummy2)
289 	int flno, nfiles;
290 	union f_handle filelist;
291 	register struct recheader *rec;
292 	register u_char *end;
293 	struct field *dummy2;
294 {
295 	int i;
296 	FILE *fd;
297 	fd = fstack[flno].fd;
298 	if ((u_char *) rec > end - sizeof(TRECHEADER))
299 		return (BUFFEND);
300 	if (!fread(rec, 1, sizeof(TRECHEADER), fd)) {
301 		fclose(fd);
302 		fstack[flno].fd = 0;
303 		return (EOF);
304 	}
305 	if (end - rec->data < rec->length) {
306 		for (i = sizeof(TRECHEADER) - 1; i >= 0;  i--)
307 			ungetc(*((char *) rec + i), fd);
308 		return (BUFFEND);
309 	}
310 	fread(rec->data, rec->length, 1, fd);
311 	return (0);
312 }
313