xref: /minix/usr.bin/sort/files.c (revision 0a6a1f1d)
1 /*	$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Peter McIlroy.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include "sort.h"
65 #include "fsort.h"
66 
67 __RCSID("$NetBSD: files.c,v 1.42 2015/08/05 07:10:03 mrg Exp $");
68 
69 #include <string.h>
70 
71 /* Align records in temporary files to avoid misaligned copies */
72 #define REC_ROUNDUP(n) (((n) + sizeof (long) - 1) & ~(sizeof (long) - 1))
73 
74 static ssize_t	seq(FILE *, u_char **);
75 
76 /*
77  * this is called when there is no special key. It's only called
78  * in the first fsort pass.
79  */
80 
81 static u_char *opos;
82 static size_t osz;
83 
84 void
makeline_copydown(RECHEADER * recbuf)85 makeline_copydown(RECHEADER *recbuf)
86 {
87 	memmove(recbuf->data, opos, osz);
88 }
89 
90 int
makeline(FILE * fp,RECHEADER * recbuf,u_char * bufend,struct field * dummy2)91 makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
92 {
93 	u_char *pos;
94 	int c;
95 
96 	pos = recbuf->data;
97 	if (osz != 0) {
98 		/*
99 		 * Buffer shortage is solved by either of two ways:
100 		 * o flush previous buffered data and start using the
101 		 *   buffer from start.
102 		 *   makeline_copydown() above must be called.
103 		 * o realloc buffer
104 		 *
105 		 * This code has relied on realloc changing 'bufend',
106 		 * but that isn't necessarily true.
107 		 */
108 		pos += osz;
109 		osz = 0;
110 	}
111 
112 	while (pos < bufend) {
113 		c = getc(fp);
114 		if (c == EOF) {
115 			if (pos == recbuf->data) {
116 				FCLOSE(fp);
117 				return EOF;
118 			}
119 			/* Add terminator to partial line */
120 			c = REC_D;
121 		}
122 		*pos++ = c;
123 		if (c == REC_D) {
124 			recbuf->offset = 0;
125 			recbuf->length = pos - recbuf->data;
126 			recbuf->keylen = recbuf->length - 1;
127 			return (0);
128 		}
129 	}
130 
131 	/* Ran out of buffer space... */
132 	if (recbuf->data < bufend) {
133 		/* Remember where the partial record is */
134 		osz = pos - recbuf->data;
135 		opos = recbuf->data;
136 	}
137 	return (BUFFEND);
138 }
139 
140 /*
141  * This generates keys. It's only called in the first fsort pass
142  */
143 int
makekey(FILE * fp,RECHEADER * recbuf,u_char * bufend,struct field * ftbl)144 makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
145 {
146 	static u_char *line_data;
147 	static ssize_t line_size;
148 	static int overflow = 0;
149 
150 	/* We get re-entered after returning BUFFEND - save old data */
151 	if (overflow) {
152 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
153 		return overflow ? BUFFEND : 0;
154 	}
155 
156 	line_size = seq(fp, &line_data);
157 	if (line_size == 0) {
158 		FCLOSE(fp);
159 		return EOF;
160 	}
161 
162 	if (line_size > bufend - recbuf->data) {
163 		overflow = 1;
164 	} else {
165 		overflow = enterkey(recbuf, bufend, line_data, line_size, ftbl);
166 	}
167 	return overflow ? BUFFEND : 0;
168 }
169 
170 /*
171  * get a line of input from fp
172  */
173 static ssize_t
seq(FILE * fp,u_char ** line)174 seq(FILE *fp, u_char **line)
175 {
176 	static u_char *buf;
177 	static size_t buf_size = DEFLLEN;
178 	u_char *end, *pos;
179 	int c;
180 	u_char *new_buf;
181 
182 	if (!buf) {
183 		/* one-time initialization */
184 		buf = malloc(buf_size);
185 		if (!buf)
186 		    err(2, "malloc of linebuf for %zu bytes failed",
187 			    buf_size);
188 	}
189 
190 	end = buf + buf_size;
191 	pos = buf;
192 	while ((c = getc(fp)) != EOF) {
193 		*pos++ = c;
194 		if (c == REC_D) {
195 			*line = buf;
196 			return pos - buf;
197 		}
198 		if (pos == end) {
199 			/* Long line - double size of buffer */
200 			/* XXX: Check here for stupidly long lines */
201 			buf_size *= 2;
202 			new_buf = realloc(buf, buf_size);
203 			if (!new_buf)
204 				err(2, "realloc of linebuf to %zu bytes failed",
205 					buf_size);
206 
207 			end = new_buf + buf_size;
208 			pos = new_buf + (pos - buf);
209 			buf = new_buf;
210 		}
211 	}
212 
213 	if (pos != buf) {
214 		/* EOF part way through line - add line terminator */
215 		*pos++ = REC_D;
216 		*line = buf;
217 		return pos - buf;
218 	}
219 
220 	return 0;
221 }
222 
223 /*
224  * write a key/line pair to a temporary file
225  */
226 void
putrec(const RECHEADER * rec,FILE * fp)227 putrec(const RECHEADER *rec, FILE *fp)
228 {
229 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length), fp,
230 	       "failed to write temp file");
231 }
232 
233 /*
234  * write a line to output
235  */
236 void
putline(const RECHEADER * rec,FILE * fp)237 putline(const RECHEADER *rec, FILE *fp)
238 {
239 	EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp,
240 	       "failed to write");
241 }
242 
243 /*
244  * write dump of key to output (for -Dk)
245  */
246 void
putkeydump(const RECHEADER * rec,FILE * fp)247 putkeydump(const RECHEADER *rec, FILE *fp)
248 {
249 	EWRITE(rec, 1, REC_ROUNDUP(offsetof(RECHEADER, data) + rec->offset), fp,
250 	       "failed to write debug key");
251 }
252 
253 /*
254  * get a record from a temporary file. (Used by merge sort.)
255  */
256 int
geteasy(FILE * fp,RECHEADER * rec,u_char * end,struct field * dummy2)257 geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
258 {
259 	length_t file_len;
260 	int i;
261 
262 	(void)sizeof (char[offsetof(RECHEADER, length) == 0 ? 1 : -1]);
263 
264 	if ((u_char *)(rec + 1) > end)
265 		return (BUFFEND);
266 	if (!fread(&rec->length, 1, sizeof rec->length, fp)) {
267 		fclose(fp);
268 		return (EOF);
269 	}
270 	file_len = REC_ROUNDUP(offsetof(RECHEADER, data) + rec->length);
271 	if (end - rec->data < (ptrdiff_t)file_len) {
272 		for (i = sizeof rec->length - 1; i >= 0;  i--)
273 			ungetc(*((char *) rec + i), fp);
274 		return (BUFFEND);
275 	}
276 
277 	fread(&rec->length + 1, file_len - sizeof rec->length, 1, fp);
278 	return (0);
279 }
280