xref: /netbsd/usr.bin/sort/fields.c (revision bf9ec67e)
1 /*	$NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Peter McIlroy.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 /* Subroutines to generate sort keys. */
40 
41 #include "sort.h"
42 
43 #ifndef lint
44 __RCSID("$NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $");
45 __SCCSID("@(#)fields.c	8.1 (Berkeley) 6/6/93");
46 #endif /* not lint */
47 
48 #define blancmange(ptr) {					\
49 	if (BLANK & d_mask[*(ptr)])				\
50 		while (BLANK & d_mask[*(++(ptr))]);		\
51 }
52 
53 #define NEXTCOL(pos) {						\
54 	if (!SEP_FLAG)						\
55 		while (BLANK & l_d_mask[*(++pos)]);		\
56 	while (!((FLD_D | REC_D_F) & l_d_mask[*++pos]));	\
57 }
58 
59 static u_char *enterfield __P((u_char *, u_char *, struct field *, int));
60 static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int));
61 
62 extern struct coldesc clist[(ND+1)*2];
63 extern int ncols;
64 
65 #define DECIMAL '.'
66 #define OFFSET 128
67 
68 u_char TENS[10];	/* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
69 u_char NEGTENS[10];	/* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
70 u_char *OFF_TENS, *OFF_NTENS;	/* TENS - '0', NEGTENS - '0' */
71 u_char fnum[NBINS], rnum[NBINS];
72 
73 /*
74  * constructs sort key with leading recheader, followed by the key,
75  * followed by the original line.
76  */
77 length_t
78 enterkey(keybuf, line, size, fieldtable)
79 	RECHEADER *keybuf;	/* pointer to start of key */
80 	DBT *line;
81 	int size;
82 	struct field fieldtable[];
83 {
84 	int i;
85 	u_char *l_d_mask;
86 	u_char *lineend, *pos;
87 	u_char *endkey, *keypos;
88 	struct coldesc *clpos;
89 	int col = 1;
90 	struct field *ftpos;
91 	l_d_mask = d_mask;
92 	pos = (u_char *) line->data - 1;
93 	lineend = (u_char *) line->data + line->size-1;
94 				/* don't include rec_delimiter */
95 
96 	for (i = 0; i < ncols; i++) {
97 		clpos = clist + i;
98 		for (; (col < clpos->num) && (pos < lineend); col++) {
99 			NEXTCOL(pos);
100 		}
101 		if (pos >= lineend)
102 			break;
103 		clpos->start = SEP_FLAG ? pos + 1 : pos;
104 		NEXTCOL(pos);
105 		clpos->end = pos;
106 		col++;
107 		if (pos >= lineend) {
108 			clpos->end = lineend;
109 			i++;
110 			break;
111 		}
112 	}
113 	for (; i <= ncols; i++)
114 		clist[i].start = clist[i].end = lineend;
115 	if (clist[0].start < (u_char *) line->data)
116 		clist[0].start++;
117 
118 	keypos = keybuf->data;
119 	endkey = (u_char *) keybuf + size - line->size;
120 	for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++)
121 		if ((keypos = enterfield(keypos, endkey, ftpos,
122 		    fieldtable->flags)) == NULL)
123 			return (1);
124 
125 	keybuf->offset = keypos - keybuf->data;
126 	keybuf->length = keybuf->offset + line->size;
127 	if (keybuf->length + sizeof(TRECHEADER) > size) {
128 		/* line too long for buffer */
129 		return (1);
130 	}
131 
132 	/*
133 	 * Make [s]radixsort() only sort by relevant part of key if:
134 	 * 1. we want to choose unique items by relevant field[s]
135 	 * 2. we want stable sort and so the items should be sorted only by
136 	 *    the relevant field[s]
137 	 */
138 	if (UNIQUE || (stable_sort && keybuf->offset < line->size))
139 		keypos[-1] = REC_D;
140 
141 	memcpy(keybuf->data + keybuf->offset, line->data, line->size);
142 	return (0);
143 }
144 
145 /*
146  * constructs a field (as defined by -k) within a key
147  */
148 static u_char *
149 enterfield(tablepos, endkey, cur_fld, gflags)
150 	struct field *cur_fld;
151 	u_char *tablepos, *endkey;
152 	int gflags;
153 {
154 	u_char *start, *end, *lineend, *mask, *lweight;
155 	struct column icol, tcol;
156 	u_int flags;
157 	u_int Rflag;
158 
159 	icol = cur_fld->icol;
160 	tcol = cur_fld->tcol;
161 	flags = cur_fld->flags;
162 	start = icol.p->start;
163 	lineend = clist[ncols].end;
164 	if (flags & BI)
165 		blancmange(start);
166 	start += icol.indent;
167 	start = min(start, lineend);
168 
169 	if (!tcol.num)
170 		end = lineend;
171 	else {
172 		if (tcol.indent) {
173 			end = tcol.p->start;
174 			if (flags & BT)
175 				blancmange(end);
176 			end += tcol.indent;
177 			end = min(end, lineend);
178 		} else
179 			end = tcol.p->end;
180 	}
181 
182 	if (flags & N) {
183 		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
184 		return number(tablepos, endkey, start, end, Rflag);
185 	}
186 
187 	mask = cur_fld->mask;
188 	lweight = cur_fld->weights;
189 	for (; start < end; start++)
190 		if (mask[*start]) {
191 			if (*start <= 1) {
192 				if (tablepos+2 >= endkey)
193 					return (NULL);
194 				*tablepos++ = lweight[1];
195 				*tablepos++ = lweight[*start ? 2 : 1];
196 			} else {
197 				if (tablepos+1 >= endkey)
198 					return (NULL);
199 				*tablepos++ = lweight[*start];
200 			}
201 		}
202 	*tablepos++ = lweight[0];
203 	return (tablepos == endkey ? NULL : tablepos);
204 }
205 
206 /* Uses the first bin to assign sign, expsign, 0, and the first
207  * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
208  *   When sorting in forward order:
209  * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
210  * else use (0-99)->(2-102).
211  * If the exponent is >=61, use another byte for each additional 253
212  * in the exponent. Cutoff is at 567.
213  * To avoid confusing the exponent and the mantissa, use a field delimiter
214  * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
215  * only time a field delimiter can come in that position.
216  * Reverse order is done analagously.
217  */
218 
219 static u_char *
220 number(pos, bufend, line, lineend, Rflag)
221 	u_char *line, *pos, *bufend, *lineend;
222 	int Rflag;
223 {
224 	int or_sign, parity = 0;
225 	int expincr = 1, exponent = -1;
226 	int bite, expsign = 1, sign = 1;
227 	u_char lastvalue, *nonzero, *tline, *C_TENS;
228 	u_char *nweights;
229 
230 	if (Rflag)
231 		nweights = rnum;
232 	else
233 		nweights = fnum;
234 	if (pos > bufend - 8)
235 		return (NULL);
236 	/*
237 	 * or_sign sets the sort direction:
238 	 *	(-r: +/-)(sign: +/-)(expsign: +/-)
239 	 */
240 	or_sign = sign ^ expsign ^ Rflag;
241 	blancmange(line);
242 	if (*line == '-') {	/* set the sign */
243 		or_sign ^= 1;
244 		sign = 0;
245 		line++;
246 	}
247 	/* eat initial zeroes */
248 	for (; *line == '0' && line < lineend; line++)
249 		;
250 	/* calculate exponents < 0 */
251 	if (*line == DECIMAL) {
252 		exponent = 1;
253 		while (*++line == '0' && line < lineend)
254 			exponent++;
255 		expincr = 0;
256 		expsign = 0;
257 	}
258 	/* next character better be a digit */
259 	if (*line < '1' || *line > '9' || line >= lineend) {
260 		*pos++ = nweights[127];
261 		return (pos);
262 	}
263 	if (expincr) {
264 		for (tline = line-1; *++tline >= '0' &&
265 		    *tline <= '9' && tline < lineend;)
266 			exponent++;
267 	}
268 	if (exponent > 567) {
269 		*pos++ = nweights[sign ? (expsign ? 254 : 128)
270 					: (expsign ? 0 : 126)];
271 		warnx("exponent out of bounds");
272 		return (pos);
273 	}
274 	bite = min(exponent, 61);
275 	*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
276 				: (expsign ? 64-bite : 64+bite)];
277 	if (bite >= 61) {
278 		do {
279 			exponent -= bite;
280 			bite = min(exponent, 254);
281 			*pos++ = nweights[or_sign ? 254-bite : bite];
282 		} while (bite == 254);
283 	}
284 	C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
285 	for (; line < lineend; line++) {
286 		if (*line >= '0' && *line <= '9') {
287 			if (parity) {
288 				*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
289 						: *line);
290 				if (pos == bufend)
291 					return (NULL);
292 				if (*line != '0' || lastvalue != '0')
293 					nonzero = pos;
294 			} else
295 				lastvalue = *line;
296 			parity ^= 1;
297 		} else if(*line == DECIMAL) {
298 			if(!expincr)	/* a decimal already occurred once */
299 				break;
300 			expincr = 0;
301 		} else
302 			break;
303 	}
304 	if (parity && lastvalue != '0') {
305 		*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
306 					OFF_TENS[lastvalue] + '0';
307 	} else
308 		pos = nonzero;
309 	if (pos > bufend-1)
310 		return (NULL);
311 	*pos++ = or_sign ? nweights[254] : nweights[0];
312 	return (pos);
313 }
314 
315 /* This forces a gap around the record delimiter
316  * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
317  * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
318  */
319 void
320 num_init()
321 {
322 	int i;
323 	TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
324 	NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
325 	OFF_TENS = TENS - '0';
326 	OFF_NTENS = NEGTENS - '0';
327 	for (i = 1; i < 10; i++) {
328 		TENS[i] = TENS[i - 1] + 10;
329 		NEGTENS[i] = NEGTENS[i - 1] - 10;
330 	}
331 	for (i = 0; i < REC_D; i++) {
332 		fnum[i] = i;
333 		rnum[255 - i] = i;
334 	}
335 	for (i = REC_D; i <255; i++) {
336 		fnum[i] = i + 1;
337 		rnum[255 - i] = i - 1;
338 	}
339 }
340