xref: /netbsd/usr.bin/sort/init.c (revision 87c48f11)
1 /*	$NetBSD: init.c,v 1.11 2004/02/15 14:19:22 jdolecek Exp $	*/
2 
3 /*-
4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Ben Harris and Jaromir Dolecek.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*-
40  * Copyright (c) 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * Peter McIlroy.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  * 3. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #include "sort.h"
72 
73 #ifndef lint
74 __RCSID("$NetBSD: init.c,v 1.11 2004/02/15 14:19:22 jdolecek Exp $");
75 __SCCSID("@(#)init.c	8.1 (Berkeley) 6/6/93");
76 #endif /* not lint */
77 
78 #include <ctype.h>
79 #include <string.h>
80 
81 static void insertcol __P((struct field *));
82 static const char *setcolumn __P((const char *, struct field *, int));
83 
84 u_char gweights[NBINS];
85 
86 /*
87  * masks of ignored characters.  Alltable is 256 ones.
88  */
89 static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
90 
91 /*
92  * clist (list of columns which correspond to one or more icol or tcol)
93  * is in increasing order of columns.
94  * Fields are kept in increasing order of fields.
95  */
96 
97 /*
98  * keep clist in order--inserts a column in a sorted array
99  */
100 static void
101 insertcol(field)
102 	struct field *field;
103 {
104 	int i;
105 	for (i = 0; i < ncols; i++)
106 		if (field->icol.num <= clist[i].num)
107 			break;
108 	if (field->icol.num != clist[i].num) {
109 		memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
110 		clist[i].num = field->icol.num;
111 		ncols++;
112 	}
113 	if (field->tcol.num && field->tcol.num != field->icol.num) {
114 		for (i = 0; i < ncols; i++)
115 			if (field->tcol.num <= clist[i].num)
116 				break;
117 		if (field->tcol.num != clist[i].num) {
118 			memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
119 			clist[i].num = field->tcol.num;
120 			ncols++;
121 		}
122 	}
123 }
124 
125 /*
126  * matches fields with the appropriate columns--n^2 but who cares?
127  */
128 void
129 fldreset(fldtab)
130 	struct field *fldtab;
131 {
132 	int i;
133 	fldtab[0].tcol.p = clist+ncols-1;
134 	for (++fldtab; fldtab->icol.num; ++fldtab) {
135 		for (i = 0; fldtab->icol.num != clist[i].num; i++)
136 			;
137 		fldtab->icol.p = clist + i;
138 		if (!fldtab->tcol.num)
139 			continue;
140 		for (i = 0; fldtab->tcol.num != clist[i].num; i++)
141 			;
142 		fldtab->tcol.p = clist + i;
143 	}
144 }
145 
146 /*
147  * interprets a column in a -k field
148  */
149 static const char *
150 setcolumn(pos, cur_fld, gflag)
151 	const char *pos;
152 	struct field *cur_fld;
153 	int gflag;
154 {
155 	struct column *col;
156 	char *npos;
157 	int tmp;
158 	col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
159 	col->num = (int) strtol(pos, &npos, 10);
160 	pos = npos;
161 	if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
162 		errx(2, "field numbers must be positive");
163 	if (*pos == '.') {
164 		if (!col->num)
165 			errx(2, "cannot indent end of line");
166 		++pos;
167 		col->indent = (int) strtol(pos, &npos, 10);
168 		pos = npos;
169 		if (&cur_fld->icol == col)
170 			col->indent--;
171 		if (col->indent < 0)
172 			errx(2, "illegal offset");
173 	}
174 	for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
175 		cur_fld->flags |= tmp;
176 	if (cur_fld->icol.num == 0)
177 		cur_fld->icol.num = 1;
178 	return (pos);
179 }
180 
181 int
182 setfield(pos, cur_fld, gflag)
183 	const char *pos;
184 	struct field *cur_fld;
185 	int gflag;
186 {
187 	static int nfields = 0;
188 	int tmp;
189 
190 	if (++nfields == ND)
191 		errx(2, "too many sort keys. (Limit is %d)", ND-1);
192 
193 	cur_fld->weights = ascii;
194 	cur_fld->mask = alltable;
195 
196 	pos = setcolumn(pos, cur_fld, gflag);
197 	if (*pos == '\0')			/* key extends to EOL. */
198 		cur_fld->tcol.num = 0;
199 	else {
200 		if (*pos != ',')
201 			errx(2, "illegal field descriptor");
202 		setcolumn((++pos), cur_fld, gflag);
203 	}
204 	if (!cur_fld->flags)
205 		cur_fld->flags = gflag;
206 	tmp = cur_fld->flags;
207 
208 	/*
209 	 * Assign appropriate mask table and weight table.
210 	 * If the global weights are reversed, the local field
211 	 * must be "re-reversed".
212 	 */
213 	if (((tmp & R) ^ (gflag & R)) && (tmp & F))
214 		cur_fld->weights = RFtable;
215 	else if (tmp & F)
216 		cur_fld->weights = Ftable;
217 	else if ((tmp & R) ^ (gflag & R))
218 		cur_fld->weights = Rascii;
219 
220 	if (tmp & I)
221 		cur_fld->mask = itable;
222 	else if (tmp & D)
223 		cur_fld->mask = dtable;
224 
225 	cur_fld->flags |= (gflag & (BI | BT));
226 	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
227 		cur_fld->flags &= ~BT;
228 
229 	if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
230 	    && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
231 	    && cur_fld->tcol.indent < cur_fld->icol.indent))
232 		errx(2, "fields out of order");
233 	insertcol(cur_fld);
234 	return (cur_fld->tcol.num);
235 }
236 
237 int
238 optval(desc, tcolflag)
239 	int desc, tcolflag;
240 {
241 	switch(desc) {
242 		case 'b':
243 			if (!tcolflag)
244 				return (BI);
245 			else
246 				return (BT);
247 		case 'd': return (D);
248 		case 'f': return (F);
249 		case 'i': return (I);
250 		case 'n': return (N);
251 		case 'r': return (R);
252 		default:  return (0);
253 	}
254 }
255 
256 /*
257  * Replace historic +SPEC arguments with appropriate -kSPEC.
258  */
259 void
260 fixit(argc, argv)
261 	int *argc;
262 	char **argv;
263 {
264 	int i, j, fplus=0;
265 	char *vpos, *tpos, spec[20];
266 	int col, indent;
267 	size_t sz;
268 
269 	for (i = 1; i < *argc; i++) {
270 		if (argv[i][0] != '+' && !fplus)
271 			continue;
272 
273 		if (fplus && (argv[i][0] != '-' || !isdigit(argv[i][1]))) {
274 			/* not a -POS argument, skip */
275 			fplus = 0;
276 			continue;
277 		}
278 
279 		/* parse spec */
280 		tpos = argv[i]+1;
281 		col = (int)strtol(tpos, &tpos, 10);
282 		if (*tpos == '.') {
283 			++tpos;
284 			indent = (int) strtol(tpos, &tpos, 10);
285 		} else
286 			indent = 0;
287 		/* tpos points to optional flags now */
288 
289 		/*
290 		 * For x.y, the obsolescent variant assumed 0 == beginning
291 		 * of line, while the new form uses 0 == end of line.
292 		 * Convert accordingly.
293 		 */
294 		if (!fplus) {
295 			/* +POS */
296 			col += 1;
297 			indent += 1;
298 		} else {
299 			/* -POS */
300 			if (indent > 0)
301 				col += 1;
302 		}
303 
304 		/* new style spec */
305 		sz = snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
306 		    tpos);
307 
308 		if (!fplus) {
309 			/* Replace the +POS argument with new-style -kSPEC */
310 			asprintf(&vpos, "-k%s", spec);
311 			argv[i] = vpos;
312 			fplus = 1;
313 		} else {
314 			/*
315 			 * Append the spec to one previously generated from
316 			 * +POS argument, and remove the argv element.
317 			 */
318 			asprintf(&vpos, "%s,%s", argv[i-1], spec);
319 			free(argv[i-1]);
320 			argv[i-1] = vpos;
321 			for (j=i; j < *argc; j++)
322 				argv[j] = argv[j+1];
323 			*argc -= 1;
324 		}
325 	}
326 }
327 
328 /*
329  * ascii, Rascii, Ftable, and RFtable map
330  * REC_D -> REC_D;  {not REC_D} -> {not REC_D}.
331  * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
332  * Note: when sorting in forward order, to encode character zero in a key,
333  * use \001\001; character 1 becomes \001\002.  In this case, character 0
334  * is reserved for the field delimiter.  Analagously for -r (fld_d = 255).
335  * Note: this is only good for ASCII sorting.  For different LC 's,
336  * all bets are off.  See also num_init in number.c
337  */
338 void
339 settables(gflags)
340 	int gflags;
341 {
342 	u_char *wts;
343 	int i, incr;
344 	for (i=0; i < 256; i++) {
345 		ascii[i] = i;
346 		if (i > REC_D && i < 255 - REC_D+1)
347 			Rascii[i] = 255 - i + 1;
348 		else
349 			Rascii[i] = 255 - i;
350 		if (islower(i)) {
351 			Ftable[i] = Ftable[toupper(i)];
352 			RFtable[i] = RFtable[toupper(i)];
353 		} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
354 			Ftable[i] = i + 1;
355 			RFtable[i] = Rascii[i] - 1;
356 		} else {
357 			Ftable[i] = i;
358 			RFtable[i] = Rascii[i];
359 		}
360 		alltable[i] = 1;
361 
362 		if (i == '\n' || isprint(i))
363 			itable[i] = 1;
364 		else
365 			itable[i] = 0;
366 
367 		if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
368 			dtable[i] = 1;
369 		else
370 			dtable[i] = 0;
371 	}
372 
373 	Rascii[REC_D] = RFtable[REC_D] = REC_D;
374 	if (isupper(REC_D))
375 		Ftable[tolower(REC_D)]++;
376 
377 	if ((gflags & R) && !((gflags & F) && SINGL_FLD))
378 		wts = Rascii;
379 	else if (!((gflags & F) && SINGL_FLD))
380 		wts = ascii;
381 	else if (gflags & R)
382 		wts = RFtable;
383 	else
384 		wts = Ftable;
385 
386 	memmove(gweights, wts, sizeof(gweights));
387 	incr = (gflags & R) ? -1 : 1;
388 	for (i = 0; i < REC_D; i++)
389 		gweights[i] += incr;
390 	gweights[REC_D] = ((gflags & R) ? 255 : 0);
391 	if (SINGL_FLD && (gflags & F)) {
392 		for (i = 0; i < REC_D; i++) {
393 			ascii[i] += incr;
394 			Rascii[i] += incr;
395 		}
396 		ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
397 	}
398 }
399