1 /* $NetBSD: init.c,v 1.30 2021/09/19 11:37:01 andvar Exp $ */
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64 #include "sort.h"
65
66 __RCSID("$NetBSD: init.c,v 1.30 2021/09/19 11:37:01 andvar Exp $");
67
68 #include <ctype.h>
69 #include <string.h>
70
71 static void insertcol(struct field *);
72 static const char *setcolumn(const char *, struct field *);
73
74 /*
75 * masks of ignored characters.
76 */
77 static u_char dtable[NBINS], itable[NBINS];
78
79 /*
80 * parsed key options
81 */
82 struct coldesc *clist = NULL;
83 int ncols = 0;
84
85 /*
86 * clist (list of columns which correspond to one or more icol or tcol)
87 * is in increasing order of columns.
88 * Fields are kept in increasing order of fields.
89 */
90
91 /*
92 * keep clist in order--inserts a column in a sorted array
93 */
94 static void
insertcol(struct field * field)95 insertcol(struct field *field)
96 {
97 int i;
98 struct coldesc *p;
99
100 /* Make space for new item */
101 p = realloc(clist, (ncols + 2) * sizeof(*clist));
102 if (!p)
103 err(1, "realloc");
104 clist = p;
105 memset(&clist[ncols], 0, sizeof(clist[ncols]));
106
107 for (i = 0; i < ncols; i++)
108 if (field->icol.num <= clist[i].num)
109 break;
110 if (field->icol.num != clist[i].num) {
111 memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
112 clist[i].num = field->icol.num;
113 ncols++;
114 }
115 if (field->tcol.num && field->tcol.num != field->icol.num) {
116 for (i = 0; i < ncols; i++)
117 if (field->tcol.num <= clist[i].num)
118 break;
119 if (field->tcol.num != clist[i].num) {
120 memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
121 clist[i].num = field->tcol.num;
122 ncols++;
123 }
124 }
125 }
126
127 /*
128 * matches fields with the appropriate columns--n^2 but who cares?
129 */
130 void
fldreset(struct field * fldtab)131 fldreset(struct field *fldtab)
132 {
133 int i;
134
135 fldtab[0].tcol.p = clist + ncols - 1;
136 for (++fldtab; fldtab->icol.num; ++fldtab) {
137 for (i = 0; fldtab->icol.num != clist[i].num; i++)
138 ;
139 fldtab->icol.p = clist + i;
140 if (!fldtab->tcol.num)
141 continue;
142 for (i = 0; fldtab->tcol.num != clist[i].num; i++)
143 ;
144 fldtab->tcol.p = clist + i;
145 }
146 }
147
148 /*
149 * interprets a column in a -k field
150 */
151 static const char *
setcolumn(const char * pos,struct field * cur_fld)152 setcolumn(const char *pos, struct field *cur_fld)
153 {
154 struct column *col;
155 char *npos;
156 int tmp;
157 col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
158 col->num = (int) strtol(pos, &npos, 10);
159 pos = npos;
160 if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
161 errx(2, "field numbers must be positive");
162 if (*pos == '.') {
163 if (!col->num)
164 errx(2, "cannot indent end of line");
165 ++pos;
166 col->indent = (int) strtol(pos, &npos, 10);
167 pos = npos;
168 if (&cur_fld->icol == col)
169 col->indent--;
170 if (col->indent < 0)
171 errx(2, "illegal offset");
172 }
173 for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
174 cur_fld->flags |= tmp;
175 if (cur_fld->icol.num == 0)
176 cur_fld->icol.num = 1;
177 return (pos);
178 }
179
180 int
setfield(const char * pos,struct field * cur_fld,int gflag)181 setfield(const char *pos, struct field *cur_fld, int gflag)
182 {
183 cur_fld->mask = NULL;
184
185 pos = setcolumn(pos, cur_fld);
186 if (*pos == '\0') /* key extends to EOL. */
187 cur_fld->tcol.num = 0;
188 else {
189 if (*pos != ',')
190 errx(2, "illegal field descriptor");
191 setcolumn((++pos), cur_fld);
192 }
193 if (!cur_fld->flags)
194 cur_fld->flags = gflag;
195 if (REVERSE)
196 /* A local 'r' doesn't invert the global one */
197 cur_fld->flags &= ~R;
198
199 /* Assign appropriate mask table and weight table. */
200 cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
201 if (cur_fld->flags & I)
202 cur_fld->mask = itable;
203 else if (cur_fld->flags & D)
204 cur_fld->mask = dtable;
205
206 cur_fld->flags |= (gflag & (BI | BT));
207 if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
208 cur_fld->flags &= ~BT;
209
210 if (cur_fld->tcol.num
211 && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
212 && (cur_fld->tcol.num <= cur_fld->icol.num
213 /* indent if 0 -> end of field, i.e. okay */
214 && cur_fld->tcol.indent != 0
215 && cur_fld->tcol.indent < cur_fld->icol.indent))
216 errx(2, "fields out of order");
217
218 insertcol(cur_fld);
219 return (cur_fld->tcol.num);
220 }
221
222 int
optval(int desc,int tcolflag)223 optval(int desc, int tcolflag)
224 {
225 switch(desc) {
226 case 'b':
227 if (!tcolflag)
228 return BI;
229 else
230 return BT;
231 case 'd': return D;
232 case 'f': return F;
233 case 'i': return I;
234 case 'l': return L;
235 case 'n': return N;
236 case 'r': return R;
237 default: return 0;
238 }
239 }
240
241 /*
242 * Return true if the options found in ARG, according to the getopt
243 * spec in OPTS, require an additional argv word as an option
244 * argument.
245 */
246 static int
options_need_argument(const char * arg,const char * opts)247 options_need_argument(const char *arg, const char *opts)
248 {
249 size_t pos;
250 const char *s;
251
252 /*assert(arg[0] == '-');*/
253
254 pos = 1;
255 while (arg[pos]) {
256 s = strchr(opts, arg[pos]);
257 if (s == NULL) {
258 /* invalid option */
259 return 0;
260 }
261 if (s[1] == ':') {
262 /* option requires argument */
263 if (arg[pos+1] == '\0') {
264 /* no argument in this arg */
265 return 1;
266 }
267 else {
268 /* argument is in this arg; no more options */
269 return 0;
270 }
271 }
272 pos++;
273 }
274 return 0;
275 }
276
277 /*
278 * Replace historic +SPEC arguments with appropriate -kSPEC.
279 *
280 * The form can be either a single +SPEC or a pair +SPEC -SPEC.
281 * The following -SPEC is not recognized unless it follows
282 * immediately.
283 */
284 void
fixit(int * argc,char ** argv,const char * opts)285 fixit(int *argc, char **argv, const char *opts)
286 {
287 int i, j, sawplus;
288 char *vpos, *tpos, spec[20];
289 int col, indent;
290
291 sawplus = 0;
292 for (i = 1; i < *argc; i++) {
293 /*
294 * This loop must stop exactly where getopt will stop.
295 * Otherwise it turns e.g. "sort x +3" into "sort x
296 * -k4.1", which will croak if +3 was in fact really a
297 * file name. In order to do this reliably we need to
298 * be able to identify argv words that are option
299 * arguments.
300 */
301
302 if (!strcmp(argv[i], "--")) {
303 /* End of options; stop. */
304 break;
305 }
306
307 if (argv[i][0] == '+') {
308 /* +POS argument */
309 sawplus = 1;
310 } else if (argv[i][0] == '-' && sawplus &&
311 isdigit((unsigned char)argv[i][1])) {
312 /* -POS argument */
313 sawplus = 0;
314 } else if (argv[i][0] == '-') {
315 /* other option */
316 sawplus = 0;
317 if (options_need_argument(argv[i], opts)) {
318 /* skip over the argument */
319 i++;
320 }
321 continue;
322 } else {
323 /* not an option at all; stop */
324 sawplus = 0;
325 break;
326 }
327
328 /*
329 * At this point argv[i] is an old-style spec. The
330 * sawplus flag used by the above loop logic also
331 * tells us if it's a +SPEC or -SPEC.
332 */
333
334 /* parse spec */
335 tpos = argv[i]+1;
336 col = (int)strtol(tpos, &tpos, 10);
337 if (*tpos == '.') {
338 ++tpos;
339 indent = (int) strtol(tpos, &tpos, 10);
340 } else
341 indent = 0;
342 /* tpos now points to the optional flags */
343
344 /*
345 * In the traditional form, x.0 means beginning of line;
346 * in the new form, x.0 means end of line. Adjust the
347 * value of INDENT accordingly.
348 */
349 if (sawplus) {
350 /* +POS */
351 col += 1;
352 indent += 1;
353 } else {
354 /* -POS */
355 if (indent > 0)
356 col += 1;
357 }
358
359 /* make the new style spec */
360 (void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
361 tpos);
362
363 if (sawplus) {
364 /* Replace the +POS argument with new-style -kSPEC */
365 asprintf(&vpos, "-k%s", spec);
366 argv[i] = vpos;
367 } else {
368 /*
369 * Append the spec to the one from the
370 * preceding +POS argument, and remove the
371 * current argv element entirely.
372 */
373 asprintf(&vpos, "%s,%s", argv[i-1], spec);
374 free(argv[i-1]);
375 argv[i-1] = vpos;
376 for (j=i; j < *argc; j++)
377 argv[j] = argv[j+1];
378 *argc -= 1;
379 i--;
380 }
381 }
382 }
383
384 /*
385 * ascii, Rascii, Ftable, and RFtable map
386 *
387 * Sorting 'weight' tables.
388 * Convert 'ascii' characters into their sort order.
389 * The 'F' variants fold lower case to upper equivalent
390 * The 'R' variants are for reverse sorting.
391 *
392 * The record separator (REC_D) never needs a weight, this frees one
393 * byte value as an 'end of key' marker. This must be 0 for normal
394 * weight tables, and 0xff for reverse weight tables - and is used
395 * to terminate keys so that short keys sort before (after if reverse)
396 * longer keys.
397 *
398 * The field separator has a normal weight - although it cannot occur
399 * within a key unless it is the default (space+tab).
400 *
401 * All other bytes map to the appropriate value for the sort order.
402 * Numeric sorts don't need any tables, they are reversed by negation.
403 *
404 * Global reverse sorts are done by writing the sorted keys in reverse
405 * order - the sort itself is still forwards.
406 * This means that weights are only ever used when generating keys, any
407 * sort of the original data bytes is always forwards and unweighted.
408 *
409 * Note: this is only good for ASCII sorting. For different LC 's,
410 * all bets are off.
411 *
412 * itable[] and dtable[] are the masks for -i (ignore non-printables)
413 * and -d (only sort blank and alphanumerics).
414 */
415 void
settables(void)416 settables(void)
417 {
418 int i;
419 int next_weight = 1;
420 int rev_weight = 254;
421
422 ascii[REC_D] = 0;
423 Rascii[REC_D] = 255;
424 Ftable[REC_D] = 0;
425 RFtable[REC_D] = 255;
426
427 for (i = 0; i < 256; i++) {
428 if (i == REC_D)
429 continue;
430 ascii[i] = next_weight;
431 Rascii[i] = rev_weight;
432 if (Ftable[i] == 0) {
433 Ftable[i] = next_weight;
434 RFtable[i] = rev_weight;
435 Ftable[tolower(i)] = next_weight;
436 RFtable[tolower(i)] = rev_weight;
437 }
438 next_weight++;
439 rev_weight--;
440
441 if (i == '\n' || isprint(i))
442 itable[i] = 1;
443
444 if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
445 dtable[i] = 1;
446 }
447 }
448