/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <limits.h>

#include "options.h"

/*
 * options
 *
 * Overview
 *   sort(1) supports two methods for specifying the sort key:  the original,
 *   now-obsolete, +n -m form and the POSIX -k n,m form.  We refer to the former
 *   as "old specifiers" and the latter as "new specifiers".  The options()
 *   function parses the command line arguments given to sort, placing the sort
 *   key specifiers in the internal representation used in fields.c.
 *
 * Equivalence of specifiers
 *   One of sort(1)'s standard peculiarities is the transformation of the
 *   character offsets and field numbering between the new and old style field
 *   specifications.  We simply quote from the Single UNIX Specification:
 *
 *	+w.xT -y.zU
 *
 *   is equivalent to
 *
 *	undefined		when z == 0, U contains b, and -t is set
 *	-k w+1.x+1T,y.0U	when z == 0 otherwise
 *	-k w+1.x+1T,y+1.zU	when z > 0
 *
 *   Undoubtedly, this seemed logical at the time.  (Using only the field head
 *   as the coordinate, as done in the obsolete version, seems much simpler.)
 *   The reverse map is where the key specifier
 *
 *	-k w.xT,y.zU
 *
 *   is equivalent to
 *
 *	undefined		when z == 0, U contains b, and -t is set
 *	+w-1.x-1T -y.0U		when z == 0 otherwise
 *	+w-1.x-1T -y-1.zU	when z > 0
 *
 *   in the obsolete syntax.  Because the original key specifiers lead to a
 *   simpler implementation, the internal representation of a field in this
 *   implementation of sort is mostly that given by the obsolete syntax.
 */

/*
 * getopt() option strings.
 *
 * OPTIONS_STRING is the complete set accepted on the command line; the digits
 * are listed so that a stray -NNN is reported through the normal option path.
 * While a key specifier in the obsolete +m ... -n form is still open (that is,
 * before its closing -n has been seen), only the narrower set named by
 * OLD_SPEC_OPTIONS_STRING, which omits the digits, is scanned for.
 */
#define	OLD_SPEC_OPTIONS_STRING	"bdfiMnrcmuo:T:z:t:k:S:"
#define	OPTIONS_STRING		"cmuo:T:z:dfiMnrbt:k:S:0123456789"

/*
 * Flag bits passed to parse_field_spec().  A clear bit selects the alternative
 * named in the trailing comment.
 */
#define	OPTIONS_OLDSPEC		(1 << 0)	/* else new-style spec */
#define	OPTIONS_STARTSPEC	(1 << 1)	/* else end spec */

/*
 * is_number() reports whether the string C is an unsigned decimal integer.
 *
 * Returns 1 if C is non-empty and every character is a decimal digit, and 0
 * otherwise.  Signs, whitespace and any other non-digit characters cause a 0
 * return.  An empty string is deliberately rejected so that an empty argument
 * following -y is not silently consumed as the memory size.
 */
static int
is_number(char *C)
{
	char *p;

	if (*C == '\0')
		return (0);

	/* Walk to the terminator once rather than calling strlen() per pass. */
	for (p = C; *p != '\0'; p++)
		if (!isdigit((unsigned char)*p))
			return (0);

	return (1);
}

/*
 * A key given with the -k option or with the +n syntax does not inherit the
 * current global field modifiers once it carries any modifier of its own.
 *
 * field_spec_has_modifiers() measures the leading run of digits, periods and
 * commas in C.  It returns 0 when that run is exactly length characters long
 * (the specifier is purely positional) and 1 otherwise.
 */
static int
field_spec_has_modifiers(char *C, int length)
{
	int positional_len = strspn(C, ",.1234567890");

	return (positional_len != length);
}

107*101e15b5SRichard Lowe static void
field_apply_all(field_t * fc,flag_t flags)108*101e15b5SRichard Lowe field_apply_all(field_t *fc, flag_t flags)
109*101e15b5SRichard Lowe {
110*101e15b5SRichard Lowe field_t *f;
111*101e15b5SRichard Lowe
112*101e15b5SRichard Lowe for (f = fc; f; f = f->f_next)
113*101e15b5SRichard Lowe if ((f->f_options & FIELD_MODIFIERS_DEFINED) == 0)
114*101e15b5SRichard Lowe f->f_options |= flags;
115*101e15b5SRichard Lowe }
116*101e15b5SRichard Lowe
117*101e15b5SRichard Lowe static int
parse_field_spec(field_t * F,char * C,int flags,int length)118*101e15b5SRichard Lowe parse_field_spec(field_t *F, char *C, int flags, int length)
119*101e15b5SRichard Lowe {
120*101e15b5SRichard Lowe int p_period = MIN(length, strcspn(C, "."));
121*101e15b5SRichard Lowe int p_modifiers = MIN(length, strspn(C, ".1234567890"));
122*101e15b5SRichard Lowe int p_boundary = MIN(p_period, p_modifiers);
123*101e15b5SRichard Lowe int field = 0;
124*101e15b5SRichard Lowe int offset = 0;
125*101e15b5SRichard Lowe int offset_seen = 0;
126*101e15b5SRichard Lowe int i;
127*101e15b5SRichard Lowe int blanks_flag = 0;
128*101e15b5SRichard Lowe
129*101e15b5SRichard Lowe for (i = 0; i < p_boundary; i++) {
130*101e15b5SRichard Lowe if (isdigit((uchar_t)C[i]))
131*101e15b5SRichard Lowe field = (10 * field) + (C[i] - '0');
132*101e15b5SRichard Lowe else
133*101e15b5SRichard Lowe return (1);
134*101e15b5SRichard Lowe }
135*101e15b5SRichard Lowe
136*101e15b5SRichard Lowe if (p_period < p_modifiers) {
137*101e15b5SRichard Lowe for (i = p_period + 1; i < p_modifiers; i++) {
138*101e15b5SRichard Lowe if (isdigit((uchar_t)C[i])) {
139*101e15b5SRichard Lowe offset_seen++;
140*101e15b5SRichard Lowe offset = (10 * offset) + (C[i] - '0');
141*101e15b5SRichard Lowe } else {
142*101e15b5SRichard Lowe return (1);
143*101e15b5SRichard Lowe }
144*101e15b5SRichard Lowe }
145*101e15b5SRichard Lowe }
146*101e15b5SRichard Lowe
147*101e15b5SRichard Lowe if (p_modifiers < length) {
148*101e15b5SRichard Lowe for (i = p_modifiers; i < length; i++) {
149*101e15b5SRichard Lowe switch (C[i]) {
150*101e15b5SRichard Lowe case 'b':
151*101e15b5SRichard Lowe blanks_flag = 1;
152*101e15b5SRichard Lowe break;
153*101e15b5SRichard Lowe case 'd':
154*101e15b5SRichard Lowe F->f_options |= FIELD_DICTIONARY_ORDER;
155*101e15b5SRichard Lowe break;
156*101e15b5SRichard Lowe case 'f':
157*101e15b5SRichard Lowe F->f_options |= FIELD_FOLD_UPPERCASE;
158*101e15b5SRichard Lowe break;
159*101e15b5SRichard Lowe case 'i':
160*101e15b5SRichard Lowe F->f_options |=
161*101e15b5SRichard Lowe FIELD_IGNORE_NONPRINTABLES;
162*101e15b5SRichard Lowe break;
163*101e15b5SRichard Lowe case 'M':
164*101e15b5SRichard Lowe F->f_species = MONTH;
165*101e15b5SRichard Lowe break;
166*101e15b5SRichard Lowe case 'n':
167*101e15b5SRichard Lowe F->f_species = NUMERIC;
168*101e15b5SRichard Lowe break;
169*101e15b5SRichard Lowe case 'r':
170*101e15b5SRichard Lowe F->f_options |=
171*101e15b5SRichard Lowe FIELD_REVERSE_COMPARISONS;
172*101e15b5SRichard Lowe break;
173*101e15b5SRichard Lowe default:
174*101e15b5SRichard Lowe usage();
175*101e15b5SRichard Lowe break;
176*101e15b5SRichard Lowe }
177*101e15b5SRichard Lowe }
178*101e15b5SRichard Lowe }
179*101e15b5SRichard Lowe
180*101e15b5SRichard Lowe if (flags & OPTIONS_STARTSPEC) {
181*101e15b5SRichard Lowe F->f_start_field = field;
182*101e15b5SRichard Lowe F->f_start_offset = offset;
183*101e15b5SRichard Lowe if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC) {
184*101e15b5SRichard Lowe F->f_start_field--;
185*101e15b5SRichard Lowe if (offset_seen)
186*101e15b5SRichard Lowe F->f_start_offset--;
187*101e15b5SRichard Lowe }
188*101e15b5SRichard Lowe F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_START : 0;
189*101e15b5SRichard Lowe } else {
190*101e15b5SRichard Lowe F->f_end_field = field;
191*101e15b5SRichard Lowe F->f_end_offset = offset;
192*101e15b5SRichard Lowe if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC &&
193*101e15b5SRichard Lowe offset_seen && offset != 0)
194*101e15b5SRichard Lowe F->f_end_field--;
195*101e15b5SRichard Lowe F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_END : 0;
196*101e15b5SRichard Lowe }
197*101e15b5SRichard Lowe
198*101e15b5SRichard Lowe return (0);
199*101e15b5SRichard Lowe }
200*101e15b5SRichard Lowe
201*101e15b5SRichard Lowe static void
parse_new_field_spec(sort_t * S,char * arg)202*101e15b5SRichard Lowe parse_new_field_spec(sort_t *S, char *arg)
203*101e15b5SRichard Lowe {
204*101e15b5SRichard Lowe int length = strlen(arg);
205*101e15b5SRichard Lowe int p_comma = MIN(length, strcspn(arg, ","));
206*101e15b5SRichard Lowe field_t *nF;
207*101e15b5SRichard Lowe int p;
208*101e15b5SRichard Lowe
209*101e15b5SRichard Lowe /*
210*101e15b5SRichard Lowe * New field specifiers do not inherit from the general specifier if
211*101e15b5SRichard Lowe * they have any modifiers set. (This is specifically tested in the VSC
212*101e15b5SRichard Lowe * test suite, assertion 32 for POSIX.cmd/sort.)
213*101e15b5SRichard Lowe */
214*101e15b5SRichard Lowe if (field_spec_has_modifiers(arg, length)) {
215*101e15b5SRichard Lowe nF = field_new(NULL);
216*101e15b5SRichard Lowe nF->f_options = FIELD_MODIFIERS_DEFINED;
217*101e15b5SRichard Lowe } else {
218*101e15b5SRichard Lowe nF = field_new(S);
219*101e15b5SRichard Lowe }
220*101e15b5SRichard Lowe p = parse_field_spec(nF, arg, OPTIONS_STARTSPEC, p_comma);
221*101e15b5SRichard Lowe
222*101e15b5SRichard Lowe if (p != 0)
223*101e15b5SRichard Lowe usage();
224*101e15b5SRichard Lowe
225*101e15b5SRichard Lowe if (p_comma < length) {
226*101e15b5SRichard Lowe p = parse_field_spec(nF, &(arg[p_comma + 1]), 0,
227*101e15b5SRichard Lowe strlen(&(arg[p_comma + 1])));
228*101e15b5SRichard Lowe if (p != 0)
229*101e15b5SRichard Lowe usage();
230*101e15b5SRichard Lowe }
231*101e15b5SRichard Lowe
232*101e15b5SRichard Lowe if (nF->f_start_field < 0 || nF->f_start_offset < 0) {
233*101e15b5SRichard Lowe if (S->m_verbose)
234*101e15b5SRichard Lowe warn("-k %s is not a supported field specifier\n", arg);
235*101e15b5SRichard Lowe }
236*101e15b5SRichard Lowe nF->f_start_field = MAX(nF->f_start_field, 0);
237*101e15b5SRichard Lowe nF->f_start_offset = MAX(nF->f_start_offset, 0);
238*101e15b5SRichard Lowe
239*101e15b5SRichard Lowe /*
240*101e15b5SRichard Lowe * If the starting field exceeds a defined ending field, convention
241*101e15b5SRichard Lowe * dictates that the field is ignored.
242*101e15b5SRichard Lowe */
243*101e15b5SRichard Lowe if (nF->f_end_field == -1 || nF->f_start_field < nF->f_end_field ||
244*101e15b5SRichard Lowe (nF->f_start_field == nF->f_end_field &&
245*101e15b5SRichard Lowe nF->f_start_offset < nF->f_end_offset)) {
246*101e15b5SRichard Lowe field_add_to_chain(&(S->m_fields_head), nF);
247*101e15b5SRichard Lowe } else if (S->m_verbose) {
248*101e15b5SRichard Lowe warn("illegal field -k %s omitted", arg);
249*101e15b5SRichard Lowe }
250*101e15b5SRichard Lowe }
251*101e15b5SRichard Lowe
252*101e15b5SRichard Lowe /*
253*101e15b5SRichard Lowe * parse_old_field_spec() is getopt()-aware; it may modify the values of optind,
254*101e15b5SRichard Lowe * optarg, and so forth, to correctly determine the characteristics being
255*101e15b5SRichard Lowe * assigned to the current field.
256*101e15b5SRichard Lowe */
257*101e15b5SRichard Lowe static int
parse_old_field_spec(sort_t * S,int argc,char * argv[])258*101e15b5SRichard Lowe parse_old_field_spec(sort_t *S, int argc, char *argv[])
259*101e15b5SRichard Lowe {
260*101e15b5SRichard Lowe field_t *nF;
261*101e15b5SRichard Lowe int c, p;
262*101e15b5SRichard Lowe char *arg = argv[optind];
263*101e15b5SRichard Lowe
264*101e15b5SRichard Lowe if (field_spec_has_modifiers(arg + 1, strlen(arg + 1))) {
265*101e15b5SRichard Lowe nF = field_new(NULL);
266*101e15b5SRichard Lowe nF->f_options = FIELD_MODIFIERS_DEFINED;
267*101e15b5SRichard Lowe } else {
268*101e15b5SRichard Lowe nF = field_new(S);
269*101e15b5SRichard Lowe }
270*101e15b5SRichard Lowe
271*101e15b5SRichard Lowe p = parse_field_spec(nF, arg + 1, OPTIONS_OLDSPEC | OPTIONS_STARTSPEC,
272*101e15b5SRichard Lowe strlen(arg + 1));
273*101e15b5SRichard Lowe
274*101e15b5SRichard Lowe if (p != 0) {
275*101e15b5SRichard Lowe field_delete(nF);
276*101e15b5SRichard Lowe return (0);
277*101e15b5SRichard Lowe }
278*101e15b5SRichard Lowe
279*101e15b5SRichard Lowe /*
280*101e15b5SRichard Lowe * In the case that getopt() returns '?' (unrecognized option) or EOF
281*101e15b5SRichard Lowe * (non-option argument), the field is considered closed.
282*101e15b5SRichard Lowe */
283*101e15b5SRichard Lowe for (arg = argv[++optind]; optind < argc; arg = argv[optind]) {
284*101e15b5SRichard Lowe if (strlen(arg) >= 2 && *arg == '-' &&
285*101e15b5SRichard Lowe isdigit(*(uchar_t *)(arg + 1))) {
286*101e15b5SRichard Lowe (void) parse_field_spec(nF, arg + 1,
287*101e15b5SRichard Lowe OPTIONS_OLDSPEC, strlen(arg) - 1);
288*101e15b5SRichard Lowe field_add_to_chain(&(S->m_fields_head), nF);
289*101e15b5SRichard Lowe optind++;
290*101e15b5SRichard Lowe return (1);
291*101e15b5SRichard Lowe }
292*101e15b5SRichard Lowe
293*101e15b5SRichard Lowe if ((c = getopt(argc, argv, OLD_SPEC_OPTIONS_STRING)) != EOF) {
294*101e15b5SRichard Lowe switch (c) {
295*101e15b5SRichard Lowe case 'b':
296*101e15b5SRichard Lowe nF->f_options |= FIELD_IGNORE_BLANKS_START;
297*101e15b5SRichard Lowe break;
298*101e15b5SRichard Lowe case 'd':
299*101e15b5SRichard Lowe nF->f_options |= FIELD_DICTIONARY_ORDER;
300*101e15b5SRichard Lowe break;
301*101e15b5SRichard Lowe case 'f':
302*101e15b5SRichard Lowe nF->f_options |= FIELD_FOLD_UPPERCASE;
303*101e15b5SRichard Lowe break;
304*101e15b5SRichard Lowe case 'i':
305*101e15b5SRichard Lowe nF->f_options |= FIELD_IGNORE_NONPRINTABLES;
306*101e15b5SRichard Lowe break;
307*101e15b5SRichard Lowe case 'M':
308*101e15b5SRichard Lowe nF->f_species = MONTH;
309*101e15b5SRichard Lowe break;
310*101e15b5SRichard Lowe case 'n':
311*101e15b5SRichard Lowe nF->f_species = NUMERIC;
312*101e15b5SRichard Lowe break;
313*101e15b5SRichard Lowe case 'r':
314*101e15b5SRichard Lowe nF->f_options |= FIELD_REVERSE_COMPARISONS;
315*101e15b5SRichard Lowe break;
316*101e15b5SRichard Lowe case '?':
317*101e15b5SRichard Lowe case 'c':
318*101e15b5SRichard Lowe case 'm':
319*101e15b5SRichard Lowe case 'u':
320*101e15b5SRichard Lowe /*
321*101e15b5SRichard Lowe * Options without arguments.
322*101e15b5SRichard Lowe */
323*101e15b5SRichard Lowe optind -= 1;
324*101e15b5SRichard Lowe field_add_to_chain(&(S->m_fields_head), nF);
325*101e15b5SRichard Lowe return (1);
326*101e15b5SRichard Lowe /*NOTREACHED*/
327*101e15b5SRichard Lowe case 'o':
328*101e15b5SRichard Lowe case 'T':
329*101e15b5SRichard Lowe case 'z':
330*101e15b5SRichard Lowe case 't':
331*101e15b5SRichard Lowe case 'k':
332*101e15b5SRichard Lowe case 'S':
333*101e15b5SRichard Lowe /*
334*101e15b5SRichard Lowe * Options with arguments.
335*101e15b5SRichard Lowe */
336*101e15b5SRichard Lowe if (optarg == argv[optind - 1] + 2) {
337*101e15b5SRichard Lowe optind -= 1;
338*101e15b5SRichard Lowe } else {
339*101e15b5SRichard Lowe optind -= 2;
340*101e15b5SRichard Lowe }
341*101e15b5SRichard Lowe field_add_to_chain(&(S->m_fields_head), nF);
342*101e15b5SRichard Lowe return (1);
343*101e15b5SRichard Lowe /*NOTREACHED*/
344*101e15b5SRichard Lowe default:
345*101e15b5SRichard Lowe die(EMSG_UNKN_OPTION);
346*101e15b5SRichard Lowe /*NOTREACHED*/
347*101e15b5SRichard Lowe }
348*101e15b5SRichard Lowe } else {
349*101e15b5SRichard Lowe break;
350*101e15b5SRichard Lowe }
351*101e15b5SRichard Lowe }
352*101e15b5SRichard Lowe
353*101e15b5SRichard Lowe field_add_to_chain(&(S->m_fields_head), nF);
354*101e15b5SRichard Lowe return (1);
355*101e15b5SRichard Lowe }
356*101e15b5SRichard Lowe
357*101e15b5SRichard Lowe int
options(sort_t * S,int argc,char * argv[])358*101e15b5SRichard Lowe options(sort_t *S, int argc, char *argv[])
359*101e15b5SRichard Lowe {
360*101e15b5SRichard Lowe int c;
361*101e15b5SRichard Lowe
362*101e15b5SRichard Lowe optind = 1;
363*101e15b5SRichard Lowe while (optind < argc) {
364*101e15b5SRichard Lowe if (strncmp("-y", argv[optind], strlen("-y")) == 0) {
365*101e15b5SRichard Lowe /*
366*101e15b5SRichard Lowe * The -y [kmem] option violates the standard syntax
367*101e15b5SRichard Lowe * outlined in intro(1). we have to be a little fancy
368*101e15b5SRichard Lowe * to determine if the next argument is a valid integer.
369*101e15b5SRichard Lowe * (note, of course, that the previous sort(1) had no
370*101e15b5SRichard Lowe * mechanism to resolve a final
371*101e15b5SRichard Lowe * -y 99999
372*101e15b5SRichard Lowe * into
373*101e15b5SRichard Lowe * -y, file 99999
374*101e15b5SRichard Lowe * or
375*101e15b5SRichard Lowe * -y 99999, file stdin
376*101e15b5SRichard Lowe *
377*101e15b5SRichard Lowe * Now one can unambiguously use
378*101e15b5SRichard Lowe * -y -- 99999
379*101e15b5SRichard Lowe * and
380*101e15b5SRichard Lowe * -y 99999 -
381*101e15b5SRichard Lowe * to distinguish these cases.
382*101e15b5SRichard Lowe *
383*101e15b5SRichard Lowe * That said, we do not use the information passed using
384*101e15b5SRichard Lowe * -y option in sort(1); we provide the argument to
385*101e15b5SRichard Lowe * preserve compatibility for existing scripts.
386*101e15b5SRichard Lowe */
387*101e15b5SRichard Lowe if (strlen(argv[optind]) == strlen("-y") &&
388*101e15b5SRichard Lowe optind + 1 < argc &&
389*101e15b5SRichard Lowe is_number(argv[optind + 1]))
390*101e15b5SRichard Lowe optind += 2;
391*101e15b5SRichard Lowe else
392*101e15b5SRichard Lowe optind += 1;
393*101e15b5SRichard Lowe }
394*101e15b5SRichard Lowe
395*101e15b5SRichard Lowe if ((c = getopt(argc, argv, OPTIONS_STRING)) != EOF) {
396*101e15b5SRichard Lowe switch (c) {
397*101e15b5SRichard Lowe case 'c':
398*101e15b5SRichard Lowe S->m_check_if_sorted_only = 1;
399*101e15b5SRichard Lowe break;
400*101e15b5SRichard Lowe
401*101e15b5SRichard Lowe case 'm':
402*101e15b5SRichard Lowe S->m_merge_only = 1;
403*101e15b5SRichard Lowe break;
404*101e15b5SRichard Lowe
405*101e15b5SRichard Lowe case 'u':
406*101e15b5SRichard Lowe S->m_unique_lines = 1;
407*101e15b5SRichard Lowe break;
408*101e15b5SRichard Lowe
409*101e15b5SRichard Lowe case 'o':
410*101e15b5SRichard Lowe S->m_output_filename = optarg;
411*101e15b5SRichard Lowe break;
412*101e15b5SRichard Lowe
413*101e15b5SRichard Lowe case 'T':
414*101e15b5SRichard Lowe S->m_tmpdir_template = optarg;
415*101e15b5SRichard Lowe break;
416*101e15b5SRichard Lowe
417*101e15b5SRichard Lowe case 'z':
418*101e15b5SRichard Lowe /*
419*101e15b5SRichard Lowe * ignore optarg -- obsolete
420*101e15b5SRichard Lowe */
421*101e15b5SRichard Lowe break;
422*101e15b5SRichard Lowe
423*101e15b5SRichard Lowe case 'd':
424*101e15b5SRichard Lowe S->m_field_options |= FIELD_DICTIONARY_ORDER;
425*101e15b5SRichard Lowe field_apply_all(S->m_fields_head,
426*101e15b5SRichard Lowe FIELD_DICTIONARY_ORDER);
427*101e15b5SRichard Lowe break;
428*101e15b5SRichard Lowe
429*101e15b5SRichard Lowe case 'f':
430*101e15b5SRichard Lowe S->m_field_options |= FIELD_FOLD_UPPERCASE;
431*101e15b5SRichard Lowe field_apply_all(S->m_fields_head,
432*101e15b5SRichard Lowe FIELD_FOLD_UPPERCASE);
433*101e15b5SRichard Lowe break;
434*101e15b5SRichard Lowe
435*101e15b5SRichard Lowe case 'i':
436*101e15b5SRichard Lowe S->m_field_options |=
437*101e15b5SRichard Lowe FIELD_IGNORE_NONPRINTABLES;
438*101e15b5SRichard Lowe field_apply_all(S->m_fields_head,
439*101e15b5SRichard Lowe FIELD_IGNORE_NONPRINTABLES);
440*101e15b5SRichard Lowe break;
441*101e15b5SRichard Lowe
442*101e15b5SRichard Lowe case 'M':
443*101e15b5SRichard Lowe S->m_default_species = MONTH;
444*101e15b5SRichard Lowe S->m_field_options &=
445*101e15b5SRichard Lowe ~FIELD_IGNORE_BLANKS_START;
446*101e15b5SRichard Lowe break;
447*101e15b5SRichard Lowe
448*101e15b5SRichard Lowe case 'n':
449*101e15b5SRichard Lowe S->m_default_species = NUMERIC;
450*101e15b5SRichard Lowe {
451*101e15b5SRichard Lowe field_t *f;
452*101e15b5SRichard Lowe
453*101e15b5SRichard Lowe for (f = S->m_fields_head; f;
454*101e15b5SRichard Lowe f = f->f_next)
455*101e15b5SRichard Lowe if ((f->f_options &
456*101e15b5SRichard Lowe FIELD_MODIFIERS_DEFINED) ==
457*101e15b5SRichard Lowe 0)
458*101e15b5SRichard Lowe f->f_species = NUMERIC;
459*101e15b5SRichard Lowe }
460*101e15b5SRichard Lowe break;
461*101e15b5SRichard Lowe
462*101e15b5SRichard Lowe case 'b':
463*101e15b5SRichard Lowe S->m_field_options |=
464*101e15b5SRichard Lowe FIELD_IGNORE_BLANKS_START |
465*101e15b5SRichard Lowe FIELD_IGNORE_BLANKS_END;
466*101e15b5SRichard Lowe break;
467*101e15b5SRichard Lowe
468*101e15b5SRichard Lowe case 'r':
469*101e15b5SRichard Lowe S->m_field_options |=
470*101e15b5SRichard Lowe FIELD_REVERSE_COMPARISONS;
471*101e15b5SRichard Lowe field_apply_all(S->m_fields_head,
472*101e15b5SRichard Lowe FIELD_REVERSE_COMPARISONS);
473*101e15b5SRichard Lowe break;
474*101e15b5SRichard Lowe
475*101e15b5SRichard Lowe case 't':
476*101e15b5SRichard Lowe /*
477*101e15b5SRichard Lowe * delimiter
478*101e15b5SRichard Lowe */
479*101e15b5SRichard Lowe if (S->m_single_byte_locale) {
480*101e15b5SRichard Lowe /*
481*101e15b5SRichard Lowe * Most debuggers can't take tabs as
482*101e15b5SRichard Lowe * input arguments, so we provide an
483*101e15b5SRichard Lowe * escape sequence to allow testing of
484*101e15b5SRichard Lowe * this special case for the DEBUG
485*101e15b5SRichard Lowe * version.
486*101e15b5SRichard Lowe */
487*101e15b5SRichard Lowe S->m_field_separator.sc =
488*101e15b5SRichard Lowe #ifdef DEBUG
489*101e15b5SRichard Lowe xstreql(optarg, "\\t") ? '\t' :
490*101e15b5SRichard Lowe #endif
491*101e15b5SRichard Lowe optarg[0];
492*101e15b5SRichard Lowe } else
493*101e15b5SRichard Lowe (void) mbtowc(&S->m_field_separator.wc,
494*101e15b5SRichard Lowe optarg, MB_CUR_MAX);
495*101e15b5SRichard Lowe break;
496*101e15b5SRichard Lowe
497*101e15b5SRichard Lowe case 'k':
498*101e15b5SRichard Lowe /*
499*101e15b5SRichard Lowe * key
500*101e15b5SRichard Lowe */
501*101e15b5SRichard Lowe (void) parse_new_field_spec(S, optarg);
502*101e15b5SRichard Lowe break;
503*101e15b5SRichard Lowe
504*101e15b5SRichard Lowe case 'S':
505*101e15b5SRichard Lowe S->m_memory_limit = strtomem(optarg);
506*101e15b5SRichard Lowe #ifdef DEBUG
507*101e15b5SRichard Lowe (void) fprintf(stderr, CMDNAME
508*101e15b5SRichard Lowe ": limiting size to %d bytes\n",
509*101e15b5SRichard Lowe S->m_memory_limit);
510*101e15b5SRichard Lowe #endif /* DEBUG */
511*101e15b5SRichard Lowe break;
512*101e15b5SRichard Lowe
513*101e15b5SRichard Lowe /*
514*101e15b5SRichard Lowe * We never take a naked -999; these should always be
515*101e15b5SRichard Lowe * associated with a preceding +000.
516*101e15b5SRichard Lowe */
517*101e15b5SRichard Lowe case '0':
518*101e15b5SRichard Lowe case '1':
519*101e15b5SRichard Lowe case '2':
520*101e15b5SRichard Lowe case '3':
521*101e15b5SRichard Lowe case '4':
522*101e15b5SRichard Lowe case '5':
523*101e15b5SRichard Lowe case '6':
524*101e15b5SRichard Lowe case '7':
525*101e15b5SRichard Lowe case '8':
526*101e15b5SRichard Lowe case '9':
527*101e15b5SRichard Lowe usage();
528*101e15b5SRichard Lowe break;
529*101e15b5SRichard Lowe case '?':
530*101e15b5SRichard Lowe /* error case */
531*101e15b5SRichard Lowe usage();
532*101e15b5SRichard Lowe break;
533*101e15b5SRichard Lowe }
534*101e15b5SRichard Lowe
535*101e15b5SRichard Lowe /*
536*101e15b5SRichard Lowe * Go back for next argument.
537*101e15b5SRichard Lowe */
538*101e15b5SRichard Lowe continue;
539*101e15b5SRichard Lowe }
540*101e15b5SRichard Lowe
541*101e15b5SRichard Lowe /*
542*101e15b5SRichard Lowe * There are three (interpretable) possibilities for getopt() to
543*101e15b5SRichard Lowe * return EOF with arguments on the command line: we have seen
544*101e15b5SRichard Lowe * the "end-of-options" token, --, we have encountered the
545*101e15b5SRichard Lowe * old-style field definition, +NNN, or we have found a
546*101e15b5SRichard Lowe * filename.
547*101e15b5SRichard Lowe *
548*101e15b5SRichard Lowe * In the second case, we must also search for the optional -NNN
549*101e15b5SRichard Lowe * field terminal definition. (since "+joe", for instance, is
550*101e15b5SRichard Lowe * a valid filename, we must handle this pattern as well.) This
551*101e15b5SRichard Lowe * is performed by parse_old_field_spec().
552*101e15b5SRichard Lowe */
553*101e15b5SRichard Lowe if (xstreql(argv[optind - 1], "--")) {
554*101e15b5SRichard Lowe /*
555*101e15b5SRichard Lowe * Process all arguments following end-of-options token
556*101e15b5SRichard Lowe * as filenames.
557*101e15b5SRichard Lowe */
558*101e15b5SRichard Lowe while (optind < argc) {
559*101e15b5SRichard Lowe if (xstreql(argv[optind], "-"))
560*101e15b5SRichard Lowe S->m_input_from_stdin = 1;
561*101e15b5SRichard Lowe else
562*101e15b5SRichard Lowe stream_add_file_to_chain(
563*101e15b5SRichard Lowe &(S->m_input_streams),
564*101e15b5SRichard Lowe argv[optind]);
565*101e15b5SRichard Lowe optind++;
566*101e15b5SRichard Lowe }
567*101e15b5SRichard Lowe
568*101e15b5SRichard Lowe break;
569*101e15b5SRichard Lowe }
570*101e15b5SRichard Lowe
571*101e15b5SRichard Lowe if (optind < argc) {
572*101e15b5SRichard Lowe if (xstreql(argv[optind], "-")) {
573*101e15b5SRichard Lowe S->m_input_from_stdin = 1;
574*101e15b5SRichard Lowe optind++;
575*101e15b5SRichard Lowe } else if (*(argv[optind]) != '+' ||
576*101e15b5SRichard Lowe !parse_old_field_spec(S, argc, argv)) {
577*101e15b5SRichard Lowe /*
578*101e15b5SRichard Lowe * It's a filename, because it either doesn't
579*101e15b5SRichard Lowe * start with '+', or if it did, it wasn't an
580*101e15b5SRichard Lowe * actual field specifier.
581*101e15b5SRichard Lowe */
582*101e15b5SRichard Lowe stream_add_file_to_chain(&(S->m_input_streams),
583*101e15b5SRichard Lowe argv[optind]);
584*101e15b5SRichard Lowe optind++;
585*101e15b5SRichard Lowe }
586*101e15b5SRichard Lowe }
587*101e15b5SRichard Lowe }
588*101e15b5SRichard Lowe
589*101e15b5SRichard Lowe if (S->m_input_streams == NULL)
590*101e15b5SRichard Lowe S->m_input_from_stdin = 1;
591*101e15b5SRichard Lowe
592*101e15b5SRichard Lowe if (S->m_output_filename == NULL)
593*101e15b5SRichard Lowe S->m_output_to_stdout = 1;
594*101e15b5SRichard Lowe
595*101e15b5SRichard Lowe /*
596*101e15b5SRichard Lowe * If no fields, then one great field. However, if the -b option was
597*101e15b5SRichard Lowe * set globally, be sure to ignore it, as per UNIX98.
598*101e15b5SRichard Lowe */
599*101e15b5SRichard Lowe if (S->m_fields_head == NULL) {
600*101e15b5SRichard Lowe S->m_field_options &= ~FIELD_IGNORE_BLANKS_START;
601*101e15b5SRichard Lowe
602*101e15b5SRichard Lowe (void) parse_new_field_spec(S, "1");
603*101e15b5SRichard Lowe /*
604*101e15b5SRichard Lowe * "Entire line" fast path is only valid if no delimiter has
605*101e15b5SRichard Lowe * been set and no modifiers have been applied.
606*101e15b5SRichard Lowe */
607*101e15b5SRichard Lowe if (S->m_field_separator.wc == 0 &&
608*101e15b5SRichard Lowe S->m_default_species == ALPHA &&
609*101e15b5SRichard Lowe S->m_field_options == 0)
610*101e15b5SRichard Lowe S->m_entire_line = 1;
611*101e15b5SRichard Lowe }
612*101e15b5SRichard Lowe
613*101e15b5SRichard Lowe return (0);
614*101e15b5SRichard Lowe }
615