xref: /illumos-gate/usr/src/cmd/sort/options.c (revision 101e15b5)
1*101e15b5SRichard Lowe /*
2*101e15b5SRichard Lowe  * CDDL HEADER START
3*101e15b5SRichard Lowe  *
4*101e15b5SRichard Lowe  * The contents of this file are subject to the terms of the
5*101e15b5SRichard Lowe  * Common Development and Distribution License, Version 1.0 only
6*101e15b5SRichard Lowe  * (the "License").  You may not use this file except in compliance
7*101e15b5SRichard Lowe  * with the License.
8*101e15b5SRichard Lowe  *
9*101e15b5SRichard Lowe  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*101e15b5SRichard Lowe  * or http://www.opensolaris.org/os/licensing.
11*101e15b5SRichard Lowe  * See the License for the specific language governing permissions
12*101e15b5SRichard Lowe  * and limitations under the License.
13*101e15b5SRichard Lowe  *
14*101e15b5SRichard Lowe  * When distributing Covered Code, include this CDDL HEADER in each
15*101e15b5SRichard Lowe  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*101e15b5SRichard Lowe  * If applicable, add the following below this CDDL HEADER, with the
17*101e15b5SRichard Lowe  * fields enclosed by brackets "[]" replaced with your own identifying
18*101e15b5SRichard Lowe  * information: Portions Copyright [yyyy] [name of copyright owner]
19*101e15b5SRichard Lowe  *
20*101e15b5SRichard Lowe  * CDDL HEADER END
21*101e15b5SRichard Lowe  */
22*101e15b5SRichard Lowe /*
23*101e15b5SRichard Lowe  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*101e15b5SRichard Lowe  * Use is subject to license terms.
25*101e15b5SRichard Lowe  */
26*101e15b5SRichard Lowe 
27*101e15b5SRichard Lowe #include "options.h"
28*101e15b5SRichard Lowe 
29*101e15b5SRichard Lowe /*
30*101e15b5SRichard Lowe  * options
31*101e15b5SRichard Lowe  *
32*101e15b5SRichard Lowe  * Overview
33*101e15b5SRichard Lowe  *   sort(1) supports two methods for specifying the sort key:  the original,
34*101e15b5SRichard Lowe  *   now-obsolete, +n -m form and the POSIX -k n,m form.  We refer to the former
35*101e15b5SRichard Lowe  *   as "old specifiers" and the latter as "new specifiers".  The options()
36*101e15b5SRichard Lowe  *   function parses the command line arguments given to sort, placing the sort
37*101e15b5SRichard Lowe  *   key specifiers in the internal representation used in fields.c.
38*101e15b5SRichard Lowe  *
39*101e15b5SRichard Lowe  * Equivalence of specifiers
40*101e15b5SRichard Lowe  *   One of sort(1)'s standard peculiarities is the transformation of the
41*101e15b5SRichard Lowe  *   character offsets and field numbering between the new and old style field
42*101e15b5SRichard Lowe  *   specifications.  We simply quote from the Single Unix standard:
43*101e15b5SRichard Lowe  *
44*101e15b5SRichard Lowe  *	+w.xT -y.zU
45*101e15b5SRichard Lowe  *
46*101e15b5SRichard Lowe  *   is equivalent to
47*101e15b5SRichard Lowe  *
48*101e15b5SRichard Lowe  *	undefined		when z == 0, U contains b, and -t is set
49*101e15b5SRichard Lowe  *	-k w+1.x+1T,y.0U	when z == 0 otherwise
50*101e15b5SRichard Lowe  *	-k w+1.x+1T,y+1.zU	when z > 0
51*101e15b5SRichard Lowe  *
52*101e15b5SRichard Lowe  *   Undoubtedly, this seemed logical at the time.  (Using only the field head
53*101e15b5SRichard Lowe  *   as the coordinate, as done in the obsolete version, seems much simpler.)
54*101e15b5SRichard Lowe  *   The reverse map is where the key specifier
55*101e15b5SRichard Lowe  *
56*101e15b5SRichard Lowe  *	-k w.xT,y.zU
57*101e15b5SRichard Lowe  *
58*101e15b5SRichard Lowe  *   is equivalent to
59*101e15b5SRichard Lowe  *
60*101e15b5SRichard Lowe  *	undefined		when z == 0, U contains b, and -t is set
 *	+w-1.x-1T -y.0U		when z == 0 otherwise
 *	+w-1.x-1T -y-1.zU	when z > 0
63*101e15b5SRichard Lowe  *
64*101e15b5SRichard Lowe  *   in the obsolete syntax.  Because the original key specifiers lead to a
65*101e15b5SRichard Lowe  *   simpler implementation, the internal representation of a field in this
66*101e15b5SRichard Lowe  *   implementation of sort is mostly that given by the obsolete syntax.
67*101e15b5SRichard Lowe  */
68*101e15b5SRichard Lowe 
/*
 * While a key specifier in the obsolete +m ... -n form is being defined (that
 * is, before the closing -n is seen), a narrower set of options is permitted.
 * We specify this smaller set of options in OLD_SPEC_OPTIONS_STRING; it is
 * OPTIONS_STRING without the digits, because parse_old_field_spec() recognizes
 * the closing -n itself before handing the argument to getopt().
 *
 * The digits are present in OPTIONS_STRING only so that options() can reject
 * a naked -n that is not preceded by a +m.
 */
#define	OPTIONS_STRING		"cmuo:T:z:dfiMnrbt:k:S:0123456789"
#define	OLD_SPEC_OPTIONS_STRING	"bdfiMnrcmuo:T:z:t:k:S:"

/*
 * Flags passed to parse_field_spec() describing which half of which kind of
 * key specifier is being parsed.
 */
#define	OPTIONS_OLDSPEC		0x1	/* else new-style spec */
#define	OPTIONS_STARTSPEC	0x2	/* else end spec */
79*101e15b5SRichard Lowe 
/*
 * is_number() returns 1 if every character of the NUL-terminated string C is
 * a decimal digit, and 0 otherwise.  No sign, whitespace, or radix prefix is
 * accepted.  Note that the empty string contains no non-digit characters and
 * is therefore reported as a number.
 */
static int
is_number(char *C)
{
	const char *p;

	/* Walk the string once rather than calling strlen() per iteration. */
	for (p = C; *p != '\0'; p++) {
		if (!isdigit((unsigned char)*p))
			return (0);
	}

	return (1);
}
91*101e15b5SRichard Lowe 
/*
 * If a field specified by the -k option or by the +n syntax contains any
 * modifiers, then the current global field modifiers are not inherited.
 *
 * field_spec_has_modifiers() returns 0 if the leading run of digits, periods,
 * and commas in C is exactly length characters long (that is, when length is
 * the length of C, the specifier consists of nothing but positions), and 1
 * otherwise.
 */
static int
field_spec_has_modifiers(char *C, int length)
{
	int p_nonmodifiers = strspn(C, ",.1234567890");

	return (p_nonmodifiers != length);
}
106*101e15b5SRichard Lowe 
107*101e15b5SRichard Lowe static void
field_apply_all(field_t * fc,flag_t flags)108*101e15b5SRichard Lowe field_apply_all(field_t *fc, flag_t flags)
109*101e15b5SRichard Lowe {
110*101e15b5SRichard Lowe 	field_t *f;
111*101e15b5SRichard Lowe 
112*101e15b5SRichard Lowe 	for (f = fc; f; f = f->f_next)
113*101e15b5SRichard Lowe 		if ((f->f_options & FIELD_MODIFIERS_DEFINED) == 0)
114*101e15b5SRichard Lowe 			f->f_options |= flags;
115*101e15b5SRichard Lowe }
116*101e15b5SRichard Lowe 
117*101e15b5SRichard Lowe static int
parse_field_spec(field_t * F,char * C,int flags,int length)118*101e15b5SRichard Lowe parse_field_spec(field_t *F, char *C, int flags, int length)
119*101e15b5SRichard Lowe {
120*101e15b5SRichard Lowe 	int p_period = MIN(length, strcspn(C, "."));
121*101e15b5SRichard Lowe 	int p_modifiers = MIN(length, strspn(C, ".1234567890"));
122*101e15b5SRichard Lowe 	int p_boundary = MIN(p_period, p_modifiers);
123*101e15b5SRichard Lowe 	int field = 0;
124*101e15b5SRichard Lowe 	int offset = 0;
125*101e15b5SRichard Lowe 	int offset_seen = 0;
126*101e15b5SRichard Lowe 	int i;
127*101e15b5SRichard Lowe 	int blanks_flag = 0;
128*101e15b5SRichard Lowe 
129*101e15b5SRichard Lowe 	for (i = 0; i < p_boundary; i++) {
130*101e15b5SRichard Lowe 		if (isdigit((uchar_t)C[i]))
131*101e15b5SRichard Lowe 			field = (10 * field) + (C[i] - '0');
132*101e15b5SRichard Lowe 		else
133*101e15b5SRichard Lowe 			return (1);
134*101e15b5SRichard Lowe 	}
135*101e15b5SRichard Lowe 
136*101e15b5SRichard Lowe 	if (p_period < p_modifiers) {
137*101e15b5SRichard Lowe 		for (i = p_period + 1; i < p_modifiers; i++) {
138*101e15b5SRichard Lowe 			if (isdigit((uchar_t)C[i])) {
139*101e15b5SRichard Lowe 				offset_seen++;
140*101e15b5SRichard Lowe 				offset = (10 * offset) + (C[i] - '0');
141*101e15b5SRichard Lowe 			} else {
142*101e15b5SRichard Lowe 				return (1);
143*101e15b5SRichard Lowe 			}
144*101e15b5SRichard Lowe 		}
145*101e15b5SRichard Lowe 	}
146*101e15b5SRichard Lowe 
147*101e15b5SRichard Lowe 	if (p_modifiers < length) {
148*101e15b5SRichard Lowe 		for (i = p_modifiers; i < length; i++) {
149*101e15b5SRichard Lowe 			switch (C[i]) {
150*101e15b5SRichard Lowe 				case 'b':
151*101e15b5SRichard Lowe 					blanks_flag = 1;
152*101e15b5SRichard Lowe 					break;
153*101e15b5SRichard Lowe 				case 'd':
154*101e15b5SRichard Lowe 					F->f_options |= FIELD_DICTIONARY_ORDER;
155*101e15b5SRichard Lowe 					break;
156*101e15b5SRichard Lowe 				case 'f':
157*101e15b5SRichard Lowe 					F->f_options |= FIELD_FOLD_UPPERCASE;
158*101e15b5SRichard Lowe 					break;
159*101e15b5SRichard Lowe 				case 'i':
160*101e15b5SRichard Lowe 					F->f_options |=
161*101e15b5SRichard Lowe 					    FIELD_IGNORE_NONPRINTABLES;
162*101e15b5SRichard Lowe 					break;
163*101e15b5SRichard Lowe 				case 'M':
164*101e15b5SRichard Lowe 					F->f_species = MONTH;
165*101e15b5SRichard Lowe 					break;
166*101e15b5SRichard Lowe 				case 'n':
167*101e15b5SRichard Lowe 					F->f_species = NUMERIC;
168*101e15b5SRichard Lowe 					break;
169*101e15b5SRichard Lowe 				case 'r':
170*101e15b5SRichard Lowe 					F->f_options |=
171*101e15b5SRichard Lowe 					    FIELD_REVERSE_COMPARISONS;
172*101e15b5SRichard Lowe 					break;
173*101e15b5SRichard Lowe 				default:
174*101e15b5SRichard Lowe 					usage();
175*101e15b5SRichard Lowe 					break;
176*101e15b5SRichard Lowe 			}
177*101e15b5SRichard Lowe 		}
178*101e15b5SRichard Lowe 	}
179*101e15b5SRichard Lowe 
180*101e15b5SRichard Lowe 	if (flags & OPTIONS_STARTSPEC) {
181*101e15b5SRichard Lowe 		F->f_start_field = field;
182*101e15b5SRichard Lowe 		F->f_start_offset = offset;
183*101e15b5SRichard Lowe 		if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC) {
184*101e15b5SRichard Lowe 			F->f_start_field--;
185*101e15b5SRichard Lowe 			if (offset_seen)
186*101e15b5SRichard Lowe 				F->f_start_offset--;
187*101e15b5SRichard Lowe 		}
188*101e15b5SRichard Lowe 		F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_START : 0;
189*101e15b5SRichard Lowe 	} else {
190*101e15b5SRichard Lowe 		F->f_end_field = field;
191*101e15b5SRichard Lowe 		F->f_end_offset = offset;
192*101e15b5SRichard Lowe 		if ((flags & OPTIONS_OLDSPEC) != OPTIONS_OLDSPEC &&
193*101e15b5SRichard Lowe 		    offset_seen && offset != 0)
194*101e15b5SRichard Lowe 			F->f_end_field--;
195*101e15b5SRichard Lowe 		F->f_options |= blanks_flag ? FIELD_IGNORE_BLANKS_END : 0;
196*101e15b5SRichard Lowe 	}
197*101e15b5SRichard Lowe 
198*101e15b5SRichard Lowe 	return (0);
199*101e15b5SRichard Lowe }
200*101e15b5SRichard Lowe 
201*101e15b5SRichard Lowe static void
parse_new_field_spec(sort_t * S,char * arg)202*101e15b5SRichard Lowe parse_new_field_spec(sort_t *S, char *arg)
203*101e15b5SRichard Lowe {
204*101e15b5SRichard Lowe 	int length = strlen(arg);
205*101e15b5SRichard Lowe 	int p_comma = MIN(length, strcspn(arg, ","));
206*101e15b5SRichard Lowe 	field_t *nF;
207*101e15b5SRichard Lowe 	int p;
208*101e15b5SRichard Lowe 
209*101e15b5SRichard Lowe 	/*
210*101e15b5SRichard Lowe 	 * New field specifiers do not inherit from the general specifier if
211*101e15b5SRichard Lowe 	 * they have any modifiers set.  (This is specifically tested in the VSC
212*101e15b5SRichard Lowe 	 * test suite, assertion 32 for POSIX.cmd/sort.)
213*101e15b5SRichard Lowe 	 */
214*101e15b5SRichard Lowe 	if (field_spec_has_modifiers(arg, length)) {
215*101e15b5SRichard Lowe 		nF = field_new(NULL);
216*101e15b5SRichard Lowe 		nF->f_options = FIELD_MODIFIERS_DEFINED;
217*101e15b5SRichard Lowe 	} else {
218*101e15b5SRichard Lowe 		nF = field_new(S);
219*101e15b5SRichard Lowe 	}
220*101e15b5SRichard Lowe 	p = parse_field_spec(nF, arg, OPTIONS_STARTSPEC, p_comma);
221*101e15b5SRichard Lowe 
222*101e15b5SRichard Lowe 	if (p != 0)
223*101e15b5SRichard Lowe 		usage();
224*101e15b5SRichard Lowe 
225*101e15b5SRichard Lowe 	if (p_comma < length) {
226*101e15b5SRichard Lowe 		p = parse_field_spec(nF, &(arg[p_comma + 1]), 0,
227*101e15b5SRichard Lowe 		    strlen(&(arg[p_comma + 1])));
228*101e15b5SRichard Lowe 		if (p != 0)
229*101e15b5SRichard Lowe 			usage();
230*101e15b5SRichard Lowe 	}
231*101e15b5SRichard Lowe 
232*101e15b5SRichard Lowe 	if (nF->f_start_field < 0 || nF->f_start_offset < 0) {
233*101e15b5SRichard Lowe 		if (S->m_verbose)
234*101e15b5SRichard Lowe 			warn("-k %s is not a supported field specifier\n", arg);
235*101e15b5SRichard Lowe 	}
236*101e15b5SRichard Lowe 	nF->f_start_field = MAX(nF->f_start_field, 0);
237*101e15b5SRichard Lowe 	nF->f_start_offset = MAX(nF->f_start_offset, 0);
238*101e15b5SRichard Lowe 
239*101e15b5SRichard Lowe 	/*
240*101e15b5SRichard Lowe 	 * If the starting field exceeds a defined ending field, convention
241*101e15b5SRichard Lowe 	 * dictates that the field is ignored.
242*101e15b5SRichard Lowe 	 */
243*101e15b5SRichard Lowe 	if (nF->f_end_field == -1 || nF->f_start_field < nF->f_end_field ||
244*101e15b5SRichard Lowe 	    (nF->f_start_field == nF->f_end_field &&
245*101e15b5SRichard Lowe 	    nF->f_start_offset < nF->f_end_offset)) {
246*101e15b5SRichard Lowe 		field_add_to_chain(&(S->m_fields_head), nF);
247*101e15b5SRichard Lowe 	} else if (S->m_verbose) {
248*101e15b5SRichard Lowe 		warn("illegal field -k %s omitted", arg);
249*101e15b5SRichard Lowe 	}
250*101e15b5SRichard Lowe }
251*101e15b5SRichard Lowe 
252*101e15b5SRichard Lowe /*
253*101e15b5SRichard Lowe  * parse_old_field_spec() is getopt()-aware; it may modify the values of optind,
254*101e15b5SRichard Lowe  * optarg, and so forth, to correctly determine the characteristics being
255*101e15b5SRichard Lowe  * assigned to the current field.
256*101e15b5SRichard Lowe  */
257*101e15b5SRichard Lowe static int
parse_old_field_spec(sort_t * S,int argc,char * argv[])258*101e15b5SRichard Lowe parse_old_field_spec(sort_t *S, int argc, char *argv[])
259*101e15b5SRichard Lowe {
260*101e15b5SRichard Lowe 	field_t *nF;
261*101e15b5SRichard Lowe 	int c, p;
262*101e15b5SRichard Lowe 	char *arg = argv[optind];
263*101e15b5SRichard Lowe 
264*101e15b5SRichard Lowe 	if (field_spec_has_modifiers(arg + 1, strlen(arg + 1))) {
265*101e15b5SRichard Lowe 		nF = field_new(NULL);
266*101e15b5SRichard Lowe 		nF->f_options = FIELD_MODIFIERS_DEFINED;
267*101e15b5SRichard Lowe 	} else {
268*101e15b5SRichard Lowe 		nF = field_new(S);
269*101e15b5SRichard Lowe 	}
270*101e15b5SRichard Lowe 
271*101e15b5SRichard Lowe 	p = parse_field_spec(nF, arg + 1, OPTIONS_OLDSPEC | OPTIONS_STARTSPEC,
272*101e15b5SRichard Lowe 	    strlen(arg + 1));
273*101e15b5SRichard Lowe 
274*101e15b5SRichard Lowe 	if (p != 0) {
275*101e15b5SRichard Lowe 		field_delete(nF);
276*101e15b5SRichard Lowe 		return (0);
277*101e15b5SRichard Lowe 	}
278*101e15b5SRichard Lowe 
279*101e15b5SRichard Lowe 	/*
280*101e15b5SRichard Lowe 	 * In the case that getopt() returns '?' (unrecognized option) or EOF
281*101e15b5SRichard Lowe 	 * (non-option argument), the field is considered closed.
282*101e15b5SRichard Lowe 	 */
283*101e15b5SRichard Lowe 	for (arg = argv[++optind]; optind < argc; arg = argv[optind]) {
284*101e15b5SRichard Lowe 		if (strlen(arg) >= 2 && *arg == '-' &&
285*101e15b5SRichard Lowe 		    isdigit(*(uchar_t *)(arg + 1))) {
286*101e15b5SRichard Lowe 			(void) parse_field_spec(nF, arg + 1,
287*101e15b5SRichard Lowe 			    OPTIONS_OLDSPEC, strlen(arg) - 1);
288*101e15b5SRichard Lowe 			field_add_to_chain(&(S->m_fields_head), nF);
289*101e15b5SRichard Lowe 			optind++;
290*101e15b5SRichard Lowe 			return (1);
291*101e15b5SRichard Lowe 		}
292*101e15b5SRichard Lowe 
293*101e15b5SRichard Lowe 		if ((c = getopt(argc, argv, OLD_SPEC_OPTIONS_STRING)) != EOF) {
294*101e15b5SRichard Lowe 			switch (c) {
295*101e15b5SRichard Lowe 			case 'b':
296*101e15b5SRichard Lowe 				nF->f_options |= FIELD_IGNORE_BLANKS_START;
297*101e15b5SRichard Lowe 				break;
298*101e15b5SRichard Lowe 			case 'd':
299*101e15b5SRichard Lowe 				nF->f_options |= FIELD_DICTIONARY_ORDER;
300*101e15b5SRichard Lowe 				break;
301*101e15b5SRichard Lowe 			case 'f':
302*101e15b5SRichard Lowe 				nF->f_options |= FIELD_FOLD_UPPERCASE;
303*101e15b5SRichard Lowe 				break;
304*101e15b5SRichard Lowe 			case 'i':
305*101e15b5SRichard Lowe 				nF->f_options |= FIELD_IGNORE_NONPRINTABLES;
306*101e15b5SRichard Lowe 				break;
307*101e15b5SRichard Lowe 			case 'M':
308*101e15b5SRichard Lowe 				nF->f_species = MONTH;
309*101e15b5SRichard Lowe 				break;
310*101e15b5SRichard Lowe 			case 'n':
311*101e15b5SRichard Lowe 				nF->f_species = NUMERIC;
312*101e15b5SRichard Lowe 				break;
313*101e15b5SRichard Lowe 			case 'r':
314*101e15b5SRichard Lowe 				nF->f_options |= FIELD_REVERSE_COMPARISONS;
315*101e15b5SRichard Lowe 				break;
316*101e15b5SRichard Lowe 			case '?':
317*101e15b5SRichard Lowe 			case 'c':
318*101e15b5SRichard Lowe 			case 'm':
319*101e15b5SRichard Lowe 			case 'u':
320*101e15b5SRichard Lowe 				/*
321*101e15b5SRichard Lowe 				 * Options without arguments.
322*101e15b5SRichard Lowe 				 */
323*101e15b5SRichard Lowe 				optind -= 1;
324*101e15b5SRichard Lowe 				field_add_to_chain(&(S->m_fields_head), nF);
325*101e15b5SRichard Lowe 				return (1);
326*101e15b5SRichard Lowe 				/*NOTREACHED*/
327*101e15b5SRichard Lowe 			case 'o':
328*101e15b5SRichard Lowe 			case 'T':
329*101e15b5SRichard Lowe 			case 'z':
330*101e15b5SRichard Lowe 			case 't':
331*101e15b5SRichard Lowe 			case 'k':
332*101e15b5SRichard Lowe 			case 'S':
333*101e15b5SRichard Lowe 				/*
334*101e15b5SRichard Lowe 				 * Options with arguments.
335*101e15b5SRichard Lowe 				 */
336*101e15b5SRichard Lowe 				if (optarg == argv[optind - 1] + 2) {
337*101e15b5SRichard Lowe 					optind -= 1;
338*101e15b5SRichard Lowe 				} else {
339*101e15b5SRichard Lowe 					optind -= 2;
340*101e15b5SRichard Lowe 				}
341*101e15b5SRichard Lowe 				field_add_to_chain(&(S->m_fields_head), nF);
342*101e15b5SRichard Lowe 				return (1);
343*101e15b5SRichard Lowe 				/*NOTREACHED*/
344*101e15b5SRichard Lowe 			default:
345*101e15b5SRichard Lowe 				die(EMSG_UNKN_OPTION);
346*101e15b5SRichard Lowe 				/*NOTREACHED*/
347*101e15b5SRichard Lowe 			}
348*101e15b5SRichard Lowe 		} else {
349*101e15b5SRichard Lowe 			break;
350*101e15b5SRichard Lowe 		}
351*101e15b5SRichard Lowe 	}
352*101e15b5SRichard Lowe 
353*101e15b5SRichard Lowe 	field_add_to_chain(&(S->m_fields_head), nF);
354*101e15b5SRichard Lowe 	return (1);
355*101e15b5SRichard Lowe }
356*101e15b5SRichard Lowe 
357*101e15b5SRichard Lowe int
options(sort_t * S,int argc,char * argv[])358*101e15b5SRichard Lowe options(sort_t *S, int argc, char *argv[])
359*101e15b5SRichard Lowe {
360*101e15b5SRichard Lowe 	int c;
361*101e15b5SRichard Lowe 
362*101e15b5SRichard Lowe 	optind = 1;
363*101e15b5SRichard Lowe 	while (optind < argc) {
364*101e15b5SRichard Lowe 		if (strncmp("-y", argv[optind], strlen("-y")) == 0) {
365*101e15b5SRichard Lowe 			/*
366*101e15b5SRichard Lowe 			 * The -y [kmem] option violates the standard syntax
367*101e15b5SRichard Lowe 			 * outlined in intro(1).  we have to be a little fancy
368*101e15b5SRichard Lowe 			 * to determine if the next argument is a valid integer.
369*101e15b5SRichard Lowe 			 * (note, of course, that the previous sort(1) had no
370*101e15b5SRichard Lowe 			 * mechanism to resolve a final
371*101e15b5SRichard Lowe 			 *	-y 99999
372*101e15b5SRichard Lowe 			 * into
373*101e15b5SRichard Lowe 			 *	-y, file 99999
374*101e15b5SRichard Lowe 			 * or
375*101e15b5SRichard Lowe 			 *	-y 99999, file stdin
376*101e15b5SRichard Lowe 			 *
377*101e15b5SRichard Lowe 			 * Now one can unambiguously use
378*101e15b5SRichard Lowe 			 *	-y -- 99999
379*101e15b5SRichard Lowe 			 * and
380*101e15b5SRichard Lowe 			 *	-y 99999 -
381*101e15b5SRichard Lowe 			 * to distinguish these cases.
382*101e15b5SRichard Lowe 			 *
383*101e15b5SRichard Lowe 			 * That said, we do not use the information passed using
384*101e15b5SRichard Lowe 			 * -y option in sort(1); we provide the argument to
385*101e15b5SRichard Lowe 			 * preserve compatibility for existing scripts.
386*101e15b5SRichard Lowe 			 */
387*101e15b5SRichard Lowe 			if (strlen(argv[optind]) == strlen("-y") &&
388*101e15b5SRichard Lowe 			    optind + 1 < argc &&
389*101e15b5SRichard Lowe 			    is_number(argv[optind + 1]))
390*101e15b5SRichard Lowe 				optind += 2;
391*101e15b5SRichard Lowe 			else
392*101e15b5SRichard Lowe 				optind += 1;
393*101e15b5SRichard Lowe 		}
394*101e15b5SRichard Lowe 
395*101e15b5SRichard Lowe 		if ((c = getopt(argc, argv, OPTIONS_STRING)) != EOF) {
396*101e15b5SRichard Lowe 			switch (c) {
397*101e15b5SRichard Lowe 			case 'c':
398*101e15b5SRichard Lowe 				S->m_check_if_sorted_only = 1;
399*101e15b5SRichard Lowe 				break;
400*101e15b5SRichard Lowe 
401*101e15b5SRichard Lowe 			case 'm':
402*101e15b5SRichard Lowe 				S->m_merge_only = 1;
403*101e15b5SRichard Lowe 				break;
404*101e15b5SRichard Lowe 
405*101e15b5SRichard Lowe 			case 'u':
406*101e15b5SRichard Lowe 				S->m_unique_lines = 1;
407*101e15b5SRichard Lowe 				break;
408*101e15b5SRichard Lowe 
409*101e15b5SRichard Lowe 			case 'o':
410*101e15b5SRichard Lowe 				S->m_output_filename = optarg;
411*101e15b5SRichard Lowe 				break;
412*101e15b5SRichard Lowe 
413*101e15b5SRichard Lowe 			case 'T':
414*101e15b5SRichard Lowe 				S->m_tmpdir_template = optarg;
415*101e15b5SRichard Lowe 				break;
416*101e15b5SRichard Lowe 
417*101e15b5SRichard Lowe 			case 'z':
418*101e15b5SRichard Lowe 				/*
419*101e15b5SRichard Lowe 				 * ignore optarg -- obsolete
420*101e15b5SRichard Lowe 				 */
421*101e15b5SRichard Lowe 				break;
422*101e15b5SRichard Lowe 
423*101e15b5SRichard Lowe 			case 'd':
424*101e15b5SRichard Lowe 				S->m_field_options |= FIELD_DICTIONARY_ORDER;
425*101e15b5SRichard Lowe 				field_apply_all(S->m_fields_head,
426*101e15b5SRichard Lowe 				    FIELD_DICTIONARY_ORDER);
427*101e15b5SRichard Lowe 				break;
428*101e15b5SRichard Lowe 
429*101e15b5SRichard Lowe 			case 'f':
430*101e15b5SRichard Lowe 				S->m_field_options |= FIELD_FOLD_UPPERCASE;
431*101e15b5SRichard Lowe 				field_apply_all(S->m_fields_head,
432*101e15b5SRichard Lowe 				    FIELD_FOLD_UPPERCASE);
433*101e15b5SRichard Lowe 				break;
434*101e15b5SRichard Lowe 
435*101e15b5SRichard Lowe 			case 'i':
436*101e15b5SRichard Lowe 				S->m_field_options |=
437*101e15b5SRichard Lowe 				    FIELD_IGNORE_NONPRINTABLES;
438*101e15b5SRichard Lowe 				field_apply_all(S->m_fields_head,
439*101e15b5SRichard Lowe 				    FIELD_IGNORE_NONPRINTABLES);
440*101e15b5SRichard Lowe 				break;
441*101e15b5SRichard Lowe 
442*101e15b5SRichard Lowe 			case 'M':
443*101e15b5SRichard Lowe 				S->m_default_species = MONTH;
444*101e15b5SRichard Lowe 				S->m_field_options &=
445*101e15b5SRichard Lowe 				    ~FIELD_IGNORE_BLANKS_START;
446*101e15b5SRichard Lowe 				break;
447*101e15b5SRichard Lowe 
448*101e15b5SRichard Lowe 			case 'n':
449*101e15b5SRichard Lowe 				S->m_default_species = NUMERIC;
450*101e15b5SRichard Lowe 				{
451*101e15b5SRichard Lowe 					field_t *f;
452*101e15b5SRichard Lowe 
453*101e15b5SRichard Lowe 					for (f = S->m_fields_head; f;
454*101e15b5SRichard Lowe 					    f = f->f_next)
455*101e15b5SRichard Lowe 						if ((f->f_options &
456*101e15b5SRichard Lowe 						    FIELD_MODIFIERS_DEFINED) ==
457*101e15b5SRichard Lowe 						    0)
458*101e15b5SRichard Lowe 							f->f_species = NUMERIC;
459*101e15b5SRichard Lowe 				}
460*101e15b5SRichard Lowe 				break;
461*101e15b5SRichard Lowe 
462*101e15b5SRichard Lowe 			case 'b':
463*101e15b5SRichard Lowe 				S->m_field_options |=
464*101e15b5SRichard Lowe 				    FIELD_IGNORE_BLANKS_START |
465*101e15b5SRichard Lowe 				    FIELD_IGNORE_BLANKS_END;
466*101e15b5SRichard Lowe 				break;
467*101e15b5SRichard Lowe 
468*101e15b5SRichard Lowe 			case 'r':
469*101e15b5SRichard Lowe 				S->m_field_options |=
470*101e15b5SRichard Lowe 				    FIELD_REVERSE_COMPARISONS;
471*101e15b5SRichard Lowe 				field_apply_all(S->m_fields_head,
472*101e15b5SRichard Lowe 				    FIELD_REVERSE_COMPARISONS);
473*101e15b5SRichard Lowe 				break;
474*101e15b5SRichard Lowe 
475*101e15b5SRichard Lowe 			case 't':
476*101e15b5SRichard Lowe 				/*
477*101e15b5SRichard Lowe 				 * delimiter
478*101e15b5SRichard Lowe 				 */
479*101e15b5SRichard Lowe 				if (S->m_single_byte_locale) {
480*101e15b5SRichard Lowe 					/*
481*101e15b5SRichard Lowe 					 * Most debuggers can't take tabs as
482*101e15b5SRichard Lowe 					 * input arguments, so we provide an
483*101e15b5SRichard Lowe 					 * escape sequence to allow testing of
484*101e15b5SRichard Lowe 					 * this special case for the DEBUG
485*101e15b5SRichard Lowe 					 * version.
486*101e15b5SRichard Lowe 					 */
487*101e15b5SRichard Lowe 					S->m_field_separator.sc =
488*101e15b5SRichard Lowe #ifdef DEBUG
489*101e15b5SRichard Lowe 					    xstreql(optarg, "\\t") ? '\t' :
490*101e15b5SRichard Lowe #endif
491*101e15b5SRichard Lowe 					    optarg[0];
492*101e15b5SRichard Lowe 				} else
493*101e15b5SRichard Lowe 					(void) mbtowc(&S->m_field_separator.wc,
494*101e15b5SRichard Lowe 					    optarg, MB_CUR_MAX);
495*101e15b5SRichard Lowe 				break;
496*101e15b5SRichard Lowe 
497*101e15b5SRichard Lowe 			case 'k':
498*101e15b5SRichard Lowe 				/*
499*101e15b5SRichard Lowe 				 * key
500*101e15b5SRichard Lowe 				 */
501*101e15b5SRichard Lowe 				(void) parse_new_field_spec(S, optarg);
502*101e15b5SRichard Lowe 				break;
503*101e15b5SRichard Lowe 
504*101e15b5SRichard Lowe 			case 'S':
505*101e15b5SRichard Lowe 				S->m_memory_limit = strtomem(optarg);
506*101e15b5SRichard Lowe #ifdef DEBUG
507*101e15b5SRichard Lowe 				(void) fprintf(stderr, CMDNAME
508*101e15b5SRichard Lowe 				    ": limiting size to %d bytes\n",
509*101e15b5SRichard Lowe 				    S->m_memory_limit);
510*101e15b5SRichard Lowe #endif /* DEBUG */
511*101e15b5SRichard Lowe 				break;
512*101e15b5SRichard Lowe 
513*101e15b5SRichard Lowe 			/*
514*101e15b5SRichard Lowe 			 * We never take a naked -999; these should always be
515*101e15b5SRichard Lowe 			 * associated with a preceding +000.
516*101e15b5SRichard Lowe 			 */
517*101e15b5SRichard Lowe 			case '0':
518*101e15b5SRichard Lowe 			case '1':
519*101e15b5SRichard Lowe 			case '2':
520*101e15b5SRichard Lowe 			case '3':
521*101e15b5SRichard Lowe 			case '4':
522*101e15b5SRichard Lowe 			case '5':
523*101e15b5SRichard Lowe 			case '6':
524*101e15b5SRichard Lowe 			case '7':
525*101e15b5SRichard Lowe 			case '8':
526*101e15b5SRichard Lowe 			case '9':
527*101e15b5SRichard Lowe 				usage();
528*101e15b5SRichard Lowe 				break;
529*101e15b5SRichard Lowe 			case '?':
530*101e15b5SRichard Lowe 				/* error case */
531*101e15b5SRichard Lowe 				usage();
532*101e15b5SRichard Lowe 				break;
533*101e15b5SRichard Lowe 			}
534*101e15b5SRichard Lowe 
535*101e15b5SRichard Lowe 			/*
536*101e15b5SRichard Lowe 			 * Go back for next argument.
537*101e15b5SRichard Lowe 			 */
538*101e15b5SRichard Lowe 			continue;
539*101e15b5SRichard Lowe 		}
540*101e15b5SRichard Lowe 
541*101e15b5SRichard Lowe 		/*
542*101e15b5SRichard Lowe 		 * There are three (interpretable) possibilities for getopt() to
543*101e15b5SRichard Lowe 		 * return EOF with arguments on the command line: we have seen
544*101e15b5SRichard Lowe 		 * the "end-of-options" token, --, we have encountered the
545*101e15b5SRichard Lowe 		 * old-style field definition, +NNN, or we have found a
546*101e15b5SRichard Lowe 		 * filename.
547*101e15b5SRichard Lowe 		 *
548*101e15b5SRichard Lowe 		 * In the second case, we must also search for the optional -NNN
549*101e15b5SRichard Lowe 		 * field terminal definition.  (since "+joe", for instance, is
550*101e15b5SRichard Lowe 		 * a valid filename, we must handle this pattern as well.)  This
551*101e15b5SRichard Lowe 		 * is performed by parse_old_field_spec().
552*101e15b5SRichard Lowe 		 */
553*101e15b5SRichard Lowe 		if (xstreql(argv[optind - 1], "--")) {
554*101e15b5SRichard Lowe 			/*
555*101e15b5SRichard Lowe 			 * Process all arguments following end-of-options token
556*101e15b5SRichard Lowe 			 * as filenames.
557*101e15b5SRichard Lowe 			 */
558*101e15b5SRichard Lowe 			while (optind < argc) {
559*101e15b5SRichard Lowe 				if (xstreql(argv[optind], "-"))
560*101e15b5SRichard Lowe 					S->m_input_from_stdin = 1;
561*101e15b5SRichard Lowe 				else
562*101e15b5SRichard Lowe 					stream_add_file_to_chain(
563*101e15b5SRichard Lowe 					    &(S->m_input_streams),
564*101e15b5SRichard Lowe 					    argv[optind]);
565*101e15b5SRichard Lowe 				optind++;
566*101e15b5SRichard Lowe 			}
567*101e15b5SRichard Lowe 
568*101e15b5SRichard Lowe 			break;
569*101e15b5SRichard Lowe 		}
570*101e15b5SRichard Lowe 
571*101e15b5SRichard Lowe 		if (optind < argc) {
572*101e15b5SRichard Lowe 			if (xstreql(argv[optind], "-")) {
573*101e15b5SRichard Lowe 				S->m_input_from_stdin = 1;
574*101e15b5SRichard Lowe 				optind++;
575*101e15b5SRichard Lowe 			} else if (*(argv[optind]) != '+' ||
576*101e15b5SRichard Lowe 			    !parse_old_field_spec(S, argc, argv)) {
577*101e15b5SRichard Lowe 				/*
578*101e15b5SRichard Lowe 				 * It's a filename, because it either doesn't
579*101e15b5SRichard Lowe 				 * start with '+', or if it did, it wasn't an
580*101e15b5SRichard Lowe 				 * actual field specifier.
581*101e15b5SRichard Lowe 				 */
582*101e15b5SRichard Lowe 				stream_add_file_to_chain(&(S->m_input_streams),
583*101e15b5SRichard Lowe 				    argv[optind]);
584*101e15b5SRichard Lowe 				optind++;
585*101e15b5SRichard Lowe 			}
586*101e15b5SRichard Lowe 		}
587*101e15b5SRichard Lowe 	}
588*101e15b5SRichard Lowe 
589*101e15b5SRichard Lowe 	if (S->m_input_streams == NULL)
590*101e15b5SRichard Lowe 		S->m_input_from_stdin = 1;
591*101e15b5SRichard Lowe 
592*101e15b5SRichard Lowe 	if (S->m_output_filename == NULL)
593*101e15b5SRichard Lowe 		S->m_output_to_stdout = 1;
594*101e15b5SRichard Lowe 
595*101e15b5SRichard Lowe 	/*
596*101e15b5SRichard Lowe 	 * If no fields, then one great field.  However, if the -b option was
597*101e15b5SRichard Lowe 	 * set globally, be sure to ignore it, as per UNIX98.
598*101e15b5SRichard Lowe 	 */
599*101e15b5SRichard Lowe 	if (S->m_fields_head == NULL) {
600*101e15b5SRichard Lowe 		S->m_field_options &= ~FIELD_IGNORE_BLANKS_START;
601*101e15b5SRichard Lowe 
602*101e15b5SRichard Lowe 		(void) parse_new_field_spec(S, "1");
603*101e15b5SRichard Lowe 		/*
604*101e15b5SRichard Lowe 		 * "Entire line" fast path is only valid if no delimiter has
605*101e15b5SRichard Lowe 		 * been set and no modifiers have been applied.
606*101e15b5SRichard Lowe 		 */
607*101e15b5SRichard Lowe 		if (S->m_field_separator.wc == 0 &&
608*101e15b5SRichard Lowe 		    S->m_default_species == ALPHA &&
609*101e15b5SRichard Lowe 		    S->m_field_options == 0)
610*101e15b5SRichard Lowe 			S->m_entire_line = 1;
611*101e15b5SRichard Lowe 	}
612*101e15b5SRichard Lowe 
613*101e15b5SRichard Lowe 	return (0);
614*101e15b5SRichard Lowe }
615