xref: /openbsd/usr.bin/sort/sort.c (revision 3aaa63eb)
1 /*	$OpenBSD: sort.c,v 1.90 2019/06/28 13:35:03 deraadt Exp $	*/
2 
3 /*-
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/resource.h>
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <md5.h>
40 #include <regex.h>
41 #include <signal.h>
42 #include <stdbool.h>
43 #include <stdint.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
54 
/*
 * Short-option string handed to getopt_long() in main().
 *
 * PERMUTE is the optional prefix of that string: it is "+" by default and
 * empty when built with GNUSORT_COMPATIBILITY.  (A leading '+' asks
 * getopt_long() to stop at the first non-option argument instead of
 * permuting argv; see getopt_long(3).)
 */
#ifndef GNUSORT_COMPATIBILITY
# define PERMUTE	"+"
#else
# define PERMUTE	""
#endif
#define	OPTIONS	PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz"
61 
62 static bool need_random;
63 static const char *random_source;
64 
65 MD5_CTX md5_ctx;
66 
67 struct sort_opts sort_opts_vals;
68 
69 bool debug_sort;
70 bool need_hint;
71 
72 static struct sort_mods default_sort_mods_object;
73 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
74 
75 /*
76  * Arguments from file (when file0-from option is used:
77  */
78 static size_t argc_from_file0 = (size_t)-1;
79 static char **argv_from_file0;
80 
/*
 * getopt_long() return values for the long options that have no
 * single-character equivalent.  Numbering starts above CHAR_MAX so that
 * none of them can collide with a short option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
98 
/*
 * Sort-type flags of which at most one may be used at a time; enforced by
 * check_mutually_exclusive_flags().  The macro is also used to size the
 * per-parse "seen" arrays, so it must match the number of entries here.
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
    { 'M', 'n', 'g', 'R', 'h', 'V' };
101 
102 static const struct option long_options[] = {
103     { "batch-size", required_argument, NULL, BS_OPT },
104     { "buffer-size", required_argument, NULL, 'S' },
105     { "check", optional_argument, NULL, 'c' },
106     { "check=silent|quiet", optional_argument, NULL, 'C' },
107     { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
108     { "debug", no_argument, NULL, DEBUG_OPT },
109     { "dictionary-order", no_argument, NULL, 'd' },
110     { "field-separator", required_argument, NULL, 't' },
111     { "files0-from", required_argument, NULL, FF_OPT },
112     { "general-numeric-sort", no_argument, NULL, 'g' },
113     { "heapsort", no_argument, NULL, HEAPSORT_OPT },
114     { "help", no_argument, NULL, HELP_OPT },
115     { "human-numeric-sort", no_argument, NULL, 'h' },
116     { "ignore-leading-blanks", no_argument, NULL, 'b' },
117     { "ignore-case", no_argument, NULL, 'f' },
118     { "ignore-nonprinting", no_argument, NULL, 'i' },
119     { "key", required_argument, NULL, 'k' },
120     { "merge", no_argument, NULL, 'm' },
121     { "mergesort", no_argument, NULL, 'H' },
122     { "mmap", no_argument, NULL, MMAP_OPT },
123     { "month-sort", no_argument, NULL, 'M' },
124     { "numeric-sort", no_argument, NULL, 'n' },
125     { "output", required_argument, NULL, 'o' },
126     { "qsort", no_argument, NULL, QSORT_OPT },
127     { "radixsort", no_argument, NULL, RADIXSORT_OPT },
128     { "random-sort", no_argument, NULL, 'R' },
129     { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
130     { "reverse", no_argument, NULL, 'r' },
131     { "sort", required_argument, NULL, SORT_OPT },
132     { "stable", no_argument, NULL, 's' },
133     { "temporary-directory", required_argument, NULL, 'T' },
134     { "unique", no_argument, NULL, 'u' },
135     { "version", no_argument, NULL, VERSION_OPT },
136     { "version-sort", no_argument, NULL, 'V' },
137     { "zero-terminated", no_argument, NULL, 'z' },
138     { NULL, no_argument, NULL, 0 }
139 };
140 
141 /*
142  * Check where sort modifier is present
143  */
144 static bool
sort_modifier_empty(struct sort_mods * sm)145 sort_modifier_empty(struct sort_mods *sm)
146 {
147 	return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
148 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
149 }
150 
151 /*
152  * Print out usage text.
153  */
154 static __dead void
usage(int exit_val)155 usage(int exit_val)
156 {
157 	fprintf(exit_val ? stderr : stdout,
158 	    "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
159 	    "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
160 	exit(exit_val);
161 }
162 
163 /*
164  * Read input file names from a file (file0-from option).
165  */
166 static void
read_fns_from_file0(const char * fn)167 read_fns_from_file0(const char *fn)
168 {
169 	FILE *f;
170 	char *line = NULL;
171 	size_t linesize = 0;
172 	ssize_t linelen;
173 
174 	f = fopen(fn, "r");
175 	if (f == NULL)
176 		err(2, "%s", fn);
177 
178 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
179 		if (*line != '\0') {
180 			if (argc_from_file0 == (size_t)-1)
181 				argc_from_file0 = 0;
182 			++argc_from_file0;
183 			argv_from_file0 = sort_reallocarray(argv_from_file0,
184 			    argc_from_file0, sizeof(char *));
185 			argv_from_file0[argc_from_file0 - 1] = line;
186 		} else {
187 			free(line);
188 		}
189 		line = NULL;
190 		linesize = 0;
191 	}
192 	if (ferror(f))
193 		err(2, "%s: getdelim", fn);
194 
195 	closefile(f, fn);
196 }
197 
/*
 * Work out how much memory the sort may use and store it in
 * available_free_memory.
 *
 * The data size resource limit is raised to its hard maximum when possible;
 * the resulting limit, capped by the hw.usermem64 sysctl value when that
 * could be read, is halved to obtain the final figure.
 */
static void
set_hw_params(void)
{
	int mib[] = { CTL_HW, HW_USERMEM64 };
	struct rlimit lim;
	unsigned long long mem_limit;
	long long usermem;
	size_t sz;

	/* Total user (non-kernel) memory; -1 marks "unknown". */
	sz = sizeof(usermem);
	if (sysctl(mib, 2, &usermem, &sz, NULL, 0) == -1)
		usermem = -1;

	/* Try to raise the soft data size limit up to the hard limit. */
	if (getrlimit(RLIMIT_DATA, &lim) != 0) {
		mem_limit = 1000000;
		warn("Can't get resource limit for data size");
	} else {
		mem_limit = (unsigned long long)lim.rlim_cur;
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_DATA, &lim) != 0)
			warn("Can't set resource limit to max data size");
		else
			mem_limit = (unsigned long long)lim.rlim_max;
	}

	/* Temporary files are preferred over swap space. */
	if (usermem != -1 && mem_limit > usermem)
		mem_limit = usermem;

	available_free_memory = mem_limit / 2;
}
235 
236 /*
237  * Set directory temporary files.
238  */
239 static void
set_tmpdir(void)240 set_tmpdir(void)
241 {
242 	if (!issetugid()) {
243 		char *td;
244 
245 		td = getenv("TMPDIR");
246 		if (td != NULL)
247 			tmpdir = td;
248 	}
249 }
250 
251 /*
252  * Parse -S option.
253  */
254 static unsigned long long
parse_memory_buffer_value(const char * value)255 parse_memory_buffer_value(const char *value)
256 {
257 	char *endptr;
258 	unsigned long long membuf;
259 
260 	membuf = strtoll(value, &endptr, 10);
261 	if (endptr == value || (long long)membuf < 0 ||
262 	    (errno == ERANGE && membuf == LLONG_MAX))
263 		goto invalid;
264 
265 	switch (*endptr) {
266 	case 'Y':
267 		if (membuf > ULLONG_MAX / 1024)
268 			goto invalid;
269 		membuf *= 1024;
270 		/* FALLTHROUGH */
271 	case 'Z':
272 		if (membuf > ULLONG_MAX / 1024)
273 			goto invalid;
274 		membuf *= 1024;
275 		/* FALLTHROUGH */
276 	case 'E':
277 		if (membuf > ULLONG_MAX / 1024)
278 			goto invalid;
279 		membuf *= 1024;
280 		/* FALLTHROUGH */
281 	case 'P':
282 		if (membuf > ULLONG_MAX / 1024)
283 			goto invalid;
284 		membuf *= 1024;
285 		/* FALLTHROUGH */
286 	case 'T':
287 		if (membuf > ULLONG_MAX / 1024)
288 			goto invalid;
289 		membuf *= 1024;
290 		/* FALLTHROUGH */
291 	case 'G':
292 		if (membuf > ULLONG_MAX / 1024)
293 			goto invalid;
294 		membuf *= 1024;
295 		/* FALLTHROUGH */
296 	case 'M':
297 		if (membuf > ULLONG_MAX / 1024)
298 			goto invalid;
299 		membuf *= 1024;
300 		/* FALLTHROUGH */
301 	case '\0':
302 	case 'K':
303 		if (membuf > ULLONG_MAX / 1024)
304 			goto invalid;
305 		membuf *= 1024;
306 		/* FALLTHROUGH */
307 	case 'b':
308 		break;
309 	case '%':
310 		if (available_free_memory != 0 &&
311 		    membuf > ULLONG_MAX / available_free_memory)
312 			goto invalid;
313 		membuf = (available_free_memory * membuf) /
314 		    100;
315 		break;
316 	default:
317 		warnc(EINVAL, "%s", optarg);
318 		membuf = available_free_memory;
319 	}
320 	if (membuf > SIZE_MAX)
321 		goto invalid;
322 	return membuf;
323 invalid:
324 	errx(2, "invalid memory buffer size: %s", value);
325 }
326 
327 /*
328  * Signal handler that clears the temporary files.
329  */
330 static void
sig_handler(int sig __unused)331 sig_handler(int sig __unused)
332 {
333 	clear_tmp_files();
334 	_exit(2);
335 }
336 
337 /*
338  * Set signal handler on panic signals.
339  */
340 static void
set_signal_handler(void)341 set_signal_handler(void)
342 {
343 	struct sigaction sa;
344 	int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
345 	    SIGPIPE, SIGXCPU, SIGXFSZ, 0};
346 
347 	memset(&sa, 0, sizeof(sa));
348 	sigfillset(&sa.sa_mask);
349 	sa.sa_flags = SA_RESTART;
350 	sa.sa_handler = sig_handler;
351 
352 	for (i = 0; signals[i] != 0; i++) {
353 		if (sigaction(signals[i], &sa, NULL) == -1) {
354 			warn("sigaction(%s)", strsignal(signals[i]));
355 			continue;
356 		}
357 	}
358 }
359 
/*
 * Report an unsupported feature name and exit with status 2.
 * what is the offending name; it is printed verbatim.
 */
static void
unknown(const char *what)
{
	errx(2, "Unknown feature: %s", what);
}
368 
369 /*
370  * Check whether contradictory input options are used.
371  */
372 static void
check_mutually_exclusive_flags(char c,bool * mef_flags)373 check_mutually_exclusive_flags(char c, bool *mef_flags)
374 {
375 	int i, fo_index, mec;
376 	bool found_others, found_this;
377 
378 	found_others = found_this = false;
379 	fo_index = 0;
380 
381 	for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
382 		mec = mutually_exclusive_flags[i];
383 
384 		if (mec != c) {
385 			if (mef_flags[i]) {
386 				if (found_this) {
387 					errx(2,
388 					    "%c:%c: mutually exclusive flags",
389 					    c, mec);
390 				}
391 				found_others = true;
392 				fo_index = i;
393 			}
394 		} else {
395 			if (found_others) {
396 				errx(2,
397 				    "%c:%c: mutually exclusive flags",
398 				    c, mutually_exclusive_flags[fo_index]);
399 			}
400 			mef_flags[i] = true;
401 			found_this = true;
402 		}
403 	}
404 }
405 
406 /*
407  * Initialise sort opts data.
408  */
409 static void
set_sort_opts(void)410 set_sort_opts(void)
411 {
412 	memset(&default_sort_mods_object, 0,
413 	    sizeof(default_sort_mods_object));
414 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
415 	default_sort_mods_object.func =
416 	    get_sort_func(&default_sort_mods_object);
417 }
418 
419 /*
420  * Set a sort modifier on a sort modifiers object.
421  */
422 static bool
set_sort_modifier(struct sort_mods * sm,int c)423 set_sort_modifier(struct sort_mods *sm, int c)
424 {
425 	switch (c) {
426 	case 'b':
427 		sm->bflag = true;
428 		break;
429 	case 'd':
430 		sm->dflag = true;
431 		break;
432 	case 'f':
433 		sm->fflag = true;
434 		break;
435 	case 'g':
436 		sm->gflag = true;
437 		need_hint = true;
438 		break;
439 	case 'i':
440 		sm->iflag = true;
441 		break;
442 	case 'R':
443 		sm->Rflag = true;
444 		need_random = true;
445 		break;
446 	case 'M':
447 		initialise_months();
448 		sm->Mflag = true;
449 		need_hint = true;
450 		break;
451 	case 'n':
452 		sm->nflag = true;
453 		need_hint = true;
454 		break;
455 	case 'r':
456 		sm->rflag = true;
457 		break;
458 	case 'V':
459 		sm->Vflag = true;
460 		break;
461 	case 'h':
462 		sm->hflag = true;
463 		need_hint = true;
464 		break;
465 	default:
466 		return false;
467 	}
468 	sort_opts_vals.complex_sort = true;
469 	sm->func = get_sort_func(sm);
470 
471 	return true;
472 }
473 
474 /*
475  * Parse POS in -k option.
476  */
477 static int
parse_pos(const char * s,struct key_specs * ks,bool * mef_flags,bool second)478 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
479 {
480 	regmatch_t pmatch[4];
481 	regex_t re;
482 	char *c, *f;
483 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
484 	size_t len, nmatch;
485 	int ret;
486 
487 	ret = -1;
488 	nmatch = 4;
489 	c = f = NULL;
490 
491 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
492 		return -1;
493 
494 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
495 		goto end;
496 
497 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
498 		goto end;
499 
500 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
501 		goto end;
502 
503 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
504 
505 	f = sort_malloc(len + 1);
506 	memcpy(f, s + pmatch[1].rm_so, len);
507 	f[len] = '\0';
508 
509 	if (second) {
510 		errno = 0;
511 		ks->f2 = (size_t)strtoul(f, NULL, 10);
512 		if (errno != 0)
513 			goto end;
514 		if (ks->f2 == 0) {
515 			warn("0 field in key specs");
516 			goto end;
517 		}
518 	} else {
519 		errno = 0;
520 		ks->f1 = (size_t)strtoul(f, NULL, 10);
521 		if (errno != 0)
522 			goto end;
523 		if (ks->f1 == 0) {
524 			warn("0 field in key specs");
525 			goto end;
526 		}
527 	}
528 
529 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
530 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
531 
532 		c = sort_malloc(len + 1);
533 		memcpy(c, s + pmatch[2].rm_so + 1, len);
534 		c[len] = '\0';
535 
536 		if (second) {
537 			errno = 0;
538 			ks->c2 = (size_t)strtoul(c, NULL, 10);
539 			if (errno != 0)
540 				goto end;
541 		} else {
542 			errno = 0;
543 			ks->c1 = (size_t)strtoul(c, NULL, 10);
544 			if (errno != 0)
545 				goto end;
546 			if (ks->c1 == 0) {
547 				warn("0 column in key specs");
548 				goto end;
549 			}
550 		}
551 	} else {
552 		if (second)
553 			ks->c2 = 0;
554 		else
555 			ks->c1 = 1;
556 	}
557 
558 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
559 		regoff_t i = 0;
560 
561 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
562 			check_mutually_exclusive_flags(s[i], mef_flags);
563 			if (s[i] == 'b') {
564 				if (second)
565 					ks->pos2b = true;
566 				else
567 					ks->pos1b = true;
568 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
569 				goto end;
570 		}
571 	}
572 
573 	ret = 0;
574 
575 end:
576 	sort_free(c);
577 	sort_free(f);
578 	regfree(&re);
579 
580 	return ret;
581 }
582 
583 /*
584  * Parse -k option value.
585  */
586 static int
parse_k(const char * s,struct key_specs * ks)587 parse_k(const char *s, struct key_specs *ks)
588 {
589 	int ret = -1;
590 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
591 	    { false, false, false, false, false, false };
592 
593 	if (*s != '\0') {
594 		char *sptr;
595 
596 		sptr = strchr(s, ',');
597 		if (sptr) {
598 			size_t size1;
599 			char *pos1, *pos2;
600 
601 			size1 = sptr - s;
602 
603 			if (size1 < 1)
604 				return -1;
605 
606 			pos1 = sort_malloc(size1 + 1);
607 			memcpy(pos1, s, size1);
608 			pos1[size1] = '\0';
609 
610 			ret = parse_pos(pos1, ks, mef_flags, false);
611 
612 			sort_free(pos1);
613 			if (ret < 0)
614 				return ret;
615 
616 			pos2 = sort_strdup(sptr + 1);
617 			ret = parse_pos(pos2, ks, mef_flags, true);
618 			sort_free(pos2);
619 		} else
620 			ret = parse_pos(s, ks, mef_flags, false);
621 	}
622 
623 	return ret;
624 }
625 
/*
 * Parse the POS part of an obsolete +POS or -POS argument:
 * FIELD[.COLUMN][LETTERS].
 *
 * *nf and *nc receive the field and column numbers (0 when absent).  The
 * trailing letters, if any, are copied into sopts, a buffer of sopts_size
 * bytes; sopts is left untouched when there are none.
 *
 * Returns 0 on success and -1 when s does not have the expected shape.
 * Exits with status 2 when a number is out of range or the letters do not
 * fit into sopts.
 */
static int
parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
{
	const char *pattern = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t m[4];
	regex_t rx;
	char *fieldstr = NULL, *colstr = NULL;
	size_t n;
	int rc = -1;

	*nc = *nf = 0;

	if (regcomp(&rx, pattern, REG_EXTENDED) != 0)
		return -1;

	if (regexec(&rx, s, 4, m, 0) != 0)
		goto done;
	if (m[0].rm_eo <= m[0].rm_so || m[1].rm_eo <= m[1].rm_so)
		goto done;

	/* Field number. */
	n = m[1].rm_eo - m[1].rm_so;
	fieldstr = sort_malloc(n + 1);
	memcpy(fieldstr, s + m[1].rm_so, n);
	fieldstr[n] = '\0';

	errno = 0;
	*nf = (size_t)strtoul(fieldstr, NULL, 10);
	if (errno != 0)
		errx(2, "Invalid key position");

	/* Optional column number, stored after its leading dot. */
	if (m[2].rm_eo > m[2].rm_so) {
		n = m[2].rm_eo - m[2].rm_so - 1;
		colstr = sort_malloc(n + 1);
		memcpy(colstr, s + m[2].rm_so + 1, n);
		colstr[n] = '\0';

		errno = 0;
		*nc = (size_t)strtoul(colstr, NULL, 10);
		if (errno != 0)
			errx(2, "Invalid key position");
	}

	/* Optional trailing letters, returned verbatim to the caller. */
	if (m[3].rm_eo > m[3].rm_so) {
		n = m[3].rm_eo - m[3].rm_so;
		if (n >= sopts_size)
			errx(2, "Invalid key position");
		memcpy(sopts, s + m[3].rm_so, n);
		sopts[n] = '\0';
	}

	rc = 0;

done:
	sort_free(colstr);
	sort_free(fieldstr);
	regfree(&rx);

	return rc;
}
699 
/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2] syntax.
 *
 * argv is rewritten in place: a "+POS1" argument is replaced by a newly
 * allocated "-k..." string, and a directly following "-POS2" argument that
 * parses as a position is folded into it and removed, decrementing *argc.
 * Obsolete positions count from 0, so field and column numbers are
 * adjusted to the 1-based -k form.
 *
 * Scanning stops at "--": everything after it is a file name and must not
 * be rewritten, even if it starts with '+'.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four decimal numbers, their separators and
	 * two modifier strings of up to 127 characters each, so the
	 * generated key can never be truncated.
	 */
	char sopt[512];
	int i;

	for (i = 1; i < *argc; i++) {
		const char *arg1 = argv[i];
		size_t c1, f1;
		char sopts1[128];

		/* Following arguments are treated as file names. */
		if (strcmp(arg1, "--") == 0)
			break;

		if (arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;
		if (i + 1 < *argc) {
			const char *arg2 = argv[i + 1];

			if (arg2[0] == '-') {
				size_t c2, f2;
				char sopts2[128];

				sopts2[0] = '\0';
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1, &f2, &c2,
				    sopts2, sizeof(sopts2)) >= 0) {
					int j;

					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%zu.%zu%s,%zu.%zu%s",
					    f1, c1, sopts1, f2,
					    c2, sopts2);
					argv[i] = sort_strdup(sopt);

					/* Drop the consumed -POS2 argument. */
					for (j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}
		snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
		    f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
758 
759 /*
760  * Set random seed
761  */
762 static void
set_random_seed(void)763 set_random_seed(void)
764 {
765 	if (!need_random)
766 		return;
767 
768 	MD5Init(&md5_ctx);
769 	if (random_source != NULL) {
770 		unsigned char buf[BUFSIZ];
771 		size_t nr;
772 		FILE *fp;
773 
774 		if ((fp = fopen(random_source, "r")) == NULL)
775 			err(2, "%s", random_source);
776 		while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
777 			MD5Update(&md5_ctx, buf, nr);
778 		if (ferror(fp))
779 			err(2, "%s", random_source);
780 		fclose(fp);
781 	} else {
782 		unsigned char rsd[1024];
783 
784 		arc4random_buf(rsd, sizeof(rsd));
785 		MD5Update(&md5_ctx, rsd, sizeof(rsd));
786 	}
787 }
788 
789 /*
790  * Main function.
791  */
792 int
main(int argc,char * argv[])793 main(int argc, char *argv[])
794 {
795 	char *outfile, *real_outfile, *sflag;
796 	int c;
797 	size_t i;
798 	struct sort_mods *sm = &default_sort_mods_object;
799 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
800 	    { false, false, false, false, false, false };
801 
802 	set_hw_params();
803 
804 	if (pledge("stdio rpath wpath cpath fattr chown proc exec", NULL) == -1)
805 		err(2, "pledge");
806 
807 	outfile = "-";
808 	real_outfile = NULL;
809 	sflag = NULL;
810 
811 	init_tmp_files();
812 
813 	set_signal_handler();
814 
815 	atexit(clear_tmp_files);
816 
817 	set_tmpdir();
818 	set_sort_opts();
819 
820 	fix_obsolete_keys(&argc, argv);
821 
822 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
823 	    != -1)) {
824 
825 		check_mutually_exclusive_flags(c, mef_flags);
826 
827 		if (!set_sort_modifier(sm, c)) {
828 			switch (c) {
829 			case 'c':
830 				sort_opts_vals.cflag = true;
831 				if (optarg) {
832 					if (!strcmp(optarg, "diagnose-first"))
833 						;
834 					else if (!strcmp(optarg, "silent") ||
835 					    !strcmp(optarg, "quiet"))
836 						sort_opts_vals.csilentflag = true;
837 					else if (*optarg)
838 						unknown(optarg);
839 				}
840 				break;
841 			case 'C':
842 				sort_opts_vals.cflag = true;
843 				sort_opts_vals.csilentflag = true;
844 				break;
845 			case 'k':
846 			{
847 				sort_opts_vals.complex_sort = true;
848 				sort_opts_vals.kflag = true;
849 
850 				keys = sort_reallocarray(keys, keys_num + 1,
851 				    sizeof(struct key_specs));
852 				memset(&(keys[keys_num]), 0,
853 				    sizeof(struct key_specs));
854 #ifndef GNUSORT_COMPATIBILITY
855 				keys[keys_num].pos1b = default_sort_mods->bflag;
856 				keys[keys_num].pos2b = default_sort_mods->bflag;
857 #endif
858 
859 				if (parse_k(optarg, &(keys[keys_num++])) < 0)
860 					errc(2, EINVAL, "-k %s", optarg);
861 
862 				break;
863 			}
864 			case 'm':
865 				sort_opts_vals.mflag = true;
866 				break;
867 			case 'o':
868 				outfile = optarg;
869 				break;
870 			case 's':
871 				sort_opts_vals.sflag = true;
872 				break;
873 			case 'S':
874 				sflag = optarg;
875 				break;
876 			case 'T':
877 				tmpdir = optarg;
878 				break;
879 			case 't':
880 				while (strlen(optarg) > 1) {
881 					if (optarg[0] != '\\') {
882 						errc(2, EINVAL, "%s", optarg);
883 					}
884 					optarg += 1;
885 					if (*optarg == '0') {
886 						*optarg = 0;
887 						break;
888 					}
889 				}
890 				sort_opts_vals.tflag = true;
891 				sort_opts_vals.field_sep = btowc(optarg[0]);
892 				if (sort_opts_vals.field_sep == WEOF) {
893 					errno = EINVAL;
894 					err(2, NULL);
895 				}
896 				break;
897 			case 'u':
898 				sort_opts_vals.uflag = true;
899 				/* stable sort for the correct unique val */
900 				sort_opts_vals.sflag = true;
901 				break;
902 			case 'z':
903 				sort_opts_vals.zflag = true;
904 				break;
905 			case SORT_OPT:
906 				if (!strcmp(optarg, "general-numeric"))
907 					set_sort_modifier(sm, 'g');
908 				else if (!strcmp(optarg, "human-numeric"))
909 					set_sort_modifier(sm, 'h');
910 				else if (!strcmp(optarg, "numeric"))
911 					set_sort_modifier(sm, 'n');
912 				else if (!strcmp(optarg, "month"))
913 					set_sort_modifier(sm, 'M');
914 				else if (!strcmp(optarg, "random"))
915 					set_sort_modifier(sm, 'R');
916 				else
917 					unknown(optarg);
918 				break;
919 			case QSORT_OPT:
920 				sort_opts_vals.sort_method = SORT_QSORT;
921 				break;
922 			case 'H':
923 				sort_opts_vals.sort_method = SORT_MERGESORT;
924 				break;
925 			case MMAP_OPT:
926 				use_mmap = true;
927 				break;
928 			case HEAPSORT_OPT:
929 				sort_opts_vals.sort_method = SORT_HEAPSORT;
930 				break;
931 			case RADIXSORT_OPT:
932 				sort_opts_vals.sort_method = SORT_RADIXSORT;
933 				break;
934 			case RANDOMSOURCE_OPT:
935 				random_source = optarg;
936 				break;
937 			case COMPRESSPROGRAM_OPT:
938 				compress_program = optarg;
939 				break;
940 			case FF_OPT:
941 				read_fns_from_file0(optarg);
942 				break;
943 			case BS_OPT:
944 			{
945 				const char *errstr;
946 
947 				max_open_files = strtonum(optarg, 2,
948 				    UINT_MAX - 1, &errstr) + 1;
949 				if (errstr != NULL)
950 					errx(2, "--batch-size argument is %s",
951 					    errstr);
952 				break;
953 			}
954 			case VERSION_OPT:
955 				printf("%s\n", VERSION);
956 				exit(EXIT_SUCCESS);
957 				/* NOTREACHED */
958 				break;
959 			case DEBUG_OPT:
960 				debug_sort = true;
961 				break;
962 			case HELP_OPT:
963 				usage(0);
964 				/* NOTREACHED */
965 				break;
966 			default:
967 				usage(2);
968 				/* NOTREACHED */
969 			}
970 		}
971 	}
972 	argc -= optind;
973 	argv += optind;
974 
975 	if (compress_program == NULL) {
976 		if (pledge("stdio rpath wpath cpath fattr chown", NULL) == -1)
977 			err(2, "pledge");
978 	}
979 
980 #ifndef GNUSORT_COMPATIBILITY
981 	if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
982 		outfile = argv[argc - 1];
983 		argc -= 2;
984 	}
985 #endif
986 
987 	if (argv_from_file0) {
988 		argc = argc_from_file0;
989 		argv = argv_from_file0;
990 	}
991 
992 	if (sort_opts_vals.cflag) {
993 		if (argc > 1)
994 			errx(2, "only one input file is allowed with the -%c flag",
995 			    sort_opts_vals.csilentflag ? 'C' : 'c');
996 
997 		if (argc == 0 || strcmp(argv[0], "-") == 0) {
998 			if (compress_program) {
999 				if (pledge("stdio proc exec", NULL) == -1)
1000 					err(2, "pledge");
1001 			} else {
1002 				if (pledge("stdio", NULL) == -1)
1003 					err(2, "pledge");
1004 			}
1005 		} else {
1006 			if (compress_program) {
1007 				if (pledge("stdio rpath proc exec", NULL) == -1)
1008 					err(2, "pledge");
1009 			} else {
1010 				if (pledge("stdio rpath", NULL) == -1)
1011 					err(2, "pledge");
1012 			}
1013 		}
1014 	} else {
1015 		/* Case when the outfile equals one of the input files: */
1016 		if (strcmp(outfile, "-") != 0) {
1017 			struct stat sb;
1018 			int fd, i;
1019 
1020 			for (i = 0; i < argc; ++i) {
1021 				if (strcmp(argv[i], outfile) == 0) {
1022 					if (stat(outfile, &sb) == -1)
1023 						err(2, "%s", outfile);
1024 					if (access(outfile, W_OK) == -1)
1025 						err(2, "%s", outfile);
1026 					real_outfile = outfile;
1027 					sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1028 					    real_outfile);
1029 					if ((fd = mkstemp(outfile)) == -1)
1030 						err(2, "%s", outfile);
1031 					(void)fchown(fd, sb.st_uid, sb.st_gid);
1032 					if (fchmod(fd, sb.st_mode & ACCESSPERMS) == -1)
1033 						err(2, "%s", outfile);
1034 					close(fd);
1035 					tmp_file_atexit(outfile);
1036 					break;
1037 				}
1038 			}
1039 		}
1040 
1041 		if (compress_program) {
1042 			if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1)
1043 				err(2, "pledge");
1044 		} else {
1045 			if (pledge("stdio rpath wpath cpath", NULL) == -1)
1046 				err(2, "pledge");
1047 		}
1048 	}
1049 
1050 	if (sflag != NULL)
1051 		available_free_memory = parse_memory_buffer_value(sflag);
1052 
1053 	if (keys_num == 0) {
1054 		keys_num = 1;
1055 		keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1056 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1057 		keys[0].c1 = 1;
1058 #ifdef GNUSORT_COMPATIBILITY
1059 		keys[0].pos1b = sm->bflag;
1060 		keys[0].pos2b = sm->bflag;
1061 #endif
1062 		memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
1063 	}
1064 
1065 	for (i = 0; i < keys_num; i++) {
1066 		struct key_specs *ks;
1067 
1068 		ks = &(keys[i]);
1069 
1070 		if (sort_modifier_empty(&(ks->sm))) {
1071 #ifdef GNUSORT_COMPATIBILITY
1072 			if (!(ks->pos1b) && !(ks->pos2b)) {
1073 				ks->pos1b = sm->bflag;
1074 				ks->pos2b = sm->bflag;
1075 			}
1076 #endif
1077 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1078 		}
1079 
1080 		ks->sm.func = get_sort_func(&(ks->sm));
1081 	}
1082 
1083 	if (debug_sort)
1084 		printf("Memory to be used for sorting: %llu\n",
1085 		    available_free_memory);
1086 
1087 	if (sort_opts_vals.cflag)
1088 		return check(argc ? *argv : "-");
1089 
1090 	set_random_seed();
1091 
1092 	if (!sort_opts_vals.mflag) {
1093 		struct file_list fl;
1094 		struct sort_list list;
1095 
1096 		sort_list_init(&list);
1097 		file_list_init(&fl, true);
1098 
1099 		if (argc < 1)
1100 			procfile("-", &list, &fl);
1101 		else {
1102 			while (argc > 0) {
1103 				procfile(*argv, &list, &fl);
1104 				--argc;
1105 				++argv;
1106 			}
1107 		}
1108 
1109 		if (fl.count < 1)
1110 			sort_list_to_file(&list, outfile);
1111 		else {
1112 			if (list.count > 0) {
1113 				char *flast = new_tmp_file_name();
1114 
1115 				sort_list_to_file(&list, flast);
1116 				file_list_add(&fl, flast, false);
1117 			}
1118 			merge_files(&fl, outfile);
1119 		}
1120 
1121 		file_list_clean(&fl);
1122 
1123 		/*
1124 		 * We are about to exit the program, so we can ignore
1125 		 * the clean-up for speed
1126 		 *
1127 		 * sort_list_clean(&list);
1128 		 */
1129 
1130 	} else {
1131 		struct file_list fl;
1132 
1133 		file_list_init(&fl, false);
1134 		if (argc < 1)
1135 			file_list_add(&fl, "-", true);
1136 		else
1137 			file_list_populate(&fl, argc, argv, true);
1138 		merge_files(&fl, outfile);
1139 		file_list_clean(&fl);
1140 	}
1141 
1142 	if (real_outfile) {
1143 		if (rename(outfile, real_outfile) == -1)
1144 			err(2, "%s", real_outfile);
1145 		sort_free(outfile);
1146 	}
1147 
1148 	return 0;
1149 }
1150