xref: /openbsd/usr.bin/sort/sort.c (revision 8529ddd3)
1 /*	$OpenBSD: sort.c,v 1.79 2015/04/05 13:56:04 millert Exp $	*/
2 
3 /*-
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/resource.h>
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdint.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <wchar.h>
50 #include <wctype.h>
51 
52 #include "coll.h"
53 #include "file.h"
54 #include "sort.h"
55 
56 #ifdef GNUSORT_COMPATIBILITY
57 # define PERMUTE	""
58 #else
59 # define PERMUTE	"+"
60 #endif
61 #define	OPTIONS	PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz"
62 
63 static bool need_random;
64 static const char *random_source;
65 
66 MD5_CTX md5_ctx;
67 
68 struct sort_opts sort_opts_vals;
69 
70 bool debug_sort;
71 bool need_hint;
72 
73 static bool gnusort_numeric_compatibility;
74 
75 static struct sort_mods default_sort_mods_object;
76 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
77 
78 static bool print_symbols_on_debug;
79 
80 /*
81  * Arguments from file (when file0-from option is used:
82  */
83 static size_t argc_from_file0 = (size_t)-1;
84 static char **argv_from_file0;
85 
86 /*
87  * Placeholder symbols for options which have no single-character equivalent
88  */
89 enum {
90 	SORT_OPT = CHAR_MAX + 1,
91 	HELP_OPT,
92 	FF_OPT,
93 	BS_OPT,
94 	VERSION_OPT,
95 	DEBUG_OPT,
96 	RANDOMSOURCE_OPT,
97 	COMPRESSPROGRAM_OPT,
98 	QSORT_OPT,
99 	HEAPSORT_OPT,
100 	RADIXSORT_OPT,
101 	MMAP_OPT
102 };
103 
104 #define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
105 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
106 
107 static const struct option long_options[] = {
108     { "batch-size", required_argument, NULL, BS_OPT },
109     { "buffer-size", required_argument, NULL, 'S' },
110     { "check", optional_argument, NULL, 'c' },
111     { "check=silent|quiet", optional_argument, NULL, 'C' },
112     { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
113     { "debug", no_argument, NULL, DEBUG_OPT },
114     { "dictionary-order", no_argument, NULL, 'd' },
115     { "field-separator", required_argument, NULL, 't' },
116     { "files0-from", required_argument, NULL, FF_OPT },
117     { "general-numeric-sort", no_argument, NULL, 'g' },
118     { "heapsort", no_argument, NULL, HEAPSORT_OPT },
119     { "help", no_argument, NULL, HELP_OPT },
120     { "human-numeric-sort", no_argument, NULL, 'h' },
121     { "ignore-leading-blanks", no_argument, NULL, 'b' },
122     { "ignore-case", no_argument, NULL, 'f' },
123     { "ignore-nonprinting", no_argument, NULL, 'i' },
124     { "key", required_argument, NULL, 'k' },
125     { "merge", no_argument, NULL, 'm' },
126     { "mergesort", no_argument, NULL, 'H' },
127     { "mmap", no_argument, NULL, MMAP_OPT },
128     { "month-sort", no_argument, NULL, 'M' },
129     { "numeric-sort", no_argument, NULL, 'n' },
130     { "output", required_argument, NULL, 'o' },
131     { "qsort", no_argument, NULL, QSORT_OPT },
132     { "radixsort", no_argument, NULL, RADIXSORT_OPT },
133     { "random-sort", no_argument, NULL, 'R' },
134     { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
135     { "reverse", no_argument, NULL, 'r' },
136     { "sort", required_argument, NULL, SORT_OPT },
137     { "stable", no_argument, NULL, 's' },
138     { "temporary-directory", required_argument, NULL, 'T' },
139     { "unique", no_argument, NULL, 'u' },
140     { "version", no_argument, NULL, VERSION_OPT },
141     { "version-sort", no_argument, NULL, 'V' },
142     { "zero-terminated", no_argument, NULL, 'z' },
143     { NULL, no_argument, NULL, 0 }
144 };
145 
146 /*
147  * Check where sort modifier is present
148  */
149 static bool
150 sort_modifier_empty(struct sort_mods *sm)
151 {
152 	return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
153 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag);
154 }
155 
156 /*
157  * Print out usage text.
158  */
159 static __dead void
160 usage(int exit_val)
161 {
162 	fprintf(exit_val ? stderr : stdout,
163 	    "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] "
164 	    "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname());
165 	exit(exit_val);
166 }
167 
168 /*
169  * Read input file names from a file (file0-from option).
170  */
171 static void
172 read_fns_from_file0(const char *fn)
173 {
174 	FILE *f;
175 	char *line = NULL;
176 	size_t linesize = 0;
177 	ssize_t linelen;
178 
179 	f = fopen(fn, "r");
180 	if (f == NULL)
181 		err(2, "%s", fn);
182 
183 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
184 		if (*line != '\0') {
185 			if (argc_from_file0 == (size_t)-1)
186 				argc_from_file0 = 0;
187 			++argc_from_file0;
188 			argv_from_file0 = sort_reallocarray(argv_from_file0,
189 			    argc_from_file0, sizeof(char *));
190 			argv_from_file0[argc_from_file0 - 1] = line;
191 		} else {
192 			free(line);
193 		}
194 		line = NULL;
195 		linesize = 0;
196 	}
197 	if (ferror(f))
198 		err(2, "%s: getdelim", fn);
199 
200 	closefile(f, fn);
201 }
202 
203 /*
204  * Check how much RAM is available for the sort.
205  */
206 static void
207 set_hw_params(void)
208 {
209 	unsigned long long free_memory;
210 	long long user_memory;
211 	struct rlimit rl;
212 	size_t len;
213 	int mib[] = { CTL_HW, HW_USERMEM64 };
214 
215 	/* Get total user (non-kernel) memory. */
216 	len = sizeof(user_memory);
217 	if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1)
218 	    user_memory = -1;
219 
220 	/* Increase our data size to the max */
221 	if (getrlimit(RLIMIT_DATA, &rl) == 0) {
222 		free_memory = (unsigned long long)rl.rlim_cur;
223 		rl.rlim_cur = rl.rlim_max;
224 		if (setrlimit(RLIMIT_DATA, &rl) == 0) {
225 			free_memory = (unsigned long long)rl.rlim_max;
226 		} else {
227 			warn("Can't set resource limit to max data size");
228 		}
229 	} else {
230 		free_memory = 1000000;
231 		warn("Can't get resource limit for data size");
232 	}
233 
234 	/* We prefer to use temp files rather than swap space. */
235 	if (user_memory != -1 && free_memory > user_memory)
236 		free_memory = user_memory;
237 
238 	available_free_memory = free_memory / 2;
239 }
240 
241 /*
242  * Convert "plain" symbol to wide symbol, with default value.
243  */
244 static void
245 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
246 {
247 	int res;
248 
249 	res = mbtowc(wc, c, MB_CUR_MAX);
250 	if (res < 1)
251 		*wc = def;
252 }
253 
254 /*
255  * Set current locale symbols.
256  */
257 static void
258 set_locale(void)
259 {
260 	struct lconv *lc;
261 	const char *locale;
262 
263 	setlocale(LC_ALL, "");
264 
265 	/* Obtain LC_NUMERIC info */
266 	lc = localeconv();
267 
268 	/* Convert to wide char form */
269 	conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
270 	    symbol_decimal_point);
271 	conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
272 	    symbol_thousands_sep);
273 	conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
274 	    symbol_positive_sign);
275 	conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
276 	    symbol_negative_sign);
277 
278 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
279 		gnusort_numeric_compatibility = true;
280 
281 	locale = setlocale(LC_COLLATE, NULL);
282 	if (locale != NULL) {
283 		char *tmpl;
284 		const char *byteclocale;
285 
286 		tmpl = sort_strdup(locale);
287 		byteclocale = setlocale(LC_COLLATE, "C");
288 		if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
289 			byte_sort = true;
290 		} else {
291 			byteclocale = setlocale(LC_COLLATE, "POSIX");
292 			if (byteclocale && strcmp(byteclocale, tmpl) == 0)
293 				byte_sort = true;
294 			else
295 				setlocale(LC_COLLATE, tmpl);
296 		}
297 		sort_free(tmpl);
298 	}
299 	if (!byte_sort)
300 		sort_mb_cur_max = MB_CUR_MAX;
301 }
302 
303 /*
304  * Set directory temporary files.
305  */
306 static void
307 set_tmpdir(void)
308 {
309 	if (!issetugid()) {
310 		char *td;
311 
312 		td = getenv("TMPDIR");
313 		if (td != NULL)
314 			tmpdir = td;
315 	}
316 }
317 
318 /*
319  * Parse -S option.
320  */
321 static unsigned long long
322 parse_memory_buffer_value(const char *value)
323 {
324 	char *endptr;
325 	unsigned long long membuf;
326 
327 	membuf = strtoll(value, &endptr, 10);
328 	if (endptr == value || (long long)membuf < 0 ||
329 	    (errno == ERANGE && membuf == LLONG_MAX))
330 		goto invalid;
331 
332 	switch (*endptr) {
333 	case 'Y':
334 		if (membuf > ULLONG_MAX / 1024)
335 			goto invalid;
336 		membuf *= 1024;
337 		/* FALLTHROUGH */
338 	case 'Z':
339 		if (membuf > ULLONG_MAX / 1024)
340 			goto invalid;
341 		membuf *= 1024;
342 		/* FALLTHROUGH */
343 	case 'E':
344 		if (membuf > ULLONG_MAX / 1024)
345 			goto invalid;
346 		membuf *= 1024;
347 		/* FALLTHROUGH */
348 	case 'P':
349 		if (membuf > ULLONG_MAX / 1024)
350 			goto invalid;
351 		membuf *= 1024;
352 		/* FALLTHROUGH */
353 	case 'T':
354 		if (membuf > ULLONG_MAX / 1024)
355 			goto invalid;
356 		membuf *= 1024;
357 		/* FALLTHROUGH */
358 	case 'G':
359 		if (membuf > ULLONG_MAX / 1024)
360 			goto invalid;
361 		membuf *= 1024;
362 		/* FALLTHROUGH */
363 	case 'M':
364 		if (membuf > ULLONG_MAX / 1024)
365 			goto invalid;
366 		membuf *= 1024;
367 		/* FALLTHROUGH */
368 	case '\0':
369 	case 'K':
370 		if (membuf > ULLONG_MAX / 1024)
371 			goto invalid;
372 		membuf *= 1024;
373 		/* FALLTHROUGH */
374 	case 'b':
375 		break;
376 	case '%':
377 		if (available_free_memory != 0 &&
378 		    membuf > ULLONG_MAX / available_free_memory)
379 			goto invalid;
380 		membuf = (available_free_memory * membuf) /
381 		    100;
382 		break;
383 	default:
384 		warnc(EINVAL, "%s", optarg);
385 		membuf = available_free_memory;
386 	}
387 	if (membuf > SIZE_MAX)
388 		goto invalid;
389 	return membuf;
390 invalid:
391 	errx(2, "invalid memory buffer size: %s", value);
392 }
393 
394 /*
395  * Signal handler that clears the temporary files.
396  */
397 static void
398 sig_handler(int sig __unused)
399 {
400 	clear_tmp_files();
401 	_exit(2);
402 }
403 
404 /*
405  * Set signal handler on panic signals.
406  */
407 static void
408 set_signal_handler(void)
409 {
410 	struct sigaction sa;
411 	int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2,
412 	    SIGPIPE, SIGXCPU, SIGXFSZ, 0};
413 
414 	memset(&sa, 0, sizeof(sa));
415 	sigfillset(&sa.sa_mask);
416 	sa.sa_flags = SA_RESTART;
417 	sa.sa_handler = sig_handler;
418 
419 	for (i = 0; signals[i] != 0; i++) {
420 		if (sigaction(signals[i], &sa, NULL) < 0) {
421 			warn("sigaction(%s)", strsignal(signals[i]));
422 			continue;
423 		}
424 	}
425 }
426 
427 /*
428  * Print "unknown" message and exit with status 2.
429  */
430 static void
431 unknown(const char *what)
432 {
433 	errx(2, "Unknown feature: %s", what);
434 }
435 
436 /*
437  * Check whether contradictory input options are used.
438  */
439 static void
440 check_mutually_exclusive_flags(char c, bool *mef_flags)
441 {
442 	int i, fo_index, mec;
443 	bool found_others, found_this;
444 
445 	found_others = found_this = false;
446 	fo_index = 0;
447 
448 	for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
449 		mec = mutually_exclusive_flags[i];
450 
451 		if (mec != c) {
452 			if (mef_flags[i]) {
453 				if (found_this) {
454 					errx(2,
455 					    "%c:%c: mutually exclusive flags",
456 					    c, mec);
457 				}
458 				found_others = true;
459 				fo_index = i;
460 			}
461 		} else {
462 			if (found_others) {
463 				errx(2,
464 				    "%c:%c: mutually exclusive flags",
465 				    c, mutually_exclusive_flags[fo_index]);
466 			}
467 			mef_flags[i] = true;
468 			found_this = true;
469 		}
470 	}
471 }
472 
473 /*
474  * Initialise sort opts data.
475  */
476 static void
477 set_sort_opts(void)
478 {
479 	memset(&default_sort_mods_object, 0,
480 	    sizeof(default_sort_mods_object));
481 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
482 	default_sort_mods_object.func =
483 	    get_sort_func(&default_sort_mods_object);
484 }
485 
486 /*
487  * Set a sort modifier on a sort modifiers object.
488  */
489 static bool
490 set_sort_modifier(struct sort_mods *sm, int c)
491 {
492 	switch (c) {
493 	case 'b':
494 		sm->bflag = true;
495 		break;
496 	case 'd':
497 		sm->dflag = true;
498 		break;
499 	case 'f':
500 		sm->fflag = true;
501 		break;
502 	case 'g':
503 		sm->gflag = true;
504 		need_hint = true;
505 		break;
506 	case 'i':
507 		sm->iflag = true;
508 		break;
509 	case 'R':
510 		sm->Rflag = true;
511 		need_random = true;
512 		break;
513 	case 'M':
514 		initialise_months();
515 		sm->Mflag = true;
516 		need_hint = true;
517 		break;
518 	case 'n':
519 		sm->nflag = true;
520 		need_hint = true;
521 		print_symbols_on_debug = true;
522 		break;
523 	case 'r':
524 		sm->rflag = true;
525 		break;
526 	case 'V':
527 		sm->Vflag = true;
528 		break;
529 	case 'h':
530 		sm->hflag = true;
531 		need_hint = true;
532 		print_symbols_on_debug = true;
533 		break;
534 	default:
535 		return false;
536 	}
537 	sort_opts_vals.complex_sort = true;
538 	sm->func = get_sort_func(sm);
539 
540 	return true;
541 }
542 
543 /*
544  * Parse POS in -k option.
545  */
546 static int
547 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
548 {
549 	regmatch_t pmatch[4];
550 	regex_t re;
551 	char *c, *f;
552 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
553 	size_t len, nmatch;
554 	int ret;
555 
556 	ret = -1;
557 	nmatch = 4;
558 	c = f = NULL;
559 
560 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
561 		return -1;
562 
563 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
564 		goto end;
565 
566 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
567 		goto end;
568 
569 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
570 		goto end;
571 
572 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
573 
574 	f = sort_malloc(len + 1);
575 	memcpy(f, s + pmatch[1].rm_so, len);
576 	f[len] = '\0';
577 
578 	if (second) {
579 		errno = 0;
580 		ks->f2 = (size_t)strtoul(f, NULL, 10);
581 		if (errno != 0)
582 			goto end;
583 		if (ks->f2 == 0) {
584 			warn("0 field in key specs");
585 			goto end;
586 		}
587 	} else {
588 		errno = 0;
589 		ks->f1 = (size_t)strtoul(f, NULL, 10);
590 		if (errno != 0)
591 			goto end;
592 		if (ks->f1 == 0) {
593 			warn("0 field in key specs");
594 			goto end;
595 		}
596 	}
597 
598 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
599 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
600 
601 		c = sort_malloc(len + 1);
602 		memcpy(c, s + pmatch[2].rm_so + 1, len);
603 		c[len] = '\0';
604 
605 		if (second) {
606 			errno = 0;
607 			ks->c2 = (size_t)strtoul(c, NULL, 10);
608 			if (errno != 0)
609 				goto end;
610 		} else {
611 			errno = 0;
612 			ks->c1 = (size_t)strtoul(c, NULL, 10);
613 			if (errno != 0)
614 				goto end;
615 			if (ks->c1 == 0) {
616 				warn("0 column in key specs");
617 				goto end;
618 			}
619 		}
620 	} else {
621 		if (second)
622 			ks->c2 = 0;
623 		else
624 			ks->c1 = 1;
625 	}
626 
627 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
628 		regoff_t i = 0;
629 
630 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
631 			check_mutually_exclusive_flags(s[i], mef_flags);
632 			if (s[i] == 'b') {
633 				if (second)
634 					ks->pos2b = true;
635 				else
636 					ks->pos1b = true;
637 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
638 				goto end;
639 		}
640 	}
641 
642 	ret = 0;
643 
644 end:
645 	sort_free(c);
646 	sort_free(f);
647 	regfree(&re);
648 
649 	return ret;
650 }
651 
652 /*
653  * Parse -k option value.
654  */
655 static int
656 parse_k(const char *s, struct key_specs *ks)
657 {
658 	int ret = -1;
659 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
660 	    { false, false, false, false, false, false };
661 
662 	if (*s != '\0') {
663 		char *sptr;
664 
665 		sptr = strchr(s, ',');
666 		if (sptr) {
667 			size_t size1;
668 			char *pos1, *pos2;
669 
670 			size1 = sptr - s;
671 
672 			if (size1 < 1)
673 				return -1;
674 
675 			pos1 = sort_malloc(size1 + 1);
676 			memcpy(pos1, s, size1);
677 			pos1[size1] = '\0';
678 
679 			ret = parse_pos(pos1, ks, mef_flags, false);
680 
681 			sort_free(pos1);
682 			if (ret < 0)
683 				return ret;
684 
685 			pos2 = sort_strdup(sptr + 1);
686 			ret = parse_pos(pos2, ks, mef_flags, true);
687 			sort_free(pos2);
688 		} else
689 			ret = parse_pos(s, ks, mef_flags, false);
690 	}
691 
692 	return ret;
693 }
694 
695 /*
696  * Parse POS in +POS -POS option.
697  */
698 static int
699 parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size)
700 {
701 	regex_t re;
702 	regmatch_t pmatch[4];
703 	char *c, *f;
704 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
705 	int ret;
706 	size_t len, nmatch;
707 
708 	ret = -1;
709 	nmatch = 4;
710 	c = f = NULL;
711 	*nc = *nf = 0;
712 
713 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
714 		return -1;
715 
716 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
717 		goto end;
718 
719 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
720 		goto end;
721 
722 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
723 		goto end;
724 
725 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
726 
727 	f = sort_malloc(len + 1);
728 	memcpy(f, s + pmatch[1].rm_so, len);
729 	f[len] = '\0';
730 
731 	errno = 0;
732 	*nf = (size_t)strtoul(f, NULL, 10);
733 	if (errno != 0)
734 		errx(2, "Invalid key position");
735 
736 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
737 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
738 
739 		c = sort_malloc(len + 1);
740 		memcpy(c, s + pmatch[2].rm_so + 1, len);
741 		c[len] = '\0';
742 
743 		errno = 0;
744 		*nc = (size_t)strtoul(c, NULL, 10);
745 		if (errno != 0)
746 			errx(2, "Invalid key position");
747 	}
748 
749 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
750 
751 		len = pmatch[3].rm_eo - pmatch[3].rm_so;
752 
753 		if (len >= sopts_size)
754 			errx(2, "Invalid key position");
755 		memcpy(sopts, s + pmatch[3].rm_so, len);
756 		sopts[len] = '\0';
757 	}
758 
759 	ret = 0;
760 
761 end:
762 	sort_free(c);
763 	sort_free(f);
764 	regfree(&re);
765 
766 	return ret;
767 }
768 
769 /*
770  * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
771  */
772 static void
773 fix_obsolete_keys(int *argc, char **argv)
774 {
775 	char sopt[129];
776 	int i;
777 
778 	for (i = 1; i < *argc; i++) {
779 		const char *arg1 = argv[i];
780 
781 		if (arg1[0] == '+') {
782 			size_t c1, f1;
783 			char sopts1[128];
784 
785 			sopts1[0] = 0;
786 			c1 = f1 = 0;
787 
788 			if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
789 			    sizeof(sopts1)) < 0)
790 				continue;
791 
792 			f1 += 1;
793 			c1 += 1;
794 			if (i + 1 < *argc) {
795 				const char *arg2 = argv[i + 1];
796 
797 				if (arg2[0] == '-') {
798 					size_t c2, f2;
799 					char sopts2[128];
800 
801 					sopts2[0] = 0;
802 					c2 = f2 = 0;
803 
804 					if (parse_pos_obs(arg2 + 1, &f2, &c2,
805 					    sopts2, sizeof(sopts2)) >= 0) {
806 						int j;
807 						if (c2 > 0)
808 							f2 += 1;
809 						snprintf(sopt, sizeof(sopt),
810 						    "-k%zu.%zu%s,%zu.%zu%s",
811 						    f1, c1, sopts1, f2,
812 						    c2, sopts2);
813 						argv[i] = sort_strdup(sopt);
814 						for (j = i + 1; j + 1 < *argc; j++)
815 							argv[j] = argv[j + 1];
816 						*argc -= 1;
817 						continue;
818 					}
819 				}
820 			}
821 			snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s",
822 			    f1, c1, sopts1);
823 			argv[i] = sort_strdup(sopt);
824 		}
825 	}
826 }
827 
828 /*
829  * Set random seed
830  */
831 static void
832 set_random_seed(void)
833 {
834 	if (!need_random)
835 		return;
836 
837 	MD5Init(&md5_ctx);
838 	if (random_source != NULL) {
839 		unsigned char buf[BUFSIZ];
840 		size_t nr;
841 		FILE *fp;
842 
843 		if ((fp = fopen(random_source, "r")) == NULL)
844 			err(2, "%s", random_source);
845 		while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0)
846 			MD5Update(&md5_ctx, buf, nr);
847 		if (ferror(fp))
848 			err(2, "%s", random_source);
849 		fclose(fp);
850 	} else {
851 		unsigned char rsd[1024];
852 
853 		arc4random_buf(rsd, sizeof(rsd));
854 		MD5Update(&md5_ctx, rsd, sizeof(rsd));
855 	}
856 }
857 
858 /*
859  * Main function.
860  */
861 int
862 main(int argc, char *argv[])
863 {
864 	char *outfile, *real_outfile, *sflag;
865 	int c;
866 	size_t i;
867 	struct sort_mods *sm = &default_sort_mods_object;
868 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
869 	    { false, false, false, false, false, false };
870 
871 	outfile = "-";
872 	real_outfile = NULL;
873 	sflag = NULL;
874 
875 	init_tmp_files();
876 
877 	set_signal_handler();
878 
879 	atexit(clear_tmp_files);
880 
881 	set_hw_params();
882 	set_locale();
883 	set_tmpdir();
884 	set_sort_opts();
885 
886 	fix_obsolete_keys(&argc, argv);
887 
888 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
889 	    != -1)) {
890 
891 		check_mutually_exclusive_flags(c, mef_flags);
892 
893 		if (!set_sort_modifier(sm, c)) {
894 			switch (c) {
895 			case 'c':
896 				sort_opts_vals.cflag = true;
897 				if (optarg) {
898 					if (!strcmp(optarg, "diagnose-first"))
899 						;
900 					else if (!strcmp(optarg, "silent") ||
901 					    !strcmp(optarg, "quiet"))
902 						sort_opts_vals.csilentflag = true;
903 					else if (*optarg)
904 						unknown(optarg);
905 				}
906 				break;
907 			case 'C':
908 				sort_opts_vals.cflag = true;
909 				sort_opts_vals.csilentflag = true;
910 				break;
911 			case 'k':
912 			{
913 				sort_opts_vals.complex_sort = true;
914 				sort_opts_vals.kflag = true;
915 
916 				keys = sort_reallocarray(keys, keys_num + 1,
917 				    sizeof(struct key_specs));
918 				memset(&(keys[keys_num]), 0,
919 				    sizeof(struct key_specs));
920 #ifndef GNUSORT_COMPATIBILITY
921 				keys[keys_num].pos1b = default_sort_mods->bflag;
922 				keys[keys_num].pos2b = default_sort_mods->bflag;
923 #endif
924 
925 				if (parse_k(optarg, &(keys[keys_num++])) < 0)
926 					errc(2, EINVAL, "-k %s", optarg);
927 
928 				break;
929 			}
930 			case 'm':
931 				sort_opts_vals.mflag = true;
932 				break;
933 			case 'o':
934 				outfile = optarg;
935 				break;
936 			case 's':
937 				sort_opts_vals.sflag = true;
938 				break;
939 			case 'S':
940 				sflag = optarg;
941 				break;
942 			case 'T':
943 				tmpdir = optarg;
944 				break;
945 			case 't':
946 				while (strlen(optarg) > 1) {
947 					if (optarg[0] != '\\') {
948 						errc(2, EINVAL, "%s", optarg);
949 					}
950 					optarg += 1;
951 					if (*optarg == '0') {
952 						*optarg = 0;
953 						break;
954 					}
955 				}
956 				sort_opts_vals.tflag = true;
957 				sort_opts_vals.field_sep = btowc(optarg[0]);
958 				if (sort_opts_vals.field_sep == WEOF) {
959 					errno = EINVAL;
960 					err(2, NULL);
961 				}
962 				if (!gnusort_numeric_compatibility) {
963 					if (symbol_decimal_point == sort_opts_vals.field_sep)
964 						symbol_decimal_point = WEOF;
965 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
966 						symbol_thousands_sep = WEOF;
967 					if (symbol_negative_sign == sort_opts_vals.field_sep)
968 						symbol_negative_sign = WEOF;
969 					if (symbol_positive_sign == sort_opts_vals.field_sep)
970 						symbol_positive_sign = WEOF;
971 				}
972 				break;
973 			case 'u':
974 				sort_opts_vals.uflag = true;
975 				/* stable sort for the correct unique val */
976 				sort_opts_vals.sflag = true;
977 				break;
978 			case 'z':
979 				sort_opts_vals.zflag = true;
980 				break;
981 			case SORT_OPT:
982 				if (!strcmp(optarg, "general-numeric"))
983 					set_sort_modifier(sm, 'g');
984 				else if (!strcmp(optarg, "human-numeric"))
985 					set_sort_modifier(sm, 'h');
986 				else if (!strcmp(optarg, "numeric"))
987 					set_sort_modifier(sm, 'n');
988 				else if (!strcmp(optarg, "month"))
989 					set_sort_modifier(sm, 'M');
990 				else if (!strcmp(optarg, "random"))
991 					set_sort_modifier(sm, 'R');
992 				else
993 					unknown(optarg);
994 				break;
995 			case QSORT_OPT:
996 				sort_opts_vals.sort_method = SORT_QSORT;
997 				break;
998 			case 'H':
999 				sort_opts_vals.sort_method = SORT_MERGESORT;
1000 				break;
1001 			case MMAP_OPT:
1002 				use_mmap = true;
1003 				break;
1004 			case HEAPSORT_OPT:
1005 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1006 				break;
1007 			case RADIXSORT_OPT:
1008 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1009 				break;
1010 			case RANDOMSOURCE_OPT:
1011 				random_source = optarg;
1012 				break;
1013 			case COMPRESSPROGRAM_OPT:
1014 				compress_program = optarg;
1015 				break;
1016 			case FF_OPT:
1017 				read_fns_from_file0(optarg);
1018 				break;
1019 			case BS_OPT:
1020 			{
1021 				const char *errstr;
1022 
1023 				max_open_files = strtonum(optarg, 2,
1024 				    UINT_MAX - 1, &errstr) + 1;
1025 				if (errstr != NULL)
1026 					errx(2, "--batch-size argument is %s",
1027 					    errstr);
1028 				break;
1029 			}
1030 			case VERSION_OPT:
1031 				printf("%s\n", VERSION);
1032 				exit(EXIT_SUCCESS);
1033 				/* NOTREACHED */
1034 				break;
1035 			case DEBUG_OPT:
1036 				debug_sort = true;
1037 				break;
1038 			case HELP_OPT:
1039 				usage(0);
1040 				/* NOTREACHED */
1041 				break;
1042 			default:
1043 				usage(2);
1044 				/* NOTREACHED */
1045 			}
1046 		}
1047 	}
1048 	argc -= optind;
1049 	argv += optind;
1050 
1051 #ifndef GNUSORT_COMPATIBILITY
1052 	if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
1053 		outfile = argv[argc - 1];
1054 		argc -= 2;
1055 	}
1056 #endif
1057 
1058 	if (sort_opts_vals.cflag && argc > 1)
1059 		errx(2, "only one input file is allowed with the -%c flag",
1060 		    sort_opts_vals.csilentflag ? 'C' : 'c');
1061 
1062 	if (sflag != NULL)
1063 		available_free_memory = parse_memory_buffer_value(sflag);
1064 
1065 	if (keys_num == 0) {
1066 		keys_num = 1;
1067 		keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
1068 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1069 		keys[0].c1 = 1;
1070 #ifdef GNUSORT_COMPATIBILITY
1071 		keys[0].pos1b = sm->bflag;
1072 		keys[0].pos2b = sm->bflag;
1073 #endif
1074 		memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
1075 	}
1076 
1077 	for (i = 0; i < keys_num; i++) {
1078 		struct key_specs *ks;
1079 
1080 		ks = &(keys[i]);
1081 
1082 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1083 		    !(ks->pos2b)) {
1084 #ifdef GNUSORT_COMPATIBILITY
1085 			ks->pos1b = sm->bflag;
1086 			ks->pos2b = sm->bflag;
1087 #endif
1088 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1089 		}
1090 
1091 		ks->sm.func = get_sort_func(&(ks->sm));
1092 	}
1093 
1094 	if (argv_from_file0) {
1095 		argc = argc_from_file0;
1096 		argv = argv_from_file0;
1097 	}
1098 
1099 	if (debug_sort) {
1100 		printf("Memory to be used for sorting: %llu\n",
1101 		    available_free_memory);
1102 		printf("Using collate rules of %s locale\n",
1103 		    setlocale(LC_COLLATE, NULL));
1104 		if (byte_sort)
1105 			printf("Byte sort is used\n");
1106 		if (print_symbols_on_debug) {
1107 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1108 			if (symbol_thousands_sep)
1109 				printf("Thousands separator: <%lc>\n",
1110 				    symbol_thousands_sep);
1111 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1112 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1113 		}
1114 	}
1115 
1116 	if (sort_opts_vals.cflag)
1117 		return check(argc ? *argv : "-");
1118 
1119 	set_random_seed();
1120 
1121 	/* Case when the outfile equals one of the input files: */
1122 	if (strcmp(outfile, "-") != 0) {
1123 		struct stat sb;
1124 		int fd, i;
1125 
1126 		for (i = 0; i < argc; ++i) {
1127 			if (strcmp(argv[i], outfile) == 0) {
1128 				if (stat(outfile, &sb) == -1)
1129 					err(2, "%s", outfile);
1130 				if (access(outfile, W_OK) == -1)
1131 					err(2, "%s", outfile);
1132 				real_outfile = outfile;
1133 				sort_asprintf(&outfile, "%s.XXXXXXXXXX",
1134 				    real_outfile);
1135 				if ((fd = mkstemp(outfile)) == -1 ||
1136 				    fchmod(fd, sb.st_mode & ALLPERMS) == -1)
1137 					err(2, "%s", outfile);
1138 				close(fd);
1139 				tmp_file_atexit(outfile);
1140 				break;
1141 			}
1142 		}
1143 	}
1144 
1145 	if (!sort_opts_vals.mflag) {
1146 		struct file_list fl;
1147 		struct sort_list list;
1148 
1149 		sort_list_init(&list);
1150 		file_list_init(&fl, true);
1151 
1152 		if (argc < 1)
1153 			procfile("-", &list, &fl);
1154 		else {
1155 			while (argc > 0) {
1156 				procfile(*argv, &list, &fl);
1157 				--argc;
1158 				++argv;
1159 			}
1160 		}
1161 
1162 		if (fl.count < 1)
1163 			sort_list_to_file(&list, outfile);
1164 		else {
1165 			if (list.count > 0) {
1166 				char *flast = new_tmp_file_name();
1167 
1168 				sort_list_to_file(&list, flast);
1169 				file_list_add(&fl, flast, false);
1170 			}
1171 			merge_files(&fl, outfile);
1172 		}
1173 
1174 		file_list_clean(&fl);
1175 
1176 		/*
1177 		 * We are about to exit the program, so we can ignore
1178 		 * the clean-up for speed
1179 		 *
1180 		 * sort_list_clean(&list);
1181 		 */
1182 
1183 	} else {
1184 		struct file_list fl;
1185 
1186 		file_list_init(&fl, false);
1187 		file_list_populate(&fl, argc, argv, true);
1188 		merge_files(&fl, outfile);
1189 		file_list_clean(&fl);
1190 	}
1191 
1192 	if (real_outfile) {
1193 		if (rename(outfile, real_outfile) < 0)
1194 			err(2, "%s", real_outfile);
1195 		sort_free(outfile);
1196 	}
1197 
1198 	return 0;
1199 }
1200