xref: /freebsd/usr.bin/sort/sort.c (revision 9768746b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36 
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53 
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57 
/* Short-option string passed to getopt_long() in main(). */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Set when the 'R' modifier is selected; main() then calls get_random_seed(). */
static bool need_random;

/* MD5 context initialised and fed with the seed bytes by get_random_seed(). */
MD5_CTX md5_ctx;
63 
/*
 * Default messages, addressed by number (entry 0 is an empty placeholder).
 * Within this file they are fetched through getstr(N), which is defined
 * elsewhere -- presumably it indexes this table; confirm against sort.h.
 * Entry 12 is a printf-style format whose single %s receives the program
 * name (see usage()).
 */
const char *nlsstr[] = { "",
/* 1*/"mutually exclusive flags",
/* 2*/"extra argument not allowed with -c",
/* 3*/"Unknown feature",
/* 4*/"Wrong memory buffer specification",
/* 5*/"0 field in key specs",
/* 6*/"0 column in key specs",
/* 7*/"Wrong file mode",
/* 8*/"Cannot open file for reading",
/* 9*/"Radix sort cannot be used with these sort options",
/*10*/"The chosen sort method cannot be used with stable and/or unique sort",
/*11*/"Invalid key position",
/*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
      "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
      "[-o outfile] [--batch-size size] [--files0-from file] "
      "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
      "[--mmap] "
#if defined(SORT_THREADS)
      "[--parallel thread_no] "
#endif
      "[--human-numeric-sort] "
      "[--version-sort] [--random-sort [--random-source file]] "
      "[--compress-program program] [file ...]\n" };
90 
/* Global option state; zeroed by set_sort_opts() and filled in by main(). */
struct sort_opts sort_opts_vals;

/* Set by --debug; makes main() print the chosen parameters. */
bool debug_sort;
/* Set by set_sort_modifier() for the g, R, M, n and h modifiers. */
bool need_hint;

/* Cached MB_CUR_MAX, captured in set_locale() after setlocale(LC_ALL, ""). */
size_t mb_cur_max;

#if defined(SORT_THREADS)
/* Online CPU count (clamped in set_hw_params()) and worker thread count. */
unsigned int ncpu = 1;
size_t nthreads = 1;
#endif

/* True when GNUSORT_NUMERIC_COMPATIBILITY is present in the environment. */
static bool gnusort_numeric_compatibility;

/* Modifiers given as plain command-line options (not attached to a -k key). */
static struct sort_mods default_sort_mods_object;
struct sort_mods * const default_sort_mods = &default_sort_mods_object;

/* Set for the n and h modifiers; enables the numeric-symbol debug output. */
static bool print_symbols_on_debug;

/*
 * Arguments read from a file (when the --files0-from option is used).
 * argc_from_file0 stays (size_t)-1 until the first name has been read.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
115 
/*
 * Placeholder symbols for options which have no single-character equivalent.
 * Values start above CHAR_MAX so they cannot collide with any short option
 * character returned by getopt_long().
 */
enum
{
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
138 
/*
 * Ordering modifiers that may not be combined with one another.  The index
 * of a letter in this table is also its index in the bool "mef_flags"
 * arrays passed to check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
141 
/*
 * Long option table for getopt_long().  Each entry maps a long name either
 * to the equivalent short option character or to one of the *_OPT
 * placeholder values; the table ends with an all-zero sentinel entry.
 */
static struct option long_options[] = {
				{ "batch-size", required_argument, NULL, BS_OPT },
				{ "buffer-size", required_argument, NULL, 'S' },
				{ "check", optional_argument, NULL, 'c' },
				/* NOTE(review): the name below literally contains "=silent|quiet" -- confirm this entry is intentional. */
				{ "check=silent|quiet", optional_argument, NULL, 'C' },
				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
				{ "debug", no_argument, NULL, DEBUG_OPT },
				{ "dictionary-order", no_argument, NULL, 'd' },
				{ "field-separator", required_argument, NULL, 't' },
				{ "files0-from", required_argument, NULL, FF_OPT },
				{ "general-numeric-sort", no_argument, NULL, 'g' },
				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
				{ "help",no_argument, NULL, HELP_OPT },
				{ "human-numeric-sort", no_argument, NULL, 'h' },
				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
				{ "ignore-case", no_argument, NULL, 'f' },
				{ "ignore-nonprinting", no_argument, NULL, 'i' },
				{ "key", required_argument, NULL, 'k' },
				{ "merge", no_argument, NULL, 'm' },
				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
				{ "mmap", no_argument, NULL, MMAP_OPT },
				{ "month-sort", no_argument, NULL, 'M' },
				{ "numeric-sort", no_argument, NULL, 'n' },
				{ "output", required_argument, NULL, 'o' },
#if defined(SORT_THREADS)
				{ "parallel", required_argument, NULL, PARALLEL_OPT },
#endif
				{ "qsort", no_argument, NULL, QSORT_OPT },
				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
				{ "random-sort", no_argument, NULL, 'R' },
				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
				{ "reverse", no_argument, NULL, 'r' },
				{ "sort", required_argument, NULL, SORT_OPT },
				{ "stable", no_argument, NULL, 's' },
				{ "temporary-directory",required_argument, NULL, 'T' },
				{ "unique", no_argument, NULL, 'u' },
				{ "version", no_argument, NULL, VERSION_OPT },
				{ "version-sort",no_argument, NULL, 'V' },
				{ "zero-terminated", no_argument, NULL, 'z' },
				{ NULL, no_argument, NULL, 0 }
};
183 
/*
 * Forward declaration: rewrites obsolete "+POS1 [-POS2]" arguments in place
 * into "-k" form before option parsing (defined later in this file).
 */
void fix_obsolete_keys(int *argc, char **argv);
185 
186 /*
187  * Check where sort modifier is present
188  */
189 static bool
190 sort_modifier_empty(struct sort_mods *sm)
191 {
192 
193 	if (sm == NULL)
194 		return (true);
195 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
196 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
197 }
198 
/*
 * Print the usage text and terminate.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *stream = stdout;
	int status = 0;

	if (opt_err) {
		stream = stderr;
		status = 2;
	}

	/* Message 12 is a format string taking the program name. */
	fprintf(stream, getstr(12), getprogname());
	exit(status);
}
214 
215 /*
216  * Read input file names from a file (file0-from option).
217  */
218 static void
219 read_fns_from_file0(const char *fn)
220 {
221 	FILE *f;
222 	char *line = NULL;
223 	size_t linesize = 0;
224 	ssize_t linelen;
225 
226 	if (fn == NULL)
227 		return;
228 
229 	f = fopen(fn, "r");
230 	if (f == NULL)
231 		err(2, "%s", fn);
232 
233 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
234 		if (*line != '\0') {
235 			if (argc_from_file0 == (size_t) - 1)
236 				argc_from_file0 = 0;
237 			++argc_from_file0;
238 			argv_from_file0 = sort_realloc(argv_from_file0,
239 			    argc_from_file0 * sizeof(char *));
240 			if (argv_from_file0 == NULL)
241 				err(2, NULL);
242 			argv_from_file0[argc_from_file0 - 1] = line;
243 		} else {
244 			free(line);
245 		}
246 		line = NULL;
247 		linesize = 0;
248 	}
249 	if (ferror(f))
250 		err(2, "%s: getdelim", fn);
251 
252 	closefile(f, fn);
253 }
254 
255 /*
256  * Check how much RAM is available for the sort.
257  */
258 static void
259 set_hw_params(void)
260 {
261 	long pages, psize;
262 
263 #if defined(SORT_THREADS)
264 	ncpu = 1;
265 #endif
266 
267 	pages = sysconf(_SC_PHYS_PAGES);
268 	if (pages < 1) {
269 		perror("sysconf pages");
270 		pages = 1;
271 	}
272 	psize = sysconf(_SC_PAGESIZE);
273 	if (psize < 1) {
274 		perror("sysconf psize");
275 		psize = 4096;
276 	}
277 #if defined(SORT_THREADS)
278 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
279 	if (ncpu < 1)
280 		ncpu = 1;
281 	else if(ncpu > 32)
282 		ncpu = 32;
283 
284 	nthreads = ncpu;
285 #endif
286 
287 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
288 	available_free_memory = free_memory / 2;
289 
290 	if (available_free_memory < 1024)
291 		available_free_memory = 1024;
292 }
293 
294 /*
295  * Convert "plain" symbol to wide symbol, with default value.
296  */
297 static void
298 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
299 {
300 
301 	if (wc && c) {
302 		int res;
303 
304 		res = mbtowc(wc, c, mb_cur_max);
305 		if (res < 1)
306 			*wc = def;
307 	}
308 }
309 
310 /*
311  * Set current locale symbols.
312  */
313 static void
314 set_locale(void)
315 {
316 	struct lconv *lc;
317 	const char *locale;
318 
319 	setlocale(LC_ALL, "");
320 
321 	mb_cur_max = MB_CUR_MAX;
322 
323 	lc = localeconv();
324 
325 	if (lc) {
326 		/* obtain LC_NUMERIC info */
327 		/* Convert to wide char form */
328 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
329 		    symbol_decimal_point);
330 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
331 		    symbol_thousands_sep);
332 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
333 		    symbol_positive_sign);
334 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
335 		    symbol_negative_sign);
336 	}
337 
338 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
339 		gnusort_numeric_compatibility = true;
340 
341 	locale = setlocale(LC_COLLATE, NULL);
342 
343 	if (locale) {
344 		char *tmpl;
345 		const char *cclocale;
346 
347 		tmpl = sort_strdup(locale);
348 		cclocale = setlocale(LC_COLLATE, "C");
349 		if (cclocale && !strcmp(cclocale, tmpl))
350 			byte_sort = true;
351 		else {
352 			const char *pclocale;
353 
354 			pclocale = setlocale(LC_COLLATE, "POSIX");
355 			if (pclocale && !strcmp(pclocale, tmpl))
356 				byte_sort = true;
357 		}
358 		setlocale(LC_COLLATE, tmpl);
359 		sort_free(tmpl);
360 	}
361 }
362 
363 /*
364  * Set directory temporary files.
365  */
366 static void
367 set_tmpdir(void)
368 {
369 	char *td;
370 
371 	td = getenv("TMPDIR");
372 	if (td != NULL)
373 		tmpdir = sort_strdup(td);
374 }
375 
376 /*
377  * Parse -S option.
378  */
379 static unsigned long long
380 parse_memory_buffer_value(const char *value)
381 {
382 
383 	if (value == NULL)
384 		return (available_free_memory);
385 	else {
386 		char *endptr;
387 		unsigned long long membuf;
388 
389 		endptr = NULL;
390 		errno = 0;
391 		membuf = strtoll(value, &endptr, 10);
392 
393 		if (errno != 0) {
394 			warn("%s",getstr(4));
395 			membuf = available_free_memory;
396 		} else {
397 			switch (*endptr){
398 			case 'Y':
399 				membuf *= 1024;
400 				/* FALLTHROUGH */
401 			case 'Z':
402 				membuf *= 1024;
403 				/* FALLTHROUGH */
404 			case 'E':
405 				membuf *= 1024;
406 				/* FALLTHROUGH */
407 			case 'P':
408 				membuf *= 1024;
409 				/* FALLTHROUGH */
410 			case 'T':
411 				membuf *= 1024;
412 				/* FALLTHROUGH */
413 			case 'G':
414 				membuf *= 1024;
415 				/* FALLTHROUGH */
416 			case 'M':
417 				membuf *= 1024;
418 				/* FALLTHROUGH */
419 			case '\0':
420 			case 'K':
421 				membuf *= 1024;
422 				/* FALLTHROUGH */
423 			case 'b':
424 				break;
425 			case '%':
426 				membuf = (available_free_memory * membuf) /
427 				    100;
428 				break;
429 			default:
430 				warnc(EINVAL, "%s", optarg);
431 				membuf = available_free_memory;
432 			}
433 		}
434 		return (membuf);
435 	}
436 }
437 
/*
 * Signal handler that clears the temporary files and terminates the
 * program.  It has the three-argument SA_SIGINFO signature expected by
 * set_signal_handler(); none of the arguments are used.
 */
static void
sig_handler(int sig __unused, siginfo_t *siginfo __unused,
    void *context __unused)
{

	/*
	 * NOTE(review): clear_tmp_files() is defined elsewhere; whether it is
	 * safe to call from a signal handler cannot be told from here.
	 */
	clear_tmp_files();
	exit(-1);
}
449 
450 /*
451  * Set signal handler on panic signals.
452  */
453 static void
454 set_signal_handler(void)
455 {
456 	struct sigaction sa;
457 
458 	memset(&sa, 0, sizeof(sa));
459 	sa.sa_sigaction = &sig_handler;
460 	sa.sa_flags = SA_SIGINFO;
461 
462 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
463 		perror("sigaction");
464 		return;
465 	}
466 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
467 		perror("sigaction");
468 		return;
469 	}
470 	if (sigaction(SIGINT, &sa, NULL) < 0) {
471 		perror("sigaction");
472 		return;
473 	}
474 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
475 		perror("sigaction");
476 		return;
477 	}
478 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
479 		perror("sigaction");
480 		return;
481 	}
482 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
483 		perror("sigaction");
484 		return;
485 	}
486 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
487 		perror("sigaction");
488 		return;
489 	}
490 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
491 		perror("sigaction");
492 		return;
493 	}
494 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
495 		perror("sigaction");
496 		return;
497 	}
498 }
499 
/*
 * Print the "unknown feature" message (message 3) followed by the
 * offending text, and exit with status 2.  Does not return.
 */
static void
unknown(const char *what)
{

	errx(2, "%s: %s", getstr(3), what);
}
509 
510 /*
511  * Check whether contradictory input options are used.
512  */
513 static void
514 check_mutually_exclusive_flags(char c, bool *mef_flags)
515 {
516 	int fo_index, mec;
517 	bool found_others, found_this;
518 
519 	found_others = found_this = false;
520 	fo_index = 0;
521 
522 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
523 		mec = mutually_exclusive_flags[i];
524 
525 		if (mec != c) {
526 			if (mef_flags[i]) {
527 				if (found_this)
528 					errx(1, "%c:%c: %s", c, mec, getstr(1));
529 				found_others = true;
530 				fo_index = i;
531 			}
532 		} else {
533 			if (found_others)
534 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
535 			mef_flags[i] = true;
536 			found_this = true;
537 		}
538 	}
539 }
540 
541 /*
542  * Initialise sort opts data.
543  */
544 static void
545 set_sort_opts(void)
546 {
547 
548 	memset(&default_sort_mods_object, 0,
549 	    sizeof(default_sort_mods_object));
550 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
551 	default_sort_mods_object.func =
552 	    get_sort_func(&default_sort_mods_object);
553 }
554 
555 /*
556  * Set a sort modifier on a sort modifiers object.
557  */
558 static bool
559 set_sort_modifier(struct sort_mods *sm, int c)
560 {
561 
562 	if (sm == NULL)
563 		return (true);
564 
565 	switch (c){
566 	case 'b':
567 		sm->bflag = true;
568 		break;
569 	case 'd':
570 		sm->dflag = true;
571 		break;
572 	case 'f':
573 		sm->fflag = true;
574 		break;
575 	case 'g':
576 		sm->gflag = true;
577 		need_hint = true;
578 		break;
579 	case 'i':
580 		sm->iflag = true;
581 		break;
582 	case 'R':
583 		sm->Rflag = true;
584 		need_hint = true;
585 		need_random = true;
586 		break;
587 	case 'M':
588 		initialise_months();
589 		sm->Mflag = true;
590 		need_hint = true;
591 		break;
592 	case 'n':
593 		sm->nflag = true;
594 		need_hint = true;
595 		print_symbols_on_debug = true;
596 		break;
597 	case 'r':
598 		sm->rflag = true;
599 		break;
600 	case 'V':
601 		sm->Vflag = true;
602 		break;
603 	case 'h':
604 		sm->hflag = true;
605 		need_hint = true;
606 		print_symbols_on_debug = true;
607 		break;
608 	default:
609 		return (false);
610 	}
611 
612 	sort_opts_vals.complex_sort = true;
613 	sm->func = get_sort_func(sm);
614 	return (true);
615 }
616 
617 /*
618  * Parse POS in -k option.
619  */
620 static int
621 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
622 {
623 	regmatch_t pmatch[4];
624 	regex_t re;
625 	char *c, *f;
626 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
627 	size_t len, nmatch;
628 	int ret;
629 
630 	ret = -1;
631 	nmatch = 4;
632 	c = f = NULL;
633 
634 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
635 		return (-1);
636 
637 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
638 		goto end;
639 
640 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
641 		goto end;
642 
643 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
644 		goto end;
645 
646 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
647 	f = sort_malloc((len + 1) * sizeof(char));
648 
649 	strncpy(f, s + pmatch[1].rm_so, len);
650 	f[len] = '\0';
651 
652 	if (second) {
653 		errno = 0;
654 		ks->f2 = (size_t) strtoul(f, NULL, 10);
655 		if (errno != 0)
656 			err(2, "-k");
657 		if (ks->f2 == 0) {
658 			warn("%s",getstr(5));
659 			goto end;
660 		}
661 	} else {
662 		errno = 0;
663 		ks->f1 = (size_t) strtoul(f, NULL, 10);
664 		if (errno != 0)
665 			err(2, "-k");
666 		if (ks->f1 == 0) {
667 			warn("%s",getstr(5));
668 			goto end;
669 		}
670 	}
671 
672 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
673 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
674 		c = sort_malloc((len + 1) * sizeof(char));
675 
676 		strncpy(c, s + pmatch[2].rm_so + 1, len);
677 		c[len] = '\0';
678 
679 		if (second) {
680 			errno = 0;
681 			ks->c2 = (size_t) strtoul(c, NULL, 10);
682 			if (errno != 0)
683 				err(2, "-k");
684 		} else {
685 			errno = 0;
686 			ks->c1 = (size_t) strtoul(c, NULL, 10);
687 			if (errno != 0)
688 				err(2, "-k");
689 			if (ks->c1 == 0) {
690 				warn("%s",getstr(6));
691 				goto end;
692 			}
693 		}
694 	} else {
695 		if (second)
696 			ks->c2 = 0;
697 		else
698 			ks->c1 = 1;
699 	}
700 
701 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
702 		regoff_t i = 0;
703 
704 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
705 			check_mutually_exclusive_flags(s[i], mef_flags);
706 			if (s[i] == 'b') {
707 				if (second)
708 					ks->pos2b = true;
709 				else
710 					ks->pos1b = true;
711 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
712 				goto end;
713 		}
714 	}
715 
716 	ret = 0;
717 
718 end:
719 
720 	if (c)
721 		sort_free(c);
722 	if (f)
723 		sort_free(f);
724 	regfree(&re);
725 
726 	return (ret);
727 }
728 
729 /*
730  * Parse -k option value.
731  */
732 static int
733 parse_k(const char *s, struct key_specs *ks)
734 {
735 	int ret = -1;
736 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
737 	    { false, false, false, false, false, false };
738 
739 	if (s && *s) {
740 		char *sptr;
741 
742 		sptr = strchr(s, ',');
743 		if (sptr) {
744 			size_t size1;
745 			char *pos1, *pos2;
746 
747 			size1 = sptr - s;
748 
749 			if (size1 < 1)
750 				return (-1);
751 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
752 
753 			strncpy(pos1, s, size1);
754 			pos1[size1] = '\0';
755 
756 			ret = parse_pos(pos1, ks, mef_flags, false);
757 
758 			sort_free(pos1);
759 			if (ret < 0)
760 				return (ret);
761 
762 			pos2 = sort_strdup(sptr + 1);
763 			ret = parse_pos(pos2, ks, mef_flags, true);
764 			sort_free(pos2);
765 		} else
766 			ret = parse_pos(s, ks, mef_flags, false);
767 	}
768 
769 	return (ret);
770 }
771 
/*
 * Size of the modifier buffers handed to parse_pos_obs(), including the
 * terminating NUL.
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse POS in the obsolete +POS -POS option syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * On success returns 0, stores the field number in *nf and the column
 * (0 when absent) in *nc, and copies the modifier letters, if any, into
 * sopts.  sopts must point to at least OBS_SOPTS_SIZE bytes and is left
 * untouched when no letters are present.  Returns -1 when s does not have
 * the expected form.  Numbers that cannot be represented (the caller still
 * adds 1 to them) or an over-long modifier string terminate the program
 * with status 2 and the "Invalid key position" message.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	int ret;
	size_t len, nmatch;
	unsigned long val;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Group 1: the field number. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	errno = 0;
	val = strtoul(f, NULL, 10);
	/* Leave room for the +1 the caller applies to the result. */
	if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	/* Group 2: the optional ".COLUMN" part (skip the leading dot). */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	/* Group 3: the optional modifier letters. */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's fixed-size buffer. */
		if (len >= OBS_SOPTS_SIZE)
			errx(2, "%s", getstr(11));

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Scans argv[1..] up to a "--" argument.  Every "+POS1" argument is
 * replaced by a freshly allocated "-k..." string; when it is directly
 * followed by a matching "-POS2" argument, the two are merged into one
 * "-kPOS1,POS2" argument, the rest of argv is shifted down and *argc is
 * decremented.  Obsolete positions count from 0, so 1 is added to the
 * field and column of POS1, and to the field of POS2 when its column is
 * non-zero.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four decimal ints, the separators and two
	 * modifier strings of at most OBS_SOPTS_SIZE - 1 characters each.
	 */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[OBS_SOPTS_SIZE];
		int c1, f1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				int c2, f2;
				char sopts2[OBS_SOPTS_SIZE];

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Drop the consumed -POS2 argument. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep argv[argc] == NULL. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
907 
908 /*
909  * Seed random sort
910  */
911 static void
912 get_random_seed(const char *random_source)
913 {
914 	char randseed[32];
915 	struct stat fsb, rsb;
916 	ssize_t rd;
917 	int rsfd;
918 
919 	rsfd = -1;
920 	rd = sizeof(randseed);
921 
922 	if (random_source == NULL) {
923 		if (getentropy(randseed, sizeof(randseed)) < 0)
924 			err(EX_SOFTWARE, "getentropy");
925 		goto out;
926 	}
927 
928 	rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
929 	if (rsfd < 0)
930 		err(EX_NOINPUT, "open: %s", random_source);
931 
932 	if (fstat(rsfd, &fsb) != 0)
933 		err(EX_SOFTWARE, "fstat");
934 
935 	if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
936 		err(EX_USAGE,
937 		    "random seed isn't a regular file or /dev/random");
938 
939 	/*
940 	 * Regular files: read up to maximum seed size and explicitly
941 	 * reject longer files.
942 	 */
943 	if (S_ISREG(fsb.st_mode)) {
944 		if (fsb.st_size > (off_t)sizeof(randseed))
945 			errx(EX_USAGE, "random seed is too large (%jd >"
946 			    " %zu)!", (intmax_t)fsb.st_size,
947 			    sizeof(randseed));
948 		else if (fsb.st_size < 1)
949 			errx(EX_USAGE, "random seed is too small ("
950 			    "0 bytes)");
951 
952 		memset(randseed, 0, sizeof(randseed));
953 
954 		rd = read(rsfd, randseed, fsb.st_size);
955 		if (rd < 0)
956 			err(EX_SOFTWARE, "reading random seed file %s",
957 			    random_source);
958 		if (rd < (ssize_t)fsb.st_size)
959 			errx(EX_SOFTWARE, "short read from %s", random_source);
960 	} else if (S_ISCHR(fsb.st_mode)) {
961 		if (stat("/dev/random", &rsb) < 0)
962 			err(EX_SOFTWARE, "stat");
963 
964 		if (fsb.st_dev != rsb.st_dev ||
965 		    fsb.st_ino != rsb.st_ino)
966 			errx(EX_USAGE, "random seed is a character "
967 			    "device other than /dev/random");
968 
969 		if (getentropy(randseed, sizeof(randseed)) < 0)
970 			err(EX_SOFTWARE, "getentropy");
971 	}
972 
973 out:
974 	if (rsfd >= 0)
975 		close(rsfd);
976 
977 	MD5Init(&md5_ctx);
978 	MD5Update(&md5_ctx, randseed, rd);
979 }
980 
981 /*
982  * Main function.
983  */
984 int
985 main(int argc, char **argv)
986 {
987 	char *outfile, *real_outfile;
988 	char *random_source = NULL;
989 	int c, result;
990 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
991 	    { false, false, false, false, false, false };
992 
993 	result = 0;
994 	outfile = sort_strdup("-");
995 	real_outfile = NULL;
996 
997 	struct sort_mods *sm = &default_sort_mods_object;
998 
999 	init_tmp_files();
1000 
1001 	set_signal_handler();
1002 
1003 	set_hw_params();
1004 	set_locale();
1005 	set_tmpdir();
1006 	set_sort_opts();
1007 
1008 	fix_obsolete_keys(&argc, argv);
1009 
1010 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1011 	    != -1)) {
1012 
1013 		check_mutually_exclusive_flags(c, mef_flags);
1014 
1015 		if (!set_sort_modifier(sm, c)) {
1016 
1017 			switch (c) {
1018 			case 'c':
1019 				sort_opts_vals.cflag = true;
1020 				if (optarg) {
1021 					if (!strcmp(optarg, "diagnose-first"))
1022 						;
1023 					else if (!strcmp(optarg, "silent") ||
1024 					    !strcmp(optarg, "quiet"))
1025 						sort_opts_vals.csilentflag = true;
1026 					else if (*optarg)
1027 						unknown(optarg);
1028 				}
1029 				break;
1030 			case 'C':
1031 				sort_opts_vals.cflag = true;
1032 				sort_opts_vals.csilentflag = true;
1033 				break;
1034 			case 'k':
1035 			{
1036 				sort_opts_vals.complex_sort = true;
1037 				sort_opts_vals.kflag = true;
1038 
1039 				keys_num++;
1040 				keys = sort_realloc(keys, keys_num *
1041 				    sizeof(struct key_specs));
1042 				memset(&(keys[keys_num - 1]), 0,
1043 				    sizeof(struct key_specs));
1044 
1045 				if (parse_k(optarg, &(keys[keys_num - 1]))
1046 				    < 0) {
1047 					errc(2, EINVAL, "-k %s", optarg);
1048 				}
1049 
1050 				break;
1051 			}
1052 			case 'm':
1053 				sort_opts_vals.mflag = true;
1054 				break;
1055 			case 'o':
1056 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1057 				strcpy(outfile, optarg);
1058 				break;
1059 			case 's':
1060 				sort_opts_vals.sflag = true;
1061 				break;
1062 			case 'S':
1063 				available_free_memory =
1064 				    parse_memory_buffer_value(optarg);
1065 				break;
1066 			case 'T':
1067 				tmpdir = sort_strdup(optarg);
1068 				break;
1069 			case 't':
1070 				while (strlen(optarg) > 1) {
1071 					if (optarg[0] != '\\') {
1072 						errc(2, EINVAL, "%s", optarg);
1073 					}
1074 					optarg += 1;
1075 					if (*optarg == '0') {
1076 						*optarg = 0;
1077 						break;
1078 					}
1079 				}
1080 				sort_opts_vals.tflag = true;
1081 				sort_opts_vals.field_sep = btowc(optarg[0]);
1082 				if (sort_opts_vals.field_sep == WEOF) {
1083 					errno = EINVAL;
1084 					err(2, NULL);
1085 				}
1086 				if (!gnusort_numeric_compatibility) {
1087 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1088 						symbol_decimal_point = WEOF;
1089 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1090 						symbol_thousands_sep = WEOF;
1091 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1092 						symbol_negative_sign = WEOF;
1093 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1094 						symbol_positive_sign = WEOF;
1095 				}
1096 				break;
1097 			case 'u':
1098 				sort_opts_vals.uflag = true;
1099 				/* stable sort for the correct unique val */
1100 				sort_opts_vals.sflag = true;
1101 				break;
1102 			case 'z':
1103 				sort_opts_vals.zflag = true;
1104 				break;
1105 			case SORT_OPT:
1106 				if (optarg) {
1107 					if (!strcmp(optarg, "general-numeric"))
1108 						set_sort_modifier(sm, 'g');
1109 					else if (!strcmp(optarg, "human-numeric"))
1110 						set_sort_modifier(sm, 'h');
1111 					else if (!strcmp(optarg, "numeric"))
1112 						set_sort_modifier(sm, 'n');
1113 					else if (!strcmp(optarg, "month"))
1114 						set_sort_modifier(sm, 'M');
1115 					else if (!strcmp(optarg, "random"))
1116 						set_sort_modifier(sm, 'R');
1117 					else
1118 						unknown(optarg);
1119 				}
1120 				break;
1121 #if defined(SORT_THREADS)
1122 			case PARALLEL_OPT:
1123 				nthreads = (size_t)(atoi(optarg));
1124 				if (nthreads < 1)
1125 					nthreads = 1;
1126 				if (nthreads > 1024)
1127 					nthreads = 1024;
1128 				break;
1129 #endif
1130 			case QSORT_OPT:
1131 				sort_opts_vals.sort_method = SORT_QSORT;
1132 				break;
1133 			case MERGESORT_OPT:
1134 				sort_opts_vals.sort_method = SORT_MERGESORT;
1135 				break;
1136 			case MMAP_OPT:
1137 				use_mmap = true;
1138 				break;
1139 			case HEAPSORT_OPT:
1140 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1141 				break;
1142 			case RADIXSORT_OPT:
1143 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1144 				break;
1145 			case RANDOMSOURCE_OPT:
1146 				random_source = strdup(optarg);
1147 				break;
1148 			case COMPRESSPROGRAM_OPT:
1149 				compress_program = strdup(optarg);
1150 				break;
1151 			case FF_OPT:
1152 				read_fns_from_file0(optarg);
1153 				break;
1154 			case BS_OPT:
1155 			{
1156 				errno = 0;
1157 				long mof = strtol(optarg, NULL, 10);
1158 				if (errno != 0)
1159 					err(2, "--batch-size");
1160 				if (mof >= 2)
1161 					max_open_files = (size_t) mof + 1;
1162 			}
1163 				break;
1164 			case VERSION_OPT:
1165 				printf("%s\n", VERSION);
1166 				exit(EXIT_SUCCESS);
1167 				/* NOTREACHED */
1168 				break;
1169 			case DEBUG_OPT:
1170 				debug_sort = true;
1171 				break;
1172 			case HELP_OPT:
1173 				usage(false);
1174 				/* NOTREACHED */
1175 				break;
1176 			default:
1177 				usage(true);
1178 				/* NOTREACHED */
1179 			}
1180 		}
1181 	}
1182 
1183 	argc -= optind;
1184 	argv += optind;
1185 
1186 	if (argv_from_file0) {
1187 		argc = argc_from_file0;
1188 		argv = argv_from_file0;
1189 	}
1190 
1191 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1192 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1193 
1194 	if (keys_num == 0) {
1195 		keys_num = 1;
1196 		keys = sort_realloc(keys, sizeof(struct key_specs));
1197 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1198 		keys[0].c1 = 1;
1199 		keys[0].pos1b = default_sort_mods->bflag;
1200 		keys[0].pos2b = default_sort_mods->bflag;
1201 		memcpy(&(keys[0].sm), default_sort_mods,
1202 		    sizeof(struct sort_mods));
1203 	}
1204 
1205 	for (size_t i = 0; i < keys_num; i++) {
1206 		struct key_specs *ks;
1207 
1208 		ks = &(keys[i]);
1209 
1210 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1211 		    !(ks->pos2b)) {
1212 			ks->pos1b = sm->bflag;
1213 			ks->pos2b = sm->bflag;
1214 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1215 		}
1216 
1217 		ks->sm.func = get_sort_func(&(ks->sm));
1218 	}
1219 
1220 	if (debug_sort) {
1221 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1222 #if defined(SORT_THREADS)
1223 		printf("Number of CPUs: %d\n",(int)ncpu);
1224 		nthreads = 1;
1225 #endif
1226 		printf("Using collate rules of %s locale\n",
1227 		    setlocale(LC_COLLATE, NULL));
1228 		if (byte_sort)
1229 			printf("Byte sort is used\n");
1230 		if (print_symbols_on_debug) {
1231 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1232 			if (symbol_thousands_sep)
1233 				printf("Thousands separator: <%lc>\n",
1234 				    symbol_thousands_sep);
1235 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1236 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1237 		}
1238 	}
1239 
1240 	if (need_random)
1241 		get_random_seed(random_source);
1242 
1243 	/* Case when the outfile equals one of the input files: */
1244 	if (strcmp(outfile, "-")) {
1245 
1246 		for(int i = 0; i < argc; ++i) {
1247 			if (strcmp(argv[i], outfile) == 0) {
1248 				real_outfile = sort_strdup(outfile);
1249 				for(;;) {
1250 					char* tmp = sort_malloc(strlen(outfile) +
1251 					    strlen(".tmp") + 1);
1252 
1253 					strcpy(tmp, outfile);
1254 					strcpy(tmp + strlen(tmp), ".tmp");
1255 					sort_free(outfile);
1256 					outfile = tmp;
1257 					if (access(outfile, F_OK) < 0)
1258 						break;
1259 				}
1260 				tmp_file_atexit(outfile);
1261 			}
1262 		}
1263 	}
1264 
1265 #if defined(SORT_THREADS)
1266 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1267 		nthreads = 1;
1268 #endif
1269 
1270 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1271 		struct file_list fl;
1272 		struct sort_list list;
1273 
1274 		sort_list_init(&list);
1275 		file_list_init(&fl, true);
1276 
1277 		if (argc < 1)
1278 			procfile("-", &list, &fl);
1279 		else {
1280 			while (argc > 0) {
1281 				procfile(*argv, &list, &fl);
1282 				--argc;
1283 				++argv;
1284 			}
1285 		}
1286 
1287 		if (fl.count < 1)
1288 			sort_list_to_file(&list, outfile);
1289 		else {
1290 			if (list.count > 0) {
1291 				char *flast = new_tmp_file_name();
1292 
1293 				sort_list_to_file(&list, flast);
1294 				file_list_add(&fl, flast, false);
1295 			}
1296 			merge_files(&fl, outfile);
1297 		}
1298 
1299 		file_list_clean(&fl);
1300 
1301 		/*
1302 		 * We are about to exit the program, so we can ignore
1303 		 * the clean-up for speed
1304 		 *
1305 		 * sort_list_clean(&list);
1306 		 */
1307 
1308 	} else if (sort_opts_vals.cflag) {
1309 		result = (argc == 0) ? (check("-")) : (check(*argv));
1310 	} else if (sort_opts_vals.mflag) {
1311 		struct file_list fl;
1312 
1313 		file_list_init(&fl, false);
1314 		/* No file arguments remaining means "read from stdin." */
1315 		if (argc == 0)
1316 			file_list_add(&fl, "-", true);
1317 		else
1318 			file_list_populate(&fl, argc, argv, true);
1319 		merge_files(&fl, outfile);
1320 		file_list_clean(&fl);
1321 	}
1322 
1323 	if (real_outfile) {
1324 		unlink(real_outfile);
1325 		if (rename(outfile, real_outfile) < 0)
1326 			err(2, NULL);
1327 		sort_free(real_outfile);
1328 	}
1329 
1330 	sort_free(outfile);
1331 
1332 	return (result);
1333 }
1334