xref: /freebsd/usr.bin/sort/sort.c (revision 1d386b48)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <getopt.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <md5.h>
42 #include <regex.h>
43 #include <signal.h>
44 #include <stdbool.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <unistd.h>
49 #include <wchar.h>
50 #include <wctype.h>
51 
52 #include "coll.h"
53 #include "file.h"
54 #include "sort.h"
55 
56 #define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"
57 
58 static bool need_random;
59 
60 MD5_CTX md5_ctx;
61 
/*
 * Default (untranslated) messages, indexed by message number.
 * Index 0 is an empty placeholder so that real messages start at 1;
 * message 12 is the usage format string and takes the program name
 * as its single "%s" argument.
 */
const char *nlsstr[] = {
	/*  0 */ "",
	/*  1 */ "mutually exclusive flags",
	/*  2 */ "extra argument not allowed with -c",
	/*  3 */ "Unknown feature",
	/*  4 */ "Wrong memory buffer specification",
	/*  5 */ "0 field in key specs",
	/*  6 */ "0 column in key specs",
	/*  7 */ "Wrong file mode",
	/*  8 */ "Cannot open file for reading",
	/*  9 */ "Radix sort cannot be used with these sort options",
	/* 10 */ "The chosen sort method cannot be used with stable and/or unique sort",
	/* 11 */ "Invalid key position",
	/* 12 */ "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
88 
89 struct sort_opts sort_opts_vals;
90 
91 bool debug_sort;
92 bool need_hint;
93 
94 size_t mb_cur_max;
95 
96 #if defined(SORT_THREADS)
97 unsigned int ncpu = 1;
98 size_t nthreads = 1;
99 #endif
100 
101 static bool gnusort_numeric_compatibility;
102 
103 static struct sort_mods default_sort_mods_object;
104 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
105 
106 static bool print_symbols_on_debug;
107 
108 /*
109  * Arguments from file (when file0-from option is used:
110  */
111 static size_t argc_from_file0 = (size_t)-1;
112 static char **argv_from_file0;
113 
/*
 * Return codes for long options that have no single-character
 * equivalent.  Numbering starts above CHAR_MAX so the values cannot
 * collide with any short-option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
136 
/*
 * Sort-type flags of which at most one may be given; see
 * check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
139 
140 static struct option long_options[] = {
141 				{ "batch-size", required_argument, NULL, BS_OPT },
142 				{ "buffer-size", required_argument, NULL, 'S' },
143 				{ "check", optional_argument, NULL, 'c' },
144 				{ "check=silent|quiet", optional_argument, NULL, 'C' },
145 				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
146 				{ "debug", no_argument, NULL, DEBUG_OPT },
147 				{ "dictionary-order", no_argument, NULL, 'd' },
148 				{ "field-separator", required_argument, NULL, 't' },
149 				{ "files0-from", required_argument, NULL, FF_OPT },
150 				{ "general-numeric-sort", no_argument, NULL, 'g' },
151 				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
152 				{ "help",no_argument, NULL, HELP_OPT },
153 				{ "human-numeric-sort", no_argument, NULL, 'h' },
154 				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
155 				{ "ignore-case", no_argument, NULL, 'f' },
156 				{ "ignore-nonprinting", no_argument, NULL, 'i' },
157 				{ "key", required_argument, NULL, 'k' },
158 				{ "merge", no_argument, NULL, 'm' },
159 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
160 				{ "mmap", no_argument, NULL, MMAP_OPT },
161 				{ "month-sort", no_argument, NULL, 'M' },
162 				{ "numeric-sort", no_argument, NULL, 'n' },
163 				{ "output", required_argument, NULL, 'o' },
164 #if defined(SORT_THREADS)
165 				{ "parallel", required_argument, NULL, PARALLEL_OPT },
166 #endif
167 				{ "qsort", no_argument, NULL, QSORT_OPT },
168 				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
169 				{ "random-sort", no_argument, NULL, 'R' },
170 				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
171 				{ "reverse", no_argument, NULL, 'r' },
172 				{ "sort", required_argument, NULL, SORT_OPT },
173 				{ "stable", no_argument, NULL, 's' },
174 				{ "temporary-directory",required_argument, NULL, 'T' },
175 				{ "unique", no_argument, NULL, 'u' },
176 				{ "version", no_argument, NULL, VERSION_OPT },
177 				{ "version-sort",no_argument, NULL, 'V' },
178 				{ "zero-terminated", no_argument, NULL, 'z' },
179 				{ NULL, no_argument, NULL, 0 }
180 };
181 
/* Rewrites obsolete "+POS1 [-POS2]" arguments in argv as "-k" options. */
void fix_obsolete_keys(int *argc, char **argv);
183 
184 /*
185  * Check where sort modifier is present
186  */
187 static bool
sort_modifier_empty(struct sort_mods * sm)188 sort_modifier_empty(struct sort_mods *sm)
189 {
190 
191 	if (sm == NULL)
192 		return (true);
193 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
194 	    sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
195 }
196 
/*
 * Print the usage text and terminate.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{

	fprintf(opt_err ? stderr : stdout, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
212 
213 /*
214  * Read input file names from a file (file0-from option).
215  */
216 static void
read_fns_from_file0(const char * fn)217 read_fns_from_file0(const char *fn)
218 {
219 	FILE *f;
220 	char *line = NULL;
221 	size_t linesize = 0;
222 	ssize_t linelen;
223 
224 	if (fn == NULL)
225 		return;
226 
227 	f = fopen(fn, "r");
228 	if (f == NULL)
229 		err(2, "%s", fn);
230 
231 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
232 		if (*line != '\0') {
233 			if (argc_from_file0 == (size_t) - 1)
234 				argc_from_file0 = 0;
235 			++argc_from_file0;
236 			argv_from_file0 = sort_realloc(argv_from_file0,
237 			    argc_from_file0 * sizeof(char *));
238 			if (argv_from_file0 == NULL)
239 				err(2, NULL);
240 			argv_from_file0[argc_from_file0 - 1] = line;
241 		} else {
242 			free(line);
243 		}
244 		line = NULL;
245 		linesize = 0;
246 	}
247 	if (ferror(f))
248 		err(2, "%s: getdelim", fn);
249 
250 	closefile(f, fn);
251 }
252 
253 /*
254  * Check how much RAM is available for the sort.
255  */
256 static void
set_hw_params(void)257 set_hw_params(void)
258 {
259 	long pages, psize;
260 
261 #if defined(SORT_THREADS)
262 	ncpu = 1;
263 #endif
264 
265 	pages = sysconf(_SC_PHYS_PAGES);
266 	if (pages < 1) {
267 		perror("sysconf pages");
268 		pages = 1;
269 	}
270 	psize = sysconf(_SC_PAGESIZE);
271 	if (psize < 1) {
272 		perror("sysconf psize");
273 		psize = 4096;
274 	}
275 #if defined(SORT_THREADS)
276 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
277 	if (ncpu < 1)
278 		ncpu = 1;
279 	else if(ncpu > 32)
280 		ncpu = 32;
281 
282 	nthreads = ncpu;
283 #endif
284 
285 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
286 	available_free_memory = free_memory / 2;
287 
288 	if (available_free_memory < 1024)
289 		available_free_memory = 1024;
290 }
291 
292 /*
293  * Convert "plain" symbol to wide symbol, with default value.
294  */
295 static void
conv_mbtowc(wchar_t * wc,const char * c,const wchar_t def)296 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
297 {
298 
299 	if (wc && c) {
300 		int res;
301 
302 		res = mbtowc(wc, c, mb_cur_max);
303 		if (res < 1)
304 			*wc = def;
305 	}
306 }
307 
308 /*
309  * Set current locale symbols.
310  */
311 static void
set_locale(void)312 set_locale(void)
313 {
314 	struct lconv *lc;
315 	const char *locale;
316 
317 	setlocale(LC_ALL, "");
318 
319 	mb_cur_max = MB_CUR_MAX;
320 
321 	lc = localeconv();
322 
323 	if (lc) {
324 		/* obtain LC_NUMERIC info */
325 		/* Convert to wide char form */
326 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
327 		    symbol_decimal_point);
328 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
329 		    symbol_thousands_sep);
330 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
331 		    symbol_positive_sign);
332 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
333 		    symbol_negative_sign);
334 	}
335 
336 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
337 		gnusort_numeric_compatibility = true;
338 
339 	locale = setlocale(LC_COLLATE, NULL);
340 
341 	if (locale) {
342 		char *tmpl;
343 		const char *cclocale;
344 
345 		tmpl = sort_strdup(locale);
346 		cclocale = setlocale(LC_COLLATE, "C");
347 		if (cclocale && !strcmp(cclocale, tmpl))
348 			byte_sort = true;
349 		else {
350 			const char *pclocale;
351 
352 			pclocale = setlocale(LC_COLLATE, "POSIX");
353 			if (pclocale && !strcmp(pclocale, tmpl))
354 				byte_sort = true;
355 		}
356 		setlocale(LC_COLLATE, tmpl);
357 		sort_free(tmpl);
358 	}
359 }
360 
361 /*
362  * Set directory temporary files.
363  */
364 static void
set_tmpdir(void)365 set_tmpdir(void)
366 {
367 	char *td;
368 
369 	td = getenv("TMPDIR");
370 	if (td != NULL)
371 		tmpdir = sort_strdup(td);
372 }
373 
374 /*
375  * Parse -S option.
376  */
377 static unsigned long long
parse_memory_buffer_value(const char * value)378 parse_memory_buffer_value(const char *value)
379 {
380 
381 	if (value == NULL)
382 		return (available_free_memory);
383 	else {
384 		char *endptr;
385 		unsigned long long membuf;
386 
387 		endptr = NULL;
388 		errno = 0;
389 		membuf = strtoll(value, &endptr, 10);
390 
391 		if (errno != 0) {
392 			warn("%s",getstr(4));
393 			membuf = available_free_memory;
394 		} else {
395 			switch (*endptr){
396 			case 'Y':
397 				membuf *= 1024;
398 				/* FALLTHROUGH */
399 			case 'Z':
400 				membuf *= 1024;
401 				/* FALLTHROUGH */
402 			case 'E':
403 				membuf *= 1024;
404 				/* FALLTHROUGH */
405 			case 'P':
406 				membuf *= 1024;
407 				/* FALLTHROUGH */
408 			case 'T':
409 				membuf *= 1024;
410 				/* FALLTHROUGH */
411 			case 'G':
412 				membuf *= 1024;
413 				/* FALLTHROUGH */
414 			case 'M':
415 				membuf *= 1024;
416 				/* FALLTHROUGH */
417 			case '\0':
418 			case 'K':
419 				membuf *= 1024;
420 				/* FALLTHROUGH */
421 			case 'b':
422 				break;
423 			case '%':
424 				membuf = (available_free_memory * membuf) /
425 				    100;
426 				break;
427 			default:
428 				warnc(EINVAL, "%s", optarg);
429 				membuf = available_free_memory;
430 			}
431 		}
432 		return (membuf);
433 	}
434 }
435 
436 /*
437  * Signal handler that clears the temporary files.
438  */
439 static void
sig_handler(int sig __unused,siginfo_t * siginfo __unused,void * context __unused)440 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
441     void *context __unused)
442 {
443 
444 	clear_tmp_files();
445 	exit(-1);
446 }
447 
448 /*
449  * Set signal handler on panic signals.
450  */
451 static void
set_signal_handler(void)452 set_signal_handler(void)
453 {
454 	struct sigaction sa;
455 
456 	memset(&sa, 0, sizeof(sa));
457 	sa.sa_sigaction = &sig_handler;
458 	sa.sa_flags = SA_SIGINFO;
459 
460 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
461 		perror("sigaction");
462 		return;
463 	}
464 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
465 		perror("sigaction");
466 		return;
467 	}
468 	if (sigaction(SIGINT, &sa, NULL) < 0) {
469 		perror("sigaction");
470 		return;
471 	}
472 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
473 		perror("sigaction");
474 		return;
475 	}
476 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
477 		perror("sigaction");
478 		return;
479 	}
480 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
481 		perror("sigaction");
482 		return;
483 	}
484 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
485 		perror("sigaction");
486 		return;
487 	}
488 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
489 		perror("sigaction");
490 		return;
491 	}
492 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
493 		perror("sigaction");
494 		return;
495 	}
496 }
497 
/*
 * Report an unrecognised feature name: prints message 3 followed by
 * what, then exits with status 2 via errx().
 */
static void
unknown(const char *what)
{
	const char *msg = getstr(3);

	errx(2, "%s: %s", msg, what);
}
507 
508 /*
509  * Check whether contradictory input options are used.
510  */
511 static void
check_mutually_exclusive_flags(char c,bool * mef_flags)512 check_mutually_exclusive_flags(char c, bool *mef_flags)
513 {
514 	int fo_index, mec;
515 	bool found_others, found_this;
516 
517 	found_others = found_this = false;
518 	fo_index = 0;
519 
520 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
521 		mec = mutually_exclusive_flags[i];
522 
523 		if (mec != c) {
524 			if (mef_flags[i]) {
525 				if (found_this)
526 					errx(1, "%c:%c: %s", c, mec, getstr(1));
527 				found_others = true;
528 				fo_index = i;
529 			}
530 		} else {
531 			if (found_others)
532 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
533 			mef_flags[i] = true;
534 			found_this = true;
535 		}
536 	}
537 }
538 
539 /*
540  * Initialise sort opts data.
541  */
542 static void
set_sort_opts(void)543 set_sort_opts(void)
544 {
545 
546 	memset(&default_sort_mods_object, 0,
547 	    sizeof(default_sort_mods_object));
548 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
549 	default_sort_mods_object.func =
550 	    get_sort_func(&default_sort_mods_object);
551 }
552 
553 /*
554  * Set a sort modifier on a sort modifiers object.
555  */
556 static bool
set_sort_modifier(struct sort_mods * sm,int c)557 set_sort_modifier(struct sort_mods *sm, int c)
558 {
559 
560 	if (sm == NULL)
561 		return (true);
562 
563 	switch (c){
564 	case 'b':
565 		sm->bflag = true;
566 		break;
567 	case 'd':
568 		sm->dflag = true;
569 		break;
570 	case 'f':
571 		sm->fflag = true;
572 		break;
573 	case 'g':
574 		sm->gflag = true;
575 		need_hint = true;
576 		break;
577 	case 'i':
578 		sm->iflag = true;
579 		break;
580 	case 'R':
581 		sm->Rflag = true;
582 		need_hint = true;
583 		need_random = true;
584 		break;
585 	case 'M':
586 		initialise_months();
587 		sm->Mflag = true;
588 		need_hint = true;
589 		break;
590 	case 'n':
591 		sm->nflag = true;
592 		need_hint = true;
593 		print_symbols_on_debug = true;
594 		break;
595 	case 'r':
596 		sm->rflag = true;
597 		break;
598 	case 'V':
599 		sm->Vflag = true;
600 		break;
601 	case 'h':
602 		sm->hflag = true;
603 		need_hint = true;
604 		print_symbols_on_debug = true;
605 		break;
606 	default:
607 		return (false);
608 	}
609 
610 	sort_opts_vals.complex_sort = true;
611 	sm->func = get_sort_func(sm);
612 	return (true);
613 }
614 
615 /*
616  * Parse POS in -k option.
617  */
618 static int
parse_pos(const char * s,struct key_specs * ks,bool * mef_flags,bool second)619 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
620 {
621 	regmatch_t pmatch[4];
622 	regex_t re;
623 	char *c, *f;
624 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
625 	size_t len, nmatch;
626 	int ret;
627 
628 	ret = -1;
629 	nmatch = 4;
630 	c = f = NULL;
631 
632 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
633 		return (-1);
634 
635 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
636 		goto end;
637 
638 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
639 		goto end;
640 
641 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
642 		goto end;
643 
644 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
645 	f = sort_malloc((len + 1) * sizeof(char));
646 
647 	strncpy(f, s + pmatch[1].rm_so, len);
648 	f[len] = '\0';
649 
650 	if (second) {
651 		errno = 0;
652 		ks->f2 = (size_t) strtoul(f, NULL, 10);
653 		if (errno != 0)
654 			err(2, "-k");
655 		if (ks->f2 == 0) {
656 			warn("%s",getstr(5));
657 			goto end;
658 		}
659 	} else {
660 		errno = 0;
661 		ks->f1 = (size_t) strtoul(f, NULL, 10);
662 		if (errno != 0)
663 			err(2, "-k");
664 		if (ks->f1 == 0) {
665 			warn("%s",getstr(5));
666 			goto end;
667 		}
668 	}
669 
670 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
671 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
672 		c = sort_malloc((len + 1) * sizeof(char));
673 
674 		strncpy(c, s + pmatch[2].rm_so + 1, len);
675 		c[len] = '\0';
676 
677 		if (second) {
678 			errno = 0;
679 			ks->c2 = (size_t) strtoul(c, NULL, 10);
680 			if (errno != 0)
681 				err(2, "-k");
682 		} else {
683 			errno = 0;
684 			ks->c1 = (size_t) strtoul(c, NULL, 10);
685 			if (errno != 0)
686 				err(2, "-k");
687 			if (ks->c1 == 0) {
688 				warn("%s",getstr(6));
689 				goto end;
690 			}
691 		}
692 	} else {
693 		if (second)
694 			ks->c2 = 0;
695 		else
696 			ks->c1 = 1;
697 	}
698 
699 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
700 		regoff_t i = 0;
701 
702 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
703 			check_mutually_exclusive_flags(s[i], mef_flags);
704 			if (s[i] == 'b') {
705 				if (second)
706 					ks->pos2b = true;
707 				else
708 					ks->pos1b = true;
709 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
710 				goto end;
711 		}
712 	}
713 
714 	ret = 0;
715 
716 end:
717 
718 	if (c)
719 		sort_free(c);
720 	if (f)
721 		sort_free(f);
722 	regfree(&re);
723 
724 	return (ret);
725 }
726 
727 /*
728  * Parse -k option value.
729  */
730 static int
parse_k(const char * s,struct key_specs * ks)731 parse_k(const char *s, struct key_specs *ks)
732 {
733 	int ret = -1;
734 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
735 	    { false, false, false, false, false, false };
736 
737 	if (s && *s) {
738 		char *sptr;
739 
740 		sptr = strchr(s, ',');
741 		if (sptr) {
742 			size_t size1;
743 			char *pos1, *pos2;
744 
745 			size1 = sptr - s;
746 
747 			if (size1 < 1)
748 				return (-1);
749 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
750 
751 			strncpy(pos1, s, size1);
752 			pos1[size1] = '\0';
753 
754 			ret = parse_pos(pos1, ks, mef_flags, false);
755 
756 			sort_free(pos1);
757 			if (ret < 0)
758 				return (ret);
759 
760 			pos2 = sort_strdup(sptr + 1);
761 			ret = parse_pos(pos2, ks, mef_flags, true);
762 			sort_free(pos2);
763 		} else
764 			ret = parse_pos(s, ks, mef_flags, false);
765 	}
766 
767 	return (ret);
768 }
769 
/* Buffer sizes used while translating obsolete key syntax. */
enum {
	/* Modifier letters of one position, including the NUL. */
	OBS_SOPTS_SIZE = 128,
	/* "-k" + four ints + punctuation + two modifier strings. */
	OBS_KEY_SIZE = 2 * OBS_SOPTS_SIZE + 64
};

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  On success *nf and *nc
 * receive the field and column numbers (0 when absent), the letters
 * are copied into sopts (left untouched when there are none), and 0 is
 * returned.  Returns -1 when s does not have that form or when the
 * letters would not fit into the sopts_size bytes of sopts.  A number
 * that does not fit (with room for the caller's +1 adjustment) into an
 * int terminates the program with message 11.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char *sopts,
    size_t sopts_size)
{
	static const char sregexp[] = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regmatch_t pmatch[4];
	regex_t re;
	unsigned long val;
	size_t len;
	int ret = -1;

	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so ||
	    pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* strtoul() stops at the first non-digit; no copy is needed. */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;
		/* Never write past the caller's buffer. */
		if (len >= sopts_size)
			goto end;
		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Scans argv up to a "--" argument.  Each "+POS1" that parses is
 * replaced by a newly allocated "-k" option; when it is immediately
 * followed by a parsable "-POS2", the two are merged into one argument,
 * the rest of argv is shifted down and *argc is decremented.
 * Arguments that do not parse are left unchanged.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	char sopt[OBS_KEY_SIZE];

	for (int i = 1; i < *argc; i++) {
		char *arg1 = argv[i];
		char sopts1[OBS_SOPTS_SIZE];
		int c1, f1;

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		/* Obsolete positions are zero-based; -k is one-based. */
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBS_SOPTS_SIZE];
				int c2, f2;

				sopts2[0] = '\0';
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2,
				    sizeof(sopts2)) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Drop the consumed -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
905 
906 /*
907  * Seed random sort
908  */
909 static void
get_random_seed(const char * random_source)910 get_random_seed(const char *random_source)
911 {
912 	char randseed[32];
913 	struct stat fsb, rsb;
914 	ssize_t rd;
915 	int rsfd;
916 
917 	rsfd = -1;
918 	rd = sizeof(randseed);
919 
920 	if (random_source == NULL) {
921 		if (getentropy(randseed, sizeof(randseed)) < 0)
922 			err(EX_SOFTWARE, "getentropy");
923 		goto out;
924 	}
925 
926 	rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
927 	if (rsfd < 0)
928 		err(EX_NOINPUT, "open: %s", random_source);
929 
930 	if (fstat(rsfd, &fsb) != 0)
931 		err(EX_SOFTWARE, "fstat");
932 
933 	if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
934 		err(EX_USAGE,
935 		    "random seed isn't a regular file or /dev/random");
936 
937 	/*
938 	 * Regular files: read up to maximum seed size and explicitly
939 	 * reject longer files.
940 	 */
941 	if (S_ISREG(fsb.st_mode)) {
942 		if (fsb.st_size > (off_t)sizeof(randseed))
943 			errx(EX_USAGE, "random seed is too large (%jd >"
944 			    " %zu)!", (intmax_t)fsb.st_size,
945 			    sizeof(randseed));
946 		else if (fsb.st_size < 1)
947 			errx(EX_USAGE, "random seed is too small ("
948 			    "0 bytes)");
949 
950 		memset(randseed, 0, sizeof(randseed));
951 
952 		rd = read(rsfd, randseed, fsb.st_size);
953 		if (rd < 0)
954 			err(EX_SOFTWARE, "reading random seed file %s",
955 			    random_source);
956 		if (rd < (ssize_t)fsb.st_size)
957 			errx(EX_SOFTWARE, "short read from %s", random_source);
958 	} else if (S_ISCHR(fsb.st_mode)) {
959 		if (stat("/dev/random", &rsb) < 0)
960 			err(EX_SOFTWARE, "stat");
961 
962 		if (fsb.st_dev != rsb.st_dev ||
963 		    fsb.st_ino != rsb.st_ino)
964 			errx(EX_USAGE, "random seed is a character "
965 			    "device other than /dev/random");
966 
967 		if (getentropy(randseed, sizeof(randseed)) < 0)
968 			err(EX_SOFTWARE, "getentropy");
969 	}
970 
971 out:
972 	if (rsfd >= 0)
973 		close(rsfd);
974 
975 	MD5Init(&md5_ctx);
976 	MD5Update(&md5_ctx, randseed, rd);
977 }
978 
979 /*
980  * Main function.
981  */
982 int
main(int argc,char ** argv)983 main(int argc, char **argv)
984 {
985 	char *outfile, *real_outfile;
986 	char *random_source = NULL;
987 	int c, result;
988 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
989 	    { false, false, false, false, false, false };
990 
991 	result = 0;
992 	outfile = sort_strdup("-");
993 	real_outfile = NULL;
994 
995 	struct sort_mods *sm = &default_sort_mods_object;
996 
997 	init_tmp_files();
998 
999 	set_signal_handler();
1000 
1001 	set_hw_params();
1002 	set_locale();
1003 	set_tmpdir();
1004 	set_sort_opts();
1005 
1006 	fix_obsolete_keys(&argc, argv);
1007 
1008 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1009 	    != -1)) {
1010 
1011 		check_mutually_exclusive_flags(c, mef_flags);
1012 
1013 		if (!set_sort_modifier(sm, c)) {
1014 
1015 			switch (c) {
1016 			case 'c':
1017 				sort_opts_vals.cflag = true;
1018 				if (optarg) {
1019 					if (!strcmp(optarg, "diagnose-first"))
1020 						;
1021 					else if (!strcmp(optarg, "silent") ||
1022 					    !strcmp(optarg, "quiet"))
1023 						sort_opts_vals.csilentflag = true;
1024 					else if (*optarg)
1025 						unknown(optarg);
1026 				}
1027 				break;
1028 			case 'C':
1029 				sort_opts_vals.cflag = true;
1030 				sort_opts_vals.csilentflag = true;
1031 				break;
1032 			case 'k':
1033 			{
1034 				sort_opts_vals.complex_sort = true;
1035 				sort_opts_vals.kflag = true;
1036 
1037 				keys_num++;
1038 				keys = sort_realloc(keys, keys_num *
1039 				    sizeof(struct key_specs));
1040 				memset(&(keys[keys_num - 1]), 0,
1041 				    sizeof(struct key_specs));
1042 
1043 				if (parse_k(optarg, &(keys[keys_num - 1]))
1044 				    < 0) {
1045 					errc(2, EINVAL, "-k %s", optarg);
1046 				}
1047 
1048 				break;
1049 			}
1050 			case 'm':
1051 				sort_opts_vals.mflag = true;
1052 				break;
1053 			case 'o':
1054 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1055 				strcpy(outfile, optarg);
1056 				break;
1057 			case 's':
1058 				sort_opts_vals.sflag = true;
1059 				break;
1060 			case 'S':
1061 				available_free_memory =
1062 				    parse_memory_buffer_value(optarg);
1063 				break;
1064 			case 'T':
1065 				tmpdir = sort_strdup(optarg);
1066 				break;
1067 			case 't':
1068 				while (strlen(optarg) > 1) {
1069 					if (optarg[0] != '\\') {
1070 						errc(2, EINVAL, "%s", optarg);
1071 					}
1072 					optarg += 1;
1073 					if (*optarg == '0') {
1074 						*optarg = 0;
1075 						break;
1076 					}
1077 				}
1078 				sort_opts_vals.tflag = true;
1079 				sort_opts_vals.field_sep = btowc(optarg[0]);
1080 				if (sort_opts_vals.field_sep == WEOF) {
1081 					errno = EINVAL;
1082 					err(2, NULL);
1083 				}
1084 				if (!gnusort_numeric_compatibility) {
1085 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1086 						symbol_decimal_point = WEOF;
1087 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1088 						symbol_thousands_sep = WEOF;
1089 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1090 						symbol_negative_sign = WEOF;
1091 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1092 						symbol_positive_sign = WEOF;
1093 				}
1094 				break;
1095 			case 'u':
1096 				sort_opts_vals.uflag = true;
1097 				/* stable sort for the correct unique val */
1098 				sort_opts_vals.sflag = true;
1099 				break;
1100 			case 'z':
1101 				sort_opts_vals.zflag = true;
1102 				break;
1103 			case SORT_OPT:
1104 				if (optarg) {
1105 					if (!strcmp(optarg, "general-numeric"))
1106 						set_sort_modifier(sm, 'g');
1107 					else if (!strcmp(optarg, "human-numeric"))
1108 						set_sort_modifier(sm, 'h');
1109 					else if (!strcmp(optarg, "numeric"))
1110 						set_sort_modifier(sm, 'n');
1111 					else if (!strcmp(optarg, "month"))
1112 						set_sort_modifier(sm, 'M');
1113 					else if (!strcmp(optarg, "random"))
1114 						set_sort_modifier(sm, 'R');
1115 					else
1116 						unknown(optarg);
1117 				}
1118 				break;
1119 #if defined(SORT_THREADS)
1120 			case PARALLEL_OPT:
1121 				nthreads = (size_t)(atoi(optarg));
1122 				if (nthreads < 1)
1123 					nthreads = 1;
1124 				if (nthreads > 1024)
1125 					nthreads = 1024;
1126 				break;
1127 #endif
1128 			case QSORT_OPT:
1129 				sort_opts_vals.sort_method = SORT_QSORT;
1130 				break;
1131 			case MERGESORT_OPT:
1132 				sort_opts_vals.sort_method = SORT_MERGESORT;
1133 				break;
1134 			case MMAP_OPT:
1135 				use_mmap = true;
1136 				break;
1137 			case HEAPSORT_OPT:
1138 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1139 				break;
1140 			case RADIXSORT_OPT:
1141 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1142 				break;
1143 			case RANDOMSOURCE_OPT:
1144 				random_source = strdup(optarg);
1145 				break;
1146 			case COMPRESSPROGRAM_OPT:
1147 				compress_program = strdup(optarg);
1148 				break;
1149 			case FF_OPT:
1150 				read_fns_from_file0(optarg);
1151 				break;
1152 			case BS_OPT:
1153 			{
1154 				errno = 0;
1155 				long mof = strtol(optarg, NULL, 10);
1156 				if (errno != 0)
1157 					err(2, "--batch-size");
1158 				if (mof >= 2)
1159 					max_open_files = (size_t) mof + 1;
1160 			}
1161 				break;
1162 			case VERSION_OPT:
1163 				printf("%s\n", VERSION);
1164 				exit(EXIT_SUCCESS);
1165 				/* NOTREACHED */
1166 				break;
1167 			case DEBUG_OPT:
1168 				debug_sort = true;
1169 				break;
1170 			case HELP_OPT:
1171 				usage(false);
1172 				/* NOTREACHED */
1173 				break;
1174 			default:
1175 				usage(true);
1176 				/* NOTREACHED */
1177 			}
1178 		}
1179 	}
1180 
1181 	argc -= optind;
1182 	argv += optind;
1183 
1184 	if (argv_from_file0) {
1185 		argc = argc_from_file0;
1186 		argv = argv_from_file0;
1187 	}
1188 
1189 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1190 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1191 
1192 	if (keys_num == 0) {
1193 		keys_num = 1;
1194 		keys = sort_realloc(keys, sizeof(struct key_specs));
1195 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1196 		keys[0].c1 = 1;
1197 		keys[0].pos1b = default_sort_mods->bflag;
1198 		keys[0].pos2b = default_sort_mods->bflag;
1199 		memcpy(&(keys[0].sm), default_sort_mods,
1200 		    sizeof(struct sort_mods));
1201 	}
1202 
1203 	for (size_t i = 0; i < keys_num; i++) {
1204 		struct key_specs *ks;
1205 
1206 		ks = &(keys[i]);
1207 
1208 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1209 		    !(ks->pos2b)) {
1210 			ks->pos1b = sm->bflag;
1211 			ks->pos2b = sm->bflag;
1212 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1213 		}
1214 
1215 		ks->sm.func = get_sort_func(&(ks->sm));
1216 	}
1217 
1218 	if (debug_sort) {
1219 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1220 #if defined(SORT_THREADS)
1221 		printf("Number of CPUs: %d\n",(int)ncpu);
1222 		nthreads = 1;
1223 #endif
1224 		printf("Using collate rules of %s locale\n",
1225 		    setlocale(LC_COLLATE, NULL));
1226 		if (byte_sort)
1227 			printf("Byte sort is used\n");
1228 		if (print_symbols_on_debug) {
1229 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1230 			if (symbol_thousands_sep)
1231 				printf("Thousands separator: <%lc>\n",
1232 				    symbol_thousands_sep);
1233 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1234 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1235 		}
1236 	}
1237 
1238 	if (need_random)
1239 		get_random_seed(random_source);
1240 
1241 	/* Case when the outfile equals one of the input files: */
1242 	if (strcmp(outfile, "-")) {
1243 
1244 		for(int i = 0; i < argc; ++i) {
1245 			if (strcmp(argv[i], outfile) == 0) {
1246 				real_outfile = sort_strdup(outfile);
1247 				for(;;) {
1248 					char* tmp = sort_malloc(strlen(outfile) +
1249 					    strlen(".tmp") + 1);
1250 
1251 					strcpy(tmp, outfile);
1252 					strcpy(tmp + strlen(tmp), ".tmp");
1253 					sort_free(outfile);
1254 					outfile = tmp;
1255 					if (access(outfile, F_OK) < 0)
1256 						break;
1257 				}
1258 				tmp_file_atexit(outfile);
1259 			}
1260 		}
1261 	}
1262 
1263 #if defined(SORT_THREADS)
1264 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1265 		nthreads = 1;
1266 #endif
1267 
1268 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1269 		struct file_list fl;
1270 		struct sort_list list;
1271 
1272 		sort_list_init(&list);
1273 		file_list_init(&fl, true);
1274 
1275 		if (argc < 1)
1276 			procfile("-", &list, &fl);
1277 		else {
1278 			while (argc > 0) {
1279 				procfile(*argv, &list, &fl);
1280 				--argc;
1281 				++argv;
1282 			}
1283 		}
1284 
1285 		if (fl.count < 1)
1286 			sort_list_to_file(&list, outfile);
1287 		else {
1288 			if (list.count > 0) {
1289 				char *flast = new_tmp_file_name();
1290 
1291 				sort_list_to_file(&list, flast);
1292 				file_list_add(&fl, flast, false);
1293 			}
1294 			merge_files(&fl, outfile);
1295 		}
1296 
1297 		file_list_clean(&fl);
1298 
1299 		/*
1300 		 * We are about to exit the program, so we can ignore
1301 		 * the clean-up for speed
1302 		 *
1303 		 * sort_list_clean(&list);
1304 		 */
1305 
1306 	} else if (sort_opts_vals.cflag) {
1307 		result = (argc == 0) ? (check("-")) : (check(*argv));
1308 	} else if (sort_opts_vals.mflag) {
1309 		struct file_list fl;
1310 
1311 		file_list_init(&fl, false);
1312 		/* No file arguments remaining means "read from stdin." */
1313 		if (argc == 0)
1314 			file_list_add(&fl, "-", true);
1315 		else
1316 			file_list_populate(&fl, argc, argv, true);
1317 		merge_files(&fl, outfile);
1318 		file_list_clean(&fl);
1319 	}
1320 
1321 	if (real_outfile) {
1322 		unlink(real_outfile);
1323 		if (rename(outfile, real_outfile) < 0)
1324 			err(2, NULL);
1325 		sort_free(real_outfile);
1326 	}
1327 
1328 	sort_free(outfile);
1329 
1330 	return (result);
1331 }
1332