xref: /dragonfly/usr.bin/sort/sort.c (revision a98f7024)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $
28  */
29 
30 
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <regex.h>
41 #include <signal.h>
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <wchar.h>
48 #include <wctype.h>
49 #if defined(SORT_RANDOM)
50 #include <openssl/md5.h>
51 #endif
52 
53 #include "coll.h"
54 #include "file.h"
55 #include "sort.h"
56 
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
61 
62 #if defined(SORT_RANDOM)
63 #define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"
64 #else
65 #define	OPTIONS	"bcCdfghik:Mmno:rsS:t:T:uVz"
66 #endif
67 
68 #if defined(SORT_RANDOM)
69 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
70 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
71 
72 static bool need_random;
73 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
74 static const void *random_seed;
75 static size_t random_seed_size;
76 
77 MD5_CTX md5_ctx;
78 #endif
79 
80 /*
81  * Default messages to use when NLS is disabled or no catalogue
82  * is found.
83  */
84 const char *nlsstr[] = { "",
85 /* 1*/"mutually exclusive flags",
86 /* 2*/"extra argument not allowed with -c",
87 /* 3*/"Unknown feature",
88 /* 4*/"Wrong memory buffer specification",
89 /* 5*/"0 field in key specs",
90 /* 6*/"0 column in key specs",
91 /* 7*/"Wrong file mode",
92 /* 8*/"Cannot open file for reading",
93 /* 9*/"Radix sort cannot be used with these sort options",
94 /*10*/"The chosen sort method cannot be used with stable and/or unique sort",
95 /*11*/"Invalid key position",
96 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
97       "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
98       "[-o outfile] [--batch-size size] [--files0-from file] "
99       "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
100       "[--mmap] "
101 #if defined(SORT_THREADS)
102       "[--parallel thread_no] "
103 #endif
104       "[--human-numeric-sort] "
105 #if defined(SORT_RANDOM)
106       "[--version-sort] [--random-sort [--random-source file]] "
107 #else
108       "[--version-sort] "
109 #endif
110       "[--compress-program program] [file ...]\n" };
111 
112 struct sort_opts sort_opts_vals;
113 
114 bool debug_sort;
115 bool need_hint;
116 
117 #if defined(SORT_THREADS)
118 unsigned int ncpu = 1;
119 size_t nthreads = 1;
120 #endif
121 
122 static bool gnusort_numeric_compatibility;
123 
124 static struct sort_mods default_sort_mods_object;
125 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
126 
127 static bool print_symbols_on_debug;
128 
/*
 * Arguments read from a file (when the files0-from option is used).
 */
132 static size_t argc_from_file0 = (size_t)-1;
133 static char **argv_from_file0;
134 
135 /*
136  * Placeholder symbols for options which have no single-character equivalent
137  */
138 enum
139 {
140 	SORT_OPT = CHAR_MAX + 1,
141 	HELP_OPT,
142 	FF_OPT,
143 	BS_OPT,
144 	VERSION_OPT,
145 	DEBUG_OPT,
146 #if defined(SORT_THREADS)
147 	PARALLEL_OPT,
148 #endif
149 #if defined(SORT_RANDOM)
150 	RANDOMSOURCE_OPT,
151 #endif
152 	COMPRESSPROGRAM_OPT,
153 	QSORT_OPT,
154 	MERGESORT_OPT,
155 	HEAPSORT_OPT,
156 	RADIXSORT_OPT,
157 	MMAP_OPT
158 };
159 
160 #define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
161 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
162 
163 static struct option long_options[] = {
164 				{ "batch-size", required_argument, NULL, BS_OPT },
165 				{ "buffer-size", required_argument, NULL, 'S' },
166 				{ "check", optional_argument, NULL, 'c' },
167 				{ "check=silent|quiet", optional_argument, NULL, 'C' },
168 				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
169 				{ "debug", no_argument, NULL, DEBUG_OPT },
170 				{ "dictionary-order", no_argument, NULL, 'd' },
171 				{ "field-separator", required_argument, NULL, 't' },
172 				{ "files0-from", required_argument, NULL, FF_OPT },
173 				{ "general-numeric-sort", no_argument, NULL, 'g' },
174 				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
175 				{ "help",no_argument, NULL, HELP_OPT },
176 				{ "human-numeric-sort", no_argument, NULL, 'h' },
177 				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
178 				{ "ignore-case", no_argument, NULL, 'f' },
179 				{ "ignore-nonprinting", no_argument, NULL, 'i' },
180 				{ "key", required_argument, NULL, 'k' },
181 				{ "merge", no_argument, NULL, 'm' },
182 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
183 				{ "mmap", no_argument, NULL, MMAP_OPT },
184 				{ "month-sort", no_argument, NULL, 'M' },
185 				{ "numeric-sort", no_argument, NULL, 'n' },
186 				{ "output", required_argument, NULL, 'o' },
187 #if defined(SORT_THREADS)
188 				{ "parallel", required_argument, NULL, PARALLEL_OPT },
189 #endif
190 				{ "qsort", no_argument, NULL, QSORT_OPT },
191 				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
192 #if defined(SORT_RANDOM)
193 				{ "random-sort", no_argument, NULL, 'R' },
194 				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
195 #endif
196 				{ "reverse", no_argument, NULL, 'r' },
197 				{ "sort", required_argument, NULL, SORT_OPT },
198 				{ "stable", no_argument, NULL, 's' },
199 				{ "temporary-directory",required_argument, NULL, 'T' },
200 				{ "unique", no_argument, NULL, 'u' },
201 				{ "version", no_argument, NULL, VERSION_OPT },
202 				{ "version-sort",no_argument, NULL, 'V' },
203 				{ "zero-terminated", no_argument, NULL, 'z' },
204 				{ NULL, no_argument, NULL, 0 }
205 };
206 
207 static void fix_obsolete_keys(int *argc, char **argv);
208 
209 /*
210  * Check where sort modifier is present
211  */
212 static bool
sort_modifier_empty(struct sort_mods * sm)213 sort_modifier_empty(struct sort_mods *sm)
214 {
215 
216 	if (sm == NULL)
217 		return (true);
218 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
219 #ifdef SORT_RANDOM
220 	    sm->Rflag ||
221 #endif
222 	    sm->rflag || sm->hflag || sm->dflag || sm->fflag));
223 }
224 
/*
 * Print the usage text and terminate.  When opt_err is set the text goes
 * to stderr and the exit status is 2; otherwise it goes to stdout and the
 * exit status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
240 
241 /*
242  * Read input file names from a file (file0-from option).
243  */
244 static void
read_fns_from_file0(const char * fn)245 read_fns_from_file0(const char *fn)
246 {
247 	FILE *f;
248 	char *line = NULL;
249 	size_t linesize = 0;
250 	ssize_t linelen;
251 
252 	if (fn == NULL)
253 		return;
254 
255 	f = fopen(fn, "r");
256 	if (f == NULL)
257 		err(2, "%s", fn);
258 
259 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
260 		if (*line != '\0') {
261 			if (argc_from_file0 == (size_t) - 1)
262 				argc_from_file0 = 0;
263 			++argc_from_file0;
264 			argv_from_file0 = sort_realloc(argv_from_file0,
265 			    argc_from_file0 * sizeof(char *));
266 			if (argv_from_file0 == NULL)
267 				err(2, NULL);
268 			argv_from_file0[argc_from_file0 - 1] = line;
269 		} else {
270 			free(line);
271 		}
272 		line = NULL;
273 		linesize = 0;
274 	}
275 	if (ferror(f))
276 		err(2, "%s: getdelim", fn);
277 
278 	closefile(f, fn);
279 }
280 
281 /*
282  * Check how much RAM is available for the sort.
283  */
284 static void
set_hw_params(void)285 set_hw_params(void)
286 {
287 	long pages, psize;
288 
289 	pages = psize = 0;
290 
291 #if defined(SORT_THREADS)
292 	ncpu = 1;
293 #endif
294 
295 	pages = sysconf(_SC_PHYS_PAGES);
296 	if (pages < 1) {
297 		perror("sysconf pages");
298 		pages = 1;
299 	}
300 	psize = sysconf(_SC_PAGESIZE);
301 	if (psize < 1) {
302 		perror("sysconf psize");
303 		psize = 4096;
304 	}
305 #if defined(SORT_THREADS)
306 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
307 	if (ncpu < 1)
308 		ncpu = 1;
309 	else if(ncpu > 32)
310 		ncpu = 32;
311 
312 	nthreads = ncpu;
313 #endif
314 
315 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
316 	available_free_memory = free_memory / 2;
317 
318 	if (available_free_memory < 1024)
319 		available_free_memory = 1024;
320 }
321 
/*
 * Convert the multibyte character at c to a wide character stored in *wc.
 * When the conversion fails or yields the null character, *wc is set to
 * def instead.  Nothing happens if either pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
337 
338 /*
339  * Set current locale symbols.
340  */
341 static void
set_locale(void)342 set_locale(void)
343 {
344 	struct lconv *lc;
345 	const char *locale;
346 
347 	setlocale(LC_ALL, "");
348 
349 	lc = localeconv();
350 
351 	if (lc) {
352 		/* obtain LC_NUMERIC info */
353 		/* Convert to wide char form */
354 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
355 		    symbol_decimal_point);
356 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
357 		    symbol_thousands_sep);
358 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
359 		    symbol_positive_sign);
360 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
361 		    symbol_negative_sign);
362 	}
363 
364 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
365 		gnusort_numeric_compatibility = true;
366 
367 	locale = setlocale(LC_COLLATE, NULL);
368 
369 	if (locale) {
370 		char *tmpl;
371 		const char *cclocale;
372 
373 		tmpl = sort_strdup(locale);
374 		cclocale = setlocale(LC_COLLATE, "C");
375 		if (cclocale && !strcmp(cclocale, tmpl))
376 			byte_sort = true;
377 		else {
378 			const char *pclocale;
379 
380 			pclocale = setlocale(LC_COLLATE, "POSIX");
381 			if (pclocale && !strcmp(pclocale, tmpl))
382 				byte_sort = true;
383 		}
384 		setlocale(LC_COLLATE, tmpl);
385 		sort_free(tmpl);
386 	}
387 }
388 
389 /*
390  * Set directory temporary files.
391  */
392 static void
set_tmpdir(void)393 set_tmpdir(void)
394 {
395 	char *td;
396 
397 	td = getenv("TMPDIR");
398 	if (td != NULL)
399 		tmpdir = sort_strdup(td);
400 }
401 
402 /*
403  * Parse -S option.
404  */
405 static unsigned long long
parse_memory_buffer_value(const char * value)406 parse_memory_buffer_value(const char *value)
407 {
408 
409 	if (value == NULL)
410 		return (available_free_memory);
411 	else {
412 		char *endptr;
413 		unsigned long long membuf;
414 
415 		endptr = NULL;
416 		errno = 0;
417 		membuf = strtoll(value, &endptr, 10);
418 
419 		if (errno != 0) {
420 			warn("%s",getstr(4));
421 			membuf = available_free_memory;
422 		} else {
423 			switch (*endptr){
424 			case 'Y':
425 				membuf *= 1024;
426 				/* FALLTHROUGH */
427 			case 'Z':
428 				membuf *= 1024;
429 				/* FALLTHROUGH */
430 			case 'E':
431 				membuf *= 1024;
432 				/* FALLTHROUGH */
433 			case 'P':
434 				membuf *= 1024;
435 				/* FALLTHROUGH */
436 			case 'T':
437 				membuf *= 1024;
438 				/* FALLTHROUGH */
439 			case 'G':
440 				membuf *= 1024;
441 				/* FALLTHROUGH */
442 			case 'M':
443 				membuf *= 1024;
444 				/* FALLTHROUGH */
445 			case '\0':
446 			case 'K':
447 				membuf *= 1024;
448 				/* FALLTHROUGH */
449 			case 'b':
450 				break;
451 			case '%':
452 				membuf = (available_free_memory * membuf) /
453 				    100;
454 				break;
455 			default:
456 				warnc(EINVAL, "%s", optarg);
457 				membuf = available_free_memory;
458 			}
459 		}
460 		return (membuf);
461 	}
462 }
463 
464 /*
465  * Signal handler that clears the temporary files.
466  */
467 static void
sig_handler(int sig __unused,siginfo_t * siginfo __unused,void * context __unused)468 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
469     void *context __unused)
470 {
471 
472 	clear_tmp_files();
473 	exit(-1);
474 }
475 
476 /*
477  * Set signal handler on panic signals.
478  */
479 static void
set_signal_handler(void)480 set_signal_handler(void)
481 {
482 	struct sigaction sa;
483 
484 	memset(&sa, 0, sizeof(sa));
485 	sa.sa_sigaction = &sig_handler;
486 	sa.sa_flags = SA_SIGINFO;
487 
488 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
489 		perror("sigaction");
490 		return;
491 	}
492 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
493 		perror("sigaction");
494 		return;
495 	}
496 	if (sigaction(SIGINT, &sa, NULL) < 0) {
497 		perror("sigaction");
498 		return;
499 	}
500 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
501 		perror("sigaction");
502 		return;
503 	}
504 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
505 		perror("sigaction");
506 		return;
507 	}
508 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
509 		perror("sigaction");
510 		return;
511 	}
512 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
513 		perror("sigaction");
514 		return;
515 	}
516 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
517 		perror("sigaction");
518 		return;
519 	}
520 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
521 		perror("sigaction");
522 		return;
523 	}
524 }
525 
/*
 * Report an unrecognised feature name and exit with status 2.
 */
static void
unknown(const char *what)
{
	const char *prefix;

	prefix = getstr(3);
	errx(2, "%s: %s", prefix, what);
}
535 
536 /*
537  * Check whether contradictory input options are used.
538  */
539 static void
check_mutually_exclusive_flags(char c,bool * mef_flags)540 check_mutually_exclusive_flags(char c, bool *mef_flags)
541 {
542 	int fo_index, mec;
543 	bool found_others, found_this;
544 
545 	found_others = found_this = false;
546 	fo_index = 0;
547 
548 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
549 		mec = mutually_exclusive_flags[i];
550 
551 		if (mec != c) {
552 			if (mef_flags[i]) {
553 				if (found_this)
554 					errx(1, "%c:%c: %s", c, mec, getstr(1));
555 				found_others = true;
556 				fo_index = i;
557 			}
558 		} else {
559 			if (found_others)
560 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
561 			mef_flags[i] = true;
562 			found_this = true;
563 		}
564 	}
565 }
566 
567 /*
568  * Initialise sort opts data.
569  */
570 static void
set_sort_opts(void)571 set_sort_opts(void)
572 {
573 
574 	memset(&default_sort_mods_object, 0,
575 	    sizeof(default_sort_mods_object));
576 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
577 	default_sort_mods_object.func =
578 	    get_sort_func(&default_sort_mods_object);
579 }
580 
581 /*
582  * Set a sort modifier on a sort modifiers object.
583  */
584 static bool
set_sort_modifier(struct sort_mods * sm,int c)585 set_sort_modifier(struct sort_mods *sm, int c)
586 {
587 
588 	if (sm) {
589 		switch (c){
590 		case 'b':
591 			sm->bflag = true;
592 			break;
593 		case 'd':
594 			sm->dflag = true;
595 			break;
596 		case 'f':
597 			sm->fflag = true;
598 			break;
599 		case 'g':
600 			sm->gflag = true;
601 			need_hint = true;
602 			break;
603 		case 'i':
604 			sm->iflag = true;
605 			break;
606 #ifdef SORT_RANDOM
607 		case 'R':
608 			sm->Rflag = true;
609 			need_random = true;
610 			break;
611 #endif
612 		case 'M':
613 			initialise_months();
614 			sm->Mflag = true;
615 			need_hint = true;
616 			break;
617 		case 'n':
618 			sm->nflag = true;
619 			need_hint = true;
620 			print_symbols_on_debug = true;
621 			break;
622 		case 'r':
623 			sm->rflag = true;
624 			break;
625 		case 'V':
626 			sm->Vflag = true;
627 			break;
628 		case 'h':
629 			sm->hflag = true;
630 			need_hint = true;
631 			print_symbols_on_debug = true;
632 			break;
633 		default:
634 			return false;
635 		}
636 		sort_opts_vals.complex_sort = true;
637 		sm->func = get_sort_func(sm);
638 	}
639 	return (true);
640 }
641 
642 /*
643  * Parse POS in -k option.
644  */
645 static int
parse_pos(const char * s,struct key_specs * ks,bool * mef_flags,bool second)646 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
647 {
648 	regmatch_t pmatch[4];
649 	regex_t re;
650 	char *c, *f;
651 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
652 	size_t len, nmatch;
653 	int ret;
654 
655 	ret = -1;
656 	nmatch = 4;
657 	c = f = NULL;
658 
659 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
660 		return (-1);
661 
662 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
663 		goto end;
664 
665 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
666 		goto end;
667 
668 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
669 		goto end;
670 
671 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
672 	f = sort_malloc((len + 1) * sizeof(char));
673 
674 	strncpy(f, s + pmatch[1].rm_so, len);
675 	f[len] = '\0';
676 
677 	if (second) {
678 		errno = 0;
679 		ks->f2 = (size_t) strtoul(f, NULL, 10);
680 		if (errno != 0)
681 			err(2, "-k");
682 		if (ks->f2 == 0) {
683 			warn("%s",getstr(5));
684 			goto end;
685 		}
686 	} else {
687 		errno = 0;
688 		ks->f1 = (size_t) strtoul(f, NULL, 10);
689 		if (errno != 0)
690 			err(2, "-k");
691 		if (ks->f1 == 0) {
692 			warn("%s",getstr(5));
693 			goto end;
694 		}
695 	}
696 
697 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
698 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
699 		c = sort_malloc((len + 1) * sizeof(char));
700 
701 		strncpy(c, s + pmatch[2].rm_so + 1, len);
702 		c[len] = '\0';
703 
704 		if (second) {
705 			errno = 0;
706 			ks->c2 = (size_t) strtoul(c, NULL, 10);
707 			if (errno != 0)
708 				err(2, "-k");
709 		} else {
710 			errno = 0;
711 			ks->c1 = (size_t) strtoul(c, NULL, 10);
712 			if (errno != 0)
713 				err(2, "-k");
714 			if (ks->c1 == 0) {
715 				warn("%s",getstr(6));
716 				goto end;
717 			}
718 		}
719 	} else {
720 		if (second)
721 			ks->c2 = 0;
722 		else
723 			ks->c1 = 1;
724 	}
725 
726 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
727 		regoff_t i = 0;
728 
729 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
730 			check_mutually_exclusive_flags(s[i], mef_flags);
731 			if (s[i] == 'b') {
732 				if (second)
733 					ks->pos2b = true;
734 				else
735 					ks->pos1b = true;
736 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
737 				goto end;
738 		}
739 	}
740 
741 	ret = 0;
742 
743 end:
744 
745 	if (c)
746 		sort_free(c);
747 	if (f)
748 		sort_free(f);
749 	regfree(&re);
750 
751 	return (ret);
752 }
753 
754 /*
755  * Parse -k option value.
756  */
757 static int
parse_k(const char * s,struct key_specs * ks)758 parse_k(const char *s, struct key_specs *ks)
759 {
760 	int ret = -1;
761 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
762 	    { false, false, false, false, false, false };
763 
764 	if (s && *s) {
765 		char *sptr;
766 
767 		sptr = strchr(s, ',');
768 		if (sptr) {
769 			size_t size1;
770 			char *pos1, *pos2;
771 
772 			size1 = sptr - s;
773 
774 			if (size1 < 1)
775 				return (-1);
776 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
777 
778 			strncpy(pos1, s, size1);
779 			pos1[size1] = '\0';
780 
781 			ret = parse_pos(pos1, ks, mef_flags, false);
782 
783 			sort_free(pos1);
784 			if (ret < 0)
785 				return (ret);
786 
787 			pos2 = sort_strdup(sptr + 1);
788 			ret = parse_pos(pos2, ks, mef_flags, true);
789 			sort_free(pos2);
790 		} else
791 			ret = parse_pos(s, ks, mef_flags, false);
792 	}
793 
794 	return (ret);
795 }
796 
/*
 * Parse POS in +POS -POS option.
 *
 * s      the text after the leading '+' or '-'
 * nf     receives the field number (0 when parsing fails early)
 * nc     receives the column number (0 when absent)
 * sopts  receives the trailing run of letters as a NUL-terminated string;
 *        left untouched when there is none.  The callers in this file
 *        pass 128-byte buffers, so a longer run is rejected.
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * its letter run would not fit in sopts.  A number that does not fit in
 * an int (with room for the +1 the caller adds) terminates the program
 * with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	enum { SOPTS_BUFSZ = 128 };	/* size of the callers' sopts buffers */
	static const char sregexp[] = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	regex_t re;
	regmatch_t pmatch[4];
	unsigned long num;
	size_t len;
	int ret = -1;

	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, sizeof(pmatch) / sizeof(pmatch[0]), pmatch,
	    0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* strtoul() stops at the first non-digit; no copy is needed. */
	errno = 0;
	num = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || num >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the '.' that opens the column group. */
		errno = 0;
		num = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || num >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so);

		/* Never write past the end of the caller's buffer. */
		if (len >= SOPTS_BUFSZ)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}
870 
/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * argc/argv are the program arguments and are edited in place: a +POS1
 * argument is replaced by a newly allocated -k string, and a directly
 * following -POS2 argument is merged into it and removed from the vector
 * (*argc shrinks by one and the vector stays NULL-terminated).  Arguments
 * that do not parse as a position are left alone.  Scanning stops at "--"
 * so that operands after it are never rewritten.  Running out of memory
 * terminates the program with status 2.
 */
static void
fix_obsolete_keys(int *argc, char **argv)
{

	for (int i = 1; i < *argc; i++) {
		char *arg1 = argv[i];
		char *snew = NULL;
		char sopts1[128];
		int c1 = 0, f1 = 0;

		/* Following arguments are treated as file names. */
		if (strcmp(arg1, "--") == 0)
			break;

		/* Only "+X" with at least one character after the '+'. */
		if (arg1[0] != '+' || arg1[1] == '\0')
			continue;

		sopts1[0] = '\0';
		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Shift both numbers to the numbering written into -k. */
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];
			char sopts2[128];
			int c2 = 0, f2 = 0;

			sopts2[0] = '\0';
			if (arg2[0] == '-' && arg2[1] != '\0' &&
			    parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (c2 > 0)
					f2 += 1;
				if (asprintf(&snew, "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2) == -1)
					err(2, NULL);
				argv[i] = snew;

				/* Close the gap left by the merged -POS2. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				argv[*argc] = NULL;
				continue;
			}
		}

		if (asprintf(&snew, "-k%d.%d%s", f1, c1, sopts1) == -1)
			err(2, NULL);
		argv[i] = snew;
	}
}
930 
931 /*
932  * Set random seed
933  */
934 #if defined(SORT_RANDOM)
935 static char *
random_md5end(MD5_CTX * ctx)936 random_md5end(MD5_CTX *ctx)
937 {
938 	unsigned char digest[MD5_DIGEST_LENGTH];
939 	static const char hex[]="0123456789abcdef";
940 	char *buf;
941 	int i;
942 
943 	buf = malloc(MD5_DIGEST_LENGTH * 2 + 1);
944 	if (!buf)
945 		return NULL;
946 	MD5_Final(digest, ctx);
947 	for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
948 		buf[2*i] = hex[digest[i] >> 4];
949 		buf[2*i+1] = hex[digest[i] & 0x0f];
950 	}
951 	buf[MD5_DIGEST_LENGTH * 2] = '\0';
952 	return buf;
953 }
954 
955 static char *
random_fromfile(const char * filename)956 random_fromfile(const char *filename)
957 {
958 	MD5_CTX ctx;
959 	FILE* fp;
960 	unsigned char buffer[4096];
961 	struct stat st;
962 	off_t size;
963 	int bytes;
964 
965 	fp = openfile(filename, "r");
966 	if (fp == NULL)
967 		return NULL;
968 	if (fstat(fileno(fp), &st) < 0) {
969 		bytes = -1;
970 		goto err;
971 	}
972 
973 	MD5_Init(&ctx);
974 	size = st.st_size;
975 	bytes = 0;
976 	while (size > 0 && (bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
977 		MD5_Update(&ctx, buffer, bytes);
978 		size -= bytes;
979 	}
980 
981 err:
982 	closefile(fp, NULL);
983 	if (bytes < 0)
984 		return NULL;
985 
986 	return (random_md5end(&ctx));
987 }
988 
989 static void
set_random_seed(void)990 set_random_seed(void)
991 {
992 	if (need_random) {
993 
994 		if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
995 			FILE* fseed;
996 			MD5_CTX ctx;
997 			char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
998 			size_t sz = 0;
999 
1000 			fseed = openfile(random_source, "r");
1001 			while (!feof(fseed)) {
1002 				int cr;
1003 
1004 				cr = fgetc(fseed);
1005 				if (cr == EOF)
1006 					break;
1007 
1008 				rsd[sz++] = (char) cr;
1009 
1010 				if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
1011 					break;
1012 			}
1013 
1014 			closefile(fseed, random_source);
1015 
1016 			MD5_Init(&ctx);
1017 			MD5_Update(&ctx, rsd, sz);
1018 
1019 			random_seed = random_md5end(&ctx);
1020 			random_seed_size = strlen(random_seed);
1021 
1022 		} else {
1023 			MD5_CTX ctx;
1024 			char *b;
1025 
1026 			MD5_Init(&ctx);
1027 			b = random_fromfile(random_source);
1028 			if (b == NULL)
1029 				err(2, NULL);
1030 
1031 			random_seed = b;
1032 			random_seed_size = strlen(b);
1033 		}
1034 
1035 		MD5_Init(&md5_ctx);
1036 		if(random_seed_size>0) {
1037 			MD5_Update(&md5_ctx, random_seed, random_seed_size);
1038 		}
1039 	}
1040 }
1041 #endif
1042 
1043 /*
1044  * Main function.
1045  */
1046 int
main(int argc,char ** argv)1047 main(int argc, char **argv)
1048 {
1049 	char *outfile, *real_outfile;
1050 	int c, result;
1051 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
1052 	    { false, false, false, false, false, false };
1053 
1054 	result = 0;
1055 	outfile = sort_strdup("-");
1056 	real_outfile = NULL;
1057 
1058 	struct sort_mods *sm = &default_sort_mods_object;
1059 
1060 	init_tmp_files();
1061 
1062 	set_signal_handler();
1063 
1064 	set_hw_params();
1065 	set_locale();
1066 	set_tmpdir();
1067 	set_sort_opts();
1068 
1069 	fix_obsolete_keys(&argc, argv);
1070 
1071 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1072 	    != -1)) {
1073 
1074 		check_mutually_exclusive_flags(c, mef_flags);
1075 
1076 		if (!set_sort_modifier(sm, c)) {
1077 
1078 			switch (c) {
1079 			case 'c':
1080 				sort_opts_vals.cflag = true;
1081 				if (optarg) {
1082 					if (!strcmp(optarg, "diagnose-first"))
1083 						;
1084 					else if (!strcmp(optarg, "silent") ||
1085 					    !strcmp(optarg, "quiet"))
1086 						sort_opts_vals.csilentflag = true;
1087 					else if (*optarg)
1088 						unknown(optarg);
1089 				}
1090 				break;
1091 			case 'C':
1092 				sort_opts_vals.cflag = true;
1093 				sort_opts_vals.csilentflag = true;
1094 				break;
1095 			case 'k':
1096 			{
1097 				sort_opts_vals.complex_sort = true;
1098 				sort_opts_vals.kflag = true;
1099 
1100 				keys_num++;
1101 				keys = sort_realloc(keys, keys_num *
1102 				    sizeof(struct key_specs));
1103 				memset(&(keys[keys_num - 1]), 0,
1104 				    sizeof(struct key_specs));
1105 
1106 				if (parse_k(optarg, &(keys[keys_num - 1]))
1107 				    < 0) {
1108 					errc(2, EINVAL, "-k %s", optarg);
1109 				}
1110 
1111 				break;
1112 			}
1113 			case 'm':
1114 				sort_opts_vals.mflag = true;
1115 				break;
1116 			case 'o':
1117 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1118 				strcpy(outfile, optarg);
1119 				break;
1120 			case 's':
1121 				sort_opts_vals.sflag = true;
1122 				break;
1123 			case 'S':
1124 				available_free_memory =
1125 				    parse_memory_buffer_value(optarg);
1126 				break;
1127 			case 'T':
1128 				tmpdir = sort_strdup(optarg);
1129 				break;
1130 			case 't':
1131 				while (strlen(optarg) > 1) {
1132 					if (optarg[0] != '\\') {
1133 						errc(2, EINVAL, "%s", optarg);
1134 					}
1135 					optarg += 1;
1136 					if (*optarg == '0') {
1137 						*optarg = 0;
1138 						break;
1139 					}
1140 				}
1141 				sort_opts_vals.tflag = true;
1142 				sort_opts_vals.field_sep = btowc(optarg[0]);
1143 				if (sort_opts_vals.field_sep == WEOF) {
1144 					errno = EINVAL;
1145 					err(2, NULL);
1146 				}
1147 				if (!gnusort_numeric_compatibility) {
1148 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1149 						symbol_decimal_point = WEOF;
1150 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1151 						symbol_thousands_sep = WEOF;
1152 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1153 						symbol_negative_sign = WEOF;
1154 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1155 						symbol_positive_sign = WEOF;
1156 				}
1157 				break;
1158 			case 'u':
1159 				sort_opts_vals.uflag = true;
1160 				/* stable sort for the correct unique val */
1161 				sort_opts_vals.sflag = true;
1162 				break;
1163 			case 'z':
1164 				sort_opts_vals.zflag = true;
1165 				break;
1166 			case SORT_OPT:
1167 				if (optarg) {
1168 					if (!strcmp(optarg, "general-numeric"))
1169 						set_sort_modifier(sm, 'g');
1170 					else if (!strcmp(optarg, "human-numeric"))
1171 						set_sort_modifier(sm, 'h');
1172 					else if (!strcmp(optarg, "numeric"))
1173 						set_sort_modifier(sm, 'n');
1174 					else if (!strcmp(optarg, "month"))
1175 						set_sort_modifier(sm, 'M');
1176 #if defined(SORT_RANDOM)
1177 					else if (!strcmp(optarg, "random"))
1178 						set_sort_modifier(sm, 'R');
1179 #endif
1180 					else
1181 						unknown(optarg);
1182 				}
1183 				break;
1184 #if defined(SORT_THREADS)
1185 			case PARALLEL_OPT:
1186 				nthreads = (size_t)(atoi(optarg));
1187 				if (nthreads < 1)
1188 					nthreads = 1;
1189 				if (nthreads > 1024)
1190 					nthreads = 1024;
1191 				break;
1192 #endif
1193 			case QSORT_OPT:
1194 				sort_opts_vals.sort_method = SORT_QSORT;
1195 				break;
1196 			case MERGESORT_OPT:
1197 				sort_opts_vals.sort_method = SORT_MERGESORT;
1198 				break;
1199 			case MMAP_OPT:
1200 				use_mmap = true;
1201 				break;
1202 			case HEAPSORT_OPT:
1203 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1204 				break;
1205 			case RADIXSORT_OPT:
1206 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1207 				break;
1208 #if defined(SORT_RANDOM)
1209 			case RANDOMSOURCE_OPT:
1210 				random_source = strdup(optarg);
1211 				break;
1212 #endif
1213 			case COMPRESSPROGRAM_OPT:
1214 				compress_program = strdup(optarg);
1215 				break;
1216 			case FF_OPT:
1217 				read_fns_from_file0(optarg);
1218 				break;
1219 			case BS_OPT:
1220 			{
1221 				errno = 0;
1222 				long mof = strtol(optarg, NULL, 10);
1223 				if (errno != 0)
1224 					err(2, "--batch-size");
1225 				if (mof >= 2)
1226 					max_open_files = (size_t) mof + 1;
1227 			}
1228 				break;
1229 			case VERSION_OPT:
1230 				printf("%s\n", VERSION);
1231 				exit(EXIT_SUCCESS);
1232 				/* NOTREACHED */
1233 				break;
1234 			case DEBUG_OPT:
1235 				debug_sort = true;
1236 				break;
1237 			case HELP_OPT:
1238 				usage(false);
1239 				/* NOTREACHED */
1240 				break;
1241 			default:
1242 				usage(true);
1243 				/* NOTREACHED */
1244 			}
1245 		}
1246 	}
1247 
1248 	argc -= optind;
1249 	argv += optind;
1250 
1251 #ifndef WITHOUT_NLS
1252 	catalog = catopen("sort", NL_CAT_LOCALE);
1253 #endif
1254 
1255 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1256 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1257 
1258 #ifndef WITHOUT_NLS
1259 	catclose(catalog);
1260 #endif
1261 
1262 	if (keys_num == 0) {
1263 		keys_num = 1;
1264 		keys = sort_realloc(keys, sizeof(struct key_specs));
1265 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1266 		keys[0].c1 = 1;
1267 		keys[0].pos1b = default_sort_mods->bflag;
1268 		keys[0].pos2b = default_sort_mods->bflag;
1269 		memcpy(&(keys[0].sm), default_sort_mods,
1270 		    sizeof(struct sort_mods));
1271 	}
1272 
1273 	for (size_t i = 0; i < keys_num; i++) {
1274 		struct key_specs *ks;
1275 
1276 		ks = &(keys[i]);
1277 
1278 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1279 		    !(ks->pos2b)) {
1280 			ks->pos1b = sm->bflag;
1281 			ks->pos2b = sm->bflag;
1282 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1283 		}
1284 
1285 		ks->sm.func = get_sort_func(&(ks->sm));
1286 	}
1287 
1288 	if (argv_from_file0) {
1289 		argc = argc_from_file0;
1290 		argv = argv_from_file0;
1291 	}
1292 
1293 	if (debug_sort) {
1294 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1295 #if defined(SORT_THREADS)
1296 		printf("Number of CPUs: %d\n",(int)ncpu);
1297 		nthreads = 1;
1298 #endif
1299 		printf("Using collate rules of %s locale\n",
1300 		    setlocale(LC_COLLATE, NULL));
1301 		if (byte_sort)
1302 			printf("Byte sort is used\n");
1303 		if (print_symbols_on_debug) {
1304 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1305 			if (symbol_thousands_sep)
1306 				printf("Thousands separator: <%lc>\n",
1307 				    symbol_thousands_sep);
1308 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1309 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1310 		}
1311 	}
1312 
1313 #if defined(SORT_RANDOM)
1314 	set_random_seed();
1315 #endif
1316 
1317 	/* Case when the outfile equals one of the input files: */
1318 	if (strcmp(outfile, "-")) {
1319 
1320 		for(int i = 0; i < argc; ++i) {
1321 			if (strcmp(argv[i], outfile) == 0) {
1322 				real_outfile = sort_strdup(outfile);
1323 				for(;;) {
1324 					char* tmp = sort_malloc(strlen(outfile) +
1325 					    strlen(".tmp") + 1);
1326 
1327 					strcpy(tmp, outfile);
1328 					strcpy(tmp + strlen(tmp), ".tmp");
1329 					sort_free(outfile);
1330 					outfile = tmp;
1331 					if (access(outfile, F_OK) < 0)
1332 						break;
1333 				}
1334 				tmp_file_atexit(outfile);
1335 			}
1336 		}
1337 	}
1338 
1339 #if defined(SORT_THREADS)
1340 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1341 		nthreads = 1;
1342 #endif
1343 
1344 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1345 		struct file_list fl;
1346 		struct sort_list list;
1347 
1348 		sort_list_init(&list);
1349 		file_list_init(&fl, true);
1350 
1351 		if (argc < 1)
1352 			procfile("-", &list, &fl);
1353 		else {
1354 			while (argc > 0) {
1355 				procfile(*argv, &list, &fl);
1356 				--argc;
1357 				++argv;
1358 			}
1359 		}
1360 
1361 		if (fl.count < 1)
1362 			sort_list_to_file(&list, outfile);
1363 		else {
1364 			if (list.count > 0) {
1365 				char *flast = new_tmp_file_name();
1366 
1367 				sort_list_to_file(&list, flast);
1368 				file_list_add(&fl, flast, false);
1369 			}
1370 			merge_files(&fl, outfile);
1371 		}
1372 
1373 		file_list_clean(&fl);
1374 
1375 		/*
1376 		 * We are about to exit the program, so we can ignore
1377 		 * the clean-up for speed
1378 		 *
1379 		 * sort_list_clean(&list);
1380 		 */
1381 
1382 	} else if (sort_opts_vals.cflag) {
1383 		result = (argc == 0) ? (check("-")) : (check(*argv));
1384 	} else if (sort_opts_vals.mflag) {
1385 		struct file_list fl;
1386 
1387 		file_list_init(&fl, false);
1388 		file_list_populate(&fl, argc, argv, true);
1389 		merge_files(&fl, outfile);
1390 		file_list_clean(&fl);
1391 	}
1392 
1393 	if (real_outfile) {
1394 		unlink(real_outfile);
1395 		if (rename(outfile, real_outfile) < 0)
1396 			err(2, NULL);
1397 		sort_free(real_outfile);
1398 	}
1399 
1400 	sort_free(outfile);
1401 
1402 	return (result);
1403 }
1404