xref: /dragonfly/usr.bin/sort/sort.c (revision 65cc0652)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $
28  */
29 
30 
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #if defined(SORT_RANDOM)
41 #include <md5.h>
42 #endif
43 #include <regex.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <wchar.h>
51 #include <wctype.h>
52 
53 #include "coll.h"
54 #include "file.h"
55 #include "sort.h"
56 
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
61 
/*
 * Short-option string handed to getopt_long().  The 'R' (random sort)
 * switch is only part of it when SORT_RANDOM is defined; the same build
 * option also provides the seed bookkeeping below.
 */
#if !defined(SORT_RANDOM)
#define	OPTIONS	"bcCdfghik:Mmno:rsS:t:T:uVz"
#else
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Default seed source and the maximum number of bytes read from it. */
#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Set as soon as an 'R' sort modifier is seen. */
static bool need_random;
/* File the seed is derived from; replaced by --random-source. */
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
/* Seed text and its length, filled in by set_random_seed(). */
static const void *random_seed;
static size_t random_seed_size;

/* MD5 context pre-loaded with the seed by set_random_seed(). */
MD5_CTX md5_ctx;
#endif
79 
/*
 * Built-in message texts, indexed by message number.  They are used
 * when NLS is disabled or no catalogue is found.  Index 0 is an empty
 * placeholder so that message numbers start at 1.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	/* Usage line; optional features are spliced in at compile time. */
	[12] = "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] "
#if defined(SORT_RANDOM)
	    "[--random-sort [--random-source file]] "
#endif
	    "[--compress-program program] [file ...]\n"
};
111 
112 struct sort_opts sort_opts_vals;
113 
114 bool debug_sort;
115 bool need_hint;
116 
117 #if defined(SORT_THREADS)
118 unsigned int ncpu = 1;
119 size_t nthreads = 1;
120 #endif
121 
122 static bool gnusort_numeric_compatibility;
123 
124 static struct sort_mods default_sort_mods_object;
125 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
126 
127 static bool print_symbols_on_debug;
128 
129 /*
130  * Arguments from file (when file0-from option is used:
131  */
132 static size_t argc_from_file0 = (size_t)-1;
133 static char **argv_from_file0;
134 
/*
 * getopt_long() return codes for options that have no single-character
 * equivalent.  Numbering starts just above CHAR_MAX so that none of
 * them can collide with a short option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
#if defined(SORT_RANDOM)
	RANDOMSOURCE_OPT,		/* --random-source */
#endif
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
159 
/*
 * Sort-type flags of which at most one may be in effect; see
 * check_mutually_exclusive_flags().
 */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
162 
163 static struct option long_options[] = {
164 				{ "batch-size", required_argument, NULL, BS_OPT },
165 				{ "buffer-size", required_argument, NULL, 'S' },
166 				{ "check", optional_argument, NULL, 'c' },
167 				{ "check=silent|quiet", optional_argument, NULL, 'C' },
168 				{ "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
169 				{ "debug", no_argument, NULL, DEBUG_OPT },
170 				{ "dictionary-order", no_argument, NULL, 'd' },
171 				{ "field-separator", required_argument, NULL, 't' },
172 				{ "files0-from", required_argument, NULL, FF_OPT },
173 				{ "general-numeric-sort", no_argument, NULL, 'g' },
174 				{ "heapsort", no_argument, NULL, HEAPSORT_OPT },
175 				{ "help",no_argument, NULL, HELP_OPT },
176 				{ "human-numeric-sort", no_argument, NULL, 'h' },
177 				{ "ignore-leading-blanks", no_argument, NULL, 'b' },
178 				{ "ignore-case", no_argument, NULL, 'f' },
179 				{ "ignore-nonprinting", no_argument, NULL, 'i' },
180 				{ "key", required_argument, NULL, 'k' },
181 				{ "merge", no_argument, NULL, 'm' },
182 				{ "mergesort", no_argument, NULL, MERGESORT_OPT },
183 				{ "mmap", no_argument, NULL, MMAP_OPT },
184 				{ "month-sort", no_argument, NULL, 'M' },
185 				{ "numeric-sort", no_argument, NULL, 'n' },
186 				{ "output", required_argument, NULL, 'o' },
187 #if defined(SORT_THREADS)
188 				{ "parallel", required_argument, NULL, PARALLEL_OPT },
189 #endif
190 				{ "qsort", no_argument, NULL, QSORT_OPT },
191 				{ "radixsort", no_argument, NULL, RADIXSORT_OPT },
192 #if defined(SORT_RANDOM)
193 				{ "random-sort", no_argument, NULL, 'R' },
194 				{ "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
195 #endif
196 				{ "reverse", no_argument, NULL, 'r' },
197 				{ "sort", required_argument, NULL, SORT_OPT },
198 				{ "stable", no_argument, NULL, 's' },
199 				{ "temporary-directory",required_argument, NULL, 'T' },
200 				{ "unique", no_argument, NULL, 'u' },
201 				{ "version", no_argument, NULL, VERSION_OPT },
202 				{ "version-sort",no_argument, NULL, 'V' },
203 				{ "zero-terminated", no_argument, NULL, 'z' },
204 				{ NULL, no_argument, NULL, 0 }
205 };
206 
/* Rewrites obsolete "+POS1 [-POS2]" arguments in place; defined below. */
void fix_obsolete_keys(int *argc, char **argv);
208 
209 /*
210  * Check where sort modifier is present
211  */
212 static bool
213 sort_modifier_empty(struct sort_mods *sm)
214 {
215 
216 	if (sm == NULL)
217 		return (true);
218 	return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
219 #ifdef SORT_RANDOM
220 	    sm->Rflag ||
221 #endif
222 	    sm->rflag || sm->hflag || sm->dflag || sm->fflag));
223 }
224 
/*
 * Print the usage text and terminate the program.
 *
 * With opt_err set the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *stream = opt_err ? stderr : stdout;

	fprintf(stream, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
244 
245 /*
246  * Read input file names from a file (file0-from option).
247  */
248 static void
249 read_fns_from_file0(const char *fn)
250 {
251 	FILE *f;
252 	char *line = NULL;
253 	size_t linesize = 0;
254 	ssize_t linelen;
255 
256 	if (fn == NULL)
257 		return;
258 
259 	f = fopen(fn, "r");
260 	if (f == NULL)
261 		err(2, "%s", fn);
262 
263 	while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
264 		if (*line != '\0') {
265 			if (argc_from_file0 == (size_t) - 1)
266 				argc_from_file0 = 0;
267 			++argc_from_file0;
268 			argv_from_file0 = sort_realloc(argv_from_file0,
269 			    argc_from_file0 * sizeof(char *));
270 			if (argv_from_file0 == NULL)
271 				err(2, NULL);
272 			argv_from_file0[argc_from_file0 - 1] = line;
273 		} else {
274 			free(line);
275 		}
276 		line = NULL;
277 		linesize = 0;
278 	}
279 	if (ferror(f))
280 		err(2, "%s: getdelim", fn);
281 
282 	closefile(f, fn);
283 }
284 
285 /*
286  * Check how much RAM is available for the sort.
287  */
288 static void
289 set_hw_params(void)
290 {
291 	long pages, psize;
292 
293 	pages = psize = 0;
294 
295 #if defined(SORT_THREADS)
296 	ncpu = 1;
297 #endif
298 
299 	pages = sysconf(_SC_PHYS_PAGES);
300 	if (pages < 1) {
301 		perror("sysconf pages");
302 		pages = 1;
303 	}
304 	psize = sysconf(_SC_PAGESIZE);
305 	if (psize < 1) {
306 		perror("sysconf psize");
307 		psize = 4096;
308 	}
309 #if defined(SORT_THREADS)
310 	ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
311 	if (ncpu < 1)
312 		ncpu = 1;
313 	else if(ncpu > 32)
314 		ncpu = 32;
315 
316 	nthreads = ncpu;
317 #endif
318 
319 	free_memory = (unsigned long long) pages * (unsigned long long) psize;
320 	available_free_memory = free_memory / 2;
321 
322 	if (available_free_memory < 1024)
323 		available_free_memory = 1024;
324 }
325 
/*
 * Convert the first multibyte character of c into a wide character.
 *
 * The result is stored in *wc.  If mbtowc() reports an error or an
 * empty string (return value below 1), *wc is set to def instead.
 * Nothing happens when either pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
341 
342 /*
343  * Set current locale symbols.
344  */
345 static void
346 set_locale(void)
347 {
348 	struct lconv *lc;
349 	const char *locale;
350 
351 	setlocale(LC_ALL, "");
352 
353 	lc = localeconv();
354 
355 	if (lc) {
356 		/* obtain LC_NUMERIC info */
357 		/* Convert to wide char form */
358 		conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
359 		    symbol_decimal_point);
360 		conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
361 		    symbol_thousands_sep);
362 		conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
363 		    symbol_positive_sign);
364 		conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
365 		    symbol_negative_sign);
366 	}
367 
368 	if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
369 		gnusort_numeric_compatibility = true;
370 
371 	locale = setlocale(LC_COLLATE, NULL);
372 
373 	if (locale) {
374 		char *tmpl;
375 		const char *cclocale;
376 
377 		tmpl = sort_strdup(locale);
378 		cclocale = setlocale(LC_COLLATE, "C");
379 		if (cclocale && !strcmp(cclocale, tmpl))
380 			byte_sort = true;
381 		else {
382 			const char *pclocale;
383 
384 			pclocale = setlocale(LC_COLLATE, "POSIX");
385 			if (pclocale && !strcmp(pclocale, tmpl))
386 				byte_sort = true;
387 		}
388 		setlocale(LC_COLLATE, tmpl);
389 		sort_free(tmpl);
390 	}
391 }
392 
393 /*
394  * Set directory temporary files.
395  */
396 static void
397 set_tmpdir(void)
398 {
399 	char *td;
400 
401 	td = getenv("TMPDIR");
402 	if (td != NULL)
403 		tmpdir = sort_strdup(td);
404 }
405 
406 /*
407  * Parse -S option.
408  */
409 static unsigned long long
410 parse_memory_buffer_value(const char *value)
411 {
412 
413 	if (value == NULL)
414 		return (available_free_memory);
415 	else {
416 		char *endptr;
417 		unsigned long long membuf;
418 
419 		endptr = NULL;
420 		errno = 0;
421 		membuf = strtoll(value, &endptr, 10);
422 
423 		if (errno != 0) {
424 			warn("%s",getstr(4));
425 			membuf = available_free_memory;
426 		} else {
427 			switch (*endptr){
428 			case 'Y':
429 				membuf *= 1024;
430 				/* FALLTHROUGH */
431 			case 'Z':
432 				membuf *= 1024;
433 				/* FALLTHROUGH */
434 			case 'E':
435 				membuf *= 1024;
436 				/* FALLTHROUGH */
437 			case 'P':
438 				membuf *= 1024;
439 				/* FALLTHROUGH */
440 			case 'T':
441 				membuf *= 1024;
442 				/* FALLTHROUGH */
443 			case 'G':
444 				membuf *= 1024;
445 				/* FALLTHROUGH */
446 			case 'M':
447 				membuf *= 1024;
448 				/* FALLTHROUGH */
449 			case '\0':
450 			case 'K':
451 				membuf *= 1024;
452 				/* FALLTHROUGH */
453 			case 'b':
454 				break;
455 			case '%':
456 				membuf = (available_free_memory * membuf) /
457 				    100;
458 				break;
459 			default:
460 				warnc(EINVAL, "%s", optarg);
461 				membuf = available_free_memory;
462 			}
463 		}
464 		return (membuf);
465 	}
466 }
467 
468 /*
469  * Signal handler that clears the temporary files.
470  */
471 static void
472 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
473     void *context __unused)
474 {
475 
476 	clear_tmp_files();
477 	exit(-1);
478 }
479 
480 /*
481  * Set signal handler on panic signals.
482  */
483 static void
484 set_signal_handler(void)
485 {
486 	struct sigaction sa;
487 
488 	memset(&sa, 0, sizeof(sa));
489 	sa.sa_sigaction = &sig_handler;
490 	sa.sa_flags = SA_SIGINFO;
491 
492 	if (sigaction(SIGTERM, &sa, NULL) < 0) {
493 		perror("sigaction");
494 		return;
495 	}
496 	if (sigaction(SIGHUP, &sa, NULL) < 0) {
497 		perror("sigaction");
498 		return;
499 	}
500 	if (sigaction(SIGINT, &sa, NULL) < 0) {
501 		perror("sigaction");
502 		return;
503 	}
504 	if (sigaction(SIGQUIT, &sa, NULL) < 0) {
505 		perror("sigaction");
506 		return;
507 	}
508 	if (sigaction(SIGABRT, &sa, NULL) < 0) {
509 		perror("sigaction");
510 		return;
511 	}
512 	if (sigaction(SIGBUS, &sa, NULL) < 0) {
513 		perror("sigaction");
514 		return;
515 	}
516 	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
517 		perror("sigaction");
518 		return;
519 	}
520 	if (sigaction(SIGUSR1, &sa, NULL) < 0) {
521 		perror("sigaction");
522 		return;
523 	}
524 	if (sigaction(SIGUSR2, &sa, NULL) < 0) {
525 		perror("sigaction");
526 		return;
527 	}
528 }
529 
/*
 * Report an unrecognised feature name ("Unknown feature: <what>" by
 * default) and terminate with exit status 2.
 */
static void
unknown(const char *what)
{
	const char *msg = getstr(3);

	errx(2, "%s: %s", msg, what);
}
539 
540 /*
541  * Check whether contradictory input options are used.
542  */
543 static void
544 check_mutually_exclusive_flags(char c, bool *mef_flags)
545 {
546 	int fo_index, mec;
547 	bool found_others, found_this;
548 
549 	found_others = found_this = false;
550 	fo_index = 0;
551 
552 	for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
553 		mec = mutually_exclusive_flags[i];
554 
555 		if (mec != c) {
556 			if (mef_flags[i]) {
557 				if (found_this)
558 					errx(1, "%c:%c: %s", c, mec, getstr(1));
559 				found_others = true;
560 				fo_index = i;
561 			}
562 		} else {
563 			if (found_others)
564 				errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
565 			mef_flags[i] = true;
566 			found_this = true;
567 		}
568 	}
569 }
570 
571 /*
572  * Initialise sort opts data.
573  */
574 static void
575 set_sort_opts(void)
576 {
577 
578 	memset(&default_sort_mods_object, 0,
579 	    sizeof(default_sort_mods_object));
580 	memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
581 	default_sort_mods_object.func =
582 	    get_sort_func(&default_sort_mods_object);
583 }
584 
585 /*
586  * Set a sort modifier on a sort modifiers object.
587  */
588 static bool
589 set_sort_modifier(struct sort_mods *sm, int c)
590 {
591 
592 	if (sm) {
593 		switch (c){
594 		case 'b':
595 			sm->bflag = true;
596 			break;
597 		case 'd':
598 			sm->dflag = true;
599 			break;
600 		case 'f':
601 			sm->fflag = true;
602 			break;
603 		case 'g':
604 			sm->gflag = true;
605 			need_hint = true;
606 			break;
607 		case 'i':
608 			sm->iflag = true;
609 			break;
610 #ifdef SORT_RANDOM
611 		case 'R':
612 			sm->Rflag = true;
613 			need_random = true;
614 			break;
615 #endif
616 		case 'M':
617 			initialise_months();
618 			sm->Mflag = true;
619 			need_hint = true;
620 			break;
621 		case 'n':
622 			sm->nflag = true;
623 			need_hint = true;
624 			print_symbols_on_debug = true;
625 			break;
626 		case 'r':
627 			sm->rflag = true;
628 			break;
629 		case 'V':
630 			sm->Vflag = true;
631 			break;
632 		case 'h':
633 			sm->hflag = true;
634 			need_hint = true;
635 			print_symbols_on_debug = true;
636 			break;
637 		default:
638 			return false;
639 		}
640 		sort_opts_vals.complex_sort = true;
641 		sm->func = get_sort_func(sm);
642 	}
643 	return (true);
644 }
645 
646 /*
647  * Parse POS in -k option.
648  */
649 static int
650 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
651 {
652 	regmatch_t pmatch[4];
653 	regex_t re;
654 	char *c, *f;
655 	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
656 	size_t len, nmatch;
657 	int ret;
658 
659 	ret = -1;
660 	nmatch = 4;
661 	c = f = NULL;
662 
663 	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
664 		return (-1);
665 
666 	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
667 		goto end;
668 
669 	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
670 		goto end;
671 
672 	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
673 		goto end;
674 
675 	len = pmatch[1].rm_eo - pmatch[1].rm_so;
676 	f = sort_malloc((len + 1) * sizeof(char));
677 
678 	strncpy(f, s + pmatch[1].rm_so, len);
679 	f[len] = '\0';
680 
681 	if (second) {
682 		errno = 0;
683 		ks->f2 = (size_t) strtoul(f, NULL, 10);
684 		if (errno != 0)
685 			err(2, "-k");
686 		if (ks->f2 == 0) {
687 			warn("%s",getstr(5));
688 			goto end;
689 		}
690 	} else {
691 		errno = 0;
692 		ks->f1 = (size_t) strtoul(f, NULL, 10);
693 		if (errno != 0)
694 			err(2, "-k");
695 		if (ks->f1 == 0) {
696 			warn("%s",getstr(5));
697 			goto end;
698 		}
699 	}
700 
701 	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
702 		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
703 		c = sort_malloc((len + 1) * sizeof(char));
704 
705 		strncpy(c, s + pmatch[2].rm_so + 1, len);
706 		c[len] = '\0';
707 
708 		if (second) {
709 			errno = 0;
710 			ks->c2 = (size_t) strtoul(c, NULL, 10);
711 			if (errno != 0)
712 				err(2, "-k");
713 		} else {
714 			errno = 0;
715 			ks->c1 = (size_t) strtoul(c, NULL, 10);
716 			if (errno != 0)
717 				err(2, "-k");
718 			if (ks->c1 == 0) {
719 				warn("%s",getstr(6));
720 				goto end;
721 			}
722 		}
723 	} else {
724 		if (second)
725 			ks->c2 = 0;
726 		else
727 			ks->c1 = 1;
728 	}
729 
730 	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
731 		regoff_t i = 0;
732 
733 		for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
734 			check_mutually_exclusive_flags(s[i], mef_flags);
735 			if (s[i] == 'b') {
736 				if (second)
737 					ks->pos2b = true;
738 				else
739 					ks->pos1b = true;
740 			} else if (!set_sort_modifier(&(ks->sm), s[i]))
741 				goto end;
742 		}
743 	}
744 
745 	ret = 0;
746 
747 end:
748 
749 	if (c)
750 		sort_free(c);
751 	if (f)
752 		sort_free(f);
753 	regfree(&re);
754 
755 	return (ret);
756 }
757 
758 /*
759  * Parse -k option value.
760  */
761 static int
762 parse_k(const char *s, struct key_specs *ks)
763 {
764 	int ret = -1;
765 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
766 	    { false, false, false, false, false, false };
767 
768 	if (s && *s) {
769 		char *sptr;
770 
771 		sptr = strchr(s, ',');
772 		if (sptr) {
773 			size_t size1;
774 			char *pos1, *pos2;
775 
776 			size1 = sptr - s;
777 
778 			if (size1 < 1)
779 				return (-1);
780 			pos1 = sort_malloc((size1 + 1) * sizeof(char));
781 
782 			strncpy(pos1, s, size1);
783 			pos1[size1] = '\0';
784 
785 			ret = parse_pos(pos1, ks, mef_flags, false);
786 
787 			sort_free(pos1);
788 			if (ret < 0)
789 				return (ret);
790 
791 			pos2 = sort_strdup(sptr + 1);
792 			ret = parse_pos(pos2, ks, mef_flags, true);
793 			sort_free(pos2);
794 		} else
795 			ret = parse_pos(s, ks, mef_flags, false);
796 	}
797 
798 	return (ret);
799 }
800 
/*
 * Size, including the terminating NUL, of the buffers that hold the
 * modifier letters of one obsolete key position.
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse POS in the obsolete "+POS -POS" syntax: FIELD[.COLUMN][LETTERS].
 *
 * s           position text, without the leading '+' or '-'
 * nf, nc      receive the field and column numbers (0 when absent)
 * sopts       receives the modifier letters as a NUL-terminated string;
 *             left untouched when there are none
 * sopts_size  capacity of sopts in bytes
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * its modifier letters do not fit into sopts.  A number too large to
 * have 1 added to it as an int terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char *sopts,
    size_t sopts_size)
{
	regex_t re;
	regmatch_t pmatch[4];
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	size_t len;
	int ret = -1;

	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, 4, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * The regex guarantees a non-digit (or the end of the string)
	 * after the digits, so the numbers can be read in place.  The
	 * caller adds 1 to the results, hence the ">= INT_MAX" limit.
	 */
	errno = 0;
	val = strtoul(s + pmatch[1].rm_so, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Skip the leading '.' of the column group. */
		errno = 0;
		val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so);

		/* Never write past the end of the caller's buffer. */
		if (len >= sopts_size)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument of the form "+F[.C][letters]" is replaced by a newly
 * allocated "-k" argument with the field and column each increased by
 * one.  If the next argument has the form "-F[.C][letters]" it is
 * folded into the same key as the end position (its field is increased
 * by one only when a non-zero column is given) and removed from argv,
 * which decrements *argc.  Arguments that do not parse are left as
 * they are.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for "-k", four numbers, punctuation and both letter sets. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char sopts1[OBS_SOPTS_SIZE];
		char *arg1 = argv[i];
		int c1, f1;

		if (strlen(arg1) < 2 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1,
		    sizeof(sopts1)) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char sopts2[OBS_SOPTS_SIZE];
			char *arg2 = argv[i + 1];
			int c2, f2;

			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (strlen(arg2) > 1 && arg2[0] == '-' &&
			    parse_pos_obs(arg2 + 1, &f2, &c2, sopts2,
			    sizeof(sopts2)) >= 0) {
				if (c2 > 0)
					f2 += 1;
				snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				argv[i] = sort_strdup(sopt);

				/* Close the gap left by the -POS2 argument. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				/* Keep the vector NULL-terminated. */
				argv[*argc] = NULL;
				continue;
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
931 
/*
 * Set the random seed for random sort (-R).
 *
 * Does nothing unless a random sort was requested.  With the default
 * source, up to MAX_DEFAULT_RANDOM_SEED_DATA_SIZE bytes are read from
 * it and hashed with MD5; with a user-supplied source the whole file
 * is hashed.  The textual digest becomes the seed and is fed into the
 * global md5_ctx.  Failure to obtain a digest terminates the program
 * with status 2.
 */
#if defined(SORT_RANDOM)
static void
set_random_seed(void)
{
	char *digest;

	if (!need_random)
		return;

	if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
		FILE *fseed;
		MD5_CTX ctx;
		char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
		size_t sz = 0;

		fseed = openfile(random_source, "r");
		/* Defensive: never read through a NULL stream. */
		if (fseed == NULL)
			err(2, "%s", random_source);

		while (sz < sizeof(rsd)) {
			int cr = fgetc(fseed);

			if (cr == EOF)
				break;
			rsd[sz++] = (char) cr;
		}

		closefile(fseed, random_source);

		MD5Init(&ctx);
		MD5Update(&ctx, rsd, sz);
		digest = MD5End(&ctx, NULL);
	} else {
		digest = MD5File(random_source, NULL);
	}

	/* Both digest routines can fail; strlen(NULL) must not happen. */
	if (digest == NULL)
		err(2, NULL);

	random_seed = digest;
	random_seed_size = strlen(digest);

	MD5Init(&md5_ctx);
	if (random_seed_size > 0)
		MD5Update(&md5_ctx, random_seed, random_seed_size);
}
#endif
989 
990 /*
991  * Main function.
992  */
993 int
994 main(int argc, char **argv)
995 {
996 	char *outfile, *real_outfile;
997 	int c, result;
998 	bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
999 	    { false, false, false, false, false, false };
1000 
1001 	result = 0;
1002 	outfile = sort_strdup("-");
1003 	real_outfile = NULL;
1004 
1005 	struct sort_mods *sm = &default_sort_mods_object;
1006 
1007 	init_tmp_files();
1008 
1009 	set_signal_handler();
1010 
1011 	set_hw_params();
1012 	set_locale();
1013 	set_tmpdir();
1014 	set_sort_opts();
1015 
1016 	fix_obsolete_keys(&argc, argv);
1017 
1018 	while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1019 	    != -1)) {
1020 
1021 		check_mutually_exclusive_flags(c, mef_flags);
1022 
1023 		if (!set_sort_modifier(sm, c)) {
1024 
1025 			switch (c) {
1026 			case 'c':
1027 				sort_opts_vals.cflag = true;
1028 				if (optarg) {
1029 					if (!strcmp(optarg, "diagnose-first"))
1030 						;
1031 					else if (!strcmp(optarg, "silent") ||
1032 					    !strcmp(optarg, "quiet"))
1033 						sort_opts_vals.csilentflag = true;
1034 					else if (*optarg)
1035 						unknown(optarg);
1036 				}
1037 				break;
1038 			case 'C':
1039 				sort_opts_vals.cflag = true;
1040 				sort_opts_vals.csilentflag = true;
1041 				break;
1042 			case 'k':
1043 			{
1044 				sort_opts_vals.complex_sort = true;
1045 				sort_opts_vals.kflag = true;
1046 
1047 				keys_num++;
1048 				keys = sort_realloc(keys, keys_num *
1049 				    sizeof(struct key_specs));
1050 				memset(&(keys[keys_num - 1]), 0,
1051 				    sizeof(struct key_specs));
1052 
1053 				if (parse_k(optarg, &(keys[keys_num - 1]))
1054 				    < 0) {
1055 					errc(2, EINVAL, "-k %s", optarg);
1056 				}
1057 
1058 				break;
1059 			}
1060 			case 'm':
1061 				sort_opts_vals.mflag = true;
1062 				break;
1063 			case 'o':
1064 				outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1065 				strcpy(outfile, optarg);
1066 				break;
1067 			case 's':
1068 				sort_opts_vals.sflag = true;
1069 				break;
1070 			case 'S':
1071 				available_free_memory =
1072 				    parse_memory_buffer_value(optarg);
1073 				break;
1074 			case 'T':
1075 				tmpdir = sort_strdup(optarg);
1076 				break;
1077 			case 't':
1078 				while (strlen(optarg) > 1) {
1079 					if (optarg[0] != '\\') {
1080 						errc(2, EINVAL, "%s", optarg);
1081 					}
1082 					optarg += 1;
1083 					if (*optarg == '0') {
1084 						*optarg = 0;
1085 						break;
1086 					}
1087 				}
1088 				sort_opts_vals.tflag = true;
1089 				sort_opts_vals.field_sep = btowc(optarg[0]);
1090 				if (sort_opts_vals.field_sep == WEOF) {
1091 					errno = EINVAL;
1092 					err(2, NULL);
1093 				}
1094 				if (!gnusort_numeric_compatibility) {
1095 					if (symbol_decimal_point == sort_opts_vals.field_sep)
1096 						symbol_decimal_point = WEOF;
1097 					if (symbol_thousands_sep == sort_opts_vals.field_sep)
1098 						symbol_thousands_sep = WEOF;
1099 					if (symbol_negative_sign == sort_opts_vals.field_sep)
1100 						symbol_negative_sign = WEOF;
1101 					if (symbol_positive_sign == sort_opts_vals.field_sep)
1102 						symbol_positive_sign = WEOF;
1103 				}
1104 				break;
1105 			case 'u':
1106 				sort_opts_vals.uflag = true;
1107 				/* stable sort for the correct unique val */
1108 				sort_opts_vals.sflag = true;
1109 				break;
1110 			case 'z':
1111 				sort_opts_vals.zflag = true;
1112 				break;
1113 			case SORT_OPT:
1114 				if (optarg) {
1115 					if (!strcmp(optarg, "general-numeric"))
1116 						set_sort_modifier(sm, 'g');
1117 					else if (!strcmp(optarg, "human-numeric"))
1118 						set_sort_modifier(sm, 'h');
1119 					else if (!strcmp(optarg, "numeric"))
1120 						set_sort_modifier(sm, 'n');
1121 					else if (!strcmp(optarg, "month"))
1122 						set_sort_modifier(sm, 'M');
1123 #if defined(SORT_RANDOM)
1124 					else if (!strcmp(optarg, "random"))
1125 						set_sort_modifier(sm, 'R');
1126 #endif
1127 					else
1128 						unknown(optarg);
1129 				}
1130 				break;
1131 #if defined(SORT_THREADS)
1132 			case PARALLEL_OPT:
1133 				nthreads = (size_t)(atoi(optarg));
1134 				if (nthreads < 1)
1135 					nthreads = 1;
1136 				if (nthreads > 1024)
1137 					nthreads = 1024;
1138 				break;
1139 #endif
1140 			case QSORT_OPT:
1141 				sort_opts_vals.sort_method = SORT_QSORT;
1142 				break;
1143 			case MERGESORT_OPT:
1144 				sort_opts_vals.sort_method = SORT_MERGESORT;
1145 				break;
1146 			case MMAP_OPT:
1147 				use_mmap = true;
1148 				break;
1149 			case HEAPSORT_OPT:
1150 				sort_opts_vals.sort_method = SORT_HEAPSORT;
1151 				break;
1152 			case RADIXSORT_OPT:
1153 				sort_opts_vals.sort_method = SORT_RADIXSORT;
1154 				break;
1155 #if defined(SORT_RANDOM)
1156 			case RANDOMSOURCE_OPT:
1157 				random_source = strdup(optarg);
1158 				break;
1159 #endif
1160 			case COMPRESSPROGRAM_OPT:
1161 				compress_program = strdup(optarg);
1162 				break;
1163 			case FF_OPT:
1164 				read_fns_from_file0(optarg);
1165 				break;
1166 			case BS_OPT:
1167 			{
1168 				errno = 0;
1169 				long mof = strtol(optarg, NULL, 10);
1170 				if (errno != 0)
1171 					err(2, "--batch-size");
1172 				if (mof >= 2)
1173 					max_open_files = (size_t) mof + 1;
1174 			}
1175 				break;
1176 			case VERSION_OPT:
1177 				printf("%s\n", VERSION);
1178 				exit(EXIT_SUCCESS);
1179 				/* NOTREACHED */
1180 				break;
1181 			case DEBUG_OPT:
1182 				debug_sort = true;
1183 				break;
1184 			case HELP_OPT:
1185 				usage(false);
1186 				/* NOTREACHED */
1187 				break;
1188 			default:
1189 				usage(true);
1190 				/* NOTREACHED */
1191 			}
1192 		}
1193 	}
1194 
1195 	argc -= optind;
1196 	argv += optind;
1197 
1198 #ifndef WITHOUT_NLS
1199 	catalog = catopen("sort", NL_CAT_LOCALE);
1200 #endif
1201 
1202 	if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1203 		errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1204 
1205 #ifndef WITHOUT_NLS
1206 	catclose(catalog);
1207 #endif
1208 
1209 	if (keys_num == 0) {
1210 		keys_num = 1;
1211 		keys = sort_realloc(keys, sizeof(struct key_specs));
1212 		memset(&(keys[0]), 0, sizeof(struct key_specs));
1213 		keys[0].c1 = 1;
1214 		keys[0].pos1b = default_sort_mods->bflag;
1215 		keys[0].pos2b = default_sort_mods->bflag;
1216 		memcpy(&(keys[0].sm), default_sort_mods,
1217 		    sizeof(struct sort_mods));
1218 	}
1219 
1220 	for (size_t i = 0; i < keys_num; i++) {
1221 		struct key_specs *ks;
1222 
1223 		ks = &(keys[i]);
1224 
1225 		if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1226 		    !(ks->pos2b)) {
1227 			ks->pos1b = sm->bflag;
1228 			ks->pos2b = sm->bflag;
1229 			memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1230 		}
1231 
1232 		ks->sm.func = get_sort_func(&(ks->sm));
1233 	}
1234 
1235 	if (argv_from_file0) {
1236 		argc = argc_from_file0;
1237 		argv = argv_from_file0;
1238 	}
1239 
1240 	if (debug_sort) {
1241 		printf("Memory to be used for sorting: %llu\n",available_free_memory);
1242 #if defined(SORT_THREADS)
1243 		printf("Number of CPUs: %d\n",(int)ncpu);
1244 		nthreads = 1;
1245 #endif
1246 		printf("Using collate rules of %s locale\n",
1247 		    setlocale(LC_COLLATE, NULL));
1248 		if (byte_sort)
1249 			printf("Byte sort is used\n");
1250 		if (print_symbols_on_debug) {
1251 			printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1252 			if (symbol_thousands_sep)
1253 				printf("Thousands separator: <%lc>\n",
1254 				    symbol_thousands_sep);
1255 			printf("Positive sign: <%lc>\n", symbol_positive_sign);
1256 			printf("Negative sign: <%lc>\n", symbol_negative_sign);
1257 		}
1258 	}
1259 
1260 #if defined(SORT_RANDOM)
1261 	set_random_seed();
1262 #endif
1263 
1264 	/* Case when the outfile equals one of the input files: */
1265 	if (strcmp(outfile, "-")) {
1266 
1267 		for(int i = 0; i < argc; ++i) {
1268 			if (strcmp(argv[i], outfile) == 0) {
1269 				real_outfile = sort_strdup(outfile);
1270 				for(;;) {
1271 					char* tmp = sort_malloc(strlen(outfile) +
1272 					    strlen(".tmp") + 1);
1273 
1274 					strcpy(tmp, outfile);
1275 					strcpy(tmp + strlen(tmp), ".tmp");
1276 					sort_free(outfile);
1277 					outfile = tmp;
1278 					if (access(outfile, F_OK) < 0)
1279 						break;
1280 				}
1281 				tmp_file_atexit(outfile);
1282 			}
1283 		}
1284 	}
1285 
1286 #if defined(SORT_THREADS)
1287 	if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1288 		nthreads = 1;
1289 #endif
1290 
1291 	if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1292 		struct file_list fl;
1293 		struct sort_list list;
1294 
1295 		sort_list_init(&list);
1296 		file_list_init(&fl, true);
1297 
1298 		if (argc < 1)
1299 			procfile("-", &list, &fl);
1300 		else {
1301 			while (argc > 0) {
1302 				procfile(*argv, &list, &fl);
1303 				--argc;
1304 				++argv;
1305 			}
1306 		}
1307 
1308 		if (fl.count < 1)
1309 			sort_list_to_file(&list, outfile);
1310 		else {
1311 			if (list.count > 0) {
1312 				char *flast = new_tmp_file_name();
1313 
1314 				sort_list_to_file(&list, flast);
1315 				file_list_add(&fl, flast, false);
1316 			}
1317 			merge_files(&fl, outfile);
1318 		}
1319 
1320 		file_list_clean(&fl);
1321 
1322 		/*
1323 		 * We are about to exit the program, so we can ignore
1324 		 * the clean-up for speed
1325 		 *
1326 		 * sort_list_clean(&list);
1327 		 */
1328 
1329 	} else if (sort_opts_vals.cflag) {
1330 		result = (argc == 0) ? (check("-")) : (check(*argv));
1331 	} else if (sort_opts_vals.mflag) {
1332 		struct file_list fl;
1333 
1334 		file_list_init(&fl, false);
1335 		file_list_populate(&fl, argc, argv, true);
1336 		merge_files(&fl, outfile);
1337 		file_list_clean(&fl);
1338 	}
1339 
1340 	if (real_outfile) {
1341 		unlink(real_outfile);
1342 		if (rename(outfile, real_outfile) < 0)
1343 			err(2, NULL);
1344 		sort_free(real_outfile);
1345 	}
1346 
1347 	sort_free(outfile);
1348 
1349 	return (result);
1350 }
1351