xref: /openbsd/usr.bin/sort/sort.c (revision 404b540a)
1 /*	$OpenBSD: sort.c,v 1.36 2007/08/22 06:56:40 jmc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Peter McIlroy.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
/*
 * Identification strings kept in the object file.  They are compiled
 * out when the source is processed with "lint" defined.
 */
#if !defined(lint)
static char copyright[] =
    "@(#) Copyright (c) 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";

#if 0
static char sccsid[] = "@(#)sort.c	8.1 (Berkeley) 6/6/93";
#else
static char rcsid[] = "$OpenBSD: sort.c,v 1.36 2007/08/22 06:56:40 jmc Exp $";
#endif
#endif /* !lint */
48 
49 /*
50  * Sort sorts a file using an optional user-defined key.
51  * Sort uses radix sort for internal sorting, and allows
52  * a choice of merge sort and radix sort for external sorting.
53  */
54 
55 #include "sort.h"
56 #include "fsort.h"
57 #include "pathnames.h"
58 
59 #include <sys/types.h>
60 #include <sys/stat.h>
61 #include <locale.h>
62 #include <paths.h>
63 #include <signal.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <unistd.h>
67 #include <err.h>
68 
69 int REC_D = '\n';
70 u_char d_mask[NBINS];		/* flags for rec_d, field_d, <blank> */
71 
72 /*
73  * weight tables.  Gweights is one of ascii, Rascii..
74  * modified to weight rec_d = 0 (or 255)
75  */
76 extern u_char gweights[NBINS];
77 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
78 
79 /*
80  * masks of ignored characters.  Alltable is 256 ones
81  */
82 u_char dtable[NBINS], itable[NBINS], alltable[NBINS];
83 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0, STABLE = 0;
84 struct coldesc *clist;
85 int ncols = 0;
86 int ND = 10;			/* limit on number of -k options. */
87 
88 char *devstdin = _PATH_STDIN;
89 char *tmpdir = _PATH_VARTMP;
90 char toutpath[PATH_MAX];
91 
92 static void cleanup(void);
93 static void onsig(int);
94 static void usage(char *);
95 
/*
 * CHECK_NFIELDS: count one more sort key and, when the count reaches ND,
 * enlarge the field table by ten entries.
 *
 * Relies on these names being in scope at the point of use:
 *   nfields - running key counter
 *   fldtab  - start of the struct field table
 *   ftpos   - cursor into fldtab
 *   p       - scratch void pointer
 *
 * The table always holds ND + 2 entries, matching the initial calloc() in
 * main().  The size is given in elements of *fldtab (not bytes), the cursor
 * offset is saved before realloc() so the stale pointer is never read, and
 * the added tail is zeroed so the table stays zero-terminated the way the
 * calloc()ed original was.
 *
 * NOTE(review): clist is sized from the initial ND and is not grown here;
 * confirm that setfield() cannot overrun it when many keys are given.
 */
#define CHECK_NFIELDS							\
	do {								\
		if (++nfields == ND) {					\
			size_t cnf_used = ftpos - fldtab;		\
			size_t cnf_old = ND + 2;			\
			ND += 10;					\
			p = realloc(fldtab, (ND + 2) * sizeof(*fldtab));\
			if (p == NULL)					\
				errx(2, "cannot allocate memory");	\
			fldtab = p;					\
			memset(fldtab + cnf_old, 0,			\
			    (ND + 2 - cnf_old) * sizeof(*fldtab));	\
			ftpos = fldtab + cnf_used;			\
		}							\
	} while (0)
104 
105 int
106 main(int argc, char *argv[])
107 {
108 	int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *);
109 	int ch, i, stdinflag = 0, tmp = 0;
110 	char nfields = 0, cflag = 0, mflag = 0;
111 	char *outfile, *outpath = 0;
112 	struct field *fldtab, *ftpos;
113 	union f_handle filelist;
114 	FILE *outfp = NULL;
115 	void *p;
116 
117 	setlocale(LC_ALL, "");
118 
119 	if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL ||
120 	    (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL)
121 		errx(2, "cannot allocate memory");
122 	memset(d_mask, 0, NBINS);
123 	d_mask[REC_D = '\n'] = REC_D_F;
124 	d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
125 	fixit(&argc, argv);
126 	if (!issetugid() && (outfile = getenv("TMPDIR")))
127 		tmpdir = outfile;
128 	while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:t:T:uy:zs")) != -1) {
129 		switch (ch) {
130 		case 'b': fldtab->flags |= BI | BT;
131 			break;
132 		case 'd':
133 		case 'f':
134 		case 'i':
135 		case 'n':
136 		case 'r': tmp |= optval(ch, 0);
137 			if (tmp & R && tmp & F)
138 				fldtab->weights = RFtable;
139 			else if (tmp & F)
140 				fldtab->weights = Ftable;
141 			else if (tmp & R)
142 				fldtab->weights = Rascii;
143 			fldtab->flags |= tmp;
144 			break;
145 		case 'o':
146 			outpath = optarg;
147 			break;
148 		case 'k':
149 			CHECK_NFIELDS;
150 			setfield(optarg, ++ftpos, fldtab->flags);
151 			break;
152 		case 't':
153 			if (SEP_FLAG)
154 				usage("multiple field delimiters");
155 			SEP_FLAG = 1;
156 			d_mask[' '] &= ~FLD_D;
157 			d_mask['\t'] &= ~FLD_D;
158 			d_mask[(int)*optarg] |= FLD_D;
159 			if (d_mask[(int)*optarg] & REC_D_F)
160 				err(2, "record/field delimiter clash");
161 			break;
162 		case 'R':
163 			if (REC_D != '\n')
164 				usage("multiple record delimiters");
165 			if ('\n' == (REC_D = *optarg))
166 				break;
167 			d_mask['\n'] = d_mask[' '];
168 			d_mask[REC_D] = REC_D_F;
169 			break;
170 		case 'T':
171 			tmpdir = optarg;
172 			break;
173 		case 'u':
174 			UNIQUE = 1;
175 			break;
176 		case 'c':
177 			cflag = 1;
178 			break;
179 		case 'm':
180 			mflag = 1;
181 			break;
182 		case 'H':
183 			PANIC = 0;
184 			break;
185 		case 'y':
186 			/* accept -y for backwards compat. */
187 			break;
188 		case 'z':
189 			if (REC_D != '\n')
190 				usage("multiple record delimiters");
191 			REC_D = '\0';
192 			d_mask['\n'] = d_mask[' '];
193 			d_mask[REC_D] = REC_D_F;
194 			break;
195 		case 's':
196 			STABLE = 1;
197 			break;
198 		case '?':
199 		default:
200 			usage(NULL);
201 		}
202 	}
203 
204 	if (cflag && argc > optind+1)
205 		errx(2, "too many input files for -c option");
206 
207 	if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
208 		outpath = argv[argc-1];
209 		argc -= 2;
210 	}
211 
212 	if (mflag && argc - optind > (MAXFCT - (16+1))*16)
213 		errx(2, "too many input files for -m option");
214 
215 	for (i = optind; i < argc; i++) {
216 		/* allow one occurrence of /dev/stdin */
217 		if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) {
218 			if (stdinflag)
219 				warnx("ignoring extra \"%s\" in file list",
220 				    argv[i]);
221 			else {
222 				stdinflag = 1;
223 				argv[i] = devstdin;
224 			}
225 		} else if ((ch = access(argv[i], R_OK)))
226 			err(2, "%s", argv[i]);
227 	}
228 
229 	if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
230 		SINGL_FLD = 1;
231 		fldtab[0].icol.num = 1;
232 	} else {
233 		if (!fldtab[1].icol.num) {
234 			CHECK_NFIELDS;
235 			fldtab[0].flags &= ~(BI|BT);
236 			setfield("1", ++ftpos, fldtab->flags);
237 		}
238 		fldreset(fldtab);
239 		fldtab[0].flags &= ~F;
240 	}
241 	settables(fldtab[0].flags);
242 	num_init();
243 	fldtab->weights = gweights;
244 
245 	if (optind == argc) {
246 		static char *names[2];
247 
248 		names[0] = devstdin;
249 		names[1] = NULL;
250 		filelist.names = names;
251 		optind--;
252 	} else
253 		filelist.names = argv+optind;
254 
255 	if (SINGL_FLD)
256 		get = makeline;
257 	else
258 		get = makekey;
259 
260 	if (!SINGL_FLD) {
261 		if ((linebuf = malloc(linebuf_size)) == NULL)
262 			err(2, NULL);
263 	}
264 
265 	if (cflag) {
266 		order(filelist, get, fldtab);
267 		/* NOT REACHED */
268 	}
269 
270 	if (!outpath) {
271 		(void)snprintf(toutpath,
272 		    sizeof(toutpath), "%sstdout", _PATH_DEV);
273 		outfile = outpath = toutpath;
274 	} else if (!(ch = access(outpath, 0)) &&
275 	    strncmp(_PATH_DEV, outpath, 5)) {
276 		struct sigaction act;
277 		int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ,
278 		    SIGVTALRM, SIGPROF, 0};
279 		int outfd;
280 		mode_t um;
281 
282 		errno = 0;
283 
284 		if (access(outpath, W_OK))
285 			err(2, "%s", outpath);
286 		(void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX",
287 		    outpath);
288 		um = umask(S_IWGRP|S_IWOTH);
289 		(void)umask(um);
290 		if ((outfd = mkstemp(toutpath)) == -1 ||
291 		    fchmod(outfd, DEFFILEMODE & ~um) == -1 ||
292 		    (outfp = fdopen(outfd, "w")) == 0)
293 			err(2, "%s", toutpath);
294 		outfile = toutpath;
295 
296 		(void)atexit(cleanup);
297 		sigfillset(&act.sa_mask);
298 		act.sa_flags = SA_RESTART;
299 		act.sa_handler = onsig;
300 		for (i = 0; sigtable[i]; ++i)	/* always unlink toutpath */
301 			sigaction(sigtable[i], &act, 0);
302 	} else
303 		outfile = outpath;
304 	if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL)
305 		err(2, "%s", outfile);
306 	if (mflag)
307 		fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab);
308 	else
309 		fsort(-1, 0, filelist, argc-optind, outfp, fldtab);
310 	if (outfile != outpath) {
311 		if (access(outfile, 0))
312 			err(2, "%s", outfile);
313 		(void)unlink(outpath);
314 		if (link(outfile, outpath))
315 			err(2, "cannot link %s: output left in %s",
316 			    outpath, outfile);
317 		(void)unlink(outfile);
318 	}
319 	exit(0);
320 }
321 
/*
 * Signal handler: remove the temporary output file, then terminate at
 * once with status 2 (the error/interrupt exit status).
 */
/* ARGSUSED */
static void
onsig(int signo)
{
	(void)signo;

	cleanup();
	_exit(2);
}
330 
331 static void
332 cleanup(void)
333 {
334 
335 	if (toutpath[0])
336 		(void)unlink(toutpath);
337 }
338 
339 static void
340 usage(char *msg)
341 {
342 	extern char *__progname;
343 
344 	if (msg != NULL)
345 		warnx("%s", msg);
346 	(void)fprintf(stderr, "usage: %s [-bcdfHimnrsuz] "
347 	    "[-k field1[,field2]] [-o output] [-R char]\n"
348 	    "\t[-T dir] [-t char] [file ...]\n", __progname);
349 	exit(2);
350 }
351