1 /* $OpenBSD: sort.c,v 1.36 2007/08/22 06:56:40 jmc Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Peter McIlroy. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifndef lint 36 static char copyright[] = 37 "@(#) Copyright (c) 1993\n\ 38 The Regents of the University of California. All rights reserved.\n"; 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)sort.c 8.1 (Berkeley) 6/6/93"; 44 #else 45 static char rcsid[] = "$OpenBSD: sort.c,v 1.36 2007/08/22 06:56:40 jmc Exp $"; 46 #endif 47 #endif /* not lint */ 48 49 /* 50 * Sort sorts a file using an optional user-defined key. 51 * Sort uses radix sort for internal sorting, and allows 52 * a choice of merge sort and radix sort for external sorting. 53 */ 54 55 #include "sort.h" 56 #include "fsort.h" 57 #include "pathnames.h" 58 59 #include <sys/types.h> 60 #include <sys/stat.h> 61 #include <locale.h> 62 #include <paths.h> 63 #include <signal.h> 64 #include <stdlib.h> 65 #include <string.h> 66 #include <unistd.h> 67 #include <err.h> 68 69 int REC_D = '\n'; 70 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 71 72 /* 73 * weight tables. Gweights is one of ascii, Rascii.. 74 * modified to weight rec_d = 0 (or 255) 75 */ 76 extern u_char gweights[NBINS]; 77 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 78 79 /* 80 * masks of ignored characters. Alltable is 256 ones 81 */ 82 u_char dtable[NBINS], itable[NBINS], alltable[NBINS]; 83 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0, STABLE = 0; 84 struct coldesc *clist; 85 int ncols = 0; 86 int ND = 10; /* limit on number of -k options. */ 87 88 char *devstdin = _PATH_STDIN; 89 char *tmpdir = _PATH_VARTMP; 90 char toutpath[PATH_MAX]; 91 92 static void cleanup(void); 93 static void onsig(int); 94 static void usage(char *); 95 96 #define CHECK_NFIELDS \ 97 if (++nfields == ND) { \ 98 ND += 10; \ 99 if ((p = realloc(fldtab, ND)) == NULL) \ 100 errx(2, "cannot allocate memory"); \ 101 ftpos = p + (ftpos - fldtab); \ 102 fldtab = p; \ 103 } 104 105 int 106 main(int argc, char *argv[]) 107 { 108 int (*get)(int, union f_handle, int, RECHEADER *, u_char *, struct field *); 109 int ch, i, stdinflag = 0, tmp = 0; 110 char nfields = 0, cflag = 0, mflag = 0; 111 char *outfile, *outpath = 0; 112 struct field *fldtab, *ftpos; 113 union f_handle filelist; 114 FILE *outfp = NULL; 115 void *p; 116 117 setlocale(LC_ALL, ""); 118 119 if ((clist = calloc((ND+1)*2, sizeof(struct coldesc))) == NULL || 120 (ftpos = fldtab = calloc(ND+2, sizeof(struct field))) == NULL) 121 errx(2, "cannot allocate memory"); 122 memset(d_mask, 0, NBINS); 123 d_mask[REC_D = '\n'] = REC_D_F; 124 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 125 fixit(&argc, argv); 126 if (!issetugid() && (outfile = getenv("TMPDIR"))) 127 tmpdir = outfile; 128 while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:t:T:uy:zs")) != -1) { 129 switch (ch) { 130 case 'b': fldtab->flags |= BI | BT; 131 break; 132 case 'd': 133 case 'f': 134 case 'i': 135 case 'n': 136 case 'r': tmp |= optval(ch, 0); 137 if (tmp & R && tmp & F) 138 fldtab->weights = RFtable; 139 else if (tmp & F) 140 fldtab->weights = Ftable; 141 else if (tmp & R) 142 fldtab->weights = Rascii; 143 fldtab->flags |= tmp; 144 break; 145 case 'o': 146 outpath = optarg; 147 break; 148 case 'k': 149 CHECK_NFIELDS; 150 setfield(optarg, ++ftpos, fldtab->flags); 151 break; 152 case 't': 153 if (SEP_FLAG) 154 usage("multiple field delimiters"); 155 SEP_FLAG = 1; 156 d_mask[' '] &= ~FLD_D; 157 d_mask['\t'] &= ~FLD_D; 158 d_mask[(int)*optarg] |= FLD_D; 159 if (d_mask[(int)*optarg] & REC_D_F) 160 err(2, "record/field delimiter clash"); 161 break; 162 case 'R': 163 if (REC_D != '\n') 164 usage("multiple record delimiters"); 165 if ('\n' == (REC_D = *optarg)) 166 break; 167 d_mask['\n'] = d_mask[' ']; 168 d_mask[REC_D] = REC_D_F; 169 break; 170 case 'T': 171 tmpdir = optarg; 172 break; 173 case 'u': 174 UNIQUE = 1; 175 break; 176 case 'c': 177 cflag = 1; 178 break; 179 case 'm': 180 mflag = 1; 181 break; 182 case 'H': 183 PANIC = 0; 184 break; 185 case 'y': 186 /* accept -y for backwards compat. */ 187 break; 188 case 'z': 189 if (REC_D != '\n') 190 usage("multiple record delimiters"); 191 REC_D = '\0'; 192 d_mask['\n'] = d_mask[' ']; 193 d_mask[REC_D] = REC_D_F; 194 break; 195 case 's': 196 STABLE = 1; 197 break; 198 case '?': 199 default: 200 usage(NULL); 201 } 202 } 203 204 if (cflag && argc > optind+1) 205 errx(2, "too many input files for -c option"); 206 207 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 208 outpath = argv[argc-1]; 209 argc -= 2; 210 } 211 212 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 213 errx(2, "too many input files for -m option"); 214 215 for (i = optind; i < argc; i++) { 216 /* allow one occurrence of /dev/stdin */ 217 if (!strcmp(argv[i], "-") || !strcmp(argv[i], devstdin)) { 218 if (stdinflag) 219 warnx("ignoring extra \"%s\" in file list", 220 argv[i]); 221 else { 222 stdinflag = 1; 223 argv[i] = devstdin; 224 } 225 } else if ((ch = access(argv[i], R_OK))) 226 err(2, "%s", argv[i]); 227 } 228 229 if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { 230 SINGL_FLD = 1; 231 fldtab[0].icol.num = 1; 232 } else { 233 if (!fldtab[1].icol.num) { 234 CHECK_NFIELDS; 235 fldtab[0].flags &= ~(BI|BT); 236 setfield("1", ++ftpos, fldtab->flags); 237 } 238 fldreset(fldtab); 239 fldtab[0].flags &= ~F; 240 } 241 settables(fldtab[0].flags); 242 num_init(); 243 fldtab->weights = gweights; 244 245 if (optind == argc) { 246 static char *names[2]; 247 248 names[0] = devstdin; 249 names[1] = NULL; 250 filelist.names = names; 251 optind--; 252 } else 253 filelist.names = argv+optind; 254 255 if (SINGL_FLD) 256 get = makeline; 257 else 258 get = makekey; 259 260 if (!SINGL_FLD) { 261 if ((linebuf = malloc(linebuf_size)) == NULL) 262 err(2, NULL); 263 } 264 265 if (cflag) { 266 order(filelist, get, fldtab); 267 /* NOT REACHED */ 268 } 269 270 if (!outpath) { 271 (void)snprintf(toutpath, 272 sizeof(toutpath), "%sstdout", _PATH_DEV); 273 outfile = outpath = toutpath; 274 } else if (!(ch = access(outpath, 0)) && 275 strncmp(_PATH_DEV, outpath, 5)) { 276 struct sigaction act; 277 int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ, 278 SIGVTALRM, SIGPROF, 0}; 279 int outfd; 280 mode_t um; 281 282 errno = 0; 283 284 if (access(outpath, W_OK)) 285 err(2, "%s", outpath); 286 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXXXXXX", 287 outpath); 288 um = umask(S_IWGRP|S_IWOTH); 289 (void)umask(um); 290 if ((outfd = mkstemp(toutpath)) == -1 || 291 fchmod(outfd, DEFFILEMODE & ~um) == -1 || 292 (outfp = fdopen(outfd, "w")) == 0) 293 err(2, "%s", toutpath); 294 outfile = toutpath; 295 296 (void)atexit(cleanup); 297 sigfillset(&act.sa_mask); 298 act.sa_flags = SA_RESTART; 299 act.sa_handler = onsig; 300 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 301 sigaction(sigtable[i], &act, 0); 302 } else 303 outfile = outpath; 304 if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) 305 err(2, "%s", outfile); 306 if (mflag) 307 fmerge(-1, filelist, argc-optind, get, outfp, putline, fldtab); 308 else 309 fsort(-1, 0, filelist, argc-optind, outfp, fldtab); 310 if (outfile != outpath) { 311 if (access(outfile, 0)) 312 err(2, "%s", outfile); 313 (void)unlink(outpath); 314 if (link(outfile, outpath)) 315 err(2, "cannot link %s: output left in %s", 316 outpath, outfile); 317 (void)unlink(outfile); 318 } 319 exit(0); 320 } 321 322 /* ARGSUSED */ 323 static void 324 onsig(int signo) 325 { 326 327 cleanup(); 328 _exit(2); /* return 2 on error/interrupt */ 329 } 330 331 static void 332 cleanup(void) 333 { 334 335 if (toutpath[0]) 336 (void)unlink(toutpath); 337 } 338 339 static void 340 usage(char *msg) 341 { 342 extern char *__progname; 343 344 if (msg != NULL) 345 warnx("%s", msg); 346 (void)fprintf(stderr, "usage: %s [-bcdfHimnrsuz] " 347 "[-k field1[,field2]] [-o output] [-R char]\n" 348 "\t[-T dir] [-t char] [file ...]\n", __progname); 349 exit(2); 350 } 351