1 /* $NetBSD: sort.c,v 1.59 2010/06/05 17:44:51 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include <sys/stat.h> 70 #include "sort.h" 71 #include "fsort.h" 72 #include "pathnames.h" 73 74 #include <sys/types.h> 75 #include <sys/time.h> 76 #include <sys/resource.h> 77 78 #include <paths.h> 79 #include <signal.h> 80 #include <stdlib.h> 81 #include <string.h> 82 #include <unistd.h> 83 #include <locale.h> 84 85 int REC_D = '\n'; 86 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 87 88 /* 89 * weight tables. Gweights is one of ascii, Rascii.. 90 * modified to weight rec_d = 0 (or 255) 91 */ 92 u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 93 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 94 95 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 96 int REVERSE = 0; 97 int posix_sort; 98 99 unsigned int debug_flags = 0; 100 101 static char toutpath[MAXPATHLEN]; 102 103 const char *tmpdir; /* where temporary files should be put */ 104 105 static void cleanup(void); 106 static void onsignal(int); 107 static void usage(const char *); 108 109 int 110 main(int argc, char *argv[]) 111 { 112 int ch, i, stdinflag = 0; 113 char cflag = 0, mflag = 0; 114 char *outfile, *outpath = NULL; 115 struct field *fldtab; 116 size_t fldtab_sz, fld_cnt; 117 size_t alloc_size; 118 struct filelist filelist; 119 int num_input_files; 120 FILE *outfp = NULL; 121 struct rlimit rl; 122 struct stat st; 123 124 setlocale(LC_ALL, ""); 125 126 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 127 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 128 err(2, "getrlimit"); 129 rl.rlim_cur = rl.rlim_max; 130 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 131 err(2, "setrlimit"); 132 133 d_mask[REC_D = '\n'] = REC_D_F; 134 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 135 136 /* fldtab[0] is the global options. */ 137 fldtab_sz = 3; 138 fld_cnt = 0; 139 alloc_size = fldtab_sz * sizeof(*fldtab); 140 fldtab = malloc(alloc_size); 141 if (fldtab == NULL) 142 err(1, "Cannot allocate %zu bytes", alloc_size); 143 memset(fldtab, 0, alloc_size); 144 145 #define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux" 146 147 /* Convert "+field" args to -f format */ 148 fixit(&argc, argv, SORT_OPTS); 149 150 if (!(tmpdir = getenv("TMPDIR"))) 151 tmpdir = _PATH_TMP; 152 153 while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 154 switch (ch) { 155 case 'b': 156 fldtab[0].flags |= BI | BT; 157 break; 158 case 'c': 159 cflag = 1; 160 break; 161 case 'D': /* Debug flags */ 162 for (i = 0; optarg[i]; i++) 163 debug_flags |= 1 << (optarg[i] & 31); 164 break; 165 case 'd': case 'f': case 'i': case 'n': case 'l': 166 fldtab[0].flags |= optval(ch, 0); 167 break; 168 case 'H': 169 /* -H was ; use merge sort for blocks of large files' */ 170 /* That is now the default. */ 171 break; 172 case 'k': 173 alloc_size = (fldtab_sz + 1) * sizeof(*fldtab); 174 fldtab = realloc(fldtab, alloc_size); 175 if (fldtab == NULL) 176 err(1, "Cannot re-allocate %zu bytes", alloc_size); 177 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 178 fldtab_sz++; 179 180 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 181 break; 182 case 'm': 183 mflag = 1; 184 break; 185 case 'o': 186 outpath = optarg; 187 break; 188 case 'r': 189 REVERSE = 1; 190 break; 191 case 's': 192 /* 193 * Nominally 'stable sort', keep lines with equal keys 194 * in input file order. (Default for NetBSD) 195 * (-s for GNU sort compatibility.) 196 */ 197 posix_sort = 0; 198 break; 199 case 'S': 200 /* 201 * Reverse of -s! 202 * This needs to enforce a POSIX sort where records 203 * with equal keys are then sorted by the raw data. 204 * Currently not implemented! 205 * (using libc radixsort() v sradixsort() doesn't 206 * have the desired effect.) 207 */ 208 posix_sort = 1; 209 break; 210 case 't': 211 if (SEP_FLAG) 212 usage("multiple field delimiters"); 213 SEP_FLAG = 1; 214 d_mask[' '] &= ~FLD_D; 215 d_mask['\t'] &= ~FLD_D; 216 d_mask[(u_char)*optarg] |= FLD_D; 217 if (d_mask[(u_char)*optarg] & REC_D_F) 218 errx(2, "record/field delimiter clash"); 219 break; 220 case 'R': 221 if (REC_D != '\n') 222 usage("multiple record delimiters"); 223 REC_D = *optarg; 224 if (REC_D == '\n') 225 break; 226 if (optarg[1] != '\0') { 227 char *ep; 228 int t = 0; 229 if (optarg[0] == '\\') 230 optarg++, t = 8; 231 REC_D = (int)strtol(optarg, &ep, t); 232 if (*ep != '\0' || REC_D < 0 || 233 REC_D >= (int)__arraycount(d_mask)) 234 errx(2, "invalid record delimiter %s", 235 optarg); 236 } 237 d_mask['\n'] = d_mask[' ']; 238 d_mask[REC_D] = REC_D_F; 239 break; 240 case 'T': 241 /* -T tmpdir */ 242 tmpdir = optarg; 243 break; 244 case 'u': 245 UNIQUE = 1; 246 break; 247 case '?': 248 default: 249 usage(NULL); 250 } 251 } 252 253 if (UNIQUE) 254 /* Don't sort on raw record if keys match */ 255 posix_sort = 0; 256 257 if (cflag && argc > optind+1) 258 errx(2, "too many input files for -c option"); 259 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 260 outpath = argv[argc-1]; 261 argc -= 2; 262 } 263 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 264 errx(2, "too many input files for -m option"); 265 266 for (i = optind; i < argc; i++) { 267 /* allow one occurrence of /dev/stdin */ 268 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 269 if (stdinflag) 270 warnx("ignoring extra \"%s\" in file list", 271 argv[i]); 272 else 273 stdinflag = 1; 274 275 /* change to /dev/stdin if '-' */ 276 if (argv[i][0] == '-') { 277 static char path_stdin[] = _PATH_STDIN; 278 argv[i] = path_stdin; 279 } 280 281 } else if ((ch = access(argv[i], R_OK))) 282 err(2, "%s", argv[i]); 283 } 284 285 if (fldtab[1].icol.num == 0) { 286 /* No sort key specified */ 287 if (fldtab[0].flags & (I|D|F|N|L)) { 288 /* Modified - generate a key that covers the line */ 289 fldtab[0].flags &= ~(BI|BT); 290 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 291 fldreset(fldtab); 292 } else { 293 /* Unmodified, just compare the line */ 294 SINGL_FLD = 1; 295 fldtab[0].icol.num = 1; 296 } 297 } else { 298 fldreset(fldtab); 299 } 300 301 settables(); 302 303 if (optind == argc) { 304 static const char * const names[] = { _PATH_STDIN, NULL }; 305 filelist.names = names; 306 num_input_files = 1; 307 } else { 308 filelist.names = (const char * const *) &argv[optind]; 309 num_input_files = argc - optind; 310 } 311 312 if (cflag) { 313 order(&filelist, fldtab); 314 /* NOT REACHED */ 315 } 316 317 if (!outpath) { 318 toutpath[0] = '\0'; /* path not used in this case */ 319 outfile = outpath = toutpath; 320 outfp = stdout; 321 } else if (lstat(outpath, &st) == 0 322 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 323 /* output file exists and isn't character or block device */ 324 struct sigaction act; 325 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 326 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 327 int outfd; 328 errno = 0; 329 if (access(outpath, W_OK)) 330 err(2, "%s", outpath); 331 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 332 outpath); 333 if ((outfd = mkstemp(toutpath)) == -1) 334 err(2, "Cannot create temporary file `%s'", toutpath); 335 (void)atexit(cleanup); 336 act.sa_handler = onsignal; 337 (void) sigemptyset(&act.sa_mask); 338 act.sa_flags = SA_RESTART | SA_RESETHAND; 339 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 340 sigaction(sigtable[i], &act, 0); 341 outfile = toutpath; 342 if ((outfp = fdopen(outfd, "w")) == NULL) 343 err(2, "Cannot open temporary file `%s'", toutpath); 344 } else { 345 outfile = outpath; 346 347 if ((outfp = fopen(outfile, "w")) == NULL) 348 err(2, "output file %s", outfile); 349 } 350 351 if (mflag) 352 fmerge(&filelist, num_input_files, outfp, fldtab); 353 else 354 fsort(&filelist, num_input_files, outfp, fldtab); 355 356 if (outfile != outpath) { 357 if (access(outfile, F_OK)) 358 err(2, "%s", outfile); 359 360 /* 361 * Copy file permissions bits of the original file. 362 * st is initialized above, when we create the 363 * temporary spool file. 364 */ 365 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 366 err(2, "cannot chmod %s: output left in %s", 367 outpath, outfile); 368 } 369 370 (void)unlink(outpath); 371 if (link(outfile, outpath)) 372 err(2, "cannot link %s: output left in %s", 373 outpath, outfile); 374 (void)unlink(outfile); 375 toutpath[0] = 0; 376 } 377 exit(0); 378 } 379 380 static void 381 onsignal(int __attribute__ ((unused)) sig) 382 { 383 cleanup(); 384 } 385 386 static void 387 cleanup(void) 388 { 389 if (toutpath[0]) 390 (void)unlink(toutpath); 391 } 392 393 static void 394 usage(const char *msg) 395 { 396 if (msg != NULL) 397 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 398 (void)fprintf(stderr, 399 "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]" 400 " [-R char] [-T dir]", getprogname()); 401 (void)fprintf(stderr, 402 " [-t char] [file ...]\n"); 403 exit(2); 404 } 405 406 RECHEADER * 407 allocrec(RECHEADER *rec, size_t size) 408 { 409 size_t alloc_size = size + sizeof(long) - 1; 410 RECHEADER *ach = realloc(rec, alloc_size); 411 if (ach == NULL) 412 err(1, "Cannot re-allocate %zu bytes", alloc_size); 413 return (ach); 414 } 415