1 /* $NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include <util.h> 70 #include "sort.h" 71 #include "fsort.h" 72 #include "pathnames.h" 73 74 #ifndef lint 75 __COPYRIGHT("@(#) Copyright (c) 1993\ 76 The Regents of the University of California. All rights reserved."); 77 #endif /* not lint */ 78 79 __RCSID("$NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $"); 80 81 #include <sys/types.h> 82 #include <sys/time.h> 83 #include <sys/resource.h> 84 85 #include <paths.h> 86 #include <signal.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <unistd.h> 90 #include <locale.h> 91 92 int REC_D = '\n'; 93 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 94 95 /* 96 * weight tables. Gweights is one of ascii, Rascii.. 97 * modified to weight rec_d = 0 (or 255) 98 */ 99 u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 100 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 101 102 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103 int REVERSE = 0; 104 int posix_sort; 105 106 unsigned int debug_flags = 0; 107 108 static char toutpath[MAXPATHLEN]; 109 110 const char *tmpdir; /* where temporary files should be put */ 111 112 static void cleanup(void); 113 static void onsignal(int); 114 __dead static void usage(const char *); 115 116 int 117 main(int argc, char *argv[]) 118 { 119 int ch, i, stdinflag = 0; 120 char cflag = 0, mflag = 0; 121 char *outfile, *outpath = 0; 122 struct field *fldtab; 123 size_t fldtab_sz, fld_cnt; 124 struct filelist filelist; 125 int num_input_files; 126 FILE *outfp = NULL; 127 struct rlimit rl; 128 struct stat st; 129 130 setlocale(LC_ALL, ""); 131 132 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 133 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 134 err(2, "getrlimit"); 135 rl.rlim_cur = rl.rlim_max; 136 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 137 err(2, "setrlimit"); 138 139 d_mask[REC_D = '\n'] = REC_D_F; 140 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 141 142 /* fldtab[0] is the global options. */ 143 fldtab_sz = 3; 144 fld_cnt = 0; 145 fldtab = emalloc(fldtab_sz * sizeof(*fldtab)); 146 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 147 148 #define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux" 149 150 /* Convert "+field" args to -f format */ 151 fixit(&argc, argv, SORT_OPTS); 152 153 if (!(tmpdir = getenv("TMPDIR"))) 154 tmpdir = _PATH_TMP; 155 156 while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 157 switch (ch) { 158 case 'b': 159 fldtab[0].flags |= BI | BT; 160 break; 161 case 'c': 162 cflag = 1; 163 break; 164 case 'D': /* Debug flags */ 165 for (i = 0; optarg[i]; i++) 166 debug_flags |= 1 << (optarg[i] & 31); 167 break; 168 case 'd': case 'f': case 'i': case 'n': case 'l': 169 #if defined(__minix) 170 case 'x': 171 #endif /* defined(__minix) */ 172 fldtab[0].flags |= optval(ch, 0); 173 break; 174 case 'H': 175 /* -H was ; use merge sort for blocks of large files' */ 176 /* That is now the default. */ 177 break; 178 case 'k': 179 fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 180 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 181 fldtab_sz++; 182 183 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 184 break; 185 case 'm': 186 mflag = 1; 187 break; 188 case 'o': 189 outpath = optarg; 190 break; 191 case 'r': 192 REVERSE = 1; 193 break; 194 case 's': 195 /* 196 * Nominally 'stable sort', keep lines with equal keys 197 * in input file order. (Default for NetBSD) 198 * (-s for GNU sort compatibility.) 199 */ 200 posix_sort = 0; 201 break; 202 case 'S': 203 /* 204 * Reverse of -s! 205 * This needs to enforce a POSIX sort where records 206 * with equal keys are then sorted by the raw data. 207 * Currently not implemented! 208 * (using libc radixsort() v sradixsort() doesn't 209 * have the desired effect.) 210 */ 211 posix_sort = 1; 212 break; 213 case 't': 214 if (SEP_FLAG) 215 usage("multiple field delimiters"); 216 SEP_FLAG = 1; 217 d_mask[' '] &= ~FLD_D; 218 d_mask['\t'] &= ~FLD_D; 219 d_mask[(u_char)*optarg] |= FLD_D; 220 if (d_mask[(u_char)*optarg] & REC_D_F) 221 errx(2, "record/field delimiter clash"); 222 break; 223 case 'R': 224 if (REC_D != '\n') 225 usage("multiple record delimiters"); 226 REC_D = *optarg; 227 if (REC_D == '\n') 228 break; 229 if (optarg[1] != '\0') { 230 char *ep; 231 int t = 0; 232 if (optarg[0] == '\\') 233 optarg++, t = 8; 234 REC_D = (int)strtol(optarg, &ep, t); 235 if (*ep != '\0' || REC_D < 0 || 236 REC_D >= (int)__arraycount(d_mask)) 237 errx(2, "invalid record delimiter %s", 238 optarg); 239 } 240 d_mask['\n'] = d_mask[' ']; 241 d_mask[REC_D] = REC_D_F; 242 break; 243 case 'T': 244 /* -T tmpdir */ 245 tmpdir = optarg; 246 break; 247 case 'u': 248 UNIQUE = 1; 249 break; 250 case '?': 251 default: 252 usage(NULL); 253 } 254 } 255 256 if (UNIQUE) 257 /* Don't sort on raw record if keys match */ 258 posix_sort = 0; 259 260 if (cflag && argc > optind+1) 261 errx(2, "too many input files for -c option"); 262 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 263 outpath = argv[argc-1]; 264 argc -= 2; 265 } 266 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 267 errx(2, "too many input files for -m option"); 268 269 for (i = optind; i < argc; i++) { 270 /* allow one occurrence of /dev/stdin */ 271 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 272 if (stdinflag) 273 warnx("ignoring extra \"%s\" in file list", 274 argv[i]); 275 else 276 stdinflag = 1; 277 278 /* change to /dev/stdin if '-' */ 279 if (argv[i][0] == '-') { 280 static char path_stdin[] = _PATH_STDIN; 281 argv[i] = path_stdin; 282 } 283 284 } else if ((ch = access(argv[i], R_OK))) 285 err(2, "%s", argv[i]); 286 } 287 288 if (fldtab[1].icol.num == 0) { 289 /* No sort key specified */ 290 if (fldtab[0].flags & (I|D|F|N|L)) { 291 /* Modified - generate a key that covers the line */ 292 fldtab[0].flags &= ~(BI|BT); 293 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 294 fldreset(fldtab); 295 } else { 296 /* Unmodified, just compare the line */ 297 SINGL_FLD = 1; 298 fldtab[0].icol.num = 1; 299 } 300 } else { 301 fldreset(fldtab); 302 } 303 304 settables(); 305 306 if (optind == argc) { 307 static const char * const names[] = { _PATH_STDIN, NULL }; 308 filelist.names = names; 309 num_input_files = 1; 310 } else { 311 filelist.names = (const char * const *) &argv[optind]; 312 num_input_files = argc - optind; 313 } 314 315 if (cflag) { 316 order(&filelist, fldtab); 317 /* NOT REACHED */ 318 } 319 320 if (!outpath) { 321 toutpath[0] = '\0'; /* path not used in this case */ 322 outfile = outpath = toutpath; 323 outfp = stdout; 324 } else if (lstat(outpath, &st) == 0 325 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 326 /* output file exists and isn't character or block device */ 327 struct sigaction act; 328 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 329 #if defined(__minix) 330 SIGVTALRM, SIGPROF, 0}; 331 #else 332 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 333 #endif /* defined(__minix) */ 334 int outfd; 335 errno = 0; 336 if (access(outpath, W_OK)) 337 err(2, "%s", outpath); 338 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 339 outpath); 340 if ((outfd = mkstemp(toutpath)) == -1) 341 err(2, "Cannot create temporary file `%s'", toutpath); 342 (void)atexit(cleanup); 343 act.sa_handler = onsignal; 344 (void) sigemptyset(&act.sa_mask); 345 act.sa_flags = SA_RESTART | SA_RESETHAND; 346 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 347 sigaction(sigtable[i], &act, 0); 348 outfile = toutpath; 349 if ((outfp = fdopen(outfd, "w")) == NULL) 350 err(2, "Cannot open temporary file `%s'", toutpath); 351 } else { 352 outfile = outpath; 353 354 if ((outfp = fopen(outfile, "w")) == NULL) 355 err(2, "output file %s", outfile); 356 } 357 358 if (mflag) 359 fmerge(&filelist, num_input_files, outfp, fldtab); 360 else 361 fsort(&filelist, num_input_files, outfp, fldtab); 362 363 if (outfile != outpath) { 364 if (access(outfile, F_OK)) 365 err(2, "%s", outfile); 366 367 /* 368 * Copy file permissions bits of the original file. 369 * st is initialized above, when we create the 370 * temporary spool file. 371 */ 372 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 373 err(2, "cannot chmod %s: output left in %s", 374 outpath, outfile); 375 } 376 377 (void)unlink(outpath); 378 if (link(outfile, outpath)) 379 err(2, "cannot link %s: output left in %s", 380 outpath, outfile); 381 (void)unlink(outfile); 382 toutpath[0] = 0; 383 } 384 exit(0); 385 } 386 387 static void 388 onsignal(int sig) 389 { 390 cleanup(); 391 } 392 393 static void 394 cleanup(void) 395 { 396 if (toutpath[0]) 397 (void)unlink(toutpath); 398 } 399 400 static void 401 usage(const char *msg) 402 { 403 if (msg != NULL) 404 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 405 (void)fprintf(stderr, 406 "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]" 407 " [-R char] [-T dir]", getprogname()); 408 (void)fprintf(stderr, 409 " [-t char] [file ...]\n"); 410 exit(2); 411 } 412 413 RECHEADER * 414 allocrec(RECHEADER *rec, size_t size) 415 { 416 417 return (erealloc(rec, size + sizeof(long) - 1)); 418 } 419