1 /* $NetBSD: sort.c,v 1.59 2010/06/05 17:44:51 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include <sys/stat.h> 70 #include "sort.h" 71 #include "fsort.h" 72 #include "pathnames.h" 73 74 #ifndef lint 75 __COPYRIGHT("@(#) Copyright (c) 1993\ 76 The Regents of the University of California. All rights reserved."); 77 #endif /* not lint */ 78 79 __RCSID("$NetBSD: sort.c,v 1.59 2010/06/05 17:44:51 dholland Exp $"); 80 81 #include <sys/types.h> 82 #include <sys/time.h> 83 #include <sys/resource.h> 84 85 #include <paths.h> 86 #include <signal.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <unistd.h> 90 #include <locale.h> 91 92 int REC_D = '\n'; 93 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 94 95 /* 96 * weight tables. Gweights is one of ascii, Rascii.. 97 * modified to weight rec_d = 0 (or 255) 98 */ 99 u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 100 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 101 102 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103 int REVERSE = 0; 104 int posix_sort; 105 106 unsigned int debug_flags = 0; 107 108 static char toutpath[MAXPATHLEN]; 109 110 const char *tmpdir; /* where temporary files should be put */ 111 112 static void cleanup(void); 113 static void onsignal(int); 114 static void usage(const char *); 115 116 int main(int argc, char **argv); 117 118 int 119 main(int argc, char *argv[]) 120 { 121 int ch, i, stdinflag = 0; 122 char cflag = 0, mflag = 0; 123 char *outfile, *outpath = 0; 124 struct field *fldtab; 125 size_t fldtab_sz, fld_cnt; 126 size_t alloc_size; 127 struct filelist filelist; 128 int num_input_files; 129 FILE *outfp = NULL; 130 struct rlimit rl; 131 struct stat st; 132 133 setlocale(LC_ALL, ""); 134 135 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 136 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 137 err(2, "getrlimit"); 138 rl.rlim_cur = rl.rlim_max; 139 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 140 err(2, "setrlimit"); 141 142 d_mask[REC_D = '\n'] = REC_D_F; 143 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 144 145 /* fldtab[0] is the global options. */ 146 fldtab_sz = 3; 147 fld_cnt = 0; 148 alloc_size = fldtab_sz * sizeof(*fldtab); 149 fldtab = malloc(alloc_size); 150 if (fldtab == NULL) 151 err(1, "Cannot allocate %zu bytes", alloc_size); 152 memset(fldtab, 0, alloc_size); 153 154 #define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux" 155 156 /* Convert "+field" args to -f format */ 157 fixit(&argc, argv, SORT_OPTS); 158 159 if (!(tmpdir = getenv("TMPDIR"))) 160 tmpdir = _PATH_TMP; 161 162 while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 163 switch (ch) { 164 case 'b': 165 fldtab[0].flags |= BI | BT; 166 break; 167 case 'c': 168 cflag = 1; 169 break; 170 case 'D': /* Debug flags */ 171 for (i = 0; optarg[i]; i++) 172 debug_flags |= 1 << (optarg[i] & 31); 173 break; 174 case 'd': case 'f': case 'i': case 'n': case 'l': 175 fldtab[0].flags |= optval(ch, 0); 176 break; 177 case 'H': 178 /* -H was ; use merge sort for blocks of large files' */ 179 /* That is now the default. */ 180 break; 181 case 'k': 182 alloc_size = (fldtab_sz + 1) * sizeof(*fldtab); 183 fldtab = realloc(fldtab, alloc_size); 184 if (fldtab == NULL) 185 err(1, "Cannot re-allocate %zu bytes", alloc_size); 186 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 187 fldtab_sz++; 188 189 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 190 break; 191 case 'm': 192 mflag = 1; 193 break; 194 case 'o': 195 outpath = optarg; 196 break; 197 case 'r': 198 REVERSE = 1; 199 break; 200 case 's': 201 /* 202 * Nominally 'stable sort', keep lines with equal keys 203 * in input file order. (Default for NetBSD) 204 * (-s for GNU sort compatibility.) 205 */ 206 posix_sort = 0; 207 break; 208 case 'S': 209 /* 210 * Reverse of -s! 211 * This needs to enforce a POSIX sort where records 212 * with equal keys are then sorted by the raw data. 213 * Currently not implemented! 214 * (using libc radixsort() v sradixsort() doesn't 215 * have the desired effect.) 216 */ 217 posix_sort = 1; 218 break; 219 case 't': 220 if (SEP_FLAG) 221 usage("multiple field delimiters"); 222 SEP_FLAG = 1; 223 d_mask[' '] &= ~FLD_D; 224 d_mask['\t'] &= ~FLD_D; 225 d_mask[(u_char)*optarg] |= FLD_D; 226 if (d_mask[(u_char)*optarg] & REC_D_F) 227 errx(2, "record/field delimiter clash"); 228 break; 229 case 'R': 230 if (REC_D != '\n') 231 usage("multiple record delimiters"); 232 REC_D = *optarg; 233 if (REC_D == '\n') 234 break; 235 if (optarg[1] != '\0') { 236 char *ep; 237 int t = 0; 238 if (optarg[0] == '\\') 239 optarg++, t = 8; 240 REC_D = (int)strtol(optarg, &ep, t); 241 if (*ep != '\0' || REC_D < 0 || 242 REC_D >= (int)__arraycount(d_mask)) 243 errx(2, "invalid record delimiter %s", 244 optarg); 245 } 246 d_mask['\n'] = d_mask[' ']; 247 d_mask[REC_D] = REC_D_F; 248 break; 249 case 'T': 250 /* -T tmpdir */ 251 tmpdir = optarg; 252 break; 253 case 'u': 254 UNIQUE = 1; 255 break; 256 case '?': 257 default: 258 usage(NULL); 259 } 260 } 261 262 if (UNIQUE) 263 /* Don't sort on raw record if keys match */ 264 posix_sort = 0; 265 266 if (cflag && argc > optind+1) 267 errx(2, "too many input files for -c option"); 268 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 269 outpath = argv[argc-1]; 270 argc -= 2; 271 } 272 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 273 errx(2, "too many input files for -m option"); 274 275 for (i = optind; i < argc; i++) { 276 /* allow one occurrence of /dev/stdin */ 277 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 278 if (stdinflag) 279 warnx("ignoring extra \"%s\" in file list", 280 argv[i]); 281 else 282 stdinflag = 1; 283 284 /* change to /dev/stdin if '-' */ 285 if (argv[i][0] == '-') { 286 static char path_stdin[] = _PATH_STDIN; 287 argv[i] = path_stdin; 288 } 289 290 } else if ((ch = access(argv[i], R_OK))) 291 err(2, "%s", argv[i]); 292 } 293 294 if (fldtab[1].icol.num == 0) { 295 /* No sort key specified */ 296 if (fldtab[0].flags & (I|D|F|N|L)) { 297 /* Modified - generate a key that covers the line */ 298 fldtab[0].flags &= ~(BI|BT); 299 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 300 fldreset(fldtab); 301 } else { 302 /* Unmodified, just compare the line */ 303 SINGL_FLD = 1; 304 fldtab[0].icol.num = 1; 305 } 306 } else { 307 fldreset(fldtab); 308 } 309 310 settables(); 311 312 if (optind == argc) { 313 static const char * const names[] = { _PATH_STDIN, NULL }; 314 filelist.names = names; 315 num_input_files = 1; 316 } else { 317 filelist.names = (const char * const *) &argv[optind]; 318 num_input_files = argc - optind; 319 } 320 321 if (cflag) { 322 order(&filelist, fldtab); 323 /* NOT REACHED */ 324 } 325 326 if (!outpath) { 327 toutpath[0] = '\0'; /* path not used in this case */ 328 outfile = outpath = toutpath; 329 outfp = stdout; 330 } else if (lstat(outpath, &st) == 0 331 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 332 /* output file exists and isn't character or block device */ 333 struct sigaction act; 334 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 335 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 336 int outfd; 337 errno = 0; 338 if (access(outpath, W_OK)) 339 err(2, "%s", outpath); 340 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 341 outpath); 342 if ((outfd = mkstemp(toutpath)) == -1) 343 err(2, "Cannot create temporary file `%s'", toutpath); 344 (void)atexit(cleanup); 345 act.sa_handler = onsignal; 346 (void) sigemptyset(&act.sa_mask); 347 act.sa_flags = SA_RESTART | SA_RESETHAND; 348 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 349 sigaction(sigtable[i], &act, 0); 350 outfile = toutpath; 351 if ((outfp = fdopen(outfd, "w")) == NULL) 352 err(2, "Cannot open temporary file `%s'", toutpath); 353 } else { 354 outfile = outpath; 355 356 if ((outfp = fopen(outfile, "w")) == NULL) 357 err(2, "output file %s", outfile); 358 } 359 360 if (mflag) 361 fmerge(&filelist, num_input_files, outfp, fldtab); 362 else 363 fsort(&filelist, num_input_files, outfp, fldtab); 364 365 if (outfile != outpath) { 366 if (access(outfile, F_OK)) 367 err(2, "%s", outfile); 368 369 /* 370 * Copy file permissions bits of the original file. 371 * st is initialized above, when we create the 372 * temporary spool file. 373 */ 374 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 375 err(2, "cannot chmod %s: output left in %s", 376 outpath, outfile); 377 } 378 379 (void)unlink(outpath); 380 if (link(outfile, outpath)) 381 err(2, "cannot link %s: output left in %s", 382 outpath, outfile); 383 (void)unlink(outfile); 384 toutpath[0] = 0; 385 } 386 exit(0); 387 } 388 389 static void 390 onsignal(int __attribute__ ((unused)) sig) 391 { 392 cleanup(); 393 } 394 395 static void 396 cleanup(void) 397 { 398 if (toutpath[0]) 399 (void)unlink(toutpath); 400 } 401 402 static void 403 usage(const char *msg) 404 { 405 if (msg != NULL) 406 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 407 (void)fprintf(stderr, 408 "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]" 409 " [-R char] [-T dir]", getprogname()); 410 (void)fprintf(stderr, 411 " [-t char] [file ...]\n"); 412 exit(2); 413 } 414 415 RECHEADER * 416 allocrec(RECHEADER *rec, size_t size) 417 { 418 size_t alloc_size = size + sizeof(long) - 1; 419 RECHEADER *ach = realloc(rec, alloc_size); 420 if (ach == NULL) 421 err(1, "Cannot re-allocate %zu bytes", alloc_size); 422 return (ach); 423 } 424