1 /* $NetBSD: sort.c,v 1.59 2010/06/05 17:44:51 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64 /* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69 #include <sys/stat.h> 70 #include "sort.h" 71 #include "fsort.h" 72 #include "pathnames.h" 73 74 #include <sys/types.h> 75 #include <sys/time.h> 76 #include <sys/resource.h> 77 78 #include <paths.h> 79 #include <signal.h> 80 #include <stdlib.h> 81 #include <string.h> 82 #include <unistd.h> 83 #include <locale.h> 84 85 int REC_D = '\n'; 86 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 87 88 /* 89 * weight tables. Gweights is one of ascii, Rascii.. 90 * modified to weight rec_d = 0 (or 255) 91 */ 92 u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 93 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 94 95 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 96 int REVERSE = 0; 97 int posix_sort; 98 99 unsigned int debug_flags = 0; 100 101 static char toutpath[MAXPATHLEN]; 102 103 const char *tmpdir; /* where temporary files should be put */ 104 105 static void cleanup(void); 106 static void onsignal(int); 107 static void usage(const char *); 108 109 int main(int argc, char **argv); 110 111 int 112 main(int argc, char *argv[]) 113 { 114 int ch, i, stdinflag = 0; 115 char cflag = 0, mflag = 0; 116 char *outfile, *outpath = NULL; 117 struct field *fldtab; 118 size_t fldtab_sz, fld_cnt; 119 size_t alloc_size; 120 struct filelist filelist; 121 int num_input_files; 122 FILE *outfp = NULL; 123 struct rlimit rl; 124 struct stat st; 125 126 setlocale(LC_ALL, ""); 127 128 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 129 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 130 err(2, "getrlimit"); 131 rl.rlim_cur = rl.rlim_max; 132 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 133 err(2, "setrlimit"); 134 135 d_mask[REC_D = '\n'] = REC_D_F; 136 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 137 138 /* fldtab[0] is the global options. */ 139 fldtab_sz = 3; 140 fld_cnt = 0; 141 alloc_size = fldtab_sz * sizeof(*fldtab); 142 fldtab = malloc(alloc_size); 143 if (fldtab == NULL) 144 err(1, "Cannot allocate %zu bytes", alloc_size); 145 memset(fldtab, 0, alloc_size); 146 147 #define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux" 148 149 /* Convert "+field" args to -f format */ 150 fixit(&argc, argv, SORT_OPTS); 151 152 if (!(tmpdir = getenv("TMPDIR"))) 153 tmpdir = _PATH_TMP; 154 155 while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 156 switch (ch) { 157 case 'b': 158 fldtab[0].flags |= BI | BT; 159 break; 160 case 'c': 161 cflag = 1; 162 break; 163 case 'D': /* Debug flags */ 164 for (i = 0; optarg[i]; i++) 165 debug_flags |= 1 << (optarg[i] & 31); 166 break; 167 case 'd': case 'f': case 'i': case 'n': case 'l': 168 fldtab[0].flags |= optval(ch, 0); 169 break; 170 case 'H': 171 /* -H was ; use merge sort for blocks of large files' */ 172 /* That is now the default. */ 173 break; 174 case 'k': 175 alloc_size = (fldtab_sz + 1) * sizeof(*fldtab); 176 fldtab = realloc(fldtab, alloc_size); 177 if (fldtab == NULL) 178 err(1, "Cannot re-allocate %zu bytes", alloc_size); 179 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 180 fldtab_sz++; 181 182 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 183 break; 184 case 'm': 185 mflag = 1; 186 break; 187 case 'o': 188 outpath = optarg; 189 break; 190 case 'r': 191 REVERSE = 1; 192 break; 193 case 's': 194 /* 195 * Nominally 'stable sort', keep lines with equal keys 196 * in input file order. (Default for NetBSD) 197 * (-s for GNU sort compatibility.) 198 */ 199 posix_sort = 0; 200 break; 201 case 'S': 202 /* 203 * Reverse of -s! 204 * This needs to enforce a POSIX sort where records 205 * with equal keys are then sorted by the raw data. 206 * Currently not implemented! 207 * (using libc radixsort() v sradixsort() doesn't 208 * have the desired effect.) 209 */ 210 posix_sort = 1; 211 break; 212 case 't': 213 if (SEP_FLAG) 214 usage("multiple field delimiters"); 215 SEP_FLAG = 1; 216 d_mask[' '] &= ~FLD_D; 217 d_mask['\t'] &= ~FLD_D; 218 d_mask[(u_char)*optarg] |= FLD_D; 219 if (d_mask[(u_char)*optarg] & REC_D_F) 220 errx(2, "record/field delimiter clash"); 221 break; 222 case 'R': 223 if (REC_D != '\n') 224 usage("multiple record delimiters"); 225 REC_D = *optarg; 226 if (REC_D == '\n') 227 break; 228 if (optarg[1] != '\0') { 229 char *ep; 230 int t = 0; 231 if (optarg[0] == '\\') 232 optarg++, t = 8; 233 REC_D = (int)strtol(optarg, &ep, t); 234 if (*ep != '\0' || REC_D < 0 || 235 REC_D >= (int)__arraycount(d_mask)) 236 errx(2, "invalid record delimiter %s", 237 optarg); 238 } 239 d_mask['\n'] = d_mask[' ']; 240 d_mask[REC_D] = REC_D_F; 241 break; 242 case 'T': 243 /* -T tmpdir */ 244 tmpdir = optarg; 245 break; 246 case 'u': 247 UNIQUE = 1; 248 break; 249 case '?': 250 default: 251 usage(NULL); 252 } 253 } 254 255 if (UNIQUE) 256 /* Don't sort on raw record if keys match */ 257 posix_sort = 0; 258 259 if (cflag && argc > optind+1) 260 errx(2, "too many input files for -c option"); 261 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 262 outpath = argv[argc-1]; 263 argc -= 2; 264 } 265 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 266 errx(2, "too many input files for -m option"); 267 268 for (i = optind; i < argc; i++) { 269 /* allow one occurrence of /dev/stdin */ 270 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 271 if (stdinflag) 272 warnx("ignoring extra \"%s\" in file list", 273 argv[i]); 274 else 275 stdinflag = 1; 276 277 /* change to /dev/stdin if '-' */ 278 if (argv[i][0] == '-') { 279 static char path_stdin[] = _PATH_STDIN; 280 argv[i] = path_stdin; 281 } 282 283 } else if ((ch = access(argv[i], R_OK))) 284 err(2, "%s", argv[i]); 285 } 286 287 if (fldtab[1].icol.num == 0) { 288 /* No sort key specified */ 289 if (fldtab[0].flags & (I|D|F|N|L)) { 290 /* Modified - generate a key that covers the line */ 291 fldtab[0].flags &= ~(BI|BT); 292 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 293 fldreset(fldtab); 294 } else { 295 /* Unmodified, just compare the line */ 296 SINGL_FLD = 1; 297 fldtab[0].icol.num = 1; 298 } 299 } else { 300 fldreset(fldtab); 301 } 302 303 settables(); 304 305 if (optind == argc) { 306 static const char * const names[] = { _PATH_STDIN, NULL }; 307 filelist.names = names; 308 num_input_files = 1; 309 } else { 310 filelist.names = (const char * const *) &argv[optind]; 311 num_input_files = argc - optind; 312 } 313 314 if (cflag) { 315 order(&filelist, fldtab); 316 /* NOT REACHED */ 317 } 318 319 if (!outpath) { 320 toutpath[0] = '\0'; /* path not used in this case */ 321 outfile = outpath = toutpath; 322 outfp = stdout; 323 } else if (lstat(outpath, &st) == 0 324 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 325 /* output file exists and isn't character or block device */ 326 struct sigaction act; 327 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 328 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 329 int outfd; 330 errno = 0; 331 if (access(outpath, W_OK)) 332 err(2, "%s", outpath); 333 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 334 outpath); 335 if ((outfd = mkstemp(toutpath)) == -1) 336 err(2, "Cannot create temporary file `%s'", toutpath); 337 (void)atexit(cleanup); 338 act.sa_handler = onsignal; 339 (void) sigemptyset(&act.sa_mask); 340 act.sa_flags = SA_RESTART | SA_RESETHAND; 341 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 342 sigaction(sigtable[i], &act, 0); 343 outfile = toutpath; 344 if ((outfp = fdopen(outfd, "w")) == NULL) 345 err(2, "Cannot open temporary file `%s'", toutpath); 346 } else { 347 outfile = outpath; 348 349 if ((outfp = fopen(outfile, "w")) == NULL) 350 err(2, "output file %s", outfile); 351 } 352 353 if (mflag) 354 fmerge(&filelist, num_input_files, outfp, fldtab); 355 else 356 fsort(&filelist, num_input_files, outfp, fldtab); 357 358 if (outfile != outpath) { 359 if (access(outfile, F_OK)) 360 err(2, "%s", outfile); 361 362 /* 363 * Copy file permissions bits of the original file. 364 * st is initialized above, when we create the 365 * temporary spool file. 366 */ 367 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 368 err(2, "cannot chmod %s: output left in %s", 369 outpath, outfile); 370 } 371 372 (void)unlink(outpath); 373 if (link(outfile, outpath)) 374 err(2, "cannot link %s: output left in %s", 375 outpath, outfile); 376 (void)unlink(outfile); 377 toutpath[0] = 0; 378 } 379 exit(0); 380 } 381 382 static void 383 onsignal(int __attribute__ ((unused)) sig) 384 { 385 cleanup(); 386 } 387 388 static void 389 cleanup(void) 390 { 391 if (toutpath[0]) 392 (void)unlink(toutpath); 393 } 394 395 static void 396 usage(const char *msg) 397 { 398 if (msg != NULL) 399 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 400 (void)fprintf(stderr, 401 "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]" 402 " [-R char] [-T dir]", getprogname()); 403 (void)fprintf(stderr, 404 " [-t char] [file ...]\n"); 405 exit(2); 406 } 407 408 RECHEADER * 409 allocrec(RECHEADER *rec, size_t size) 410 { 411 size_t alloc_size = size + sizeof(long) - 1; 412 RECHEADER *ach = realloc(rec, alloc_size); 413 if (ach == NULL) 414 err(1, "Cannot re-allocate %zu bytes", alloc_size); 415 return (ach); 416 } 417