1 /* $NetBSD: cut.c,v 1.25 2008/07/21 14:19:22 lukem Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/cdefs.h> 36 #ifndef lint 37 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ 38 The Regents of the University of California. All rights reserved."); 39 #endif /* not lint */ 40 41 #ifndef lint 42 #if 0 43 static char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 44 #endif 45 __RCSID("$NetBSD: cut.c,v 1.25 2008/07/21 14:19:22 lukem Exp $"); 46 #endif /* not lint */ 47 48 #include <ctype.h> 49 #include <err.h> 50 #include <errno.h> 51 #include <limits.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <unistd.h> 57 #include <util.h> 58 #include <wchar.h> 59 #include <sys/param.h> 60 61 static int bflag; 62 static int cflag; 63 static char dchar; 64 static int dflag; 65 static int fflag; 66 static int sflag; 67 68 static void b_cut(FILE *, const char *); 69 static void c_cut(FILE *, const char *); 70 static void f_cut(FILE *, const char *); 71 static void get_list(char *); 72 static void usage(void) __dead; 73 74 int 75 main(int argc, char *argv[]) 76 { 77 FILE *fp; 78 void (*fcn)(FILE *, const char *); 79 int ch; 80 81 fcn = NULL; 82 (void)setlocale(LC_ALL, ""); 83 84 dchar = '\t'; /* default delimiter is \t */ 85 86 /* Since we don't support multi-byte characters, the -c and -b 87 options are equivalent, and the -n option is meaningless. */ 88 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 89 switch(ch) { 90 case 'b': 91 fcn = b_cut; 92 get_list(optarg); 93 bflag = 1; 94 break; 95 case 'c': 96 fcn = c_cut; 97 get_list(optarg); 98 cflag = 1; 99 break; 100 case 'd': 101 dchar = *optarg; 102 dflag = 1; 103 break; 104 case 'f': 105 get_list(optarg); 106 fcn = f_cut; 107 fflag = 1; 108 break; 109 case 's': 110 sflag = 1; 111 break; 112 case 'n': 113 break; 114 case '?': 115 default: 116 usage(); 117 } 118 argc -= optind; 119 argv += optind; 120 121 if (fflag) { 122 if (cflag || bflag) 123 usage(); 124 } else if ((!cflag && !bflag) || dflag || sflag) 125 usage(); 126 else if (bflag && cflag) 127 usage(); 128 129 if (*argv) 130 for (; *argv; ++argv) { 131 if (strcmp(*argv, "-") == 0) 132 fcn(stdin, "stdin"); 133 else { 134 if ((fp = fopen(*argv, "r")) == NULL) 135 err(1, "%s", *argv); 136 fcn(fp, *argv); 137 (void)fclose(fp); 138 } 139 } 140 else 141 fcn(stdin, "stdin"); 142 return 0; 143 } 144 145 static size_t autostart, autostop, maxval; 146 147 static char *positions = NULL; 148 static size_t numpositions = 0; 149 #define ALLOC_CHUNK _POSIX2_LINE_MAX /* malloc granularity */ 150 151 static void 152 get_list(char *list) 153 { 154 size_t setautostart, start, stop; 155 char *pos; 156 char *p; 157 158 if (positions == NULL) { 159 numpositions = ALLOC_CHUNK; 160 positions = ecalloc(numpositions, sizeof(*positions)); 161 } 162 163 /* 164 * set a byte in the positions array to indicate if a field or 165 * column is to be selected; use +1, it's 1-based, not 0-based. 166 * This parser is less restrictive than the Draft 9 POSIX spec. 167 * POSIX doesn't allow lists that aren't in increasing order or 168 * overlapping lists. We also handle "-3-5" although there's no 169 * real reason too. 170 */ 171 for (; (p = strtok(list, ", \t")) != NULL; list = NULL) { 172 setautostart = start = stop = 0; 173 if (*p == '-') { 174 ++p; 175 setautostart = 1; 176 } 177 if (isdigit((unsigned char)*p)) { 178 start = stop = strtol(p, &p, 10); 179 if (setautostart && start > autostart) 180 autostart = start; 181 } 182 if (*p == '-') { 183 if (isdigit((unsigned char)p[1])) 184 stop = strtol(p + 1, &p, 10); 185 if (*p == '-') { 186 ++p; 187 if (!autostop || autostop > stop) 188 autostop = stop; 189 } 190 } 191 if (*p) 192 errx(1, "[-cf] list: illegal list value"); 193 if (!stop || !start) 194 errx(1, "[-cf] list: values may not include zero"); 195 if (stop + 1 > numpositions) { 196 size_t newsize; 197 newsize = roundup(stop + 1, ALLOC_CHUNK); 198 positions = erealloc(positions, newsize); 199 (void)memset(positions + numpositions, 0, 200 newsize - numpositions); 201 numpositions = newsize; 202 } 203 if (maxval < stop) 204 maxval = stop; 205 for (pos = positions + start; start++ <= stop; pos++) 206 *pos = 1; 207 } 208 209 /* overlapping ranges */ 210 if (autostop && maxval > autostop) 211 maxval = autostop; 212 213 /* set autostart */ 214 if (autostart) 215 (void)memset(positions + 1, '1', autostart); 216 } 217 218 static void 219 /*ARGSUSED*/ 220 f_cut(FILE *fp, const char *fname __unused) 221 { 222 int ch, field, isdelim; 223 char *pos, *p, sep; 224 int output; 225 size_t len; 226 char *lbuf, *tbuf; 227 228 for (sep = dchar, tbuf = NULL; (lbuf = fgetln(fp, &len)) != NULL;) { 229 output = 0; 230 if (lbuf[len - 1] != '\n') { 231 /* no newline at the end of the last line so add one */ 232 if ((tbuf = (char *)malloc(len + 1)) == NULL) 233 err(1, NULL); 234 (void)memcpy(tbuf, lbuf, len); 235 tbuf[len++] = '\n'; 236 lbuf = tbuf; 237 } 238 for (isdelim = 0, p = lbuf;; ++p) { 239 ch = *p; 240 /* this should work if newline is delimiter */ 241 if (ch == sep) 242 isdelim = 1; 243 if (ch == '\n') { 244 if (!isdelim && !sflag) 245 (void)fwrite(lbuf, len, 1, stdout); 246 break; 247 } 248 } 249 if (!isdelim) 250 continue; 251 252 pos = positions + 1; 253 for (field = maxval, p = lbuf; field; --field, ++pos) { 254 if (*pos) { 255 if (output++) 256 (void)putchar(sep); 257 while ((ch = *p++) != '\n' && ch != sep) 258 (void)putchar(ch); 259 } else { 260 while ((ch = *p++) != '\n' && ch != sep) 261 continue; 262 } 263 if (ch == '\n') 264 break; 265 } 266 if (ch != '\n') { 267 if (autostop) { 268 if (output) 269 (void)putchar(sep); 270 for (; (ch = *p) != '\n'; ++p) 271 (void)putchar(ch); 272 } else 273 for (; (ch = *p) != '\n'; ++p); 274 } 275 (void)putchar('\n'); 276 if (tbuf) { 277 free(tbuf); 278 tbuf = NULL; 279 } 280 } 281 if (tbuf) 282 free(tbuf); 283 } 284 285 static void 286 usage(void) 287 { 288 (void)fprintf(stderr, "Usage:\tcut -b list [-n] [file ...]\n" 289 "\tcut -c list [file1 ...]\n" 290 "\tcut -f list [-d delim] [-s] [file ...]\n"); 291 exit(1); 292 } 293 294 /* make b_put(): */ 295 #define CUT_BYTE 1 296 #include "x_cut.c" 297 #undef CUT_BYTE 298 299 /* make c_put(): */ 300 #define CUT_BYTE 0 301 #include "x_cut.c" 302 #undef CUT_BYTE 303