1 /* $OpenBSD: cut.c,v 1.26 2019/02/07 19:11:23 tobias Exp $ */ 2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <assert.h> 37 #include <ctype.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <limits.h> 41 #include <locale.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 char dchar[5]; 48 int dlen; 49 50 int bflag; 51 int cflag; 52 int dflag; 53 int fflag; 54 int nflag; 55 int sflag; 56 57 void b_cut(FILE *, char *); 58 void c_cut(FILE *, char *); 59 void f_cut(FILE *, char *); 60 void get_list(char *); 61 void usage(void); 62 63 int 64 main(int argc, char *argv[]) 65 { 66 FILE *fp; 67 void (*fcn)(FILE *, char *); 68 int ch, rval; 69 70 setlocale(LC_CTYPE, ""); 71 72 if (pledge("stdio rpath", NULL) == -1) 73 err(1, "pledge"); 74 75 dchar[0] = '\t'; /* default delimiter */ 76 dchar[1] = '\0'; 77 dlen = 1; 78 79 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 80 switch(ch) { 81 case 'b': 82 get_list(optarg); 83 bflag = 1; 84 break; 85 case 'c': 86 get_list(optarg); 87 cflag = 1; 88 break; 89 case 'd': 90 if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1) 91 usage(); 92 assert(dlen < sizeof(dchar)); 93 (void)memcpy(dchar, optarg, dlen); 94 dchar[dlen] = '\0'; 95 dflag = 1; 96 break; 97 case 'f': 98 get_list(optarg); 99 fflag = 1; 100 break; 101 case 'n': 102 nflag = 1; 103 break; 104 case 's': 105 sflag = 1; 106 break; 107 case '?': 108 default: 109 usage(); 110 } 111 argc -= optind; 112 argv += optind; 113 114 if (bflag + cflag + fflag != 1 || 115 (nflag && !bflag) || 116 ((dflag || sflag) && !fflag)) 117 usage(); 118 119 if (MB_CUR_MAX == 1) { 120 nflag = 0; 121 if (cflag) { 122 bflag = 1; 123 cflag = 0; 124 } 125 } 126 127 fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut; 128 129 rval = 0; 130 if (*argv) 131 for (; *argv; ++argv) { 132 if (strcmp(*argv, "-") == 0) 133 fcn(stdin, "stdin"); 134 else { 135 if ((fp = fopen(*argv, "r"))) { 136 fcn(fp, *argv); 137 (void)fclose(fp); 138 } else { 139 rval = 1; 140 warn("%s", *argv); 141 } 142 } 143 } 144 else { 145 if (pledge("stdio", NULL) == -1) 146 err(1, "pledge"); 147 148 fcn(stdin, "stdin"); 149 } 150 exit(rval); 151 } 152 153 int autostart, autostop, maxval; 154 155 char positions[_POSIX2_LINE_MAX + 1]; 156 157 int 158 read_number(char **p) 159 { 160 int dash, n; 161 const char *errstr; 162 char *q; 163 164 q = *p + strcspn(*p, "-"); 165 dash = *q == '-'; 166 *q = '\0'; 167 n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr); 168 if (errstr != NULL) 169 errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr, 170 _POSIX2_LINE_MAX); 171 if (dash) 172 *q = '-'; 173 *p = q; 174 175 return n; 176 } 177 178 void 179 get_list(char *list) 180 { 181 int setautostart, start, stop; 182 char *p; 183 184 /* 185 * set a byte in the positions array to indicate if a field or 186 * column is to be selected; use +1, it's 1-based, not 0-based. 187 * This parser is less restrictive than the Draft 9 POSIX spec. 188 * POSIX doesn't allow lists that aren't in increasing order or 189 * overlapping lists. We also handle "-3-5" although there's no 190 * real reason too. 191 */ 192 while ((p = strsep(&list, ", \t"))) { 193 setautostart = start = stop = 0; 194 if (*p == '-') { 195 ++p; 196 setautostart = 1; 197 } 198 if (isdigit((unsigned char)*p)) { 199 start = stop = read_number(&p); 200 if (setautostart && start > autostart) 201 autostart = start; 202 } 203 if (*p == '-') { 204 if (isdigit((unsigned char)p[1])) { 205 ++p; 206 stop = read_number(&p); 207 } 208 if (*p == '-') { 209 ++p; 210 if (!autostop || autostop > stop) 211 autostop = stop; 212 } 213 } 214 if (*p != '\0' || !stop || !start) 215 errx(1, "[-bcf] list: illegal list value"); 216 if (maxval < stop) 217 maxval = stop; 218 if (start <= stop) 219 memset(positions + start, 1, stop - start + 1); 220 } 221 222 /* overlapping ranges */ 223 if (autostop && maxval > autostop) 224 maxval = autostop; 225 226 /* set autostart */ 227 if (autostart) 228 memset(positions + 1, '1', autostart); 229 } 230 231 /* ARGSUSED */ 232 void 233 b_cut(FILE *fp, char *fname) 234 { 235 int ch, col; 236 char *pos; 237 238 for (;;) { 239 pos = positions + 1; 240 for (col = maxval; col; --col) { 241 if ((ch = getc(fp)) == EOF) 242 return; 243 if (ch == '\n') 244 break; 245 if (*pos++) 246 (void)putchar(ch); 247 } 248 if (ch != '\n') { 249 if (autostop) 250 while ((ch = getc(fp)) != EOF && ch != '\n') 251 (void)putchar(ch); 252 else 253 while ((ch = getc(fp)) != EOF && ch != '\n') 254 ; 255 } 256 (void)putchar('\n'); 257 } 258 } 259 260 void 261 c_cut(FILE *fp, char *fname) 262 { 263 static char *line = NULL; 264 static size_t linesz = 0; 265 ssize_t linelen; 266 char *cp, *pos, *maxpos; 267 int len; 268 269 while ((linelen = getline(&line, &linesz, fp)) != -1) { 270 if (line[linelen - 1] == '\n') 271 line[linelen - 1] = '\0'; 272 273 cp = line; 274 pos = positions + 1; 275 maxpos = pos + maxval; 276 while(pos < maxpos && *cp != '\0') { 277 len = mblen(cp, MB_CUR_MAX); 278 if (len == -1) 279 len = 1; 280 pos += nflag ? len : 1; 281 if (pos[-1] == '\0') 282 cp += len; 283 else 284 while (len--) 285 putchar(*cp++); 286 } 287 if (autostop) 288 puts(cp); 289 else 290 putchar('\n'); 291 } 292 } 293 294 void 295 f_cut(FILE *fp, char *fname) 296 { 297 static char *line = NULL; 298 static size_t linesz = 0; 299 ssize_t linelen; 300 char *sp, *ep, *pos, *maxpos; 301 int output; 302 303 while ((linelen = getline(&line, &linesz, fp)) != -1) { 304 if (line[linelen - 1] == '\n') 305 line[linelen - 1] = '\0'; 306 307 if ((ep = strstr(line, dchar)) == NULL) { 308 if (!sflag) 309 puts(line); 310 continue; 311 } 312 313 pos = positions + 1; 314 maxpos = pos + maxval; 315 output = 0; 316 sp = line; 317 for (;;) { 318 if (*pos++) { 319 if (output) 320 fputs(dchar, stdout); 321 while (sp < ep) 322 putchar(*sp++); 323 output = 1; 324 } else 325 sp = ep; 326 if (*sp == '\0' || pos == maxpos) 327 break; 328 sp += dlen; 329 if ((ep = strstr(sp, dchar)) == NULL) 330 ep = strchr(sp, '\0'); 331 } 332 if (autostop) 333 puts(sp); 334 else 335 putchar('\n'); 336 } 337 } 338 339 void 340 usage(void) 341 { 342 (void)fprintf(stderr, 343 "usage: cut -b list [-n] [file ...]\n" 344 " cut -c list [file ...]\n" 345 " cut -f list [-s] [-d delim] [file ...]\n"); 346 exit(1); 347 } 348