1 /* $OpenBSD: cut.c,v 1.28 2023/03/08 04:43:10 guenther Exp $ */ 2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <assert.h> 37 #include <ctype.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <limits.h> 41 #include <locale.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 char dchar[5]; 48 int dlen; 49 50 int bflag; 51 int cflag; 52 int dflag; 53 int fflag; 54 int nflag; 55 int sflag; 56 57 void b_cut(FILE *, char *); 58 void c_cut(FILE *, char *); 59 void f_cut(FILE *, char *); 60 void get_list(char *); 61 void usage(void); 62 63 int 64 main(int argc, char *argv[]) 65 { 66 FILE *fp; 67 void (*fcn)(FILE *, char *); 68 int ch, rval; 69 70 setlocale(LC_CTYPE, ""); 71 72 if (pledge("stdio rpath", NULL) == -1) 73 err(1, "pledge"); 74 75 dchar[0] = '\t'; /* default delimiter */ 76 dchar[1] = '\0'; 77 dlen = 1; 78 79 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 80 switch(ch) { 81 case 'b': 82 get_list(optarg); 83 bflag = 1; 84 break; 85 case 'c': 86 get_list(optarg); 87 cflag = 1; 88 break; 89 case 'd': 90 if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1) 91 usage(); 92 assert(dlen < sizeof(dchar)); 93 (void)memcpy(dchar, optarg, dlen); 94 dchar[dlen] = '\0'; 95 dflag = 1; 96 break; 97 case 'f': 98 get_list(optarg); 99 fflag = 1; 100 break; 101 case 'n': 102 nflag = 1; 103 break; 104 case 's': 105 sflag = 1; 106 break; 107 default: 108 usage(); 109 } 110 argc -= optind; 111 argv += optind; 112 113 if (bflag + cflag + fflag != 1 || 114 (nflag && !bflag) || 115 ((dflag || sflag) && !fflag)) 116 usage(); 117 118 if (MB_CUR_MAX == 1) { 119 nflag = 0; 120 if (cflag) { 121 bflag = 1; 122 cflag = 0; 123 } 124 } 125 126 fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut; 127 128 rval = 0; 129 if (*argv) 130 for (; *argv; ++argv) { 131 if (strcmp(*argv, "-") == 0) 132 fcn(stdin, "stdin"); 133 else { 134 if ((fp = fopen(*argv, "r"))) { 135 fcn(fp, *argv); 136 (void)fclose(fp); 137 } else { 138 rval = 1; 139 warn("%s", *argv); 140 } 141 } 142 } 143 else { 144 if (pledge("stdio", NULL) == -1) 145 err(1, "pledge"); 146 147 fcn(stdin, "stdin"); 148 } 149 exit(rval); 150 } 151 152 int autostart, autostop, maxval; 153 154 char positions[_POSIX2_LINE_MAX + 1]; 155 156 int 157 read_number(char **p) 158 { 159 int dash, n; 160 const char *errstr; 161 char *q; 162 163 q = *p + strcspn(*p, "-"); 164 dash = *q == '-'; 165 *q = '\0'; 166 n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr); 167 if (errstr != NULL) 168 errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr, 169 _POSIX2_LINE_MAX); 170 if (dash) 171 *q = '-'; 172 *p = q; 173 174 return n; 175 } 176 177 void 178 get_list(char *list) 179 { 180 int setautostart, start, stop; 181 char *p; 182 183 /* 184 * set a byte in the positions array to indicate if a field or 185 * column is to be selected; use +1, it's 1-based, not 0-based. 186 * This parser is less restrictive than the Draft 9 POSIX spec. 187 * POSIX doesn't allow lists that aren't in increasing order or 188 * overlapping lists. We also handle "-3-5" although there's no 189 * real reason too. 190 */ 191 while ((p = strsep(&list, ", \t"))) { 192 setautostart = start = stop = 0; 193 if (*p == '-') { 194 ++p; 195 setautostart = 1; 196 } 197 if (isdigit((unsigned char)*p)) { 198 start = stop = read_number(&p); 199 if (setautostart && start > autostart) 200 autostart = start; 201 } 202 if (*p == '-') { 203 if (isdigit((unsigned char)p[1])) { 204 ++p; 205 stop = read_number(&p); 206 } 207 if (*p == '-') { 208 ++p; 209 if (!autostop || autostop > stop) 210 autostop = stop; 211 } 212 } 213 if (*p != '\0' || !stop || !start) 214 errx(1, "[-bcf] list: illegal list value"); 215 if (maxval < stop) 216 maxval = stop; 217 if (start <= stop) 218 memset(positions + start, 1, stop - start + 1); 219 } 220 221 /* overlapping ranges */ 222 if (autostop && maxval > autostop) 223 maxval = autostop; 224 225 /* set autostart */ 226 if (autostart) 227 memset(positions + 1, '1', autostart); 228 } 229 230 void 231 b_cut(FILE *fp, char *fname) 232 { 233 int ch, col; 234 char *pos; 235 236 for (;;) { 237 pos = positions + 1; 238 for (col = maxval; col; --col) { 239 if ((ch = getc(fp)) == EOF) 240 return; 241 if (ch == '\n') 242 break; 243 if (*pos++) 244 (void)putchar(ch); 245 } 246 if (ch != '\n') { 247 if (autostop) 248 while ((ch = getc(fp)) != EOF && ch != '\n') 249 (void)putchar(ch); 250 else 251 while ((ch = getc(fp)) != EOF && ch != '\n') 252 ; 253 } 254 (void)putchar('\n'); 255 } 256 } 257 258 void 259 c_cut(FILE *fp, char *fname) 260 { 261 static char *line = NULL; 262 static size_t linesz = 0; 263 ssize_t linelen; 264 char *cp, *pos, *maxpos; 265 int len; 266 267 while ((linelen = getline(&line, &linesz, fp)) != -1) { 268 if (line[linelen - 1] == '\n') 269 line[linelen - 1] = '\0'; 270 271 cp = line; 272 pos = positions + 1; 273 maxpos = pos + maxval; 274 while(pos < maxpos && *cp != '\0') { 275 len = mblen(cp, MB_CUR_MAX); 276 if (len == -1) 277 len = 1; 278 pos += nflag ? len : 1; 279 if (pos[-1] == '\0') 280 cp += len; 281 else 282 while (len--) 283 putchar(*cp++); 284 } 285 if (autostop) 286 puts(cp); 287 else 288 putchar('\n'); 289 } 290 } 291 292 void 293 f_cut(FILE *fp, char *fname) 294 { 295 static char *line = NULL; 296 static size_t linesz = 0; 297 ssize_t linelen; 298 char *sp, *ep, *pos, *maxpos; 299 int output; 300 301 while ((linelen = getline(&line, &linesz, fp)) != -1) { 302 if (line[linelen - 1] == '\n') 303 line[linelen - 1] = '\0'; 304 305 if ((ep = strstr(line, dchar)) == NULL) { 306 if (!sflag) 307 puts(line); 308 continue; 309 } 310 311 pos = positions + 1; 312 maxpos = pos + maxval; 313 output = 0; 314 sp = line; 315 for (;;) { 316 if (*pos++) { 317 if (output) 318 fputs(dchar, stdout); 319 while (sp < ep) 320 putchar(*sp++); 321 output = 1; 322 } else 323 sp = ep; 324 if (*sp == '\0' || pos == maxpos) 325 break; 326 sp += dlen; 327 if ((ep = strstr(sp, dchar)) == NULL) 328 ep = strchr(sp, '\0'); 329 } 330 if (autostop) 331 puts(sp); 332 else 333 putchar('\n'); 334 } 335 } 336 337 void 338 usage(void) 339 { 340 (void)fprintf(stderr, 341 "usage: cut -b list [-n] [file ...]\n" 342 " cut -c list [file ...]\n" 343 " cut -f list [-s] [-d delim] [file ...]\n"); 344 exit(1); 345 } 346