1 /* $OpenBSD: cut.c,v 1.28 2023/03/08 04:43:10 guenther Exp $ */
2 /* $NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $ */
3
4 /*
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <assert.h>
37 #include <ctype.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46
/*
 * Field delimiter used by f_cut(), stored as a NUL-terminated byte
 * string, and its length in bytes.  main() initialises it to a tab and
 * replaces it with the first character of the -d argument.
 */
char dchar[5];
int dlen;

/* Option flags; main() sets each one to 1 when the option is given. */
int bflag;	/* -b */
int cflag;	/* -c */
int dflag;	/* -d */
int fflag;	/* -f */
int nflag;	/* -n */
int sflag;	/* -s */

/*
 * The three cut routines share one signature so that main() can pick
 * one of them through a function pointer.
 */
void b_cut(FILE *, char *);
void c_cut(FILE *, char *);
void f_cut(FILE *, char *);
/* Parses one -b, -c or -f list argument. */
void get_list(char *);
/* Prints the usage message and exits. */
void usage(void);
62
63 int
main(int argc,char * argv[])64 main(int argc, char *argv[])
65 {
66 FILE *fp;
67 void (*fcn)(FILE *, char *);
68 int ch, rval;
69
70 setlocale(LC_CTYPE, "");
71
72 if (pledge("stdio rpath", NULL) == -1)
73 err(1, "pledge");
74
75 dchar[0] = '\t'; /* default delimiter */
76 dchar[1] = '\0';
77 dlen = 1;
78
79 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
80 switch(ch) {
81 case 'b':
82 get_list(optarg);
83 bflag = 1;
84 break;
85 case 'c':
86 get_list(optarg);
87 cflag = 1;
88 break;
89 case 'd':
90 if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1)
91 usage();
92 assert(dlen < sizeof(dchar));
93 (void)memcpy(dchar, optarg, dlen);
94 dchar[dlen] = '\0';
95 dflag = 1;
96 break;
97 case 'f':
98 get_list(optarg);
99 fflag = 1;
100 break;
101 case 'n':
102 nflag = 1;
103 break;
104 case 's':
105 sflag = 1;
106 break;
107 default:
108 usage();
109 }
110 argc -= optind;
111 argv += optind;
112
113 if (bflag + cflag + fflag != 1 ||
114 (nflag && !bflag) ||
115 ((dflag || sflag) && !fflag))
116 usage();
117
118 if (MB_CUR_MAX == 1) {
119 nflag = 0;
120 if (cflag) {
121 bflag = 1;
122 cflag = 0;
123 }
124 }
125
126 fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut;
127
128 rval = 0;
129 if (*argv)
130 for (; *argv; ++argv) {
131 if (strcmp(*argv, "-") == 0)
132 fcn(stdin, "stdin");
133 else {
134 if ((fp = fopen(*argv, "r"))) {
135 fcn(fp, *argv);
136 (void)fclose(fp);
137 } else {
138 rval = 1;
139 warn("%s", *argv);
140 }
141 }
142 }
143 else {
144 if (pledge("stdio", NULL) == -1)
145 err(1, "pledge");
146
147 fcn(stdin, "stdin");
148 }
149 exit(rval);
150 }
151
/*
 * Selection state filled in by get_list():
 *   autostart  largest N of any list entry written with a leading '-'
 *              ("-N"); 0 if there is none.  Positions 1..N get marked.
 *   autostop   smallest number that is followed by a trailing '-'
 *              ("N-"); 0 if there is none.  When set, the cut routines
 *              also write the rest of the line.
 *   maxval     highest list value seen, lowered to autostop when that
 *              is set; the cut routines consult positions[] up to here.
 */
int autostart, autostop, maxval;

/*
 * positions[i] is non-zero when the i-th (1-based) byte, character or
 * field is selected; index 0 is not used.
 */
char positions[_POSIX2_LINE_MAX + 1];
155
156 int
read_number(char ** p)157 read_number(char **p)
158 {
159 int dash, n;
160 const char *errstr;
161 char *q;
162
163 q = *p + strcspn(*p, "-");
164 dash = *q == '-';
165 *q = '\0';
166 n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr);
167 if (errstr != NULL)
168 errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr,
169 _POSIX2_LINE_MAX);
170 if (dash)
171 *q = '-';
172 *p = q;
173
174 return n;
175 }
176
177 void
get_list(char * list)178 get_list(char *list)
179 {
180 int setautostart, start, stop;
181 char *p;
182
183 /*
184 * set a byte in the positions array to indicate if a field or
185 * column is to be selected; use +1, it's 1-based, not 0-based.
186 * This parser is less restrictive than the Draft 9 POSIX spec.
187 * POSIX doesn't allow lists that aren't in increasing order or
188 * overlapping lists. We also handle "-3-5" although there's no
189 * real reason too.
190 */
191 while ((p = strsep(&list, ", \t"))) {
192 setautostart = start = stop = 0;
193 if (*p == '-') {
194 ++p;
195 setautostart = 1;
196 }
197 if (isdigit((unsigned char)*p)) {
198 start = stop = read_number(&p);
199 if (setautostart && start > autostart)
200 autostart = start;
201 }
202 if (*p == '-') {
203 if (isdigit((unsigned char)p[1])) {
204 ++p;
205 stop = read_number(&p);
206 }
207 if (*p == '-') {
208 ++p;
209 if (!autostop || autostop > stop)
210 autostop = stop;
211 }
212 }
213 if (*p != '\0' || !stop || !start)
214 errx(1, "[-bcf] list: illegal list value");
215 if (maxval < stop)
216 maxval = stop;
217 if (start <= stop)
218 memset(positions + start, 1, stop - start + 1);
219 }
220
221 /* overlapping ranges */
222 if (autostop && maxval > autostop)
223 maxval = autostop;
224
225 /* set autostart */
226 if (autostart)
227 memset(positions + 1, '1', autostart);
228 }
229
230 void
b_cut(FILE * fp,char * fname)231 b_cut(FILE *fp, char *fname)
232 {
233 int ch, col;
234 char *pos;
235
236 for (;;) {
237 pos = positions + 1;
238 for (col = maxval; col; --col) {
239 if ((ch = getc(fp)) == EOF)
240 return;
241 if (ch == '\n')
242 break;
243 if (*pos++)
244 (void)putchar(ch);
245 }
246 if (ch != '\n') {
247 if (autostop)
248 while ((ch = getc(fp)) != EOF && ch != '\n')
249 (void)putchar(ch);
250 else
251 while ((ch = getc(fp)) != EOF && ch != '\n')
252 ;
253 }
254 (void)putchar('\n');
255 }
256 }
257
258 void
c_cut(FILE * fp,char * fname)259 c_cut(FILE *fp, char *fname)
260 {
261 static char *line = NULL;
262 static size_t linesz = 0;
263 ssize_t linelen;
264 char *cp, *pos, *maxpos;
265 int len;
266
267 while ((linelen = getline(&line, &linesz, fp)) != -1) {
268 if (line[linelen - 1] == '\n')
269 line[linelen - 1] = '\0';
270
271 cp = line;
272 pos = positions + 1;
273 maxpos = pos + maxval;
274 while(pos < maxpos && *cp != '\0') {
275 len = mblen(cp, MB_CUR_MAX);
276 if (len == -1)
277 len = 1;
278 pos += nflag ? len : 1;
279 if (pos[-1] == '\0')
280 cp += len;
281 else
282 while (len--)
283 putchar(*cp++);
284 }
285 if (autostop)
286 puts(cp);
287 else
288 putchar('\n');
289 }
290 }
291
292 void
f_cut(FILE * fp,char * fname)293 f_cut(FILE *fp, char *fname)
294 {
295 static char *line = NULL;
296 static size_t linesz = 0;
297 ssize_t linelen;
298 char *sp, *ep, *pos, *maxpos;
299 int output;
300
301 while ((linelen = getline(&line, &linesz, fp)) != -1) {
302 if (line[linelen - 1] == '\n')
303 line[linelen - 1] = '\0';
304
305 if ((ep = strstr(line, dchar)) == NULL) {
306 if (!sflag)
307 puts(line);
308 continue;
309 }
310
311 pos = positions + 1;
312 maxpos = pos + maxval;
313 output = 0;
314 sp = line;
315 for (;;) {
316 if (*pos++) {
317 if (output)
318 fputs(dchar, stdout);
319 while (sp < ep)
320 putchar(*sp++);
321 output = 1;
322 } else
323 sp = ep;
324 if (*sp == '\0' || pos == maxpos)
325 break;
326 sp += dlen;
327 if ((ep = strstr(sp, dchar)) == NULL)
328 ep = strchr(sp, '\0');
329 }
330 if (autostop)
331 puts(sp);
332 else
333 putchar('\n');
334 }
335 }
336
/*
 * Write the synopsis of the three invocation forms to standard error
 * and exit with status 1.  Does not return.
 */
void
usage(void)
{
	(void)fputs(
	    "usage: cut -b list [-n] [file ...]\n"
	    " cut -c list [file ...]\n"
	    " cut -f list [-s] [-d delim] [file ...]\n",
	    stderr);
	exit(1);
}
346