xref: /openbsd/usr.bin/cut/cut.c (revision 4bdff4be)
1 /*	$OpenBSD: cut.c,v 1.28 2023/03/08 04:43:10 guenther Exp $	*/
2 /*	$NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <assert.h>
37 #include <ctype.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
/*
 * Field delimiter used by -f: a single (possibly multibyte) character
 * stored as a NUL-terminated byte string, and its length in bytes.
 */
char	dchar[5];
int	dlen;

/* Option flags; main() sets each to 1 when the matching option is seen. */
int	bflag, cflag, dflag, fflag, nflag, sflag;

/* Line processors for the -b, -c and -f modes. */
void	b_cut(FILE *fp, char *fname);
void	c_cut(FILE *fp, char *fname);
void	f_cut(FILE *fp, char *fname);

/* Option-argument parsing and the usage message. */
void	get_list(char *list);
void	usage(void);
62 
63 int
64 main(int argc, char *argv[])
65 {
66 	FILE *fp;
67 	void (*fcn)(FILE *, char *);
68 	int ch, rval;
69 
70 	setlocale(LC_CTYPE, "");
71 
72 	if (pledge("stdio rpath", NULL) == -1)
73 		err(1, "pledge");
74 
75 	dchar[0] = '\t';		/* default delimiter */
76 	dchar[1] = '\0';
77 	dlen = 1;
78 
79 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
80 		switch(ch) {
81 		case 'b':
82 			get_list(optarg);
83 			bflag = 1;
84 			break;
85 		case 'c':
86 			get_list(optarg);
87 			cflag = 1;
88 			break;
89 		case 'd':
90 			if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1)
91 				usage();
92 			assert(dlen < sizeof(dchar));
93 			(void)memcpy(dchar, optarg, dlen);
94 			dchar[dlen] = '\0';
95 			dflag = 1;
96 			break;
97 		case 'f':
98 			get_list(optarg);
99 			fflag = 1;
100 			break;
101 		case 'n':
102 			nflag = 1;
103 			break;
104 		case 's':
105 			sflag = 1;
106 			break;
107 		default:
108 			usage();
109 		}
110 	argc -= optind;
111 	argv += optind;
112 
113 	if (bflag + cflag + fflag != 1 ||
114 	    (nflag && !bflag) ||
115 	    ((dflag || sflag) && !fflag))
116 		usage();
117 
118 	if (MB_CUR_MAX == 1) {
119 		nflag = 0;
120 		if (cflag) {
121 			bflag = 1;
122 			cflag = 0;
123 		}
124 	}
125 
126 	fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut;
127 
128 	rval = 0;
129 	if (*argv)
130 		for (; *argv; ++argv) {
131 			if (strcmp(*argv, "-") == 0)
132 				fcn(stdin, "stdin");
133 			else {
134 				if ((fp = fopen(*argv, "r"))) {
135 					fcn(fp, *argv);
136 					(void)fclose(fp);
137 				} else {
138 					rval = 1;
139 					warn("%s", *argv);
140 				}
141 			}
142 		}
143 	else {
144 		if (pledge("stdio", NULL) == -1)
145 			err(1, "pledge");
146 
147 		fcn(stdin, "stdin");
148 	}
149 	exit(rval);
150 }
151 
152 int autostart, autostop, maxval;
153 
154 char positions[_POSIX2_LINE_MAX + 1];
155 
156 int
157 read_number(char **p)
158 {
159 	int dash, n;
160 	const char *errstr;
161 	char *q;
162 
163 	q = *p + strcspn(*p, "-");
164 	dash = *q == '-';
165 	*q = '\0';
166 	n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr);
167 	if (errstr != NULL)
168 		errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr,
169 		    _POSIX2_LINE_MAX);
170 	if (dash)
171 		*q = '-';
172 	*p = q;
173 
174 	return n;
175 }
176 
177 void
178 get_list(char *list)
179 {
180 	int setautostart, start, stop;
181 	char *p;
182 
183 	/*
184 	 * set a byte in the positions array to indicate if a field or
185 	 * column is to be selected; use +1, it's 1-based, not 0-based.
186 	 * This parser is less restrictive than the Draft 9 POSIX spec.
187 	 * POSIX doesn't allow lists that aren't in increasing order or
188 	 * overlapping lists.  We also handle "-3-5" although there's no
189 	 * real reason too.
190 	 */
191 	while ((p = strsep(&list, ", \t"))) {
192 		setautostart = start = stop = 0;
193 		if (*p == '-') {
194 			++p;
195 			setautostart = 1;
196 		}
197 		if (isdigit((unsigned char)*p)) {
198 			start = stop = read_number(&p);
199 			if (setautostart && start > autostart)
200 				autostart = start;
201 		}
202 		if (*p == '-') {
203 			if (isdigit((unsigned char)p[1])) {
204 				++p;
205 				stop = read_number(&p);
206 			}
207 			if (*p == '-') {
208 				++p;
209 				if (!autostop || autostop > stop)
210 					autostop = stop;
211 			}
212 		}
213 		if (*p != '\0' || !stop || !start)
214 			errx(1, "[-bcf] list: illegal list value");
215 		if (maxval < stop)
216 			maxval = stop;
217 		if (start <= stop)
218 			memset(positions + start, 1, stop - start + 1);
219 	}
220 
221 	/* overlapping ranges */
222 	if (autostop && maxval > autostop)
223 		maxval = autostop;
224 
225 	/* set autostart */
226 	if (autostart)
227 		memset(positions + 1, '1', autostart);
228 }
229 
230 void
231 b_cut(FILE *fp, char *fname)
232 {
233 	int ch, col;
234 	char *pos;
235 
236 	for (;;) {
237 		pos = positions + 1;
238 		for (col = maxval; col; --col) {
239 			if ((ch = getc(fp)) == EOF)
240 				return;
241 			if (ch == '\n')
242 				break;
243 			if (*pos++)
244 				(void)putchar(ch);
245 		}
246 		if (ch != '\n') {
247 			if (autostop)
248 				while ((ch = getc(fp)) != EOF && ch != '\n')
249 					(void)putchar(ch);
250 			else
251 				while ((ch = getc(fp)) != EOF && ch != '\n')
252 					;
253 		}
254 		(void)putchar('\n');
255 	}
256 }
257 
258 void
259 c_cut(FILE *fp, char *fname)
260 {
261 	static char	*line = NULL;
262 	static size_t	 linesz = 0;
263 	ssize_t		 linelen;
264 	char		*cp, *pos, *maxpos;
265 	int		 len;
266 
267 	while ((linelen = getline(&line, &linesz, fp)) != -1) {
268 		if (line[linelen - 1] == '\n')
269 			line[linelen - 1] = '\0';
270 
271 		cp = line;
272 		pos = positions + 1;
273 		maxpos = pos + maxval;
274 		while(pos < maxpos && *cp != '\0') {
275 			len = mblen(cp, MB_CUR_MAX);
276 			if (len == -1)
277 				len = 1;
278 			pos += nflag ? len : 1;
279 			if (pos[-1] == '\0')
280 				cp += len;
281 			else
282 				while (len--)
283 					putchar(*cp++);
284 		}
285 		if (autostop)
286 			puts(cp);
287 		else
288 			putchar('\n');
289 	}
290 }
291 
292 void
293 f_cut(FILE *fp, char *fname)
294 {
295 	static char	*line = NULL;
296 	static size_t	 linesz = 0;
297 	ssize_t		 linelen;
298 	char		*sp, *ep, *pos, *maxpos;
299 	int		 output;
300 
301 	while ((linelen = getline(&line, &linesz, fp)) != -1) {
302 		if (line[linelen - 1] == '\n')
303 			line[linelen - 1] = '\0';
304 
305 		if ((ep = strstr(line, dchar)) == NULL) {
306 			if (!sflag)
307 				puts(line);
308 			continue;
309 		}
310 
311 		pos = positions + 1;
312 		maxpos = pos + maxval;
313 		output = 0;
314 		sp = line;
315 		for (;;) {
316 			if (*pos++) {
317 				if (output)
318 					fputs(dchar, stdout);
319 				while (sp < ep)
320 					putchar(*sp++);
321 				output = 1;
322 			} else
323 				sp = ep;
324 			if (*sp == '\0' || pos == maxpos)
325 				break;
326 			sp += dlen;
327 			if ((ep = strstr(sp, dchar)) == NULL)
328 				ep = strchr(sp, '\0');
329 		}
330 		if (autostop)
331 			puts(sp);
332 		else
333 			putchar('\n');
334 	}
335 }
336 
/*
 * Print the synopsis of the three invocation forms to standard error
 * and exit with status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: cut -b list [-n] [file ...]\n"
	    "       cut -c list [file ...]\n"
	    "       cut -f list [-s] [-d delim] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
346