xref: /openbsd/usr.bin/cut/cut.c (revision 3cab2bb3)
1 /*	$OpenBSD: cut.c,v 1.26 2019/02/07 19:11:23 tobias Exp $	*/
2 /*	$NetBSD: cut.c,v 1.9 1995/09/02 05:59:23 jtc Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <assert.h>
37 #include <ctype.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
/*
 * Option state shared between main() and the cut routines.  Everything
 * here is private to this file, so it is given internal linkage.
 */

/* Field delimiter: the bytes of one (possibly multibyte) character + NUL. */
static char	dchar[5];
/* Length of the delimiter in bytes (NUL not counted). */
static int	dlen;

static int	bflag;		/* -b list: select by byte position */
static int	cflag;		/* -c list: select by character position */
static int	dflag;		/* -d delim: delimiter given explicitly */
static int	fflag;		/* -f list: select by field */
static int	nflag;		/* -n: used together with -b only */
static int	sflag;		/* -s: drop lines that contain no delimiter */

/*
 * The three cut routines share one signature so that main() can pick
 * one through a function pointer.
 */
static void	b_cut(FILE *fp, char *fname);
static void	c_cut(FILE *fp, char *fname);
static void	f_cut(FILE *fp, char *fname);
static void	get_list(char *list);
static void	usage(void);
62 
63 int
64 main(int argc, char *argv[])
65 {
66 	FILE *fp;
67 	void (*fcn)(FILE *, char *);
68 	int ch, rval;
69 
70 	setlocale(LC_CTYPE, "");
71 
72 	if (pledge("stdio rpath", NULL) == -1)
73 		err(1, "pledge");
74 
75 	dchar[0] = '\t';		/* default delimiter */
76 	dchar[1] = '\0';
77 	dlen = 1;
78 
79 	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
80 		switch(ch) {
81 		case 'b':
82 			get_list(optarg);
83 			bflag = 1;
84 			break;
85 		case 'c':
86 			get_list(optarg);
87 			cflag = 1;
88 			break;
89 		case 'd':
90 			if ((dlen = mblen(optarg, MB_CUR_MAX)) == -1)
91 				usage();
92 			assert(dlen < sizeof(dchar));
93 			(void)memcpy(dchar, optarg, dlen);
94 			dchar[dlen] = '\0';
95 			dflag = 1;
96 			break;
97 		case 'f':
98 			get_list(optarg);
99 			fflag = 1;
100 			break;
101 		case 'n':
102 			nflag = 1;
103 			break;
104 		case 's':
105 			sflag = 1;
106 			break;
107 		case '?':
108 		default:
109 			usage();
110 		}
111 	argc -= optind;
112 	argv += optind;
113 
114 	if (bflag + cflag + fflag != 1 ||
115 	    (nflag && !bflag) ||
116 	    ((dflag || sflag) && !fflag))
117 		usage();
118 
119 	if (MB_CUR_MAX == 1) {
120 		nflag = 0;
121 		if (cflag) {
122 			bflag = 1;
123 			cflag = 0;
124 		}
125 	}
126 
127 	fcn = fflag ? f_cut : (cflag || nflag) ? c_cut : b_cut;
128 
129 	rval = 0;
130 	if (*argv)
131 		for (; *argv; ++argv) {
132 			if (strcmp(*argv, "-") == 0)
133 				fcn(stdin, "stdin");
134 			else {
135 				if ((fp = fopen(*argv, "r"))) {
136 					fcn(fp, *argv);
137 					(void)fclose(fp);
138 				} else {
139 					rval = 1;
140 					warn("%s", *argv);
141 				}
142 			}
143 		}
144 	else {
145 		if (pledge("stdio", NULL) == -1)
146 			err(1, "pledge");
147 
148 		fcn(stdin, "stdin");
149 	}
150 	exit(rval);
151 }
152 
/*
 * State built by get_list() and read by the cut routines.  It is private
 * to this file, so it is given internal linkage.
 */

/* Largest N seen in a list element with a leading dash; 1..N get selected. */
static int autostart;
/* Smallest end of an element with a trailing dash; 0 if there was none. */
static int autostop;
/* Highest position the cut routines have to look up in positions[]. */
static int maxval;

/* positions[i] is non-zero when 1-based position i is selected; [0] unused. */
static char positions[_POSIX2_LINE_MAX + 1];
156 
157 int
158 read_number(char **p)
159 {
160 	int dash, n;
161 	const char *errstr;
162 	char *q;
163 
164 	q = *p + strcspn(*p, "-");
165 	dash = *q == '-';
166 	*q = '\0';
167 	n = strtonum(*p, 1, _POSIX2_LINE_MAX, &errstr);
168 	if (errstr != NULL)
169 		errx(1, "[-bcf] list: %s %s (allowed 1-%d)", *p, errstr,
170 		    _POSIX2_LINE_MAX);
171 	if (dash)
172 		*q = '-';
173 	*p = q;
174 
175 	return n;
176 }
177 
178 void
179 get_list(char *list)
180 {
181 	int setautostart, start, stop;
182 	char *p;
183 
184 	/*
185 	 * set a byte in the positions array to indicate if a field or
186 	 * column is to be selected; use +1, it's 1-based, not 0-based.
187 	 * This parser is less restrictive than the Draft 9 POSIX spec.
188 	 * POSIX doesn't allow lists that aren't in increasing order or
189 	 * overlapping lists.  We also handle "-3-5" although there's no
190 	 * real reason too.
191 	 */
192 	while ((p = strsep(&list, ", \t"))) {
193 		setautostart = start = stop = 0;
194 		if (*p == '-') {
195 			++p;
196 			setautostart = 1;
197 		}
198 		if (isdigit((unsigned char)*p)) {
199 			start = stop = read_number(&p);
200 			if (setautostart && start > autostart)
201 				autostart = start;
202 		}
203 		if (*p == '-') {
204 			if (isdigit((unsigned char)p[1])) {
205 				++p;
206 				stop = read_number(&p);
207 			}
208 			if (*p == '-') {
209 				++p;
210 				if (!autostop || autostop > stop)
211 					autostop = stop;
212 			}
213 		}
214 		if (*p != '\0' || !stop || !start)
215 			errx(1, "[-bcf] list: illegal list value");
216 		if (maxval < stop)
217 			maxval = stop;
218 		if (start <= stop)
219 			memset(positions + start, 1, stop - start + 1);
220 	}
221 
222 	/* overlapping ranges */
223 	if (autostop && maxval > autostop)
224 		maxval = autostop;
225 
226 	/* set autostart */
227 	if (autostart)
228 		memset(positions + 1, '1', autostart);
229 }
230 
231 /* ARGSUSED */
232 void
233 b_cut(FILE *fp, char *fname)
234 {
235 	int ch, col;
236 	char *pos;
237 
238 	for (;;) {
239 		pos = positions + 1;
240 		for (col = maxval; col; --col) {
241 			if ((ch = getc(fp)) == EOF)
242 				return;
243 			if (ch == '\n')
244 				break;
245 			if (*pos++)
246 				(void)putchar(ch);
247 		}
248 		if (ch != '\n') {
249 			if (autostop)
250 				while ((ch = getc(fp)) != EOF && ch != '\n')
251 					(void)putchar(ch);
252 			else
253 				while ((ch = getc(fp)) != EOF && ch != '\n')
254 					;
255 		}
256 		(void)putchar('\n');
257 	}
258 }
259 
260 void
261 c_cut(FILE *fp, char *fname)
262 {
263 	static char	*line = NULL;
264 	static size_t	 linesz = 0;
265 	ssize_t		 linelen;
266 	char		*cp, *pos, *maxpos;
267 	int		 len;
268 
269 	while ((linelen = getline(&line, &linesz, fp)) != -1) {
270 		if (line[linelen - 1] == '\n')
271 			line[linelen - 1] = '\0';
272 
273 		cp = line;
274 		pos = positions + 1;
275 		maxpos = pos + maxval;
276 		while(pos < maxpos && *cp != '\0') {
277 			len = mblen(cp, MB_CUR_MAX);
278 			if (len == -1)
279 				len = 1;
280 			pos += nflag ? len : 1;
281 			if (pos[-1] == '\0')
282 				cp += len;
283 			else
284 				while (len--)
285 					putchar(*cp++);
286 		}
287 		if (autostop)
288 			puts(cp);
289 		else
290 			putchar('\n');
291 	}
292 }
293 
294 void
295 f_cut(FILE *fp, char *fname)
296 {
297 	static char	*line = NULL;
298 	static size_t	 linesz = 0;
299 	ssize_t		 linelen;
300 	char		*sp, *ep, *pos, *maxpos;
301 	int		 output;
302 
303 	while ((linelen = getline(&line, &linesz, fp)) != -1) {
304 		if (line[linelen - 1] == '\n')
305 			line[linelen - 1] = '\0';
306 
307 		if ((ep = strstr(line, dchar)) == NULL) {
308 			if (!sflag)
309 				puts(line);
310 			continue;
311 		}
312 
313 		pos = positions + 1;
314 		maxpos = pos + maxval;
315 		output = 0;
316 		sp = line;
317 		for (;;) {
318 			if (*pos++) {
319 				if (output)
320 					fputs(dchar, stdout);
321 				while (sp < ep)
322 					putchar(*sp++);
323 				output = 1;
324 			} else
325 				sp = ep;
326 			if (*sp == '\0' || pos == maxpos)
327 				break;
328 			sp += dlen;
329 			if ((ep = strstr(sp, dchar)) == NULL)
330 				ep = strchr(sp, '\0');
331 		}
332 		if (autostop)
333 			puts(sp);
334 		else
335 			putchar('\n');
336 	}
337 }
338 
/*
 * usage --
 *	Write the synopsis of the three invocation forms to standard error
 *	and exit with status 1.  Does not return.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: cut -b list [-n] [file ...]\n"
	    "       cut -c list [file ...]\n"
	    "       cut -f list [-s] [-d delim] [file ...]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
348