xref: /dragonfly/usr.bin/split/split.c (revision 1d1731fa)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1987, 1993, 1994 The Regents of the University of California.  All rights reserved.
34  * @(#)split.c	8.2 (Berkeley) 4/16/94
35  * $FreeBSD: src/usr.bin/split/split.c,v 1.6.2.2 2002/07/25 12:46:36 tjr Exp $
36  * $DragonFly: src/usr.bin/split/split.c,v 1.3 2003/08/28 02:22:38 hmp Exp $
37  */
38 
39 #include <sys/param.h>
40 
41 #include <ctype.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <regex.h>
51 #include <sysexits.h>
52 
53 #define DEFLINE	1000			/* Default num lines per file. */
54 
55 off_t	 bytecnt;			/* Byte count to split on (0: not splitting by bytes). */
56 long	 numlines;			/* Line count to split on (0 until set; main defaults it to DEFLINE). */
57 int	 file_open;			/* Nonzero while an output file is open. */
58 int	 ifd = -1, ofd = -1;		/* Input/output file descriptors (-1: not set up yet). */
59 char	 *bfr;				/* I/O buffer, allocated by split1()/split2(). */
60 char	 fname[MAXPATHLEN];		/* File name prefix; newfile() appends the suffix in place. */
61 regex_t	 rgx;				/* Pattern compiled from the -p argument. */
62 int	 pflag;				/* Set once a -p pattern has been compiled into rgx. */
63 long	 sufflen = 2;			/* File name suffix length. */
64 
65 void newfile(void);			/* Open the next output file. */
66 void split1(void);			/* Split the input by bytes. */
67 void split2(void);			/* Split the input by lines or by pattern. */
68 static void usage(void);		/* Print usage and exit with EX_USAGE. */
69 
70 int
71 main(int argc, char **argv)
72 {
73 	long long bytecnti;
74 	long scale;
75 	int ch;
76 	char *ep, *p;
77 
78 	while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
79 		switch (ch) {
80 		case '0': case '1': case '2': case '3': case '4':
81 		case '5': case '6': case '7': case '8': case '9':
82 			/*
83 			 * Undocumented kludge: split was originally designed
84 			 * to take a number after a dash.
85 			 */
86 			if (numlines == 0) {
87 				p = argv[optind - 1];
88 				if (p[0] == '-' && p[1] == ch && !p[2])
89 					numlines = strtol(++p, &ep, 10);
90 				else
91 					numlines =
92 					    strtol(argv[optind] + 1, &ep, 10);
93 				if (numlines <= 0 || *ep)
94 					errx(EX_USAGE,
95 					    "%s: illegal line count", optarg);
96 			}
97 			break;
98 		case '-':		/* Undocumented: historic stdin flag. */
99 			if (ifd != -1)
100 				usage();
101 			ifd = 0;
102 			break;
103 		case 'a':		/* Suffix length */
104 			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
105 				errx(EX_USAGE,
106 				    "%s: illegal suffix length", optarg);
107 			break;
108 		case 'b':		/* Byte count. */
109 			errno = 0;
110 			if ((bytecnti = strtoll(optarg, &ep, 10)) <= 0 ||
111 			    (*ep != '\0' && *ep != 'k' && *ep != 'm') ||
112 			    errno != 0)
113 				errx(EX_USAGE,
114 				    "%s: illegal byte count", optarg);
115 			if (*ep == 'k')
116 				scale = 1024;
117 			else if (*ep == 'm')
118 				scale = 1024 * 1024;
119 			else
120 				scale = 1;
121 			bytecnt = (off_t)(bytecnti * scale);
122 			break;
123 		case 'p' :      /* pattern matching. */
124 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
125 				errx(EX_USAGE, "%s: illegal regexp", optarg);
126 			pflag = 1;
127 			break;
128 		case 'l':		/* Line count. */
129 			if (numlines != 0)
130 				usage();
131 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
132 				errx(EX_USAGE,
133 				    "%s: illegal line count", optarg);
134 			break;
135 		default:
136 			usage();
137 		}
138 	argv += optind;
139 	argc -= optind;
140 
141 	if (*argv != NULL)
142 		if (ifd == -1) {		/* Input file. */
143 			if (strcmp(*argv, "-") == 0)
144 				ifd = STDIN_FILENO;
145 			else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
146 				err(EX_NOINPUT, "%s", *argv);
147 			++argv;
148 		}
149 	if (*argv != NULL)			/* File name prefix. */
150 		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
151 			errx(EX_USAGE, "file name prefix is too long");
152 	if (*argv != NULL)
153 		usage();
154 
155 	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
156 		errx(EX_USAGE, "suffix is too long");
157 	if (pflag && (numlines != 0 || bytecnt != 0))
158 		usage();
159 
160 	if (numlines == 0)
161 		numlines = DEFLINE;
162 	else if (bytecnt != 0)
163 		usage();
164 
165 	if (ifd == -1)				/* Stdin by default. */
166 		ifd = 0;
167 
168 	if (bytecnt) {
169 		split1();
170 		exit (0);
171 	}
172 	split2();
173 	if (pflag)
174 		regfree(&rgx);
175 	exit(0);
176 }
177 
178 /*
179  * split1 --
180  *	Split the input by bytes.
181  */
182 void
183 split1(void)
184 {
185 	off_t bcnt;
186 	char *C;
187 	ssize_t dist, len;
188 
189 	if((bfr = (char *)malloc(bytecnt)) == NULL)
190 		err(EX_OSERR, "malloc");
191 
192 	for (bcnt = 0;;)
193 		switch ((len = read(ifd, bfr, bytecnt))) {
194 		case 0:
195 			free(bfr);
196 			exit(0);
197 		case -1:
198 			free(bfr);
199 			err(EX_IOERR, "read");
200 			/* NOTREACHED */
201 		default:
202 			if (!file_open)
203 				newfile();
204 			if (bcnt + len >= bytecnt) {
205 				dist = bytecnt - bcnt;
206 				if (write(ofd, bfr, dist) != dist)
207 					err(EX_IOERR, "write");
208 				len -= dist;
209 				for (C = bfr + dist; len >= bytecnt;
210 				    len -= bytecnt, C += bytecnt) {
211 					newfile();
212 					if (write(ofd,
213 					    C, (int)bytecnt) != bytecnt) {
214 						free(bfr);
215 						err(EX_IOERR, "write");
216 					}
217 				}
218 				if (len != 0) {
219 					newfile();
220 					if (write(ofd, C, len) != len) {
221 						free(bfr);
222 						err(EX_IOERR, "write");
223 					}
224 				} else
225 					file_open = 0;
226 				bcnt = len;
227 			} else {
228 				bcnt += len;
229 				if (write(ofd, bfr, len) != len) {
230 					free(bfr);
231 					err(EX_IOERR, "write");
232 				}
233 			}
234 		}
235 	free(bfr);
236 }
237 
238 /*
239  * split2 --
240  *	Split the input by lines.
241  */
242 void
243 split2(void)
244 {
245 	long lcnt = 0;
246 	FILE *infp;
247 
248 	/* Stick a stream on top of input file descriptor */
249 	if ((infp = fdopen(ifd, "r")) == NULL)
250 		err(EX_NOINPUT, "fdopen");
251 
252 	if((bfr = (char *)malloc(MAXBSIZE)) == NULL)
253 		err(EX_OSERR, "malloc");
254 
255 	/* Process input one line at a time */
256 	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
257 		const int len = strlen(bfr);
258 
259 		/* If line is too long to deal with, just write it out */
260 		if (bfr[len - 1] != '\n')
261 			goto writeit;
262 
263 		/* Check if we need to start a new file */
264 		if (pflag) {
265 			regmatch_t pmatch;
266 
267 			pmatch.rm_so = 0;
268 			pmatch.rm_eo = len - 1;
269 			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
270 				newfile();
271 		} else if (lcnt++ == numlines) {
272 			newfile();
273 			lcnt = 1;
274 		}
275 
276 writeit:
277 		/* Open output file if needed */
278 		if (!file_open)
279 			newfile();
280 
281 		/* Write out line */
282 		if (write(ofd, bfr, len) != len) {
283 			free(bfr);
284 			err(EX_IOERR, "write");
285 		}
286 	}
287 
288 	free(bfr);
289 
290 	/* EOF or error? */
291 	if (ferror(infp))
292 		err(EX_IOERR, "read");
293 	else
294 		exit(0);
295 }
296 
297 /*
298  * newfile --
299  *	Open a new output file.
300  */
301 void
302 newfile(void)
303 {
304 	long i, maxfiles, tfnum;
305 	static long fnum;
306 	static int defname;
307 	static char *fpnt;
308 
309 	if (ofd == -1) {
310 		if (fname[0] == '\0') {
311 			fname[0] = 'x';
312 			fpnt = fname + 1;
313 			defname = 1;
314 		} else {
315 			fpnt = fname + strlen(fname);
316 			defname = 0;
317 		}
318 		ofd = fileno(stdout);
319 	}
320 
321 	/* maxfiles = 26^sufflen, but don't use libm. */
322 	for (maxfiles = 1, i = 0; i < sufflen; i++)
323 		if ((maxfiles *= 26) <= 0)
324 			errx(EX_USAGE, "suffix is too long (max %ld)", i);
325 
326 	/*
327 	 * Hack to increase max files; original code wandered through
328 	 * magic characters.
329 	 */
330 	if (fnum == maxfiles) {
331 		if (!defname || fname[0] == 'z')
332 			errx(EX_DATAERR, "too many files");
333 		++fname[0];
334 		fnum = 0;
335 	}
336 
337 	/* Generate suffix of sufflen letters */
338 	tfnum = fnum;
339 	i = sufflen - 1;
340 	do {
341 		fpnt[i] = tfnum % 26 + 'a';
342 		tfnum /= 26;
343 	} while (i-- > 0);
344 	fpnt[sufflen] = '\0';
345 
346 	++fnum;
347 	if (!freopen(fname, "w", stdout))
348 		err(EX_IOERR, "%s", fname);
349 	file_open = 1;
350 }
351 
/*
 * usage --
 *	Write the command synopsis to stderr and exit with EX_USAGE.
 */
static void
usage(void)
{
	(void)fputs(
"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n"
"             [file [prefix]]\n",
	    stderr);
	exit(EX_USAGE);
}
361