xref: /dragonfly/usr.bin/split/split.c (revision 1847e88f)
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#) Copyright (c) 1987, 1993, 1994 The Regents of the University of California.  All rights reserved.
34  * @(#)split.c	8.2 (Berkeley) 4/16/94
35  * $FreeBSD: src/usr.bin/split/split.c,v 1.6.2.2 2002/07/25 12:46:36 tjr Exp $
36  * $DragonFly: src/usr.bin/split/split.c,v 1.6 2005/08/30 21:19:13 liamfoy Exp $
37  */
38 
39 #include <sys/param.h>
40 
41 #include <ctype.h>
42 #include <err.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <regex.h>
51 #include <sysexits.h>
52 
/* Number of lines per output file when no count is given. */
#define DEFLINE	1000

/*
 * Program-wide state shared by main() and the splitting routines.
 */

/* Size in bytes of each output file (-b); zero when not splitting by bytes. */
off_t	 bytecnt;

/* Lines per output file (-l or -NUMBER); zero until set or defaulted. */
long	 numlines;

/* Non-zero while an output file is ready to receive data. */
int	 file_open;

/* Input file descriptor; -1 until an input has been chosen. */
int	 ifd = -1;

/* Output file descriptor; -1 until the first output file is created. */
int	 ofd = -1;

/* I/O buffer, allocated by the splitting routine that is in use. */
char	 *bfr;

/* Output file name: the prefix, with the generated suffix appended in place. */
char	 fname[MAXPATHLEN];

/* Compiled form of the -p pattern; only meaningful when pflag is set. */
regex_t	 rgx;

/* Non-zero when -p was given. */
int	 pflag;

/* Number of letters in the generated file name suffix (-a). */
long	 sufflen = 2;

/* Start the next output file. */
void newfile(void);

/* Split the input into pieces of bytecnt bytes. */
void split1(void);

/* Split the input by line count or by pattern. */
void split2(void);

/* Print the synopsis and exit with EX_USAGE. */
static void usage(void);
69 
70 int
71 main(int argc, char **argv)
72 {
73 	long long bytecnti;
74 	long scale;
75 	int ch;
76 	char *ep, *p;
77 
78 	while ((ch = getopt(argc, argv, "0123456789a:b:l:p:")) != -1)
79 		switch (ch) {
80 		case '0': case '1': case '2': case '3': case '4':
81 		case '5': case '6': case '7': case '8': case '9':
82 			/*
83 			 * Undocumented kludge: split was originally designed
84 			 * to take a number after a dash.
85 			 */
86 			if (numlines == 0) {
87 				p = argv[optind - 1];
88 				if (p[0] == '-' && p[1] == ch && !p[2])
89 					numlines = strtol(++p, &ep, 10);
90 				else
91 					numlines =
92 					    strtol(argv[optind] + 1, &ep, 10);
93 				if (numlines <= 0 || *ep)
94 					errx(EX_USAGE,
95 					    "%s: illegal line count", optarg);
96 			}
97 			break;
98 		case 'a':		/* Suffix length */
99 			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
100 				errx(EX_USAGE,
101 				    "%s: illegal suffix length", optarg);
102 			break;
103 		case 'b':		/* Byte count. */
104 			errno = 0;
105 			if ((bytecnti = strtoll(optarg, &ep, 10)) <= 0 ||
106 			    (*ep != '\0' && *ep != 'k' && *ep != 'm') ||
107 			    errno != 0)
108 				errx(EX_USAGE,
109 				    "%s: illegal byte count", optarg);
110 			if (*ep == 'k')
111 				scale = 1024;
112 			else if (*ep == 'm')
113 				scale = 1024 * 1024;
114 			else
115 				scale = 1;
116 			bytecnt = (off_t)(bytecnti * scale);
117 			break;
118 		case 'p' :      /* pattern matching. */
119 			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
120 				errx(EX_USAGE, "%s: illegal regexp", optarg);
121 			pflag = 1;
122 			break;
123 		case 'l':		/* Line count. */
124 			if (numlines != 0)
125 				usage();
126 			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
127 				errx(EX_USAGE,
128 				    "%s: illegal line count", optarg);
129 			break;
130 		default:
131 			usage();
132 		}
133 	argv += optind;
134 	argc -= optind;
135 
136 	if (*argv != NULL) {
137 		if (strcmp(*argv, "-") == 0)
138 			ifd = STDIN_FILENO;
139 		else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
140 			err(EX_NOINPUT, "%s", *argv);
141 		++argv;
142 	}
143 
144 	if (*argv != NULL)			/* File name prefix. */
145 		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
146 			errx(EX_USAGE, "file name prefix is too long");
147 	if (*argv != NULL)
148 		usage();
149 
150 	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
151 		errx(EX_USAGE, "suffix is too long");
152 	if (pflag && (numlines != 0 || bytecnt != 0))
153 		usage();
154 
155 	if (numlines == 0)
156 		numlines = DEFLINE;
157 	else if (bytecnt != 0)
158 		usage();
159 
160 	if (ifd == -1)				/* Stdin by default. */
161 		ifd = 0;
162 
163 	if (bytecnt) {
164 		split1();
165 		exit (0);
166 	}
167 	split2();
168 	if (pflag)
169 		regfree(&rgx);
170 	exit(0);
171 }
172 
173 /*
174  * split1 --
175  *	Split the input by bytes.
176  */
177 void
178 split1(void)
179 {
180 	off_t bcnt;
181 	char *C;
182 	ssize_t dist, len;
183 
184 	if((bfr = (char *)malloc(bytecnt)) == NULL)
185 		err(EX_OSERR, "malloc");
186 
187 	for (bcnt = 0;;)
188 		switch ((len = read(ifd, bfr, bytecnt))) {
189 		case 0:
190 			free(bfr);
191 			exit(0);
192 		case -1:
193 			free(bfr);
194 			err(EX_IOERR, "read");
195 			/* NOTREACHED */
196 		default:
197 			if (!file_open)
198 				newfile();
199 			if (bcnt + len >= bytecnt) {
200 				dist = bytecnt - bcnt;
201 				if (write(ofd, bfr, dist) != dist)
202 					err(EX_IOERR, "write");
203 				len -= dist;
204 				for (C = bfr + dist; len >= bytecnt;
205 				    len -= bytecnt, C += bytecnt) {
206 					newfile();
207 					if (write(ofd,
208 					    C, (int)bytecnt) != bytecnt) {
209 						free(bfr);
210 						err(EX_IOERR, "write");
211 					}
212 				}
213 				if (len != 0) {
214 					newfile();
215 					if (write(ofd, C, len) != len) {
216 						free(bfr);
217 						err(EX_IOERR, "write");
218 					}
219 				} else
220 					file_open = 0;
221 				bcnt = len;
222 			} else {
223 				bcnt += len;
224 				if (write(ofd, bfr, len) != len) {
225 					free(bfr);
226 					err(EX_IOERR, "write");
227 				}
228 			}
229 		}
230 	free(bfr);
231 }
232 
233 /*
234  * split2 --
235  *	Split the input by lines.
236  */
237 void
238 split2(void)
239 {
240 	int startofline = 1;
241 	long lcnt = 0;
242 	FILE *infp;
243 
244 	/* Stick a stream on top of input file descriptor */
245 	if ((infp = fdopen(ifd, "r")) == NULL)
246 		err(EX_NOINPUT, "fdopen");
247 
248 	if((bfr = (char *)malloc(MAXBSIZE)) == NULL)
249 		err(EX_OSERR, "malloc");
250 
251 	/* Process input one line at a time */
252 	while (fgets(bfr, MAXBSIZE, infp) != NULL) {
253 		const int len = strlen(bfr);
254 
255 		/* Consider starting a new file only when at beginning of a line */
256 		if (startofline) {
257 			/* Check if we need to start a new file */
258 			if (pflag) {
259 				regmatch_t pmatch;
260 
261 				pmatch.rm_so = 0;
262 				pmatch.rm_eo = len - 1;
263 				if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
264 					newfile();
265 			} else if (lcnt++ == numlines) {
266 				newfile();
267 				lcnt = 1;
268 			}
269 		}
270 
271 		if (bfr[len - 1] != '\n')
272 			startofline = 0;
273 		else
274 			startofline = 1;
275 
276 		/* Open output file if needed */
277 		if (!file_open)
278 			newfile();
279 
280 		/* Write out line */
281 		if (write(ofd, bfr, len) != len) {
282 			free(bfr);
283 			err(EX_IOERR, "write");
284 		}
285 	}
286 
287 	free(bfr);
288 
289 	/* EOF or error? */
290 	if (ferror(infp))
291 		err(EX_IOERR, "read");
292 	else
293 		exit(0);
294 }
295 
296 /*
297  * newfile --
298  *	Open a new output file.
299  */
300 void
301 newfile(void)
302 {
303 	long i, maxfiles, tfnum;
304 	static long fnum;
305 	static int defname;
306 	static char *fpnt;
307 
308 	if (ofd == -1) {
309 		if (fname[0] == '\0') {
310 			fname[0] = 'x';
311 			fpnt = fname + 1;
312 			defname = 1;
313 		} else {
314 			fpnt = fname + strlen(fname);
315 			defname = 0;
316 		}
317 		ofd = fileno(stdout);
318 	}
319 
320 	/* maxfiles = 26^sufflen, but don't use libm. */
321 	for (maxfiles = 1, i = 0; i < sufflen; i++)
322 		if ((maxfiles *= 26) <= 0)
323 			errx(EX_USAGE, "suffix is too long (max %ld)", i);
324 
325 	if (fnum == maxfiles)
326 		errx(EX_DATAERR, "too many files");
327 
328 	/* Generate suffix of sufflen letters */
329 	tfnum = fnum;
330 	i = sufflen - 1;
331 	do {
332 		fpnt[i] = tfnum % 26 + 'a';
333 		tfnum /= 26;
334 	} while (i-- > 0);
335 	fpnt[sufflen] = '\0';
336 
337 	++fnum;
338 	if (!freopen(fname, "w", stdout))
339 		err(EX_IOERR, "%s", fname);
340 	file_open = 1;
341 }
342 
/*
 * usage --
 *	Write the command synopsis to standard error and terminate the
 *	program with status EX_USAGE.  Does not return.
 */
static void
usage(void)
{
	/* The strings contain no conversions, so plain fputs() is enough. */
	(void)fputs(
"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n",
	    stderr);
	(void)fputs(
"             [file [prefix]]\n",
	    stderr);
	exit(EX_USAGE);
}
352