1 /* $OpenBSD: split.c,v 1.23 2021/11/28 19:28:42 deraadt Exp $ */
2 /* $NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $ */
3
4 /*
5 * Copyright (c) 1987, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <sys/types.h>
34
35 #include <ctype.h>
36 #include <err.h>
37 #include <fcntl.h>
38 #include <limits.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 #include <regex.h>
44
#define _MAXBSIZE	(64 * 1024)	/* Size of the shared I/O buffer. */

#define DEFLINE	1000			/* Default num lines per file. */

ssize_t	 bytecnt;			/* Byte count to split on. */
long	 numlines;			/* Line count to split on. */
int	 file_open;			/* If a file open. */
int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
char	 bfr[_MAXBSIZE];		/* I/O buffer. */
char	 fname[PATH_MAX];		/* File name prefix. */
regex_t	 rgx;				/* Compiled -p pattern. */
int	 pflag;				/* Set when -p was given. */
int	 sufflen = 2;			/* File name suffix length. */

void	 newfile(void);			/* Open the next output file. */
void	 split1(void);			/* Split by byte count. */
void	 split2(void);			/* Split by line count or pattern. */
__dead void usage(void);		/* Print usage and exit. */
63
64 int
main(int argc,char * argv[])65 main(int argc, char *argv[])
66 {
67 int ch, scale;
68 char *ep, *p;
69 const char *errstr;
70
71 if (pledge("stdio rpath wpath cpath", NULL) == -1)
72 err(1, "pledge");
73
74 while ((ch = getopt(argc, argv, "0123456789a:b:l:p:-")) != -1)
75 switch (ch) {
76 case '0': case '1': case '2': case '3': case '4':
77 case '5': case '6': case '7': case '8': case '9':
78 /*
79 * Undocumented kludge: split was originally designed
80 * to take a number after a dash.
81 */
82 if (numlines == 0) {
83 p = argv[optind - 1];
84 if (p[0] == '-' && p[1] == ch && !p[2])
85 numlines = strtol(++p, &ep, 10);
86 else
87 numlines =
88 strtol(argv[optind] + 1, &ep, 10);
89 if (numlines <= 0 || *ep)
90 errx(1, "%s: illegal line count",
91 optarg);
92 }
93 break;
94 case '-': /* Undocumented: historic stdin flag. */
95 if (ifd != -1)
96 usage();
97 ifd = 0;
98 break;
99 case 'a': /* suffix length. */
100 sufflen = strtonum(optarg, 1, NAME_MAX, &errstr);
101 if (errstr)
102 errx(1, "%s: %s", optarg, errstr);
103 break;
104 case 'b': /* Byte count. */
105 if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
106 (*ep != '\0' && *ep != 'k' && *ep != 'm'))
107 errx(1, "%s: illegal byte count", optarg);
108 if (*ep == 'k')
109 scale = 1024;
110 else if (*ep == 'm')
111 scale = 1048576;
112 else
113 scale = 1;
114 if (bytecnt > SSIZE_MAX / scale)
115 errx(1, "%s: byte count too large", optarg);
116 bytecnt *= scale;
117 break;
118 case 'p' : /* pattern matching. */
119 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
120 errx(1, "%s: illegal regexp", optarg);
121 pflag = 1;
122 break;
123 case 'l': /* Line count. */
124 if (numlines != 0)
125 usage();
126 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
127 errx(1, "%s: illegal line count", optarg);
128 break;
129 default:
130 usage();
131 }
132 argv += optind;
133 argc -= optind;
134
135 if (*argv != NULL)
136 if (ifd == -1) { /* Input file. */
137 if ((ifd = open(*argv, O_RDONLY)) < 0)
138 err(1, "%s", *argv);
139 ++argv;
140 }
141 if (*argv != NULL) /* File name prefix. */
142 (void)strlcpy(fname, *argv++, sizeof(fname));
143 if (*argv != NULL)
144 usage();
145
146 if (strlen(fname) + sufflen >= sizeof(fname))
147 errx(1, "suffix is too long");
148 if (pflag && (numlines != 0 || bytecnt != 0))
149 usage();
150
151 if (numlines == 0)
152 numlines = DEFLINE;
153 else if (bytecnt != 0)
154 usage();
155
156 if (ifd == -1) /* Stdin by default. */
157 ifd = 0;
158
159 if (bytecnt) {
160 split1();
161 exit (0);
162 }
163 split2();
164 if (pflag)
165 regfree(&rgx);
166 exit(0);
167 }
168
169 /*
170 * split1 --
171 * Split the input by bytes.
172 */
173 void
split1(void)174 split1(void)
175 {
176 ssize_t bcnt, dist, len;
177 char *C;
178
179 for (bcnt = 0;;)
180 switch ((len = read(ifd, bfr, sizeof(bfr)))) {
181 case 0:
182 exit(0);
183 case -1:
184 err(1, "read");
185 /* NOTREACHED */
186 default:
187 if (!file_open)
188 newfile();
189 if (bcnt + len >= bytecnt) {
190 dist = bytecnt - bcnt;
191 if (write(ofd, bfr, dist) != dist)
192 err(1, "write");
193 len -= dist;
194 for (C = bfr + dist; len >= bytecnt;
195 len -= bytecnt, C += bytecnt) {
196 newfile();
197 if (write(ofd, C, bytecnt) != bytecnt)
198 err(1, "write");
199 }
200 if (len != 0) {
201 newfile();
202 if (write(ofd, C, len) != len)
203 err(1, "write");
204 } else
205 file_open = 0;
206 bcnt = len;
207 } else {
208 bcnt += len;
209 if (write(ofd, bfr, len) != len)
210 err(1, "write");
211 }
212 }
213 }
214
215 /*
216 * split2 --
217 * Split the input by lines.
218 */
219 void
split2(void)220 split2(void)
221 {
222 long lcnt = 0;
223 FILE *infp;
224
225 /* Stick a stream on top of input file descriptor */
226 if ((infp = fdopen(ifd, "r")) == NULL)
227 err(1, "fdopen");
228
229 /* Process input one line at a time */
230 while (fgets(bfr, sizeof(bfr), infp) != NULL) {
231 const int len = strlen(bfr);
232
233 if (len == 0)
234 continue;
235
236 /* If line is too long to deal with, just write it out */
237 if (bfr[len - 1] != '\n')
238 goto writeit;
239
240 /* Check if we need to start a new file */
241 if (pflag) {
242 regmatch_t pmatch;
243
244 pmatch.rm_so = 0;
245 pmatch.rm_eo = len - 1;
246 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
247 newfile();
248 } else if (lcnt++ == numlines) {
249 newfile();
250 lcnt = 1;
251 }
252
253 writeit:
254 /* Open output file if needed */
255 if (!file_open)
256 newfile();
257
258 /* Write out line */
259 if (write(ofd, bfr, len) != len)
260 err(1, "write");
261 }
262
263 /* EOF or error? */
264 if (ferror(infp))
265 err(1, "read");
266 else
267 exit(0);
268 }
269
270 /*
271 * newfile --
272 * Open a new output file.
273 */
274 void
newfile(void)275 newfile(void)
276 {
277 static char *suffix, *sufftail;
278 char *sptr;
279
280 if (ofd == -1) {
281 ofd = fileno(stdout);
282 if (*fname == '\0') {
283 *fname = 'x'; /* no name specified, use 'x' */
284 memset(fname + 1, 'a', sufflen);
285 suffix = fname;
286 sufflen++; /* treat 'x' as part of suffix */
287 } else {
288 suffix = fname + strlen(fname);
289 memset(suffix, 'a', sufflen);
290 }
291 suffix[sufflen] = '\0';
292 sufftail = suffix + sufflen - 1;
293 } else {
294 for (sptr = sufftail; sptr >= suffix; sptr--) {
295 if (*sptr != 'z') {
296 (*sptr)++;
297 break;
298 } else
299 *sptr = 'a';
300 }
301 if (sptr < suffix)
302 errx(1, "too many files");
303 }
304
305 if (!freopen(fname, "w", stdout))
306 err(1, "%s", fname);
307 file_open = 1;
308 }
309
310 __dead void
usage(void)311 usage(void)
312 {
313 extern char *__progname;
314
315 (void)fprintf(stderr, "usage: %s [-a suffix_length]\n"
316 " [-b byte_count[k|m] | -l line_count | -p pattern] "
317 "[file [name]]\n", __progname);
318 exit(1);
319 }
320