1 /* $OpenBSD: split.c,v 1.23 2021/11/28 19:28:42 deraadt Exp $ */ 2 /* $NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $ */ 3 4 /* 5 * Copyright (c) 1987, 1993, 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/types.h> 34 35 #include <ctype.h> 36 #include <err.h> 37 #include <fcntl.h> 38 #include <limits.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 #include <regex.h> 44 45 #define _MAXBSIZE (64 * 1024) 46 47 #define DEFLINE 1000 /* Default num lines per file. */ 48 49 ssize_t bytecnt; /* Byte count to split on. */ 50 long numlines; /* Line count to split on. */ 51 int file_open; /* If a file open. */ 52 int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 53 char bfr[_MAXBSIZE]; /* I/O buffer. */ 54 char fname[PATH_MAX]; /* File name prefix. */ 55 regex_t rgx; 56 int pflag; 57 int sufflen = 2; /* File name suffix length. */ 58 59 void newfile(void); 60 void split1(void); 61 void split2(void); 62 __dead void usage(void); 63 64 int 65 main(int argc, char *argv[]) 66 { 67 int ch, scale; 68 char *ep, *p; 69 const char *errstr; 70 71 if (pledge("stdio rpath wpath cpath", NULL) == -1) 72 err(1, "pledge"); 73 74 while ((ch = getopt(argc, argv, "0123456789a:b:l:p:-")) != -1) 75 switch (ch) { 76 case '0': case '1': case '2': case '3': case '4': 77 case '5': case '6': case '7': case '8': case '9': 78 /* 79 * Undocumented kludge: split was originally designed 80 * to take a number after a dash. 81 */ 82 if (numlines == 0) { 83 p = argv[optind - 1]; 84 if (p[0] == '-' && p[1] == ch && !p[2]) 85 numlines = strtol(++p, &ep, 10); 86 else 87 numlines = 88 strtol(argv[optind] + 1, &ep, 10); 89 if (numlines <= 0 || *ep) 90 errx(1, "%s: illegal line count", 91 optarg); 92 } 93 break; 94 case '-': /* Undocumented: historic stdin flag. */ 95 if (ifd != -1) 96 usage(); 97 ifd = 0; 98 break; 99 case 'a': /* suffix length. */ 100 sufflen = strtonum(optarg, 1, NAME_MAX, &errstr); 101 if (errstr) 102 errx(1, "%s: %s", optarg, errstr); 103 break; 104 case 'b': /* Byte count. */ 105 if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 || 106 (*ep != '\0' && *ep != 'k' && *ep != 'm')) 107 errx(1, "%s: illegal byte count", optarg); 108 if (*ep == 'k') 109 scale = 1024; 110 else if (*ep == 'm') 111 scale = 1048576; 112 else 113 scale = 1; 114 if (bytecnt > SSIZE_MAX / scale) 115 errx(1, "%s: byte count too large", optarg); 116 bytecnt *= scale; 117 break; 118 case 'p' : /* pattern matching. */ 119 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 120 errx(1, "%s: illegal regexp", optarg); 121 pflag = 1; 122 break; 123 case 'l': /* Line count. */ 124 if (numlines != 0) 125 usage(); 126 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 127 errx(1, "%s: illegal line count", optarg); 128 break; 129 default: 130 usage(); 131 } 132 argv += optind; 133 argc -= optind; 134 135 if (*argv != NULL) 136 if (ifd == -1) { /* Input file. */ 137 if ((ifd = open(*argv, O_RDONLY)) < 0) 138 err(1, "%s", *argv); 139 ++argv; 140 } 141 if (*argv != NULL) /* File name prefix. */ 142 (void)strlcpy(fname, *argv++, sizeof(fname)); 143 if (*argv != NULL) 144 usage(); 145 146 if (strlen(fname) + sufflen >= sizeof(fname)) 147 errx(1, "suffix is too long"); 148 if (pflag && (numlines != 0 || bytecnt != 0)) 149 usage(); 150 151 if (numlines == 0) 152 numlines = DEFLINE; 153 else if (bytecnt != 0) 154 usage(); 155 156 if (ifd == -1) /* Stdin by default. */ 157 ifd = 0; 158 159 if (bytecnt) { 160 split1(); 161 exit (0); 162 } 163 split2(); 164 if (pflag) 165 regfree(&rgx); 166 exit(0); 167 } 168 169 /* 170 * split1 -- 171 * Split the input by bytes. 172 */ 173 void 174 split1(void) 175 { 176 ssize_t bcnt, dist, len; 177 char *C; 178 179 for (bcnt = 0;;) 180 switch ((len = read(ifd, bfr, sizeof(bfr)))) { 181 case 0: 182 exit(0); 183 case -1: 184 err(1, "read"); 185 /* NOTREACHED */ 186 default: 187 if (!file_open) 188 newfile(); 189 if (bcnt + len >= bytecnt) { 190 dist = bytecnt - bcnt; 191 if (write(ofd, bfr, dist) != dist) 192 err(1, "write"); 193 len -= dist; 194 for (C = bfr + dist; len >= bytecnt; 195 len -= bytecnt, C += bytecnt) { 196 newfile(); 197 if (write(ofd, C, bytecnt) != bytecnt) 198 err(1, "write"); 199 } 200 if (len != 0) { 201 newfile(); 202 if (write(ofd, C, len) != len) 203 err(1, "write"); 204 } else 205 file_open = 0; 206 bcnt = len; 207 } else { 208 bcnt += len; 209 if (write(ofd, bfr, len) != len) 210 err(1, "write"); 211 } 212 } 213 } 214 215 /* 216 * split2 -- 217 * Split the input by lines. 218 */ 219 void 220 split2(void) 221 { 222 long lcnt = 0; 223 FILE *infp; 224 225 /* Stick a stream on top of input file descriptor */ 226 if ((infp = fdopen(ifd, "r")) == NULL) 227 err(1, "fdopen"); 228 229 /* Process input one line at a time */ 230 while (fgets(bfr, sizeof(bfr), infp) != NULL) { 231 const int len = strlen(bfr); 232 233 if (len == 0) 234 continue; 235 236 /* If line is too long to deal with, just write it out */ 237 if (bfr[len - 1] != '\n') 238 goto writeit; 239 240 /* Check if we need to start a new file */ 241 if (pflag) { 242 regmatch_t pmatch; 243 244 pmatch.rm_so = 0; 245 pmatch.rm_eo = len - 1; 246 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 247 newfile(); 248 } else if (lcnt++ == numlines) { 249 newfile(); 250 lcnt = 1; 251 } 252 253 writeit: 254 /* Open output file if needed */ 255 if (!file_open) 256 newfile(); 257 258 /* Write out line */ 259 if (write(ofd, bfr, len) != len) 260 err(1, "write"); 261 } 262 263 /* EOF or error? */ 264 if (ferror(infp)) 265 err(1, "read"); 266 else 267 exit(0); 268 } 269 270 /* 271 * newfile -- 272 * Open a new output file. 273 */ 274 void 275 newfile(void) 276 { 277 static char *suffix, *sufftail; 278 char *sptr; 279 280 if (ofd == -1) { 281 ofd = fileno(stdout); 282 if (*fname == '\0') { 283 *fname = 'x'; /* no name specified, use 'x' */ 284 memset(fname + 1, 'a', sufflen); 285 suffix = fname; 286 sufflen++; /* treat 'x' as part of suffix */ 287 } else { 288 suffix = fname + strlen(fname); 289 memset(suffix, 'a', sufflen); 290 } 291 suffix[sufflen] = '\0'; 292 sufftail = suffix + sufflen - 1; 293 } else { 294 for (sptr = sufftail; sptr >= suffix; sptr--) { 295 if (*sptr != 'z') { 296 (*sptr)++; 297 break; 298 } else 299 *sptr = 'a'; 300 } 301 if (sptr < suffix) 302 errx(1, "too many files"); 303 } 304 305 if (!freopen(fname, "w", stdout)) 306 err(1, "%s", fname); 307 file_open = 1; 308 } 309 310 __dead void 311 usage(void) 312 { 313 extern char *__progname; 314 315 (void)fprintf(stderr, "usage: %s [-a suffix_length]\n" 316 " [-b byte_count[k|m] | -l line_count | -p pattern] " 317 "[file [name]]\n", __progname); 318 exit(1); 319 } 320