1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#) Copyright (c) 1989, 1993 The Regents of the University of California. All rights reserved. 37 * @(#)cut.c 8.3 (Berkeley) 5/4/95 38 * $FreeBSD: src/usr.bin/cut/cut.c,v 1.9.2.3 2001/07/30 09:59:16 dd Exp $ 39 * $DragonFly: src/usr.bin/cut/cut.c,v 1.4 2007/11/06 05:50:23 hsu Exp $ 40 */ 41 42 #include <ctype.h> 43 #include <err.h> 44 #include <limits.h> 45 #include <locale.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 51 int cflag; 52 char dchar; 53 int dflag; 54 int fflag; 55 int sflag; 56 int wflag; 57 58 void c_cut (FILE *, const char *); 59 void f_cut (FILE *, const char *); 60 void get_list (char *); 61 int main (int, char **); 62 static void usage (void); 63 64 int 65 main(int argc, char **argv) 66 { 67 FILE *fp; 68 void (*fcn) (FILE *, const char *) = NULL; 69 int ch; 70 71 fcn = NULL; 72 setlocale (LC_ALL, ""); 73 74 dchar = '\t'; /* default delimiter is \t */ 75 76 /* Since we don't support multi-byte characters, the -c and -b 77 options are equivalent, and the -n option is meaningless. */ 78 while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1) 79 switch(ch) { 80 case 'b': 81 case 'c': 82 fcn = c_cut; 83 get_list(optarg); 84 cflag = 1; 85 break; 86 case 'd': 87 dchar = *optarg; 88 dflag = 1; 89 break; 90 case 'f': 91 get_list(optarg); 92 fcn = f_cut; 93 fflag = 1; 94 break; 95 case 's': 96 sflag = 1; 97 break; 98 case 'n': 99 break; 100 case 'w': 101 wflag = 1; 102 break; 103 case '?': 104 default: 105 usage(); 106 } 107 argc -= optind; 108 argv += optind; 109 110 if (fflag) { 111 if (cflag || (wflag && dflag)) 112 usage(); 113 } else if (!cflag || dflag || sflag || wflag) 114 usage(); 115 116 if (*argv) 117 for (; *argv; ++argv) { 118 if (!(fp = fopen(*argv, "r"))) 119 err(1, "%s", *argv); 120 fcn(fp, *argv); 121 (void)fclose(fp); 122 } 123 else 124 fcn(stdin, "stdin"); 125 exit(0); 126 } 127 128 size_t autostart, autostop, maxval; 129 130 char positions[_POSIX2_LINE_MAX + 1]; 131 132 void 133 get_list(char *list) 134 { 135 size_t setautostart, start, stop; 136 char *pos; 137 char *p; 138 139 /* 140 * set a byte in the positions array to indicate if a field or 141 * column is to be selected; use +1, it's 1-based, not 0-based. 142 * This parser is less restrictive than the Draft 9 POSIX spec. 143 * POSIX doesn't allow lists that aren't in increasing order or 144 * overlapping lists. We also handle "-3-5" although there's no 145 * real reason too. 146 */ 147 for (; (p = strsep(&list, ", \t")) != NULL;) { 148 setautostart = start = stop = 0; 149 if (*p == '-') { 150 ++p; 151 setautostart = 1; 152 } 153 if (isdigit((unsigned char)*p)) { 154 start = stop = strtol(p, &p, 10); 155 if (setautostart && start > autostart) 156 autostart = start; 157 } 158 if (*p == '-') { 159 if (isdigit((unsigned char)p[1])) 160 stop = strtol(p + 1, &p, 10); 161 if (*p == '-') { 162 ++p; 163 if (!autostop || autostop > stop) 164 autostop = stop; 165 } 166 } 167 if (*p) 168 errx(1, "[-cf] list: illegal list value"); 169 if (!stop || !start) 170 errx(1, "[-cf] list: values may not include zero"); 171 if (stop > _POSIX2_LINE_MAX) 172 errx(1, "[-cf] list: %ld too large (max %d)", 173 (long)stop, _POSIX2_LINE_MAX); 174 if (maxval < stop) 175 maxval = stop; 176 for (pos = positions + start; start++ <= stop; *pos++ = 1); 177 } 178 179 /* overlapping ranges */ 180 if (autostop && maxval > autostop) 181 maxval = autostop; 182 183 /* set autostart */ 184 if (autostart) 185 memset(positions + 1, '1', autostart); 186 } 187 188 /* ARGSUSED */ 189 void 190 c_cut(FILE *fp, const char *fname) 191 { 192 int ch, col; 193 char *pos; 194 fname = NULL; 195 196 ch = 0; 197 for (;;) { 198 pos = positions + 1; 199 for (col = maxval; col; --col) { 200 if ((ch = getc(fp)) == EOF) 201 return; 202 if (ch == '\n') 203 break; 204 if (*pos++) 205 (void)putchar(ch); 206 } 207 if (ch != '\n') { 208 if (autostop) 209 while ((ch = getc(fp)) != EOF && ch != '\n') 210 (void)putchar(ch); 211 else 212 while ((ch = getc(fp)) != EOF && ch != '\n'); 213 } 214 (void)putchar('\n'); 215 } 216 } 217 218 int 219 is_delim(int ch) 220 { 221 if (wflag) { 222 if (ch == ' ' || ch == '\t') 223 return 1; 224 } else { 225 if (ch == dchar) 226 return 1; 227 } 228 return 0; 229 } 230 231 void 232 f_cut(FILE *fp, const char *fname __unused) 233 { 234 int ch, field, isdelim; 235 char *pos, *p, sep; 236 int output; 237 char *lbuf, *mlbuf = NULL; 238 size_t lbuflen; 239 240 sep = wflag ? ' ' : dchar; 241 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 242 /* Assert EOL has a newline. */ 243 if (*(lbuf + lbuflen - 1) != '\n') { 244 /* Can't have > 1 line with no trailing newline. */ 245 mlbuf = malloc(lbuflen + 1); 246 if (mlbuf == NULL) 247 err(1, "malloc"); 248 memcpy(mlbuf, lbuf, lbuflen); 249 *(mlbuf + lbuflen) = '\n'; 250 lbuf = mlbuf; 251 } 252 output = 0; 253 for (isdelim = 0, p = lbuf;; ++p) { 254 ch = *p; 255 /* this should work if newline is delimiter */ 256 if (is_delim(ch)) 257 isdelim = 1; 258 if (ch == '\n') { 259 if (!isdelim && !sflag) 260 (void)fwrite(lbuf, lbuflen, 1, stdout); 261 break; 262 } 263 } 264 if (!isdelim) 265 continue; 266 267 pos = positions + 1; 268 for (field = maxval, p = lbuf; field; --field, ++pos) { 269 if (*pos) { 270 if (output++) 271 (void)putchar(sep); 272 while ((ch = *p++) != '\n' && !is_delim(ch)) 273 (void)putchar(ch); 274 /* compress whitespace */ 275 if (wflag && ch != '\n') 276 while (is_delim(*p)) p++; 277 } else { 278 while ((ch = *p++) != '\n' && !is_delim(ch)) 279 continue; 280 /* compress whitespace */ 281 if (wflag && ch != '\n') 282 while (is_delim(*p)) p++; 283 } 284 if (ch == '\n') 285 break; 286 } 287 if (ch != '\n') { 288 if (autostop) { 289 if (output) 290 (void)putchar(sep); 291 for (; (ch = *p) != '\n'; ++p) 292 (void)putchar(ch); 293 } else 294 for (; (ch = *p) != '\n'; ++p); 295 } 296 (void)putchar('\n'); 297 } 298 if (mlbuf != NULL) 299 free(mlbuf); 300 } 301 302 static void 303 usage(void) 304 { 305 (void)fprintf(stderr, "%s\n%s\n%s\n", 306 "usage: cut -b list [-n] [file ...]", 307 " cut -c list [file ...]", 308 " cut -f list [-s] [-w | -d delim] [file ...]"); 309 exit(1); 310 } 311