1 /* $OpenBSD: nl.c,v 1.6 2015/10/09 01:37:08 deraadt Exp $ */ 2 /* $NetBSD: nl.c,v 1.11 2011/08/16 12:00:46 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1999 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Klaus Klein. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <err.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <locale.h> 37 #include <regex.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 #include <wchar.h> 43 44 typedef enum { 45 number_all, /* number all lines */ 46 number_nonempty, /* number non-empty lines */ 47 number_none, /* no line numbering */ 48 number_regex /* number lines matching regular expression */ 49 } numbering_type; 50 51 struct numbering_property { 52 const char * const name; /* for diagnostics */ 53 numbering_type type; /* numbering type */ 54 regex_t expr; /* for type == number_regex */ 55 }; 56 57 /* line numbering formats */ 58 #define FORMAT_LN "%-*d" /* left justified, leading zeros suppressed */ 59 #define FORMAT_RN "%*d" /* right justified, leading zeros suppressed */ 60 #define FORMAT_RZ "%0*d" /* right justified, leading zeros kept */ 61 62 #define FOOTER 0 63 #define BODY 1 64 #define HEADER 2 65 #define NP_LAST HEADER 66 67 static struct numbering_property numbering_properties[NP_LAST + 1] = { 68 { "footer", number_none, { 0, 0, 0, 0 } }, 69 { "body", number_nonempty, { 0, 0, 0, 0 } }, 70 { "header", number_none, { 0, 0, 0, 0 } }, 71 }; 72 73 void filter(void); 74 void parse_numbering(const char *, int); 75 __dead void usage(void); 76 77 /* 78 * Delimiter characters that indicate the start of a logical page section. 79 */ 80 static char delim[2 * MB_LEN_MAX]; 81 static int delimlen; 82 83 /* 84 * Configurable parameters. 85 */ 86 87 /* line numbering format */ 88 static const char *format = FORMAT_RN; 89 90 /* increment value used to number logical page lines */ 91 static int incr = 1; 92 93 /* number of adjacent blank lines to be considered (and numbered) as one */ 94 static unsigned int nblank = 1; 95 96 /* whether to restart numbering at logical page delimiters */ 97 static int restart = 1; 98 99 /* characters used in separating the line number and the corrsp. text line */ 100 static const char *sep = "\t"; 101 102 /* initial value used to number logical page lines */ 103 static int startnum = 1; 104 105 /* number of characters to be used for the line number */ 106 /* should be unsigned but required signed by `*' precision conversion */ 107 static int width = 6; 108 109 110 int 111 main(int argc, char *argv[]) 112 { 113 int c; 114 size_t clen; 115 char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' }; 116 size_t delim1len = 1, delim2len = 1; 117 const char *errstr; 118 119 (void)setlocale(LC_ALL, ""); 120 121 if (pledge("stdio rpath", NULL) == -1) 122 err(1, "pledge"); 123 124 while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) { 125 switch (c) { 126 case 'p': 127 restart = 0; 128 break; 129 case 'b': 130 parse_numbering(optarg, BODY); 131 break; 132 case 'd': 133 clen = mbrlen(optarg, MB_CUR_MAX, NULL); 134 if (clen == (size_t)-1 || clen == (size_t)-2) 135 errc(EXIT_FAILURE, EILSEQ, NULL); 136 if (clen != 0) { 137 memcpy(delim1, optarg, delim1len = clen); 138 clen = mbrlen(optarg + delim1len, 139 MB_CUR_MAX, NULL); 140 if (clen == (size_t)-1 || clen == (size_t)-2) 141 errc(EXIT_FAILURE, EILSEQ, NULL); 142 if (clen != 0) { 143 memcpy(delim2, optarg + delim1len, 144 delim2len = clen); 145 if (optarg[delim1len + clen] != '\0') { 146 errx(EXIT_FAILURE, 147 "invalid delimiter: %s", 148 optarg); 149 } 150 } 151 } 152 break; 153 case 'f': 154 parse_numbering(optarg, FOOTER); 155 break; 156 case 'h': 157 parse_numbering(optarg, HEADER); 158 break; 159 case 'i': 160 incr = strtonum(optarg, INT_MIN, INT_MAX, &errstr); 161 if (errstr) 162 errx(EXIT_FAILURE, "increment value is %s: %s", 163 errstr, optarg); 164 break; 165 case 'l': 166 nblank = strtonum(optarg, 0, UINT_MAX, &errstr); 167 if (errstr) 168 errx(EXIT_FAILURE, 169 "blank line value is %s: %s", 170 errstr, optarg); 171 break; 172 case 'n': 173 if (strcmp(optarg, "ln") == 0) { 174 format = FORMAT_LN; 175 } else if (strcmp(optarg, "rn") == 0) { 176 format = FORMAT_RN; 177 } else if (strcmp(optarg, "rz") == 0) { 178 format = FORMAT_RZ; 179 } else 180 errx(EXIT_FAILURE, 181 "illegal format -- %s", optarg); 182 break; 183 case 's': 184 sep = optarg; 185 break; 186 case 'v': 187 startnum = strtonum(optarg, INT_MIN, INT_MAX, &errstr); 188 if (errstr) 189 errx(EXIT_FAILURE, 190 "initial logical page value is %s: %s", 191 errstr, optarg); 192 break; 193 case 'w': 194 width = strtonum(optarg, 1, INT_MAX, &errstr); 195 if (errstr) 196 errx(EXIT_FAILURE, "width is %s: %s", errstr, 197 optarg); 198 break; 199 case '?': 200 default: 201 usage(); 202 /* NOTREACHED */ 203 } 204 } 205 argc -= optind; 206 argv += optind; 207 208 switch (argc) { 209 case 0: 210 break; 211 case 1: 212 if (strcmp(argv[0], "-") != 0 && 213 freopen(argv[0], "r", stdin) == NULL) 214 err(EXIT_FAILURE, "%s", argv[0]); 215 break; 216 default: 217 usage(); 218 /* NOTREACHED */ 219 } 220 221 /* Generate the delimiter sequence */ 222 memcpy(delim, delim1, delim1len); 223 memcpy(delim + delim1len, delim2, delim2len); 224 delimlen = delim1len + delim2len; 225 226 /* Do the work. */ 227 filter(); 228 229 exit(EXIT_SUCCESS); 230 } 231 232 void 233 filter(void) 234 { 235 char *buffer; 236 size_t buffersize; 237 ssize_t linelen; 238 int line; /* logical line number */ 239 int section; /* logical page section */ 240 unsigned int adjblank; /* adjacent blank lines */ 241 int donumber = 0, idx; 242 243 adjblank = 0; 244 line = startnum; 245 section = BODY; 246 247 buffer = NULL; 248 buffersize = 0; 249 while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) { 250 for (idx = FOOTER; idx <= NP_LAST; idx++) { 251 /* Does it look like a delimiter? */ 252 if (delimlen * (idx + 1) > linelen) 253 break; 254 if (memcmp(buffer + delimlen * idx, delim, 255 delimlen) != 0) 256 break; 257 /* Was this the whole line? */ 258 if (buffer[delimlen * (idx + 1)] == '\n') { 259 section = idx; 260 adjblank = 0; 261 if (restart) 262 line = startnum; 263 goto nextline; 264 } 265 } 266 267 switch (numbering_properties[section].type) { 268 case number_all: 269 /* 270 * Doing this for number_all only is disputable, but 271 * the standard expresses an explicit dependency on 272 * `-b a' etc. 273 */ 274 if (buffer[0] == '\n' && ++adjblank < nblank) 275 donumber = 0; 276 else 277 donumber = 1, adjblank = 0; 278 break; 279 case number_nonempty: 280 donumber = (buffer[0] != '\n'); 281 break; 282 case number_none: 283 donumber = 0; 284 break; 285 case number_regex: 286 donumber = 287 (regexec(&numbering_properties[section].expr, 288 buffer, 0, NULL, 0) == 0); 289 break; 290 } 291 292 if (donumber) { 293 (void)printf(format, width, line); 294 line += incr; 295 (void)fputs(sep, stdout); 296 } else { 297 (void)printf("%*s", width, ""); 298 } 299 (void)fwrite(buffer, linelen, 1, stdout); 300 301 if (ferror(stdout)) 302 err(EXIT_FAILURE, "output error"); 303 nextline: 304 ; 305 } 306 307 if (ferror(stdin)) 308 err(EXIT_FAILURE, "input error"); 309 310 free(buffer); 311 } 312 313 /* 314 * Various support functions. 315 */ 316 317 void 318 parse_numbering(const char *argstr, int section) 319 { 320 int error; 321 char errorbuf[NL_TEXTMAX]; 322 323 switch (argstr[0]) { 324 case 'a': 325 numbering_properties[section].type = number_all; 326 break; 327 case 'n': 328 numbering_properties[section].type = number_none; 329 break; 330 case 't': 331 numbering_properties[section].type = number_nonempty; 332 break; 333 case 'p': 334 /* If there was a previous expression, throw it away. */ 335 if (numbering_properties[section].type == number_regex) 336 regfree(&numbering_properties[section].expr); 337 else 338 numbering_properties[section].type = number_regex; 339 340 /* Compile/validate the supplied regular expression. */ 341 if ((error = regcomp(&numbering_properties[section].expr, 342 &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) { 343 (void)regerror(error, 344 &numbering_properties[section].expr, 345 errorbuf, sizeof(errorbuf)); 346 errx(EXIT_FAILURE, 347 "%s expr: %s -- %s", 348 numbering_properties[section].name, errorbuf, 349 &argstr[1]); 350 } 351 break; 352 default: 353 errx(EXIT_FAILURE, 354 "illegal %s line numbering type -- %s", 355 numbering_properties[section].name, argstr); 356 } 357 } 358 359 __dead void 360 usage(void) 361 { 362 (void)fprintf(stderr, "usage: %s [-p] [-b type] [-d delim] [-f type] " 363 "[-h type] [-i incr] [-l num]\n\t[-n format] [-s sep] " 364 "[-v startnum] [-w width] [file]\n", getprogname()); 365 exit(EXIT_FAILURE); 366 } 367