1 /* $OpenBSD: nl.c,v 1.3 2014/05/20 01:25:23 guenther Exp $ */ 2 /* $NetBSD: nl.c,v 1.11 2011/08/16 12:00:46 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1999 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Klaus Klein. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <locale.h> 39 #include <regex.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <wchar.h> 45 46 typedef enum { 47 number_all, /* number all lines */ 48 number_nonempty, /* number non-empty lines */ 49 number_none, /* no line numbering */ 50 number_regex /* number lines matching regular expression */ 51 } numbering_type; 52 53 struct numbering_property { 54 const char * const name; /* for diagnostics */ 55 numbering_type type; /* numbering type */ 56 regex_t expr; /* for type == number_regex */ 57 }; 58 59 /* line numbering formats */ 60 #define FORMAT_LN "%-*d" /* left justified, leading zeros suppressed */ 61 #define FORMAT_RN "%*d" /* right justified, leading zeros suppressed */ 62 #define FORMAT_RZ "%0*d" /* right justified, leading zeros kept */ 63 64 #define FOOTER 0 65 #define BODY 1 66 #define HEADER 2 67 #define NP_LAST HEADER 68 69 static struct numbering_property numbering_properties[NP_LAST + 1] = { 70 { "footer", number_none, { 0, 0, 0, 0 } }, 71 { "body", number_nonempty, { 0, 0, 0, 0 } }, 72 { "header", number_none, { 0, 0, 0, 0 } }, 73 }; 74 75 void filter(void); 76 void parse_numbering(const char *, int); 77 __dead void usage(void); 78 79 /* 80 * Delimiter characters that indicate the start of a logical page section. 81 */ 82 static char delim[2 * MB_LEN_MAX]; 83 static int delimlen; 84 85 /* 86 * Configurable parameters. 87 */ 88 89 /* line numbering format */ 90 static const char *format = FORMAT_RN; 91 92 /* increment value used to number logical page lines */ 93 static int incr = 1; 94 95 /* number of adjacent blank lines to be considered (and numbered) as one */ 96 static unsigned int nblank = 1; 97 98 /* whether to restart numbering at logical page delimiters */ 99 static int restart = 1; 100 101 /* characters used in separating the line number and the corrsp. text line */ 102 static const char *sep = "\t"; 103 104 /* initial value used to number logical page lines */ 105 static int startnum = 1; 106 107 /* number of characters to be used for the line number */ 108 /* should be unsigned but required signed by `*' precision conversion */ 109 static int width = 6; 110 111 112 int 113 main(int argc, char *argv[]) 114 { 115 int c; 116 size_t clen; 117 char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' }; 118 size_t delim1len = 1, delim2len = 1; 119 const char *errstr; 120 121 (void)setlocale(LC_ALL, ""); 122 123 while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) { 124 switch (c) { 125 case 'p': 126 restart = 0; 127 break; 128 case 'b': 129 parse_numbering(optarg, BODY); 130 break; 131 case 'd': 132 clen = mbrlen(optarg, MB_CUR_MAX, NULL); 133 if (clen == (size_t)-1 || clen == (size_t)-2) 134 errc(EXIT_FAILURE, EILSEQ, NULL); 135 if (clen != 0) { 136 memcpy(delim1, optarg, delim1len = clen); 137 clen = mbrlen(optarg + delim1len, 138 MB_CUR_MAX, NULL); 139 if (clen == (size_t)-1 || clen == (size_t)-2) 140 errc(EXIT_FAILURE, EILSEQ, NULL); 141 if (clen != 0) { 142 memcpy(delim2, optarg + delim1len, 143 delim2len = clen); 144 if (optarg[delim1len + clen] != '\0') { 145 errx(EXIT_FAILURE, 146 "invalid delimiter: %s", 147 optarg); 148 } 149 } 150 } 151 break; 152 case 'f': 153 parse_numbering(optarg, FOOTER); 154 break; 155 case 'h': 156 parse_numbering(optarg, HEADER); 157 break; 158 case 'i': 159 incr = strtonum(optarg, INT_MIN, INT_MAX, &errstr); 160 if (errstr) 161 errx(EXIT_FAILURE, "increment value is %s: %s", 162 errstr, optarg); 163 break; 164 case 'l': 165 nblank = strtonum(optarg, 0, UINT_MAX, &errstr); 166 if (errstr) 167 errx(EXIT_FAILURE, 168 "blank line value is %s: %s", 169 errstr, optarg); 170 break; 171 case 'n': 172 if (strcmp(optarg, "ln") == 0) { 173 format = FORMAT_LN; 174 } else if (strcmp(optarg, "rn") == 0) { 175 format = FORMAT_RN; 176 } else if (strcmp(optarg, "rz") == 0) { 177 format = FORMAT_RZ; 178 } else 179 errx(EXIT_FAILURE, 180 "illegal format -- %s", optarg); 181 break; 182 case 's': 183 sep = optarg; 184 break; 185 case 'v': 186 startnum = strtonum(optarg, INT_MIN, INT_MAX, &errstr); 187 if (errstr) 188 errx(EXIT_FAILURE, 189 "initial logical page value is %s: %s", 190 errstr, optarg); 191 break; 192 case 'w': 193 width = strtonum(optarg, 1, INT_MAX, &errstr); 194 if (errstr) 195 errx(EXIT_FAILURE, "width is %s: %s", errstr, 196 optarg); 197 break; 198 case '?': 199 default: 200 usage(); 201 /* NOTREACHED */ 202 } 203 } 204 argc -= optind; 205 argv += optind; 206 207 switch (argc) { 208 case 0: 209 break; 210 case 1: 211 if (strcmp(argv[0], "-") != 0 && 212 freopen(argv[0], "r", stdin) == NULL) 213 err(EXIT_FAILURE, "%s", argv[0]); 214 break; 215 default: 216 usage(); 217 /* NOTREACHED */ 218 } 219 220 /* Generate the delimiter sequence */ 221 memcpy(delim, delim1, delim1len); 222 memcpy(delim + delim1len, delim2, delim2len); 223 delimlen = delim1len + delim2len; 224 225 /* Do the work. */ 226 filter(); 227 228 exit(EXIT_SUCCESS); 229 } 230 231 void 232 filter(void) 233 { 234 char *buffer; 235 size_t buffersize; 236 ssize_t linelen; 237 int line; /* logical line number */ 238 int section; /* logical page section */ 239 unsigned int adjblank; /* adjacent blank lines */ 240 int donumber = 0, idx; 241 242 adjblank = 0; 243 line = startnum; 244 section = BODY; 245 246 buffer = NULL; 247 buffersize = 0; 248 while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) { 249 for (idx = FOOTER; idx <= NP_LAST; idx++) { 250 /* Does it look like a delimiter? */ 251 if (delimlen * (idx + 1) > linelen) 252 break; 253 if (memcmp(buffer + delimlen * idx, delim, 254 delimlen) != 0) 255 break; 256 /* Was this the whole line? */ 257 if (buffer[delimlen * (idx + 1)] == '\n') { 258 section = idx; 259 adjblank = 0; 260 if (restart) 261 line = startnum; 262 goto nextline; 263 } 264 } 265 266 switch (numbering_properties[section].type) { 267 case number_all: 268 /* 269 * Doing this for number_all only is disputable, but 270 * the standard expresses an explicit dependency on 271 * `-b a' etc. 272 */ 273 if (buffer[0] == '\n' && ++adjblank < nblank) 274 donumber = 0; 275 else 276 donumber = 1, adjblank = 0; 277 break; 278 case number_nonempty: 279 donumber = (buffer[0] != '\n'); 280 break; 281 case number_none: 282 donumber = 0; 283 break; 284 case number_regex: 285 donumber = 286 (regexec(&numbering_properties[section].expr, 287 buffer, 0, NULL, 0) == 0); 288 break; 289 } 290 291 if (donumber) { 292 (void)printf(format, width, line); 293 line += incr; 294 (void)fputs(sep, stdout); 295 } else { 296 (void)printf("%*s", width, ""); 297 } 298 (void)fwrite(buffer, linelen, 1, stdout); 299 300 if (ferror(stdout)) 301 err(EXIT_FAILURE, "output error"); 302 nextline: 303 ; 304 } 305 306 if (ferror(stdin)) 307 err(EXIT_FAILURE, "input error"); 308 309 free(buffer); 310 } 311 312 /* 313 * Various support functions. 314 */ 315 316 void 317 parse_numbering(const char *argstr, int section) 318 { 319 int error; 320 char errorbuf[NL_TEXTMAX]; 321 322 switch (argstr[0]) { 323 case 'a': 324 numbering_properties[section].type = number_all; 325 break; 326 case 'n': 327 numbering_properties[section].type = number_none; 328 break; 329 case 't': 330 numbering_properties[section].type = number_nonempty; 331 break; 332 case 'p': 333 /* If there was a previous expression, throw it away. */ 334 if (numbering_properties[section].type == number_regex) 335 regfree(&numbering_properties[section].expr); 336 else 337 numbering_properties[section].type = number_regex; 338 339 /* Compile/validate the supplied regular expression. */ 340 if ((error = regcomp(&numbering_properties[section].expr, 341 &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) { 342 (void)regerror(error, 343 &numbering_properties[section].expr, 344 errorbuf, sizeof(errorbuf)); 345 errx(EXIT_FAILURE, 346 "%s expr: %s -- %s", 347 numbering_properties[section].name, errorbuf, 348 &argstr[1]); 349 } 350 break; 351 default: 352 errx(EXIT_FAILURE, 353 "illegal %s line numbering type -- %s", 354 numbering_properties[section].name, argstr); 355 } 356 } 357 358 __dead void 359 usage(void) 360 { 361 (void)fprintf(stderr, "usage: %s [-p] [-b type] [-d delim] [-f type] " 362 "[-h type] [-i incr] [-l num]\n\t[-n format] [-s sep] " 363 "[-v startnum] [-w width] [file]\n", getprogname()); 364 exit(EXIT_FAILURE); 365 } 366