1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1999 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Klaus Klein.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/types.h>
33
34 #include <err.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <locale.h>
38 #include <regex.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 #include <wchar.h>
44
/* Policies for choosing which lines of a logical page section get numbered. */
typedef enum {
	number_all,		/* every line is numbered */
	number_nonempty,	/* only non-empty lines are numbered */
	number_none,		/* lines are never numbered */
	number_regex		/* only lines matching a regular expression */
} numbering_type;
51
52 struct numbering_property {
53 const char * const name; /* for diagnostics */
54 numbering_type type; /* numbering type */
55 regex_t expr; /* for type == number_regex */
56 };
57
/*
 * printf(3) formats for the line number.  Each takes the field width
 * (via `*') followed by the number itself.
 */
#define	FORMAT_LN	"%-*d"	/* flush left, no leading zeros */
#define	FORMAT_RN	"%*d"	/* flush right, no leading zeros */
#define	FORMAT_RZ	"%0*d"	/* flush right, padded with leading zeros */

/* Indexes of the logical page sections; NP_LAST is the highest index. */
#define	FOOTER		0
#define	BODY		1
#define	HEADER		2
#define	NP_LAST		HEADER
67
68 static struct numbering_property numbering_properties[NP_LAST + 1] = {
69 { .name = "footer", .type = number_none },
70 { .name = "body", .type = number_nonempty },
71 { .name = "header", .type = number_none }
72 };
73
/* Larger of two values; note that each argument may be evaluated twice. */
#define	max(a, b)	((a) < (b) ? (b) : (a))
75
/*
 * Upper bound on the number of characters in the decimal representation
 * of a (signed) int, excluding the terminating NUL; borrowed from tzcode.
 * 302 / 1000 is a slight over-estimate of log10(2), i.e. of the number of
 * decimal digits contributed by each value bit.
 */
#define	INT_STRLEN_MAXIMUM \
	(((sizeof(int) * CHAR_BIT) - 1) * 302 / 1000 + 2)
82
/* Forward declarations of the file-local functions. */
static void	filter(void);
static void	parse_numbering(const char *argstr, int section);
static void	usage(void);
86
/*
 * Scratch buffer into which a line number is formatted before it is
 * printed; allocated in main().
 */
static char *intbuffer;

/*
 * Byte sequence that marks the start of a logical page section when it
 * appears alone on a line (once, twice or three times), together with
 * the number of bytes of it that are in use.
 */
static char delim[2 * MB_LEN_MAX];
static int delimlen;
95
96 /*
97 * Configurable parameters.
98 */
99
100 /* line numbering format */
101 static const char *format = FORMAT_RN;
102
103 /* increment value used to number logical page lines */
104 static int incr = 1;
105
106 /* number of adjacent blank lines to be considered (and numbered) as one */
107 static unsigned int nblank = 1;
108
109 /* whether to restart numbering at logical page delimiters */
110 static int restart = 1;
111
112 /* characters used in separating the line number and the corrsp. text line */
113 static const char *sep = "\t";
114
115 /* initial value used to number logical page lines */
116 static int startnum = 1;
117
118 /* number of characters to be used for the line number */
119 /* should be unsigned but required signed by `*' precision conversion */
120 static int width = 6;
121
122
123 int
main(int argc,char * argv[])124 main(int argc, char *argv[])
125 {
126 int c;
127 long val;
128 unsigned long uval;
129 char *ep;
130 size_t intbuffersize, clen;
131 char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' };
132 size_t delim1len = 1, delim2len = 1;
133
134 (void)setlocale(LC_ALL, "");
135
136 while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) {
137 switch (c) {
138 case 'p':
139 restart = 0;
140 break;
141 case 'b':
142 parse_numbering(optarg, BODY);
143 break;
144 case 'd':
145 clen = mbrlen(optarg, MB_CUR_MAX, NULL);
146 if (clen == (size_t)-1 || clen == (size_t)-2)
147 errc(EXIT_FAILURE, EILSEQ, NULL);
148 if (clen != 0) {
149 memcpy(delim1, optarg, delim1len = clen);
150 clen = mbrlen(optarg + delim1len,
151 MB_CUR_MAX, NULL);
152 if (clen == (size_t)-1 ||
153 clen == (size_t)-2)
154 errc(EXIT_FAILURE, EILSEQ, NULL);
155 if (clen != 0) {
156 memcpy(delim2, optarg + delim1len,
157 delim2len = clen);
158 if (optarg[delim1len + clen] != '\0')
159 errx(EXIT_FAILURE,
160 "invalid delim argument -- %s",
161 optarg);
162 }
163 }
164 break;
165 case 'f':
166 parse_numbering(optarg, FOOTER);
167 break;
168 case 'h':
169 parse_numbering(optarg, HEADER);
170 break;
171 case 'i':
172 errno = 0;
173 val = strtol(optarg, &ep, 10);
174 if ((ep != NULL && *ep != '\0') ||
175 ((val == LONG_MIN || val == LONG_MAX) && errno != 0))
176 errx(EXIT_FAILURE,
177 "invalid incr argument -- %s", optarg);
178 incr = (int)val;
179 break;
180 case 'l':
181 errno = 0;
182 uval = strtoul(optarg, &ep, 10);
183 if ((ep != NULL && *ep != '\0') ||
184 (uval == ULONG_MAX && errno != 0))
185 errx(EXIT_FAILURE,
186 "invalid num argument -- %s", optarg);
187 nblank = (unsigned int)uval;
188 break;
189 case 'n':
190 if (strcmp(optarg, "ln") == 0) {
191 format = FORMAT_LN;
192 } else if (strcmp(optarg, "rn") == 0) {
193 format = FORMAT_RN;
194 } else if (strcmp(optarg, "rz") == 0) {
195 format = FORMAT_RZ;
196 } else
197 errx(EXIT_FAILURE,
198 "illegal format -- %s", optarg);
199 break;
200 case 's':
201 sep = optarg;
202 break;
203 case 'v':
204 errno = 0;
205 val = strtol(optarg, &ep, 10);
206 if ((ep != NULL && *ep != '\0') ||
207 ((val == LONG_MIN || val == LONG_MAX) && errno != 0))
208 errx(EXIT_FAILURE,
209 "invalid startnum value -- %s", optarg);
210 startnum = (int)val;
211 break;
212 case 'w':
213 errno = 0;
214 val = strtol(optarg, &ep, 10);
215 if ((ep != NULL && *ep != '\0') ||
216 ((val == LONG_MIN || val == LONG_MAX) && errno != 0))
217 errx(EXIT_FAILURE,
218 "invalid width value -- %s", optarg);
219 width = (int)val;
220 if (!(width > 0))
221 errx(EXIT_FAILURE,
222 "width argument must be > 0 -- %d",
223 width);
224 break;
225 case '?':
226 default:
227 usage();
228 /* NOTREACHED */
229 }
230 }
231 argc -= optind;
232 argv += optind;
233
234 switch (argc) {
235 case 0:
236 break;
237 case 1:
238 if (strcmp(argv[0], "-") != 0 &&
239 freopen(argv[0], "r", stdin) == NULL)
240 err(EXIT_FAILURE, "%s", argv[0]);
241 break;
242 default:
243 usage();
244 /* NOTREACHED */
245 }
246
247 /* Generate the delimiter sequence */
248 memcpy(delim, delim1, delim1len);
249 memcpy(delim + delim1len, delim2, delim2len);
250 delimlen = delim1len + delim2len;
251
252 /* Allocate a buffer suitable for preformatting line number. */
253 intbuffersize = max((int)INT_STRLEN_MAXIMUM, width) + 1; /* NUL */
254 if ((intbuffer = malloc(intbuffersize)) == NULL)
255 err(EXIT_FAILURE, "cannot allocate preformatting buffer");
256
257 /* Do the work. */
258 filter();
259
260 exit(EXIT_SUCCESS);
261 /* NOTREACHED */
262 }
263
264 static void
filter(void)265 filter(void)
266 {
267 char *buffer;
268 size_t buffersize;
269 ssize_t linelen;
270 int line; /* logical line number */
271 int section; /* logical page section */
272 unsigned int adjblank; /* adjacent blank lines */
273 int consumed; /* intbuffer measurement */
274 int donumber = 0, idx;
275
276 adjblank = 0;
277 line = startnum;
278 section = BODY;
279
280 buffer = NULL;
281 buffersize = 0;
282 while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) {
283 for (idx = FOOTER; idx <= NP_LAST; idx++) {
284 /* Does it look like a delimiter? */
285 if (delimlen * (idx + 1) > linelen)
286 break;
287 if (memcmp(buffer + delimlen * idx, delim,
288 delimlen) != 0)
289 break;
290 /* Was this the whole line? */
291 if (buffer[delimlen * (idx + 1)] == '\n') {
292 section = idx;
293 adjblank = 0;
294 if (restart)
295 line = startnum;
296 goto nextline;
297 }
298 }
299
300 switch (numbering_properties[section].type) {
301 case number_all:
302 /*
303 * Doing this for number_all only is disputable, but
304 * the standard expresses an explicit dependency on
305 * `-b a' etc.
306 */
307 if (buffer[0] == '\n' && ++adjblank < nblank)
308 donumber = 0;
309 else
310 donumber = 1, adjblank = 0;
311 break;
312 case number_nonempty:
313 donumber = (buffer[0] != '\n');
314 break;
315 case number_none:
316 donumber = 0;
317 break;
318 case number_regex:
319 donumber =
320 (regexec(&numbering_properties[section].expr,
321 buffer, 0, NULL, 0) == 0);
322 break;
323 }
324
325 if (donumber) {
326 /* Note: sprintf() is safe here. */
327 consumed = sprintf(intbuffer, format, width, line);
328 (void)printf("%s",
329 intbuffer + max(0, consumed - width));
330 line += incr;
331 } else {
332 (void)printf("%*s", width, "");
333 }
334 (void)fputs(sep, stdout);
335 (void)fwrite(buffer, linelen, 1, stdout);
336
337 if (ferror(stdout))
338 err(EXIT_FAILURE, "output error");
339 nextline:
340 ;
341 }
342
343 if (ferror(stdin))
344 err(EXIT_FAILURE, "input error");
345
346 free(buffer);
347 }
348
349 /*
350 * Various support functions.
351 */
352
353 static void
parse_numbering(const char * argstr,int section)354 parse_numbering(const char *argstr, int section)
355 {
356 int error;
357 char errorbuf[NL_TEXTMAX];
358
359 switch (argstr[0]) {
360 case 'a':
361 numbering_properties[section].type = number_all;
362 break;
363 case 'n':
364 numbering_properties[section].type = number_none;
365 break;
366 case 't':
367 numbering_properties[section].type = number_nonempty;
368 break;
369 case 'p':
370 /* If there was a previous expression, throw it away. */
371 if (numbering_properties[section].type == number_regex)
372 regfree(&numbering_properties[section].expr);
373 else
374 numbering_properties[section].type = number_regex;
375
376 /* Compile/validate the supplied regular expression. */
377 if ((error = regcomp(&numbering_properties[section].expr,
378 &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) {
379 (void)regerror(error,
380 &numbering_properties[section].expr,
381 errorbuf, sizeof (errorbuf));
382 errx(EXIT_FAILURE,
383 "%s expr: %s -- %s",
384 numbering_properties[section].name, errorbuf,
385 &argstr[1]);
386 }
387 break;
388 default:
389 errx(EXIT_FAILURE,
390 "illegal %s line numbering type -- %s",
391 numbering_properties[section].name, argstr);
392 }
393 }
394
/*
 * Write the command synopsis to standard error and terminate the program
 * with a failure status.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: nl [-p] [-b type] [-d delim] [-f type] [-h type] [-i incr] [-l num]\n"
" [-n format] [-s sep] [-v startnum] [-w width] [file]\n";

	(void)fputs(synopsis, stderr);
	exit(EXIT_FAILURE);
}
404