1 /* $OpenBSD: nl.c,v 1.8 2022/12/04 23:50:49 cheloha Exp $ */
2 /* $NetBSD: nl.c,v 1.11 2011/08/16 12:00:46 christos Exp $ */
3
4 /*-
5 * Copyright (c) 1999 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Klaus Klein.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include <err.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <regex.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <wchar.h>
43
/*
 * Policy deciding which lines of a logical page section get a line number.
 */
typedef enum {
	/* every line is numbered */
	number_all,
	/* only lines that are not empty are numbered */
	number_nonempty,
	/* no line is numbered */
	number_none,
	/* only lines matching a regular expression are numbered */
	number_regex
} numbering_type;
50
51 struct numbering_property {
52 const char * const name; /* for diagnostics */
53 numbering_type type; /* numbering type */
54 regex_t expr; /* for type == number_regex */
55 };
56
/*
 * printf(3) conversions used to print a line number.  The `*' field
 * width is passed as an extra argument ahead of the number itself.
 */
#define FORMAT_LN	"%-*d"	/* left justified, no leading zeros */
#define FORMAT_RN	"%*d"	/* right justified, no leading zeros */
#define FORMAT_RZ	"%0*d"	/* right justified, padded with zeros */

/*
 * Logical page sections.  The values are used as indices into
 * numbering_properties[].
 */
#define FOOTER		0
#define BODY		1
#define HEADER		2
#define NP_LAST		HEADER	/* highest section index */
66
67 static struct numbering_property numbering_properties[NP_LAST + 1] = {
68 { "footer", number_none, { 0, 0, 0, 0 } },
69 { "body", number_nonempty, { 0, 0, 0, 0 } },
70 { "header", number_none, { 0, 0, 0, 0 } },
71 };
72
73 void filter(void);
74 void parse_numbering(const char *, int);
75 __dead void usage(void);
76
/*
 * Byte sequence that marks the start of a logical page section: two
 * delimiter characters back to back, each up to MB_LEN_MAX bytes long.
 * delimlen is the number of bytes of delim[] actually in use.
 */
static char	delim[2 * MB_LEN_MAX];
static int	delimlen;
82
83 /*
84 * Configurable parameters.
85 */
86
87 /* line numbering format */
88 static const char *format = FORMAT_RN;
89
/* amount added to the line number after each numbered line */
static int		 incr = 1;

/* number of adjacent blank lines to be considered (and numbered) as one */
static unsigned int	 nblank = 1;

/* non-zero to restart numbering at each logical page delimiter */
static int		 restart = 1;

/* text written between the line number and the corresponding text line */
static const char	*sep = "\t";

/* number given to the first numbered line of a logical page */
static int		 startnum = 1;

/*
 * Field width of the line number.  It is never negative, but it is kept
 * as an int because that is the type printf(3) expects for a `*' width.
 */
static int		 width = 6;
108
109
110 int
main(int argc,char * argv[])111 main(int argc, char *argv[])
112 {
113 int c;
114 size_t clen;
115 char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' };
116 size_t delim1len = 1, delim2len = 1;
117 const char *errstr;
118
119 (void)setlocale(LC_ALL, "");
120
121 if (pledge("stdio rpath", NULL) == -1)
122 err(1, "pledge");
123
124 while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) {
125 switch (c) {
126 case 'p':
127 restart = 0;
128 break;
129 case 'b':
130 parse_numbering(optarg, BODY);
131 break;
132 case 'd':
133 clen = mbrlen(optarg, MB_CUR_MAX, NULL);
134 if (clen == (size_t)-1 || clen == (size_t)-2)
135 errc(EXIT_FAILURE, EILSEQ, NULL);
136 if (clen != 0) {
137 memcpy(delim1, optarg, delim1len = clen);
138 clen = mbrlen(optarg + delim1len,
139 MB_CUR_MAX, NULL);
140 if (clen == (size_t)-1 || clen == (size_t)-2)
141 errc(EXIT_FAILURE, EILSEQ, NULL);
142 if (clen != 0) {
143 memcpy(delim2, optarg + delim1len,
144 delim2len = clen);
145 if (optarg[delim1len + clen] != '\0') {
146 errx(EXIT_FAILURE,
147 "invalid delimiter: %s",
148 optarg);
149 }
150 }
151 }
152 break;
153 case 'f':
154 parse_numbering(optarg, FOOTER);
155 break;
156 case 'h':
157 parse_numbering(optarg, HEADER);
158 break;
159 case 'i':
160 incr = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
161 if (errstr)
162 errx(EXIT_FAILURE, "increment value is %s: %s",
163 errstr, optarg);
164 break;
165 case 'l':
166 nblank = strtonum(optarg, 0, UINT_MAX, &errstr);
167 if (errstr)
168 errx(EXIT_FAILURE,
169 "blank line value is %s: %s",
170 errstr, optarg);
171 break;
172 case 'n':
173 if (strcmp(optarg, "ln") == 0) {
174 format = FORMAT_LN;
175 } else if (strcmp(optarg, "rn") == 0) {
176 format = FORMAT_RN;
177 } else if (strcmp(optarg, "rz") == 0) {
178 format = FORMAT_RZ;
179 } else
180 errx(EXIT_FAILURE,
181 "illegal format -- %s", optarg);
182 break;
183 case 's':
184 sep = optarg;
185 break;
186 case 'v':
187 startnum = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
188 if (errstr)
189 errx(EXIT_FAILURE,
190 "initial logical page value is %s: %s",
191 errstr, optarg);
192 break;
193 case 'w':
194 width = strtonum(optarg, 1, INT_MAX, &errstr);
195 if (errstr)
196 errx(EXIT_FAILURE, "width is %s: %s", errstr,
197 optarg);
198 break;
199 default:
200 usage();
201 /* NOTREACHED */
202 }
203 }
204 argc -= optind;
205 argv += optind;
206
207 switch (argc) {
208 case 0:
209 break;
210 case 1:
211 if (strcmp(argv[0], "-") != 0 &&
212 freopen(argv[0], "r", stdin) == NULL)
213 err(EXIT_FAILURE, "%s", argv[0]);
214 break;
215 default:
216 usage();
217 /* NOTREACHED */
218 }
219
220 if (pledge("stdio", NULL) == -1)
221 err(1, "pledge");
222
223 /* Generate the delimiter sequence */
224 memcpy(delim, delim1, delim1len);
225 memcpy(delim + delim1len, delim2, delim2len);
226 delimlen = delim1len + delim2len;
227
228 /* Do the work. */
229 filter();
230
231 exit(EXIT_SUCCESS);
232 }
233
234 void
filter(void)235 filter(void)
236 {
237 char *buffer;
238 size_t buffersize;
239 ssize_t linelen;
240 int line; /* logical line number */
241 int section; /* logical page section */
242 unsigned int adjblank; /* adjacent blank lines */
243 int donumber = 0, idx;
244
245 adjblank = 0;
246 line = startnum;
247 section = BODY;
248
249 buffer = NULL;
250 buffersize = 0;
251 while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) {
252 for (idx = FOOTER; idx <= NP_LAST; idx++) {
253 /* Does it look like a delimiter? */
254 if (delimlen * (idx + 1) > linelen)
255 break;
256 if (memcmp(buffer + delimlen * idx, delim,
257 delimlen) != 0)
258 break;
259 /* Was this the whole line? */
260 if (buffer[delimlen * (idx + 1)] == '\n') {
261 section = idx;
262 adjblank = 0;
263 if (restart)
264 line = startnum;
265 goto nextline;
266 }
267 }
268
269 switch (numbering_properties[section].type) {
270 case number_all:
271 /*
272 * Doing this for number_all only is disputable, but
273 * the standard expresses an explicit dependency on
274 * `-b a' etc.
275 */
276 if (buffer[0] == '\n' && ++adjblank < nblank)
277 donumber = 0;
278 else
279 donumber = 1, adjblank = 0;
280 break;
281 case number_nonempty:
282 donumber = (buffer[0] != '\n');
283 break;
284 case number_none:
285 donumber = 0;
286 break;
287 case number_regex:
288 donumber =
289 (regexec(&numbering_properties[section].expr,
290 buffer, 0, NULL, 0) == 0);
291 break;
292 }
293
294 if (donumber) {
295 (void)printf(format, width, line);
296 line += incr;
297 (void)fputs(sep, stdout);
298 } else {
299 (void)printf("%*s", width, "");
300 }
301 (void)fwrite(buffer, linelen, 1, stdout);
302
303 if (ferror(stdout))
304 err(EXIT_FAILURE, "output error");
305 nextline:
306 ;
307 }
308
309 if (ferror(stdin))
310 err(EXIT_FAILURE, "input error");
311
312 free(buffer);
313 }
314
315 /*
316 * Various support functions.
317 */
318
319 void
parse_numbering(const char * argstr,int section)320 parse_numbering(const char *argstr, int section)
321 {
322 int error;
323 char errorbuf[NL_TEXTMAX];
324
325 switch (argstr[0]) {
326 case 'a':
327 numbering_properties[section].type = number_all;
328 break;
329 case 'n':
330 numbering_properties[section].type = number_none;
331 break;
332 case 't':
333 numbering_properties[section].type = number_nonempty;
334 break;
335 case 'p':
336 /* If there was a previous expression, throw it away. */
337 if (numbering_properties[section].type == number_regex)
338 regfree(&numbering_properties[section].expr);
339 else
340 numbering_properties[section].type = number_regex;
341
342 /* Compile/validate the supplied regular expression. */
343 if ((error = regcomp(&numbering_properties[section].expr,
344 &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) {
345 (void)regerror(error,
346 &numbering_properties[section].expr,
347 errorbuf, sizeof(errorbuf));
348 errx(EXIT_FAILURE,
349 "%s expr: %s -- %s",
350 numbering_properties[section].name, errorbuf,
351 &argstr[1]);
352 }
353 break;
354 default:
355 errx(EXIT_FAILURE,
356 "illegal %s line numbering type -- %s",
357 numbering_properties[section].name, argstr);
358 }
359 }
360
361 __dead void
usage(void)362 usage(void)
363 {
364 (void)fprintf(stderr, "usage: %s [-p] [-b type] [-d delim] [-f type] "
365 "[-h type] [-i incr] [-l num]\n\t[-n format] [-s sep] "
366 "[-v startnum] [-w width] [file]\n", getprogname());
367 exit(EXIT_FAILURE);
368 }
369