xref: /openbsd/usr.bin/nl/nl.c (revision d7259957)
1 /*	$OpenBSD: nl.c,v 1.8 2022/12/04 23:50:49 cheloha Exp $ */
2 /*	$NetBSD: nl.c,v 1.11 2011/08/16 12:00:46 christos Exp $	*/
3 
4 /*-
5  * Copyright (c) 1999 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Klaus Klein.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <locale.h>
37 #include <regex.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <wchar.h>
43 
/*
 * Policy that decides which lines of a logical page section receive a
 * line number.  One policy is stored per section in numbering_properties.
 */
typedef enum {
	number_all,		/* number all lines */
	number_nonempty,	/* number non-empty lines */
	number_none,		/* no line numbering */
	number_regex		/* number lines matching regular expression */
} numbering_type;
50 
/*
 * Numbering configuration of one logical page section (footer, body or
 * header).  `expr' holds a compiled expression only while `type' is
 * number_regex.
 */
struct numbering_property {
	const char * const	name;		/* for diagnostics */
	numbering_type		type;		/* numbering type */
	regex_t			expr;		/* for type == number_regex */
};
56 
/*
 * line numbering formats
 *
 * Each format consumes two int arguments: the field width (taken by the
 * `*') followed by the line number.
 */
#define FORMAT_LN	"%-*d"	/* left justified, leading zeros suppressed */
#define FORMAT_RN	"%*d"	/* right justified, leading zeros suppressed */
#define FORMAT_RZ	"%0*d"	/* right justified, leading zeros kept */

/*
 * Logical page sections.  The values double as indices into
 * numbering_properties, and (value + 1) is the number of times the
 * delimiter sequence is repeated on the line that starts the section.
 */
#define FOOTER		0
#define BODY		1
#define HEADER		2
#define NP_LAST		HEADER
66 
67 static struct numbering_property numbering_properties[NP_LAST + 1] = {
68 	{ "footer",	number_none,	{ 0, 0, 0, 0 } },
69 	{ "body",	number_nonempty, { 0, 0, 0, 0 } },
70 	{ "header",	number_none,	{ 0, 0, 0, 0 } },
71 };
72 
/* Copy standard input to standard output, adding line numbers. */
void		filter(void);
/* Set the numbering type of a section from a -b/-f/-h option argument. */
void		parse_numbering(const char *, int);
/* Print the usage message to standard error and exit with failure. */
__dead void	usage(void);
76 
/*
 * Delimiter characters that indicate the start of a logical page section.
 * `delim' holds the two (possibly multibyte) delimiter characters back to
 * back, without a terminating NUL; `delimlen' is their combined length in
 * bytes.  Both are filled in by main() before filter() runs.
 */
static char delim[2 * MB_LEN_MAX];
static int delimlen;
82 
/*
 * Configurable parameters.
 *
 * Each one starts out with its default value and may be overridden by
 * the command line option named in its comment.
 */

/* line numbering format (-n) */
static const char *format = FORMAT_RN;

/* increment value used to number logical page lines (-i) */
static int incr = 1;

/* number of adjacent blank lines to be considered (and numbered) as one (-l) */
static unsigned int nblank = 1;

/* whether to restart numbering at logical page delimiters (cleared by -p) */
static int restart = 1;

/* characters separating the line number from the corresponding text line (-s) */
static const char *sep = "\t";

/* initial value used to number logical page lines (-v) */
static int startnum = 1;

/* number of characters to be used for the line number (-w) */
/* should be unsigned but the `*' field width conversion requires an int */
static int width = 6;
108 
109 
110 int
main(int argc,char * argv[])111 main(int argc, char *argv[])
112 {
113 	int c;
114 	size_t clen;
115 	char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' };
116 	size_t delim1len = 1, delim2len = 1;
117 	const char *errstr;
118 
119 	(void)setlocale(LC_ALL, "");
120 
121 	if (pledge("stdio rpath", NULL) == -1)
122 		err(1, "pledge");
123 
124 	while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) {
125 		switch (c) {
126 		case 'p':
127 			restart = 0;
128 			break;
129 		case 'b':
130 			parse_numbering(optarg, BODY);
131 			break;
132 		case 'd':
133 			clen = mbrlen(optarg, MB_CUR_MAX, NULL);
134 			if (clen == (size_t)-1 || clen == (size_t)-2)
135 				errc(EXIT_FAILURE, EILSEQ, NULL);
136 			if (clen != 0) {
137 				memcpy(delim1, optarg, delim1len = clen);
138 				clen = mbrlen(optarg + delim1len,
139 				    MB_CUR_MAX, NULL);
140 				if (clen == (size_t)-1 || clen == (size_t)-2)
141 					errc(EXIT_FAILURE, EILSEQ, NULL);
142 				if (clen != 0) {
143 					memcpy(delim2, optarg + delim1len,
144 					    delim2len = clen);
145 					if (optarg[delim1len + clen] != '\0') {
146 						errx(EXIT_FAILURE,
147 						    "invalid delimiter: %s",
148 						    optarg);
149 					}
150 				}
151 			}
152 			break;
153 		case 'f':
154 			parse_numbering(optarg, FOOTER);
155 			break;
156 		case 'h':
157 			parse_numbering(optarg, HEADER);
158 			break;
159 		case 'i':
160 			incr = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
161 			if (errstr)
162 				errx(EXIT_FAILURE, "increment value is %s: %s",
163 				    errstr, optarg);
164 			break;
165 		case 'l':
166 			nblank = strtonum(optarg, 0, UINT_MAX, &errstr);
167 			if (errstr)
168 				errx(EXIT_FAILURE,
169 				    "blank line value is %s: %s",
170 				    errstr, optarg);
171 			break;
172 		case 'n':
173 			if (strcmp(optarg, "ln") == 0) {
174 				format = FORMAT_LN;
175 			} else if (strcmp(optarg, "rn") == 0) {
176 				format = FORMAT_RN;
177 			} else if (strcmp(optarg, "rz") == 0) {
178 				format = FORMAT_RZ;
179 			} else
180 				errx(EXIT_FAILURE,
181 				    "illegal format -- %s", optarg);
182 			break;
183 		case 's':
184 			sep = optarg;
185 			break;
186 		case 'v':
187 			startnum = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
188 			if (errstr)
189 				errx(EXIT_FAILURE,
190 				    "initial logical page value is %s: %s",
191 				    errstr, optarg);
192 			break;
193 		case 'w':
194 			width = strtonum(optarg, 1, INT_MAX, &errstr);
195 			if (errstr)
196 				errx(EXIT_FAILURE, "width is %s: %s", errstr,
197 				    optarg);
198 			break;
199 		default:
200 			usage();
201 			/* NOTREACHED */
202 		}
203 	}
204 	argc -= optind;
205 	argv += optind;
206 
207 	switch (argc) {
208 	case 0:
209 		break;
210 	case 1:
211 		if (strcmp(argv[0], "-") != 0 &&
212 		    freopen(argv[0], "r", stdin) == NULL)
213 			err(EXIT_FAILURE, "%s", argv[0]);
214 		break;
215 	default:
216 		usage();
217 		/* NOTREACHED */
218 	}
219 
220 	if (pledge("stdio", NULL) == -1)
221 		err(1, "pledge");
222 
223 	/* Generate the delimiter sequence */
224 	memcpy(delim, delim1, delim1len);
225 	memcpy(delim + delim1len, delim2, delim2len);
226 	delimlen = delim1len + delim2len;
227 
228 	/* Do the work. */
229 	filter();
230 
231 	exit(EXIT_SUCCESS);
232 }
233 
234 void
filter(void)235 filter(void)
236 {
237 	char *buffer;
238 	size_t buffersize;
239 	ssize_t linelen;
240 	int line;		/* logical line number */
241 	int section;		/* logical page section */
242 	unsigned int adjblank;	/* adjacent blank lines */
243 	int donumber = 0, idx;
244 
245 	adjblank = 0;
246 	line = startnum;
247 	section = BODY;
248 
249 	buffer = NULL;
250 	buffersize = 0;
251 	while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) {
252 		for (idx = FOOTER; idx <= NP_LAST; idx++) {
253 			/* Does it look like a delimiter? */
254 			if (delimlen * (idx + 1) > linelen)
255 				break;
256 			if (memcmp(buffer + delimlen * idx, delim,
257 			    delimlen) != 0)
258 				break;
259 			/* Was this the whole line? */
260 			if (buffer[delimlen * (idx + 1)] == '\n') {
261 				section = idx;
262 				adjblank = 0;
263 				if (restart)
264 					line = startnum;
265 				goto nextline;
266 			}
267 		}
268 
269 		switch (numbering_properties[section].type) {
270 		case number_all:
271 			/*
272 			 * Doing this for number_all only is disputable, but
273 			 * the standard expresses an explicit dependency on
274 			 * `-b a' etc.
275 			 */
276 			if (buffer[0] == '\n' && ++adjblank < nblank)
277 				donumber = 0;
278 			else
279 				donumber = 1, adjblank = 0;
280 			break;
281 		case number_nonempty:
282 			donumber = (buffer[0] != '\n');
283 			break;
284 		case number_none:
285 			donumber = 0;
286 			break;
287 		case number_regex:
288 			donumber =
289 			    (regexec(&numbering_properties[section].expr,
290 			    buffer, 0, NULL, 0) == 0);
291 			break;
292 		}
293 
294 		if (donumber) {
295 			(void)printf(format, width, line);
296 			line += incr;
297 			(void)fputs(sep, stdout);
298 		} else {
299 			(void)printf("%*s", width, "");
300 		}
301 		(void)fwrite(buffer, linelen, 1, stdout);
302 
303 		if (ferror(stdout))
304 			err(EXIT_FAILURE, "output error");
305 nextline:
306 		;
307 	}
308 
309 	if (ferror(stdin))
310 		err(EXIT_FAILURE, "input error");
311 
312 	free(buffer);
313 }
314 
315 /*
316  * Various support functions.
317  */
318 
319 void
parse_numbering(const char * argstr,int section)320 parse_numbering(const char *argstr, int section)
321 {
322 	int error;
323 	char errorbuf[NL_TEXTMAX];
324 
325 	switch (argstr[0]) {
326 	case 'a':
327 		numbering_properties[section].type = number_all;
328 		break;
329 	case 'n':
330 		numbering_properties[section].type = number_none;
331 		break;
332 	case 't':
333 		numbering_properties[section].type = number_nonempty;
334 		break;
335 	case 'p':
336 		/* If there was a previous expression, throw it away. */
337 		if (numbering_properties[section].type == number_regex)
338 			regfree(&numbering_properties[section].expr);
339 		else
340 			numbering_properties[section].type = number_regex;
341 
342 		/* Compile/validate the supplied regular expression. */
343 		if ((error = regcomp(&numbering_properties[section].expr,
344 		    &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) {
345 			(void)regerror(error,
346 			    &numbering_properties[section].expr,
347 			    errorbuf, sizeof(errorbuf));
348 			errx(EXIT_FAILURE,
349 			    "%s expr: %s -- %s",
350 			    numbering_properties[section].name, errorbuf,
351 			    &argstr[1]);
352 		}
353 		break;
354 	default:
355 		errx(EXIT_FAILURE,
356 		    "illegal %s line numbering type -- %s",
357 		    numbering_properties[section].name, argstr);
358 	}
359 }
360 
361 __dead void
usage(void)362 usage(void)
363 {
364 	(void)fprintf(stderr, "usage: %s [-p] [-b type] [-d delim] [-f type] "
365 	    "[-h type] [-i incr] [-l num]\n\t[-n format] [-s sep] "
366 	    "[-v startnum] [-w width] [file]\n", getprogname());
367 	exit(EXIT_FAILURE);
368 }
369