xref: /openbsd/usr.bin/nl/nl.c (revision a6445c1d)
1 /*	$OpenBSD: nl.c,v 1.3 2014/05/20 01:25:23 guenther Exp $ */
2 /*	$NetBSD: nl.c,v 1.11 2011/08/16 12:00:46 christos Exp $	*/
3 
4 /*-
5  * Copyright (c) 1999 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Klaus Klein.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 
35 #include <err.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <locale.h>
39 #include <regex.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <wchar.h>
45 
/*
 * Selects which lines of a logical page section receive a line number.
 */
typedef enum {
	/* every line is numbered */
	number_all = 0,
	/* only lines that are not empty are numbered */
	number_nonempty = 1,
	/* no line is numbered */
	number_none = 2,
	/* only lines matching a regular expression are numbered */
	number_regex = 3
} numbering_type;
52 
53 struct numbering_property {
54 	const char * const	name;		/* for diagnostics */
55 	numbering_type		type;		/* numbering type */
56 	regex_t			expr;		/* for type == number_regex */
57 };
58 
/*
 * printf(3) formats for the line number; each takes the field width
 * followed by the number.
 */
#define FORMAT_LN	"%-*d"	/* "ln": left justified, no leading zeros */
#define FORMAT_RN	"%*d"	/* "rn": right justified, no leading zeros */
#define FORMAT_RZ	"%0*d"	/* "rz": right justified, zero padded */

/*
 * Indices of the logical page sections in the numbering property table.
 */
#define FOOTER		0
#define BODY		1
#define HEADER		2
#define NP_LAST		HEADER	/* highest valid section index */
68 
69 static struct numbering_property numbering_properties[NP_LAST + 1] = {
70 	{ "footer",	number_none,	{ 0, 0, 0, 0 } },
71 	{ "body",	number_nonempty, { 0, 0, 0, 0 } },
72 	{ "header",	number_none,	{ 0, 0, 0, 0 } },
73 };
74 
75 void		filter(void);
76 void		parse_numbering(const char *, int);
77 __dead void	usage(void);
78 
/*
 * The delimiter sequence marking the start of a logical page section:
 * two (possibly multibyte) characters stored back to back, together with
 * the number of bytes actually in use.
 */
static char delim[MB_LEN_MAX * 2];
static int delimlen;
84 
85 /*
86  * Configurable parameters.
87  */
88 
89 /* line numbering format */
90 static const char *format = FORMAT_RN;
91 
92 /* increment value used to number logical page lines */
93 static int incr = 1;
94 
95 /* number of adjacent blank lines to be considered (and numbered) as one */
96 static unsigned int nblank = 1;
97 
98 /* whether to restart numbering at logical page delimiters */
99 static int restart = 1;
100 
101 /* characters used in separating the line number and the corrsp. text line */
102 static const char *sep = "\t";
103 
104 /* initial value used to number logical page lines */
105 static int startnum = 1;
106 
107 /* number of characters to be used for the line number */
108 /* should be unsigned but required signed by `*' precision conversion */
109 static int width = 6;
110 
111 
112 int
113 main(int argc, char *argv[])
114 {
115 	int c;
116 	size_t clen;
117 	char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' };
118 	size_t delim1len = 1, delim2len = 1;
119 	const char *errstr;
120 
121 	(void)setlocale(LC_ALL, "");
122 
123 	while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) {
124 		switch (c) {
125 		case 'p':
126 			restart = 0;
127 			break;
128 		case 'b':
129 			parse_numbering(optarg, BODY);
130 			break;
131 		case 'd':
132 			clen = mbrlen(optarg, MB_CUR_MAX, NULL);
133 			if (clen == (size_t)-1 || clen == (size_t)-2)
134 				errc(EXIT_FAILURE, EILSEQ, NULL);
135 			if (clen != 0) {
136 				memcpy(delim1, optarg, delim1len = clen);
137 				clen = mbrlen(optarg + delim1len,
138 				    MB_CUR_MAX, NULL);
139 				if (clen == (size_t)-1 || clen == (size_t)-2)
140 					errc(EXIT_FAILURE, EILSEQ, NULL);
141 				if (clen != 0) {
142 					memcpy(delim2, optarg + delim1len,
143 					    delim2len = clen);
144 					if (optarg[delim1len + clen] != '\0') {
145 						errx(EXIT_FAILURE,
146 						    "invalid delimiter: %s",
147 						    optarg);
148 					}
149 				}
150 			}
151 			break;
152 		case 'f':
153 			parse_numbering(optarg, FOOTER);
154 			break;
155 		case 'h':
156 			parse_numbering(optarg, HEADER);
157 			break;
158 		case 'i':
159 			incr = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
160 			if (errstr)
161 				errx(EXIT_FAILURE, "increment value is %s: %s",
162 				    errstr, optarg);
163 			break;
164 		case 'l':
165 			nblank = strtonum(optarg, 0, UINT_MAX, &errstr);
166 			if (errstr)
167 				errx(EXIT_FAILURE,
168 				    "blank line value is %s: %s",
169 				    errstr, optarg);
170 			break;
171 		case 'n':
172 			if (strcmp(optarg, "ln") == 0) {
173 				format = FORMAT_LN;
174 			} else if (strcmp(optarg, "rn") == 0) {
175 				format = FORMAT_RN;
176 			} else if (strcmp(optarg, "rz") == 0) {
177 				format = FORMAT_RZ;
178 			} else
179 				errx(EXIT_FAILURE,
180 				    "illegal format -- %s", optarg);
181 			break;
182 		case 's':
183 			sep = optarg;
184 			break;
185 		case 'v':
186 			startnum = strtonum(optarg, INT_MIN, INT_MAX, &errstr);
187 			if (errstr)
188 				errx(EXIT_FAILURE,
189 				    "initial logical page value is %s: %s",
190 				    errstr, optarg);
191 			break;
192 		case 'w':
193 			width = strtonum(optarg, 1, INT_MAX, &errstr);
194 			if (errstr)
195 				errx(EXIT_FAILURE, "width is %s: %s", errstr,
196 				    optarg);
197 			break;
198 		case '?':
199 		default:
200 			usage();
201 			/* NOTREACHED */
202 		}
203 	}
204 	argc -= optind;
205 	argv += optind;
206 
207 	switch (argc) {
208 	case 0:
209 		break;
210 	case 1:
211 		if (strcmp(argv[0], "-") != 0 &&
212 		    freopen(argv[0], "r", stdin) == NULL)
213 			err(EXIT_FAILURE, "%s", argv[0]);
214 		break;
215 	default:
216 		usage();
217 		/* NOTREACHED */
218 	}
219 
220 	/* Generate the delimiter sequence */
221 	memcpy(delim, delim1, delim1len);
222 	memcpy(delim + delim1len, delim2, delim2len);
223 	delimlen = delim1len + delim2len;
224 
225 	/* Do the work. */
226 	filter();
227 
228 	exit(EXIT_SUCCESS);
229 }
230 
231 void
232 filter(void)
233 {
234 	char *buffer;
235 	size_t buffersize;
236 	ssize_t linelen;
237 	int line;		/* logical line number */
238 	int section;		/* logical page section */
239 	unsigned int adjblank;	/* adjacent blank lines */
240 	int donumber = 0, idx;
241 
242 	adjblank = 0;
243 	line = startnum;
244 	section = BODY;
245 
246 	buffer = NULL;
247 	buffersize = 0;
248 	while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) {
249 		for (idx = FOOTER; idx <= NP_LAST; idx++) {
250 			/* Does it look like a delimiter? */
251 			if (delimlen * (idx + 1) > linelen)
252 				break;
253 			if (memcmp(buffer + delimlen * idx, delim,
254 			    delimlen) != 0)
255 				break;
256 			/* Was this the whole line? */
257 			if (buffer[delimlen * (idx + 1)] == '\n') {
258 				section = idx;
259 				adjblank = 0;
260 				if (restart)
261 					line = startnum;
262 				goto nextline;
263 			}
264 		}
265 
266 		switch (numbering_properties[section].type) {
267 		case number_all:
268 			/*
269 			 * Doing this for number_all only is disputable, but
270 			 * the standard expresses an explicit dependency on
271 			 * `-b a' etc.
272 			 */
273 			if (buffer[0] == '\n' && ++adjblank < nblank)
274 				donumber = 0;
275 			else
276 				donumber = 1, adjblank = 0;
277 			break;
278 		case number_nonempty:
279 			donumber = (buffer[0] != '\n');
280 			break;
281 		case number_none:
282 			donumber = 0;
283 			break;
284 		case number_regex:
285 			donumber =
286 			    (regexec(&numbering_properties[section].expr,
287 			    buffer, 0, NULL, 0) == 0);
288 			break;
289 		}
290 
291 		if (donumber) {
292 			(void)printf(format, width, line);
293 			line += incr;
294 			(void)fputs(sep, stdout);
295 		} else {
296 			(void)printf("%*s", width, "");
297 		}
298 		(void)fwrite(buffer, linelen, 1, stdout);
299 
300 		if (ferror(stdout))
301 			err(EXIT_FAILURE, "output error");
302 nextline:
303 		;
304 	}
305 
306 	if (ferror(stdin))
307 		err(EXIT_FAILURE, "input error");
308 
309 	free(buffer);
310 }
311 
312 /*
313  * Various support functions.
314  */
315 
316 void
317 parse_numbering(const char *argstr, int section)
318 {
319 	int error;
320 	char errorbuf[NL_TEXTMAX];
321 
322 	switch (argstr[0]) {
323 	case 'a':
324 		numbering_properties[section].type = number_all;
325 		break;
326 	case 'n':
327 		numbering_properties[section].type = number_none;
328 		break;
329 	case 't':
330 		numbering_properties[section].type = number_nonempty;
331 		break;
332 	case 'p':
333 		/* If there was a previous expression, throw it away. */
334 		if (numbering_properties[section].type == number_regex)
335 			regfree(&numbering_properties[section].expr);
336 		else
337 			numbering_properties[section].type = number_regex;
338 
339 		/* Compile/validate the supplied regular expression. */
340 		if ((error = regcomp(&numbering_properties[section].expr,
341 		    &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) {
342 			(void)regerror(error,
343 			    &numbering_properties[section].expr,
344 			    errorbuf, sizeof(errorbuf));
345 			errx(EXIT_FAILURE,
346 			    "%s expr: %s -- %s",
347 			    numbering_properties[section].name, errorbuf,
348 			    &argstr[1]);
349 		}
350 		break;
351 	default:
352 		errx(EXIT_FAILURE,
353 		    "illegal %s line numbering type -- %s",
354 		    numbering_properties[section].name, argstr);
355 	}
356 }
357 
358 __dead void
359 usage(void)
360 {
361 	(void)fprintf(stderr, "usage: %s [-p] [-b type] [-d delim] [-f type] "
362 	    "[-h type] [-i incr] [-l num]\n\t[-n format] [-s sep] "
363 	    "[-v startnum] [-w width] [file]\n", getprogname());
364 	exit(EXIT_FAILURE);
365 }
366