xref: /netbsd/usr.bin/nl/nl.c (revision bf9ec67e)
1 /*	$NetBSD: nl.c,v 1.6 2000/07/03 02:51:27 matt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Klaus Klein.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT(
42 "@(#) Copyright (c) 1999\
43  The NetBSD Foundation, Inc.  All rights reserved.");
44 __RCSID("$NetBSD: nl.c,v 1.6 2000/07/03 02:51:27 matt Exp $");
45 #endif
46 
47 #include <errno.h>
48 #include <limits.h>
49 #include <locale.h>
50 #include <regex.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 
/*
 * Selects which lines of a logical page section receive a line number.
 */
typedef enum {
	number_all = 0,		/* every line is numbered */
	number_nonempty,	/* only lines that are not empty are numbered */
	number_none,		/* no line is numbered */
	number_regex		/* only lines matching a regular expression */
} numbering_type;
62 
63 struct numbering_property {
64 	const char * const	name;		/* for diagnostics */
65 	numbering_type		type;		/* numbering type */
66 	regex_t			expr;		/* for type == number_regex */
67 };
68 
/*
 * printf(3) templates for the line number.  The `*' consumes the field
 * width, so each template expects two int arguments: width, then number.
 */
#define FORMAT_LN	"%-*d"	/* flush left, no leading zeros */
#define FORMAT_RN	"%*d"	/* flush right, no leading zeros */
#define FORMAT_RZ	"%0*d"	/* flush right, padded with leading zeros */

/*
 * Logical page sections.  The values index numbering_properties[]; a
 * section delimiter line consists of (value + 1) delimiter pairs.
 */
#define FOOTER		0
#define BODY		1
#define HEADER		2
#define NP_LAST		HEADER	/* highest section index */
78 
79 static struct numbering_property numbering_properties[NP_LAST + 1] = {
80 	{ "footer",	number_none	},
81 	{ "body",	number_nonempty	},
82 	{ "header",	number_none	}
83 };
84 
/* Larger of two values.  Beware: an argument may be evaluated twice. */
#define max(a, b)	((b) < (a) ? (a) : (b))

/*
 * Upper bound on the number of characters in the decimal representation
 * of a (signed) int, sign included; formula borrowed from tzcode.
 * 302 / 1000 approximates log10(2) from above.
 */
#define INT_STRLEN_MAXIMUM \
	(2 + (CHAR_BIT * sizeof (int) - 1) * 302 / 1000)
93 
/* program entry point */
int		main __P((int, char *[]));

/* file-local routines */
static void	filter __P((void));
static void	parse_numbering __P((const char *, int));
static void	usage __P((void));
98 
99 /*
100  * Pointer to dynamically allocated input line buffer, and its size.
101  */
102 static char *buffer;
103 static size_t buffersize;
104 
105 /*
106  * Dynamically allocated buffer suitable for string representation of ints.
107  */
108 static char *intbuffer;
109 
110 /*
111  * Configurable parameters.
112  */
113 /* delimiter characters that indicate the start of a logical page section */
114 static char delim[2] = { '\\', ':' };
115 
116 /* line numbering format */
117 static const char *format = FORMAT_RN;
118 
119 /* increment value used to number logical page lines */
120 static int incr = 1;
121 
122 /* number of adjacent blank lines to be considered (and numbered) as one */
123 static unsigned int nblank = 1;
124 
125 /* whether to restart numbering at logical page delimiters */
126 static int restart = 1;
127 
128 /* characters used in separating the line number and the corrsp. text line */
129 static const char *sep = "\t";
130 
131 /* initial value used to number logical page lines */
132 static int startnum = 1;
133 
134 /* number of characters to be used for the line number */
135 /* should be unsigned but required signed by `*' precision conversion */
136 static int width = 6;
137 
138 
139 int
140 main(argc, argv)
141 	int argc;
142 	char *argv[];
143 {
144 	int c;
145 	long val;
146 	unsigned long uval;
147 	char *ep;
148 	size_t intbuffersize;
149 
150 	(void)setlocale(LC_ALL, "");
151 
152 	/*
153 	 * Note: this implementation strictly conforms to the XBD Utility
154 	 * Syntax Guidelines and does not permit the optional `file' operand
155 	 * to be intermingled with the options, which is defined in the
156 	 * XCU specification (Issue 5) but declared an obsolescent feature that
157 	 * will be removed from a future issue.  It shouldn't matter, though.
158 	 */
159 	while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) {
160 		switch (c) {
161 		case 'p':
162 			restart = 0;
163 			break;
164 		case 'b':
165 			parse_numbering(optarg, BODY);
166 			break;
167 		case 'd':
168 			if (optarg[0] != '\0')
169 				delim[0] = optarg[0];
170 			if (optarg[1] != '\0')
171 				delim[1] = optarg[1];
172 			/* at most two delimiter characters */
173 			if (optarg[2] != '\0') {
174 				(void)fprintf(stderr,
175 				    "nl: invalid delim argument -- %s\n",
176 				    optarg);
177 				exit(EXIT_FAILURE);
178 				/* NOTREACHED */
179 			}
180 			break;
181 		case 'f':
182 			parse_numbering(optarg, FOOTER);
183 			break;
184 		case 'h':
185 			parse_numbering(optarg, HEADER);
186 			break;
187 		case 'i':
188 			errno = 0;
189 			val = strtol(optarg, &ep, 10);
190 			if ((ep != NULL && *ep != '\0') ||
191 			 ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) {
192 				(void)fprintf(stderr,
193 				    "invalid incr argument -- %s\n", optarg);
194 				exit(EXIT_FAILURE);
195 			}
196 			incr = (int)val;
197 			break;
198 		case 'l':
199 			errno = 0;
200 			uval = strtoul(optarg, &ep, 10);
201 			if ((ep != NULL && *ep != '\0') ||
202 			    (uval == ULONG_MAX && errno != 0)) {
203 				(void)fprintf(stderr,
204 				    "invalid num argument -- %s\n", optarg);
205 				exit(EXIT_FAILURE);
206 			}
207 			nblank = (unsigned int)uval;
208 			break;
209 		case 'n':
210 			if (strcmp(optarg, "ln") == 0) {
211 				format = FORMAT_LN;
212 			} else if (strcmp(optarg, "rn") == 0) {
213 				format = FORMAT_RN;
214 			} else if (strcmp(optarg, "rz") == 0) {
215 				format = FORMAT_RZ;
216 			} else {
217 				(void)fprintf(stderr,
218 				    "nl: illegal format -- %s\n", optarg);
219 				exit(EXIT_FAILURE);
220 			}
221 			break;
222 		case 's':
223 			sep = optarg;
224 			break;
225 		case 'v':
226 			errno = 0;
227 			val = strtol(optarg, &ep, 10);
228 			if ((ep != NULL && *ep != '\0') ||
229 			 ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) {
230 				(void)fprintf(stderr,
231 				    "invalid startnum value -- %s\n", optarg);
232 				exit(EXIT_FAILURE);
233 			}
234 			startnum = (int)val;
235 			break;
236 		case 'w':
237 			errno = 0;
238 			val = strtol(optarg, &ep, 10);
239 			if ((ep != NULL && *ep != '\0') ||
240 			 ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) {
241 				(void)fprintf(stderr,
242 				    "invalid width value -- %s\n", optarg);
243 				exit(EXIT_FAILURE);
244 			}
245 			width = (int)val;
246 			if (!(width > 0)) {
247 				(void)fprintf(stderr,
248 				    "nl: width argument must be > 0 -- %d\n",
249 				    width);
250 				 exit(EXIT_FAILURE);
251 			}
252 			break;
253 		case '?':
254 		default:
255 			usage();
256 			/* NOTREACHED */
257 		}
258 	}
259 	argc -= optind;
260 	argv += optind;
261 
262 	switch (argc) {
263 	case 0:
264 		break;
265 	case 1:
266 		if (freopen(argv[0], "r", stdin) == NULL) {
267 			perror(argv[0]);
268 			exit(EXIT_FAILURE);
269 		}
270 		break;
271 	default:
272 		usage();
273 		/* NOTREACHED */
274 	}
275 
276 	/* Determine the maximum input line length to operate on. */
277 	if ((val = sysconf(_SC_LINE_MAX)) == -1) /* ignore errno */
278 		val = LINE_MAX;
279 	/* Allocate sufficient buffer space (including the terminating NUL). */
280 	buffersize = (size_t)val + 1;
281 	if ((buffer = malloc(buffersize)) == NULL) {
282 		perror("cannot allocate input line buffer");
283 		exit(EXIT_FAILURE);
284 	}
285 
286 	/* Allocate a buffer suitable for preformatting line number. */
287 	intbuffersize = max(INT_STRLEN_MAXIMUM, width) + 1;	/* NUL */
288 	if ((intbuffer = malloc(intbuffersize)) == NULL) {
289 		perror("cannot allocate preformatting buffer");
290 		exit(EXIT_FAILURE);
291 	}
292 
293 	/* Do the work. */
294 	filter();
295 
296 	exit(EXIT_SUCCESS);
297 	/* NOTREACHED */
298 }
299 
300 static void
301 filter()
302 {
303 	int line;		/* logical line number */
304 	int section;		/* logical page section */
305 	unsigned int adjblank;	/* adjacent blank lines */
306 	int consumed;		/* intbuffer measurement */
307 	int donumber, idx;
308 
309 	adjblank = 0;
310 	line = startnum;
311 	section = BODY;
312 #ifdef __GNUC__
313 	(void)&donumber;	/* avoid bogus `uninitialized' warning */
314 #endif
315 
316 	while (fgets(buffer, (int)buffersize, stdin) != NULL) {
317 		for (idx = FOOTER; idx <= NP_LAST; idx++) {
318 			/* Does it look like a delimiter? */
319 			if (buffer[2 * idx + 0] == delim[0] &&
320 			    buffer[2 * idx + 1] == delim[1]) {
321 				/* Was this the whole line? */
322 				if (buffer[2 * idx + 2] == '\n') {
323 					section = idx;
324 					adjblank = 0;
325 					if (restart)
326 						line = startnum;
327 					goto nextline;
328 				}
329 			} else {
330 				break;
331 			}
332 		}
333 
334 		switch (numbering_properties[section].type) {
335 		case number_all:
336 			/*
337 			 * Doing this for number_all only is disputable, but
338 			 * the standard expresses an explicit dependency on
339 			 * `-b a' etc.
340 			 */
341 			if (buffer[0] == '\n' && ++adjblank < nblank)
342 				donumber = 0;
343 			else
344 				donumber = 1, adjblank = 0;
345 			break;
346 		case number_nonempty:
347 			donumber = (buffer[0] != '\n');
348 			break;
349 		case number_none:
350 			donumber = 0;
351 			break;
352 		case number_regex:
353 			donumber =
354 			    (regexec(&numbering_properties[section].expr,
355 			    buffer, 0, NULL, 0) == 0);
356 			break;
357 		}
358 
359 		if (donumber) {
360 			/* Note: sprintf() is safe here. */
361 			consumed = sprintf(intbuffer, format, width, line);
362 			(void)printf("%s",
363 			    intbuffer + max(0, consumed - width));
364 			line += incr;
365 		} else {
366 			(void)printf("%*s", width, "");
367 		}
368 		(void)printf("%s%s", sep, buffer);
369 
370 		if (ferror(stdout)) {
371 			perror("output error");
372 			exit(EXIT_FAILURE);
373 		}
374 nextline:
375 		;
376 	}
377 
378 	if (ferror(stdin)) {
379 		perror("input error");
380 		exit(EXIT_FAILURE);
381 	}
382 }
383 
384 /*
385  * Various support functions.
386  */
387 
388 static void
389 parse_numbering(argstr, section)
390 	const char *argstr;
391 	int section;
392 {
393 	int error;
394 	char errorbuf[NL_TEXTMAX];
395 
396 	switch (argstr[0]) {
397 	case 'a':
398 		numbering_properties[section].type = number_all;
399 		break;
400 	case 'n':
401 		numbering_properties[section].type = number_none;
402 		break;
403 	case 't':
404 		numbering_properties[section].type = number_nonempty;
405 		break;
406 	case 'p':
407 		/* If there was a previous expression, throw it away. */
408 		if (numbering_properties[section].type == number_regex)
409 			regfree(&numbering_properties[section].expr);
410 		else
411 			numbering_properties[section].type = number_regex;
412 
413 		/* Compile/validate the supplied regular expression. */
414 		if ((error = regcomp(&numbering_properties[section].expr,
415 		    &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) {
416 			(void)regerror(error,
417 			    &numbering_properties[section].expr,
418 			    errorbuf, sizeof (errorbuf));
419 			(void)fprintf(stderr,
420 			    "nl: %s expr: %s -- %s\n",
421 			    numbering_properties[section].name, errorbuf,
422 			    &argstr[1]);
423 			exit(EXIT_FAILURE);
424 		}
425 		break;
426 	default:
427 		(void)fprintf(stderr,
428 		    "nl: illegal %s line numbering type -- %s\n",
429 		    numbering_properties[section].name, argstr);
430 		exit(EXIT_FAILURE);
431 	}
432 }
433 
/*
 * Write the command synopsis to stderr and exit with EXIT_FAILURE.
 * Does not return.
 */
static void
usage()
{
	static const char synopsis[] =
	    "usage: nl [-p] [-b type] [-d delim] [-f type] "
	    "[-h type] [-i incr] [-l num]\n"
	    "\t[-n format] [-s sep] [-v startnum] [-w width] "
	    "[file]\n";

	(void)fputs(synopsis, stderr);
	exit(EXIT_FAILURE);
}
443