xref: /openbsd/usr.bin/mandoc/main.c (revision 17df1aa7)
1 /*	$Id: main.c,v 1.25 2010/05/14 01:54:37 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009 Kristaps Dzonsons <kristaps@kth.se>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <sys/stat.h>
18 
19 #include <assert.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <unistd.h>
26 
27 #include "mdoc.h"
28 #include "man.h"
29 #include "main.h"
30 
/*
 * Drop the const qualifier from a pointer by routing it through
 * uintptr_t; the result is a plain void pointer to the same address.
 * Used in this file to pass const token tables to getsubopt().
 */
#define	UNCONST(ptr)	((void *)(uintptr_t)(const void *)(ptr))
32 
/*
 * Output-device callbacks.  Each receives the opaque device state kept
 * in curparse.outdata as its first argument.
 */
typedef	void	(*out_mdoc)(void *, const struct mdoc *); /* Emit an mdoc tree. */
typedef	void	(*out_man)(void *, const struct man *);	  /* Emit a man tree. */
typedef	void	(*out_free)(void *);			  /* Release the device. */
36 
/*
 * A growable heap buffer.  main() zeroes these before use and frees
 * the storage on exit; fdesc() grows them with realloc().
 */
struct	buf {
	char		 *buf;	/* Storage, NULL until first grown. */
	size_t		  sz;	/* Bytes currently allocated at buf. */
};
41 
/* Input format, selected with -m (see moptions()). */
enum	intt {
	INTT_AUTO = 0,	/* -mandoc: guess from the input (default). */
	INTT_MDOC = 1,	/* -mdoc: always use the mdoc parser. */
	INTT_MAN = 2	/* -man: always use the man parser. */
};
47 
/* Output device, selected with -T (see toptions()). */
enum	outt {
	OUTT_ASCII = 0,	/* -Tascii: terminal output (default). */
	OUTT_TREE = 1,	/* -Ttree: dump the parse tree. */
	OUTT_HTML = 2,	/* -Thtml */
	OUTT_XHTML = 3,	/* -Txhtml */
	OUTT_LINT = 4	/* -Tlint: parse only, no output device. */
};
55 
56 struct	curparse {
57 	const char	 *file;		/* Current parse. */
58 	int		  fd;		/* Current parse. */
59 	int		  wflags;
60 #define	WARN_WALL	 (1 << 0)	/* All-warnings mask. */
61 #define	WARN_WERR	 (1 << 2)	/* Warnings->errors. */
62 	int		  fflags;
63 #define	FL_IGN_SCOPE	 (1 << 0) 	/* Ignore scope errors. */
64 #define	FL_NIGN_ESCAPE	 (1 << 1) 	/* Don't ignore bad escapes. */
65 #define	FL_NIGN_MACRO	 (1 << 2) 	/* Don't ignore bad macros. */
66 #define	FL_IGN_ERRORS	 (1 << 4)	/* Ignore failed parse. */
67 	enum intt	  inttype;	/* Input parsers... */
68 	struct man	 *man;
69 	struct man	 *lastman;
70 	struct mdoc	 *mdoc;
71 	struct mdoc	 *lastmdoc;
72 	enum outt	  outtype;	/* Output devices... */
73 	out_mdoc	  outmdoc;
74 	out_man	  	  outman;
75 	out_free	  outfree;
76 	void		 *outdata;
77 	char		  outopts[BUFSIZ];
78 };
79 
/*
 * Flags implied by -fstrict and -Tlint: report bad escapes and bad
 * macros instead of ignoring them.  The expansion is parenthesized so
 * that it stays a single operand in expressions such as
 * `flags & FL_STRICT' or `~FL_STRICT'.
 */
#define	FL_STRICT	 (FL_NIGN_ESCAPE | \
			  FL_NIGN_MACRO)
82 
/* Command-line sub-option parsers; each returns 0 on a bad argument. */
static	int		  foptions(int *fflags, char *arg);
static	int		  toptions(struct curparse *curp, char *arg);
static	int		  moptions(enum intt *tflags, char *arg);
static	int		  woptions(int *wflags, char *arg);

/* Diagnostic callbacks installed in the man and mdoc parsers. */
static	int		  merr(void *arg, int line, int col,
				const char *msg);
static	int		  mwarn(void *arg, int line, int col,
				const char *msg);

/* Per-input drivers and parser set-up. */
static	int		  ffile(struct buf *blk, struct buf *ln,
				const char *file, struct curparse *curp);
static	int		  fdesc(struct buf *blk, struct buf *ln,
				struct curparse *curp);
static	int		  pset(const char *buf, int pos,
				struct curparse *curp,
				struct man **man, struct mdoc **mdoc);
static	struct man	 *man_init(struct curparse *curp);
static	struct mdoc	 *mdoc_init(struct curparse *curp);

/* These print a message and exit. */
static	void		  version(void) __attribute__((noreturn));
static	void		  usage(void) __attribute__((noreturn));
99 
/* Basename of argv[0]; set first thing in main(), printed by version() and usage(). */
static	const char	 *progname;
101 
102 
103 int
104 main(int argc, char *argv[])
105 {
106 	int		 c, rc;
107 	struct buf	 ln, blk;
108 	struct curparse	 curp;
109 
110 	progname = strrchr(argv[0], '/');
111 	if (progname == NULL)
112 		progname = argv[0];
113 	else
114 		++progname;
115 
116 	memset(&curp, 0, sizeof(struct curparse));
117 
118 	curp.inttype = INTT_AUTO;
119 	curp.outtype = OUTT_ASCII;
120 
121 	/* LINTED */
122 	while (-1 != (c = getopt(argc, argv, "f:m:O:T:VW:")))
123 		switch (c) {
124 		case ('f'):
125 			if ( ! foptions(&curp.fflags, optarg))
126 				return(EXIT_FAILURE);
127 			break;
128 		case ('m'):
129 			if ( ! moptions(&curp.inttype, optarg))
130 				return(EXIT_FAILURE);
131 			break;
132 		case ('O'):
133 			(void)strlcat(curp.outopts, optarg, BUFSIZ);
134 			(void)strlcat(curp.outopts, ",", BUFSIZ);
135 			break;
136 		case ('T'):
137 			if ( ! toptions(&curp, optarg))
138 				return(EXIT_FAILURE);
139 			break;
140 		case ('W'):
141 			if ( ! woptions(&curp.wflags, optarg))
142 				return(EXIT_FAILURE);
143 			break;
144 		case ('V'):
145 			version();
146 			/* NOTREACHED */
147 		default:
148 			usage();
149 			/* NOTREACHED */
150 		}
151 
152 	argc -= optind;
153 	argv += optind;
154 
155 	memset(&ln, 0, sizeof(struct buf));
156 	memset(&blk, 0, sizeof(struct buf));
157 
158 	rc = 1;
159 
160 	if (NULL == *argv) {
161 		curp.file = "<stdin>";
162 		curp.fd = STDIN_FILENO;
163 
164 		c = fdesc(&blk, &ln, &curp);
165 		if ( ! (FL_IGN_ERRORS & curp.fflags))
166 			rc = 1 == c ? 1 : 0;
167 		else
168 			rc = -1 == c ? 0 : 1;
169 	}
170 
171 	while (rc && *argv) {
172 		c = ffile(&blk, &ln, *argv, &curp);
173 		if ( ! (FL_IGN_ERRORS & curp.fflags))
174 			rc = 1 == c ? 1 : 0;
175 		else
176 			rc = -1 == c ? 0 : 1;
177 
178 		argv++;
179 		if (*argv && rc) {
180 			if (curp.lastman)
181 				man_reset(curp.lastman);
182 			if (curp.lastmdoc)
183 				mdoc_reset(curp.lastmdoc);
184 			curp.lastman = NULL;
185 			curp.lastmdoc = NULL;
186 		}
187 	}
188 
189 	if (blk.buf)
190 		free(blk.buf);
191 	if (ln.buf)
192 		free(ln.buf);
193 	if (curp.outfree)
194 		(*curp.outfree)(curp.outdata);
195 	if (curp.mdoc)
196 		mdoc_free(curp.mdoc);
197 	if (curp.man)
198 		man_free(curp.man);
199 
200 	return(rc ? EXIT_SUCCESS : EXIT_FAILURE);
201 }
202 
203 
204 static void
205 version(void)
206 {
207 
208 	(void)printf("%s %s\n", progname, VERSION);
209 	exit(EXIT_SUCCESS);
210 }
211 
212 
213 static void
214 usage(void)
215 {
216 
217 	(void)fprintf(stderr, "usage: %s [-V] [-foption] "
218 			"[-mformat] [-Ooption] [-Toutput] "
219 			"[-Werr] [file...]\n", progname);
220 	exit(EXIT_FAILURE);
221 }
222 
223 
224 static struct man *
225 man_init(struct curparse *curp)
226 {
227 	int		 pflags;
228 	struct man_cb	 mancb;
229 
230 	mancb.man_err = merr;
231 	mancb.man_warn = mwarn;
232 
233 	/* Defaults from mandoc.1. */
234 
235 	pflags = MAN_IGN_MACRO | MAN_IGN_ESCAPE;
236 
237 	if (curp->fflags & FL_NIGN_MACRO)
238 		pflags &= ~MAN_IGN_MACRO;
239 	if (curp->fflags & FL_NIGN_ESCAPE)
240 		pflags &= ~MAN_IGN_ESCAPE;
241 
242 	return(man_alloc(curp, pflags, &mancb));
243 }
244 
245 
246 static struct mdoc *
247 mdoc_init(struct curparse *curp)
248 {
249 	int		 pflags;
250 	struct mdoc_cb	 mdoccb;
251 
252 	mdoccb.mdoc_err = merr;
253 	mdoccb.mdoc_warn = mwarn;
254 
255 	/* Defaults from mandoc.1. */
256 
257 	pflags = MDOC_IGN_MACRO | MDOC_IGN_ESCAPE;
258 
259 	if (curp->fflags & FL_IGN_SCOPE)
260 		pflags |= MDOC_IGN_SCOPE;
261 	if (curp->fflags & FL_NIGN_ESCAPE)
262 		pflags &= ~MDOC_IGN_ESCAPE;
263 	if (curp->fflags & FL_NIGN_MACRO)
264 		pflags &= ~MDOC_IGN_MACRO;
265 
266 	return(mdoc_alloc(curp, pflags, &mdoccb));
267 }
268 
269 
270 static int
271 ffile(struct buf *blk, struct buf *ln,
272 		const char *file, struct curparse *curp)
273 {
274 	int		 c;
275 
276 	curp->file = file;
277 	if (-1 == (curp->fd = open(curp->file, O_RDONLY, 0))) {
278 		perror(curp->file);
279 		return(-1);
280 	}
281 
282 	c = fdesc(blk, ln, curp);
283 
284 	if (-1 == close(curp->fd))
285 		perror(curp->file);
286 
287 	return(c);
288 }
289 
290 
291 static int
292 fdesc(struct buf *blk, struct buf *ln, struct curparse *curp)
293 {
294 	size_t		 sz;
295 	ssize_t		 ssz;
296 	struct stat	 st;
297 	int		 j, i, pos, lnn, comment;
298 	struct man	*man;
299 	struct mdoc	*mdoc;
300 
301 	sz = BUFSIZ;
302 	man = NULL;
303 	mdoc = NULL;
304 
305 	/*
306 	 * Two buffers: ln and buf.  buf is the input buffer optimised
307 	 * here for each file's block size.  ln is a line buffer.  Both
308 	 * growable, hence passed in by ptr-ptr.
309 	 */
310 
311 	if (-1 == fstat(curp->fd, &st))
312 		perror(curp->file);
313 	else if ((size_t)st.st_blksize > sz)
314 		sz = st.st_blksize;
315 
316 	if (sz > blk->sz) {
317 		blk->buf = realloc(blk->buf, sz);
318 		if (NULL == blk->buf) {
319 			perror(NULL);
320 			exit(EXIT_FAILURE);
321 		}
322 		blk->sz = sz;
323 	}
324 
325 	/* Fill buf with file blocksize. */
326 
327 	for (lnn = pos = comment = 0; ; ) {
328 		if (-1 == (ssz = read(curp->fd, blk->buf, sz))) {
329 			perror(curp->file);
330 			return(-1);
331 		} else if (0 == ssz)
332 			break;
333 
334 		/* Parse the read block into partial or full lines. */
335 
336 		for (i = 0; i < (int)ssz; i++) {
337 			if (pos >= (int)ln->sz) {
338 				ln->sz += 256; /* Step-size. */
339 				ln->buf = realloc(ln->buf, ln->sz);
340 				if (NULL == ln->buf) {
341 					perror(NULL);
342 					return(EXIT_FAILURE);
343 				}
344 			}
345 
346 			if ('\n' != blk->buf[i]) {
347 				if (comment)
348 					continue;
349 				ln->buf[pos++] = blk->buf[i];
350 
351 				/* Handle in-line `\"' comments. */
352 
353 				if (1 == pos || '\"' != ln->buf[pos - 1])
354 					continue;
355 
356 				for (j = pos - 2; j >= 0; j--)
357 					if ('\\' != ln->buf[j])
358 						break;
359 
360 				if ( ! ((pos - 2 - j) % 2))
361 					continue;
362 
363 				comment = 1;
364 				pos -= 2;
365 				for (; pos > 0; --pos) {
366 					if (ln->buf[pos] != ' ')
367 						break;
368 					if (ln->buf[pos - 1] == '\\')
369 						break;
370 				}
371 				continue;
372 			}
373 
374 			/* Handle escaped `\\n' newlines. */
375 
376 			if (pos > 0 && 0 == comment &&
377 					'\\' == ln->buf[pos - 1]) {
378 				for (j = pos - 1; j >= 0; j--)
379 					if ('\\' != ln->buf[j])
380 						break;
381 				if ( ! ((pos - j) % 2)) {
382 					pos--;
383 					lnn++;
384 					continue;
385 				}
386 			}
387 
388 			ln->buf[pos] = 0;
389 			lnn++;
390 
391 			/* If unset, assign parser in pset(). */
392 
393 			if ( ! (man || mdoc) && ! pset(ln->buf,
394 						pos, curp, &man, &mdoc))
395 				return(-1);
396 
397 			pos = comment = 0;
398 
399 			/* Pass down into parsers. */
400 
401 			if (man && ! man_parseln(man, lnn, ln->buf))
402 				return(0);
403 			if (mdoc && ! mdoc_parseln(mdoc, lnn, ln->buf))
404 				return(0);
405 		}
406 	}
407 
408 	/* NOTE a parser may not have been assigned, yet. */
409 
410 	if ( ! (man || mdoc)) {
411 		fprintf(stderr, "%s: Not a manual\n", curp->file);
412 		return(0);
413 	}
414 
415 	if (mdoc && ! mdoc_endparse(mdoc))
416 		return(0);
417 	if (man && ! man_endparse(man))
418 		return(0);
419 
420 	/* If unset, allocate output dev now (if applicable). */
421 
422 	if ( ! (curp->outman && curp->outmdoc)) {
423 		switch (curp->outtype) {
424 		case (OUTT_XHTML):
425 			curp->outdata = xhtml_alloc(curp->outopts);
426 			curp->outman = html_man;
427 			curp->outmdoc = html_mdoc;
428 			curp->outfree = html_free;
429 			break;
430 		case (OUTT_HTML):
431 			curp->outdata = html_alloc(curp->outopts);
432 			curp->outman = html_man;
433 			curp->outmdoc = html_mdoc;
434 			curp->outfree = html_free;
435 			break;
436 		case (OUTT_TREE):
437 			curp->outman = tree_man;
438 			curp->outmdoc = tree_mdoc;
439 			break;
440 		case (OUTT_LINT):
441 			break;
442 		default:
443 			curp->outdata = ascii_alloc();
444 			curp->outman = terminal_man;
445 			curp->outmdoc = terminal_mdoc;
446 			curp->outfree = terminal_free;
447 			break;
448 		}
449 	}
450 
451 	/* Execute the out device, if it exists. */
452 
453 	if (man && curp->outman)
454 		(*curp->outman)(curp->outdata, man);
455 	if (mdoc && curp->outmdoc)
456 		(*curp->outmdoc)(curp->outdata, mdoc);
457 
458 	return(1);
459 }
460 
461 
462 static int
463 pset(const char *buf, int pos, struct curparse *curp,
464 		struct man **man, struct mdoc **mdoc)
465 {
466 	int		 i;
467 
468 	/*
469 	 * Try to intuit which kind of manual parser should be used.  If
470 	 * passed in by command-line (-man, -mdoc), then use that
471 	 * explicitly.  If passed as -mandoc, then try to guess from the
472 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
473 	 * default to -man, which is more lenient.
474 	 */
475 
476 	if (buf[0] == '.') {
477 		for (i = 1; buf[i]; i++)
478 			if (' ' != buf[i] && '\t' != buf[i])
479 				break;
480 		if (0 == buf[i])
481 			return(1);
482 	}
483 
484 	switch (curp->inttype) {
485 	case (INTT_MDOC):
486 		if (NULL == curp->mdoc)
487 			curp->mdoc = mdoc_init(curp);
488 		if (NULL == (*mdoc = curp->mdoc))
489 			return(0);
490 		curp->lastmdoc = *mdoc;
491 		return(1);
492 	case (INTT_MAN):
493 		if (NULL == curp->man)
494 			curp->man = man_init(curp);
495 		if (NULL == (*man = curp->man))
496 			return(0);
497 		curp->lastman = *man;
498 		return(1);
499 	default:
500 		break;
501 	}
502 
503 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
504 		if (NULL == curp->mdoc)
505 			curp->mdoc = mdoc_init(curp);
506 		if (NULL == (*mdoc = curp->mdoc))
507 			return(0);
508 		curp->lastmdoc = *mdoc;
509 		return(1);
510 	}
511 
512 	if (NULL == curp->man)
513 		curp->man = man_init(curp);
514 	if (NULL == (*man = curp->man))
515 		return(0);
516 	curp->lastman = *man;
517 	return(1);
518 }
519 
520 
521 static int
522 moptions(enum intt *tflags, char *arg)
523 {
524 
525 	if (0 == strcmp(arg, "doc"))
526 		*tflags = INTT_MDOC;
527 	else if (0 == strcmp(arg, "andoc"))
528 		*tflags = INTT_AUTO;
529 	else if (0 == strcmp(arg, "an"))
530 		*tflags = INTT_MAN;
531 	else {
532 		fprintf(stderr, "%s: Bad argument\n", arg);
533 		return(0);
534 	}
535 
536 	return(1);
537 }
538 
539 
540 static int
541 toptions(struct curparse *curp, char *arg)
542 {
543 
544 	if (0 == strcmp(arg, "ascii"))
545 		curp->outtype = OUTT_ASCII;
546 	else if (0 == strcmp(arg, "lint")) {
547 		curp->outtype = OUTT_LINT;
548 		curp->wflags |= WARN_WALL;
549 		curp->fflags |= FL_STRICT;
550 	}
551 	else if (0 == strcmp(arg, "tree"))
552 		curp->outtype = OUTT_TREE;
553 	else if (0 == strcmp(arg, "html"))
554 		curp->outtype = OUTT_HTML;
555 	else if (0 == strcmp(arg, "xhtml"))
556 		curp->outtype = OUTT_XHTML;
557 	else {
558 		fprintf(stderr, "%s: Bad argument\n", arg);
559 		return(0);
560 	}
561 
562 	return(1);
563 }
564 
565 
566 static int
567 foptions(int *fflags, char *arg)
568 {
569 	char		*v, *o;
570 	const char	*toks[8];
571 
572 	toks[0] = "ign-scope";
573 	toks[1] = "no-ign-escape";
574 	toks[2] = "no-ign-macro";
575 	toks[3] = "ign-errors";
576 	toks[4] = "strict";
577 	toks[5] = "ign-escape";
578 	toks[6] = NULL;
579 
580 	while (*arg) {
581 		o = arg;
582 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
583 		case (0):
584 			*fflags |= FL_IGN_SCOPE;
585 			break;
586 		case (1):
587 			*fflags |= FL_NIGN_ESCAPE;
588 			break;
589 		case (2):
590 			*fflags |= FL_NIGN_MACRO;
591 			break;
592 		case (3):
593 			*fflags |= FL_IGN_ERRORS;
594 			break;
595 		case (4):
596 			*fflags |= FL_STRICT;
597 			break;
598 		case (5):
599 			*fflags &= ~FL_NIGN_ESCAPE;
600 			break;
601 		default:
602 			fprintf(stderr, "%s: Bad argument\n", o);
603 			return(0);
604 		}
605 	}
606 
607 	return(1);
608 }
609 
610 
611 static int
612 woptions(int *wflags, char *arg)
613 {
614 	char		*v, *o;
615 	const char	*toks[3];
616 
617 	toks[0] = "all";
618 	toks[1] = "error";
619 	toks[2] = NULL;
620 
621 	while (*arg) {
622 		o = arg;
623 		switch (getsubopt(&arg, UNCONST(toks), &v)) {
624 		case (0):
625 			*wflags |= WARN_WALL;
626 			break;
627 		case (1):
628 			*wflags |= WARN_WERR;
629 			break;
630 		default:
631 			fprintf(stderr, "%s: Bad argument\n", o);
632 			return(0);
633 		}
634 	}
635 
636 	return(1);
637 }
638 
639 
640 /* ARGSUSED */
641 static int
642 merr(void *arg, int line, int col, const char *msg)
643 {
644 	struct curparse *curp;
645 
646 	curp = (struct curparse *)arg;
647 
648 	(void)fprintf(stderr, "%s:%d:%d: error: %s\n",
649 			curp->file, line, col + 1, msg);
650 
651 	return(0);
652 }
653 
654 
655 static int
656 mwarn(void *arg, int line, int col, const char *msg)
657 {
658 	struct curparse *curp;
659 
660 	curp = (struct curparse *)arg;
661 
662 	if ( ! (curp->wflags & WARN_WALL))
663 		return(1);
664 
665 	(void)fprintf(stderr, "%s:%d:%d: warning: %s\n",
666 			curp->file, line, col + 1, msg);
667 
668 	if ( ! (curp->wflags & WARN_WERR))
669 		return(1);
670 
671 	return(0);
672 }
673 
674