xref: /dragonfly/contrib/mdocml/read.c (revision aeaecd48)
1 /*	$Id: read.c,v 1.79 2014/08/06 15:09:05 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #ifdef HAVE_MMAP
24 # include <sys/stat.h>
25 # include <sys/mman.h>
26 #endif
27 
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdarg.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 
39 #include "mandoc.h"
40 #include "mandoc_aux.h"
41 #include "libmandoc.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "main.h"
45 
46 #define	REPARSE_LIMIT	1000
47 
/*
 * A byte buffer paired with its size.  In this file it holds whole
 * input files, the per-line working buffer of mparse_buf_r(), and
 * the optional lookaside copy kept in struct mparse.
 */
struct	buf {
	char		 *buf; /* binary input buffer */
	size_t		  sz; /* size of binary buffer */
};
52 
/*
 * State of one parser instance, created by mparse_alloc() and
 * released by mparse_free().  The pman/pmdoc pointers own the
 * language parsers; man/mdoc merely select which one is in use for
 * the current parse and are cleared again by mparse_reset().
 */
struct	mparse {
	enum mandoclevel  file_status; /* status of current parse */
	enum mandoclevel  wlevel; /* ignore messages below this */
	int		  line; /* line number in the file */
	int		  options; /* parser options */
	struct man	 *pman; /* persistent man parser */
	struct mdoc	 *pmdoc; /* persistent mdoc parser */
	struct man	 *man; /* man parser */
	struct mdoc	 *mdoc; /* mdoc parser */
	struct roff	 *roff; /* roff parser (!NULL) */
	char		 *sodest; /* filename pointed to by .so */
	int		  reparse_count; /* finite interp. stack */
	mandocmsg	  mmsg; /* warning/error message handler */
	const char	 *file; /* name of the input being parsed, passed to mmsg */
	struct buf	 *secondary; /* copy of parsed lines, see mparse_keep() */
	const char	 *defos; /* default operating system */
};
70 
71 static	void	  resize_buf(struct buf *, size_t);
72 static	void	  mparse_buf_r(struct mparse *, struct buf, int);
73 static	void	  pset(const char *, int, struct mparse *);
74 static	int	  read_whole_file(struct mparse *, const char *, int,
75 				struct buf *, int *);
76 static	void	  mparse_end(struct mparse *);
77 static	void	  mparse_parse_buffer(struct mparse *, struct buf,
78 			const char *);
79 
/*
 * Thresholds used by mandoc_msg() to map an error code to a severity
 * level.  The table is indexed by enum mandoclevel; an error code is
 * assigned the highest level, at or below MANDOCLEVEL_FATAL, whose
 * entry is not greater than the code.
 */
static	const enum mandocerr	mandoclimits[MANDOCLEVEL_MAX] = {
	MANDOCERR_OK,
	MANDOCERR_WARNING,
	MANDOCERR_WARNING,
	MANDOCERR_ERROR,
	MANDOCERR_FATAL,
	MANDOCERR_MAX,
	MANDOCERR_MAX
};
89 
/*
 * Message text for each enum mandocerr value, as returned by
 * mparse_strerror().  The table is indexed directly by the error
 * code, so the entries have to stay in the same order as the enum
 * (presumably declared in mandoc.h -- not visible here).
 * One slot is NULL and therefore has no fixed text; presumably it
 * belongs to the "cannot open" system error, which mparse_readfd()
 * reports with strerror(errno) instead -- TODO confirm against the enum.
 */
static	const char * const	mandocerrs[MANDOCERR_MAX] = {
	"ok",

	"generic warning",

	/* related to the prologue */
	"missing manual title, using UNTITLED",
	"missing manual title, using \"\"",
	"lower case character in document title",
	"missing manual section, using \"\"",
	"unknown manual section",
	"unknown manual volume or arch",
	"missing date, using today's date",
	"cannot parse date, using it verbatim",
	"missing Os macro, using \"\"",
	"duplicate prologue macro",
	"late prologue macro",
	"skipping late title macro",
	"prologue macros out of order",

	/* related to document structure */
	".so is fragile, better use ln(1)",
	"no document body",
	"content before first section header",
	"first section is not \"NAME\"",
	"bad NAME section contents",
	"sections out of conventional order",
	"duplicate section title",
	"unexpected section",

	/* related to macros and nesting */
	"obsolete macro",
	"skipping paragraph macro",
	"moving paragraph macro out of list",
	"skipping no-space macro",
	"blocks badly nested",
	"nested displays are not portable",
	"moving content out of list",
	".Vt block has child macro",
	"fill mode already enabled, skipping",
	"fill mode already disabled, skipping",
	"line scope broken",

	/* related to missing macro arguments */
	"skipping empty request",
	"conditional request controls empty scope",
	"skipping empty macro",
	"empty argument, using 0n",
	"argument count wrong",
	"missing display type, using -ragged",
	"list type is not the first argument",
	"missing -width in -tag list, using 8n",
	"missing utility name, using \"\"",
	"empty head in list item",
	"empty list item",
	"missing font type, using \\fR",
	"unknown font type, using \\fR",
	"missing -std argument, adding it",

	/* related to bad macro arguments */
	"unterminated quoted argument",
	"duplicate argument",
	"skipping duplicate argument",
	"skipping duplicate display type",
	"skipping duplicate list type",
	"skipping -width argument",
	"unknown AT&T UNIX version",
	"invalid content in Rs block",
	"invalid Boolean argument",
	"unknown font, skipping request",

	/* related to plain text */
	"blank line in fill mode, using .sp",
	"tab in filled text",
	"whitespace at end of input line",
	"bad comment style",
	"invalid escape sequence",
	"undefined string, using \"\"",

	"generic error",

	/* related to equations */
	"unexpected equation scope closure",
	"equation scope open on exit",
	"overlapping equation scopes",
	"unexpected end of equation",
	"equation syntax error",

	/* related to tables */
	"bad table syntax",
	"bad table option",
	"bad table layout",
	"no table layout cells specified",
	"no table data cells specified",
	"ignore data in cell",
	"data block still open",
	"ignoring extra data cells",

	/* related to document structure and macros */
	"input stack limit exceeded, infinite loop?",
	"skipping bad character",
	"skipping unknown macro",
	"skipping item outside list",
	"skipping column outside column list",
	"skipping end of block that is not open",
	"inserting missing end of block",
	"appending missing end of block",

	/* related to request and macro arguments */
	"escaped character not allowed in a name",
	"argument count wrong",
	"missing list type, using -item",
	"missing manual name, using \"\"",
	"uname(3) system call failed, using UNKNOWN",
	"unknown standard specifier",
	"skipping request without numeric argument",
	"skipping all arguments",
	"skipping excess arguments",

	"generic fatal error",

	"input too large",
	"NOT IMPLEMENTED: Bd -file",
	"NOT IMPLEMENTED: .so with absolute path or \"..\"",
	".so request failed",

	/* system errors */
	NULL,
	"cannot stat file",
	"cannot read file",
};
221 
/*
 * Printable name of each severity level, indexed by enum mandoclevel
 * and returned by mparse_strlevel().
 */
static	const char * const	mandoclevels[MANDOCLEVEL_MAX] = {
	"SUCCESS",
	"RESERVED",
	"WARNING",
	"ERROR",
	"FATAL",
	"BADARG",
	"SYSERR"
};
231 
232 
233 static void
234 resize_buf(struct buf *buf, size_t initial)
235 {
236 
237 	buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
238 	buf->buf = mandoc_realloc(buf->buf, buf->sz);
239 }
240 
241 static void
242 pset(const char *buf, int pos, struct mparse *curp)
243 {
244 	int		 i;
245 
246 	/*
247 	 * Try to intuit which kind of manual parser should be used.  If
248 	 * passed in by command-line (-man, -mdoc), then use that
249 	 * explicitly.  If passed as -mandoc, then try to guess from the
250 	 * line: either skip dot-lines, use -mdoc when finding `.Dt', or
251 	 * default to -man, which is more lenient.
252 	 *
253 	 * Separate out pmdoc/pman from mdoc/man: the first persists
254 	 * through all parsers, while the latter is used per-parse.
255 	 */
256 
257 	if ('.' == buf[0] || '\'' == buf[0]) {
258 		for (i = 1; buf[i]; i++)
259 			if (' ' != buf[i] && '\t' != buf[i])
260 				break;
261 		if ('\0' == buf[i])
262 			return;
263 	}
264 
265 	if (MPARSE_MDOC & curp->options) {
266 		curp->mdoc = curp->pmdoc;
267 		return;
268 	} else if (MPARSE_MAN & curp->options) {
269 		curp->man = curp->pman;
270 		return;
271 	}
272 
273 	if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3))  {
274 		if (NULL == curp->pmdoc)
275 			curp->pmdoc = mdoc_alloc(
276 			    curp->roff, curp, curp->defos,
277 			    MPARSE_QUICK & curp->options ? 1 : 0);
278 		assert(curp->pmdoc);
279 		curp->mdoc = curp->pmdoc;
280 		return;
281 	}
282 
283 	if (NULL == curp->pman)
284 		curp->pman = man_alloc(curp->roff, curp,
285 		    MPARSE_QUICK & curp->options ? 1 : 0);
286 	assert(curp->pman);
287 	curp->man = curp->pman;
288 }
289 
/*
 * Main parse routine for an opened file.  This is called for each
 * opened file and simply loops around the full input file, possibly
 * nesting (i.e., with `so').
 *
 * curp:  parser state; the line number, reparse counter, sodest,
 *        lookaside buffer and file status may all be updated.
 * blk:   the input that is split into logical lines.
 * start: non-zero when blk is file input (see mparse_parse_buffer()),
 *        zero when blk is a line buffer handed back for re-parsing
 *        after ROFF_REPARSE.  In the latter case a NUL byte ends
 *        the input.
 *
 * Each logical line is copied into the local buffer "ln", passed to
 * roff_parseln() and, depending on the result, on to the man or mdoc
 * parser.  The local buffer is freed before returning.
 */
static void
mparse_buf_r(struct mparse *curp, struct buf blk, int start)
{
	const struct tbl_span	*span;
	struct buf	 ln;
	enum rofferr	 rr;
	int		 i, of, rc;
	int		 pos; /* byte number in the ln buffer */
	int		 lnn; /* line number in the real file */
	unsigned char	 c;

	memset(&ln, 0, sizeof(struct buf));

	lnn = curp->line;
	pos = 0;

	for (i = 0; i < (int)blk.sz; ) {
		/* A NUL byte where a fresh line would begin ends the input. */
		if (0 == pos && '\0' == blk.buf[i])
			break;

		/*
		 * For file input, publish the line number reached so far
		 * and restart the reparse counter on every pass.
		 */
		if (start) {
			curp->line = lnn;
			curp->reparse_count = 0;
		}

		while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {

			/*
			 * When finding an unescaped newline character,
			 * leave the character loop to process the line.
			 * Skip a preceding carriage return, if any.
			 */

			if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
			    '\n' == blk.buf[i + 1])
				++i;
			if ('\n' == blk.buf[i]) {
				++i;
				++lnn;
				break;
			}

			/*
			 * Make sure we have space for at least
			 * one backslash and one other character
			 * and the trailing NUL byte.
			 */

			if (pos + 2 >= (int)ln.sz)
				resize_buf(&ln, 256);

			/*
			 * Warn about bogus characters.  If you're using
			 * non-ASCII encoding, you're screwing your
			 * readers.  Since I'd rather this not happen,
			 * I'll be helpful and replace these characters
			 * with "?", so we don't display gibberish.
			 * Note to manual writers: use special characters.
			 */

			c = (unsigned char) blk.buf[i];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, "0x%x", c);
				i++;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Trailing backslash = a plain char. */

			if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
				ln.buf[pos++] = blk.buf[i++];
				continue;
			}

			/*
			 * Found escape and at least one other character.
			 * When it's a newline character, skip it.
			 * When there is a carriage return in between,
			 * skip that one as well.
			 */

			if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
			    '\n' == blk.buf[i + 2])
				++i;
			if ('\n' == blk.buf[i + 1]) {
				i += 2;
				++lnn;
				continue;
			}

			if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
				i += 2;
				/* Comment, skip to end of line */
				for (; i < (int)blk.sz; ++i) {
					if ('\n' == blk.buf[i]) {
						++i;
						++lnn;
						break;
					}
				}

				/*
				 * Backout trailing whitespaces, but stop
				 * at a space preceded by a backslash.
				 */
				for (; pos > 0; --pos) {
					if (ln.buf[pos - 1] != ' ')
						break;
					if (pos > 2 && ln.buf[pos - 2] == '\\')
						break;
				}
				break;
			}

			/* Catch escaped bogus characters. */

			c = (unsigned char) blk.buf[i+1];

			if ( ! (isascii(c) &&
			    (isgraph(c) || isblank(c)))) {
				mandoc_vmsg(MANDOCERR_BADCHAR, curp,
				    curp->line, pos, "0x%x", c);
				i += 2;
				ln.buf[pos++] = '?';
				continue;
			}

			/* Some other escape sequence, copy & cont. */

			ln.buf[pos++] = blk.buf[i++];
			ln.buf[pos++] = blk.buf[i++];
		}

		/*
		 * Make room for the terminating NUL; this is needed when
		 * the character loop never grew the (initially empty)
		 * line buffer.
		 */
		if (pos >= (int)ln.sz)
			resize_buf(&ln, 256);

		ln.buf[pos] = '\0';

		/*
		 * A significant amount of complexity is contained by
		 * the roff preprocessor.  It's line-oriented but can be
		 * expressed on one line, so we need at times to
		 * readjust our starting point and re-run it.  The roff
		 * preprocessor can also readjust the buffers with new
		 * data, so we pass them in wholesale.
		 */

		of = 0;

		/*
		 * Maintain a lookaside buffer of all parsed lines.  We
		 * only do this if mparse_keep() has been invoked (the
		 * buffer may be accessed with mparse_getkeep()).
		 * Each line is stored followed by a newline, and the
		 * buffer is kept NUL-terminated.
		 */

		if (curp->secondary) {
			curp->secondary->buf = mandoc_realloc(
			    curp->secondary->buf,
			    curp->secondary->sz + pos + 2);
			memcpy(curp->secondary->buf +
			    curp->secondary->sz,
			    ln.buf, pos);
			curp->secondary->sz += pos;
			curp->secondary->buf
				[curp->secondary->sz] = '\n';
			curp->secondary->sz++;
			curp->secondary->buf
				[curp->secondary->sz] = '\0';
		}
rerun:
		rr = roff_parseln(curp->roff, curp->line,
		    &ln.buf, &ln.sz, of, &of);

		switch (rr) {
		case ROFF_REPARSE:
			/*
			 * Run the rewritten line through this function
			 * again, unless that has already happened too
			 * often for the current input line.
			 */
			if (REPARSE_LIMIT >= ++curp->reparse_count)
				mparse_buf_r(curp, ln, 0);
			else
				mandoc_msg(MANDOCERR_ROFFLOOP, curp,
				    curp->line, pos, NULL);
			pos = 0;
			continue;
		case ROFF_APPEND:
			/* Keep the buffer; the next line is appended to it. */
			pos = (int)strlen(ln.buf);
			continue;
		case ROFF_RERUN:
			/* Same buffer, offset as updated by roff_parseln(). */
			goto rerun;
		case ROFF_IGN:
			/* Nothing to pass on; start the next line. */
			pos = 0;
			continue;
		case ROFF_ERR:
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		case ROFF_SO:
			/*
			 * Without MPARSE_SO, a .so request that is the
			 * last thing in the input is not followed; its
			 * target is merely recorded for the caller.
			 * NOTE(review): a sodest value that is already
			 * set is overwritten here without being freed
			 * -- confirm whether that can happen.
			 */
			if (0 == (MPARSE_SO & curp->options) &&
			    (i >= (int)blk.sz || '\0' == blk.buf[i])) {
				curp->sodest = mandoc_strdup(ln.buf + of);
				free(ln.buf);
				return;
			}
			/*
			 * We remove `so' clauses from our lookaside
			 * buffer because we're going to descend into
			 * the file recursively.
			 */
			if (curp->secondary)
				curp->secondary->sz -= pos + 1;
			mparse_readfd(curp, -1, ln.buf + of);
			if (MANDOCLEVEL_FATAL <= curp->file_status) {
				mandoc_vmsg(MANDOCERR_SO_FAIL,
				    curp, curp->line, pos,
				    ".so %s", ln.buf + of);
				break;
			}
			pos = 0;
			continue;
		default:
			break;
		}

		/*
		 * If we encounter errors in the recursive parse, make
		 * sure we don't continue parsing.
		 */

		if (MANDOCLEVEL_FATAL <= curp->file_status)
			break;

		/*
		 * If input parsers have not been allocated, do so now.
		 * We keep these instanced between parsers, but set them
		 * locally per parse routine since we can use different
		 * parsers with each one.
		 */

		if ( ! (curp->man || curp->mdoc))
			pset(ln.buf + of, pos - of, curp);

		/*
		 * Lastly, push down into the parsers themselves.  One
		 * of these will have already been set in the pset()
		 * routine.
		 * If libroff returns ROFF_TBL, then add it to the
		 * currently open parse.  Since we only get here if
		 * there does exist data (see tbl_data.c), we're
		 * guaranteed that something's been allocated.
		 * Do the same for ROFF_EQN.
		 */

		/* rc keeps this value when nothing is dispatched below. */
		rc = -1;

		if (ROFF_TBL == rr)
			while (NULL != (span = roff_span(curp->roff))) {
				rc = curp->man ?
				    man_addspan(curp->man, span) :
				    mdoc_addspan(curp->mdoc, span);
				if (0 == rc)
					break;
			}
		else if (ROFF_EQN == rr)
			rc = curp->mdoc ?
			    mdoc_addeqn(curp->mdoc,
				roff_eqn(curp->roff)) :
			    man_addeqn(curp->man,
				roff_eqn(curp->roff));
		else if (curp->man || curp->mdoc)
			rc = curp->man ?
			    man_parseln(curp->man,
				curp->line, ln.buf, of) :
			    mdoc_parseln(curp->mdoc,
				curp->line, ln.buf, of);

		/*
		 * A result of 0 goes along with a fatal file status;
		 * a result of 2 also ends the parse, without the assertion.
		 */
		if (0 == rc) {
			assert(MANDOCLEVEL_FATAL <= curp->file_status);
			break;
		} else if (2 == rc)
			break;

		/* Temporary buffers typically are not full. */

		if (0 == start && '\0' == blk.buf[i])
			break;

		/* Start the next input line. */

		pos = 0;
	}

	free(ln.buf);
}
586 
587 static int
588 read_whole_file(struct mparse *curp, const char *file, int fd,
589 		struct buf *fb, int *with_mmap)
590 {
591 	size_t		 off;
592 	ssize_t		 ssz;
593 
594 #ifdef	HAVE_MMAP
595 	struct stat	 st;
596 	if (-1 == fstat(fd, &st)) {
597 		curp->file_status = MANDOCLEVEL_SYSERR;
598 		if (curp->mmsg)
599 			(*curp->mmsg)(MANDOCERR_SYSSTAT, curp->file_status,
600 			    file, 0, 0, strerror(errno));
601 		return(0);
602 	}
603 
604 	/*
605 	 * If we're a regular file, try just reading in the whole entry
606 	 * via mmap().  This is faster than reading it into blocks, and
607 	 * since each file is only a few bytes to begin with, I'm not
608 	 * concerned that this is going to tank any machines.
609 	 */
610 
611 	if (S_ISREG(st.st_mode)) {
612 		if (st.st_size >= (1U << 31)) {
613 			curp->file_status = MANDOCLEVEL_FATAL;
614 			if (curp->mmsg)
615 				(*curp->mmsg)(MANDOCERR_TOOLARGE,
616 				    curp->file_status, file, 0, 0, NULL);
617 			return(0);
618 		}
619 		*with_mmap = 1;
620 		fb->sz = (size_t)st.st_size;
621 		fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
622 		if (fb->buf != MAP_FAILED)
623 			return(1);
624 	}
625 #endif
626 
627 	/*
628 	 * If this isn't a regular file (like, say, stdin), then we must
629 	 * go the old way and just read things in bit by bit.
630 	 */
631 
632 	*with_mmap = 0;
633 	off = 0;
634 	fb->sz = 0;
635 	fb->buf = NULL;
636 	for (;;) {
637 		if (off == fb->sz) {
638 			if (fb->sz == (1U << 31)) {
639 				curp->file_status = MANDOCLEVEL_FATAL;
640 				if (curp->mmsg)
641 					(*curp->mmsg)(MANDOCERR_TOOLARGE,
642 					    curp->file_status,
643 					    file, 0, 0, NULL);
644 				break;
645 			}
646 			resize_buf(fb, 65536);
647 		}
648 		ssz = read(fd, fb->buf + (int)off, fb->sz - off);
649 		if (ssz == 0) {
650 			fb->sz = off;
651 			return(1);
652 		}
653 		if (ssz == -1) {
654 			curp->file_status = MANDOCLEVEL_SYSERR;
655 			if (curp->mmsg)
656 				(*curp->mmsg)(MANDOCERR_SYSREAD,
657 				    curp->file_status, file, 0, 0,
658 				    strerror(errno));
659 			break;
660 		}
661 		off += (size_t)ssz;
662 	}
663 
664 	free(fb->buf);
665 	fb->buf = NULL;
666 	return(0);
667 }
668 
669 static void
670 mparse_end(struct mparse *curp)
671 {
672 
673 	if (MANDOCLEVEL_FATAL <= curp->file_status)
674 		return;
675 
676 	if (curp->mdoc == NULL &&
677 	    curp->man == NULL &&
678 	    curp->sodest == NULL) {
679 		if (curp->options & MPARSE_MDOC)
680 			curp->mdoc = curp->pmdoc;
681 		else {
682 			if (curp->pman == NULL)
683 				curp->pman = man_alloc(curp->roff, curp,
684 				    curp->options & MPARSE_QUICK ? 1 : 0);
685 			curp->man = curp->pman;
686 		}
687 	}
688 
689 	if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
690 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
691 		return;
692 	}
693 
694 	if (curp->man && ! man_endparse(curp->man)) {
695 		assert(MANDOCLEVEL_FATAL <= curp->file_status);
696 		return;
697 	}
698 
699 	roff_endparse(curp->roff);
700 }
701 
702 static void
703 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
704 {
705 	const char	*svfile;
706 	static int	 recursion_depth;
707 
708 	if (64 < recursion_depth) {
709 		mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
710 		return;
711 	}
712 
713 	/* Line number is per-file. */
714 	svfile = curp->file;
715 	curp->file = file;
716 	curp->line = 1;
717 	recursion_depth++;
718 
719 	mparse_buf_r(curp, blk, 1);
720 
721 	if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
722 		mparse_end(curp);
723 
724 	curp->file = svfile;
725 }
726 
727 enum mandoclevel
728 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
729 		const char *file)
730 {
731 	struct buf blk;
732 
733 	blk.buf = UNCONST(buf);
734 	blk.sz = len;
735 
736 	mparse_parse_buffer(curp, blk, file);
737 	return(curp->file_status);
738 }
739 
740 enum mandoclevel
741 mparse_readfd(struct mparse *curp, int fd, const char *file)
742 {
743 	struct buf	 blk;
744 	int		 with_mmap;
745 
746 	if (-1 == fd && -1 == (fd = open(file, O_RDONLY, 0))) {
747 		curp->file_status = MANDOCLEVEL_SYSERR;
748 		if (curp->mmsg)
749 			(*curp->mmsg)(MANDOCERR_SYSOPEN,
750 			    curp->file_status,
751 			    file, 0, 0, strerror(errno));
752 		goto out;
753 	}
754 
755 	/*
756 	 * Run for each opened file; may be called more than once for
757 	 * each full parse sequence if the opened file is nested (i.e.,
758 	 * from `so').  Simply sucks in the whole file and moves into
759 	 * the parse phase for the file.
760 	 */
761 
762 	if ( ! read_whole_file(curp, file, fd, &blk, &with_mmap))
763 		goto out;
764 
765 	mparse_parse_buffer(curp, blk, file);
766 
767 #ifdef	HAVE_MMAP
768 	if (with_mmap)
769 		munmap(blk.buf, blk.sz);
770 	else
771 #endif
772 		free(blk.buf);
773 
774 	if (STDIN_FILENO != fd && -1 == close(fd))
775 		perror(file);
776 out:
777 	return(curp->file_status);
778 }
779 
780 struct mparse *
781 mparse_alloc(int options, enum mandoclevel wlevel,
782 		mandocmsg mmsg, const char *defos)
783 {
784 	struct mparse	*curp;
785 
786 	assert(wlevel <= MANDOCLEVEL_FATAL);
787 
788 	curp = mandoc_calloc(1, sizeof(struct mparse));
789 
790 	curp->options = options;
791 	curp->wlevel = wlevel;
792 	curp->mmsg = mmsg;
793 	curp->defos = defos;
794 
795 	curp->roff = roff_alloc(curp, options);
796 	if (curp->options & MPARSE_MDOC)
797 		curp->pmdoc = mdoc_alloc(
798 		    curp->roff, curp, curp->defos,
799 		    curp->options & MPARSE_QUICK ? 1 : 0);
800 	if (curp->options & MPARSE_MAN)
801 		curp->pman = man_alloc(curp->roff, curp,
802 		    curp->options & MPARSE_QUICK ? 1 : 0);
803 
804 	return(curp);
805 }
806 
807 void
808 mparse_reset(struct mparse *curp)
809 {
810 
811 	roff_reset(curp->roff);
812 
813 	if (curp->mdoc)
814 		mdoc_reset(curp->mdoc);
815 	if (curp->man)
816 		man_reset(curp->man);
817 	if (curp->secondary)
818 		curp->secondary->sz = 0;
819 
820 	curp->file_status = MANDOCLEVEL_OK;
821 	curp->mdoc = NULL;
822 	curp->man = NULL;
823 
824 	free(curp->sodest);
825 	curp->sodest = NULL;
826 }
827 
828 void
829 mparse_free(struct mparse *curp)
830 {
831 
832 	if (curp->pmdoc)
833 		mdoc_free(curp->pmdoc);
834 	if (curp->pman)
835 		man_free(curp->pman);
836 	if (curp->roff)
837 		roff_free(curp->roff);
838 	if (curp->secondary)
839 		free(curp->secondary->buf);
840 
841 	free(curp->secondary);
842 	free(curp->sodest);
843 	free(curp);
844 }
845 
846 void
847 mparse_result(struct mparse *curp,
848 	struct mdoc **mdoc, struct man **man, char **sodest)
849 {
850 
851 	if (sodest && NULL != (*sodest = curp->sodest)) {
852 		*mdoc = NULL;
853 		*man = NULL;
854 		return;
855 	}
856 	if (mdoc)
857 		*mdoc = curp->mdoc;
858 	if (man)
859 		*man = curp->man;
860 }
861 
862 void
863 mandoc_vmsg(enum mandocerr t, struct mparse *m,
864 		int ln, int pos, const char *fmt, ...)
865 {
866 	char		 buf[256];
867 	va_list		 ap;
868 
869 	va_start(ap, fmt);
870 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
871 	va_end(ap);
872 
873 	mandoc_msg(t, m, ln, pos, buf);
874 }
875 
876 void
877 mandoc_msg(enum mandocerr er, struct mparse *m,
878 		int ln, int col, const char *msg)
879 {
880 	enum mandoclevel level;
881 
882 	level = MANDOCLEVEL_FATAL;
883 	while (er < mandoclimits[level])
884 		level--;
885 
886 	if (level < m->wlevel)
887 		return;
888 
889 	if (m->mmsg)
890 		(*m->mmsg)(er, level, m->file, ln, col, msg);
891 
892 	if (m->file_status < level)
893 		m->file_status = level;
894 }
895 
896 const char *
897 mparse_strerror(enum mandocerr er)
898 {
899 
900 	return(mandocerrs[er]);
901 }
902 
903 const char *
904 mparse_strlevel(enum mandoclevel lvl)
905 {
906 	return(mandoclevels[lvl]);
907 }
908 
909 void
910 mparse_keep(struct mparse *p)
911 {
912 
913 	assert(NULL == p->secondary);
914 	p->secondary = mandoc_calloc(1, sizeof(struct buf));
915 }
916 
917 const char *
918 mparse_getkeep(const struct mparse *p)
919 {
920 
921 	assert(p->secondary);
922 	return(p->secondary->sz ? p->secondary->buf : NULL);
923 }
924