xref: /openbsd/usr.bin/mandoc/roff.c (revision 3d8817e4)
1 /*	$Id: roff.c,v 1.36 2011/04/24 16:28:48 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <assert.h>
19 #include <errno.h>
20 #include <ctype.h>
21 #include <limits.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <stdio.h>
25 
26 #include "mandoc.h"
27 #include "libroff.h"
28 #include "libmandoc.h"
29 
/* Capacity of the roff.rstack[] array of rules saved by `ie'. */
#define	RSTACK_MAX	128
31 
/*
 * Tokens of the requests and macros known to this parser.
 * The values are used as indices into the roffs[] table, so the
 * enumerators and the table entries must stay in the same order.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,	/* the "T&" request */
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,	/* the ".." block closer */
	ROFF_ccond,	/* the "\}" conditional closer; FIXME: remove this. */
	ROFF_USERDEF,	/* invocation of a user-defined macro */
	ROFF_MAX	/* token count; also means "no such request" */
};
66 
/*
 * Evaluation rule of a conditional scope: the lines it governs are
 * either passed on (ALLOW) or thrown away (DENY).
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
71 
/* One entry in the singly linked list of user-defined strings. */
struct	roffstr {
	char		*name;		/* symbol name, used as the lookup key */
	char		*string;	/* value currently bound to the name */
	struct roffstr	*next;		/* following list entry, if any */
};
77 
78 struct	roff {
79 	struct mparse	*parse; /* parse point */
80 	struct roffnode	*last; /* leaf of stack */
81 	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
82 	int		 rstackpos; /* position in rstack */
83 	struct regset	*regs; /* read/writable registers */
84 	struct roffstr	*first_string; /* user-defined strings & macros */
85 	const char	*current_string; /* value of last called user macro */
86 	struct tbl_node	*first_tbl; /* first table parsed */
87 	struct tbl_node	*last_tbl; /* last table parsed */
88 	struct tbl_node	*tbl; /* current table being parsed */
89 	struct eqn_node	*last_eqn; /* last equation parsed */
90 	struct eqn_node	*first_eqn; /* first equation parsed */
91 	struct eqn_node	*eqn; /* current equation being parsed */
92 };
93 
94 struct	roffnode {
95 	enum rofft	 tok; /* type of node */
96 	struct roffnode	*parent; /* up one in stack */
97 	int		 line; /* parse line */
98 	int		 col; /* parse col */
99 	char		*name; /* node name, e.g. macro name */
100 	char		*end; /* end-rules: custom token */
101 	int		 endspan; /* end-rules: next-line or infty */
102 	enum roffrule	 rule; /* current evaluation rule */
103 };
104 
/*
 * Parameter list shared by all request handlers (see roffproc):
 *
 *   r     parse context
 *   tok   token of the request being handled
 *   bufp  input buffer
 *   szp   size of the input buffer
 *   ln    line being parsed
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum rofft tok, \
			 char **bufp, \
			 size_t *szp, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs
113 
/* Signature of every request handler stored in the roffs[] table. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
115 
116 struct	roffmac {
117 	const char	*name; /* macro name */
118 	roffproc	 proc; /* process new macro */
119 	roffproc	 text; /* process as child text of macro */
120 	roffproc	 sub; /* process as child of macro */
121 	int		 flags;
122 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
123 	struct roffmac	*next;
124 };
125 
/* Request handlers and the string/register helpers they rely on. */
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *v, int *pos);
static	void		 roff_freestr(struct roff *r);
static	char		*roff_getname(struct roff *r, char **cpp,
				int ln, int pos);
static	const char	*roff_getstrn(const struct roff *r,
				const char *name, size_t len);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	int		 roff_res(struct roff *r,
				char **bufp, size_t *szp, int pos);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *r, const char *name,
				const char *string, int multiline);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
154 
/*
 * The request hash table has one bucket per printable ASCII
 * character; see roff_hash_find() for how it is keyed.
 */

#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* Bucket heads; the entries are chained through roffmac.next. */
static	struct roffmac	*hash[HASHWIDTH];
162 
163 static	struct roffmac	 roffs[ROFF_MAX] = {
164 	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
165 	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
166 	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
167 	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
168 	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
169 	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
170 	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
171 	{ "ds", roff_ds, NULL, NULL, 0, NULL },
172 	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
173 	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
174 	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
175 	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
176 	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
177 	{ "it", roff_line_ignore, NULL, NULL, 0, NULL },
178 	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
179 	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
180 	{ "nr", roff_nr, NULL, NULL, 0, NULL },
181 	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
182 	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
183 	{ "rm", roff_rm, NULL, NULL, 0, NULL },
184 	{ "so", roff_so, NULL, NULL, 0, NULL },
185 	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
186 	{ "tr", roff_line_ignore, NULL, NULL, 0, NULL },
187 	{ "TS", roff_TS, NULL, NULL, 0, NULL },
188 	{ "TE", roff_TE, NULL, NULL, 0, NULL },
189 	{ "T&", roff_T_, NULL, NULL, 0, NULL },
190 	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
191 	{ "EN", roff_EN, NULL, NULL, 0, NULL },
192 	{ ".", roff_cblock, NULL, NULL, 0, NULL },
193 	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
194 	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
195 };
196 
/* Parser housekeeping: allocation, hashing, node stack, tokenizing. */
static	void		 roff_free1(struct roff *r);
static	enum rofft	 roff_hash_find(const char *p, size_t s);
static	void		 roff_hash_init(void);
static	void		 roffnode_cleanscope(struct roff *r);
static	void		 roffnode_push(struct roff *r, enum rofft tok,
				const char *name, int line, int col);
static	void		 roffnode_pop(struct roff *r);
static	enum rofft	 roff_parse(struct roff *r, const char *buf,
				int *pos);
static	int		 roff_parse_nat(const char *buf, unsigned int *res);
206 
/*
 * Bucket index for the name p: its first character, rebased so that
 * ASCII_LO maps to bucket 0.  The caller must make sure the character
 * lies within ASCII_LO..ASCII_HI.  See roff_hash_find().
 * The argument is parenthesized so that an expression such as
 * ROFF_HASH(s + 1) indexes the intended pointer.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)
209 
210 static void
211 roff_hash_init(void)
212 {
213 	struct roffmac	 *n;
214 	int		  buc, i;
215 
216 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
217 		assert(roffs[i].name[0] >= ASCII_LO);
218 		assert(roffs[i].name[0] <= ASCII_HI);
219 
220 		buc = ROFF_HASH(roffs[i].name);
221 
222 		if (NULL != (n = hash[buc])) {
223 			for ( ; n->next; n = n->next)
224 				/* Do nothing. */ ;
225 			n->next = &roffs[i];
226 		} else
227 			hash[buc] = &roffs[i];
228 	}
229 }
230 
231 
232 /*
233  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
234  * the nil-terminated string name could be found.
235  */
236 static enum rofft
237 roff_hash_find(const char *p, size_t s)
238 {
239 	int		 buc;
240 	struct roffmac	*n;
241 
242 	/*
243 	 * libroff has an extremely simple hashtable, for the time
244 	 * being, which simply keys on the first character, which must
245 	 * be printable, then walks a chain.  It works well enough until
246 	 * optimised.
247 	 */
248 
249 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
250 		return(ROFF_MAX);
251 
252 	buc = ROFF_HASH(p);
253 
254 	if (NULL == (n = hash[buc]))
255 		return(ROFF_MAX);
256 	for ( ; n; n = n->next)
257 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
258 			return((enum rofft)(n - roffs));
259 
260 	return(ROFF_MAX);
261 }
262 
263 
264 /*
265  * Pop the current node off of the stack of roff instructions currently
266  * pending.
267  */
268 static void
269 roffnode_pop(struct roff *r)
270 {
271 	struct roffnode	*p;
272 
273 	assert(r->last);
274 	p = r->last;
275 
276 	r->last = r->last->parent;
277 	free(p->name);
278 	free(p->end);
279 	free(p);
280 }
281 
282 
283 /*
284  * Push a roff node onto the instruction stack.  This must later be
285  * removed with roffnode_pop().
286  */
287 static void
288 roffnode_push(struct roff *r, enum rofft tok, const char *name,
289 		int line, int col)
290 {
291 	struct roffnode	*p;
292 
293 	p = mandoc_calloc(1, sizeof(struct roffnode));
294 	p->tok = tok;
295 	if (name)
296 		p->name = mandoc_strdup(name);
297 	p->parent = r->last;
298 	p->line = line;
299 	p->col = col;
300 	p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
301 
302 	r->last = p;
303 }
304 
305 
306 static void
307 roff_free1(struct roff *r)
308 {
309 	struct tbl_node	*t;
310 	struct eqn_node	*e;
311 
312 	while (NULL != (t = r->first_tbl)) {
313 		r->first_tbl = t->next;
314 		tbl_free(t);
315 	}
316 
317 	r->first_tbl = r->last_tbl = r->tbl = NULL;
318 
319 	while (NULL != (e = r->first_eqn)) {
320 		r->first_eqn = e->next;
321 		eqn_free(e);
322 	}
323 
324 	r->first_eqn = r->last_eqn = r->eqn = NULL;
325 
326 	while (r->last)
327 		roffnode_pop(r);
328 
329 	roff_freestr(r);
330 }
331 
332 
333 void
334 roff_reset(struct roff *r)
335 {
336 
337 	roff_free1(r);
338 }
339 
340 
/* Destroy a parser: release all it holds, then the parser itself. */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
348 
349 
350 struct roff *
351 roff_alloc(struct regset *regs, struct mparse *parse)
352 {
353 	struct roff	*r;
354 
355 	r = mandoc_calloc(1, sizeof(struct roff));
356 	r->regs = regs;
357 	r->parse = parse;
358 	r->rstackpos = -1;
359 
360 	roff_hash_init();
361 	return(r);
362 }
363 
364 
/*
 * Look for the first reference to a defined user string (`\*x',
 * `\*(xx' or `\*[name]') at or after pos and splice its value into a
 * newly allocated buffer that replaces *bufp.
 *
 * Returns 0 if a substitution took place, in which case *bufp and
 * *szp describe the new buffer; returns 1 if the line was left alone.
 */
static int
roff_res(struct roff *r, char **bufp, size_t *szp, int pos)
{
	const char	*esc;	/* the backslash opening the escape */
	const char	*nam;	/* first character of the string name */
	const char	*cp;	/* scan position */
	const char	*val;	/* replacement text */
	char		*nbuf;	/* the rebuilt line */
	size_t		 nsz;
	int		 len, maxl;
	char		 second;

	for (cp = strchr(*bufp + pos, '\\'); cp != NULL;
	    cp = strchr(cp, '\\')) {
		esc = cp;

		/*
		 * Only an asterisk after the backslash is of interest.
		 * Any other character is skipped as well, because it
		 * is escaped and cannot begin a new escape sequence.
		 */

		second = cp[1];
		if (second == '\0')
			return 1;
		cp += 2;
		if (second != '*')
			continue;

		/*
		 * The next character tells how long the name is:
		 * two characters after `(', up to `]' after `[',
		 * and a single character otherwise.
		 */

		if (*cp == '\0')
			return 1;
		if (*cp == '(') {
			maxl = 2;
			cp++;
		} else if (*cp == '[') {
			maxl = 0;
			cp++;
		} else
			maxl = 1;

		nam = cp;

		/* Walk to the end of the name; give up on a short line. */

		len = 0;
		while (maxl == 0 || len < maxl) {
			if (*cp == '\0')
				return 1;
			if (maxl == 0 && *cp == ']')
				break;
			len++;
			cp++;
		}

		/* An unknown name is left in place; keep searching. */

		val = roff_getstrn(r, nam, (size_t)len);
		if (val == NULL) {
			if (maxl != 0)
				cp--;
			continue;
		}

		/*
		 * Rebuild the line: everything before the escape, the
		 * value, and everything after the name (and after the
		 * closing bracket, if there was one).
		 */

		nsz = *szp + strlen(val) + 1;
		nbuf = mandoc_malloc(nsz);

		strlcpy(nbuf, *bufp, (size_t)(esc - *bufp + 1));
		strlcat(nbuf, val, nsz);
		strlcat(nbuf, maxl != 0 ? cp : cp + 1, nsz);

		free(*bufp);
		*bufp = nbuf;
		*szp = nsz;
		return 0;
	}

	return 1;
}
460 
461 
462 enum rofferr
463 roff_parseln(struct roff *r, int ln, char **bufp,
464 		size_t *szp, int pos, int *offs)
465 {
466 	enum rofft	 t;
467 	enum rofferr	 e;
468 	int		 ppos, ctl;
469 
470 	/*
471 	 * Run the reserved-word filter only if we have some reserved
472 	 * words to fill in.
473 	 */
474 
475 	if (r->first_string && ! roff_res(r, bufp, szp, pos))
476 		return(ROFF_REPARSE);
477 
478 	ppos = pos;
479 	ctl = mandoc_getcontrol(*bufp, &pos);
480 
481 	/*
482 	 * First, if a scope is open and we're not a macro, pass the
483 	 * text through the macro's filter.  If a scope isn't open and
484 	 * we're not a macro, just let it through.
485 	 * Finally, if there's an equation scope open, divert it into it
486 	 * no matter our state.
487 	 */
488 
489 	if (r->last && ! ctl) {
490 		t = r->last->tok;
491 		assert(roffs[t].text);
492 		e = (*roffs[t].text)
493 			(r, t, bufp, szp, ln, pos, pos, offs);
494 		assert(ROFF_IGN == e || ROFF_CONT == e);
495 		if (ROFF_CONT != e)
496 			return(e);
497 		if (r->eqn)
498 			return(eqn_read(&r->eqn, ln, *bufp, pos));
499 		if (r->tbl)
500 			return(tbl_read(r->tbl, ln, *bufp, pos));
501 		return(ROFF_CONT);
502 	} else if ( ! ctl) {
503 		if (r->eqn)
504 			return(eqn_read(&r->eqn, ln, *bufp, pos));
505 		if (r->tbl)
506 			return(tbl_read(r->tbl, ln, *bufp, pos));
507 		return(ROFF_CONT);
508 	} else if (r->eqn)
509 		return(eqn_read(&r->eqn, ln, *bufp, ppos));
510 
511 	/*
512 	 * If a scope is open, go to the child handler for that macro,
513 	 * as it may want to preprocess before doing anything with it.
514 	 * Don't do so if an equation is open.
515 	 */
516 
517 	if (r->last) {
518 		t = r->last->tok;
519 		assert(roffs[t].sub);
520 		return((*roffs[t].sub)
521 				(r, t, bufp, szp,
522 				 ln, ppos, pos, offs));
523 	}
524 
525 	/*
526 	 * Lastly, as we've no scope open, try to look up and execute
527 	 * the new macro.  If no macro is found, simply return and let
528 	 * the compilers handle it.
529 	 */
530 
531 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
532 		return(ROFF_CONT);
533 
534 	assert(roffs[t].proc);
535 	return((*roffs[t].proc)
536 			(r, t, bufp, szp,
537 			 ln, ppos, pos, offs));
538 }
539 
540 
541 void
542 roff_endparse(struct roff *r)
543 {
544 
545 	if (r->last)
546 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
547 				r->last->line, r->last->col, NULL);
548 
549 	if (r->eqn) {
550 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
551 				r->eqn->eqn.line, r->eqn->eqn.pos, NULL);
552 		eqn_end(r->eqn);
553 		r->eqn = NULL;
554 	}
555 
556 	if (r->tbl) {
557 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
558 				r->tbl->line, r->tbl->pos, NULL);
559 		tbl_end(r->tbl);
560 		r->tbl = NULL;
561 	}
562 }
563 
564 /*
565  * Parse a roff node's type from the input buffer.  This must be in the
566  * form of ".foo xxx" in the usual way.
567  */
568 static enum rofft
569 roff_parse(struct roff *r, const char *buf, int *pos)
570 {
571 	const char	*mac;
572 	size_t		 maclen;
573 	enum rofft	 t;
574 
575 	if ('\0' == buf[*pos] || '"' == buf[*pos])
576 		return(ROFF_MAX);
577 
578 	mac = buf + *pos;
579 	maclen = strcspn(mac, " \\\t\0");
580 
581 	t = (r->current_string = roff_getstrn(r, mac, maclen))
582 	    ? ROFF_USERDEF : roff_hash_find(mac, maclen);
583 
584 	*pos += (int)maclen;
585 
586 	while (buf[*pos] && ' ' == buf[*pos])
587 		(*pos)++;
588 
589 	return(t);
590 }
591 
592 
/*
 * Convert a decimal string to a natural number.  The whole string
 * must be consumed by the conversion and the value must lie within
 * 0..INT_MAX.  On success the number is stored in *res and 1 is
 * returned; otherwise 0 is returned and *res is left untouched.
 */
static int
roff_parse_nat(const char *buf, unsigned int *res)
{
	char		*end;
	long		 num;

	errno = 0;
	num = strtol(buf, &end, 10);

	/* Reject empty input and trailing garbage. */
	if (*buf == '\0' || *end != '\0')
		return 0;

	/* Reject values strtol() had to clamp. */
	if (errno == ERANGE && (num == LONG_MAX || num == LONG_MIN))
		return 0;

	/* Reject values that are negative or do not fit into an int. */
	if (num < 0 || num > INT_MAX)
		return 0;

	*res = (unsigned int)num;
	return 1;
}
611 
612 
613 /* ARGSUSED */
614 static enum rofferr
615 roff_cblock(ROFF_ARGS)
616 {
617 
618 	/*
619 	 * A block-close `..' should only be invoked as a child of an
620 	 * ignore macro, otherwise raise a warning and just ignore it.
621 	 */
622 
623 	if (NULL == r->last) {
624 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
625 		return(ROFF_IGN);
626 	}
627 
628 	switch (r->last->tok) {
629 	case (ROFF_am):
630 		/* FALLTHROUGH */
631 	case (ROFF_ami):
632 		/* FALLTHROUGH */
633 	case (ROFF_am1):
634 		/* FALLTHROUGH */
635 	case (ROFF_de):
636 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
637 		/* FALLTHROUGH */
638 	case (ROFF_dei):
639 		/* FALLTHROUGH */
640 	case (ROFF_ig):
641 		break;
642 	default:
643 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
644 		return(ROFF_IGN);
645 	}
646 
647 	if ((*bufp)[pos])
648 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
649 
650 	roffnode_pop(r);
651 	roffnode_cleanscope(r);
652 	return(ROFF_IGN);
653 
654 }
655 
656 
657 static void
658 roffnode_cleanscope(struct roff *r)
659 {
660 
661 	while (r->last) {
662 		if (--r->last->endspan < 0)
663 			break;
664 		roffnode_pop(r);
665 	}
666 }
667 
668 
669 /* ARGSUSED */
670 static enum rofferr
671 roff_ccond(ROFF_ARGS)
672 {
673 
674 	if (NULL == r->last) {
675 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
676 		return(ROFF_IGN);
677 	}
678 
679 	switch (r->last->tok) {
680 	case (ROFF_el):
681 		/* FALLTHROUGH */
682 	case (ROFF_ie):
683 		/* FALLTHROUGH */
684 	case (ROFF_if):
685 		break;
686 	default:
687 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
688 		return(ROFF_IGN);
689 	}
690 
691 	if (r->last->endspan > -1) {
692 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
693 		return(ROFF_IGN);
694 	}
695 
696 	if ((*bufp)[pos])
697 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
698 
699 	roffnode_pop(r);
700 	roffnode_cleanscope(r);
701 	return(ROFF_IGN);
702 }
703 
704 
705 /* ARGSUSED */
706 static enum rofferr
707 roff_block(ROFF_ARGS)
708 {
709 	int		sv;
710 	size_t		sz;
711 	char		*name;
712 
713 	name = NULL;
714 
715 	if (ROFF_ig != tok) {
716 		if ('\0' == (*bufp)[pos]) {
717 			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
718 			return(ROFF_IGN);
719 		}
720 
721 		/*
722 		 * Re-write `de1', since we don't really care about
723 		 * groff's strange compatibility mode, into `de'.
724 		 */
725 
726 		if (ROFF_de1 == tok)
727 			tok = ROFF_de;
728 		if (ROFF_de == tok)
729 			name = *bufp + pos;
730 		else
731 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
732 			    roffs[tok].name);
733 
734 		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
735 			pos++;
736 
737 		while (isspace((unsigned char)(*bufp)[pos]))
738 			(*bufp)[pos++] = '\0';
739 	}
740 
741 	roffnode_push(r, tok, name, ln, ppos);
742 
743 	/*
744 	 * At the beginning of a `de' macro, clear the existing string
745 	 * with the same name, if there is one.  New content will be
746 	 * added from roff_block_text() in multiline mode.
747 	 */
748 
749 	if (ROFF_de == tok)
750 		roff_setstr(r, name, "", 0);
751 
752 	if ('\0' == (*bufp)[pos])
753 		return(ROFF_IGN);
754 
755 	/* If present, process the custom end-of-line marker. */
756 
757 	sv = pos;
758 	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
759 		pos++;
760 
761 	/*
762 	 * Note: groff does NOT like escape characters in the input.
763 	 * Instead of detecting this, we're just going to let it fly and
764 	 * to hell with it.
765 	 */
766 
767 	assert(pos > sv);
768 	sz = (size_t)(pos - sv);
769 
770 	if (1 == sz && '.' == (*bufp)[sv])
771 		return(ROFF_IGN);
772 
773 	r->last->end = mandoc_malloc(sz + 1);
774 
775 	memcpy(r->last->end, *bufp + sv, sz);
776 	r->last->end[(int)sz] = '\0';
777 
778 	if ((*bufp)[pos])
779 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
780 
781 	return(ROFF_IGN);
782 }
783 
784 
785 /* ARGSUSED */
786 static enum rofferr
787 roff_block_sub(ROFF_ARGS)
788 {
789 	enum rofft	t;
790 	int		i, j;
791 
792 	/*
793 	 * First check whether a custom macro exists at this level.  If
794 	 * it does, then check against it.  This is some of groff's
795 	 * stranger behaviours.  If we encountered a custom end-scope
796 	 * tag and that tag also happens to be a "real" macro, then we
797 	 * need to try interpreting it again as a real macro.  If it's
798 	 * not, then return ignore.  Else continue.
799 	 */
800 
801 	if (r->last->end) {
802 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
803 			if ((*bufp)[i] != r->last->end[j])
804 				break;
805 
806 		if ('\0' == r->last->end[j] &&
807 				('\0' == (*bufp)[i] ||
808 				 ' ' == (*bufp)[i] ||
809 				 '\t' == (*bufp)[i])) {
810 			roffnode_pop(r);
811 			roffnode_cleanscope(r);
812 
813 			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
814 				i++;
815 
816 			pos = i;
817 			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
818 				return(ROFF_RERUN);
819 			return(ROFF_IGN);
820 		}
821 	}
822 
823 	/*
824 	 * If we have no custom end-query or lookup failed, then try
825 	 * pulling it out of the hashtable.
826 	 */
827 
828 	t = roff_parse(r, *bufp, &pos);
829 
830 	/*
831 	 * Macros other than block-end are only significant
832 	 * in `de' blocks; elsewhere, simply throw them away.
833 	 */
834 	if (ROFF_cblock != t) {
835 		if (ROFF_de == tok)
836 			roff_setstr(r, r->last->name, *bufp + ppos, 1);
837 		return(ROFF_IGN);
838 	}
839 
840 	assert(roffs[t].proc);
841 	return((*roffs[t].proc)(r, t, bufp, szp,
842 				ln, ppos, pos, offs));
843 }
844 
845 
846 /* ARGSUSED */
847 static enum rofferr
848 roff_block_text(ROFF_ARGS)
849 {
850 
851 	if (ROFF_de == tok)
852 		roff_setstr(r, r->last->name, *bufp + pos, 1);
853 
854 	return(ROFF_IGN);
855 }
856 
857 
858 /* ARGSUSED */
859 static enum rofferr
860 roff_cond_sub(ROFF_ARGS)
861 {
862 	enum rofft	 t;
863 	enum roffrule	 rr;
864 
865 	rr = r->last->rule;
866 
867 	/*
868 	 * Clean out scope.  If we've closed ourselves, then don't
869 	 * continue.
870 	 */
871 
872 	roffnode_cleanscope(r);
873 
874 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos))) {
875 		if ('\\' == (*bufp)[pos] && '}' == (*bufp)[pos + 1])
876 			return(roff_ccond
877 				(r, ROFF_ccond, bufp, szp,
878 				 ln, pos, pos + 2, offs));
879 		return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
880 	}
881 
882 	/*
883 	 * A denied conditional must evaluate its children if and only
884 	 * if they're either structurally required (such as loops and
885 	 * conditionals) or a closing macro.
886 	 */
887 	if (ROFFRULE_DENY == rr)
888 		if ( ! (ROFFMAC_STRUCT & roffs[t].flags))
889 			if (ROFF_ccond != t)
890 				return(ROFF_IGN);
891 
892 	assert(roffs[t].proc);
893 	return((*roffs[t].proc)(r, t, bufp, szp,
894 				ln, ppos, pos, offs));
895 }
896 
897 
898 /* ARGSUSED */
899 static enum rofferr
900 roff_cond_text(ROFF_ARGS)
901 {
902 	char		*ep, *st;
903 	enum roffrule	 rr;
904 
905 	rr = r->last->rule;
906 
907 	/*
908 	 * We display the value of the text if out current evaluation
909 	 * scope permits us to do so.
910 	 */
911 
912 	/* FIXME: use roff_ccond? */
913 
914 	st = &(*bufp)[pos];
915 	if (NULL == (ep = strstr(st, "\\}"))) {
916 		roffnode_cleanscope(r);
917 		return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
918 	}
919 
920 	if (ep == st || (ep > st && '\\' != *(ep - 1)))
921 		roffnode_pop(r);
922 
923 	roffnode_cleanscope(r);
924 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
925 }
926 
927 
928 static enum roffrule
929 roff_evalcond(const char *v, int *pos)
930 {
931 
932 	switch (v[*pos]) {
933 	case ('n'):
934 		(*pos)++;
935 		return(ROFFRULE_ALLOW);
936 	case ('e'):
937 		/* FALLTHROUGH */
938 	case ('o'):
939 		/* FALLTHROUGH */
940 	case ('t'):
941 		(*pos)++;
942 		return(ROFFRULE_DENY);
943 	default:
944 		break;
945 	}
946 
947 	while (v[*pos] && ' ' != v[*pos])
948 		(*pos)++;
949 	return(ROFFRULE_DENY);
950 }
951 
952 /* ARGSUSED */
953 static enum rofferr
954 roff_line_ignore(ROFF_ARGS)
955 {
956 
957 	if (ROFF_it == tok)
958 		mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos, "it");
959 
960 	return(ROFF_IGN);
961 }
962 
963 /* ARGSUSED */
964 static enum rofferr
965 roff_cond(ROFF_ARGS)
966 {
967 	int		 sv;
968 	enum roffrule	 rule;
969 
970 	/*
971 	 * An `.el' has no conditional body: it will consume the value
972 	 * of the current rstack entry set in prior `ie' calls or
973 	 * defaults to DENY.
974 	 *
975 	 * If we're not an `el', however, then evaluate the conditional.
976 	 */
977 
978 	rule = ROFF_el == tok ?
979 		(r->rstackpos < 0 ?
980 		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
981 		roff_evalcond(*bufp, &pos);
982 
983 	sv = pos;
984 	while (' ' == (*bufp)[pos])
985 		pos++;
986 
987 	/*
988 	 * Roff is weird.  If we have just white-space after the
989 	 * conditional, it's considered the BODY and we exit without
990 	 * really doing anything.  Warn about this.  It's probably
991 	 * wrong.
992 	 */
993 
994 	if ('\0' == (*bufp)[pos] && sv != pos) {
995 		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
996 		return(ROFF_IGN);
997 	}
998 
999 	roffnode_push(r, tok, NULL, ln, ppos);
1000 
1001 	r->last->rule = rule;
1002 
1003 	/*
1004 	 * An if-else will put the NEGATION of the current evaluated
1005 	 * conditional into the stack of rules.
1006 	 */
1007 
1008 	if (ROFF_ie == tok) {
1009 		if (r->rstackpos == RSTACK_MAX - 1) {
1010 			mandoc_msg(MANDOCERR_MEM,
1011 				r->parse, ln, ppos, NULL);
1012 			return(ROFF_ERR);
1013 		}
1014 		r->rstack[++r->rstackpos] =
1015 			ROFFRULE_DENY == r->last->rule ?
1016 			ROFFRULE_ALLOW : ROFFRULE_DENY;
1017 	}
1018 
1019 	/* If the parent has false as its rule, then so do we. */
1020 
1021 	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1022 		r->last->rule = ROFFRULE_DENY;
1023 
1024 	/*
1025 	 * Determine scope.  If we're invoked with "\{" trailing the
1026 	 * conditional, then we're in a multiline scope.  Else our scope
1027 	 * expires on the next line.
1028 	 */
1029 
1030 	r->last->endspan = 1;
1031 
1032 	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1033 		r->last->endspan = -1;
1034 		pos += 2;
1035 	}
1036 
1037 	/*
1038 	 * If there are no arguments on the line, the next-line scope is
1039 	 * assumed.
1040 	 */
1041 
1042 	if ('\0' == (*bufp)[pos])
1043 		return(ROFF_IGN);
1044 
1045 	/* Otherwise re-run the roff parser after recalculating. */
1046 
1047 	*offs = pos;
1048 	return(ROFF_RERUN);
1049 }
1050 
1051 
1052 /* ARGSUSED */
1053 static enum rofferr
1054 roff_ds(ROFF_ARGS)
1055 {
1056 	char		*name, *string;
1057 
1058 	/*
1059 	 * A symbol is named by the first word following the macro
1060 	 * invocation up to a space.  Its value is anything after the
1061 	 * name's trailing whitespace and optional double-quote.  Thus,
1062 	 *
1063 	 *  [.ds foo "bar  "     ]
1064 	 *
1065 	 * will have `bar  "     ' as its value.
1066 	 */
1067 
1068 	string = *bufp + pos;
1069 	name = roff_getname(r, &string, ln, pos);
1070 	if ('\0' == *name)
1071 		return(ROFF_IGN);
1072 
1073 	/* Read past initial double-quote. */
1074 	if ('"' == *string)
1075 		string++;
1076 
1077 	/* The rest is the value. */
1078 	roff_setstr(r, name, string, 0);
1079 	return(ROFF_IGN);
1080 }
1081 
1082 
1083 /* ARGSUSED */
1084 static enum rofferr
1085 roff_nr(ROFF_ARGS)
1086 {
1087 	const char	*key;
1088 	char		*val;
1089 	struct reg	*rg;
1090 
1091 	val = *bufp + pos;
1092 	key = roff_getname(r, &val, ln, pos);
1093 	rg = r->regs->regs;
1094 
1095 	if (0 == strcmp(key, "nS")) {
1096 		rg[(int)REG_nS].set = 1;
1097 		if ( ! roff_parse_nat(val, &rg[(int)REG_nS].v.u))
1098 			rg[(int)REG_nS].v.u = 0;
1099 	}
1100 
1101 	return(ROFF_IGN);
1102 }
1103 
1104 /* ARGSUSED */
1105 static enum rofferr
1106 roff_rm(ROFF_ARGS)
1107 {
1108 	const char	 *name;
1109 	char		 *cp;
1110 
1111 	cp = *bufp + pos;
1112 	while ('\0' != *cp) {
1113 		name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1114 		if ('\0' != *name)
1115 			roff_setstr(r, name, NULL, 0);
1116 	}
1117 	return(ROFF_IGN);
1118 }
1119 
1120 /* ARGSUSED */
1121 static enum rofferr
1122 roff_TE(ROFF_ARGS)
1123 {
1124 
1125 	if (NULL == r->tbl)
1126 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1127 	else
1128 		tbl_end(r->tbl);
1129 
1130 	r->tbl = NULL;
1131 	return(ROFF_IGN);
1132 }
1133 
1134 /* ARGSUSED */
1135 static enum rofferr
1136 roff_T_(ROFF_ARGS)
1137 {
1138 
1139 	if (NULL == r->tbl)
1140 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1141 	else
1142 		tbl_restart(ppos, ln, r->tbl);
1143 
1144 	return(ROFF_IGN);
1145 }
1146 
1147 /* ARGSUSED */
1148 static enum rofferr
1149 roff_EQ(ROFF_ARGS)
1150 {
1151 	struct eqn_node	*e;
1152 
1153 	assert(NULL == r->eqn);
1154 	e = eqn_alloc(ppos, ln);
1155 
1156 	if (r->last_eqn)
1157 		r->last_eqn->next = e;
1158 	else
1159 		r->first_eqn = r->last_eqn = e;
1160 
1161 	r->eqn = r->last_eqn = e;
1162 	return(ROFF_IGN);
1163 }
1164 
1165 /* ARGSUSED */
1166 static enum rofferr
1167 roff_EN(ROFF_ARGS)
1168 {
1169 
1170 	mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1171 	return(ROFF_IGN);
1172 }
1173 
1174 /* ARGSUSED */
1175 static enum rofferr
1176 roff_TS(ROFF_ARGS)
1177 {
1178 	struct tbl_node	*t;
1179 
1180 	if (r->tbl) {
1181 		mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1182 		tbl_end(r->tbl);
1183 	}
1184 
1185 	t = tbl_alloc(ppos, ln, r->parse);
1186 
1187 	if (r->last_tbl)
1188 		r->last_tbl->next = t;
1189 	else
1190 		r->first_tbl = r->last_tbl = t;
1191 
1192 	r->tbl = r->last_tbl = t;
1193 	return(ROFF_IGN);
1194 }
1195 
1196 /* ARGSUSED */
1197 static enum rofferr
1198 roff_so(ROFF_ARGS)
1199 {
1200 	char *name;
1201 
1202 	mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1203 
1204 	/*
1205 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1206 	 * opening anything that's not in our cwd or anything beneath
1207 	 * it.  Thus, explicitly disallow traversing up the file-system
1208 	 * or using absolute paths.
1209 	 */
1210 
1211 	name = *bufp + pos;
1212 	if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1213 		mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1214 		return(ROFF_ERR);
1215 	}
1216 
1217 	*offs = pos;
1218 	return(ROFF_SO);
1219 }
1220 
1221 /* ARGSUSED */
1222 static enum rofferr
1223 roff_userdef(ROFF_ARGS)
1224 {
1225 	const char	 *arg[9];
1226 	char		 *cp, *n1, *n2;
1227 	int		  i;
1228 
1229 	/*
1230 	 * Collect pointers to macro argument strings
1231 	 * and null-terminate them.
1232 	 */
1233 	cp = *bufp + pos;
1234 	for (i = 0; i < 9; i++)
1235 		arg[i] = '\0' == *cp ? "" :
1236 		    mandoc_getarg(r->parse, &cp, ln, &pos);
1237 
1238 	/*
1239 	 * Expand macro arguments.
1240 	 */
1241 	*szp = 0;
1242 	n1 = cp = mandoc_strdup(r->current_string);
1243 	while (NULL != (cp = strstr(cp, "\\$"))) {
1244 		i = cp[2] - '1';
1245 		if (0 > i || 8 < i) {
1246 			/* Not an argument invocation. */
1247 			cp += 2;
1248 			continue;
1249 		}
1250 
1251 		*szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1252 		n2 = mandoc_malloc(*szp);
1253 
1254 		strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1255 		strlcat(n2, arg[i], *szp);
1256 		strlcat(n2, cp + 3, *szp);
1257 
1258 		cp = n2 + (cp - n1);
1259 		free(n1);
1260 		n1 = n2;
1261 	}
1262 
1263 	/*
1264 	 * Replace the macro invocation
1265 	 * by the expanded macro.
1266 	 */
1267 	free(*bufp);
1268 	*bufp = n1;
1269 	if (0 == *szp)
1270 		*szp = strlen(*bufp) + 1;
1271 
1272 	return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1273 	   ROFF_REPARSE : ROFF_APPEND);
1274 }
1275 
1276 static char *
1277 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1278 {
1279 	char	 *name, *cp;
1280 
1281 	name = *cpp;
1282 	if ('\0' == *name)
1283 		return(name);
1284 
1285 	/* Read until end of name. */
1286 	for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1287 		if ('\\' != *cp)
1288 			continue;
1289 		cp++;
1290 		if ('\\' == *cp)
1291 			continue;
1292 		mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1293 		*cp = '\0';
1294 		name = cp;
1295 	}
1296 
1297 	/* Nil-terminate name. */
1298 	if ('\0' != *cp)
1299 		*(cp++) = '\0';
1300 
1301 	/* Read past spaces. */
1302 	while (' ' == *cp)
1303 		cp++;
1304 
1305 	*cpp = cp;
1306 	return(name);
1307 }
1308 
1309 /*
1310  * Store *string into the user-defined string called *name.
1311  * In multiline mode, append to an existing entry and append '\n';
1312  * else replace the existing entry, if there is one.
1313  * To clear an existing entry, call with (*r, *name, NULL, 0).
1314  */
1315 static void
1316 roff_setstr(struct roff *r, const char *name, const char *string,
1317 	int multiline)
1318 {
1319 	struct roffstr	 *n;
1320 	char		 *c;
1321 	size_t		  oldch, newch;
1322 
1323 	/* Search for an existing string with the same name. */
1324 	n = r->first_string;
1325 	while (n && strcmp(name, n->name))
1326 		n = n->next;
1327 
1328 	if (NULL == n) {
1329 		/* Create a new string table entry. */
1330 		n = mandoc_malloc(sizeof(struct roffstr));
1331 		n->name = mandoc_strdup(name);
1332 		n->string = NULL;
1333 		n->next = r->first_string;
1334 		r->first_string = n;
1335 	} else if (0 == multiline) {
1336 		/* In multiline mode, append; else replace. */
1337 		free(n->string);
1338 		n->string = NULL;
1339 	}
1340 
1341 	if (NULL == string)
1342 		return;
1343 
1344 	/*
1345 	 * One additional byte for the '\n' in multiline mode,
1346 	 * and one for the terminating '\0'.
1347 	 */
1348 	newch = strlen(string) + (multiline ? 2u : 1u);
1349 	if (NULL == n->string) {
1350 		n->string = mandoc_malloc(newch);
1351 		*n->string = '\0';
1352 		oldch = 0;
1353 	} else {
1354 		oldch = strlen(n->string);
1355 		n->string = mandoc_realloc(n->string, oldch + newch);
1356 	}
1357 
1358 	/* Skip existing content in the destination buffer. */
1359 	c = n->string + (int)oldch;
1360 
1361 	/* Append new content to the destination buffer. */
1362 	while (*string) {
1363 		/*
1364 		 * Rudimentary roff copy mode:
1365 		 * Handle escaped backslashes.
1366 		 */
1367 		if ('\\' == *string && '\\' == *(string + 1))
1368 			string++;
1369 		*c++ = *string++;
1370 	}
1371 
1372 	/* Append terminating bytes. */
1373 	if (multiline)
1374 		*c++ = '\n';
1375 	*c = '\0';
1376 }
1377 
1378 static const char *
1379 roff_getstrn(const struct roff *r, const char *name, size_t len)
1380 {
1381 	const struct roffstr *n;
1382 
1383 	n = r->first_string;
1384 	while (n && (strncmp(name, n->name, len) || '\0' != n->name[(int)len]))
1385 		n = n->next;
1386 
1387 	return(n ? n->string : NULL);
1388 }
1389 
1390 static void
1391 roff_freestr(struct roff *r)
1392 {
1393 	struct roffstr	 *n, *nn;
1394 
1395 	for (n = r->first_string; n; n = nn) {
1396 		free(n->name);
1397 		free(n->string);
1398 		nn = n->next;
1399 		free(n);
1400 	}
1401 
1402 	r->first_string = NULL;
1403 }
1404 
1405 const struct tbl_span *
1406 roff_span(const struct roff *r)
1407 {
1408 
1409 	return(r->tbl ? tbl_span(r->tbl) : NULL);
1410 }
1411 
1412 const struct eqn *
1413 roff_eqn(const struct roff *r)
1414 {
1415 
1416 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1417 }
1418