xref: /dragonfly/contrib/mdocml/roff.c (revision 938e74dc)
1 /*	$Id: roff.c,v 1.188 2013/12/25 00:50:05 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31 
/*
 * Parser limits.
 * RSTACK_MAX:   depth of the if-else rule stack (nested conditionals).
 * EXPAND_LIMIT: string expansions allowed per input line before the
 *               expansion is treated as an infinite loop.
 */
#define	RSTACK_MAX	128
#define	EXPAND_LIMIT	1000
37 
/*
 * Request tokens.  The values index the roffs[] table, so the order
 * of the enumerators must match the order of the table rows.
 */
enum	rofft {
	/* Plain roff requests. */
	ROFF_ad, ROFF_am, ROFF_ami, ROFF_am1, ROFF_cc,
	ROFF_de, ROFF_dei, ROFF_de1, ROFF_ds, ROFF_el,
	ROFF_fam, ROFF_hw, ROFF_hy, ROFF_ie, ROFF_if,
	ROFF_ig, ROFF_it, ROFF_ne, ROFF_nh, ROFF_nr,
	ROFF_ns, ROFF_ps, ROFF_rm, ROFF_so, ROFF_ta,
	ROFF_tr,
	/* Macros intercepted by the roff layer. */
	ROFF_Dd, ROFF_TH,
	ROFF_TS, ROFF_TE, ROFF_T_,
	ROFF_EQ, ROFF_EN,
	/* Block close (`..') and conditional close (`\}'). */
	ROFF_cblock, ROFF_ccond,
	/* User-defined macro, then the "not found" sentinel. */
	ROFF_USERDEF,
	ROFF_MAX
};
77 
/* Outcome of a conditional: whether its body is processed or dropped. */
enum	roffrule {
	ROFFRULE_DENY,	/* body is ignored */
	ROFFRULE_ALLOW	/* body is processed */
};
82 
/*
 * Minimal string buffer: a nil-terminated string together with its
 * cached length.
 */
struct	roffstr {
	/* nil-terminated buffer */
	char		*p;
	/* saved strlen(p) */
	size_t		 sz;
};
90 
91 /*
92  * A key-value roffstr pair as part of a singly-linked list.
93  */
94 struct	roffkv {
95 	struct roffstr	 key;
96 	struct roffstr	 val;
97 	struct roffkv	*next; /* next in list */
98 };
99 
100 /*
101  * A single number register as part of a singly-linked list.
102  */
103 struct	roffreg {
104 	struct roffstr	 key;
105 	int		 val;
106 	struct roffreg	*next;
107 };
108 
109 struct	roff {
110 	enum mparset	 parsetype; /* requested parse type */
111 	struct mparse	*parse; /* parse point */
112 	struct roffnode	*last; /* leaf of stack */
113 	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
114 	char		 control; /* control character */
115 	int		 rstackpos; /* position in rstack */
116 	struct roffreg	*regtab; /* number registers */
117 	struct roffkv	*strtab; /* user-defined strings & macros */
118 	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
119 	struct roffstr	*xtab; /* single-byte trans table (`tr') */
120 	const char	*current_string; /* value of last called user macro */
121 	struct tbl_node	*first_tbl; /* first table parsed */
122 	struct tbl_node	*last_tbl; /* last table parsed */
123 	struct tbl_node	*tbl; /* current table being parsed */
124 	struct eqn_node	*last_eqn; /* last equation parsed */
125 	struct eqn_node	*first_eqn; /* first equation parsed */
126 	struct eqn_node	*eqn; /* current equation being parsed */
127 };
128 
129 struct	roffnode {
130 	enum rofft	 tok; /* type of node */
131 	struct roffnode	*parent; /* up one in stack */
132 	int		 line; /* parse line */
133 	int		 col; /* parse col */
134 	char		*name; /* node name, e.g. macro name */
135 	char		*end; /* end-rules: custom token */
136 	int		 endspan; /* end-rules: next-line or infty */
137 	enum roffrule	 rule; /* current evaluation rule */
138 };
139 
/*
 * Common parameter list of all request handlers, and the handler
 * function pointer type built from it.
 */
#define	ROFF_ARGS \
	struct roff *r,		/* parse ctx */ \
	enum rofft tok,		/* tok of macro */ \
	char **bufp,		/* input buffer */ \
	size_t *szp,		/* size of input buffer */ \
	int ln,			/* parse line */ \
	int ppos,		/* original pos in buffer */ \
	int pos,		/* current pos in buffer */ \
	int *offs		/* reset offset of buffer data */

typedef	enum rofferr (*roffproc)(ROFF_ARGS);
150 
151 struct	roffmac {
152 	const char	*name; /* macro name */
153 	roffproc	 proc; /* process new macro */
154 	roffproc	 text; /* process as child text of macro */
155 	roffproc	 sub; /* process as child of macro */
156 	int		 flags;
157 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
158 	struct roffmac	*next;
159 };
160 
/*
 * A predefined string: the input name and the symbol replacing it.
 */
struct	predef {
	/* predefined input name */
	const char	*name;
	/* replacement symbol */
	const char	*str;
};

/* One array-initializer row for struct predef, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
168 
/* Request hash table and the stack of open nodes. */
static enum rofft roffhash_find(const char *, size_t);
static void roffhash_init(void);
static void roffnode_cleanscope(struct roff *);
static void roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum rofft,
		const char *, int, int);

/* Request handlers; all of them share the ROFF_ARGS signature. */
static enum rofferr roff_block(ROFF_ARGS);
static enum rofferr roff_block_text(ROFF_ARGS);
static enum rofferr roff_block_sub(ROFF_ARGS);
static enum rofferr roff_cblock(ROFF_ARGS);
static enum rofferr roff_cc(ROFF_ARGS);
static enum rofferr roff_ccond(ROFF_ARGS);
static enum rofferr roff_cond(ROFF_ARGS);
static enum rofferr roff_cond_text(ROFF_ARGS);
static enum rofferr roff_cond_sub(ROFF_ARGS);
static enum rofferr roff_ds(ROFF_ARGS);
static enum rofferr roff_it(ROFF_ARGS);
static enum rofferr roff_line_ignore(ROFF_ARGS);
static enum rofferr roff_nr(ROFF_ARGS);
static enum rofferr roff_rm(ROFF_ARGS);
static enum rofferr roff_so(ROFF_ARGS);
static enum rofferr roff_tr(ROFF_ARGS);
static enum rofferr roff_Dd(ROFF_ARGS);
static enum rofferr roff_TH(ROFF_ARGS);
static enum rofferr roff_TE(ROFF_ARGS);
static enum rofferr roff_TS(ROFF_ARGS);
static enum rofferr roff_EQ(ROFF_ARGS);
static enum rofferr roff_EN(ROFF_ARGS);
static enum rofferr roff_T_(ROFF_ARGS);
static enum rofferr roff_userdef(ROFF_ARGS);

/* Helpers. */
static enum roffrule roff_evalcond(const char *, int *);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static char *roff_getname(struct roff *, char **, int, int);
static int roff_getnum(const char *, int *, int *);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(const struct roff *, const char *, size_t);
static const char *roff_getstrn(const struct roff *,
		const char *, size_t);
static void roff_openeqn(struct roff *, const char *,
		int, int, const char *);
static enum rofft roff_parse(struct roff *, const char *, int *);
static enum rofferr roff_parsetext(char **, size_t *, int, int *);
static enum rofferr roff_res(struct roff *, char **, size_t *, int, int);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *,
		size_t, const char *, size_t, int);
220 
/*
 * Request hash table, see roffhash_find().  There is one bucket for
 * each character code from ASCII_LO to ASCII_HI inclusive.
 */
#define	ASCII_LO	 33
#define	ASCII_HI	 126
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

static	struct roffmac	*hash[HASHWIDTH];
228 
229 static	struct roffmac	 roffs[ROFF_MAX] = {
230 	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
231 	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
232 	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
233 	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
234 	{ "cc", roff_cc, NULL, NULL, 0, NULL },
235 	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
236 	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
237 	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
238 	{ "ds", roff_ds, NULL, NULL, 0, NULL },
239 	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
240 	{ "fam", roff_line_ignore, NULL, NULL, 0, NULL },
241 	{ "hw", roff_line_ignore, NULL, NULL, 0, NULL },
242 	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
243 	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
244 	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
245 	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
246 	{ "it", roff_it, NULL, NULL, 0, NULL },
247 	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
248 	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
249 	{ "nr", roff_nr, NULL, NULL, 0, NULL },
250 	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
251 	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
252 	{ "rm", roff_rm, NULL, NULL, 0, NULL },
253 	{ "so", roff_so, NULL, NULL, 0, NULL },
254 	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
255 	{ "tr", roff_tr, NULL, NULL, 0, NULL },
256 	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
257 	{ "TH", roff_TH, NULL, NULL, 0, NULL },
258 	{ "TS", roff_TS, NULL, NULL, 0, NULL },
259 	{ "TE", roff_TE, NULL, NULL, 0, NULL },
260 	{ "T&", roff_T_, NULL, NULL, 0, NULL },
261 	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
262 	{ "EN", roff_EN, NULL, NULL, 0, NULL },
263 	{ ".", roff_cblock, NULL, NULL, 0, NULL },
264 	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
265 	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
266 };
267 
/*
 * NULL-terminated list of reserved mdoc names.
 * Only the line wrapping is by initial letter; the element order
 * itself is kept unchanged.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Brc", "Bro", "Brq",
	"Bsx", "Bt", "Bx",
	"Cd", "Cm",
	"Db", "Dc", "Dd", "Dl", "Do", "Dq", "Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em", "En", "Eo", "Eq", "Er",
	"Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It",
	"Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt",
	"Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Or",
	"Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy",
	"Ta", "Tn",
	"Ud", "Ux",
	"Va", "Vt",
	"Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O", "%P", "%Q", "%R", "%T",
	"%U", "%V",
	NULL
};
289 
/*
 * NULL-terminated list of reserved man names.
 * Only the line wrapping is by initial letter; the element order
 * itself is kept unchanged.
 */
const	char *const __man_reserved[] = {
	"AT",
	"B", "BI", "BR", "BT",
	"DE", "DS", "DT",
	"EE", "EN", "EQ", "EX",
	"HF", "HP",
	"I", "IB", "IP", "IR",
	"LP",
	"ME", "MT",
	"OP",
	"P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS",
	"SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&",
	"UC", "UE", "UR",
	"YS",
	NULL
};
298 
299 /* Array of injected predefined strings. */
300 #define	PREDEFS_MAX	 38
301 static	const struct predef predefs[PREDEFS_MAX] = {
302 #include "predefs.in"
303 };
304 
305 /* See roffhash_find() */
306 #define	ROFF_HASH(p)	(p[0] - ASCII_LO)
307 
308 static	int	 roffit_lines;  /* number of lines to delay */
309 static	char	*roffit_macro;  /* nil-terminated macro line */
310 
311 static void
312 roffhash_init(void)
313 {
314 	struct roffmac	 *n;
315 	int		  buc, i;
316 
317 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
318 		assert(roffs[i].name[0] >= ASCII_LO);
319 		assert(roffs[i].name[0] <= ASCII_HI);
320 
321 		buc = ROFF_HASH(roffs[i].name);
322 
323 		if (NULL != (n = hash[buc])) {
324 			for ( ; n->next; n = n->next)
325 				/* Do nothing. */ ;
326 			n->next = &roffs[i];
327 		} else
328 			hash[buc] = &roffs[i];
329 	}
330 }
331 
332 /*
333  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
334  * the nil-terminated string name could be found.
335  */
336 static enum rofft
337 roffhash_find(const char *p, size_t s)
338 {
339 	int		 buc;
340 	struct roffmac	*n;
341 
342 	/*
343 	 * libroff has an extremely simple hashtable, for the time
344 	 * being, which simply keys on the first character, which must
345 	 * be printable, then walks a chain.  It works well enough until
346 	 * optimised.
347 	 */
348 
349 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
350 		return(ROFF_MAX);
351 
352 	buc = ROFF_HASH(p);
353 
354 	if (NULL == (n = hash[buc]))
355 		return(ROFF_MAX);
356 	for ( ; n; n = n->next)
357 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
358 			return((enum rofft)(n - roffs));
359 
360 	return(ROFF_MAX);
361 }
362 
363 
364 /*
365  * Pop the current node off of the stack of roff instructions currently
366  * pending.
367  */
368 static void
369 roffnode_pop(struct roff *r)
370 {
371 	struct roffnode	*p;
372 
373 	assert(r->last);
374 	p = r->last;
375 
376 	r->last = r->last->parent;
377 	free(p->name);
378 	free(p->end);
379 	free(p);
380 }
381 
382 
383 /*
384  * Push a roff node onto the instruction stack.  This must later be
385  * removed with roffnode_pop().
386  */
387 static void
388 roffnode_push(struct roff *r, enum rofft tok, const char *name,
389 		int line, int col)
390 {
391 	struct roffnode	*p;
392 
393 	p = mandoc_calloc(1, sizeof(struct roffnode));
394 	p->tok = tok;
395 	if (name)
396 		p->name = mandoc_strdup(name);
397 	p->parent = r->last;
398 	p->line = line;
399 	p->col = col;
400 	p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
401 
402 	r->last = p;
403 }
404 
405 
406 static void
407 roff_free1(struct roff *r)
408 {
409 	struct tbl_node	*tbl;
410 	struct eqn_node	*e;
411 	int		 i;
412 
413 	while (NULL != (tbl = r->first_tbl)) {
414 		r->first_tbl = tbl->next;
415 		tbl_free(tbl);
416 	}
417 
418 	r->first_tbl = r->last_tbl = r->tbl = NULL;
419 
420 	while (NULL != (e = r->first_eqn)) {
421 		r->first_eqn = e->next;
422 		eqn_free(e);
423 	}
424 
425 	r->first_eqn = r->last_eqn = r->eqn = NULL;
426 
427 	while (r->last)
428 		roffnode_pop(r);
429 
430 	roff_freestr(r->strtab);
431 	roff_freestr(r->xmbtab);
432 
433 	r->strtab = r->xmbtab = NULL;
434 
435 	roff_freereg(r->regtab);
436 
437 	r->regtab = NULL;
438 
439 	if (r->xtab)
440 		for (i = 0; i < 128; i++)
441 			free(r->xtab[i].p);
442 
443 	free(r->xtab);
444 	r->xtab = NULL;
445 }
446 
447 void
448 roff_reset(struct roff *r)
449 {
450 	int		 i;
451 
452 	roff_free1(r);
453 
454 	r->control = 0;
455 
456 	for (i = 0; i < PREDEFS_MAX; i++)
457 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
458 }
459 
460 
/*
 * Destroy a parser: release all data it owns, then the parser
 * structure itself.
 */
void
roff_free(struct roff *r)
{
	roff_free1(r);
	free(r);
}
468 
469 
470 struct roff *
471 roff_alloc(enum mparset type, struct mparse *parse)
472 {
473 	struct roff	*r;
474 	int		 i;
475 
476 	r = mandoc_calloc(1, sizeof(struct roff));
477 	r->parsetype = type;
478 	r->parse = parse;
479 	r->rstackpos = -1;
480 
481 	roffhash_init();
482 
483 	for (i = 0; i < PREDEFS_MAX; i++)
484 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
485 
486 	return(r);
487 }
488 
489 /*
490  * In the current line, expand user-defined strings ("\*")
491  * and references to number registers ("\n").
492  * Also check the syntax of other escape sequences.
493  */
494 static enum rofferr
495 roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
496 {
497 	char		 ubuf[12]; /* buffer to print the number */
498 	const char	*stesc;	/* start of an escape sequence ('\\') */
499 	const char	*stnam;	/* start of the name, after "[(*" */
500 	const char	*cp;	/* end of the name, e.g. before ']' */
501 	const char	*res;	/* the string to be substituted */
502 	char		*nbuf;	/* new buffer to copy bufp to */
503 	size_t		 nsz;	/* size of the new buffer */
504 	size_t		 maxl;  /* expected length of the escape name */
505 	size_t		 naml;	/* actual length of the escape name */
506 	int		 expand_count;	/* to avoid infinite loops */
507 
508 	expand_count = 0;
509 
510 again:
511 	cp = *bufp + pos;
512 	while (NULL != (cp = strchr(cp, '\\'))) {
513 		stesc = cp++;
514 
515 		/*
516 		 * The second character must be an asterisk or an n.
517 		 * If it isn't, skip it anyway:  It is escaped,
518 		 * so it can't start another escape sequence.
519 		 */
520 
521 		if ('\0' == *cp)
522 			return(ROFF_CONT);
523 
524 		switch (*cp) {
525 		case ('*'):
526 			res = NULL;
527 			break;
528 		case ('n'):
529 			res = ubuf;
530 			break;
531 		default:
532 			if (ESCAPE_ERROR != mandoc_escape(&cp, NULL, NULL))
533 				continue;
534 			mandoc_msg
535 				(MANDOCERR_BADESCAPE, r->parse,
536 				 ln, (int)(stesc - *bufp), NULL);
537 			return(ROFF_CONT);
538 		}
539 
540 		cp++;
541 
542 		/*
543 		 * The third character decides the length
544 		 * of the name of the string or register.
545 		 * Save a pointer to the name.
546 		 */
547 
548 		switch (*cp) {
549 		case ('\0'):
550 			return(ROFF_CONT);
551 		case ('('):
552 			cp++;
553 			maxl = 2;
554 			break;
555 		case ('['):
556 			cp++;
557 			maxl = 0;
558 			break;
559 		default:
560 			maxl = 1;
561 			break;
562 		}
563 		stnam = cp;
564 
565 		/* Advance to the end of the name. */
566 
567 		for (naml = 0; 0 == maxl || naml < maxl; naml++, cp++) {
568 			if ('\0' == *cp) {
569 				mandoc_msg
570 					(MANDOCERR_BADESCAPE,
571 					 r->parse, ln,
572 					 (int)(stesc - *bufp), NULL);
573 				return(ROFF_CONT);
574 			}
575 			if (0 == maxl && ']' == *cp)
576 				break;
577 		}
578 
579 		/*
580 		 * Retrieve the replacement string; if it is
581 		 * undefined, resume searching for escapes.
582 		 */
583 
584 		if (NULL == res)
585 			res = roff_getstrn(r, stnam, naml);
586 		else
587 			snprintf(ubuf, sizeof(ubuf), "%d",
588 			    roff_getregn(r, stnam, naml));
589 
590 		if (NULL == res) {
591 			mandoc_msg
592 				(MANDOCERR_BADESCAPE, r->parse,
593 				 ln, (int)(stesc - *bufp), NULL);
594 			res = "";
595 		}
596 
597 		/* Replace the escape sequence by the string. */
598 
599 		pos = stesc - *bufp;
600 
601 		nsz = *szp + strlen(res) + 1;
602 		nbuf = mandoc_malloc(nsz);
603 
604 		strlcpy(nbuf, *bufp, (size_t)(stesc - *bufp + 1));
605 		strlcat(nbuf, res, nsz);
606 		strlcat(nbuf, cp + (maxl ? 0 : 1), nsz);
607 
608 		free(*bufp);
609 
610 		*bufp = nbuf;
611 		*szp = nsz;
612 
613 		if (EXPAND_LIMIT >= ++expand_count)
614 			goto again;
615 
616 		/* Just leave the string unexpanded. */
617 		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
618 		return(ROFF_IGN);
619 	}
620 	return(ROFF_CONT);
621 }
622 
623 /*
624  * Process text streams:
625  * Convert all breakable hyphens into ASCII_HYPH.
626  * Decrement and spring input line trap.
627  */
628 static enum rofferr
629 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
630 {
631 	size_t		 sz;
632 	const char	*start;
633 	char		*p;
634 	int		 isz;
635 	enum mandoc_esc	 esc;
636 
637 	start = p = *bufp + pos;
638 
639 	while ('\0' != *p) {
640 		sz = strcspn(p, "-\\");
641 		p += sz;
642 
643 		if ('\0' == *p)
644 			break;
645 
646 		if ('\\' == *p) {
647 			/* Skip over escapes. */
648 			p++;
649 			esc = mandoc_escape
650 				((const char const **)&p, NULL, NULL);
651 			if (ESCAPE_ERROR == esc)
652 				break;
653 			continue;
654 		} else if (p == start) {
655 			p++;
656 			continue;
657 		}
658 
659 		if (isalpha((unsigned char)p[-1]) &&
660 		    isalpha((unsigned char)p[1]))
661 			*p = ASCII_HYPH;
662 		p++;
663 	}
664 
665 	/* Spring the input line trap. */
666 	if (1 == roffit_lines) {
667 		isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
668 		if (-1 == isz) {
669 			perror(NULL);
670 			exit((int)MANDOCLEVEL_SYSERR);
671 		}
672 		free(*bufp);
673 		*bufp = p;
674 		*szp = isz + 1;
675 		*offs = 0;
676 		free(roffit_macro);
677 		roffit_lines = 0;
678 		return(ROFF_REPARSE);
679 	} else if (1 < roffit_lines)
680 		--roffit_lines;
681 	return(ROFF_CONT);
682 }
683 
684 enum rofferr
685 roff_parseln(struct roff *r, int ln, char **bufp,
686 		size_t *szp, int pos, int *offs)
687 {
688 	enum rofft	 t;
689 	enum rofferr	 e;
690 	int		 ppos, ctl;
691 
692 	/*
693 	 * Run the reserved-word filter only if we have some reserved
694 	 * words to fill in.
695 	 */
696 
697 	e = roff_res(r, bufp, szp, ln, pos);
698 	if (ROFF_IGN == e)
699 		return(e);
700 	assert(ROFF_CONT == e);
701 
702 	ppos = pos;
703 	ctl = roff_getcontrol(r, *bufp, &pos);
704 
705 	/*
706 	 * First, if a scope is open and we're not a macro, pass the
707 	 * text through the macro's filter.  If a scope isn't open and
708 	 * we're not a macro, just let it through.
709 	 * Finally, if there's an equation scope open, divert it into it
710 	 * no matter our state.
711 	 */
712 
713 	if (r->last && ! ctl) {
714 		t = r->last->tok;
715 		assert(roffs[t].text);
716 		e = (*roffs[t].text)
717 			(r, t, bufp, szp, ln, pos, pos, offs);
718 		assert(ROFF_IGN == e || ROFF_CONT == e);
719 		if (ROFF_CONT != e)
720 			return(e);
721 	}
722 	if (r->eqn)
723 		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
724 	if ( ! ctl) {
725 		if (r->tbl)
726 			return(tbl_read(r->tbl, ln, *bufp, pos));
727 		return(roff_parsetext(bufp, szp, pos, offs));
728 	}
729 
730 	/*
731 	 * If a scope is open, go to the child handler for that macro,
732 	 * as it may want to preprocess before doing anything with it.
733 	 * Don't do so if an equation is open.
734 	 */
735 
736 	if (r->last) {
737 		t = r->last->tok;
738 		assert(roffs[t].sub);
739 		return((*roffs[t].sub)
740 				(r, t, bufp, szp,
741 				 ln, ppos, pos, offs));
742 	}
743 
744 	/*
745 	 * Lastly, as we've no scope open, try to look up and execute
746 	 * the new macro.  If no macro is found, simply return and let
747 	 * the compilers handle it.
748 	 */
749 
750 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
751 		return(ROFF_CONT);
752 
753 	assert(roffs[t].proc);
754 	return((*roffs[t].proc)
755 			(r, t, bufp, szp,
756 			 ln, ppos, pos, offs));
757 }
758 
759 
760 void
761 roff_endparse(struct roff *r)
762 {
763 
764 	if (r->last)
765 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
766 				r->last->line, r->last->col, NULL);
767 
768 	if (r->eqn) {
769 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
770 				r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
771 		eqn_end(&r->eqn);
772 	}
773 
774 	if (r->tbl) {
775 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
776 				r->tbl->line, r->tbl->pos, NULL);
777 		tbl_end(&r->tbl);
778 	}
779 }
780 
781 /*
782  * Parse a roff node's type from the input buffer.  This must be in the
783  * form of ".foo xxx" in the usual way.
784  */
785 static enum rofft
786 roff_parse(struct roff *r, const char *buf, int *pos)
787 {
788 	const char	*mac;
789 	size_t		 maclen;
790 	enum rofft	 t;
791 
792 	if ('\0' == buf[*pos] || '"' == buf[*pos] ||
793 			'\t' == buf[*pos] || ' ' == buf[*pos])
794 		return(ROFF_MAX);
795 
796 	/*
797 	 * We stop the macro parse at an escape, tab, space, or nil.
798 	 * However, `\}' is also a valid macro, so make sure we don't
799 	 * clobber it by seeing the `\' as the end of token.
800 	 */
801 
802 	mac = buf + *pos;
803 	maclen = strcspn(mac + 1, " \\\t\0") + 1;
804 
805 	t = (r->current_string = roff_getstrn(r, mac, maclen))
806 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
807 
808 	*pos += (int)maclen;
809 
810 	while (buf[*pos] && ' ' == buf[*pos])
811 		(*pos)++;
812 
813 	return(t);
814 }
815 
816 /* ARGSUSED */
817 static enum rofferr
818 roff_cblock(ROFF_ARGS)
819 {
820 
821 	/*
822 	 * A block-close `..' should only be invoked as a child of an
823 	 * ignore macro, otherwise raise a warning and just ignore it.
824 	 */
825 
826 	if (NULL == r->last) {
827 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
828 		return(ROFF_IGN);
829 	}
830 
831 	switch (r->last->tok) {
832 	case (ROFF_am):
833 		/* FALLTHROUGH */
834 	case (ROFF_ami):
835 		/* FALLTHROUGH */
836 	case (ROFF_am1):
837 		/* FALLTHROUGH */
838 	case (ROFF_de):
839 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
840 		/* FALLTHROUGH */
841 	case (ROFF_dei):
842 		/* FALLTHROUGH */
843 	case (ROFF_ig):
844 		break;
845 	default:
846 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
847 		return(ROFF_IGN);
848 	}
849 
850 	if ((*bufp)[pos])
851 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
852 
853 	roffnode_pop(r);
854 	roffnode_cleanscope(r);
855 	return(ROFF_IGN);
856 
857 }
858 
859 
860 static void
861 roffnode_cleanscope(struct roff *r)
862 {
863 
864 	while (r->last) {
865 		if (--r->last->endspan != 0)
866 			break;
867 		roffnode_pop(r);
868 	}
869 }
870 
871 
872 /* ARGSUSED */
873 static enum rofferr
874 roff_ccond(ROFF_ARGS)
875 {
876 
877 	if (NULL == r->last) {
878 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
879 		return(ROFF_IGN);
880 	}
881 
882 	switch (r->last->tok) {
883 	case (ROFF_el):
884 		/* FALLTHROUGH */
885 	case (ROFF_ie):
886 		/* FALLTHROUGH */
887 	case (ROFF_if):
888 		break;
889 	default:
890 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
891 		return(ROFF_IGN);
892 	}
893 
894 	if (r->last->endspan > -1) {
895 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
896 		return(ROFF_IGN);
897 	}
898 
899 	if ((*bufp)[pos])
900 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
901 
902 	roffnode_pop(r);
903 	roffnode_cleanscope(r);
904 	return(ROFF_IGN);
905 }
906 
907 
908 /* ARGSUSED */
909 static enum rofferr
910 roff_block(ROFF_ARGS)
911 {
912 	int		sv;
913 	size_t		sz;
914 	char		*name;
915 
916 	name = NULL;
917 
918 	if (ROFF_ig != tok) {
919 		if ('\0' == (*bufp)[pos]) {
920 			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
921 			return(ROFF_IGN);
922 		}
923 
924 		/*
925 		 * Re-write `de1', since we don't really care about
926 		 * groff's strange compatibility mode, into `de'.
927 		 */
928 
929 		if (ROFF_de1 == tok)
930 			tok = ROFF_de;
931 		if (ROFF_de == tok)
932 			name = *bufp + pos;
933 		else
934 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
935 			    roffs[tok].name);
936 
937 		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
938 			pos++;
939 
940 		while (isspace((unsigned char)(*bufp)[pos]))
941 			(*bufp)[pos++] = '\0';
942 	}
943 
944 	roffnode_push(r, tok, name, ln, ppos);
945 
946 	/*
947 	 * At the beginning of a `de' macro, clear the existing string
948 	 * with the same name, if there is one.  New content will be
949 	 * added from roff_block_text() in multiline mode.
950 	 */
951 
952 	if (ROFF_de == tok)
953 		roff_setstr(r, name, "", 0);
954 
955 	if ('\0' == (*bufp)[pos])
956 		return(ROFF_IGN);
957 
958 	/* If present, process the custom end-of-line marker. */
959 
960 	sv = pos;
961 	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
962 		pos++;
963 
964 	/*
965 	 * Note: groff does NOT like escape characters in the input.
966 	 * Instead of detecting this, we're just going to let it fly and
967 	 * to hell with it.
968 	 */
969 
970 	assert(pos > sv);
971 	sz = (size_t)(pos - sv);
972 
973 	if (1 == sz && '.' == (*bufp)[sv])
974 		return(ROFF_IGN);
975 
976 	r->last->end = mandoc_malloc(sz + 1);
977 
978 	memcpy(r->last->end, *bufp + sv, sz);
979 	r->last->end[(int)sz] = '\0';
980 
981 	if ((*bufp)[pos])
982 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
983 
984 	return(ROFF_IGN);
985 }
986 
987 
988 /* ARGSUSED */
989 static enum rofferr
990 roff_block_sub(ROFF_ARGS)
991 {
992 	enum rofft	t;
993 	int		i, j;
994 
995 	/*
996 	 * First check whether a custom macro exists at this level.  If
997 	 * it does, then check against it.  This is some of groff's
998 	 * stranger behaviours.  If we encountered a custom end-scope
999 	 * tag and that tag also happens to be a "real" macro, then we
1000 	 * need to try interpreting it again as a real macro.  If it's
1001 	 * not, then return ignore.  Else continue.
1002 	 */
1003 
1004 	if (r->last->end) {
1005 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
1006 			if ((*bufp)[i] != r->last->end[j])
1007 				break;
1008 
1009 		if ('\0' == r->last->end[j] &&
1010 				('\0' == (*bufp)[i] ||
1011 				 ' ' == (*bufp)[i] ||
1012 				 '\t' == (*bufp)[i])) {
1013 			roffnode_pop(r);
1014 			roffnode_cleanscope(r);
1015 
1016 			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
1017 				i++;
1018 
1019 			pos = i;
1020 			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
1021 				return(ROFF_RERUN);
1022 			return(ROFF_IGN);
1023 		}
1024 	}
1025 
1026 	/*
1027 	 * If we have no custom end-query or lookup failed, then try
1028 	 * pulling it out of the hashtable.
1029 	 */
1030 
1031 	t = roff_parse(r, *bufp, &pos);
1032 
1033 	/*
1034 	 * Macros other than block-end are only significant
1035 	 * in `de' blocks; elsewhere, simply throw them away.
1036 	 */
1037 	if (ROFF_cblock != t) {
1038 		if (ROFF_de == tok)
1039 			roff_setstr(r, r->last->name, *bufp + ppos, 1);
1040 		return(ROFF_IGN);
1041 	}
1042 
1043 	assert(roffs[t].proc);
1044 	return((*roffs[t].proc)(r, t, bufp, szp,
1045 				ln, ppos, pos, offs));
1046 }
1047 
1048 
1049 /* ARGSUSED */
1050 static enum rofferr
1051 roff_block_text(ROFF_ARGS)
1052 {
1053 
1054 	if (ROFF_de == tok)
1055 		roff_setstr(r, r->last->name, *bufp + pos, 1);
1056 
1057 	return(ROFF_IGN);
1058 }
1059 
1060 
1061 /* ARGSUSED */
1062 static enum rofferr
1063 roff_cond_sub(ROFF_ARGS)
1064 {
1065 	enum rofft	 t;
1066 	enum roffrule	 rr;
1067 	char		*ep;
1068 
1069 	rr = r->last->rule;
1070 	roffnode_cleanscope(r);
1071 	t = roff_parse(r, *bufp, &pos);
1072 
1073 	/*
1074 	 * Fully handle known macros when they are structurally
1075 	 * required or when the conditional evaluated to true.
1076 	 */
1077 
1078 	if ((ROFF_MAX != t) &&
1079 	    (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
1080 	     ROFFMAC_STRUCT & roffs[t].flags)) {
1081 		assert(roffs[t].proc);
1082 		return((*roffs[t].proc)(r, t, bufp, szp,
1083 					ln, ppos, pos, offs));
1084 	}
1085 
1086 	/* Always check for the closing delimiter `\}'. */
1087 
1088 	ep = &(*bufp)[pos];
1089 	while (NULL != (ep = strchr(ep, '\\'))) {
1090 		if ('}' != *(++ep))
1091 			continue;
1092 
1093 		/*
1094 		 * If we're at the end of line, then just chop
1095 		 * off the \} and resize the buffer.
1096 		 * If we aren't, then convert it to spaces.
1097 		 */
1098 
1099 		if ('\0' == *(ep + 1)) {
1100 			*--ep = '\0';
1101 			*szp -= 2;
1102 		} else
1103 			*(ep - 1) = *ep = ' ';
1104 
1105 		roff_ccond(r, ROFF_ccond, bufp, szp,
1106 				ln, pos, pos + 2, offs);
1107 		break;
1108 	}
1109 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1110 }
1111 
1112 /* ARGSUSED */
1113 static enum rofferr
1114 roff_cond_text(ROFF_ARGS)
1115 {
1116 	char		*ep;
1117 	enum roffrule	 rr;
1118 
1119 	rr = r->last->rule;
1120 	roffnode_cleanscope(r);
1121 
1122 	ep = &(*bufp)[pos];
1123 	for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1124 		ep++;
1125 		if ('}' != *ep)
1126 			continue;
1127 		*ep = '&';
1128 		roff_ccond(r, ROFF_ccond, bufp, szp,
1129 				ln, pos, pos + 2, offs);
1130 	}
1131 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1132 }
1133 
/*
 * Parse an optionally negative decimal integer at v[*pos].
 *
 * On success, store the value in *res, advance *pos past the
 * number, and return 1.  If there is no digit at the position
 * (after the optional minus sign), return 0 and leave *pos alone;
 * *res is overwritten even then.
 */
static int
roff_getnum(const char *v, int *pos, int *res)
{
	int p, n;

	p = *pos;
	n = v[p] == '-';
	if (n)
		p++;

	/* Shift the value by one decimal place, then add the digit. */
	for (*res = 0; isdigit((unsigned char)v[p]); p++)
		*res = 10 * *res + v[p] - '0';

	/* Nothing but possibly a sign was consumed: not a number. */
	if (p == *pos + n)
		return 0;

	if (n)
		*res = -*res;

	*pos = p;
	return 1;
}
1155 
/*
 * Parse a comparison operator at v[*pos].
 *
 * Recognized are "=", "==", "<", ">", "<=", and ">=".  On success,
 * store the operator in *res ('=', '<', '>', 'l' for "<=", or 'g'
 * for ">="), advance *pos past it, and return the same character.
 * Otherwise, return 0 and leave *pos alone; *res then holds the
 * character found at the position.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	int e;

	*res = v[*pos];

	switch (*res) {
	case '=':
		/* FALLTHROUGH */
	case '>':
		/* FALLTHROUGH */
	case '<':
		break;
	default:
		return(0);
	}

	/*
	 * Only look at the following character now that the current
	 * one is known not to be the terminator of the string.
	 */
	e = v[*pos + 1] == '=';

	if (e) {
		if ('>' == *res)
			*res = 'g';
		else if ('<' == *res)
			*res = 'l';
	}

	*pos += 1 + e;

	return(*res);
}
1183 
1184 static enum roffrule
1185 roff_evalcond(const char *v, int *pos)
1186 {
1187 	int	 not, lh, rh;
1188 	char	 op;
1189 
1190 	switch (v[*pos]) {
1191 	case ('n'):
1192 		(*pos)++;
1193 		return(ROFFRULE_ALLOW);
1194 	case ('e'):
1195 		/* FALLTHROUGH */
1196 	case ('o'):
1197 		/* FALLTHROUGH */
1198 	case ('t'):
1199 		(*pos)++;
1200 		return(ROFFRULE_DENY);
1201 	case ('!'):
1202 		(*pos)++;
1203 		not = 1;
1204 		break;
1205 	default:
1206 		not = 0;
1207 		break;
1208 	}
1209 
1210 	if (!roff_getnum(v, pos, &lh))
1211 		return ROFFRULE_DENY;
1212 	if (!roff_getop(v, pos, &op)) {
1213 		if (lh < 0)
1214 			lh = 0;
1215 		goto out;
1216 	}
1217 	if (!roff_getnum(v, pos, &rh))
1218 		return ROFFRULE_DENY;
1219 	switch (op) {
1220 	case 'g':
1221 		lh = lh >= rh;
1222 		break;
1223 	case 'l':
1224 		lh = lh <= rh;
1225 		break;
1226 	case '=':
1227 		lh = lh == rh;
1228 		break;
1229 	case '>':
1230 		lh = lh > rh;
1231 		break;
1232 	case '<':
1233 		lh = lh < rh;
1234 		break;
1235 	default:
1236 		return ROFFRULE_DENY;
1237 	}
1238 out:
1239 	if (not)
1240 		lh = !lh;
1241 	return lh ? ROFFRULE_ALLOW : ROFFRULE_DENY;
1242 }
1243 
1244 /* ARGSUSED */
1245 static enum rofferr
1246 roff_line_ignore(ROFF_ARGS)
1247 {
1248 
1249 	return(ROFF_IGN);
1250 }
1251 
1252 /* ARGSUSED */
1253 static enum rofferr
1254 roff_cond(ROFF_ARGS)
1255 {
1256 
1257 	roffnode_push(r, tok, NULL, ln, ppos);
1258 
1259 	/*
1260 	 * An `.el' has no conditional body: it will consume the value
1261 	 * of the current rstack entry set in prior `ie' calls or
1262 	 * defaults to DENY.
1263 	 *
1264 	 * If we're not an `el', however, then evaluate the conditional.
1265 	 */
1266 
1267 	r->last->rule = ROFF_el == tok ?
1268 		(r->rstackpos < 0 ?
1269 		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
1270 		roff_evalcond(*bufp, &pos);
1271 
1272 	/*
1273 	 * An if-else will put the NEGATION of the current evaluated
1274 	 * conditional into the stack of rules.
1275 	 */
1276 
1277 	if (ROFF_ie == tok) {
1278 		if (r->rstackpos == RSTACK_MAX - 1) {
1279 			mandoc_msg(MANDOCERR_MEM,
1280 				r->parse, ln, ppos, NULL);
1281 			return(ROFF_ERR);
1282 		}
1283 		r->rstack[++r->rstackpos] =
1284 			ROFFRULE_DENY == r->last->rule ?
1285 			ROFFRULE_ALLOW : ROFFRULE_DENY;
1286 	}
1287 
1288 	/* If the parent has false as its rule, then so do we. */
1289 
1290 	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
1291 		r->last->rule = ROFFRULE_DENY;
1292 
1293 	/*
1294 	 * Determine scope.
1295 	 * If there is nothing on the line after the conditional,
1296 	 * not even whitespace, use next-line scope.
1297 	 */
1298 
1299 	if ('\0' == (*bufp)[pos]) {
1300 		r->last->endspan = 2;
1301 		goto out;
1302 	}
1303 
1304 	while (' ' == (*bufp)[pos])
1305 		pos++;
1306 
1307 	/* An opening brace requests multiline scope. */
1308 
1309 	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
1310 		r->last->endspan = -1;
1311 		pos += 2;
1312 		goto out;
1313 	}
1314 
1315 	/*
1316 	 * Anything else following the conditional causes
1317 	 * single-line scope.  Warn if the scope contains
1318 	 * nothing but trailing whitespace.
1319 	 */
1320 
1321 	if ('\0' == (*bufp)[pos])
1322 		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
1323 
1324 	r->last->endspan = 1;
1325 
1326 out:
1327 	*offs = pos;
1328 	return(ROFF_RERUN);
1329 }
1330 
1331 
1332 /* ARGSUSED */
1333 static enum rofferr
1334 roff_ds(ROFF_ARGS)
1335 {
1336 	char		*name, *string;
1337 
1338 	/*
1339 	 * A symbol is named by the first word following the macro
1340 	 * invocation up to a space.  Its value is anything after the
1341 	 * name's trailing whitespace and optional double-quote.  Thus,
1342 	 *
1343 	 *  [.ds foo "bar  "     ]
1344 	 *
1345 	 * will have `bar  "     ' as its value.
1346 	 */
1347 
1348 	string = *bufp + pos;
1349 	name = roff_getname(r, &string, ln, pos);
1350 	if ('\0' == *name)
1351 		return(ROFF_IGN);
1352 
1353 	/* Read past initial double-quote. */
1354 	if ('"' == *string)
1355 		string++;
1356 
1357 	/* The rest is the value. */
1358 	roff_setstr(r, name, string, 0);
1359 	return(ROFF_IGN);
1360 }
1361 
1362 void
1363 roff_setreg(struct roff *r, const char *name, int val, char sign)
1364 {
1365 	struct roffreg	*reg;
1366 
1367 	/* Search for an existing register with the same name. */
1368 	reg = r->regtab;
1369 
1370 	while (reg && strcmp(name, reg->key.p))
1371 		reg = reg->next;
1372 
1373 	if (NULL == reg) {
1374 		/* Create a new register. */
1375 		reg = mandoc_malloc(sizeof(struct roffreg));
1376 		reg->key.p = mandoc_strdup(name);
1377 		reg->key.sz = strlen(name);
1378 		reg->val = 0;
1379 		reg->next = r->regtab;
1380 		r->regtab = reg;
1381 	}
1382 
1383 	if ('+' == sign)
1384 		reg->val += val;
1385 	else if ('-' == sign)
1386 		reg->val -= val;
1387 	else
1388 		reg->val = val;
1389 }
1390 
1391 int
1392 roff_getreg(const struct roff *r, const char *name)
1393 {
1394 	struct roffreg	*reg;
1395 
1396 	for (reg = r->regtab; reg; reg = reg->next)
1397 		if (0 == strcmp(name, reg->key.p))
1398 			return(reg->val);
1399 
1400 	return(0);
1401 }
1402 
1403 static int
1404 roff_getregn(const struct roff *r, const char *name, size_t len)
1405 {
1406 	struct roffreg	*reg;
1407 
1408 	for (reg = r->regtab; reg; reg = reg->next)
1409 		if (len == reg->key.sz &&
1410 		    0 == strncmp(name, reg->key.p, len))
1411 			return(reg->val);
1412 
1413 	return(0);
1414 }
1415 
1416 static void
1417 roff_freereg(struct roffreg *reg)
1418 {
1419 	struct roffreg	*old_reg;
1420 
1421 	while (NULL != reg) {
1422 		free(reg->key.p);
1423 		old_reg = reg;
1424 		reg = reg->next;
1425 		free(old_reg);
1426 	}
1427 }
1428 
1429 /* ARGSUSED */
1430 static enum rofferr
1431 roff_nr(ROFF_ARGS)
1432 {
1433 	const char	*key;
1434 	char		*val;
1435 	size_t		 sz;
1436 	int		 iv;
1437 	char		 sign;
1438 
1439 	val = *bufp + pos;
1440 	key = roff_getname(r, &val, ln, pos);
1441 
1442 	sign = *val;
1443 	if ('+' == sign || '-' == sign)
1444 		val++;
1445 
1446 	sz = strspn(val, "0123456789");
1447 	iv = sz ? mandoc_strntoi(val, sz, 10) : 0;
1448 
1449 	roff_setreg(r, key, iv, sign);
1450 
1451 	return(ROFF_IGN);
1452 }
1453 
1454 /* ARGSUSED */
1455 static enum rofferr
1456 roff_rm(ROFF_ARGS)
1457 {
1458 	const char	 *name;
1459 	char		 *cp;
1460 
1461 	cp = *bufp + pos;
1462 	while ('\0' != *cp) {
1463 		name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1464 		if ('\0' != *name)
1465 			roff_setstr(r, name, NULL, 0);
1466 	}
1467 	return(ROFF_IGN);
1468 }
1469 
1470 /* ARGSUSED */
1471 static enum rofferr
1472 roff_it(ROFF_ARGS)
1473 {
1474 	char		*cp;
1475 	size_t		 len;
1476 	int		 iv;
1477 
1478 	/* Parse the number of lines. */
1479 	cp = *bufp + pos;
1480 	len = strcspn(cp, " \t");
1481 	cp[len] = '\0';
1482 	if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1483 		mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1484 				ln, ppos, *bufp + 1);
1485 		return(ROFF_IGN);
1486 	}
1487 	cp += len + 1;
1488 
1489 	/* Arm the input line trap. */
1490 	roffit_lines = iv;
1491 	roffit_macro = mandoc_strdup(cp);
1492 	return(ROFF_IGN);
1493 }
1494 
1495 /* ARGSUSED */
1496 static enum rofferr
1497 roff_Dd(ROFF_ARGS)
1498 {
1499 	const char *const	*cp;
1500 
1501 	if (MPARSE_MDOC != r->parsetype)
1502 		for (cp = __mdoc_reserved; *cp; cp++)
1503 			roff_setstr(r, *cp, NULL, 0);
1504 
1505 	return(ROFF_CONT);
1506 }
1507 
1508 /* ARGSUSED */
1509 static enum rofferr
1510 roff_TH(ROFF_ARGS)
1511 {
1512 	const char *const	*cp;
1513 
1514 	if (MPARSE_MDOC != r->parsetype)
1515 		for (cp = __man_reserved; *cp; cp++)
1516 			roff_setstr(r, *cp, NULL, 0);
1517 
1518 	return(ROFF_CONT);
1519 }
1520 
1521 /* ARGSUSED */
1522 static enum rofferr
1523 roff_TE(ROFF_ARGS)
1524 {
1525 
1526 	if (NULL == r->tbl)
1527 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1528 	else
1529 		tbl_end(&r->tbl);
1530 
1531 	return(ROFF_IGN);
1532 }
1533 
1534 /* ARGSUSED */
1535 static enum rofferr
1536 roff_T_(ROFF_ARGS)
1537 {
1538 
1539 	if (NULL == r->tbl)
1540 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1541 	else
1542 		tbl_restart(ppos, ln, r->tbl);
1543 
1544 	return(ROFF_IGN);
1545 }
1546 
#if 0
/*
 * Currently compiled out.
 * Close the open equation, if any.  Returns 1 if an equation was
 * open and eqn_end() reported ROFF_EQN, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return(0);

	return(ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0);
}
#endif
1555 
1556 static void
1557 roff_openeqn(struct roff *r, const char *name, int line,
1558 		int offs, const char *buf)
1559 {
1560 	struct eqn_node *e;
1561 	int		 poff;
1562 
1563 	assert(NULL == r->eqn);
1564 	e = eqn_alloc(name, offs, line, r->parse);
1565 
1566 	if (r->last_eqn)
1567 		r->last_eqn->next = e;
1568 	else
1569 		r->first_eqn = r->last_eqn = e;
1570 
1571 	r->eqn = r->last_eqn = e;
1572 
1573 	if (buf) {
1574 		poff = 0;
1575 		eqn_read(&r->eqn, line, buf, offs, &poff);
1576 	}
1577 }
1578 
1579 /* ARGSUSED */
1580 static enum rofferr
1581 roff_EQ(ROFF_ARGS)
1582 {
1583 
1584 	roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1585 	return(ROFF_IGN);
1586 }
1587 
1588 /* ARGSUSED */
1589 static enum rofferr
1590 roff_EN(ROFF_ARGS)
1591 {
1592 
1593 	mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1594 	return(ROFF_IGN);
1595 }
1596 
1597 /* ARGSUSED */
1598 static enum rofferr
1599 roff_TS(ROFF_ARGS)
1600 {
1601 	struct tbl_node	*tbl;
1602 
1603 	if (r->tbl) {
1604 		mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1605 		tbl_end(&r->tbl);
1606 	}
1607 
1608 	tbl = tbl_alloc(ppos, ln, r->parse);
1609 
1610 	if (r->last_tbl)
1611 		r->last_tbl->next = tbl;
1612 	else
1613 		r->first_tbl = r->last_tbl = tbl;
1614 
1615 	r->tbl = r->last_tbl = tbl;
1616 	return(ROFF_IGN);
1617 }
1618 
1619 /* ARGSUSED */
1620 static enum rofferr
1621 roff_cc(ROFF_ARGS)
1622 {
1623 	const char	*p;
1624 
1625 	p = *bufp + pos;
1626 
1627 	if ('\0' == *p || '.' == (r->control = *p++))
1628 		r->control = 0;
1629 
1630 	if ('\0' != *p)
1631 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1632 
1633 	return(ROFF_IGN);
1634 }
1635 
1636 /* ARGSUSED */
1637 static enum rofferr
1638 roff_tr(ROFF_ARGS)
1639 {
1640 	const char	*p, *first, *second;
1641 	size_t		 fsz, ssz;
1642 	enum mandoc_esc	 esc;
1643 
1644 	p = *bufp + pos;
1645 
1646 	if ('\0' == *p) {
1647 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1648 		return(ROFF_IGN);
1649 	}
1650 
1651 	while ('\0' != *p) {
1652 		fsz = ssz = 1;
1653 
1654 		first = p++;
1655 		if ('\\' == *first) {
1656 			esc = mandoc_escape(&p, NULL, NULL);
1657 			if (ESCAPE_ERROR == esc) {
1658 				mandoc_msg
1659 					(MANDOCERR_BADESCAPE, r->parse,
1660 					 ln, (int)(p - *bufp), NULL);
1661 				return(ROFF_IGN);
1662 			}
1663 			fsz = (size_t)(p - first);
1664 		}
1665 
1666 		second = p++;
1667 		if ('\\' == *second) {
1668 			esc = mandoc_escape(&p, NULL, NULL);
1669 			if (ESCAPE_ERROR == esc) {
1670 				mandoc_msg
1671 					(MANDOCERR_BADESCAPE, r->parse,
1672 					 ln, (int)(p - *bufp), NULL);
1673 				return(ROFF_IGN);
1674 			}
1675 			ssz = (size_t)(p - second);
1676 		} else if ('\0' == *second) {
1677 			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1678 					ln, (int)(p - *bufp), NULL);
1679 			second = " ";
1680 			p--;
1681 		}
1682 
1683 		if (fsz > 1) {
1684 			roff_setstrn(&r->xmbtab, first,
1685 					fsz, second, ssz, 0);
1686 			continue;
1687 		}
1688 
1689 		if (NULL == r->xtab)
1690 			r->xtab = mandoc_calloc
1691 				(128, sizeof(struct roffstr));
1692 
1693 		free(r->xtab[(int)*first].p);
1694 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1695 		r->xtab[(int)*first].sz = ssz;
1696 	}
1697 
1698 	return(ROFF_IGN);
1699 }
1700 
1701 /* ARGSUSED */
1702 static enum rofferr
1703 roff_so(ROFF_ARGS)
1704 {
1705 	char *name;
1706 
1707 	mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1708 
1709 	/*
1710 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1711 	 * opening anything that's not in our cwd or anything beneath
1712 	 * it.  Thus, explicitly disallow traversing up the file-system
1713 	 * or using absolute paths.
1714 	 */
1715 
1716 	name = *bufp + pos;
1717 	if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1718 		mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1719 		return(ROFF_ERR);
1720 	}
1721 
1722 	*offs = pos;
1723 	return(ROFF_SO);
1724 }
1725 
1726 /* ARGSUSED */
1727 static enum rofferr
1728 roff_userdef(ROFF_ARGS)
1729 {
1730 	const char	 *arg[9];
1731 	char		 *cp, *n1, *n2;
1732 	int		  i;
1733 
1734 	/*
1735 	 * Collect pointers to macro argument strings
1736 	 * and NUL-terminate them.
1737 	 */
1738 	cp = *bufp + pos;
1739 	for (i = 0; i < 9; i++)
1740 		arg[i] = '\0' == *cp ? "" :
1741 		    mandoc_getarg(r->parse, &cp, ln, &pos);
1742 
1743 	/*
1744 	 * Expand macro arguments.
1745 	 */
1746 	*szp = 0;
1747 	n1 = cp = mandoc_strdup(r->current_string);
1748 	while (NULL != (cp = strstr(cp, "\\$"))) {
1749 		i = cp[2] - '1';
1750 		if (0 > i || 8 < i) {
1751 			/* Not an argument invocation. */
1752 			cp += 2;
1753 			continue;
1754 		}
1755 
1756 		*szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1757 		n2 = mandoc_malloc(*szp);
1758 
1759 		strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1760 		strlcat(n2, arg[i], *szp);
1761 		strlcat(n2, cp + 3, *szp);
1762 
1763 		cp = n2 + (cp - n1);
1764 		free(n1);
1765 		n1 = n2;
1766 	}
1767 
1768 	/*
1769 	 * Replace the macro invocation
1770 	 * by the expanded macro.
1771 	 */
1772 	free(*bufp);
1773 	*bufp = n1;
1774 	if (0 == *szp)
1775 		*szp = strlen(*bufp) + 1;
1776 
1777 	return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1778 	   ROFF_REPARSE : ROFF_APPEND);
1779 }
1780 
1781 static char *
1782 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1783 {
1784 	char	 *name, *cp;
1785 
1786 	name = *cpp;
1787 	if ('\0' == *name)
1788 		return(name);
1789 
1790 	/* Read until end of name. */
1791 	for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1792 		if ('\\' != *cp)
1793 			continue;
1794 		cp++;
1795 		if ('\\' == *cp)
1796 			continue;
1797 		mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1798 		*cp = '\0';
1799 		name = cp;
1800 	}
1801 
1802 	/* Nil-terminate name. */
1803 	if ('\0' != *cp)
1804 		*(cp++) = '\0';
1805 
1806 	/* Read past spaces. */
1807 	while (' ' == *cp)
1808 		cp++;
1809 
1810 	*cpp = cp;
1811 	return(name);
1812 }
1813 
1814 /*
1815  * Store *string into the user-defined string called *name.
1816  * In multiline mode, append to an existing entry and append '\n';
1817  * else replace the existing entry, if there is one.
1818  * To clear an existing entry, call with (*r, *name, NULL, 0).
1819  */
1820 static void
1821 roff_setstr(struct roff *r, const char *name, const char *string,
1822 	int multiline)
1823 {
1824 
1825 	roff_setstrn(&r->strtab, name, strlen(name), string,
1826 			string ? strlen(string) : 0, multiline);
1827 }
1828 
1829 static void
1830 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1831 		const char *string, size_t stringsz, int multiline)
1832 {
1833 	struct roffkv	*n;
1834 	char		*c;
1835 	int		 i;
1836 	size_t		 oldch, newch;
1837 
1838 	/* Search for an existing string with the same name. */
1839 	n = *r;
1840 
1841 	while (n && strcmp(name, n->key.p))
1842 		n = n->next;
1843 
1844 	if (NULL == n) {
1845 		/* Create a new string table entry. */
1846 		n = mandoc_malloc(sizeof(struct roffkv));
1847 		n->key.p = mandoc_strndup(name, namesz);
1848 		n->key.sz = namesz;
1849 		n->val.p = NULL;
1850 		n->val.sz = 0;
1851 		n->next = *r;
1852 		*r = n;
1853 	} else if (0 == multiline) {
1854 		/* In multiline mode, append; else replace. */
1855 		free(n->val.p);
1856 		n->val.p = NULL;
1857 		n->val.sz = 0;
1858 	}
1859 
1860 	if (NULL == string)
1861 		return;
1862 
1863 	/*
1864 	 * One additional byte for the '\n' in multiline mode,
1865 	 * and one for the terminating '\0'.
1866 	 */
1867 	newch = stringsz + (multiline ? 2u : 1u);
1868 
1869 	if (NULL == n->val.p) {
1870 		n->val.p = mandoc_malloc(newch);
1871 		*n->val.p = '\0';
1872 		oldch = 0;
1873 	} else {
1874 		oldch = n->val.sz;
1875 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1876 	}
1877 
1878 	/* Skip existing content in the destination buffer. */
1879 	c = n->val.p + (int)oldch;
1880 
1881 	/* Append new content to the destination buffer. */
1882 	i = 0;
1883 	while (i < (int)stringsz) {
1884 		/*
1885 		 * Rudimentary roff copy mode:
1886 		 * Handle escaped backslashes.
1887 		 */
1888 		if ('\\' == string[i] && '\\' == string[i + 1])
1889 			i++;
1890 		*c++ = string[i++];
1891 	}
1892 
1893 	/* Append terminating bytes. */
1894 	if (multiline)
1895 		*c++ = '\n';
1896 
1897 	*c = '\0';
1898 	n->val.sz = (int)(c - n->val.p);
1899 }
1900 
1901 static const char *
1902 roff_getstrn(const struct roff *r, const char *name, size_t len)
1903 {
1904 	const struct roffkv *n;
1905 
1906 	for (n = r->strtab; n; n = n->next)
1907 		if (0 == strncmp(name, n->key.p, len) &&
1908 				'\0' == n->key.p[(int)len])
1909 			return(n->val.p);
1910 
1911 	return(NULL);
1912 }
1913 
1914 static void
1915 roff_freestr(struct roffkv *r)
1916 {
1917 	struct roffkv	 *n, *nn;
1918 
1919 	for (n = r; n; n = nn) {
1920 		free(n->key.p);
1921 		free(n->val.p);
1922 		nn = n->next;
1923 		free(n);
1924 	}
1925 }
1926 
1927 const struct tbl_span *
1928 roff_span(const struct roff *r)
1929 {
1930 
1931 	return(r->tbl ? tbl_span(r->tbl) : NULL);
1932 }
1933 
1934 const struct eqn *
1935 roff_eqn(const struct roff *r)
1936 {
1937 
1938 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1939 }
1940 
1941 /*
1942  * Duplicate an input string, making the appropriate character
1943  * conversations (as stipulated by `tr') along the way.
1944  * Returns a heap-allocated string with all the replacements made.
1945  */
1946 char *
1947 roff_strdup(const struct roff *r, const char *p)
1948 {
1949 	const struct roffkv *cp;
1950 	char		*res;
1951 	const char	*pp;
1952 	size_t		 ssz, sz;
1953 	enum mandoc_esc	 esc;
1954 
1955 	if (NULL == r->xmbtab && NULL == r->xtab)
1956 		return(mandoc_strdup(p));
1957 	else if ('\0' == *p)
1958 		return(mandoc_strdup(""));
1959 
1960 	/*
1961 	 * Step through each character looking for term matches
1962 	 * (remember that a `tr' can be invoked with an escape, which is
1963 	 * a glyph but the escape is multi-character).
1964 	 * We only do this if the character hash has been initialised
1965 	 * and the string is >0 length.
1966 	 */
1967 
1968 	res = NULL;
1969 	ssz = 0;
1970 
1971 	while ('\0' != *p) {
1972 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1973 			sz = r->xtab[(int)*p].sz;
1974 			res = mandoc_realloc(res, ssz + sz + 1);
1975 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1976 			ssz += sz;
1977 			p++;
1978 			continue;
1979 		} else if ('\\' != *p) {
1980 			res = mandoc_realloc(res, ssz + 2);
1981 			res[ssz++] = *p++;
1982 			continue;
1983 		}
1984 
1985 		/* Search for term matches. */
1986 		for (cp = r->xmbtab; cp; cp = cp->next)
1987 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
1988 				break;
1989 
1990 		if (NULL != cp) {
1991 			/*
1992 			 * A match has been found.
1993 			 * Append the match to the array and move
1994 			 * forward by its keysize.
1995 			 */
1996 			res = mandoc_realloc
1997 				(res, ssz + cp->val.sz + 1);
1998 			memcpy(res + ssz, cp->val.p, cp->val.sz);
1999 			ssz += cp->val.sz;
2000 			p += (int)cp->key.sz;
2001 			continue;
2002 		}
2003 
2004 		/*
2005 		 * Handle escapes carefully: we need to copy
2006 		 * over just the escape itself, or else we might
2007 		 * do replacements within the escape itself.
2008 		 * Make sure to pass along the bogus string.
2009 		 */
2010 		pp = p++;
2011 		esc = mandoc_escape(&p, NULL, NULL);
2012 		if (ESCAPE_ERROR == esc) {
2013 			sz = strlen(pp);
2014 			res = mandoc_realloc(res, ssz + sz + 1);
2015 			memcpy(res + ssz, pp, sz);
2016 			break;
2017 		}
2018 		/*
2019 		 * We bail out on bad escapes.
2020 		 * No need to warn: we already did so when
2021 		 * roff_res() was called.
2022 		 */
2023 		sz = (int)(p - pp);
2024 		res = mandoc_realloc(res, ssz + sz + 1);
2025 		memcpy(res + ssz, pp, sz);
2026 		ssz += sz;
2027 	}
2028 
2029 	res[(int)ssz] = '\0';
2030 	return(res);
2031 }
2032 
2033 /*
2034  * Find out whether a line is a macro line or not.
2035  * If it is, adjust the current position and return one; if it isn't,
2036  * return zero and don't change the current position.
2037  * If the control character has been set with `.cc', then let that grain
2038  * precedence.
2039  * This is slighly contrary to groff, where using the non-breaking
2040  * control character when `cc' has been invoked will cause the
2041  * non-breaking macro contents to be printed verbatim.
2042  */
2043 int
2044 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
2045 {
2046 	int		pos;
2047 
2048 	pos = *ppos;
2049 
2050 	if (0 != r->control && cp[pos] == r->control)
2051 		pos++;
2052 	else if (0 != r->control)
2053 		return(0);
2054 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
2055 		pos += 2;
2056 	else if ('.' == cp[pos] || '\'' == cp[pos])
2057 		pos++;
2058 	else
2059 		return(0);
2060 
2061 	while (' ' == cp[pos] || '\t' == cp[pos])
2062 		pos++;
2063 
2064 	*ppos = pos;
2065 	return(1);
2066 }
2067