xref: /dragonfly/contrib/mdocml/roff.c (revision 0dace59e)
1 /*	$Id: roff.c,v 1.178 2013/07/13 12:52:07 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 
28 #include "mandoc.h"
29 #include "libroff.h"
30 #include "libmandoc.h"
31 
/*
 * Maximum number of nested if-else conditionals:
 * the capacity of the rstack[] rule stack in struct roff.
 */
#define	RSTACK_MAX	128

/*
 * Maximum number of string expansions per line, to break infinite loops.
 * Once exceeded, roff_res() reports MANDOCERR_ROFFLOOP and returns ROFF_IGN.
 */
#define	EXPAND_LIMIT	1000
37 
/*
 * Request tokens.  The values index the roffs[] table, so the order of
 * the enumerators must stay in step with the order of its initialisers.
 * ROFF_USERDEF is selected by roff_parse() when the name matches a
 * user-defined string; ROFF_MAX doubles as the "not found" result.
 */
enum	rofft {
	ROFF_ad,
	ROFF_am,
	ROFF_ami,
	ROFF_am1,
	ROFF_cc,
	ROFF_de,
	ROFF_dei,
	ROFF_de1,
	ROFF_ds,
	ROFF_el,
	ROFF_hy,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	ROFF_it,
	ROFF_ne,
	ROFF_nh,
	ROFF_nr,
	ROFF_ns,
	ROFF_ps,
	ROFF_rm,
	ROFF_so,
	ROFF_ta,
	ROFF_tr,
	ROFF_Dd,
	ROFF_TH,
	ROFF_TS,
	ROFF_TE,
	ROFF_T_,
	ROFF_EQ,
	ROFF_EN,
	ROFF_cblock,
	ROFF_ccond,
	ROFF_USERDEF,
	ROFF_MAX
};
75 
/*
 * Outcome of a conditional.  The text and sub handlers of conditionals
 * return ROFF_IGN for lines under a DENY rule and pass lines on
 * (ROFF_CONT) under an ALLOW rule.
 */
enum	roffrule {
	ROFFRULE_ALLOW,
	ROFFRULE_DENY
};
80 
/*
 * A single register entity.  If "set" is zero, the value of the
 * register should be the default one, which is per-register.
 * Registers are assumed to be unsigned ints for now.
 * roff_reset() zeroes the whole register array, i.e. marks all unset.
 */
struct	reg {
	int		 set; /* whether set or not */
	unsigned int	 u; /* unsigned integer */
};
90 
/*
 * An incredibly-simple string buffer: a pointer plus its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
98 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the strtab and xmbtab lists of struct roff.
 */
struct	roffkv {
	struct roffstr	 key;
	struct roffstr	 val;
	struct roffkv	*next; /* next in list */
};
107 
/*
 * Parser context.  Created by roff_alloc(), recycled by roff_reset()
 * and destroyed by roff_free().
 */
struct	roff {
	enum mparset	 parsetype; /* requested parse type */
	struct mparse	*parse; /* parse point */
	struct roffnode	*last; /* leaf of stack */
	enum roffrule	 rstack[RSTACK_MAX]; /* stack of !`ie' rules */
	char		 control; /* control character */
	int		 rstackpos; /* position in rstack; -1 when empty */
	struct reg	 regs[REG__MAX];
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* last equation parsed */
	struct eqn_node	*first_eqn; /* first equation parsed */
	struct eqn_node	*eqn; /* current equation being parsed */
};
127 
/*
 * One open block or conditional on the instruction stack (r->last).
 * Pushed by roffnode_push(), released by roffnode_pop().
 * endspan is decremented by roffnode_cleanscope() and the node is
 * popped when it reaches zero; roff_cond() sets it to -1 for a
 * multiline `\{' scope, 1 for single-line and 2 for next-line scope.
 */
struct	roffnode {
	enum rofft	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	enum roffrule	 rule; /* current evaluation rule */
};
138 
/*
 * Common parameter list shared by every request handler, so that all
 * handlers can be stored in and called through a roffproc pointer.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum rofft tok, /* tok of macro */ \
		 	 char **bufp, /* input buffer */ \
			 size_t *szp, /* size of input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Signature of a request handler. */
typedef	enum rofferr (*roffproc)(ROFF_ARGS);
149 
/*
 * Description of one request: its name and the three handlers used
 * when it is first seen (proc), and while it is the innermost open
 * scope for text lines (text) and for control lines (sub).
 */
struct	roffmac {
	const char	*name; /* macro name */
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
	struct roffmac	*next; /* next entry in the same hash bucket */
};
159 
/* One predefined string: installed via roff_setstr() at alloc/reset. */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/* Expands to one struct predef initialiser, trailing comma included. */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167 
/* Forward declarations of the file-local helpers and request handlers. */
static	enum rofft	 roffhash_find(const char *, size_t);
static	void		 roffhash_init(void);
static	void		 roffnode_cleanscope(struct roff *);
static	void		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum rofft,
				const char *, int, int);
static	enum rofferr	 roff_block(ROFF_ARGS);
static	enum rofferr	 roff_block_text(ROFF_ARGS);
static	enum rofferr	 roff_block_sub(ROFF_ARGS);
static	enum rofferr	 roff_cblock(ROFF_ARGS);
static	enum rofferr	 roff_cc(ROFF_ARGS);
static	enum rofferr	 roff_ccond(ROFF_ARGS);
static	enum rofferr	 roff_cond(ROFF_ARGS);
static	enum rofferr	 roff_cond_text(ROFF_ARGS);
static	enum rofferr	 roff_cond_sub(ROFF_ARGS);
static	enum rofferr	 roff_ds(ROFF_ARGS);
static	enum roffrule	 roff_evalcond(const char *, int *);
static	void		 roff_free1(struct roff *);
static	void		 roff_freestr(struct roffkv *);
static	char		*roff_getname(struct roff *, char **, int, int);
static	const char	*roff_getstrn(const struct roff *,
				const char *, size_t);
static	enum rofferr	 roff_it(ROFF_ARGS);
static	enum rofferr	 roff_line_ignore(ROFF_ARGS);
static	enum rofferr	 roff_nr(ROFF_ARGS);
static	void		 roff_openeqn(struct roff *, const char *,
				int, int, const char *);
static	enum rofft	 roff_parse(struct roff *, const char *, int *);
static	enum rofferr	 roff_parsetext(char **, size_t *, int, int *);
static	enum rofferr	 roff_res(struct roff *,
				char **, size_t *, int, int);
static	enum rofferr	 roff_rm(ROFF_ARGS);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	enum rofferr	 roff_so(ROFF_ARGS);
static	enum rofferr	 roff_tr(ROFF_ARGS);
static	enum rofferr	 roff_Dd(ROFF_ARGS);
static	enum rofferr	 roff_TH(ROFF_ARGS);
static	enum rofferr	 roff_TE(ROFF_ARGS);
static	enum rofferr	 roff_TS(ROFF_ARGS);
static	enum rofferr	 roff_EQ(ROFF_ARGS);
static	enum rofferr	 roff_EN(ROFF_ARGS);
static	enum rofferr	 roff_T_(ROFF_ARGS);
static	enum rofferr	 roff_userdef(ROFF_ARGS);
214 
/* See roffhash_find() */

/* Range of first characters accepted as a hash key (printable ASCII). */
#define	ASCII_HI	 126
#define	ASCII_LO	 33
#define	HASHWIDTH	(ASCII_HI - ASCII_LO + 1)

/* One bucket per possible first character; filled by roffhash_init(). */
static	struct roffmac	*hash[HASHWIDTH];
222 
/*
 * Request table, indexed by enum rofft; the initialisers must stay in
 * the same order as the enumerators.  Each entry lists, in order:
 * name, proc, text, sub, flags, next.  The final (ROFF_USERDEF) entry
 * has no name and is not entered into the hash table.
 */
static	struct roffmac	 roffs[ROFF_MAX] = {
	{ "ad", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "am", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "cc", roff_cc, NULL, NULL, 0, NULL },
	{ "de", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "ds", roff_ds, NULL, NULL, 0, NULL },
	{ "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "hy", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL },
	{ "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL },
	{ "it", roff_it, NULL, NULL, 0, NULL },
	{ "ne", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nh", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "nr", roff_nr, NULL, NULL, 0, NULL },
	{ "ns", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "ps", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "rm", roff_rm, NULL, NULL, 0, NULL },
	{ "so", roff_so, NULL, NULL, 0, NULL },
	{ "ta", roff_line_ignore, NULL, NULL, 0, NULL },
	{ "tr", roff_tr, NULL, NULL, 0, NULL },
	{ "Dd", roff_Dd, NULL, NULL, 0, NULL },
	{ "TH", roff_TH, NULL, NULL, 0, NULL },
	{ "TS", roff_TS, NULL, NULL, 0, NULL },
	{ "TE", roff_TE, NULL, NULL, 0, NULL },
	{ "T&", roff_T_, NULL, NULL, 0, NULL },
	{ "EQ", roff_EQ, NULL, NULL, 0, NULL },
	{ "EN", roff_EN, NULL, NULL, 0, NULL },
	{ ".", roff_cblock, NULL, NULL, 0, NULL },
	{ "\\}", roff_ccond, NULL, NULL, 0, NULL },
	{ NULL, roff_userdef, NULL, NULL, 0, NULL },
};
259 
/*
 * NULL-terminated list of reserved names; going by the identifier,
 * these are mdoc(7) macro names.
 * NOTE(review): the consumer of this list is not visible here.
 */
const	char *const __mdoc_reserved[] = {
	"Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
	"Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq",
	"Brc", "Bro", "Brq", "Bsx", "Bt", "Bx",
	"Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq",
	"Ds", "Dt", "Dv", "Dx", "D1",
	"Ec", "Ed", "Ef", "Ek", "El", "Em", "em",
	"En", "Eo", "Eq", "Er", "Es", "Ev", "Ex",
	"Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx",
	"Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", "LP",
	"Me", "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx",
	"Oc", "Oo", "Op", "Os", "Ot", "Ox",
	"Pa", "Pc", "Pf", "Po", "Pp", "PP", "pp", "Pq",
	"Qc", "Ql", "Qo", "Qq", "Or", "Rd", "Re", "Rs", "Rv",
	"Sc", "Sf", "Sh", "SH", "Sm", "So", "Sq",
	"Ss", "St", "Sx", "Sy",
	"Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr",
	"%A", "%B", "%D", "%I", "%J", "%N", "%O",
	"%P", "%Q", "%R", "%T", "%U", "%V",
	NULL
};
281 
/*
 * NULL-terminated list of reserved names; going by the identifier,
 * these are man(7) macro names.
 * NOTE(review): the consumer of this list is not visible here.
 */
const	char *const __man_reserved[] = {
	"AT", "B", "BI", "BR", "BT", "DE", "DS", "DT",
	"EE", "EN", "EQ", "EX", "HF", "HP", "I", "IB", "IP", "IR",
	"LP", "ME", "MT", "OP", "P", "PD", "PP", "PT",
	"R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", "SY",
	"TE", "TH", "TP", "TQ", "TS", "T&", "UC", "UE", "UR", "YS",
	NULL
};
290 
/*
 * Array of injected predefined strings.
 * The entries come from predefs.in; roff_alloc() and roff_reset()
 * install all PREDEFS_MAX of them with roff_setstr().
 * NOTE(review): PREDEFS_MAX presumably has to equal the number of
 * PREDEF() lines in predefs.in -- confirm when editing either.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
296 
/*
 * See roffhash_find().
 * Bucket index for a request name: its first character, rebased to
 * ASCII_LO.  The argument is parenthesised so that any pointer
 * expression can be passed safely.
 */
#define	ROFF_HASH(p)	((p)[0] - ASCII_LO)

/*
 * State of the pending input line trap.  roff_parsetext() springs the
 * trap when roffit_lines is 1 and counts down while it is larger.
 */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
302 
303 static void
304 roffhash_init(void)
305 {
306 	struct roffmac	 *n;
307 	int		  buc, i;
308 
309 	for (i = 0; i < (int)ROFF_USERDEF; i++) {
310 		assert(roffs[i].name[0] >= ASCII_LO);
311 		assert(roffs[i].name[0] <= ASCII_HI);
312 
313 		buc = ROFF_HASH(roffs[i].name);
314 
315 		if (NULL != (n = hash[buc])) {
316 			for ( ; n->next; n = n->next)
317 				/* Do nothing. */ ;
318 			n->next = &roffs[i];
319 		} else
320 			hash[buc] = &roffs[i];
321 	}
322 }
323 
324 /*
325  * Look up a roff token by its name.  Returns ROFF_MAX if no macro by
326  * the nil-terminated string name could be found.
327  */
328 static enum rofft
329 roffhash_find(const char *p, size_t s)
330 {
331 	int		 buc;
332 	struct roffmac	*n;
333 
334 	/*
335 	 * libroff has an extremely simple hashtable, for the time
336 	 * being, which simply keys on the first character, which must
337 	 * be printable, then walks a chain.  It works well enough until
338 	 * optimised.
339 	 */
340 
341 	if (p[0] < ASCII_LO || p[0] > ASCII_HI)
342 		return(ROFF_MAX);
343 
344 	buc = ROFF_HASH(p);
345 
346 	if (NULL == (n = hash[buc]))
347 		return(ROFF_MAX);
348 	for ( ; n; n = n->next)
349 		if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s])
350 			return((enum rofft)(n - roffs));
351 
352 	return(ROFF_MAX);
353 }
354 
355 
356 /*
357  * Pop the current node off of the stack of roff instructions currently
358  * pending.
359  */
360 static void
361 roffnode_pop(struct roff *r)
362 {
363 	struct roffnode	*p;
364 
365 	assert(r->last);
366 	p = r->last;
367 
368 	r->last = r->last->parent;
369 	free(p->name);
370 	free(p->end);
371 	free(p);
372 }
373 
374 
375 /*
376  * Push a roff node onto the instruction stack.  This must later be
377  * removed with roffnode_pop().
378  */
379 static void
380 roffnode_push(struct roff *r, enum rofft tok, const char *name,
381 		int line, int col)
382 {
383 	struct roffnode	*p;
384 
385 	p = mandoc_calloc(1, sizeof(struct roffnode));
386 	p->tok = tok;
387 	if (name)
388 		p->name = mandoc_strdup(name);
389 	p->parent = r->last;
390 	p->line = line;
391 	p->col = col;
392 	p->rule = p->parent ? p->parent->rule : ROFFRULE_DENY;
393 
394 	r->last = p;
395 }
396 
397 
398 static void
399 roff_free1(struct roff *r)
400 {
401 	struct tbl_node	*tbl;
402 	struct eqn_node	*e;
403 	int		 i;
404 
405 	while (NULL != (tbl = r->first_tbl)) {
406 		r->first_tbl = tbl->next;
407 		tbl_free(tbl);
408 	}
409 
410 	r->first_tbl = r->last_tbl = r->tbl = NULL;
411 
412 	while (NULL != (e = r->first_eqn)) {
413 		r->first_eqn = e->next;
414 		eqn_free(e);
415 	}
416 
417 	r->first_eqn = r->last_eqn = r->eqn = NULL;
418 
419 	while (r->last)
420 		roffnode_pop(r);
421 
422 	roff_freestr(r->strtab);
423 	roff_freestr(r->xmbtab);
424 
425 	r->strtab = r->xmbtab = NULL;
426 
427 	if (r->xtab)
428 		for (i = 0; i < 128; i++)
429 			free(r->xtab[i].p);
430 
431 	free(r->xtab);
432 	r->xtab = NULL;
433 }
434 
435 void
436 roff_reset(struct roff *r)
437 {
438 	int		 i;
439 
440 	roff_free1(r);
441 
442 	r->control = 0;
443 	memset(&r->regs, 0, sizeof(struct reg) * REG__MAX);
444 
445 	for (i = 0; i < PREDEFS_MAX; i++)
446 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
447 }
448 
449 
/*
 * Destroy a parse context: release everything it owns via
 * roff_free1(), then the context itself.
 */
void
roff_free(struct roff *r)
{

	roff_free1(r);
	free(r);
}
457 
458 
459 struct roff *
460 roff_alloc(enum mparset type, struct mparse *parse)
461 {
462 	struct roff	*r;
463 	int		 i;
464 
465 	r = mandoc_calloc(1, sizeof(struct roff));
466 	r->parsetype = type;
467 	r->parse = parse;
468 	r->rstackpos = -1;
469 
470 	roffhash_init();
471 
472 	for (i = 0; i < PREDEFS_MAX; i++)
473 		roff_setstr(r, predefs[i].name, predefs[i].str, 0);
474 
475 	return(r);
476 }
477 
/*
 * Pre-filter each and every line for reserved words (one beginning with
 * `\*', e.g., `\*(ab').  These must be handled before the actual line
 * is processed.
 * This also checks the syntax of regular escapes.
 *
 * String references of the forms `\*x', `\*(xx' and `\*[name]' are
 * replaced in a freshly allocated buffer; *bufp and *szp are updated
 * and the old buffer is freed.  After each replacement the scan
 * restarts at the position of the replaced escape, so replacement text
 * is itself expanded.
 *
 * Returns ROFF_CONT in the normal case, including after a malformed
 * escape (in which case the rest of the line is left unexpanded), and
 * ROFF_IGN once more than EXPAND_LIMIT expansions have been done.
 */
static enum rofferr
roff_res(struct roff *r, char **bufp, size_t *szp, int ln, int pos)
{
	enum mandoc_esc	 esc;
	const char	*stesc;	/* start of an escape sequence ('\\') */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	int		 i, maxl, expand_count;
	size_t		 nsz;
	char		*n;

	expand_count = 0;

again:
	cp = *bufp + pos;
	while (NULL != (cp = strchr(cp, '\\'))) {
		stesc = cp++;

		/*
		 * The second character must be an asterisk.
		 * If it isn't, skip it anyway:  It is escaped,
		 * so it can't start another escape sequence.
		 */

		if ('\0' == *cp)
			return(ROFF_CONT);

		if ('*' != *cp) {
			/* Not a string reference: only validate it. */
			res = cp;
			esc = mandoc_escape(&cp, NULL, NULL);
			if (ESCAPE_ERROR != esc)
				continue;
			cp = res;
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			return(ROFF_CONT);
		}

		cp++;

		/*
		 * The third character decides the length
		 * of the name of the string.
		 * Save a pointer to the name.
		 */

		switch (*cp) {
		case ('\0'):
			return(ROFF_CONT);
		case ('('):
			cp++;
			maxl = 2;
			break;
		case ('['):
			/* maxl == 0 means: read up to the closing ']'. */
			cp++;
			maxl = 0;
			break;
		default:
			maxl = 1;
			break;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		for (i = 0; 0 == maxl || i < maxl; i++, cp++) {
			if ('\0' == *cp) {
				mandoc_msg
					(MANDOCERR_BADESCAPE,
					 r->parse, ln,
					 (int)(stesc - *bufp), NULL);
				return(ROFF_CONT);
			}
			if (0 == maxl && ']' == *cp)
				break;
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		res = roff_getstrn(r, stnam, (size_t)i);

		if (NULL == res) {
			/* Undefined strings expand to nothing. */
			mandoc_msg
				(MANDOCERR_BADESCAPE, r->parse,
				 ln, (int)(stesc - *bufp), NULL);
			res = "";
		}

		/* Replace the escape sequence by the string. */

		pos = stesc - *bufp;

		nsz = *szp + strlen(res) + 1;
		n = mandoc_malloc(nsz);

		/* Prefix, replacement, then the tail (skipping a `]'). */
		strlcpy(n, *bufp, (size_t)(stesc - *bufp + 1));
		strlcat(n, res, nsz);
		strlcat(n, cp + (maxl ? 0 : 1), nsz);

		free(*bufp);

		*bufp = n;
		*szp = nsz;

		if (EXPAND_LIMIT >= ++expand_count)
			goto again;

		/* Just leave the string unexpanded. */
		mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, ln, pos, NULL);
		return(ROFF_IGN);
	}
	return(ROFF_CONT);
}
602 
603 /*
604  * Process text streams:
605  * Convert all breakable hyphens into ASCII_HYPH.
606  * Decrement and spring input line trap.
607  */
608 static enum rofferr
609 roff_parsetext(char **bufp, size_t *szp, int pos, int *offs)
610 {
611 	size_t		 sz;
612 	const char	*start;
613 	char		*p;
614 	int		 isz;
615 	enum mandoc_esc	 esc;
616 
617 	start = p = *bufp + pos;
618 
619 	while ('\0' != *p) {
620 		sz = strcspn(p, "-\\");
621 		p += sz;
622 
623 		if ('\0' == *p)
624 			break;
625 
626 		if ('\\' == *p) {
627 			/* Skip over escapes. */
628 			p++;
629 			esc = mandoc_escape
630 				((const char **)&p, NULL, NULL);
631 			if (ESCAPE_ERROR == esc)
632 				break;
633 			continue;
634 		} else if (p == start) {
635 			p++;
636 			continue;
637 		}
638 
639 		if (isalpha((unsigned char)p[-1]) &&
640 		    isalpha((unsigned char)p[1]))
641 			*p = ASCII_HYPH;
642 		p++;
643 	}
644 
645 	/* Spring the input line trap. */
646 	if (1 == roffit_lines) {
647 		isz = asprintf(&p, "%s\n.%s", *bufp, roffit_macro);
648 		if (-1 == isz) {
649 			perror(NULL);
650 			exit((int)MANDOCLEVEL_SYSERR);
651 		}
652 		free(*bufp);
653 		*bufp = p;
654 		*szp = isz + 1;
655 		*offs = 0;
656 		free(roffit_macro);
657 		roffit_lines = 0;
658 		return(ROFF_REPARSE);
659 	} else if (1 < roffit_lines)
660 		--roffit_lines;
661 	return(ROFF_CONT);
662 }
663 
664 enum rofferr
665 roff_parseln(struct roff *r, int ln, char **bufp,
666 		size_t *szp, int pos, int *offs)
667 {
668 	enum rofft	 t;
669 	enum rofferr	 e;
670 	int		 ppos, ctl;
671 
672 	/*
673 	 * Run the reserved-word filter only if we have some reserved
674 	 * words to fill in.
675 	 */
676 
677 	e = roff_res(r, bufp, szp, ln, pos);
678 	if (ROFF_IGN == e)
679 		return(e);
680 	assert(ROFF_CONT == e);
681 
682 	ppos = pos;
683 	ctl = roff_getcontrol(r, *bufp, &pos);
684 
685 	/*
686 	 * First, if a scope is open and we're not a macro, pass the
687 	 * text through the macro's filter.  If a scope isn't open and
688 	 * we're not a macro, just let it through.
689 	 * Finally, if there's an equation scope open, divert it into it
690 	 * no matter our state.
691 	 */
692 
693 	if (r->last && ! ctl) {
694 		t = r->last->tok;
695 		assert(roffs[t].text);
696 		e = (*roffs[t].text)
697 			(r, t, bufp, szp, ln, pos, pos, offs);
698 		assert(ROFF_IGN == e || ROFF_CONT == e);
699 		if (ROFF_CONT != e)
700 			return(e);
701 		if (r->eqn)
702 			return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
703 		if (r->tbl)
704 			return(tbl_read(r->tbl, ln, *bufp, pos));
705 		return(roff_parsetext(bufp, szp, pos, offs));
706 	} else if ( ! ctl) {
707 		if (r->eqn)
708 			return(eqn_read(&r->eqn, ln, *bufp, pos, offs));
709 		if (r->tbl)
710 			return(tbl_read(r->tbl, ln, *bufp, pos));
711 		return(roff_parsetext(bufp, szp, pos, offs));
712 	} else if (r->eqn)
713 		return(eqn_read(&r->eqn, ln, *bufp, ppos, offs));
714 
715 	/*
716 	 * If a scope is open, go to the child handler for that macro,
717 	 * as it may want to preprocess before doing anything with it.
718 	 * Don't do so if an equation is open.
719 	 */
720 
721 	if (r->last) {
722 		t = r->last->tok;
723 		assert(roffs[t].sub);
724 		return((*roffs[t].sub)
725 				(r, t, bufp, szp,
726 				 ln, ppos, pos, offs));
727 	}
728 
729 	/*
730 	 * Lastly, as we've no scope open, try to look up and execute
731 	 * the new macro.  If no macro is found, simply return and let
732 	 * the compilers handle it.
733 	 */
734 
735 	if (ROFF_MAX == (t = roff_parse(r, *bufp, &pos)))
736 		return(ROFF_CONT);
737 
738 	assert(roffs[t].proc);
739 	return((*roffs[t].proc)
740 			(r, t, bufp, szp,
741 			 ln, ppos, pos, offs));
742 }
743 
744 
745 void
746 roff_endparse(struct roff *r)
747 {
748 
749 	if (r->last)
750 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
751 				r->last->line, r->last->col, NULL);
752 
753 	if (r->eqn) {
754 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
755 				r->eqn->eqn.ln, r->eqn->eqn.pos, NULL);
756 		eqn_end(&r->eqn);
757 	}
758 
759 	if (r->tbl) {
760 		mandoc_msg(MANDOCERR_SCOPEEXIT, r->parse,
761 				r->tbl->line, r->tbl->pos, NULL);
762 		tbl_end(&r->tbl);
763 	}
764 }
765 
766 /*
767  * Parse a roff node's type from the input buffer.  This must be in the
768  * form of ".foo xxx" in the usual way.
769  */
770 static enum rofft
771 roff_parse(struct roff *r, const char *buf, int *pos)
772 {
773 	const char	*mac;
774 	size_t		 maclen;
775 	enum rofft	 t;
776 
777 	if ('\0' == buf[*pos] || '"' == buf[*pos] ||
778 			'\t' == buf[*pos] || ' ' == buf[*pos])
779 		return(ROFF_MAX);
780 
781 	/*
782 	 * We stop the macro parse at an escape, tab, space, or nil.
783 	 * However, `\}' is also a valid macro, so make sure we don't
784 	 * clobber it by seeing the `\' as the end of token.
785 	 */
786 
787 	mac = buf + *pos;
788 	maclen = strcspn(mac + 1, " \\\t\0") + 1;
789 
790 	t = (r->current_string = roff_getstrn(r, mac, maclen))
791 	    ? ROFF_USERDEF : roffhash_find(mac, maclen);
792 
793 	*pos += (int)maclen;
794 
795 	while (buf[*pos] && ' ' == buf[*pos])
796 		(*pos)++;
797 
798 	return(t);
799 }
800 
801 /* ARGSUSED */
802 static enum rofferr
803 roff_cblock(ROFF_ARGS)
804 {
805 
806 	/*
807 	 * A block-close `..' should only be invoked as a child of an
808 	 * ignore macro, otherwise raise a warning and just ignore it.
809 	 */
810 
811 	if (NULL == r->last) {
812 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
813 		return(ROFF_IGN);
814 	}
815 
816 	switch (r->last->tok) {
817 	case (ROFF_am):
818 		/* FALLTHROUGH */
819 	case (ROFF_ami):
820 		/* FALLTHROUGH */
821 	case (ROFF_am1):
822 		/* FALLTHROUGH */
823 	case (ROFF_de):
824 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
825 		/* FALLTHROUGH */
826 	case (ROFF_dei):
827 		/* FALLTHROUGH */
828 	case (ROFF_ig):
829 		break;
830 	default:
831 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
832 		return(ROFF_IGN);
833 	}
834 
835 	if ((*bufp)[pos])
836 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
837 
838 	roffnode_pop(r);
839 	roffnode_cleanscope(r);
840 	return(ROFF_IGN);
841 
842 }
843 
844 
845 static void
846 roffnode_cleanscope(struct roff *r)
847 {
848 
849 	while (r->last) {
850 		if (--r->last->endspan != 0)
851 			break;
852 		roffnode_pop(r);
853 	}
854 }
855 
856 
857 /* ARGSUSED */
858 static enum rofferr
859 roff_ccond(ROFF_ARGS)
860 {
861 
862 	if (NULL == r->last) {
863 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
864 		return(ROFF_IGN);
865 	}
866 
867 	switch (r->last->tok) {
868 	case (ROFF_el):
869 		/* FALLTHROUGH */
870 	case (ROFF_ie):
871 		/* FALLTHROUGH */
872 	case (ROFF_if):
873 		break;
874 	default:
875 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
876 		return(ROFF_IGN);
877 	}
878 
879 	if (r->last->endspan > -1) {
880 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
881 		return(ROFF_IGN);
882 	}
883 
884 	if ((*bufp)[pos])
885 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
886 
887 	roffnode_pop(r);
888 	roffnode_cleanscope(r);
889 	return(ROFF_IGN);
890 }
891 
892 
893 /* ARGSUSED */
894 static enum rofferr
895 roff_block(ROFF_ARGS)
896 {
897 	int		sv;
898 	size_t		sz;
899 	char		*name;
900 
901 	name = NULL;
902 
903 	if (ROFF_ig != tok) {
904 		if ('\0' == (*bufp)[pos]) {
905 			mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);
906 			return(ROFF_IGN);
907 		}
908 
909 		/*
910 		 * Re-write `de1', since we don't really care about
911 		 * groff's strange compatibility mode, into `de'.
912 		 */
913 
914 		if (ROFF_de1 == tok)
915 			tok = ROFF_de;
916 		if (ROFF_de == tok)
917 			name = *bufp + pos;
918 		else
919 			mandoc_msg(MANDOCERR_REQUEST, r->parse, ln, ppos,
920 			    roffs[tok].name);
921 
922 		while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
923 			pos++;
924 
925 		while (isspace((unsigned char)(*bufp)[pos]))
926 			(*bufp)[pos++] = '\0';
927 	}
928 
929 	roffnode_push(r, tok, name, ln, ppos);
930 
931 	/*
932 	 * At the beginning of a `de' macro, clear the existing string
933 	 * with the same name, if there is one.  New content will be
934 	 * added from roff_block_text() in multiline mode.
935 	 */
936 
937 	if (ROFF_de == tok)
938 		roff_setstr(r, name, "", 0);
939 
940 	if ('\0' == (*bufp)[pos])
941 		return(ROFF_IGN);
942 
943 	/* If present, process the custom end-of-line marker. */
944 
945 	sv = pos;
946 	while ((*bufp)[pos] && ! isspace((unsigned char)(*bufp)[pos]))
947 		pos++;
948 
949 	/*
950 	 * Note: groff does NOT like escape characters in the input.
951 	 * Instead of detecting this, we're just going to let it fly and
952 	 * to hell with it.
953 	 */
954 
955 	assert(pos > sv);
956 	sz = (size_t)(pos - sv);
957 
958 	if (1 == sz && '.' == (*bufp)[sv])
959 		return(ROFF_IGN);
960 
961 	r->last->end = mandoc_malloc(sz + 1);
962 
963 	memcpy(r->last->end, *bufp + sv, sz);
964 	r->last->end[(int)sz] = '\0';
965 
966 	if ((*bufp)[pos])
967 		mandoc_msg(MANDOCERR_ARGSLOST, r->parse, ln, pos, NULL);
968 
969 	return(ROFF_IGN);
970 }
971 
972 
/*
 * Handle a control line while a block (`ig', `de', ...) is the
 * innermost open scope.
 *
 * A line naming the block's custom end token closes the block; it is
 * rerun (ROFF_RERUN) if the text after the token names a request or
 * user-defined macro, else ignored.  A `..' line is handed to
 * roff_cblock().  Any other line is appended to the macro being
 * defined if the block is a `de', and ignored in all other blocks.
 */
/* ARGSUSED */
static enum rofferr
roff_block_sub(ROFF_ARGS)
{
	enum rofft	t;
	int		i, j;

	/*
	 * First check whether a custom macro exists at this level.  If
	 * it does, then check against it.  This is some of groff's
	 * stranger behaviours.  If we encountered a custom end-scope
	 * tag and that tag also happens to be a "real" macro, then we
	 * need to try interpreting it again as a real macro.  If it's
	 * not, then return ignore.  Else continue.
	 */

	if (r->last->end) {
		/* Compare the end token against the line at pos. */
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
			if ((*bufp)[i] != r->last->end[j])
				break;

		/* Full match, and the token ends right there? */
		if ('\0' == r->last->end[j] &&
				('\0' == (*bufp)[i] ||
				 ' ' == (*bufp)[i] ||
				 '\t' == (*bufp)[i])) {
			roffnode_pop(r);
			roffnode_cleanscope(r);

			while (' ' == (*bufp)[i] || '\t' == (*bufp)[i])
				i++;

			pos = i;
			if (ROFF_MAX != roff_parse(r, *bufp, &pos))
				return(ROFF_RERUN);
			return(ROFF_IGN);
		}
	}

	/*
	 * If we have no custom end-query or lookup failed, then try
	 * pulling it out of the hashtable.
	 */

	t = roff_parse(r, *bufp, &pos);

	/*
	 * Macros other than block-end are only significant
	 * in `de' blocks; elsewhere, simply throw them away.
	 */
	if (ROFF_cblock != t) {
		/* Store the whole line, control character included. */
		if (ROFF_de == tok)
			roff_setstr(r, r->last->name, *bufp + ppos, 1);
		return(ROFF_IGN);
	}

	assert(roffs[t].proc);
	return((*roffs[t].proc)(r, t, bufp, szp,
				ln, ppos, pos, offs));
}
1032 
1033 
1034 /* ARGSUSED */
1035 static enum rofferr
1036 roff_block_text(ROFF_ARGS)
1037 {
1038 
1039 	if (ROFF_de == tok)
1040 		roff_setstr(r, r->last->name, *bufp + pos, 1);
1041 
1042 	return(ROFF_IGN);
1043 }
1044 
1045 
/*
 * Handle a control line while a conditional is the innermost open
 * scope.
 *
 * The rule of the conditional is saved before roffnode_cleanscope()
 * runs, because that call may pop the node.  Known requests are
 * executed if the rule is ALLOW, if they are `\}', or if they are
 * flagged ROFFMAC_STRUCT.  Otherwise the line is searched for a
 * closing `\}', which is removed from the buffer and processed, and
 * the line is ignored under DENY or passed on under ALLOW.
 */
/* ARGSUSED */
static enum rofferr
roff_cond_sub(ROFF_ARGS)
{
	enum rofft	 t;
	enum roffrule	 rr;
	char		*ep;

	rr = r->last->rule;
	roffnode_cleanscope(r);
	t = roff_parse(r, *bufp, &pos);

	/*
	 * Fully handle known macros when they are structurally
	 * required or when the conditional evaluated to true.
	 */

	if ((ROFF_MAX != t) &&
	    (ROFF_ccond == t || ROFFRULE_ALLOW == rr ||
	     ROFFMAC_STRUCT & roffs[t].flags)) {
		assert(roffs[t].proc);
		return((*roffs[t].proc)(r, t, bufp, szp,
					ln, ppos, pos, offs));
	}

	/* Always check for the closing delimiter `\}'. */

	ep = &(*bufp)[pos];
	while (NULL != (ep = strchr(ep, '\\'))) {
		if ('}' != *(++ep))
			continue;

		/*
		 * If we're at the end of line, then just chop
		 * off the \} and resize the buffer.
		 * If we aren't, then convert it to spaces.
		 */

		if ('\0' == *(ep + 1)) {
			*--ep = '\0';
			*szp -= 2;
		} else
			*(ep - 1) = *ep = ' ';

		/* Only the first `\}' on the line is handled here. */
		roff_ccond(r, ROFF_ccond, bufp, szp,
				ln, pos, pos + 2, offs);
		break;
	}
	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
}
1096 
1097 /* ARGSUSED */
1098 static enum rofferr
1099 roff_cond_text(ROFF_ARGS)
1100 {
1101 	char		*ep;
1102 	enum roffrule	 rr;
1103 
1104 	rr = r->last->rule;
1105 	roffnode_cleanscope(r);
1106 
1107 	ep = &(*bufp)[pos];
1108 	for ( ; NULL != (ep = strchr(ep, '\\')); ep++) {
1109 		ep++;
1110 		if ('}' != *ep)
1111 			continue;
1112 		*ep = '&';
1113 		roff_ccond(r, ROFF_ccond, bufp, szp,
1114 				ln, pos, pos + 2, offs);
1115 	}
1116 	return(ROFFRULE_DENY == rr ? ROFF_IGN : ROFF_CONT);
1117 }
1118 
1119 static enum roffrule
1120 roff_evalcond(const char *v, int *pos)
1121 {
1122 
1123 	switch (v[*pos]) {
1124 	case ('n'):
1125 		(*pos)++;
1126 		return(ROFFRULE_ALLOW);
1127 	case ('e'):
1128 		/* FALLTHROUGH */
1129 	case ('o'):
1130 		/* FALLTHROUGH */
1131 	case ('t'):
1132 		(*pos)++;
1133 		return(ROFFRULE_DENY);
1134 	default:
1135 		break;
1136 	}
1137 
1138 	while (v[*pos] && ' ' != v[*pos])
1139 		(*pos)++;
1140 	return(ROFFRULE_DENY);
1141 }
1142 
/*
 * Handler for requests that are recognised but have no effect:
 * the line is dropped without looking at any argument.
 */
/* ARGSUSED */
static enum rofferr
roff_line_ignore(ROFF_ARGS)
{

	return(ROFF_IGN);
}
1150 
/*
 * Open a conditional: `if', `ie' or `el'.
 *
 * A node is pushed and its rule determined: from roff_evalcond() for
 * `if' and `ie', from the rule stack for `el'.  `ie' pushes the
 * negation of its result for a later `el'.  A conditional nested
 * inside a DENY scope is itself DENY.  The rest of the line selects
 * the scope (next-line, multiline `\{', or single-line), which is
 * recorded in endspan.
 *
 * Returns ROFF_RERUN with *offs set to the start of the body, or
 * ROFF_ERR if the rule stack is full; note that the node stays pushed
 * in that case.
 */
/* ARGSUSED */
static enum rofferr
roff_cond(ROFF_ARGS)
{

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = ROFF_el == tok ?
		(r->rstackpos < 0 ?
		 ROFFRULE_DENY : r->rstack[r->rstackpos--]) :
		roff_evalcond(*bufp, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (ROFF_ie == tok) {
		if (r->rstackpos == RSTACK_MAX - 1) {
			mandoc_msg(MANDOCERR_MEM,
				r->parse, ln, ppos, NULL);
			return(ROFF_ERR);
		}
		r->rstack[++r->rstackpos] =
			ROFFRULE_DENY == r->last->rule ?
			ROFFRULE_ALLOW : ROFFRULE_DENY;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && ROFFRULE_DENY == r->last->parent->rule)
		r->last->rule = ROFFRULE_DENY;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 */

	if ('\0' == (*bufp)[pos]) {
		/* 2: survives the cleanscope of this and the next line. */
		r->last->endspan = 2;
		goto out;
	}

	while (' ' == (*bufp)[pos])
		pos++;

	/* An opening brace requests multiline scope. */

	if ('\\' == (*bufp)[pos] && '{' == (*bufp)[pos + 1]) {
		/* Negative: never reaches zero, closed only by `\}'. */
		r->last->endspan = -1;
		pos += 2;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if ('\0' == (*bufp)[pos])
		mandoc_msg(MANDOCERR_NOARGS, r->parse, ln, ppos, NULL);

	r->last->endspan = 1;

out:
	*offs = pos;
	return(ROFF_RERUN);
}
1229 
1230 
1231 /* ARGSUSED */
1232 static enum rofferr
1233 roff_ds(ROFF_ARGS)
1234 {
1235 	char		*name, *string;
1236 
1237 	/*
1238 	 * A symbol is named by the first word following the macro
1239 	 * invocation up to a space.  Its value is anything after the
1240 	 * name's trailing whitespace and optional double-quote.  Thus,
1241 	 *
1242 	 *  [.ds foo "bar  "     ]
1243 	 *
1244 	 * will have `bar  "     ' as its value.
1245 	 */
1246 
1247 	string = *bufp + pos;
1248 	name = roff_getname(r, &string, ln, pos);
1249 	if ('\0' == *name)
1250 		return(ROFF_IGN);
1251 
1252 	/* Read past initial double-quote. */
1253 	if ('"' == *string)
1254 		string++;
1255 
1256 	/* The rest is the value. */
1257 	roff_setstr(r, name, string, 0);
1258 	return(ROFF_IGN);
1259 }
1260 
1261 int
1262 roff_regisset(const struct roff *r, enum regs reg)
1263 {
1264 
1265 	return(r->regs[(int)reg].set);
1266 }
1267 
1268 unsigned int
1269 roff_regget(const struct roff *r, enum regs reg)
1270 {
1271 
1272 	return(r->regs[(int)reg].u);
1273 }
1274 
1275 void
1276 roff_regunset(struct roff *r, enum regs reg)
1277 {
1278 
1279 	r->regs[(int)reg].set = 0;
1280 }
1281 
1282 /* ARGSUSED */
1283 static enum rofferr
1284 roff_nr(ROFF_ARGS)
1285 {
1286 	const char	*key;
1287 	char		*val;
1288 	int		 iv;
1289 
1290 	val = *bufp + pos;
1291 	key = roff_getname(r, &val, ln, pos);
1292 
1293 	if (0 == strcmp(key, "nS")) {
1294 		r->regs[(int)REG_nS].set = 1;
1295 		if ((iv = mandoc_strntoi(val, strlen(val), 10)) >= 0)
1296 			r->regs[(int)REG_nS].u = (unsigned)iv;
1297 		else
1298 			r->regs[(int)REG_nS].u = 0u;
1299 	}
1300 
1301 	return(ROFF_IGN);
1302 }
1303 
1304 /* ARGSUSED */
1305 static enum rofferr
1306 roff_rm(ROFF_ARGS)
1307 {
1308 	const char	 *name;
1309 	char		 *cp;
1310 
1311 	cp = *bufp + pos;
1312 	while ('\0' != *cp) {
1313 		name = roff_getname(r, &cp, ln, (int)(cp - *bufp));
1314 		if ('\0' != *name)
1315 			roff_setstr(r, name, NULL, 0);
1316 	}
1317 	return(ROFF_IGN);
1318 }
1319 
1320 /* ARGSUSED */
1321 static enum rofferr
1322 roff_it(ROFF_ARGS)
1323 {
1324 	char		*cp;
1325 	size_t		 len;
1326 	int		 iv;
1327 
1328 	/* Parse the number of lines. */
1329 	cp = *bufp + pos;
1330 	len = strcspn(cp, " \t");
1331 	cp[len] = '\0';
1332 	if ((iv = mandoc_strntoi(cp, len, 10)) <= 0) {
1333 		mandoc_msg(MANDOCERR_NUMERIC, r->parse,
1334 				ln, ppos, *bufp + 1);
1335 		return(ROFF_IGN);
1336 	}
1337 	cp += len + 1;
1338 
1339 	/* Arm the input line trap. */
1340 	roffit_lines = iv;
1341 	roffit_macro = mandoc_strdup(cp);
1342 	return(ROFF_IGN);
1343 }
1344 
1345 /* ARGSUSED */
1346 static enum rofferr
1347 roff_Dd(ROFF_ARGS)
1348 {
1349 	const char *const	*cp;
1350 
1351 	if (MPARSE_MDOC != r->parsetype)
1352 		for (cp = __mdoc_reserved; *cp; cp++)
1353 			roff_setstr(r, *cp, NULL, 0);
1354 
1355 	return(ROFF_CONT);
1356 }
1357 
1358 /* ARGSUSED */
1359 static enum rofferr
1360 roff_TH(ROFF_ARGS)
1361 {
1362 	const char *const	*cp;
1363 
1364 	if (MPARSE_MDOC != r->parsetype)
1365 		for (cp = __man_reserved; *cp; cp++)
1366 			roff_setstr(r, *cp, NULL, 0);
1367 
1368 	return(ROFF_CONT);
1369 }
1370 
1371 /* ARGSUSED */
1372 static enum rofferr
1373 roff_TE(ROFF_ARGS)
1374 {
1375 
1376 	if (NULL == r->tbl)
1377 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1378 	else
1379 		tbl_end(&r->tbl);
1380 
1381 	return(ROFF_IGN);
1382 }
1383 
1384 /* ARGSUSED */
1385 static enum rofferr
1386 roff_T_(ROFF_ARGS)
1387 {
1388 
1389 	if (NULL == r->tbl)
1390 		mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1391 	else
1392 		tbl_restart(ppos, ln, r->tbl);
1393 
1394 	return(ROFF_IGN);
1395 }
1396 
#if 0
/*
 * Currently compiled out.
 * Returns 1 if an equation is open and eqn_end() reports ROFF_EQN
 * for it, 0 otherwise.
 */
static int
roff_closeeqn(struct roff *r)
{

	if (NULL == r->eqn)
		return 0;

	return ROFF_EQN == eqn_end(&r->eqn) ? 1 : 0;
}
#endif
1405 
1406 static void
1407 roff_openeqn(struct roff *r, const char *name, int line,
1408 		int offs, const char *buf)
1409 {
1410 	struct eqn_node *e;
1411 	int		 poff;
1412 
1413 	assert(NULL == r->eqn);
1414 	e = eqn_alloc(name, offs, line, r->parse);
1415 
1416 	if (r->last_eqn)
1417 		r->last_eqn->next = e;
1418 	else
1419 		r->first_eqn = r->last_eqn = e;
1420 
1421 	r->eqn = r->last_eqn = e;
1422 
1423 	if (buf) {
1424 		poff = 0;
1425 		eqn_read(&r->eqn, line, buf, offs, &poff);
1426 	}
1427 }
1428 
1429 /* ARGSUSED */
1430 static enum rofferr
1431 roff_EQ(ROFF_ARGS)
1432 {
1433 
1434 	roff_openeqn(r, *bufp + pos, ln, ppos, NULL);
1435 	return(ROFF_IGN);
1436 }
1437 
1438 /* ARGSUSED */
1439 static enum rofferr
1440 roff_EN(ROFF_ARGS)
1441 {
1442 
1443 	mandoc_msg(MANDOCERR_NOSCOPE, r->parse, ln, ppos, NULL);
1444 	return(ROFF_IGN);
1445 }
1446 
1447 /* ARGSUSED */
1448 static enum rofferr
1449 roff_TS(ROFF_ARGS)
1450 {
1451 	struct tbl_node	*tbl;
1452 
1453 	if (r->tbl) {
1454 		mandoc_msg(MANDOCERR_SCOPEBROKEN, r->parse, ln, ppos, NULL);
1455 		tbl_end(&r->tbl);
1456 	}
1457 
1458 	tbl = tbl_alloc(ppos, ln, r->parse);
1459 
1460 	if (r->last_tbl)
1461 		r->last_tbl->next = tbl;
1462 	else
1463 		r->first_tbl = r->last_tbl = tbl;
1464 
1465 	r->tbl = r->last_tbl = tbl;
1466 	return(ROFF_IGN);
1467 }
1468 
1469 /* ARGSUSED */
1470 static enum rofferr
1471 roff_cc(ROFF_ARGS)
1472 {
1473 	const char	*p;
1474 
1475 	p = *bufp + pos;
1476 
1477 	if ('\0' == *p || '.' == (r->control = *p++))
1478 		r->control = 0;
1479 
1480 	if ('\0' != *p)
1481 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1482 
1483 	return(ROFF_IGN);
1484 }
1485 
1486 /* ARGSUSED */
1487 static enum rofferr
1488 roff_tr(ROFF_ARGS)
1489 {
1490 	const char	*p, *first, *second;
1491 	size_t		 fsz, ssz;
1492 	enum mandoc_esc	 esc;
1493 
1494 	p = *bufp + pos;
1495 
1496 	if ('\0' == *p) {
1497 		mandoc_msg(MANDOCERR_ARGCOUNT, r->parse, ln, ppos, NULL);
1498 		return(ROFF_IGN);
1499 	}
1500 
1501 	while ('\0' != *p) {
1502 		fsz = ssz = 1;
1503 
1504 		first = p++;
1505 		if ('\\' == *first) {
1506 			esc = mandoc_escape(&p, NULL, NULL);
1507 			if (ESCAPE_ERROR == esc) {
1508 				mandoc_msg
1509 					(MANDOCERR_BADESCAPE, r->parse,
1510 					 ln, (int)(p - *bufp), NULL);
1511 				return(ROFF_IGN);
1512 			}
1513 			fsz = (size_t)(p - first);
1514 		}
1515 
1516 		second = p++;
1517 		if ('\\' == *second) {
1518 			esc = mandoc_escape(&p, NULL, NULL);
1519 			if (ESCAPE_ERROR == esc) {
1520 				mandoc_msg
1521 					(MANDOCERR_BADESCAPE, r->parse,
1522 					 ln, (int)(p - *bufp), NULL);
1523 				return(ROFF_IGN);
1524 			}
1525 			ssz = (size_t)(p - second);
1526 		} else if ('\0' == *second) {
1527 			mandoc_msg(MANDOCERR_ARGCOUNT, r->parse,
1528 					ln, (int)(p - *bufp), NULL);
1529 			second = " ";
1530 			p--;
1531 		}
1532 
1533 		if (fsz > 1) {
1534 			roff_setstrn(&r->xmbtab, first,
1535 					fsz, second, ssz, 0);
1536 			continue;
1537 		}
1538 
1539 		if (NULL == r->xtab)
1540 			r->xtab = mandoc_calloc
1541 				(128, sizeof(struct roffstr));
1542 
1543 		free(r->xtab[(int)*first].p);
1544 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
1545 		r->xtab[(int)*first].sz = ssz;
1546 	}
1547 
1548 	return(ROFF_IGN);
1549 }
1550 
1551 /* ARGSUSED */
1552 static enum rofferr
1553 roff_so(ROFF_ARGS)
1554 {
1555 	char *name;
1556 
1557 	mandoc_msg(MANDOCERR_SO, r->parse, ln, ppos, NULL);
1558 
1559 	/*
1560 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
1561 	 * opening anything that's not in our cwd or anything beneath
1562 	 * it.  Thus, explicitly disallow traversing up the file-system
1563 	 * or using absolute paths.
1564 	 */
1565 
1566 	name = *bufp + pos;
1567 	if ('/' == *name || strstr(name, "../") || strstr(name, "/..")) {
1568 		mandoc_msg(MANDOCERR_SOPATH, r->parse, ln, pos, NULL);
1569 		return(ROFF_ERR);
1570 	}
1571 
1572 	*offs = pos;
1573 	return(ROFF_SO);
1574 }
1575 
1576 /* ARGSUSED */
1577 static enum rofferr
1578 roff_userdef(ROFF_ARGS)
1579 {
1580 	const char	 *arg[9];
1581 	char		 *cp, *n1, *n2;
1582 	int		  i;
1583 
1584 	/*
1585 	 * Collect pointers to macro argument strings
1586 	 * and null-terminate them.
1587 	 */
1588 	cp = *bufp + pos;
1589 	for (i = 0; i < 9; i++)
1590 		arg[i] = '\0' == *cp ? "" :
1591 		    mandoc_getarg(r->parse, &cp, ln, &pos);
1592 
1593 	/*
1594 	 * Expand macro arguments.
1595 	 */
1596 	*szp = 0;
1597 	n1 = cp = mandoc_strdup(r->current_string);
1598 	while (NULL != (cp = strstr(cp, "\\$"))) {
1599 		i = cp[2] - '1';
1600 		if (0 > i || 8 < i) {
1601 			/* Not an argument invocation. */
1602 			cp += 2;
1603 			continue;
1604 		}
1605 
1606 		*szp = strlen(n1) - 3 + strlen(arg[i]) + 1;
1607 		n2 = mandoc_malloc(*szp);
1608 
1609 		strlcpy(n2, n1, (size_t)(cp - n1 + 1));
1610 		strlcat(n2, arg[i], *szp);
1611 		strlcat(n2, cp + 3, *szp);
1612 
1613 		cp = n2 + (cp - n1);
1614 		free(n1);
1615 		n1 = n2;
1616 	}
1617 
1618 	/*
1619 	 * Replace the macro invocation
1620 	 * by the expanded macro.
1621 	 */
1622 	free(*bufp);
1623 	*bufp = n1;
1624 	if (0 == *szp)
1625 		*szp = strlen(*bufp) + 1;
1626 
1627 	return(*szp > 1 && '\n' == (*bufp)[(int)*szp - 2] ?
1628 	   ROFF_REPARSE : ROFF_APPEND);
1629 }
1630 
1631 static char *
1632 roff_getname(struct roff *r, char **cpp, int ln, int pos)
1633 {
1634 	char	 *name, *cp;
1635 
1636 	name = *cpp;
1637 	if ('\0' == *name)
1638 		return(name);
1639 
1640 	/* Read until end of name. */
1641 	for (cp = name; '\0' != *cp && ' ' != *cp; cp++) {
1642 		if ('\\' != *cp)
1643 			continue;
1644 		cp++;
1645 		if ('\\' == *cp)
1646 			continue;
1647 		mandoc_msg(MANDOCERR_NAMESC, r->parse, ln, pos, NULL);
1648 		*cp = '\0';
1649 		name = cp;
1650 	}
1651 
1652 	/* Nil-terminate name. */
1653 	if ('\0' != *cp)
1654 		*(cp++) = '\0';
1655 
1656 	/* Read past spaces. */
1657 	while (' ' == *cp)
1658 		cp++;
1659 
1660 	*cpp = cp;
1661 	return(name);
1662 }
1663 
1664 /*
1665  * Store *string into the user-defined string called *name.
1666  * In multiline mode, append to an existing entry and append '\n';
1667  * else replace the existing entry, if there is one.
1668  * To clear an existing entry, call with (*r, *name, NULL, 0).
1669  */
1670 static void
1671 roff_setstr(struct roff *r, const char *name, const char *string,
1672 	int multiline)
1673 {
1674 
1675 	roff_setstrn(&r->strtab, name, strlen(name), string,
1676 			string ? strlen(string) : 0, multiline);
1677 }
1678 
1679 static void
1680 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
1681 		const char *string, size_t stringsz, int multiline)
1682 {
1683 	struct roffkv	*n;
1684 	char		*c;
1685 	int		 i;
1686 	size_t		 oldch, newch;
1687 
1688 	/* Search for an existing string with the same name. */
1689 	n = *r;
1690 
1691 	while (n && strcmp(name, n->key.p))
1692 		n = n->next;
1693 
1694 	if (NULL == n) {
1695 		/* Create a new string table entry. */
1696 		n = mandoc_malloc(sizeof(struct roffkv));
1697 		n->key.p = mandoc_strndup(name, namesz);
1698 		n->key.sz = namesz;
1699 		n->val.p = NULL;
1700 		n->val.sz = 0;
1701 		n->next = *r;
1702 		*r = n;
1703 	} else if (0 == multiline) {
1704 		/* In multiline mode, append; else replace. */
1705 		free(n->val.p);
1706 		n->val.p = NULL;
1707 		n->val.sz = 0;
1708 	}
1709 
1710 	if (NULL == string)
1711 		return;
1712 
1713 	/*
1714 	 * One additional byte for the '\n' in multiline mode,
1715 	 * and one for the terminating '\0'.
1716 	 */
1717 	newch = stringsz + (multiline ? 2u : 1u);
1718 
1719 	if (NULL == n->val.p) {
1720 		n->val.p = mandoc_malloc(newch);
1721 		*n->val.p = '\0';
1722 		oldch = 0;
1723 	} else {
1724 		oldch = n->val.sz;
1725 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
1726 	}
1727 
1728 	/* Skip existing content in the destination buffer. */
1729 	c = n->val.p + (int)oldch;
1730 
1731 	/* Append new content to the destination buffer. */
1732 	i = 0;
1733 	while (i < (int)stringsz) {
1734 		/*
1735 		 * Rudimentary roff copy mode:
1736 		 * Handle escaped backslashes.
1737 		 */
1738 		if ('\\' == string[i] && '\\' == string[i + 1])
1739 			i++;
1740 		*c++ = string[i++];
1741 	}
1742 
1743 	/* Append terminating bytes. */
1744 	if (multiline)
1745 		*c++ = '\n';
1746 
1747 	*c = '\0';
1748 	n->val.sz = (int)(c - n->val.p);
1749 }
1750 
1751 static const char *
1752 roff_getstrn(const struct roff *r, const char *name, size_t len)
1753 {
1754 	const struct roffkv *n;
1755 
1756 	for (n = r->strtab; n; n = n->next)
1757 		if (0 == strncmp(name, n->key.p, len) &&
1758 				'\0' == n->key.p[(int)len])
1759 			return(n->val.p);
1760 
1761 	return(NULL);
1762 }
1763 
1764 static void
1765 roff_freestr(struct roffkv *r)
1766 {
1767 	struct roffkv	 *n, *nn;
1768 
1769 	for (n = r; n; n = nn) {
1770 		free(n->key.p);
1771 		free(n->val.p);
1772 		nn = n->next;
1773 		free(n);
1774 	}
1775 }
1776 
1777 const struct tbl_span *
1778 roff_span(const struct roff *r)
1779 {
1780 
1781 	return(r->tbl ? tbl_span(r->tbl) : NULL);
1782 }
1783 
1784 const struct eqn *
1785 roff_eqn(const struct roff *r)
1786 {
1787 
1788 	return(r->last_eqn ? &r->last_eqn->eqn : NULL);
1789 }
1790 
1791 /*
1792  * Duplicate an input string, making the appropriate character
1793  * conversations (as stipulated by `tr') along the way.
1794  * Returns a heap-allocated string with all the replacements made.
1795  */
1796 char *
1797 roff_strdup(const struct roff *r, const char *p)
1798 {
1799 	const struct roffkv *cp;
1800 	char		*res;
1801 	const char	*pp;
1802 	size_t		 ssz, sz;
1803 	enum mandoc_esc	 esc;
1804 
1805 	if (NULL == r->xmbtab && NULL == r->xtab)
1806 		return(mandoc_strdup(p));
1807 	else if ('\0' == *p)
1808 		return(mandoc_strdup(""));
1809 
1810 	/*
1811 	 * Step through each character looking for term matches
1812 	 * (remember that a `tr' can be invoked with an escape, which is
1813 	 * a glyph but the escape is multi-character).
1814 	 * We only do this if the character hash has been initialised
1815 	 * and the string is >0 length.
1816 	 */
1817 
1818 	res = NULL;
1819 	ssz = 0;
1820 
1821 	while ('\0' != *p) {
1822 		if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) {
1823 			sz = r->xtab[(int)*p].sz;
1824 			res = mandoc_realloc(res, ssz + sz + 1);
1825 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
1826 			ssz += sz;
1827 			p++;
1828 			continue;
1829 		} else if ('\\' != *p) {
1830 			res = mandoc_realloc(res, ssz + 2);
1831 			res[ssz++] = *p++;
1832 			continue;
1833 		}
1834 
1835 		/* Search for term matches. */
1836 		for (cp = r->xmbtab; cp; cp = cp->next)
1837 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
1838 				break;
1839 
1840 		if (NULL != cp) {
1841 			/*
1842 			 * A match has been found.
1843 			 * Append the match to the array and move
1844 			 * forward by its keysize.
1845 			 */
1846 			res = mandoc_realloc
1847 				(res, ssz + cp->val.sz + 1);
1848 			memcpy(res + ssz, cp->val.p, cp->val.sz);
1849 			ssz += cp->val.sz;
1850 			p += (int)cp->key.sz;
1851 			continue;
1852 		}
1853 
1854 		/*
1855 		 * Handle escapes carefully: we need to copy
1856 		 * over just the escape itself, or else we might
1857 		 * do replacements within the escape itself.
1858 		 * Make sure to pass along the bogus string.
1859 		 */
1860 		pp = p++;
1861 		esc = mandoc_escape(&p, NULL, NULL);
1862 		if (ESCAPE_ERROR == esc) {
1863 			sz = strlen(pp);
1864 			res = mandoc_realloc(res, ssz + sz + 1);
1865 			memcpy(res + ssz, pp, sz);
1866 			break;
1867 		}
1868 		/*
1869 		 * We bail out on bad escapes.
1870 		 * No need to warn: we already did so when
1871 		 * roff_res() was called.
1872 		 */
1873 		sz = (int)(p - pp);
1874 		res = mandoc_realloc(res, ssz + sz + 1);
1875 		memcpy(res + ssz, pp, sz);
1876 		ssz += sz;
1877 	}
1878 
1879 	res[(int)ssz] = '\0';
1880 	return(res);
1881 }
1882 
1883 /*
1884  * Find out whether a line is a macro line or not.
1885  * If it is, adjust the current position and return one; if it isn't,
1886  * return zero and don't change the current position.
1887  * If the control character has been set with `.cc', then let that grain
1888  * precedence.
1889  * This is slighly contrary to groff, where using the non-breaking
1890  * control character when `cc' has been invoked will cause the
1891  * non-breaking macro contents to be printed verbatim.
1892  */
1893 int
1894 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
1895 {
1896 	int		pos;
1897 
1898 	pos = *ppos;
1899 
1900 	if (0 != r->control && cp[pos] == r->control)
1901 		pos++;
1902 	else if (0 != r->control)
1903 		return(0);
1904 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
1905 		pos += 2;
1906 	else if ('.' == cp[pos] || '\'' == cp[pos])
1907 		pos++;
1908 	else
1909 		return(0);
1910 
1911 	while (' ' == cp[pos] || '\t' == cp[pos])
1912 		pos++;
1913 
1914 	*ppos = pos;
1915 	return(1);
1916 }
1917