xref: /dragonfly/contrib/mdocml/roff.c (revision de78d61c)
1 /*	$Id: roff.c,v 1.363 2019/02/06 21:11:43 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each kind occupies its own bit, so kinds can be combined into a mask.
 * ROFFDEF_ANY is the union of the four "defined" kinds; ROFFDEF_UNDEF
 * is a separate bit that is deliberately not part of ROFFDEF_ANY.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
60 
61 /* --- data types --------------------------------------------------------- */
62 
/*
 * An incredibly-simple string buffer.
 * The length is stored next to the pointer so that it does not
 * have to be recomputed with strlen().
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
70 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps several such lists (strtab, rentab, xmbtab).
 */
struct	roffkv {
	struct roffstr	 key; /* name used for lookup */
	struct roffstr	 val; /* value associated with the name */
	struct roffkv	*next; /* next in list */
};
79 
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* register value */
	int		 step; /* presumably the auto-increment step; TODO confirm */
	struct roffreg	*next; /* next in list */
};
89 
/*
 * Association of request and macro names with token IDs.
 * These are the entries of the hash tables built by roffhash_alloc();
 * each one is allocated with room for the name right behind the struct.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated name, used as the hash key */
};
97 
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * Contexts live in the mstack array of struct roff; roff_free()
 * releases the argv array of every context.
 */
struct	mctx {
	char		**argv; /* argument vector of the macro call */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the allocated size of argv; TODO confirm */
};
107 
/*
 * The roff parser context.
 * Created by roff_alloc(), returned to a clean state between
 * documents by roff_reset(), and destroyed by roff_free().
 * Both stack positions (mstackpos, rstackpos) are -1 while the
 * respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
135 
/*
 * One entry on the stack of pending roff request blocks.
 * Entries are created by roffnode_push() and destroyed by
 * roffnode_pop(), which also frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* end-rules: custom token */
	int		 endspan; /* end-rules: next-line or infty */
	int		 rule; /* current evaluation rule */
};
146 
/*
 * Common parameter list shared by all request handlers,
 * and the function pointer type for such handlers.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
156 
/*
 * Handlers and flags for one request; the roffs[] table
 * holds one of these per token.  Unused handlers are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags;
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
164 
/*
 * One predefined string: a name and the text it expands to.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer including the trailing
 * comma, so that invocations can be listed without separators.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
172 
173 /* --- function prototypes ------------------------------------------------ */
174 
175 static	int		 roffnode_cleanscope(struct roff *);
176 static	int		 roffnode_pop(struct roff *);
177 static	void		 roffnode_push(struct roff *, enum roff_tok,
178 				const char *, int, int);
179 static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
180 static	int		 roff_als(ROFF_ARGS);
181 static	int		 roff_block(ROFF_ARGS);
182 static	int		 roff_block_text(ROFF_ARGS);
183 static	int		 roff_block_sub(ROFF_ARGS);
184 static	int		 roff_cblock(ROFF_ARGS);
185 static	int		 roff_cc(ROFF_ARGS);
186 static	int		 roff_ccond(struct roff *, int, int);
187 static	int		 roff_char(ROFF_ARGS);
188 static	int		 roff_cond(ROFF_ARGS);
189 static	int		 roff_cond_text(ROFF_ARGS);
190 static	int		 roff_cond_sub(ROFF_ARGS);
191 static	int		 roff_ds(ROFF_ARGS);
192 static	int		 roff_ec(ROFF_ARGS);
193 static	int		 roff_eo(ROFF_ARGS);
194 static	int		 roff_eqndelim(struct roff *, struct buf *, int);
195 static	int		 roff_evalcond(struct roff *r, int, char *, int *);
196 static	int		 roff_evalnum(struct roff *, int,
197 				const char *, int *, int *, int);
198 static	int		 roff_evalpar(struct roff *, int,
199 				const char *, int *, int *, int);
200 static	int		 roff_evalstrcond(const char *, int *);
201 static	int		 roff_expand(struct roff *, struct buf *,
202 				int, int, char);
203 static	void		 roff_free1(struct roff *);
204 static	void		 roff_freereg(struct roffreg *);
205 static	void		 roff_freestr(struct roffkv *);
206 static	size_t		 roff_getname(struct roff *, char **, int, int);
207 static	int		 roff_getnum(const char *, int *, int *, int);
208 static	int		 roff_getop(const char *, int *, char *);
209 static	int		 roff_getregn(struct roff *,
210 				const char *, size_t, char);
211 static	int		 roff_getregro(const struct roff *,
212 				const char *name);
213 static	const char	*roff_getstrn(struct roff *,
214 				const char *, size_t, int *);
215 static	int		 roff_hasregn(const struct roff *,
216 				const char *, size_t);
217 static	int		 roff_insec(ROFF_ARGS);
218 static	int		 roff_it(ROFF_ARGS);
219 static	int		 roff_line_ignore(ROFF_ARGS);
220 static	void		 roff_man_alloc1(struct roff_man *);
221 static	void		 roff_man_free1(struct roff_man *);
222 static	int		 roff_manyarg(ROFF_ARGS);
223 static	int		 roff_noarg(ROFF_ARGS);
224 static	int		 roff_nop(ROFF_ARGS);
225 static	int		 roff_nr(ROFF_ARGS);
226 static	int		 roff_onearg(ROFF_ARGS);
227 static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
228 				int, int);
229 static	int		 roff_parsetext(struct roff *, struct buf *,
230 				int, int *);
231 static	int		 roff_renamed(ROFF_ARGS);
232 static	int		 roff_return(ROFF_ARGS);
233 static	int		 roff_rm(ROFF_ARGS);
234 static	int		 roff_rn(ROFF_ARGS);
235 static	int		 roff_rr(ROFF_ARGS);
236 static	void		 roff_setregn(struct roff *, const char *,
237 				size_t, int, char, int);
238 static	void		 roff_setstr(struct roff *,
239 				const char *, const char *, int);
240 static	void		 roff_setstrn(struct roffkv **, const char *,
241 				size_t, const char *, size_t, int);
242 static	int		 roff_shift(ROFF_ARGS);
243 static	int		 roff_so(ROFF_ARGS);
244 static	int		 roff_tr(ROFF_ARGS);
245 static	int		 roff_Dd(ROFF_ARGS);
246 static	int		 roff_TE(ROFF_ARGS);
247 static	int		 roff_TS(ROFF_ARGS);
248 static	int		 roff_EQ(ROFF_ARGS);
249 static	int		 roff_EN(ROFF_ARGS);
250 static	int		 roff_T_(ROFF_ARGS);
251 static	int		 roff_unsupp(ROFF_ARGS);
252 static	int		 roff_userdef(ROFF_ARGS);
253 
254 /* --- constant data ------------------------------------------------------ */
255 
/* Flag bits for numeric parsing; see the functions named below. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
258 
/*
 * Names of all requests and macros, indexed by token ID.
 * NULL entries mark token IDs that have no name of their own;
 * roffhash_alloc() skips them when building a lookup table.
 * The same name may occur more than once (for example "Dd" and "TH"),
 * so lookup tables are built for sub-ranges of this array only.
 * The table is exported read-only through the roff_name pointer.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
        "box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		NULL
};
const	char *const *roff_name = __roff_name;
368 
/*
 * Dispatch table of request handlers, indexed by token ID.
 * The rows are in the same order as the leading part of the
 * roff_name table; the trailing comment names the request.
 * Each row gives the proc, text, and sub handlers and the flags
 * of struct roffmac.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_unsupp, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* renamed macro (ROFF_RENAMED) */
	{ roff_userdef, NULL, NULL, 0 }  /* user-defined macro (presumably ROFF_USERDEF) */
};
615 
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from predefs.in, presumably as a
 * list of PREDEF() invocations.  PREDEFS_MAX is the array size and
 * therefore has to be at least the number of entries in that file.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
621 
/*
 * File-scope state of the `ce' and `it' requests.
 * All four variables are cleared by roff_reset(); note that
 * roff_reset() only clears the roffit_macro pointer and does
 * not free the string.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
626 
627 
628 /* --- request table ------------------------------------------------------ */
629 
630 struct ohash *
631 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
632 {
633 	struct ohash	*htab;
634 	struct roffreq	*req;
635 	enum roff_tok	 tok;
636 	size_t		 sz;
637 	unsigned int	 slot;
638 
639 	htab = mandoc_malloc(sizeof(*htab));
640 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
641 
642 	for (tok = mintok; tok < maxtok; tok++) {
643 		if (roff_name[tok] == NULL)
644 			continue;
645 		sz = strlen(roff_name[tok]);
646 		req = mandoc_malloc(sizeof(*req) + sz + 1);
647 		req->tok = tok;
648 		memcpy(req->name, roff_name[tok], sz + 1);
649 		slot = ohash_qlookup(htab, req->name);
650 		ohash_insert(htab, slot, req);
651 	}
652 	return htab;
653 }
654 
/*
 * Destroy a table created by roffhash_alloc(): free every entry,
 * then the table internals, then the table itself.
 * A NULL argument is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab == NULL)
		return;

	entry = ohash_first(htab, &iter);
	while (entry != NULL) {
		free(entry);
		entry = ohash_next(htab, &iter);
	}
	ohash_delete(htab);
	free(htab);
}
669 
670 enum roff_tok
671 roffhash_find(struct ohash *htab, const char *name, size_t sz)
672 {
673 	struct roffreq	*req;
674 	const char	*end;
675 
676 	if (sz) {
677 		end = name + sz;
678 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
679 	} else
680 		req = ohash_find(htab, ohash_qlookup(htab, name));
681 	return req == NULL ? TOKEN_NONE : req->tok;
682 }
683 
684 /* --- stack of request blocks -------------------------------------------- */
685 
686 /*
687  * Pop the current node off of the stack of roff instructions currently
688  * pending.
689  */
690 static int
691 roffnode_pop(struct roff *r)
692 {
693 	struct roffnode	*p;
694 	int		 inloop;
695 
696 	p = r->last;
697 	inloop = p->tok == ROFF_while;
698 	r->last = p->parent;
699 	free(p->name);
700 	free(p->end);
701 	free(p);
702 	return inloop;
703 }
704 
705 /*
706  * Push a roff node onto the instruction stack.  This must later be
707  * removed with roffnode_pop().
708  */
709 static void
710 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
711 		int line, int col)
712 {
713 	struct roffnode	*p;
714 
715 	p = mandoc_calloc(1, sizeof(struct roffnode));
716 	p->tok = tok;
717 	if (name)
718 		p->name = mandoc_strdup(name);
719 	p->parent = r->last;
720 	p->line = line;
721 	p->col = col;
722 	p->rule = p->parent ? p->parent->rule : 0;
723 
724 	r->last = p;
725 }
726 
727 /* --- roff parser state data management ---------------------------------- */
728 
729 static void
730 roff_free1(struct roff *r)
731 {
732 	int		 i;
733 
734 	tbl_free(r->first_tbl);
735 	r->first_tbl = r->last_tbl = r->tbl = NULL;
736 
737 	eqn_free(r->last_eqn);
738 	r->last_eqn = r->eqn = NULL;
739 
740 	while (r->mstackpos >= 0)
741 		roff_userret(r);
742 
743 	while (r->last)
744 		roffnode_pop(r);
745 
746 	free (r->rstack);
747 	r->rstack = NULL;
748 	r->rstacksz = 0;
749 	r->rstackpos = -1;
750 
751 	roff_freereg(r->regtab);
752 	r->regtab = NULL;
753 
754 	roff_freestr(r->strtab);
755 	roff_freestr(r->rentab);
756 	roff_freestr(r->xmbtab);
757 	r->strtab = r->rentab = r->xmbtab = NULL;
758 
759 	if (r->xtab)
760 		for (i = 0; i < 128; i++)
761 			free(r->xtab[i].p);
762 	free(r->xtab);
763 	r->xtab = NULL;
764 }
765 
766 void
767 roff_reset(struct roff *r)
768 {
769 	roff_free1(r);
770 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
771 	r->control = '\0';
772 	r->escape = '\\';
773 	roffce_lines = 0;
774 	roffce_node = NULL;
775 	roffit_lines = 0;
776 	roffit_macro = NULL;
777 }
778 
779 void
780 roff_free(struct roff *r)
781 {
782 	int	 	 i;
783 
784 	roff_free1(r);
785 	for (i = 0; i < r->mstacksz; i++)
786 		free(r->mstack[i].argv);
787 	free(r->mstack);
788 	roffhash_free(r->reqtab);
789 	free(r);
790 }
791 
792 struct roff *
793 roff_alloc(int options)
794 {
795 	struct roff	*r;
796 
797 	r = mandoc_calloc(1, sizeof(struct roff));
798 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
799 	r->options = options;
800 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
801 	r->mstackpos = -1;
802 	r->rstackpos = -1;
803 	r->escape = '\\';
804 	return r;
805 }
806 
807 /* --- syntax tree state data management ---------------------------------- */
808 
809 static void
810 roff_man_free1(struct roff_man *man)
811 {
812 	if (man->meta.first != NULL)
813 		roff_node_delete(man, man->meta.first);
814 	free(man->meta.msec);
815 	free(man->meta.vol);
816 	free(man->meta.os);
817 	free(man->meta.arch);
818 	free(man->meta.title);
819 	free(man->meta.name);
820 	free(man->meta.date);
821 	free(man->meta.sodest);
822 }
823 
824 void
825 roff_state_reset(struct roff_man *man)
826 {
827 	man->last = man->meta.first;
828 	man->last_es = NULL;
829 	man->flags = 0;
830 	man->lastsec = man->lastnamed = SEC_NONE;
831 	man->next = ROFF_NEXT_CHILD;
832 	roff_setreg(man->roff, "nS", 0, '=');
833 }
834 
835 static void
836 roff_man_alloc1(struct roff_man *man)
837 {
838 	memset(&man->meta, 0, sizeof(man->meta));
839 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
840 	man->meta.first->type = ROFFT_ROOT;
841 	man->meta.macroset = MACROSET_NONE;
842 	roff_state_reset(man);
843 }
844 
/*
 * Throw away the current document (tree and meta data)
 * and start over with an empty one, keeping the man object.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);
	roff_man_alloc1(man);
}
851 
/*
 * Release the syntax tree and metadata held by man,
 * then the roff_man structure itself.
 */
void
roff_man_free(struct roff_man *man)
{
	roff_man_free1(man);
	free(man);
}
858 
859 struct roff_man *
860 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
861 {
862 	struct roff_man *man;
863 
864 	man = mandoc_calloc(1, sizeof(*man));
865 	man->roff = roff;
866 	man->os_s = os_s;
867 	man->quick = quick;
868 	roff_man_alloc1(man);
869 	roff->man = man;
870 	return man;
871 }
872 
873 /* --- syntax tree handling ----------------------------------------------- */
874 
875 struct roff_node *
876 roff_node_alloc(struct roff_man *man, int line, int pos,
877 	enum roff_type type, int tok)
878 {
879 	struct roff_node	*n;
880 
881 	n = mandoc_calloc(1, sizeof(*n));
882 	n->line = line;
883 	n->pos = pos;
884 	n->tok = tok;
885 	n->type = type;
886 	n->sec = man->lastsec;
887 
888 	if (man->flags & MDOC_SYNOPSIS)
889 		n->flags |= NODE_SYNPRETTY;
890 	else
891 		n->flags &= ~NODE_SYNPRETTY;
892 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
893 		n->flags |= NODE_NOFILL;
894 	else
895 		n->flags &= ~NODE_NOFILL;
896 	if (man->flags & MDOC_NEWLINE)
897 		n->flags |= NODE_LINE;
898 	man->flags &= ~MDOC_NEWLINE;
899 
900 	return n;
901 }
902 
903 void
904 roff_node_append(struct roff_man *man, struct roff_node *n)
905 {
906 
907 	switch (man->next) {
908 	case ROFF_NEXT_SIBLING:
909 		if (man->last->next != NULL) {
910 			n->next = man->last->next;
911 			man->last->next->prev = n;
912 		} else
913 			man->last->parent->last = n;
914 		man->last->next = n;
915 		n->prev = man->last;
916 		n->parent = man->last->parent;
917 		break;
918 	case ROFF_NEXT_CHILD:
919 		if (man->last->child != NULL) {
920 			n->next = man->last->child;
921 			man->last->child->prev = n;
922 		} else
923 			man->last->last = n;
924 		man->last->child = n;
925 		n->parent = man->last;
926 		break;
927 	default:
928 		abort();
929 	}
930 	man->last = n;
931 
932 	switch (n->type) {
933 	case ROFFT_HEAD:
934 		n->parent->head = n;
935 		break;
936 	case ROFFT_BODY:
937 		if (n->end != ENDBODY_NOT)
938 			return;
939 		n->parent->body = n;
940 		break;
941 	case ROFFT_TAIL:
942 		n->parent->tail = n;
943 		break;
944 	default:
945 		return;
946 	}
947 
948 	/*
949 	 * Copy over the normalised-data pointer of our parent.  Not
950 	 * everybody has one, but copying a null pointer is fine.
951 	 */
952 
953 	n->norm = n->parent->norm;
954 	assert(n->parent->type == ROFFT_BLOCK);
955 }
956 
957 void
958 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
959 {
960 	struct roff_node	*n;
961 
962 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
963 	n->string = roff_strdup(man->roff, word);
964 	roff_node_append(man, n);
965 	n->flags |= NODE_VALID | NODE_ENDED;
966 	man->next = ROFF_NEXT_SIBLING;
967 }
968 
969 void
970 roff_word_append(struct roff_man *man, const char *word)
971 {
972 	struct roff_node	*n;
973 	char			*addstr, *newstr;
974 
975 	n = man->last;
976 	addstr = roff_strdup(man->roff, word);
977 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
978 	free(addstr);
979 	free(n->string);
980 	n->string = newstr;
981 	man->next = ROFF_NEXT_SIBLING;
982 }
983 
984 void
985 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
986 {
987 	struct roff_node	*n;
988 
989 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
990 	roff_node_append(man, n);
991 	man->next = ROFF_NEXT_CHILD;
992 }
993 
994 struct roff_node *
995 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
996 {
997 	struct roff_node	*n;
998 
999 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1000 	roff_node_append(man, n);
1001 	man->next = ROFF_NEXT_CHILD;
1002 	return n;
1003 }
1004 
1005 struct roff_node *
1006 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1007 {
1008 	struct roff_node	*n;
1009 
1010 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1011 	roff_node_append(man, n);
1012 	man->next = ROFF_NEXT_CHILD;
1013 	return n;
1014 }
1015 
1016 struct roff_node *
1017 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1018 {
1019 	struct roff_node	*n;
1020 
1021 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1022 	roff_node_append(man, n);
1023 	man->next = ROFF_NEXT_CHILD;
1024 	return n;
1025 }
1026 
1027 static void
1028 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1029 {
1030 	struct roff_node	*n;
1031 	struct tbl_span		*span;
1032 
1033 	if (man->meta.macroset == MACROSET_MAN)
1034 		man_breakscope(man, ROFF_TS);
1035 	while ((span = tbl_span(tbl)) != NULL) {
1036 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1037 		n->span = span;
1038 		roff_node_append(man, n);
1039 		n->flags |= NODE_VALID | NODE_ENDED;
1040 		man->next = ROFF_NEXT_SIBLING;
1041 	}
1042 }
1043 
1044 void
1045 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1046 {
1047 
1048 	/* Adjust siblings. */
1049 
1050 	if (n->prev)
1051 		n->prev->next = n->next;
1052 	if (n->next)
1053 		n->next->prev = n->prev;
1054 
1055 	/* Adjust parent. */
1056 
1057 	if (n->parent != NULL) {
1058 		if (n->parent->child == n)
1059 			n->parent->child = n->next;
1060 		if (n->parent->last == n)
1061 			n->parent->last = n->prev;
1062 	}
1063 
1064 	/* Adjust parse point. */
1065 
1066 	if (man == NULL)
1067 		return;
1068 	if (man->last == n) {
1069 		if (n->prev == NULL) {
1070 			man->last = n->parent;
1071 			man->next = ROFF_NEXT_CHILD;
1072 		} else {
1073 			man->last = n->prev;
1074 			man->next = ROFF_NEXT_SIBLING;
1075 		}
1076 	}
1077 	if (man->meta.first == n)
1078 		man->meta.first = NULL;
1079 }
1080 
1081 void
1082 roff_node_relink(struct roff_man *man, struct roff_node *n)
1083 {
1084 	roff_node_unlink(man, n);
1085 	n->prev = n->next = NULL;
1086 	roff_node_append(man, n);
1087 }
1088 
1089 void
1090 roff_node_free(struct roff_node *n)
1091 {
1092 
1093 	if (n->args != NULL)
1094 		mdoc_argv_free(n->args);
1095 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1096 		free(n->norm);
1097 	eqn_box_free(n->eqn);
1098 	free(n->string);
1099 	free(n);
1100 }
1101 
1102 void
1103 roff_node_delete(struct roff_man *man, struct roff_node *n)
1104 {
1105 
1106 	while (n->child != NULL)
1107 		roff_node_delete(man, n->child);
1108 	roff_node_unlink(man, n);
1109 	roff_node_free(n);
1110 }
1111 
1112 void
1113 deroff(char **dest, const struct roff_node *n)
1114 {
1115 	char	*cp;
1116 	size_t	 sz;
1117 
1118 	if (n->type != ROFFT_TEXT) {
1119 		for (n = n->child; n != NULL; n = n->next)
1120 			deroff(dest, n);
1121 		return;
1122 	}
1123 
1124 	/* Skip leading whitespace. */
1125 
1126 	for (cp = n->string; *cp != '\0'; cp++) {
1127 		if (cp[0] == '\\' && cp[1] != '\0' &&
1128 		    strchr(" %&0^|~", cp[1]) != NULL)
1129 			cp++;
1130 		else if ( ! isspace((unsigned char)*cp))
1131 			break;
1132 	}
1133 
1134 	/* Skip trailing backslash. */
1135 
1136 	sz = strlen(cp);
1137 	if (sz > 0 && cp[sz - 1] == '\\')
1138 		sz--;
1139 
1140 	/* Skip trailing whitespace. */
1141 
1142 	for (; sz; sz--)
1143 		if ( ! isspace((unsigned char)cp[sz-1]))
1144 			break;
1145 
1146 	/* Skip empty strings. */
1147 
1148 	if (sz == 0)
1149 		return;
1150 
1151 	if (*dest == NULL) {
1152 		*dest = mandoc_strndup(cp, sz);
1153 		return;
1154 	}
1155 
1156 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1157 	free(*dest);
1158 	*dest = cp;
1159 }
1160 
1161 /* --- main functions of the roff parser ---------------------------------- */
1162 
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The text to process starts at buf->buf + pos; ln is the line number
 * used in messages, and newesc is the escape character to look for.
 * Comments are handled first; then the line is scanned backwards from
 * its end, and \*, \$, \B, \n, and \w sequences are replaced in place.
 * In the process, buf->buf may be replaced by a newly allocated
 * buffer, in which case buf->sz is updated accordingly.
 *
 * Returns ROFF_CONT when the line is done, ROFF_IGN | ROFF_APPEND
 * when the line is continued (by \# or by a trailing escape character
 * while more input is available), and ROFF_IGN when the expansion
 * count or the buffer size exceeds its limit.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/* Handle trailing whitespace. */

		/*
		 * ep is set to the last byte of the line, while
		 * stesc steps back onto the escape character.
		 */
		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Discard normal comments. */

		/*
		 * Blanks right before the comment are dropped as well,
		 * except for a blank preceded by a backslash.
		 */
		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Nothing is left to process from pos onwards. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the end of the line for escapes. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/* If it is escaped, skip it. */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		/*
		 * Here, stesc - cp counts the character at stesc plus
		 * the r->escape characters immediately preceding it.
		 * An even count means pairs only: rewrite them all
		 * as backslashes and go on.  An odd count leaves a
		 * real escape at stesc, unless it ends the line.
		 */
		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: only validate the syntax. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		/* Give up on lines needing too many expansions. */

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* \B and \w: the argument follows the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/* Advance to the end of the name. */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/*
			 * Inside \w, an escape sequence only counts
			 * if it is of one of the kinds listed below.
			 */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments require an active macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Compute the length of all arguments combined. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * Resize the buffer such that the three bytes
			 * of the escape sequence become asz bytes,
			 * moving the rest of the line before shrinking
			 * or after growing the buffer.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Write the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/* '1' if the whole argument is a valid number. */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/* Prepare for the next replacement. */

		/*
		 * Continue at the end of the replacement text, such
		 * that escapes it contains are processed as well.
		 */
		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1575 
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * Return a newly allocated copy of the parsed argument, made with
 * mandoc_strdup() and, if the argument contained "\\", expanded
 * with roff_expand(); presumably, the caller is expected to free it.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the number of bytes consumed.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char	 	*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * pairs counts the bytes dropped so far, that is, the
	 * distance by which the following text moves to the left.
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/*
				 * Mark the resulting escape with
				 * ASCII_ESC for roff_expand() below.
				 */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Warn about blanks at the end of the argument line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* From here on, work on a copy of the argument. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/*
	 * Expand the escape sequences marked with ASCII_ESC;
	 * if that results in ROFF_IGN, return an empty string.
	 */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1686 
1687 
1688 /*
1689  * Process text streams.
1690  */
1691 static int
1692 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1693 {
1694 	size_t		 sz;
1695 	const char	*start;
1696 	char		*p;
1697 	int		 isz;
1698 	enum mandoc_esc	 esc;
1699 
1700 	/* Spring the input line trap. */
1701 
1702 	if (roffit_lines == 1) {
1703 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1704 		free(buf->buf);
1705 		buf->buf = p;
1706 		buf->sz = isz + 1;
1707 		*offs = 0;
1708 		free(roffit_macro);
1709 		roffit_lines = 0;
1710 		return ROFF_REPARSE;
1711 	} else if (roffit_lines > 1)
1712 		--roffit_lines;
1713 
1714 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1715 		if (roffce_lines < 1) {
1716 			r->man->last = roffce_node;
1717 			r->man->next = ROFF_NEXT_SIBLING;
1718 			roffce_lines = 0;
1719 			roffce_node = NULL;
1720 		} else
1721 			roffce_lines--;
1722 	}
1723 
1724 	/* Convert all breakable hyphens into ASCII_HYPH. */
1725 
1726 	start = p = buf->buf + pos;
1727 
1728 	while (*p != '\0') {
1729 		sz = strcspn(p, "-\\");
1730 		p += sz;
1731 
1732 		if (*p == '\0')
1733 			break;
1734 
1735 		if (*p == '\\') {
1736 			/* Skip over escapes. */
1737 			p++;
1738 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1739 			if (esc == ESCAPE_ERROR)
1740 				break;
1741 			while (*p == '-')
1742 				p++;
1743 			continue;
1744 		} else if (p == start) {
1745 			p++;
1746 			continue;
1747 		}
1748 
1749 		if (isalpha((unsigned char)p[-1]) &&
1750 		    isalpha((unsigned char)p[1]))
1751 			*p = ASCII_HYPH;
1752 		p++;
1753 	}
1754 	return ROFF_CONT;
1755 }
1756 
/*
 * Parse one input line at the roff level.
 * The line is found in buf->buf starting at offset *offs, and ln is
 * its line number for messages.  After handling in-line equation
 * delimiters and expanding escape sequences, the line is handed to
 * the text or child handler of an open roff block, to the eqn or tbl
 * parser, to the text processing function, or to the handler of the
 * request or user-defined macro found on the line.
 * The return value is a ROFF_* code, possibly combined with flags
 * returned by a handler.  In particular, ROFF_CONT is returned for
 * a line that is neither a roff request nor a user-defined macro.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos past the control character. */
	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the flag bits for the final return value. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything except .EN belongs to eqn. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* Inside a table, text and empty request lines belong to tbl. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs) | e;

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/* Skip the macro name and pass the rest to tbl. */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1881 
1882 /*
1883  * Internal interface function to tell the roff parser that execution
1884  * of the current macro ended.  This is required because macro
1885  * definitions usually do not end with a .return request.
1886  */
1887 void
1888 roff_userret(struct roff *r)
1889 {
1890 	struct mctx	*ctx;
1891 	int		 i;
1892 
1893 	assert(r->mstackpos >= 0);
1894 	ctx = r->mstack + r->mstackpos;
1895 	for (i = 0; i < ctx->argc; i++)
1896 		free(ctx->argv[i]);
1897 	ctx->argc = 0;
1898 	r->mstackpos--;
1899 }
1900 
/*
 * Finish parsing at the end of the input: report a roff block or
 * an equation that is still open, parse such an equation anyway,
 * and end a table that is still open.
 */
void
roff_endparse(struct roff *r)
{
	/* A roff block is still open: report it by name. */
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	/* An equation is still open: report it and parse it. */
	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	/* A table is still open: end it. */
	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}
1920 
1921 /*
1922  * Parse a roff node's type from the input buffer.  This must be in the
1923  * form of ".foo xxx" in the usual way.
1924  */
1925 static enum roff_tok
1926 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1927 {
1928 	char		*cp;
1929 	const char	*mac;
1930 	size_t		 maclen;
1931 	int		 deftype;
1932 	enum roff_tok	 t;
1933 
1934 	cp = buf + *pos;
1935 
1936 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1937 		return TOKEN_NONE;
1938 
1939 	mac = cp;
1940 	maclen = roff_getname(r, &cp, ln, ppos);
1941 
1942 	deftype = ROFFDEF_USER | ROFFDEF_REN;
1943 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
1944 	switch (deftype) {
1945 	case ROFFDEF_USER:
1946 		t = ROFF_USERDEF;
1947 		break;
1948 	case ROFFDEF_REN:
1949 		t = ROFF_RENAMED;
1950 		break;
1951 	default:
1952 		t = roffhash_find(r->reqtab, mac, maclen);
1953 		break;
1954 	}
1955 	if (t != TOKEN_NONE)
1956 		*pos = cp - buf;
1957 	else if (deftype == ROFFDEF_UNDEF) {
1958 		/* Using an undefined macro defines it to be empty. */
1959 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
1960 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
1961 	}
1962 	return t;
1963 }
1964 
1965 /* --- handling of request blocks ----------------------------------------- */
1966 
1967 static int
1968 roff_cblock(ROFF_ARGS)
1969 {
1970 
1971 	/*
1972 	 * A block-close `..' should only be invoked as a child of an
1973 	 * ignore macro, otherwise raise a warning and just ignore it.
1974 	 */
1975 
1976 	if (r->last == NULL) {
1977 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1978 		return ROFF_IGN;
1979 	}
1980 
1981 	switch (r->last->tok) {
1982 	case ROFF_am:
1983 		/* ROFF_am1 is remapped to ROFF_am in roff_block(). */
1984 	case ROFF_ami:
1985 	case ROFF_de:
1986 		/* ROFF_de1 is remapped to ROFF_de in roff_block(). */
1987 	case ROFF_dei:
1988 	case ROFF_ig:
1989 		break;
1990 	default:
1991 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
1992 		return ROFF_IGN;
1993 	}
1994 
1995 	if (buf->buf[pos] != '\0')
1996 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
1997 		    ".. %s", buf->buf + pos);
1998 
1999 	roffnode_pop(r);
2000 	roffnode_cleanscope(r);
2001 	return ROFF_IGN;
2002 
2003 }
2004 
2005 static int
2006 roffnode_cleanscope(struct roff *r)
2007 {
2008 	int inloop;
2009 
2010 	inloop = 0;
2011 	while (r->last != NULL) {
2012 		if (--r->last->endspan != 0)
2013 			break;
2014 		inloop += roffnode_pop(r);
2015 	}
2016 	return inloop;
2017 }
2018 
2019 static int
2020 roff_ccond(struct roff *r, int ln, int ppos)
2021 {
2022 	if (NULL == r->last) {
2023 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2024 		return 0;
2025 	}
2026 
2027 	switch (r->last->tok) {
2028 	case ROFF_el:
2029 	case ROFF_ie:
2030 	case ROFF_if:
2031 	case ROFF_while:
2032 		break;
2033 	default:
2034 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2035 		return 0;
2036 	}
2037 
2038 	if (r->last->endspan > -1) {
2039 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2040 		return 0;
2041 	}
2042 
2043 	return roffnode_pop(r) + roffnode_cleanscope(r);
2044 }
2045 
2046 static int
2047 roff_block(ROFF_ARGS)
2048 {
2049 	const char	*name, *value;
2050 	char		*call, *cp, *iname, *rname;
2051 	size_t		 csz, namesz, rsz;
2052 	int		 deftype;
2053 
2054 	/* Ignore groff compatibility mode for now. */
2055 
2056 	if (tok == ROFF_de1)
2057 		tok = ROFF_de;
2058 	else if (tok == ROFF_dei1)
2059 		tok = ROFF_dei;
2060 	else if (tok == ROFF_am1)
2061 		tok = ROFF_am;
2062 	else if (tok == ROFF_ami1)
2063 		tok = ROFF_ami;
2064 
2065 	/* Parse the macro name argument. */
2066 
2067 	cp = buf->buf + pos;
2068 	if (tok == ROFF_ig) {
2069 		iname = NULL;
2070 		namesz = 0;
2071 	} else {
2072 		iname = cp;
2073 		namesz = roff_getname(r, &cp, ln, ppos);
2074 		iname[namesz] = '\0';
2075 	}
2076 
2077 	/* Resolve the macro name argument if it is indirect. */
2078 
2079 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2080 		deftype = ROFFDEF_USER;
2081 		name = roff_getstrn(r, iname, namesz, &deftype);
2082 		if (name == NULL) {
2083 			mandoc_msg(MANDOCERR_STR_UNDEF,
2084 			    ln, (int)(iname - buf->buf),
2085 			    "%.*s", (int)namesz, iname);
2086 			namesz = 0;
2087 		} else
2088 			namesz = strlen(name);
2089 	} else
2090 		name = iname;
2091 
2092 	if (namesz == 0 && tok != ROFF_ig) {
2093 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2094 		    ln, ppos, "%s", roff_name[tok]);
2095 		return ROFF_IGN;
2096 	}
2097 
2098 	roffnode_push(r, tok, name, ln, ppos);
2099 
2100 	/*
2101 	 * At the beginning of a `de' macro, clear the existing string
2102 	 * with the same name, if there is one.  New content will be
2103 	 * appended from roff_block_text() in multiline mode.
2104 	 */
2105 
2106 	if (tok == ROFF_de || tok == ROFF_dei) {
2107 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2108 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2109 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2110 		deftype = ROFFDEF_ANY;
2111 		value = roff_getstrn(r, iname, namesz, &deftype);
2112 		switch (deftype) {  /* Before appending, ... */
2113 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2114 			roff_setstrn(&r->strtab, name, namesz,
2115 			    value, strlen(value), 0);
2116 			break;
2117 		case ROFFDEF_REN: /* call original standard macro. */
2118 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2119 			    (int)strlen(value), value);
2120 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2121 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2122 			free(call);
2123 			break;
2124 		case ROFFDEF_STD:  /* rename and call standard macro. */
2125 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2126 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2127 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2128 			    (int)rsz, rname);
2129 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2130 			free(call);
2131 			free(rname);
2132 			break;
2133 		default:
2134 			break;
2135 		}
2136 	}
2137 
2138 	if (*cp == '\0')
2139 		return ROFF_IGN;
2140 
2141 	/* Get the custom end marker. */
2142 
2143 	iname = cp;
2144 	namesz = roff_getname(r, &cp, ln, ppos);
2145 
2146 	/* Resolve the end marker if it is indirect. */
2147 
2148 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2149 		deftype = ROFFDEF_USER;
2150 		name = roff_getstrn(r, iname, namesz, &deftype);
2151 		if (name == NULL) {
2152 			mandoc_msg(MANDOCERR_STR_UNDEF,
2153 			    ln, (int)(iname - buf->buf),
2154 			    "%.*s", (int)namesz, iname);
2155 			namesz = 0;
2156 		} else
2157 			namesz = strlen(name);
2158 	} else
2159 		name = iname;
2160 
2161 	if (namesz)
2162 		r->last->end = mandoc_strndup(name, namesz);
2163 
2164 	if (*cp != '\0')
2165 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2166 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2167 
2168 	return ROFF_IGN;
2169 }
2170 
2171 static int
2172 roff_block_sub(ROFF_ARGS)
2173 {
2174 	enum roff_tok	t;
2175 	int		i, j;
2176 
2177 	/*
2178 	 * First check whether a custom macro exists at this level.  If
2179 	 * it does, then check against it.  This is some of groff's
2180 	 * stranger behaviours.  If we encountered a custom end-scope
2181 	 * tag and that tag also happens to be a "real" macro, then we
2182 	 * need to try interpreting it again as a real macro.  If it's
2183 	 * not, then return ignore.  Else continue.
2184 	 */
2185 
2186 	if (r->last->end) {
2187 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2188 			if (buf->buf[i] != r->last->end[j])
2189 				break;
2190 
2191 		if (r->last->end[j] == '\0' &&
2192 		    (buf->buf[i] == '\0' ||
2193 		     buf->buf[i] == ' ' ||
2194 		     buf->buf[i] == '\t')) {
2195 			roffnode_pop(r);
2196 			roffnode_cleanscope(r);
2197 
2198 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2199 				i++;
2200 
2201 			pos = i;
2202 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2203 			    TOKEN_NONE)
2204 				return ROFF_RERUN;
2205 			return ROFF_IGN;
2206 		}
2207 	}
2208 
2209 	/*
2210 	 * If we have no custom end-query or lookup failed, then try
2211 	 * pulling it out of the hashtable.
2212 	 */
2213 
2214 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2215 
2216 	if (t != ROFF_cblock) {
2217 		if (tok != ROFF_ig)
2218 			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2219 		return ROFF_IGN;
2220 	}
2221 
2222 	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2223 }
2224 
2225 static int
2226 roff_block_text(ROFF_ARGS)
2227 {
2228 
2229 	if (tok != ROFF_ig)
2230 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2231 
2232 	return ROFF_IGN;
2233 }
2234 
2235 static int
2236 roff_cond_sub(ROFF_ARGS)
2237 {
2238 	char		*ep;
2239 	int		 endloop, irc, rr;
2240 	enum roff_tok	 t;
2241 
2242 	irc = ROFF_IGN;
2243 	rr = r->last->rule;
2244 	endloop = tok != ROFF_while ? ROFF_IGN :
2245 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2246 	if (roffnode_cleanscope(r))
2247 		irc |= endloop;
2248 
2249 	/*
2250 	 * If `\}' occurs on a macro line without a preceding macro,
2251 	 * drop the line completely.
2252 	 */
2253 
2254 	ep = buf->buf + pos;
2255 	if (ep[0] == '\\' && ep[1] == '}')
2256 		rr = 0;
2257 
2258 	/*
2259 	 * The closing delimiter `\}' rewinds the conditional scope
2260 	 * but is otherwise ignored when interpreting the line.
2261 	 */
2262 
2263 	while ((ep = strchr(ep, '\\')) != NULL) {
2264 		switch (ep[1]) {
2265 		case '}':
2266 			memmove(ep, ep + 2, strlen(ep + 2) + 1);
2267 			if (roff_ccond(r, ln, ep - buf->buf))
2268 				irc |= endloop;
2269 			break;
2270 		case '\0':
2271 			++ep;
2272 			break;
2273 		default:
2274 			ep += 2;
2275 			break;
2276 		}
2277 	}
2278 
2279 	/*
2280 	 * Fully handle known macros when they are structurally
2281 	 * required or when the conditional evaluated to true.
2282 	 */
2283 
2284 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2285 	irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ?
2286 	    (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) :
2287 	    rr ? ROFF_CONT : ROFF_IGN;
2288 	return irc;
2289 }
2290 
2291 static int
2292 roff_cond_text(ROFF_ARGS)
2293 {
2294 	char		*ep;
2295 	int		 endloop, irc, rr;
2296 
2297 	irc = ROFF_IGN;
2298 	rr = r->last->rule;
2299 	endloop = tok != ROFF_while ? ROFF_IGN :
2300 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2301 	if (roffnode_cleanscope(r))
2302 		irc |= endloop;
2303 
2304 	/*
2305 	 * If `\}' occurs on a text line with neither preceding
2306 	 * nor following characters, drop the line completely.
2307 	 */
2308 
2309 	ep = buf->buf + pos;
2310 	if (strcmp(ep, "\\}") == 0)
2311 		rr = 0;
2312 
2313 	/*
2314 	 * The closing delimiter `\}' rewinds the conditional scope
2315 	 * but is otherwise ignored when interpreting the line.
2316 	 */
2317 
2318 	while ((ep = strchr(ep, '\\')) != NULL) {
2319 		switch (ep[1]) {
2320 		case '}':
2321 			memmove(ep, ep + 2, strlen(ep + 2) + 1);
2322 			if (roff_ccond(r, ln, ep - buf->buf))
2323 				irc |= endloop;
2324 			break;
2325 		case '\0':
2326 			++ep;
2327 			break;
2328 		default:
2329 			ep += 2;
2330 			break;
2331 		}
2332 	}
2333 	if (rr)
2334 		irc |= ROFF_CONT;
2335 	return irc;
2336 }
2337 
2338 /* --- handling of numeric and conditional expressions -------------------- */
2339 
2340 /*
2341  * Parse a single signed integer number.  Stop at the first non-digit.
2342  * If there is at least one digit, return success and advance the
2343  * parse point, else return failure and let the parse point unchanged.
2344  * Ignore overflows, treat them just like the C language.
2345  */
2346 static int
2347 roff_getnum(const char *v, int *pos, int *res, int flags)
2348 {
2349 	int	 myres, scaled, n, p;
2350 
2351 	if (NULL == res)
2352 		res = &myres;
2353 
2354 	p = *pos;
2355 	n = v[p] == '-';
2356 	if (n || v[p] == '+')
2357 		p++;
2358 
2359 	if (flags & ROFFNUM_WHITE)
2360 		while (isspace((unsigned char)v[p]))
2361 			p++;
2362 
2363 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2364 		*res = 10 * *res + v[p] - '0';
2365 	if (p == *pos + n)
2366 		return 0;
2367 
2368 	if (n)
2369 		*res = -*res;
2370 
2371 	/* Each number may be followed by one optional scaling unit. */
2372 
2373 	switch (v[p]) {
2374 	case 'f':
2375 		scaled = *res * 65536;
2376 		break;
2377 	case 'i':
2378 		scaled = *res * 240;
2379 		break;
2380 	case 'c':
2381 		scaled = *res * 240 / 2.54;
2382 		break;
2383 	case 'v':
2384 	case 'P':
2385 		scaled = *res * 40;
2386 		break;
2387 	case 'm':
2388 	case 'n':
2389 		scaled = *res * 24;
2390 		break;
2391 	case 'p':
2392 		scaled = *res * 10 / 3;
2393 		break;
2394 	case 'u':
2395 		scaled = *res;
2396 		break;
2397 	case 'M':
2398 		scaled = *res * 6 / 25;
2399 		break;
2400 	default:
2401 		scaled = *res;
2402 		p--;
2403 		break;
2404 	}
2405 	if (flags & ROFFNUM_SCALE)
2406 		*res = scaled;
2407 
2408 	*pos = p + 1;
2409 	return 1;
2410 }
2411 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] serves as the delimiter.
 * Return 1 if the text between its first and second occurrence
 * is identical to the text between its second and third
 * occurrence, or 0 otherwise.
 * Move *pos behind the third occurrence of the delimiter,
 * or to the end of the line if there is no third occurrence.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2454 
2455 /*
2456  * Evaluate an optionally negated single character, numerical,
2457  * or string condition.
2458  */
2459 static int
2460 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2461 {
2462 	const char	*start, *end;
2463 	char		*cp, *name;
2464 	size_t		 sz;
2465 	int		 deftype, len, number, savepos, istrue, wanttrue;
2466 
2467 	if ('!' == v[*pos]) {
2468 		wanttrue = 0;
2469 		(*pos)++;
2470 	} else
2471 		wanttrue = 1;
2472 
2473 	switch (v[*pos]) {
2474 	case '\0':
2475 		return 0;
2476 	case 'n':
2477 	case 'o':
2478 		(*pos)++;
2479 		return wanttrue;
2480 	case 'e':
2481 	case 't':
2482 	case 'v':
2483 		(*pos)++;
2484 		return !wanttrue;
2485 	case 'c':
2486 		do {
2487 			(*pos)++;
2488 		} while (v[*pos] == ' ');
2489 
2490 		/*
2491 		 * Quirk for groff compatibility:
2492 		 * The horizontal tab is neither available nor unavailable.
2493 		 */
2494 
2495 		if (v[*pos] == '\t') {
2496 			(*pos)++;
2497 			return 0;
2498 		}
2499 
2500 		/* Printable ASCII characters are available. */
2501 
2502 		if (v[*pos] != '\\') {
2503 			(*pos)++;
2504 			return wanttrue;
2505 		}
2506 
2507 		end = v + ++*pos;
2508 		switch (mandoc_escape(&end, &start, &len)) {
2509 		case ESCAPE_SPECIAL:
2510 			istrue = mchars_spec2cp(start, len) != -1;
2511 			break;
2512 		case ESCAPE_UNICODE:
2513 			istrue = 1;
2514 			break;
2515 		case ESCAPE_NUMBERED:
2516 			istrue = mchars_num2char(start, len) != -1;
2517 			break;
2518 		default:
2519 			istrue = !wanttrue;
2520 			break;
2521 		}
2522 		*pos = end - v;
2523 		return istrue == wanttrue;
2524 	case 'd':
2525 	case 'r':
2526 		cp = v + *pos + 1;
2527 		while (*cp == ' ')
2528 			cp++;
2529 		name = cp;
2530 		sz = roff_getname(r, &cp, ln, cp - v);
2531 		if (sz == 0)
2532 			istrue = 0;
2533 		else if (v[*pos] == 'r')
2534 			istrue = roff_hasregn(r, name, sz);
2535 		else {
2536 			deftype = ROFFDEF_ANY;
2537 		        roff_getstrn(r, name, sz, &deftype);
2538 			istrue = !!deftype;
2539 		}
2540 		*pos = (name + sz) - v;
2541 		return istrue == wanttrue;
2542 	default:
2543 		break;
2544 	}
2545 
2546 	savepos = *pos;
2547 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2548 		return (number > 0) == wanttrue;
2549 	else if (*pos == savepos)
2550 		return roff_evalstrcond(v, pos) == wanttrue;
2551 	else
2552 		return 0;
2553 }
2554 
2555 static int
2556 roff_line_ignore(ROFF_ARGS)
2557 {
2558 
2559 	return ROFF_IGN;
2560 }
2561 
2562 static int
2563 roff_insec(ROFF_ARGS)
2564 {
2565 
2566 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2567 	return ROFF_IGN;
2568 }
2569 
2570 static int
2571 roff_unsupp(ROFF_ARGS)
2572 {
2573 
2574 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2575 	return ROFF_IGN;
2576 }
2577 
2578 static int
2579 roff_cond(ROFF_ARGS)
2580 {
2581 	int	 irc;
2582 
2583 	roffnode_push(r, tok, NULL, ln, ppos);
2584 
2585 	/*
2586 	 * An `.el' has no conditional body: it will consume the value
2587 	 * of the current rstack entry set in prior `ie' calls or
2588 	 * defaults to DENY.
2589 	 *
2590 	 * If we're not an `el', however, then evaluate the conditional.
2591 	 */
2592 
2593 	r->last->rule = tok == ROFF_el ?
2594 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2595 	    roff_evalcond(r, ln, buf->buf, &pos);
2596 
2597 	/*
2598 	 * An if-else will put the NEGATION of the current evaluated
2599 	 * conditional into the stack of rules.
2600 	 */
2601 
2602 	if (tok == ROFF_ie) {
2603 		if (r->rstackpos + 1 == r->rstacksz) {
2604 			r->rstacksz += 16;
2605 			r->rstack = mandoc_reallocarray(r->rstack,
2606 			    r->rstacksz, sizeof(int));
2607 		}
2608 		r->rstack[++r->rstackpos] = !r->last->rule;
2609 	}
2610 
2611 	/* If the parent has false as its rule, then so do we. */
2612 
2613 	if (r->last->parent && !r->last->parent->rule)
2614 		r->last->rule = 0;
2615 
2616 	/*
2617 	 * Determine scope.
2618 	 * If there is nothing on the line after the conditional,
2619 	 * not even whitespace, use next-line scope.
2620 	 * Except that .while does not support next-line scope.
2621 	 */
2622 
2623 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2624 		r->last->endspan = 2;
2625 		goto out;
2626 	}
2627 
2628 	while (buf->buf[pos] == ' ')
2629 		pos++;
2630 
2631 	/* An opening brace requests multiline scope. */
2632 
2633 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2634 		r->last->endspan = -1;
2635 		pos += 2;
2636 		while (buf->buf[pos] == ' ')
2637 			pos++;
2638 		goto out;
2639 	}
2640 
2641 	/*
2642 	 * Anything else following the conditional causes
2643 	 * single-line scope.  Warn if the scope contains
2644 	 * nothing but trailing whitespace.
2645 	 */
2646 
2647 	if (buf->buf[pos] == '\0')
2648 		mandoc_msg(MANDOCERR_COND_EMPTY,
2649 		    ln, ppos, "%s", roff_name[tok]);
2650 
2651 	r->last->endspan = 1;
2652 
2653 out:
2654 	*offs = pos;
2655 	irc = ROFF_RERUN;
2656 	if (tok == ROFF_while)
2657 		irc |= ROFF_WHILE;
2658 	return irc;
2659 }
2660 
2661 static int
2662 roff_ds(ROFF_ARGS)
2663 {
2664 	char		*string;
2665 	const char	*name;
2666 	size_t		 namesz;
2667 
2668 	/* Ignore groff compatibility mode for now. */
2669 
2670 	if (tok == ROFF_ds1)
2671 		tok = ROFF_ds;
2672 	else if (tok == ROFF_as1)
2673 		tok = ROFF_as;
2674 
2675 	/*
2676 	 * The first word is the name of the string.
2677 	 * If it is empty or terminated by an escape sequence,
2678 	 * abort the `ds' request without defining anything.
2679 	 */
2680 
2681 	name = string = buf->buf + pos;
2682 	if (*name == '\0')
2683 		return ROFF_IGN;
2684 
2685 	namesz = roff_getname(r, &string, ln, pos);
2686 	switch (name[namesz]) {
2687 	case '\\':
2688 		return ROFF_IGN;
2689 	case '\t':
2690 		string = buf->buf + pos + namesz;
2691 		break;
2692 	default:
2693 		break;
2694 	}
2695 
2696 	/* Read past the initial double-quote, if any. */
2697 	if (*string == '"')
2698 		string++;
2699 
2700 	/* The rest is the value. */
2701 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2702 	    ROFF_as == tok);
2703 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2704 	return ROFF_IGN;
2705 }
2706 
/*
 * Parse a single operator at v[*pos], one or two characters long.
 * Two-character operators are encoded as single characters:
 * "<=" as 'l', ">=" as 'g', "<>" as '!', "<?" as 'i', ">?" as 'a',
 * and "==" as '='.
 * If an operator is recognized, store its code in *res, advance
 * *pos behind it, and return the code.  Otherwise, return 0,
 * leave *pos alone, and store the unrecognized character in *res.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*op;
	int		 oplen;

	op = v + *pos;
	oplen = 1;
	*res = op[0];

	if (op[0] == '<') {
		if (op[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (op[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (op[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
	} else if (op[0] == '>') {
		if (op[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (op[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
	} else if (op[0] == '=') {
		if (op[1] == '=')
			oplen = 2;
	} else if (op[0] != '+' && op[0] != '-' && op[0] != '*' &&
	    op[0] != '/' && op[0] != '%' && op[0] != '&' &&
	    op[0] != ':')
		return 0;

	*pos += oplen;
	return *res;
}
2770 
2771 /*
2772  * Evaluate either a parenthesized numeric expression
2773  * or a single signed integer number.
2774  */
2775 static int
2776 roff_evalpar(struct roff *r, int ln,
2777 	const char *v, int *pos, int *res, int flags)
2778 {
2779 
2780 	if ('(' != v[*pos])
2781 		return roff_getnum(v, pos, res, flags);
2782 
2783 	(*pos)++;
2784 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2785 		return 0;
2786 
2787 	/*
2788 	 * Omission of the closing parenthesis
2789 	 * is an error in validation mode,
2790 	 * but ignored in evaluation mode.
2791 	 */
2792 
2793 	if (')' == v[*pos])
2794 		(*pos)++;
2795 	else if (NULL == res)
2796 		return 0;
2797 
2798 	return 1;
2799 }
2800 
2801 /*
2802  * Evaluate a complete numeric expression.
2803  * Proceed left to right, there is no concept of precedence.
2804  */
2805 static int
2806 roff_evalnum(struct roff *r, int ln, const char *v,
2807 	int *pos, int *res, int flags)
2808 {
2809 	int		 mypos, operand2;
2810 	char		 operator;
2811 
2812 	if (NULL == pos) {
2813 		mypos = 0;
2814 		pos = &mypos;
2815 	}
2816 
2817 	if (flags & ROFFNUM_WHITE)
2818 		while (isspace((unsigned char)v[*pos]))
2819 			(*pos)++;
2820 
2821 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2822 		return 0;
2823 
2824 	while (1) {
2825 		if (flags & ROFFNUM_WHITE)
2826 			while (isspace((unsigned char)v[*pos]))
2827 				(*pos)++;
2828 
2829 		if ( ! roff_getop(v, pos, &operator))
2830 			break;
2831 
2832 		if (flags & ROFFNUM_WHITE)
2833 			while (isspace((unsigned char)v[*pos]))
2834 				(*pos)++;
2835 
2836 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2837 			return 0;
2838 
2839 		if (flags & ROFFNUM_WHITE)
2840 			while (isspace((unsigned char)v[*pos]))
2841 				(*pos)++;
2842 
2843 		if (NULL == res)
2844 			continue;
2845 
2846 		switch (operator) {
2847 		case '+':
2848 			*res += operand2;
2849 			break;
2850 		case '-':
2851 			*res -= operand2;
2852 			break;
2853 		case '*':
2854 			*res *= operand2;
2855 			break;
2856 		case '/':
2857 			if (operand2 == 0) {
2858 				mandoc_msg(MANDOCERR_DIVZERO,
2859 					ln, *pos, "%s", v);
2860 				*res = 0;
2861 				break;
2862 			}
2863 			*res /= operand2;
2864 			break;
2865 		case '%':
2866 			if (operand2 == 0) {
2867 				mandoc_msg(MANDOCERR_DIVZERO,
2868 					ln, *pos, "%s", v);
2869 				*res = 0;
2870 				break;
2871 			}
2872 			*res %= operand2;
2873 			break;
2874 		case '<':
2875 			*res = *res < operand2;
2876 			break;
2877 		case '>':
2878 			*res = *res > operand2;
2879 			break;
2880 		case 'l':
2881 			*res = *res <= operand2;
2882 			break;
2883 		case 'g':
2884 			*res = *res >= operand2;
2885 			break;
2886 		case '=':
2887 			*res = *res == operand2;
2888 			break;
2889 		case '!':
2890 			*res = *res != operand2;
2891 			break;
2892 		case '&':
2893 			*res = *res && operand2;
2894 			break;
2895 		case ':':
2896 			*res = *res || operand2;
2897 			break;
2898 		case 'i':
2899 			if (operand2 < *res)
2900 				*res = operand2;
2901 			break;
2902 		case 'a':
2903 			if (operand2 > *res)
2904 				*res = operand2;
2905 			break;
2906 		default:
2907 			abort();
2908 		}
2909 	}
2910 	return 1;
2911 }
2912 
2913 /* --- register management ------------------------------------------------ */
2914 
/*
 * Set, increment, or decrement the register with the
 * NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
2920 
2921 static void
2922 roff_setregn(struct roff *r, const char *name, size_t len,
2923     int val, char sign, int step)
2924 {
2925 	struct roffreg	*reg;
2926 
2927 	/* Search for an existing register with the same name. */
2928 	reg = r->regtab;
2929 
2930 	while (reg != NULL && (reg->key.sz != len ||
2931 	    strncmp(reg->key.p, name, len) != 0))
2932 		reg = reg->next;
2933 
2934 	if (NULL == reg) {
2935 		/* Create a new register. */
2936 		reg = mandoc_malloc(sizeof(struct roffreg));
2937 		reg->key.p = mandoc_strndup(name, len);
2938 		reg->key.sz = len;
2939 		reg->val = 0;
2940 		reg->step = 0;
2941 		reg->next = r->regtab;
2942 		r->regtab = reg;
2943 	}
2944 
2945 	if ('+' == sign)
2946 		reg->val += val;
2947 	else if ('-' == sign)
2948 		reg->val -= val;
2949 	else
2950 		reg->val = val;
2951 	if (step != INT_MIN)
2952 		reg->step = step;
2953 }
2954 
2955 /*
2956  * Handle some predefined read-only number registers.
2957  * For now, return -1 if the requested register is not predefined;
2958  * in case a predefined read-only register having the value -1
2959  * were to turn up, another special value would have to be chosen.
2960  */
2961 static int
2962 roff_getregro(const struct roff *r, const char *name)
2963 {
2964 
2965 	switch (*name) {
2966 	case '$':  /* Number of arguments of the last macro evaluated. */
2967 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
2968 	case 'A':  /* ASCII approximation mode is always off. */
2969 		return 0;
2970 	case 'g':  /* Groff compatibility mode is always on. */
2971 		return 1;
2972 	case 'H':  /* Fixed horizontal resolution. */
2973 		return 24;
2974 	case 'j':  /* Always adjust left margin only. */
2975 		return 0;
2976 	case 'T':  /* Some output device is always defined. */
2977 		return 1;
2978 	case 'V':  /* Fixed vertical resolution. */
2979 		return 40;
2980 	default:
2981 		return -1;
2982 	}
2983 }
2984 
/*
 * Return the value of the register with the NUL-terminated name,
 * without incrementing or decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
2990 
2991 static int
2992 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
2993 {
2994 	struct roffreg	*reg;
2995 	int		 val;
2996 
2997 	if ('.' == name[0] && 2 == len) {
2998 		val = roff_getregro(r, name + 1);
2999 		if (-1 != val)
3000 			return val;
3001 	}
3002 
3003 	for (reg = r->regtab; reg; reg = reg->next) {
3004 		if (len == reg->key.sz &&
3005 		    0 == strncmp(name, reg->key.p, len)) {
3006 			switch (sign) {
3007 			case '+':
3008 				reg->val += reg->step;
3009 				break;
3010 			case '-':
3011 				reg->val -= reg->step;
3012 				break;
3013 			default:
3014 				break;
3015 			}
3016 			return reg->val;
3017 		}
3018 	}
3019 
3020 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3021 	return 0;
3022 }
3023 
3024 static int
3025 roff_hasregn(const struct roff *r, const char *name, size_t len)
3026 {
3027 	struct roffreg	*reg;
3028 	int		 val;
3029 
3030 	if ('.' == name[0] && 2 == len) {
3031 		val = roff_getregro(r, name + 1);
3032 		if (-1 != val)
3033 			return 1;
3034 	}
3035 
3036 	for (reg = r->regtab; reg; reg = reg->next)
3037 		if (len == reg->key.sz &&
3038 		    0 == strncmp(name, reg->key.p, len))
3039 			return 1;
3040 
3041 	return 0;
3042 }
3043 
3044 static void
3045 roff_freereg(struct roffreg *reg)
3046 {
3047 	struct roffreg	*old_reg;
3048 
3049 	while (NULL != reg) {
3050 		free(reg->key.p);
3051 		old_reg = reg;
3052 		reg = reg->next;
3053 		free(old_reg);
3054 	}
3055 }
3056 
3057 static int
3058 roff_nr(ROFF_ARGS)
3059 {
3060 	char		*key, *val, *step;
3061 	size_t		 keysz;
3062 	int		 iv, is, len;
3063 	char		 sign;
3064 
3065 	key = val = buf->buf + pos;
3066 	if (*key == '\0')
3067 		return ROFF_IGN;
3068 
3069 	keysz = roff_getname(r, &val, ln, pos);
3070 	if (key[keysz] == '\\' || key[keysz] == '\t')
3071 		return ROFF_IGN;
3072 
3073 	sign = *val;
3074 	if (sign == '+' || sign == '-')
3075 		val++;
3076 
3077 	len = 0;
3078 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3079 		return ROFF_IGN;
3080 
3081 	step = val + len;
3082 	while (isspace((unsigned char)*step))
3083 		step++;
3084 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3085 		is = INT_MIN;
3086 
3087 	roff_setregn(r, key, keysz, iv, sign, is);
3088 	return ROFF_IGN;
3089 }
3090 
3091 static int
3092 roff_rr(ROFF_ARGS)
3093 {
3094 	struct roffreg	*reg, **prev;
3095 	char		*name, *cp;
3096 	size_t		 namesz;
3097 
3098 	name = cp = buf->buf + pos;
3099 	if (*name == '\0')
3100 		return ROFF_IGN;
3101 	namesz = roff_getname(r, &cp, ln, pos);
3102 	name[namesz] = '\0';
3103 
3104 	prev = &r->regtab;
3105 	while (1) {
3106 		reg = *prev;
3107 		if (reg == NULL || !strcmp(name, reg->key.p))
3108 			break;
3109 		prev = &reg->next;
3110 	}
3111 	if (reg != NULL) {
3112 		*prev = reg->next;
3113 		free(reg->key.p);
3114 		free(reg);
3115 	}
3116 	return ROFF_IGN;
3117 }
3118 
3119 /* --- handler functions for roff requests -------------------------------- */
3120 
3121 static int
3122 roff_rm(ROFF_ARGS)
3123 {
3124 	const char	 *name;
3125 	char		 *cp;
3126 	size_t		  namesz;
3127 
3128 	cp = buf->buf + pos;
3129 	while (*cp != '\0') {
3130 		name = cp;
3131 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3132 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3133 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3134 		if (name[namesz] == '\\' || name[namesz] == '\t')
3135 			break;
3136 	}
3137 	return ROFF_IGN;
3138 }
3139 
3140 static int
3141 roff_it(ROFF_ARGS)
3142 {
3143 	int		 iv;
3144 
3145 	/* Parse the number of lines. */
3146 
3147 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3148 		mandoc_msg(MANDOCERR_IT_NONUM,
3149 		    ln, ppos, "%s", buf->buf + 1);
3150 		return ROFF_IGN;
3151 	}
3152 
3153 	while (isspace((unsigned char)buf->buf[pos]))
3154 		pos++;
3155 
3156 	/*
3157 	 * Arm the input line trap.
3158 	 * Special-casing "an-trap" is an ugly workaround to cope
3159 	 * with DocBook stupidly fiddling with man(7) internals.
3160 	 */
3161 
3162 	roffit_lines = iv;
3163 	roffit_macro = mandoc_strdup(iv != 1 ||
3164 	    strcmp(buf->buf + pos, "an-trap") ?
3165 	    buf->buf + pos : "br");
3166 	return ROFF_IGN;
3167 }
3168 
3169 static int
3170 roff_Dd(ROFF_ARGS)
3171 {
3172 	int		 mask;
3173 	enum roff_tok	 t, te;
3174 
3175 	switch (tok) {
3176 	case ROFF_Dd:
3177 		tok = MDOC_Dd;
3178 		te = MDOC_MAX;
3179 		if (r->format == 0)
3180 			r->format = MPARSE_MDOC;
3181 		mask = MPARSE_MDOC | MPARSE_QUICK;
3182 		break;
3183 	case ROFF_TH:
3184 		tok = MAN_TH;
3185 		te = MAN_MAX;
3186 		if (r->format == 0)
3187 			r->format = MPARSE_MAN;
3188 		mask = MPARSE_QUICK;
3189 		break;
3190 	default:
3191 		abort();
3192 	}
3193 	if ((r->options & mask) == 0)
3194 		for (t = tok; t < te; t++)
3195 			roff_setstr(r, roff_name[t], NULL, 0);
3196 	return ROFF_CONT;
3197 }
3198 
3199 static int
3200 roff_TE(ROFF_ARGS)
3201 {
3202 	r->man->flags &= ~ROFF_NONOFILL;
3203 	if (r->tbl == NULL) {
3204 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3205 		return ROFF_IGN;
3206 	}
3207 	if (tbl_end(r->tbl, 0) == 0) {
3208 		r->tbl = NULL;
3209 		free(buf->buf);
3210 		buf->buf = mandoc_strdup(".sp");
3211 		buf->sz = 4;
3212 		*offs = 0;
3213 		return ROFF_REPARSE;
3214 	}
3215 	r->tbl = NULL;
3216 	return ROFF_IGN;
3217 }
3218 
3219 static int
3220 roff_T_(ROFF_ARGS)
3221 {
3222 
3223 	if (NULL == r->tbl)
3224 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3225 	else
3226 		tbl_restart(ln, ppos, r->tbl);
3227 
3228 	return ROFF_IGN;
3229 }
3230 
3231 /*
3232  * Handle in-line equation delimiters.
3233  */
3234 static int
3235 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3236 {
3237 	char		*cp1, *cp2;
3238 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3239 
3240 	/*
3241 	 * Outside equations, look for an opening delimiter.
3242 	 * If we are inside an equation, we already know it is
3243 	 * in-line, or this function wouldn't have been called;
3244 	 * so look for a closing delimiter.
3245 	 */
3246 
3247 	cp1 = buf->buf + pos;
3248 	cp2 = strchr(cp1, r->eqn == NULL ?
3249 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3250 	if (cp2 == NULL)
3251 		return ROFF_CONT;
3252 
3253 	*cp2++ = '\0';
3254 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3255 
3256 	/* Handle preceding text, protecting whitespace. */
3257 
3258 	if (*buf->buf != '\0') {
3259 		if (r->eqn == NULL)
3260 			bef_pr = "\\&";
3261 		bef_nl = "\n";
3262 	}
3263 
3264 	/*
3265 	 * Prepare replacing the delimiter with an equation macro
3266 	 * and drop leading white space from the equation.
3267 	 */
3268 
3269 	if (r->eqn == NULL) {
3270 		while (*cp2 == ' ')
3271 			cp2++;
3272 		mac = ".EQ";
3273 	} else
3274 		mac = ".EN";
3275 
3276 	/* Handle following text, protecting whitespace. */
3277 
3278 	if (*cp2 != '\0') {
3279 		aft_nl = "\n";
3280 		if (r->eqn != NULL)
3281 			aft_pr = "\\&";
3282 	}
3283 
3284 	/* Do the actual replacement. */
3285 
3286 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3287 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3288 	free(buf->buf);
3289 	buf->buf = cp1;
3290 
3291 	/* Toggle the in-line state of the eqn subsystem. */
3292 
3293 	r->eqn_inline = r->eqn == NULL;
3294 	return ROFF_REPARSE;
3295 }
3296 
3297 static int
3298 roff_EQ(ROFF_ARGS)
3299 {
3300 	struct roff_node	*n;
3301 
3302 	if (r->man->meta.macroset == MACROSET_MAN)
3303 		man_breakscope(r->man, ROFF_EQ);
3304 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3305 	if (ln > r->man->last->line)
3306 		n->flags |= NODE_LINE;
3307 	n->eqn = eqn_box_new();
3308 	roff_node_append(r->man, n);
3309 	r->man->next = ROFF_NEXT_SIBLING;
3310 
3311 	assert(r->eqn == NULL);
3312 	if (r->last_eqn == NULL)
3313 		r->last_eqn = eqn_alloc();
3314 	else
3315 		eqn_reset(r->last_eqn);
3316 	r->eqn = r->last_eqn;
3317 	r->eqn->node = n;
3318 
3319 	if (buf->buf[pos] != '\0')
3320 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3321 		    ".EQ %s", buf->buf + pos);
3322 
3323 	return ROFF_IGN;
3324 }
3325 
3326 static int
3327 roff_EN(ROFF_ARGS)
3328 {
3329 	if (r->eqn != NULL) {
3330 		eqn_parse(r->eqn);
3331 		r->eqn = NULL;
3332 	} else
3333 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3334 	if (buf->buf[pos] != '\0')
3335 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3336 		    "EN %s", buf->buf + pos);
3337 	return ROFF_IGN;
3338 }
3339 
3340 static int
3341 roff_TS(ROFF_ARGS)
3342 {
3343 	if (r->tbl != NULL) {
3344 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3345 		tbl_end(r->tbl, 0);
3346 	}
3347 	r->man->flags |= ROFF_NONOFILL;
3348 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3349 	if (r->last_tbl == NULL)
3350 		r->first_tbl = r->tbl;
3351 	r->last_tbl = r->tbl;
3352 	return ROFF_IGN;
3353 }
3354 
3355 static int
3356 roff_noarg(ROFF_ARGS)
3357 {
3358 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3359 		man_breakscope(r->man, tok);
3360 	if (tok == ROFF_brp)
3361 		tok = ROFF_br;
3362 	roff_elem_alloc(r->man, ln, ppos, tok);
3363 	if (buf->buf[pos] != '\0')
3364 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3365 		   "%s %s", roff_name[tok], buf->buf + pos);
3366 	if (tok == ROFF_nf)
3367 		r->man->flags |= ROFF_NOFILL;
3368 	else if (tok == ROFF_fi)
3369 		r->man->flags &= ~ROFF_NOFILL;
3370 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3371 	r->man->next = ROFF_NEXT_SIBLING;
3372 	return ROFF_IGN;
3373 }
3374 
3375 static int
3376 roff_onearg(ROFF_ARGS)
3377 {
3378 	struct roff_node	*n;
3379 	char			*cp;
3380 	int			 npos;
3381 
3382 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3383 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3384 	     tok == ROFF_ti))
3385 		man_breakscope(r->man, tok);
3386 
3387 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3388 		r->man->last = roffce_node;
3389 		r->man->next = ROFF_NEXT_SIBLING;
3390 	}
3391 
3392 	roff_elem_alloc(r->man, ln, ppos, tok);
3393 	n = r->man->last;
3394 
3395 	cp = buf->buf + pos;
3396 	if (*cp != '\0') {
3397 		while (*cp != '\0' && *cp != ' ')
3398 			cp++;
3399 		while (*cp == ' ')
3400 			*cp++ = '\0';
3401 		if (*cp != '\0')
3402 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3403 			    ln, (int)(cp - buf->buf),
3404 			    "%s ... %s", roff_name[tok], cp);
3405 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3406 	}
3407 
3408 	if (tok == ROFF_ce || tok == ROFF_rj) {
3409 		if (r->man->last->type == ROFFT_ELEM) {
3410 			roff_word_alloc(r->man, ln, pos, "1");
3411 			r->man->last->flags |= NODE_NOSRC;
3412 		}
3413 		npos = 0;
3414 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3415 		    &roffce_lines, 0) == 0) {
3416 			mandoc_msg(MANDOCERR_CE_NONUM,
3417 			    ln, pos, "ce %s", buf->buf + pos);
3418 			roffce_lines = 1;
3419 		}
3420 		if (roffce_lines < 1) {
3421 			r->man->last = r->man->last->parent;
3422 			roffce_node = NULL;
3423 			roffce_lines = 0;
3424 		} else
3425 			roffce_node = r->man->last->parent;
3426 	} else {
3427 		n->flags |= NODE_VALID | NODE_ENDED;
3428 		r->man->last = n;
3429 	}
3430 	n->flags |= NODE_LINE;
3431 	r->man->next = ROFF_NEXT_SIBLING;
3432 	return ROFF_IGN;
3433 }
3434 
3435 static int
3436 roff_manyarg(ROFF_ARGS)
3437 {
3438 	struct roff_node	*n;
3439 	char			*sp, *ep;
3440 
3441 	roff_elem_alloc(r->man, ln, ppos, tok);
3442 	n = r->man->last;
3443 
3444 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3445 		while (*ep != '\0' && *ep != ' ')
3446 			ep++;
3447 		while (*ep == ' ')
3448 			*ep++ = '\0';
3449 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3450 	}
3451 
3452 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3453 	r->man->last = n;
3454 	r->man->next = ROFF_NEXT_SIBLING;
3455 	return ROFF_IGN;
3456 }
3457 
3458 static int
3459 roff_als(ROFF_ARGS)
3460 {
3461 	char		*oldn, *newn, *end, *value;
3462 	size_t		 oldsz, newsz, valsz;
3463 
3464 	newn = oldn = buf->buf + pos;
3465 	if (*newn == '\0')
3466 		return ROFF_IGN;
3467 
3468 	newsz = roff_getname(r, &oldn, ln, pos);
3469 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3470 		return ROFF_IGN;
3471 
3472 	end = oldn;
3473 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3474 	if (oldsz == 0)
3475 		return ROFF_IGN;
3476 
3477 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3478 	    (int)oldsz, oldn);
3479 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3480 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3481 	free(value);
3482 	return ROFF_IGN;
3483 }
3484 
3485 static int
3486 roff_cc(ROFF_ARGS)
3487 {
3488 	const char	*p;
3489 
3490 	p = buf->buf + pos;
3491 
3492 	if (*p == '\0' || (r->control = *p++) == '.')
3493 		r->control = '\0';
3494 
3495 	if (*p != '\0')
3496 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3497 		    ln, p - buf->buf, "cc ... %s", p);
3498 
3499 	return ROFF_IGN;
3500 }
3501 
3502 static int
3503 roff_char(ROFF_ARGS)
3504 {
3505 	const char	*p, *kp, *vp;
3506 	size_t		 ksz, vsz;
3507 	int		 font;
3508 
3509 	/* Parse the character to be replaced. */
3510 
3511 	kp = buf->buf + pos;
3512 	p = kp + 1;
3513 	if (*kp == '\0' || (*kp == '\\' &&
3514 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3515 	    (*p != ' ' && *p != '\0')) {
3516 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3517 		return ROFF_IGN;
3518 	}
3519 	ksz = p - kp;
3520 	while (*p == ' ')
3521 		p++;
3522 
3523 	/*
3524 	 * If the replacement string contains a font escape sequence,
3525 	 * we have to restore the font at the end.
3526 	 */
3527 
3528 	vp = p;
3529 	vsz = strlen(p);
3530 	font = 0;
3531 	while (*p != '\0') {
3532 		if (*p++ != '\\')
3533 			continue;
3534 		switch (mandoc_escape(&p, NULL, NULL)) {
3535 		case ESCAPE_FONT:
3536 		case ESCAPE_FONTROMAN:
3537 		case ESCAPE_FONTITALIC:
3538 		case ESCAPE_FONTBOLD:
3539 		case ESCAPE_FONTBI:
3540 		case ESCAPE_FONTCW:
3541 		case ESCAPE_FONTPREV:
3542 			font++;
3543 			break;
3544 		default:
3545 			break;
3546 		}
3547 	}
3548 	if (font > 1)
3549 		mandoc_msg(MANDOCERR_CHAR_FONT,
3550 		    ln, (int)(vp - buf->buf), "%s", vp);
3551 
3552 	/*
3553 	 * Approximate the effect of .char using the .tr tables.
3554 	 * XXX In groff, .char and .tr interact differently.
3555 	 */
3556 
3557 	if (ksz == 1) {
3558 		if (r->xtab == NULL)
3559 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3560 		assert((unsigned int)*kp < 128);
3561 		free(r->xtab[(int)*kp].p);
3562 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3563 		    "%s%s", vp, font ? "\fP" : "");
3564 	} else {
3565 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3566 		if (font)
3567 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3568 	}
3569 	return ROFF_IGN;
3570 }
3571 
3572 static int
3573 roff_ec(ROFF_ARGS)
3574 {
3575 	const char	*p;
3576 
3577 	p = buf->buf + pos;
3578 	if (*p == '\0')
3579 		r->escape = '\\';
3580 	else {
3581 		r->escape = *p;
3582 		if (*++p != '\0')
3583 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3584 			    (int)(p - buf->buf), "ec ... %s", p);
3585 	}
3586 	return ROFF_IGN;
3587 }
3588 
3589 static int
3590 roff_eo(ROFF_ARGS)
3591 {
3592 	r->escape = '\0';
3593 	if (buf->buf[pos] != '\0')
3594 		mandoc_msg(MANDOCERR_ARG_SKIP,
3595 		    ln, pos, "eo %s", buf->buf + pos);
3596 	return ROFF_IGN;
3597 }
3598 
3599 static int
3600 roff_nop(ROFF_ARGS)
3601 {
3602 	while (buf->buf[pos] == ' ')
3603 		pos++;
3604 	*offs = pos;
3605 	return ROFF_RERUN;
3606 }
3607 
3608 static int
3609 roff_tr(ROFF_ARGS)
3610 {
3611 	const char	*p, *first, *second;
3612 	size_t		 fsz, ssz;
3613 	enum mandoc_esc	 esc;
3614 
3615 	p = buf->buf + pos;
3616 
3617 	if (*p == '\0') {
3618 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3619 		return ROFF_IGN;
3620 	}
3621 
3622 	while (*p != '\0') {
3623 		fsz = ssz = 1;
3624 
3625 		first = p++;
3626 		if (*first == '\\') {
3627 			esc = mandoc_escape(&p, NULL, NULL);
3628 			if (esc == ESCAPE_ERROR) {
3629 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3630 				    (int)(p - buf->buf), "%s", first);
3631 				return ROFF_IGN;
3632 			}
3633 			fsz = (size_t)(p - first);
3634 		}
3635 
3636 		second = p++;
3637 		if (*second == '\\') {
3638 			esc = mandoc_escape(&p, NULL, NULL);
3639 			if (esc == ESCAPE_ERROR) {
3640 				mandoc_msg(MANDOCERR_ESC_BAD, ln,
3641 				    (int)(p - buf->buf), "%s", second);
3642 				return ROFF_IGN;
3643 			}
3644 			ssz = (size_t)(p - second);
3645 		} else if (*second == '\0') {
3646 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3647 			    (int)(first - buf->buf), "tr %s", first);
3648 			second = " ";
3649 			p--;
3650 		}
3651 
3652 		if (fsz > 1) {
3653 			roff_setstrn(&r->xmbtab, first, fsz,
3654 			    second, ssz, 0);
3655 			continue;
3656 		}
3657 
3658 		if (r->xtab == NULL)
3659 			r->xtab = mandoc_calloc(128,
3660 			    sizeof(struct roffstr));
3661 
3662 		free(r->xtab[(int)*first].p);
3663 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3664 		r->xtab[(int)*first].sz = ssz;
3665 	}
3666 
3667 	return ROFF_IGN;
3668 }
3669 
3670 /*
3671  * Implementation of the .return request.
3672  * There is no need to call roff_userret() from here.
3673  * The read module will call that after rewinding the reader stack
3674  * to the place from where the current macro was called.
3675  */
3676 static int
3677 roff_return(ROFF_ARGS)
3678 {
3679 	if (r->mstackpos >= 0)
3680 		return ROFF_IGN | ROFF_USERRET;
3681 
3682 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3683 	return ROFF_IGN;
3684 }
3685 
3686 static int
3687 roff_rn(ROFF_ARGS)
3688 {
3689 	const char	*value;
3690 	char		*oldn, *newn, *end;
3691 	size_t		 oldsz, newsz;
3692 	int		 deftype;
3693 
3694 	oldn = newn = buf->buf + pos;
3695 	if (*oldn == '\0')
3696 		return ROFF_IGN;
3697 
3698 	oldsz = roff_getname(r, &newn, ln, pos);
3699 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3700 		return ROFF_IGN;
3701 
3702 	end = newn;
3703 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3704 	if (newsz == 0)
3705 		return ROFF_IGN;
3706 
3707 	deftype = ROFFDEF_ANY;
3708 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3709 	switch (deftype) {
3710 	case ROFFDEF_USER:
3711 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3712 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3713 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3714 		break;
3715 	case ROFFDEF_PRE:
3716 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3717 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3718 		break;
3719 	case ROFFDEF_REN:
3720 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3721 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3722 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3723 		break;
3724 	case ROFFDEF_STD:
3725 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3726 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3727 		break;
3728 	default:
3729 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3730 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3731 		break;
3732 	}
3733 	return ROFF_IGN;
3734 }
3735 
3736 static int
3737 roff_shift(ROFF_ARGS)
3738 {
3739 	struct mctx	*ctx;
3740 	int		 levels, i;
3741 
3742 	levels = 1;
3743 	if (buf->buf[pos] != '\0' &&
3744 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3745 		mandoc_msg(MANDOCERR_CE_NONUM,
3746 		    ln, pos, "shift %s", buf->buf + pos);
3747 		levels = 1;
3748 	}
3749 	if (r->mstackpos < 0) {
3750 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3751 		return ROFF_IGN;
3752 	}
3753 	ctx = r->mstack + r->mstackpos;
3754 	if (levels > ctx->argc) {
3755 		mandoc_msg(MANDOCERR_SHIFT,
3756 		    ln, pos, "%d, but max is %d", levels, ctx->argc);
3757 		levels = ctx->argc;
3758 	}
3759 	if (levels == 0)
3760 		return ROFF_IGN;
3761 	for (i = 0; i < levels; i++)
3762 		free(ctx->argv[i]);
3763 	ctx->argc -= levels;
3764 	for (i = 0; i < ctx->argc; i++)
3765 		ctx->argv[i] = ctx->argv[i + levels];
3766 	return ROFF_IGN;
3767 }
3768 
3769 static int
3770 roff_so(ROFF_ARGS)
3771 {
3772 	char *name, *cp;
3773 
3774 	name = buf->buf + pos;
3775 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3776 
3777 	/*
3778 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3779 	 * opening anything that's not in our cwd or anything beneath
3780 	 * it.  Thus, explicitly disallow traversing up the file-system
3781 	 * or using absolute paths.
3782 	 */
3783 
3784 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3785 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3786 		buf->sz = mandoc_asprintf(&cp,
3787 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3788 		free(buf->buf);
3789 		buf->buf = cp;
3790 		*offs = 0;
3791 		return ROFF_REPARSE;
3792 	}
3793 
3794 	*offs = pos;
3795 	return ROFF_SO;
3796 }
3797 
3798 /* --- user defined strings and macros ------------------------------------ */
3799 
3800 static int
3801 roff_userdef(ROFF_ARGS)
3802 {
3803 	struct mctx	 *ctx;
3804 	char		 *arg, *ap, *dst, *src;
3805 	size_t		  sz;
3806 
3807 	/* Initialize a new macro stack context. */
3808 
3809 	if (++r->mstackpos == r->mstacksz) {
3810 		r->mstack = mandoc_recallocarray(r->mstack,
3811 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3812 		r->mstacksz += 8;
3813 	}
3814 	ctx = r->mstack + r->mstackpos;
3815 	ctx->argsz = 0;
3816 	ctx->argc = 0;
3817 	ctx->argv = NULL;
3818 
3819 	/*
3820 	 * Collect pointers to macro argument strings,
3821 	 * NUL-terminating them and escaping quotes.
3822 	 */
3823 
3824 	src = buf->buf + pos;
3825 	while (*src != '\0') {
3826 		if (ctx->argc == ctx->argsz) {
3827 			ctx->argsz += 8;
3828 			ctx->argv = mandoc_reallocarray(ctx->argv,
3829 			    ctx->argsz, sizeof(*ctx->argv));
3830 		}
3831 		arg = roff_getarg(r, &src, ln, &pos);
3832 		sz = 1;  /* For the terminating NUL. */
3833 		for (ap = arg; *ap != '\0'; ap++)
3834 			sz += *ap == '"' ? 4 : 1;
3835 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3836 		for (ap = arg; *ap != '\0'; ap++) {
3837 			if (*ap == '"') {
3838 				memcpy(dst, "\\(dq", 4);
3839 				dst += 4;
3840 			} else
3841 				*dst++ = *ap;
3842 		}
3843 		*dst = '\0';
3844 		free(arg);
3845 	}
3846 
3847 	/* Replace the macro invocation by the macro definition. */
3848 
3849 	free(buf->buf);
3850 	buf->buf = mandoc_strdup(r->current_string);
3851 	buf->sz = strlen(buf->buf) + 1;
3852 	*offs = 0;
3853 
3854 	return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ?
3855 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3856 }
3857 
3858 /*
3859  * Calling a high-level macro that was renamed with .rn.
3860  * r->current_string has already been set up by roff_parse().
3861  */
3862 static int
3863 roff_renamed(ROFF_ARGS)
3864 {
3865 	char	*nbuf;
3866 
3867 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
3868 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
3869 	free(buf->buf);
3870 	buf->buf = nbuf;
3871 	*offs = 0;
3872 	return ROFF_CONT;
3873 }
3874 
3875 /*
3876  * Measure the length in bytes of the roff identifier at *cpp
3877  * and advance the pointer to the next word.
3878  */
3879 static size_t
3880 roff_getname(struct roff *r, char **cpp, int ln, int pos)
3881 {
3882 	char	 *name, *cp;
3883 	size_t	  namesz;
3884 
3885 	name = *cpp;
3886 	if (*name == '\0')
3887 		return 0;
3888 
3889 	/* Advance cp to the byte after the end of the name. */
3890 
3891 	for (cp = name; 1; cp++) {
3892 		namesz = cp - name;
3893 		if (*cp == '\0')
3894 			break;
3895 		if (*cp == ' ' || *cp == '\t') {
3896 			cp++;
3897 			break;
3898 		}
3899 		if (*cp != '\\')
3900 			continue;
3901 		if (cp[1] == '{' || cp[1] == '}')
3902 			break;
3903 		if (*++cp == '\\')
3904 			continue;
3905 		mandoc_msg(MANDOCERR_NAMESC, ln, pos,
3906 		    "%.*s", (int)(cp - name + 1), name);
3907 		mandoc_escape((const char **)&cp, NULL, NULL);
3908 		break;
3909 	}
3910 
3911 	/* Read past spaces. */
3912 
3913 	while (*cp == ' ')
3914 		cp++;
3915 
3916 	*cpp = cp;
3917 	return namesz;
3918 }
3919 
3920 /*
3921  * Store *string into the user-defined string called *name.
3922  * To clear an existing entry, call with (*r, *name, NULL, 0).
3923  * append == 0: replace mode
3924  * append == 1: single-line append mode
3925  * append == 2: multiline append mode, append '\n' after each call
3926  */
3927 static void
3928 roff_setstr(struct roff *r, const char *name, const char *string,
3929 	int append)
3930 {
3931 	size_t	 namesz;
3932 
3933 	namesz = strlen(name);
3934 	roff_setstrn(&r->strtab, name, namesz, string,
3935 	    string ? strlen(string) : 0, append);
3936 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3937 }
3938 
3939 static void
3940 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
3941 		const char *string, size_t stringsz, int append)
3942 {
3943 	struct roffkv	*n;
3944 	char		*c;
3945 	int		 i;
3946 	size_t		 oldch, newch;
3947 
3948 	/* Search for an existing string with the same name. */
3949 	n = *r;
3950 
3951 	while (n && (namesz != n->key.sz ||
3952 			strncmp(n->key.p, name, namesz)))
3953 		n = n->next;
3954 
3955 	if (NULL == n) {
3956 		/* Create a new string table entry. */
3957 		n = mandoc_malloc(sizeof(struct roffkv));
3958 		n->key.p = mandoc_strndup(name, namesz);
3959 		n->key.sz = namesz;
3960 		n->val.p = NULL;
3961 		n->val.sz = 0;
3962 		n->next = *r;
3963 		*r = n;
3964 	} else if (0 == append) {
3965 		free(n->val.p);
3966 		n->val.p = NULL;
3967 		n->val.sz = 0;
3968 	}
3969 
3970 	if (NULL == string)
3971 		return;
3972 
3973 	/*
3974 	 * One additional byte for the '\n' in multiline mode,
3975 	 * and one for the terminating '\0'.
3976 	 */
3977 	newch = stringsz + (1 < append ? 2u : 1u);
3978 
3979 	if (NULL == n->val.p) {
3980 		n->val.p = mandoc_malloc(newch);
3981 		*n->val.p = '\0';
3982 		oldch = 0;
3983 	} else {
3984 		oldch = n->val.sz;
3985 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
3986 	}
3987 
3988 	/* Skip existing content in the destination buffer. */
3989 	c = n->val.p + (int)oldch;
3990 
3991 	/* Append new content to the destination buffer. */
3992 	i = 0;
3993 	while (i < (int)stringsz) {
3994 		/*
3995 		 * Rudimentary roff copy mode:
3996 		 * Handle escaped backslashes.
3997 		 */
3998 		if ('\\' == string[i] && '\\' == string[i + 1])
3999 			i++;
4000 		*c++ = string[i++];
4001 	}
4002 
4003 	/* Append terminating bytes. */
4004 	if (1 < append)
4005 		*c++ = '\n';
4006 
4007 	*c = '\0';
4008 	n->val.sz = (int)(c - n->val.p);
4009 }
4010 
4011 static const char *
4012 roff_getstrn(struct roff *r, const char *name, size_t len,
4013     int *deftype)
4014 {
4015 	const struct roffkv	*n;
4016 	int			 found, i;
4017 	enum roff_tok		 tok;
4018 
4019 	found = 0;
4020 	for (n = r->strtab; n != NULL; n = n->next) {
4021 		if (strncmp(name, n->key.p, len) != 0 ||
4022 		    n->key.p[len] != '\0' || n->val.p == NULL)
4023 			continue;
4024 		if (*deftype & ROFFDEF_USER) {
4025 			*deftype = ROFFDEF_USER;
4026 			return n->val.p;
4027 		} else {
4028 			found = 1;
4029 			break;
4030 		}
4031 	}
4032 	for (n = r->rentab; n != NULL; n = n->next) {
4033 		if (strncmp(name, n->key.p, len) != 0 ||
4034 		    n->key.p[len] != '\0' || n->val.p == NULL)
4035 			continue;
4036 		if (*deftype & ROFFDEF_REN) {
4037 			*deftype = ROFFDEF_REN;
4038 			return n->val.p;
4039 		} else {
4040 			found = 1;
4041 			break;
4042 		}
4043 	}
4044 	for (i = 0; i < PREDEFS_MAX; i++) {
4045 		if (strncmp(name, predefs[i].name, len) != 0 ||
4046 		    predefs[i].name[len] != '\0')
4047 			continue;
4048 		if (*deftype & ROFFDEF_PRE) {
4049 			*deftype = ROFFDEF_PRE;
4050 			return predefs[i].str;
4051 		} else {
4052 			found = 1;
4053 			break;
4054 		}
4055 	}
4056 	if (r->man->meta.macroset != MACROSET_MAN) {
4057 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4058 			if (strncmp(name, roff_name[tok], len) != 0 ||
4059 			    roff_name[tok][len] != '\0')
4060 				continue;
4061 			if (*deftype & ROFFDEF_STD) {
4062 				*deftype = ROFFDEF_STD;
4063 				return NULL;
4064 			} else {
4065 				found = 1;
4066 				break;
4067 			}
4068 		}
4069 	}
4070 	if (r->man->meta.macroset != MACROSET_MDOC) {
4071 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4072 			if (strncmp(name, roff_name[tok], len) != 0 ||
4073 			    roff_name[tok][len] != '\0')
4074 				continue;
4075 			if (*deftype & ROFFDEF_STD) {
4076 				*deftype = ROFFDEF_STD;
4077 				return NULL;
4078 			} else {
4079 				found = 1;
4080 				break;
4081 			}
4082 		}
4083 	}
4084 
4085 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4086 		if (*deftype & ROFFDEF_REN) {
4087 			/*
4088 			 * This might still be a request,
4089 			 * so do not treat it as undefined yet.
4090 			 */
4091 			*deftype = ROFFDEF_UNDEF;
4092 			return NULL;
4093 		}
4094 
4095 		/* Using an undefined string defines it to be empty. */
4096 
4097 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4098 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4099 	}
4100 
4101 	*deftype = 0;
4102 	return NULL;
4103 }
4104 
4105 static void
4106 roff_freestr(struct roffkv *r)
4107 {
4108 	struct roffkv	 *n, *nn;
4109 
4110 	for (n = r; n; n = nn) {
4111 		free(n->key.p);
4112 		free(n->val.p);
4113 		nn = n->next;
4114 		free(n);
4115 	}
4116 }
4117 
4118 /* --- accessors and utility functions ------------------------------------ */
4119 
4120 /*
4121  * Duplicate an input string, making the appropriate character
4122  * conversations (as stipulated by `tr') along the way.
4123  * Returns a heap-allocated string with all the replacements made.
4124  */
4125 char *
4126 roff_strdup(const struct roff *r, const char *p)
4127 {
4128 	const struct roffkv *cp;
4129 	char		*res;
4130 	const char	*pp;
4131 	size_t		 ssz, sz;
4132 	enum mandoc_esc	 esc;
4133 
4134 	if (NULL == r->xmbtab && NULL == r->xtab)
4135 		return mandoc_strdup(p);
4136 	else if ('\0' == *p)
4137 		return mandoc_strdup("");
4138 
4139 	/*
4140 	 * Step through each character looking for term matches
4141 	 * (remember that a `tr' can be invoked with an escape, which is
4142 	 * a glyph but the escape is multi-character).
4143 	 * We only do this if the character hash has been initialised
4144 	 * and the string is >0 length.
4145 	 */
4146 
4147 	res = NULL;
4148 	ssz = 0;
4149 
4150 	while ('\0' != *p) {
4151 		assert((unsigned int)*p < 128);
4152 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4153 			sz = r->xtab[(int)*p].sz;
4154 			res = mandoc_realloc(res, ssz + sz + 1);
4155 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4156 			ssz += sz;
4157 			p++;
4158 			continue;
4159 		} else if ('\\' != *p) {
4160 			res = mandoc_realloc(res, ssz + 2);
4161 			res[ssz++] = *p++;
4162 			continue;
4163 		}
4164 
4165 		/* Search for term matches. */
4166 		for (cp = r->xmbtab; cp; cp = cp->next)
4167 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4168 				break;
4169 
4170 		if (NULL != cp) {
4171 			/*
4172 			 * A match has been found.
4173 			 * Append the match to the array and move
4174 			 * forward by its keysize.
4175 			 */
4176 			res = mandoc_realloc(res,
4177 			    ssz + cp->val.sz + 1);
4178 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4179 			ssz += cp->val.sz;
4180 			p += (int)cp->key.sz;
4181 			continue;
4182 		}
4183 
4184 		/*
4185 		 * Handle escapes carefully: we need to copy
4186 		 * over just the escape itself, or else we might
4187 		 * do replacements within the escape itself.
4188 		 * Make sure to pass along the bogus string.
4189 		 */
4190 		pp = p++;
4191 		esc = mandoc_escape(&p, NULL, NULL);
4192 		if (ESCAPE_ERROR == esc) {
4193 			sz = strlen(pp);
4194 			res = mandoc_realloc(res, ssz + sz + 1);
4195 			memcpy(res + ssz, pp, sz);
4196 			break;
4197 		}
4198 		/*
4199 		 * We bail out on bad escapes.
4200 		 * No need to warn: we already did so when
4201 		 * roff_expand() was called.
4202 		 */
4203 		sz = (int)(p - pp);
4204 		res = mandoc_realloc(res, ssz + sz + 1);
4205 		memcpy(res + ssz, pp, sz);
4206 		ssz += sz;
4207 	}
4208 
4209 	res[(int)ssz] = '\0';
4210 	return res;
4211 }
4212 
4213 int
4214 roff_getformat(const struct roff *r)
4215 {
4216 
4217 	return r->format;
4218 }
4219 
4220 /*
4221  * Find out whether a line is a macro line or not.
4222  * If it is, adjust the current position and return one; if it isn't,
4223  * return zero and don't change the current position.
4224  * If the control character has been set with `.cc', then let that grain
4225  * precedence.
4226  * This is slighly contrary to groff, where using the non-breaking
4227  * control character when `cc' has been invoked will cause the
4228  * non-breaking macro contents to be printed verbatim.
4229  */
4230 int
4231 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4232 {
4233 	int		pos;
4234 
4235 	pos = *ppos;
4236 
4237 	if (r->control != '\0' && cp[pos] == r->control)
4238 		pos++;
4239 	else if (r->control != '\0')
4240 		return 0;
4241 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4242 		pos += 2;
4243 	else if ('.' == cp[pos] || '\'' == cp[pos])
4244 		pos++;
4245 	else
4246 		return 0;
4247 
4248 	while (' ' == cp[pos] || '\t' == cp[pos])
4249 		pos++;
4250 
4251 	*ppos = pos;
4252 	return 1;
4253 }
4254