xref: /openbsd/usr.bin/mandoc/roff.c (revision f6697133)
1 /* $OpenBSD: roff.c,v 1.272 2023/10/24 20:30:49 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type is a distinct bit so that several can be combined in a mask;
 * ROFFDEF_ANY is the union of all defined kinds, and ROFFDEF_UNDEF is a
 * separate bit that is not part of ROFFDEF_ANY.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
52 
53 /* --- data types --------------------------------------------------------- */
54 
/*
 * An incredibly-simple string buffer:
 * a string pointer together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
62 
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Lists of this type hold the string, rename, and multi-byte
 * translation tables of struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name to look up */
	struct roffstr	 val; /* associated value */
	struct roffkv	*next; /* next in list */
};
71 
/*
 * A single number register as part of a singly-linked list.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* value of the register */
	int		 step; /* NOTE(review): presumably the auto-increment
				 step of the register - confirm */
	struct roffreg	*next; /* next in list */
};
81 
/*
 * Association of request and macro names with token IDs.
 * Entries are allocated by roffhash_alloc() with room for the name
 * appended to the struct; the name member is the hash key.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID belonging to the name */
	char		 name[]; /* nil-terminated request or macro name */
};
89 
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 */
struct	mctx {
	char		**argv; /* argument vector; freed by roff_free() */
	int		 argc; /* NOTE(review): presumably the number of
				 arguments in use - confirm */
	int		 argsz; /* NOTE(review): presumably the allocated
				 size of argv - confirm */
};
99 
/*
 * The state of one roff parser.
 * Allocated by roff_alloc(), returned to its initial state by
 * roff_reset(), and destroyed by roff_free().
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack; -1 when empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack; -1 when empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
127 
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer; they are
 * created by roffnode_push() and destroyed by roffnode_pop().
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
141 
/*
 * The common parameter list of all request handlers.
 * It is used both in the prototypes below and in the roffproc
 * function pointer type stored in the handler table.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler taking the ROFF_ARGS parameters. */
typedef	int (*roffproc)(ROFF_ARGS);
151 
/*
 * The handlers and flags of one roff request;
 * one entry of the roffs[] table.
 * Handlers that do not apply to a request are NULL.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* 0 or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
159 
/*
 * A predefined string: a name and the text it is replaced with.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to one struct predef initializer followed by a comma,
 * so that invocations can be listed without separators.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167 
168 /* --- function prototypes ------------------------------------------------ */
169 
/*
 * Functions private to this file, in alphabetical order.
 * Those declared with ROFF_ARGS are request handlers that can be
 * stored in the roffs[] table.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_expand_patch(struct buf *, int,
				const char *, int);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_mc(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parse_comment(struct roff *, struct buf *,
				int, int, char);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_req_or_macro(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
256 
257 /* --- constant data ------------------------------------------------------ */
258 
/*
 * Flag bits controlling numeric parsing.
 * NOTE(review): presumably passed in the trailing int argument of
 * roff_getnum() and roff_evalnum() - confirm against their definitions.
 */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
261 
/*
 * Names of all requests and macros, indexed by token ID (enum roff_tok);
 * the order of the entries must therefore agree with that enum.
 * NULL entries are skipped by roffhash_alloc() and never become
 * lookup keys.
 * NOTE(review): the NULL entries presumably stand for sentinel tokens
 * such as ROFF_MAX that have no input name - confirm against roff.h.
 * Other code accesses the table through the roff_name pointer below.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
        "box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		"MR",
	NULL
};
/* Read-only view of the name table. */
const	char *const *roff_name = __roff_name;
372 
/*
 * Handlers for all tokens below TOKEN_NONE, indexed by token ID.
 * Each entry gives the proc, text, and sub handlers and the flags of
 * one request, see struct roffmac; the trailing comment names the
 * request.  The entries must stay in the same order as the names
 * in __roff_name[].
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	/*
	 * NOTE(review): the last two entries have no input name;
	 * presumably the tokens for renamed and user-defined macros.
	 */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};
619 
/*
 * Array of injected predefined strings.
 * The initializers come from the included file.
 * NOTE(review): predefs.in presumably consists of PREDEF() invocations
 * and must supply no more than PREDEFS_MAX entries - confirm.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
625 
/*
 * Parser state kept at file scope rather than in struct roff;
 * all four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
630 
631 
632 /* --- request table ------------------------------------------------------ */
633 
634 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)635 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
636 {
637 	struct ohash	*htab;
638 	struct roffreq	*req;
639 	enum roff_tok	 tok;
640 	size_t		 sz;
641 	unsigned int	 slot;
642 
643 	htab = mandoc_malloc(sizeof(*htab));
644 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
645 
646 	for (tok = mintok; tok < maxtok; tok++) {
647 		if (roff_name[tok] == NULL)
648 			continue;
649 		sz = strlen(roff_name[tok]);
650 		req = mandoc_malloc(sizeof(*req) + sz + 1);
651 		req->tok = tok;
652 		memcpy(req->name, roff_name[tok], sz + 1);
653 		slot = ohash_qlookup(htab, req->name);
654 		ohash_insert(htab, slot, req);
655 	}
656 	return htab;
657 }
658 
/*
 * Release a table built by roffhash_alloc(): free every entry,
 * pass the table to ohash_delete(), and free the table itself.
 * A NULL pointer is accepted and ignored.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
673 
674 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)675 roffhash_find(struct ohash *htab, const char *name, size_t sz)
676 {
677 	struct roffreq	*req;
678 	const char	*end;
679 
680 	if (sz) {
681 		end = name + sz;
682 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
683 	} else
684 		req = ohash_find(htab, ohash_qlookup(htab, name));
685 	return req == NULL ? TOKEN_NONE : req->tok;
686 }
687 
688 /* --- stack of request blocks -------------------------------------------- */
689 
690 /*
691  * Pop the current node off of the stack of roff instructions currently
692  * pending.  Return 1 if it is a loop or 0 otherwise.
693  */
694 static int
roffnode_pop(struct roff * r)695 roffnode_pop(struct roff *r)
696 {
697 	struct roffnode	*p;
698 	int		 inloop;
699 
700 	p = r->last;
701 	inloop = p->tok == ROFF_while;
702 	r->last = p->parent;
703 	free(p->name);
704 	free(p->end);
705 	free(p);
706 	return inloop;
707 }
708 
709 /*
710  * Push a roff node onto the instruction stack.  This must later be
711  * removed with roffnode_pop().
712  */
713 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)714 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
715 		int line, int col)
716 {
717 	struct roffnode	*p;
718 
719 	p = mandoc_calloc(1, sizeof(struct roffnode));
720 	p->tok = tok;
721 	if (name)
722 		p->name = mandoc_strdup(name);
723 	p->parent = r->last;
724 	p->line = line;
725 	p->col = col;
726 	p->rule = p->parent ? p->parent->rule : 0;
727 
728 	r->last = p;
729 }
730 
731 /* --- roff parser state data management ---------------------------------- */
732 
733 static void
roff_free1(struct roff * r)734 roff_free1(struct roff *r)
735 {
736 	int		 i;
737 
738 	tbl_free(r->first_tbl);
739 	r->first_tbl = r->last_tbl = r->tbl = NULL;
740 
741 	eqn_free(r->last_eqn);
742 	r->last_eqn = r->eqn = NULL;
743 
744 	while (r->mstackpos >= 0)
745 		roff_userret(r);
746 
747 	while (r->last)
748 		roffnode_pop(r);
749 
750 	free (r->rstack);
751 	r->rstack = NULL;
752 	r->rstacksz = 0;
753 	r->rstackpos = -1;
754 
755 	roff_freereg(r->regtab);
756 	r->regtab = NULL;
757 
758 	roff_freestr(r->strtab);
759 	roff_freestr(r->rentab);
760 	roff_freestr(r->xmbtab);
761 	r->strtab = r->rentab = r->xmbtab = NULL;
762 
763 	if (r->xtab)
764 		for (i = 0; i < 128; i++)
765 			free(r->xtab[i].p);
766 	free(r->xtab);
767 	r->xtab = NULL;
768 }
769 
770 void
roff_reset(struct roff * r)771 roff_reset(struct roff *r)
772 {
773 	roff_free1(r);
774 	r->options |= MPARSE_COMMENT;
775 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
776 	r->control = '\0';
777 	r->escape = '\\';
778 	roffce_lines = 0;
779 	roffce_node = NULL;
780 	roffit_lines = 0;
781 	roffit_macro = NULL;
782 }
783 
784 void
roff_free(struct roff * r)785 roff_free(struct roff *r)
786 {
787 	int		 i;
788 
789 	roff_free1(r);
790 	for (i = 0; i < r->mstacksz; i++)
791 		free(r->mstack[i].argv);
792 	free(r->mstack);
793 	roffhash_free(r->reqtab);
794 	free(r);
795 }
796 
797 struct roff *
roff_alloc(int options)798 roff_alloc(int options)
799 {
800 	struct roff	*r;
801 
802 	r = mandoc_calloc(1, sizeof(struct roff));
803 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
804 	r->options = options | MPARSE_COMMENT;
805 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
806 	r->mstackpos = -1;
807 	r->rstackpos = -1;
808 	r->escape = '\\';
809 	return r;
810 }
811 
812 /* --- syntax tree state data management ---------------------------------- */
813 
814 static void
roff_man_free1(struct roff_man * man)815 roff_man_free1(struct roff_man *man)
816 {
817 	if (man->meta.first != NULL)
818 		roff_node_delete(man, man->meta.first);
819 	free(man->meta.msec);
820 	free(man->meta.vol);
821 	free(man->meta.os);
822 	free(man->meta.arch);
823 	free(man->meta.title);
824 	free(man->meta.name);
825 	free(man->meta.date);
826 	free(man->meta.sodest);
827 }
828 
829 void
roff_state_reset(struct roff_man * man)830 roff_state_reset(struct roff_man *man)
831 {
832 	man->last = man->meta.first;
833 	man->last_es = NULL;
834 	man->flags = 0;
835 	man->lastsec = man->lastnamed = SEC_NONE;
836 	man->next = ROFF_NEXT_CHILD;
837 	roff_setreg(man->roff, "nS", 0, '=');
838 }
839 
840 static void
roff_man_alloc1(struct roff_man * man)841 roff_man_alloc1(struct roff_man *man)
842 {
843 	memset(&man->meta, 0, sizeof(man->meta));
844 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
845 	man->meta.first->type = ROFFT_ROOT;
846 	man->meta.macroset = MACROSET_NONE;
847 	roff_state_reset(man);
848 }
849 
/*
 * Discard the current tree and meta data strings and start over
 * with an empty tree, keeping the state object itself.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* The old contents must go first; alloc1 overwrites the meta data. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
856 
857 void
roff_man_free(struct roff_man * man)858 roff_man_free(struct roff_man *man)
859 {
860 	roff_man_free1(man);
861 	free(man->os_r);
862 	free(man);
863 }
864 
865 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)866 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
867 {
868 	struct roff_man *man;
869 
870 	man = mandoc_calloc(1, sizeof(*man));
871 	man->roff = roff;
872 	man->os_s = os_s;
873 	man->quick = quick;
874 	roff_man_alloc1(man);
875 	roff->man = man;
876 	return man;
877 }
878 
879 /* --- syntax tree handling ----------------------------------------------- */
880 
881 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)882 roff_node_alloc(struct roff_man *man, int line, int pos,
883 	enum roff_type type, int tok)
884 {
885 	struct roff_node	*n;
886 
887 	n = mandoc_calloc(1, sizeof(*n));
888 	n->line = line;
889 	n->pos = pos;
890 	n->tok = tok;
891 	n->type = type;
892 	n->sec = man->lastsec;
893 
894 	if (man->flags & MDOC_SYNOPSIS)
895 		n->flags |= NODE_SYNPRETTY;
896 	else
897 		n->flags &= ~NODE_SYNPRETTY;
898 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
899 		n->flags |= NODE_NOFILL;
900 	else
901 		n->flags &= ~NODE_NOFILL;
902 	if (man->flags & MDOC_NEWLINE)
903 		n->flags |= NODE_LINE;
904 	man->flags &= ~MDOC_NEWLINE;
905 
906 	return n;
907 }
908 
909 void
roff_node_append(struct roff_man * man,struct roff_node * n)910 roff_node_append(struct roff_man *man, struct roff_node *n)
911 {
912 
913 	switch (man->next) {
914 	case ROFF_NEXT_SIBLING:
915 		if (man->last->next != NULL) {
916 			n->next = man->last->next;
917 			man->last->next->prev = n;
918 		} else
919 			man->last->parent->last = n;
920 		man->last->next = n;
921 		n->prev = man->last;
922 		n->parent = man->last->parent;
923 		break;
924 	case ROFF_NEXT_CHILD:
925 		if (man->last->child != NULL) {
926 			n->next = man->last->child;
927 			man->last->child->prev = n;
928 		} else
929 			man->last->last = n;
930 		man->last->child = n;
931 		n->parent = man->last;
932 		break;
933 	default:
934 		abort();
935 	}
936 	man->last = n;
937 
938 	switch (n->type) {
939 	case ROFFT_HEAD:
940 		n->parent->head = n;
941 		break;
942 	case ROFFT_BODY:
943 		if (n->end != ENDBODY_NOT)
944 			return;
945 		n->parent->body = n;
946 		break;
947 	case ROFFT_TAIL:
948 		n->parent->tail = n;
949 		break;
950 	default:
951 		return;
952 	}
953 
954 	/*
955 	 * Copy over the normalised-data pointer of our parent.  Not
956 	 * everybody has one, but copying a null pointer is fine.
957 	 */
958 
959 	n->norm = n->parent->norm;
960 	assert(n->parent->type == ROFFT_BLOCK);
961 }
962 
963 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)964 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
965 {
966 	struct roff_node	*n;
967 
968 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
969 	n->string = roff_strdup(man->roff, word);
970 	roff_node_append(man, n);
971 	n->flags |= NODE_VALID | NODE_ENDED;
972 	man->next = ROFF_NEXT_SIBLING;
973 }
974 
975 void
roff_word_append(struct roff_man * man,const char * word)976 roff_word_append(struct roff_man *man, const char *word)
977 {
978 	struct roff_node	*n;
979 	char			*addstr, *newstr;
980 
981 	n = man->last;
982 	addstr = roff_strdup(man->roff, word);
983 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
984 	free(addstr);
985 	free(n->string);
986 	n->string = newstr;
987 	man->next = ROFF_NEXT_SIBLING;
988 }
989 
990 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)991 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
992 {
993 	struct roff_node	*n;
994 
995 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
996 	roff_node_append(man, n);
997 	man->next = ROFF_NEXT_CHILD;
998 }
999 
1000 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)1001 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1002 {
1003 	struct roff_node	*n;
1004 
1005 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1006 	roff_node_append(man, n);
1007 	man->next = ROFF_NEXT_CHILD;
1008 	return n;
1009 }
1010 
1011 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1012 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1013 {
1014 	struct roff_node	*n;
1015 
1016 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1017 	roff_node_append(man, n);
1018 	man->next = ROFF_NEXT_CHILD;
1019 	return n;
1020 }
1021 
1022 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1023 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1024 {
1025 	struct roff_node	*n;
1026 
1027 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1028 	roff_node_append(man, n);
1029 	man->next = ROFF_NEXT_CHILD;
1030 	return n;
1031 }
1032 
1033 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1034 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1035 {
1036 	struct roff_node	*n;
1037 	struct tbl_span		*span;
1038 
1039 	if (man->meta.macroset == MACROSET_MAN)
1040 		man_breakscope(man, ROFF_TS);
1041 	while ((span = tbl_span(tbl)) != NULL) {
1042 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1043 		n->span = span;
1044 		roff_node_append(man, n);
1045 		n->flags |= NODE_VALID | NODE_ENDED;
1046 		man->next = ROFF_NEXT_SIBLING;
1047 	}
1048 }
1049 
1050 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1051 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1052 {
1053 
1054 	/* Adjust siblings. */
1055 
1056 	if (n->prev)
1057 		n->prev->next = n->next;
1058 	if (n->next)
1059 		n->next->prev = n->prev;
1060 
1061 	/* Adjust parent. */
1062 
1063 	if (n->parent != NULL) {
1064 		if (n->parent->child == n)
1065 			n->parent->child = n->next;
1066 		if (n->parent->last == n)
1067 			n->parent->last = n->prev;
1068 	}
1069 
1070 	/* Adjust parse point. */
1071 
1072 	if (man == NULL)
1073 		return;
1074 	if (man->last == n) {
1075 		if (n->prev == NULL) {
1076 			man->last = n->parent;
1077 			man->next = ROFF_NEXT_CHILD;
1078 		} else {
1079 			man->last = n->prev;
1080 			man->next = ROFF_NEXT_SIBLING;
1081 		}
1082 	}
1083 	if (man->meta.first == n)
1084 		man->meta.first = NULL;
1085 }
1086 
1087 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1088 roff_node_relink(struct roff_man *man, struct roff_node *n)
1089 {
1090 	roff_node_unlink(man, n);
1091 	n->prev = n->next = NULL;
1092 	roff_node_append(man, n);
1093 }
1094 
1095 void
roff_node_free(struct roff_node * n)1096 roff_node_free(struct roff_node *n)
1097 {
1098 
1099 	if (n->args != NULL)
1100 		mdoc_argv_free(n->args);
1101 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1102 		free(n->norm);
1103 	eqn_box_free(n->eqn);
1104 	free(n->string);
1105 	free(n->tag);
1106 	free(n);
1107 }
1108 
1109 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1110 roff_node_delete(struct roff_man *man, struct roff_node *n)
1111 {
1112 
1113 	while (n->child != NULL)
1114 		roff_node_delete(man, n->child);
1115 	roff_node_unlink(man, n);
1116 	roff_node_free(n);
1117 }
1118 
1119 int
roff_node_transparent(struct roff_node * n)1120 roff_node_transparent(struct roff_node *n)
1121 {
1122 	if (n == NULL)
1123 		return 0;
1124 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1125 		return 1;
1126 	return roff_tok_transparent(n->tok);
1127 }
1128 
1129 int
roff_tok_transparent(enum roff_tok tok)1130 roff_tok_transparent(enum roff_tok tok)
1131 {
1132 	switch (tok) {
1133 	case ROFF_ft:
1134 	case ROFF_ll:
1135 	case ROFF_mc:
1136 	case ROFF_po:
1137 	case ROFF_ta:
1138 	case MDOC_Db:
1139 	case MDOC_Es:
1140 	case MDOC_Sm:
1141 	case MDOC_Tg:
1142 	case MAN_DT:
1143 	case MAN_UC:
1144 	case MAN_PD:
1145 	case MAN_AT:
1146 		return 1;
1147 	default:
1148 		return 0;
1149 	}
1150 }
1151 
1152 struct roff_node *
roff_node_child(struct roff_node * n)1153 roff_node_child(struct roff_node *n)
1154 {
1155 	for (n = n->child; roff_node_transparent(n); n = n->next)
1156 		continue;
1157 	return n;
1158 }
1159 
1160 struct roff_node *
roff_node_prev(struct roff_node * n)1161 roff_node_prev(struct roff_node *n)
1162 {
1163 	do {
1164 		n = n->prev;
1165 	} while (roff_node_transparent(n));
1166 	return n;
1167 }
1168 
1169 struct roff_node *
roff_node_next(struct roff_node * n)1170 roff_node_next(struct roff_node *n)
1171 {
1172 	do {
1173 		n = n->next;
1174 	} while (roff_node_transparent(n));
1175 	return n;
1176 }
1177 
1178 void
deroff(char ** dest,const struct roff_node * n)1179 deroff(char **dest, const struct roff_node *n)
1180 {
1181 	char	*cp;
1182 	size_t	 sz;
1183 
1184 	if (n->string == NULL) {
1185 		for (n = n->child; n != NULL; n = n->next)
1186 			deroff(dest, n);
1187 		return;
1188 	}
1189 
1190 	/* Skip leading whitespace. */
1191 
1192 	for (cp = n->string; *cp != '\0'; cp++) {
1193 		if (cp[0] == '\\' && cp[1] != '\0' &&
1194 		    strchr(" %&0^|~", cp[1]) != NULL)
1195 			cp++;
1196 		else if ( ! isspace((unsigned char)*cp))
1197 			break;
1198 	}
1199 
1200 	/* Skip trailing backslash. */
1201 
1202 	sz = strlen(cp);
1203 	if (sz > 0 && cp[sz - 1] == '\\')
1204 		sz--;
1205 
1206 	/* Skip trailing whitespace. */
1207 
1208 	for (; sz; sz--)
1209 		if ( ! isspace((unsigned char)cp[sz-1]))
1210 			break;
1211 
1212 	/* Skip empty strings. */
1213 
1214 	if (sz == 0)
1215 		return;
1216 
1217 	if (*dest == NULL) {
1218 		*dest = mandoc_strndup(cp, sz);
1219 		return;
1220 	}
1221 
1222 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1223 	free(*dest);
1224 	*dest = cp;
1225 }
1226 
1227 /* --- main functions of the roff parser ---------------------------------- */
1228 
1229 /*
1230  * Save comments preceding the title macro, for example in order to
1231  * preserve Copyright and license headers in HTML output,
1232  * provide diagnostics about RCS ids and trailing whitespace in comments,
1233  * then discard comments including preceding whitespace.
1234  * This function also handles input line continuation.
1235  */
1236 static int
roff_parse_comment(struct roff * r,struct buf * buf,int ln,int pos,char ec)1237 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1238 {
1239 	struct roff_node *n;	/* used for header comments */
1240 	const char	*start;	/* start of the string to process */
1241 	const char	*cp;	/* for RCS id parsing */
1242 	char		*stesc;	/* start of an escape sequence ('\\') */
1243 	char		*ep;	/* end of comment string */
1244 	int		 rcsid;	/* kind of RCS id seen */
1245 
1246 	for (start = stesc = buf->buf + pos;; stesc++) {
1247 		/*
1248 		 * XXX Ugly hack: Remove the newline character that
1249 		 * mparse_buf_r() appended to mark the end of input
1250 		 * if it is not preceded by an escape character.
1251 		 */
1252 		if (stesc[0] == '\n') {
1253 			assert(stesc[1] == '\0');
1254 			stesc[0] = '\0';
1255 		}
1256 
1257 		/* The line ends without continuation or comment. */
1258 		if (stesc[0] == '\0')
1259 			return ROFF_CONT;
1260 
1261 		/* Unescaped byte: skip it. */
1262 		if (stesc[0] != ec)
1263 			continue;
1264 
1265 		/*
1266 		 * XXX Ugly hack: Do not attempt to append another line
1267 		 * if the function mparse_buf_r() appended a newline
1268 		 * character to indicate the end of input.
1269 		 */
1270 		if (stesc[1] == '\n') {
1271 			assert(stesc[2] == '\0');
1272 			stesc[0] = '\0';
1273 			return ROFF_CONT;
1274 		}
1275 
1276 		/*
1277 		 * An escape character at the end of an input line
1278 		 * requests line continuation.
1279 		 */
1280 		if (stesc[1] == '\0') {
1281 			stesc[0] = '\0';
1282 			return ROFF_IGN | ROFF_APPEND;
1283 		}
1284 
1285 		/* Found a comment: process it. */
1286 		if (stesc[1] == '"' || stesc[1] == '#')
1287 			break;
1288 
1289 		/* Escaped escape character: skip them both. */
1290 		if (stesc[1] == ec)
1291 			stesc++;
1292 	}
1293 
1294 	/* Look for an RCS id in the comment. */
1295 
1296 	rcsid = 0;
1297 	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
1298 		rcsid = 1 << MANDOC_OS_OPENBSD;
1299 		cp += 8;
1300 	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
1301 		rcsid = 1 << MANDOC_OS_NETBSD;
1302 		cp += 7;
1303 	}
1304 	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
1305 	    strchr(cp, '$') != NULL) {
1306 		if (r->man->meta.rcsids & rcsid)
1307 			mandoc_msg(MANDOCERR_RCS_REP, ln,
1308 			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
1309 		r->man->meta.rcsids |= rcsid;
1310 	}
1311 
1312 	/* Warn about trailing whitespace at the end of the comment. */
1313 
1314 	ep = strchr(stesc + 2, '\0') - 1;
1315 	if (*ep == '\n')
1316 		*ep-- = '\0';
1317 	if (*ep == ' ' || *ep == '\t')
1318 		mandoc_msg(MANDOCERR_SPACE_EOL,
1319 		    ln, (int)(ep - buf->buf), NULL);
1320 
1321 	/* Save comments preceding the title macro in the syntax tree. */
1322 
1323 	if (r->options & MPARSE_COMMENT) {
1324 		while (*ep == ' ' || *ep == '\t')
1325 			ep--;
1326 		ep[1] = '\0';
1327 		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
1328 		    ROFFT_COMMENT, TOKEN_NONE);
1329 		n->string = mandoc_strdup(stesc + 2);
1330 		roff_node_append(r->man, n);
1331 		n->flags |= NODE_VALID | NODE_ENDED;
1332 		r->man->next = ROFF_NEXT_SIBLING;
1333 	}
1334 
1335 	/* The comment requests line continuation. */
1336 
1337 	if (stesc[1] == '#') {
1338 		*stesc = '\0';
1339 		return ROFF_IGN | ROFF_APPEND;
1340 	}
1341 
1342 	/* Discard the comment including preceding whitespace. */
1343 
1344 	while (stesc > start && stesc[-1] == ' ' &&
1345 	    (stesc == start + 1 || stesc[-2] != '\\'))
1346 		stesc--;
1347 	*stesc = '\0';
1348 	return ROFF_CONT;
1349 }
1350 
1351 /*
1352  * In the current line, expand escape sequences that produce parsable
1353  * input text.  Also check the syntax of the remaining escape sequences,
1354  * which typically produce output glyphs or change formatter state.
1355  */
1356 static int
roff_expand(struct roff * r,struct buf * buf,int ln,int pos,char ec)1357 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1358 {
1359 	char		 ubuf[24];	/* buffer to print a number */
1360 	struct mctx	*ctx;		/* current macro call context */
1361 	const char	*res;		/* the string to be pasted */
1362 	const char	*src;		/* source for copying */
1363 	char		*dst;		/* destination for copying */
1364 	enum mandoc_esc	 subtype;	/* return value from roff_escape */
1365 	int		 iesc;		/* index of leading escape char */
1366 	int		 inam;		/* index of the escape name */
1367 	int		 iarg;		/* index beginning the argument */
1368 	int		 iendarg;	/* index right after the argument */
1369 	int		 iend;		/* index right after the sequence */
1370 	int		 isrc, idst;	/* to reduce \\ and \. in names */
1371 	int		 deftype;	/* type of definition to paste */
1372 	int		 argi;		/* macro argument index */
1373 	int		 quote_args;	/* true for \\$@, false for \\$* */
1374 	int		 asz;		/* length of the replacement */
1375 	int		 rsz;		/* length of the rest of the string */
1376 	int		 npos;		/* position in numeric expression */
1377 	int		 expand_count;	/* to avoid infinite loops */
1378 
1379 	expand_count = 0;
1380 	while (buf->buf[pos] != '\0') {
1381 
1382 		/*
1383 		 * Skip plain ASCII characters.
1384 		 * If we have a non-standard escape character,
1385 		 * escape literal backslashes because all processing in
1386 		 * subsequent functions uses the standard escaping rules.
1387 		 */
1388 
1389 		if (buf->buf[pos] != ec) {
1390 			if (buf->buf[pos] == '\\') {
1391 				roff_expand_patch(buf, pos, "\\e", pos + 1);
1392 				pos++;
1393 			}
1394 			pos++;
1395 			continue;
1396 		}
1397 
1398 		/*
1399 		 * Parse escape sequences,
1400 		 * issue diagnostic messages when appropriate,
1401 		 * and skip sequences that do not need expansion.
1402 		 * If we have a non-standard escape character, translate
1403 		 * it to backslashes and translate backslashes to \e.
1404 		 */
1405 
1406 		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
1407 		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
1408 			while (pos < iend) {
1409 				if (buf->buf[pos] == ec) {
1410 					buf->buf[pos] = '\\';
1411 					if (pos + 1 < iend)
1412 						pos++;
1413 				} else if (buf->buf[pos] == '\\') {
1414 					roff_expand_patch(buf,
1415 					    pos, "\\e", pos + 1);
1416 					pos++;
1417 					iend++;
1418 				}
1419 				pos++;
1420 			}
1421 			continue;
1422 		}
1423 
1424 		/* Reduce \\ and \. in names. */
1425 
1426 		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
1427 			isrc = idst = iarg;
1428 			while (isrc < iendarg) {
1429 				if (isrc + 1 < iendarg &&
1430 				    buf->buf[isrc] == '\\' &&
1431 				    (buf->buf[isrc + 1] == '\\' ||
1432 				     buf->buf[isrc + 1] == '.'))
1433 					isrc++;
1434 				buf->buf[idst++] = buf->buf[isrc++];
1435 			}
1436 			iendarg -= isrc - idst;
1437 		}
1438 
1439 		/* Handle expansion. */
1440 
1441 		res = NULL;
1442 		switch (buf->buf[inam]) {
1443 		case '*':
1444 			if (iendarg == iarg)
1445 				break;
1446 			deftype = ROFFDEF_USER | ROFFDEF_PRE;
1447 			if ((res = roff_getstrn(r, buf->buf + iarg,
1448 			    iendarg - iarg, &deftype)) != NULL)
1449 				break;
1450 
1451 			/*
1452 			 * If not overridden,
1453 			 * let \*(.T through to the formatters.
1454 			 */
1455 
1456 			if (iendarg - iarg == 2 &&
1457 			    buf->buf[iarg] == '.' &&
1458 			    buf->buf[iarg + 1] == 'T') {
1459 				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
1460 				pos = iend;
1461 				continue;
1462 			}
1463 
1464 			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
1465 			    "%.*s", iendarg - iarg, buf->buf + iarg);
1466 			break;
1467 
1468 		case '$':
1469 			if (r->mstackpos < 0) {
1470 				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
1471 				    "%.*s", iend - iesc, buf->buf + iesc);
1472 				break;
1473 			}
1474 			ctx = r->mstack + r->mstackpos;
1475 			argi = buf->buf[iarg] - '1';
1476 			if (argi >= 0 && argi <= 8) {
1477 				if (argi < ctx->argc)
1478 					res = ctx->argv[argi];
1479 				break;
1480 			}
1481 			if (buf->buf[iarg] == '*')
1482 				quote_args = 0;
1483 			else if (buf->buf[iarg] == '@')
1484 				quote_args = 1;
1485 			else {
1486 				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
1487 				    "%.*s", iend - iesc, buf->buf + iesc);
1488 				break;
1489 			}
1490 			asz = 0;
1491 			for (argi = 0; argi < ctx->argc; argi++) {
1492 				if (argi)
1493 					asz++;  /* blank */
1494 				if (quote_args)
1495 					asz += 2;  /* quotes */
1496 				asz += strlen(ctx->argv[argi]);
1497 			}
1498 			if (asz != iend - iesc) {
1499 				rsz = buf->sz - iend;
1500 				if (asz < iend - iesc)
1501 					memmove(buf->buf + iesc + asz,
1502 					    buf->buf + iend, rsz);
1503 				buf->sz = iesc + asz + rsz;
1504 				buf->buf = mandoc_realloc(buf->buf, buf->sz);
1505 				if (asz > iend - iesc)
1506 					memmove(buf->buf + iesc + asz,
1507 					    buf->buf + iend, rsz);
1508 			}
1509 			dst = buf->buf + iesc;
1510 			for (argi = 0; argi < ctx->argc; argi++) {
1511 				if (argi)
1512 					*dst++ = ' ';
1513 				if (quote_args)
1514 					*dst++ = '"';
1515 				src = ctx->argv[argi];
1516 				while (*src != '\0')
1517 					*dst++ = *src++;
1518 				if (quote_args)
1519 					*dst++ = '"';
1520 			}
1521 			continue;
1522 		case 'A':
1523 			ubuf[0] = iendarg > iarg ? '1' : '0';
1524 			ubuf[1] = '\0';
1525 			res = ubuf;
1526 			break;
1527 		case 'B':
1528 			npos = 0;
1529 			ubuf[0] = iendarg > iarg && iend > iendarg &&
1530 			    roff_evalnum(r, ln, buf->buf + iarg, &npos,
1531 					 NULL, ROFFNUM_SCALE) &&
1532 			    npos == iendarg - iarg ? '1' : '0';
1533 			ubuf[1] = '\0';
1534 			res = ubuf;
1535 			break;
1536 		case 'V':
1537 			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
1538 			    "%.*s", iend - iesc, buf->buf + iesc);
1539 			roff_expand_patch(buf, iendarg, "}", iend);
1540 			roff_expand_patch(buf, iesc, "${", iarg);
1541 			continue;
1542 		case 'g':
1543 			break;
1544 		case 'n':
1545 			if (iendarg > iarg)
1546 				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1547 				    roff_getregn(r, buf->buf + iarg,
1548 				    iendarg - iarg, buf->buf[inam + 1]));
1549 			else
1550 				ubuf[0] = '\0';
1551 			res = ubuf;
1552 			break;
1553 		case 'w':
1554 			rsz = 0;
1555 			subtype = ESCAPE_UNDEF;
1556 			while (iarg < iendarg) {
1557 				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
1558 				if (buf->buf[iarg] != '\\') {
1559 					rsz += asz;
1560 					iarg++;
1561 					continue;
1562 				}
1563 				switch ((subtype = roff_escape(buf->buf, 0,
1564 				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
1565 				case ESCAPE_SPECIAL:
1566 				case ESCAPE_NUMBERED:
1567 				case ESCAPE_UNICODE:
1568 				case ESCAPE_OVERSTRIKE:
1569 				case ESCAPE_UNDEF:
1570 					break;
1571 				case ESCAPE_DEVICE:
1572 					asz *= 8;
1573 					break;
1574 				case ESCAPE_EXPAND:
1575 					abort();
1576 				default:
1577 					continue;
1578 				}
1579 				rsz += asz;
1580 			}
1581 			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
1582 			res = ubuf;
1583 			break;
1584 		default:
1585 			break;
1586 		}
1587 		if (res == NULL)
1588 			res = "";
1589 		if (++expand_count > EXPAND_LIMIT ||
1590 		    buf->sz + strlen(res) > SHRT_MAX) {
1591 			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
1592 			return ROFF_IGN;
1593 		}
1594 		roff_expand_patch(buf, iesc, res, iend);
1595 	}
1596 	return ROFF_CONT;
1597 }
1598 
1599 /*
1600  * Replace the substring from the start position (inclusive)
1601  * to end position (exclusive) with the repl(acement) string.
1602  */
1603 static void
roff_expand_patch(struct buf * buf,int start,const char * repl,int end)1604 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1605 {
1606 	char	*nbuf;
1607 
1608 	buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1609 	    repl, buf->buf + end) + 1;
1610 	free(buf->buf);
1611 	buf->buf = nbuf;
1612 }
1613 
1614 /*
1615  * Parse a quoted or unquoted roff-style request or macro argument.
1616  * Return a pointer to the parsed argument, which is either the original
1617  * pointer or advanced by one byte in case the argument is quoted.
1618  * NUL-terminate the argument in place.
1619  * Collapse pairs of quotes inside quoted arguments.
1620  * Advance the argument pointer to the next argument,
1621  * or to the NUL byte terminating the argument line.
1622  */
1623 char *
roff_getarg(struct roff * r,char ** cpp,int ln,int * pos)1624 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1625 {
1626 	struct buf	 buf;
1627 	char		*cp, *start;
1628 	int		 newesc, pairs, quoted, white;
1629 
1630 	/* Quoting can only start with a new word. */
1631 	start = *cpp;
1632 	quoted = 0;
1633 	if ('"' == *start) {
1634 		quoted = 1;
1635 		start++;
1636 	}
1637 
1638 	newesc = pairs = white = 0;
1639 	for (cp = start; '\0' != *cp; cp++) {
1640 
1641 		/*
1642 		 * Move the following text left
1643 		 * after quoted quotes and after "\\" and "\t".
1644 		 */
1645 		if (pairs)
1646 			cp[-pairs] = cp[0];
1647 
1648 		if ('\\' == cp[0]) {
1649 			/*
1650 			 * In copy mode, translate double to single
1651 			 * backslashes and backslash-t to literal tabs.
1652 			 */
1653 			switch (cp[1]) {
1654 			case 'a':
1655 			case 't':
1656 				cp[-pairs] = '\t';
1657 				pairs++;
1658 				cp++;
1659 				break;
1660 			case '\\':
1661 				cp[-pairs] = '\\';
1662 				newesc = 1;
1663 				pairs++;
1664 				cp++;
1665 				break;
1666 			case ' ':
1667 				/* Skip escaped blanks. */
1668 				if (0 == quoted)
1669 					cp++;
1670 				break;
1671 			default:
1672 				break;
1673 			}
1674 		} else if (0 == quoted) {
1675 			if (' ' == cp[0]) {
1676 				/* Unescaped blanks end unquoted args. */
1677 				white = 1;
1678 				break;
1679 			}
1680 		} else if ('"' == cp[0]) {
1681 			if ('"' == cp[1]) {
1682 				/* Quoted quotes collapse. */
1683 				pairs++;
1684 				cp++;
1685 			} else {
1686 				/* Unquoted quotes end quoted args. */
1687 				quoted = 2;
1688 				break;
1689 			}
1690 		}
1691 	}
1692 
1693 	/* Quoted argument without a closing quote. */
1694 	if (1 == quoted)
1695 		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1696 
1697 	/* NUL-terminate this argument and move to the next one. */
1698 	if (pairs)
1699 		cp[-pairs] = '\0';
1700 	if ('\0' != *cp) {
1701 		*cp++ = '\0';
1702 		while (' ' == *cp)
1703 			cp++;
1704 	}
1705 	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1706 	*cpp = cp;
1707 
1708 	if ('\0' == *cp && (white || ' ' == cp[-1]))
1709 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1710 
1711 	start = mandoc_strdup(start);
1712 	if (newesc == 0)
1713 		return start;
1714 
1715 	buf.buf = start;
1716 	buf.sz = strlen(start) + 1;
1717 	buf.next = NULL;
1718 	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
1719 		free(buf.buf);
1720 		buf.buf = mandoc_strdup("");
1721 	}
1722 	return buf.buf;
1723 }
1724 
1725 
1726 /*
1727  * Process text streams.
1728  */
1729 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1730 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1731 {
1732 	size_t		 sz;
1733 	const char	*start;
1734 	char		*p;
1735 	int		 isz;
1736 	enum mandoc_esc	 esc;
1737 
1738 	/* Spring the input line trap. */
1739 
1740 	if (roffit_lines == 1) {
1741 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1742 		free(buf->buf);
1743 		buf->buf = p;
1744 		buf->sz = isz + 1;
1745 		*offs = 0;
1746 		free(roffit_macro);
1747 		roffit_lines = 0;
1748 		return ROFF_REPARSE;
1749 	} else if (roffit_lines > 1)
1750 		--roffit_lines;
1751 
1752 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1753 		if (roffce_lines < 1) {
1754 			r->man->last = roffce_node;
1755 			r->man->next = ROFF_NEXT_SIBLING;
1756 			roffce_lines = 0;
1757 			roffce_node = NULL;
1758 		} else
1759 			roffce_lines--;
1760 	}
1761 
1762 	/* Convert all breakable hyphens into ASCII_HYPH. */
1763 
1764 	start = p = buf->buf + pos;
1765 
1766 	while (*p != '\0') {
1767 		sz = strcspn(p, "-\\");
1768 		p += sz;
1769 
1770 		if (*p == '\0')
1771 			break;
1772 
1773 		if (*p == '\\') {
1774 			/* Skip over escapes. */
1775 			p++;
1776 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1777 			if (esc == ESCAPE_ERROR)
1778 				break;
1779 			while (*p == '-')
1780 				p++;
1781 			continue;
1782 		} else if (p == start) {
1783 			p++;
1784 			continue;
1785 		}
1786 
1787 		if (isalpha((unsigned char)p[-1]) &&
1788 		    isalpha((unsigned char)p[1]))
1789 			*p = ASCII_HYPH;
1790 		p++;
1791 	}
1792 	return ROFF_CONT;
1793 }
1794 
1795 int
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs,size_t len)1796 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1797 {
1798 	enum roff_tok	 t;
1799 	int		 e;
1800 	int		 pos;	/* parse point */
1801 	int		 spos;	/* saved parse point for messages */
1802 	int		 ppos;	/* original offset in buf->buf */
1803 	int		 ctl;	/* macro line (boolean) */
1804 
1805 	ppos = pos = *offs;
1806 
1807 	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1808 	    (r->man->flags & ROFF_NOFILL) == 0 &&
1809 	    strchr(" .\\", buf->buf[pos]) == NULL &&
1810 	    buf->buf[pos] != r->control &&
1811 	    strcspn(buf->buf, " ") < 80)
1812 		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1813 		    "%.20s...", buf->buf + pos);
1814 
1815 	/* Handle in-line equation delimiters. */
1816 
1817 	if (r->tbl == NULL &&
1818 	    r->last_eqn != NULL && r->last_eqn->delim &&
1819 	    (r->eqn == NULL || r->eqn_inline)) {
1820 		e = roff_eqndelim(r, buf, pos);
1821 		if (e == ROFF_REPARSE)
1822 			return e;
1823 		assert(e == ROFF_CONT);
1824 	}
1825 
1826 	/* Handle comments and escape sequences. */
1827 
1828 	e = roff_parse_comment(r, buf, ln, pos, r->escape);
1829 	if ((e & ROFF_MASK) == ROFF_IGN)
1830 		return e;
1831 	assert(e == ROFF_CONT);
1832 
1833 	e = roff_expand(r, buf, ln, pos, r->escape);
1834 	if ((e & ROFF_MASK) == ROFF_IGN)
1835 		return e;
1836 	assert(e == ROFF_CONT);
1837 
1838 	ctl = roff_getcontrol(r, buf->buf, &pos);
1839 
1840 	/*
1841 	 * First, if a scope is open and we're not a macro, pass the
1842 	 * text through the macro's filter.
1843 	 * Equations process all content themselves.
1844 	 * Tables process almost all content themselves, but we want
1845 	 * to warn about macros before passing it there.
1846 	 */
1847 
1848 	if (r->last != NULL && ! ctl) {
1849 		t = r->last->tok;
1850 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1851 		if ((e & ROFF_MASK) == ROFF_IGN)
1852 			return e;
1853 		e &= ~ROFF_MASK;
1854 	} else
1855 		e = ROFF_IGN;
1856 	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1857 		eqn_read(r->eqn, buf->buf + ppos);
1858 		return e;
1859 	}
1860 	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1861 		tbl_read(r->tbl, ln, buf->buf, ppos);
1862 		roff_addtbl(r->man, ln, r->tbl);
1863 		return e;
1864 	}
1865 	if ( ! ctl) {
1866 		r->options &= ~MPARSE_COMMENT;
1867 		return roff_parsetext(r, buf, pos, offs) | e;
1868 	}
1869 
1870 	/* Skip empty request lines. */
1871 
1872 	if (buf->buf[pos] == '"') {
1873 		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1874 		return ROFF_IGN;
1875 	} else if (buf->buf[pos] == '\0')
1876 		return ROFF_IGN;
1877 
1878 	/*
1879 	 * If a scope is open, go to the child handler for that macro,
1880 	 * as it may want to preprocess before doing anything with it.
1881 	 */
1882 
1883 	if (r->last) {
1884 		t = r->last->tok;
1885 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1886 	}
1887 
1888 	r->options &= ~MPARSE_COMMENT;
1889 	spos = pos;
1890 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1891 	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
1892 }
1893 
1894 /*
1895  * Handle a new request or macro.
1896  * May be called outside any scope or from inside a conditional scope.
1897  */
1898 static int
roff_req_or_macro(ROFF_ARGS)1899 roff_req_or_macro(ROFF_ARGS) {
1900 
1901 	/* For now, tables ignore most macros and some request. */
1902 
1903 	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1904 	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1905 	    tok == ROFF_sp)) {
1906 		mandoc_msg(MANDOCERR_TBLMACRO,
1907 		    ln, ppos, "%s", buf->buf + ppos);
1908 		if (tok != TOKEN_NONE)
1909 			return ROFF_IGN;
1910 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1911 			pos++;
1912 		while (buf->buf[pos] == ' ')
1913 			pos++;
1914 		tbl_read(r->tbl, ln, buf->buf, pos);
1915 		roff_addtbl(r->man, ln, r->tbl);
1916 		return ROFF_IGN;
1917 	}
1918 
1919 	/* For now, let high level macros abort .ce mode. */
1920 
1921 	if (roffce_node != NULL &&
1922 	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1923 	     tok == ROFF_TH || tok == ROFF_TS)) {
1924 		r->man->last = roffce_node;
1925 		r->man->next = ROFF_NEXT_SIBLING;
1926 		roffce_lines = 0;
1927 		roffce_node = NULL;
1928 	}
1929 
1930 	/*
1931 	 * This is neither a roff request nor a user-defined macro.
1932 	 * Let the standard macro set parsers handle it.
1933 	 */
1934 
1935 	if (tok == TOKEN_NONE)
1936 		return ROFF_CONT;
1937 
1938 	/* Execute a roff request or a user-defined macro. */
1939 
1940 	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1941 }
1942 
1943 /*
1944  * Internal interface function to tell the roff parser that execution
1945  * of the current macro ended.  This is required because macro
1946  * definitions usually do not end with a .return request.
1947  */
1948 void
roff_userret(struct roff * r)1949 roff_userret(struct roff *r)
1950 {
1951 	struct mctx	*ctx;
1952 	int		 i;
1953 
1954 	assert(r->mstackpos >= 0);
1955 	ctx = r->mstack + r->mstackpos;
1956 	for (i = 0; i < ctx->argc; i++)
1957 		free(ctx->argv[i]);
1958 	ctx->argc = 0;
1959 	r->mstackpos--;
1960 }
1961 
1962 void
roff_endparse(struct roff * r)1963 roff_endparse(struct roff *r)
1964 {
1965 	if (r->last != NULL)
1966 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1967 		    r->last->col, "%s", roff_name[r->last->tok]);
1968 
1969 	if (r->eqn != NULL) {
1970 		mandoc_msg(MANDOCERR_BLK_NOEND,
1971 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1972 		eqn_parse(r->eqn);
1973 		r->eqn = NULL;
1974 	}
1975 
1976 	if (r->tbl != NULL) {
1977 		tbl_end(r->tbl, 1);
1978 		r->tbl = NULL;
1979 	}
1980 }
1981 
1982 /*
1983  * Parse the request or macro name at buf[*pos].
1984  * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1985  * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1986  * As a side effect, set r->current_string to the definition or to NULL.
1987  */
1988 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1989 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1990 {
1991 	char		*cp;
1992 	const char	*mac;
1993 	size_t		 maclen;
1994 	int		 deftype;
1995 	enum roff_tok	 t;
1996 
1997 	cp = buf + *pos;
1998 
1999 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2000 		return TOKEN_NONE;
2001 
2002 	mac = cp;
2003 	maclen = roff_getname(r, &cp, ln, ppos);
2004 
2005 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2006 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2007 	switch (deftype) {
2008 	case ROFFDEF_USER:
2009 		t = ROFF_USERDEF;
2010 		break;
2011 	case ROFFDEF_REN:
2012 		t = ROFF_RENAMED;
2013 		break;
2014 	default:
2015 		t = roffhash_find(r->reqtab, mac, maclen);
2016 		break;
2017 	}
2018 	if (t != TOKEN_NONE)
2019 		*pos = cp - buf;
2020 	else if (deftype == ROFFDEF_UNDEF) {
2021 		/* Using an undefined macro defines it to be empty. */
2022 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2023 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2024 	}
2025 	return t;
2026 }
2027 
2028 /* --- handling of request blocks ----------------------------------------- */
2029 
2030 /*
2031  * Close a macro definition block or an "ignore" block.
2032  */
2033 static int
roff_cblock(ROFF_ARGS)2034 roff_cblock(ROFF_ARGS)
2035 {
2036 	int	 rr;
2037 
2038 	if (r->last == NULL) {
2039 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2040 		return ROFF_IGN;
2041 	}
2042 
2043 	switch (r->last->tok) {
2044 	case ROFF_am:
2045 	case ROFF_ami:
2046 	case ROFF_de:
2047 	case ROFF_dei:
2048 	case ROFF_ig:
2049 		break;
2050 	case ROFF_am1:
2051 	case ROFF_de1:
2052 		/* Remapped in roff_block(). */
2053 		abort();
2054 	default:
2055 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2056 		return ROFF_IGN;
2057 	}
2058 
2059 	roffnode_pop(r);
2060 	roffnode_cleanscope(r);
2061 
2062 	/*
2063 	 * If a conditional block with braces is still open,
2064 	 * check for "\}" block end markers.
2065 	 */
2066 
2067 	if (r->last != NULL && r->last->endspan < 0) {
2068 		rr = 1;  /* If arguments follow "\}", warn about them. */
2069 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2070 	}
2071 
2072 	if (buf->buf[pos] != '\0')
2073 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2074 		    ".. %s", buf->buf + pos);
2075 
2076 	return ROFF_IGN;
2077 }
2078 
2079 /*
2080  * Pop all nodes ending at the end of the current input line.
2081  * Return the number of loops ended.
2082  */
2083 static int
roffnode_cleanscope(struct roff * r)2084 roffnode_cleanscope(struct roff *r)
2085 {
2086 	int inloop;
2087 
2088 	inloop = 0;
2089 	while (r->last != NULL && r->last->endspan > 0) {
2090 		if (--r->last->endspan != 0)
2091 			break;
2092 		inloop += roffnode_pop(r);
2093 	}
2094 	return inloop;
2095 }
2096 
2097 /*
2098  * Handle the closing "\}" of a conditional block.
2099  * Apart from generating warnings, this only pops nodes.
2100  * Return the number of loops ended.
2101  */
2102 static int
roff_ccond(struct roff * r,int ln,int ppos)2103 roff_ccond(struct roff *r, int ln, int ppos)
2104 {
2105 	if (NULL == r->last) {
2106 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2107 		return 0;
2108 	}
2109 
2110 	switch (r->last->tok) {
2111 	case ROFF_el:
2112 	case ROFF_ie:
2113 	case ROFF_if:
2114 	case ROFF_while:
2115 		break;
2116 	default:
2117 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2118 		return 0;
2119 	}
2120 
2121 	if (r->last->endspan > -1) {
2122 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123 		return 0;
2124 	}
2125 
2126 	return roffnode_pop(r) + roffnode_cleanscope(r);
2127 }
2128 
2129 static int
roff_block(ROFF_ARGS)2130 roff_block(ROFF_ARGS)
2131 {
2132 	const char	*name, *value;
2133 	char		*call, *cp, *iname, *rname;
2134 	size_t		 csz, namesz, rsz;
2135 	int		 deftype;
2136 
2137 	/* Ignore groff compatibility mode for now. */
2138 
2139 	if (tok == ROFF_de1)
2140 		tok = ROFF_de;
2141 	else if (tok == ROFF_dei1)
2142 		tok = ROFF_dei;
2143 	else if (tok == ROFF_am1)
2144 		tok = ROFF_am;
2145 	else if (tok == ROFF_ami1)
2146 		tok = ROFF_ami;
2147 
2148 	/* Parse the macro name argument. */
2149 
2150 	cp = buf->buf + pos;
2151 	if (tok == ROFF_ig) {
2152 		iname = NULL;
2153 		namesz = 0;
2154 	} else {
2155 		iname = cp;
2156 		namesz = roff_getname(r, &cp, ln, ppos);
2157 		iname[namesz] = '\0';
2158 	}
2159 
2160 	/* Resolve the macro name argument if it is indirect. */
2161 
2162 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2163 		deftype = ROFFDEF_USER;
2164 		name = roff_getstrn(r, iname, namesz, &deftype);
2165 		if (name == NULL) {
2166 			mandoc_msg(MANDOCERR_STR_UNDEF,
2167 			    ln, (int)(iname - buf->buf),
2168 			    "%.*s", (int)namesz, iname);
2169 			namesz = 0;
2170 		} else
2171 			namesz = strlen(name);
2172 	} else
2173 		name = iname;
2174 
2175 	if (namesz == 0 && tok != ROFF_ig) {
2176 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2177 		    ln, ppos, "%s", roff_name[tok]);
2178 		return ROFF_IGN;
2179 	}
2180 
2181 	roffnode_push(r, tok, name, ln, ppos);
2182 
2183 	/*
2184 	 * At the beginning of a `de' macro, clear the existing string
2185 	 * with the same name, if there is one.  New content will be
2186 	 * appended from roff_block_text() in multiline mode.
2187 	 */
2188 
2189 	if (tok == ROFF_de || tok == ROFF_dei) {
2190 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2191 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2192 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2193 		deftype = ROFFDEF_ANY;
2194 		value = roff_getstrn(r, iname, namesz, &deftype);
2195 		switch (deftype) {  /* Before appending, ... */
2196 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2197 			roff_setstrn(&r->strtab, name, namesz,
2198 			    value, strlen(value), 0);
2199 			break;
2200 		case ROFFDEF_REN: /* call original standard macro. */
2201 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2202 			    (int)strlen(value), value);
2203 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2204 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2205 			free(call);
2206 			break;
2207 		case ROFFDEF_STD:  /* rename and call standard macro. */
2208 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2209 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2210 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2211 			    (int)rsz, rname);
2212 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2213 			free(call);
2214 			free(rname);
2215 			break;
2216 		default:
2217 			break;
2218 		}
2219 	}
2220 
2221 	if (*cp == '\0')
2222 		return ROFF_IGN;
2223 
2224 	/* Get the custom end marker. */
2225 
2226 	iname = cp;
2227 	namesz = roff_getname(r, &cp, ln, ppos);
2228 
2229 	/* Resolve the end marker if it is indirect. */
2230 
2231 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2232 		deftype = ROFFDEF_USER;
2233 		name = roff_getstrn(r, iname, namesz, &deftype);
2234 		if (name == NULL) {
2235 			mandoc_msg(MANDOCERR_STR_UNDEF,
2236 			    ln, (int)(iname - buf->buf),
2237 			    "%.*s", (int)namesz, iname);
2238 			namesz = 0;
2239 		} else
2240 			namesz = strlen(name);
2241 	} else
2242 		name = iname;
2243 
2244 	if (namesz)
2245 		r->last->end = mandoc_strndup(name, namesz);
2246 
2247 	if (*cp != '\0')
2248 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2249 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2250 
2251 	return ROFF_IGN;
2252 }
2253 
2254 static int
roff_block_sub(ROFF_ARGS)2255 roff_block_sub(ROFF_ARGS)
2256 {
2257 	enum roff_tok	t;
2258 	int		i, j;
2259 
2260 	/*
2261 	 * If a custom end marker is a user-defined or predefined macro
2262 	 * or a request, interpret it.
2263 	 */
2264 
2265 	if (r->last->end) {
2266 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2267 			if (buf->buf[i] != r->last->end[j])
2268 				break;
2269 
2270 		if (r->last->end[j] == '\0' &&
2271 		    (buf->buf[i] == '\0' ||
2272 		     buf->buf[i] == ' ' ||
2273 		     buf->buf[i] == '\t')) {
2274 			roffnode_pop(r);
2275 			roffnode_cleanscope(r);
2276 
2277 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2278 				i++;
2279 
2280 			pos = i;
2281 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2282 			    TOKEN_NONE)
2283 				return ROFF_RERUN;
2284 			return ROFF_IGN;
2285 		}
2286 	}
2287 
2288 	/* Handle the standard end marker. */
2289 
2290 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2291 	if (t == ROFF_cblock)
2292 		return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2293 
2294 	/* Not an end marker, so append the line to the block. */
2295 
2296 	if (tok != ROFF_ig)
2297 		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2298 	return ROFF_IGN;
2299 }
2300 
2301 static int
roff_block_text(ROFF_ARGS)2302 roff_block_text(ROFF_ARGS)
2303 {
2304 
2305 	if (tok != ROFF_ig)
2306 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2307 
2308 	return ROFF_IGN;
2309 }
2310 
2311 /*
2312  * Check for a closing "\}" and handle it.
2313  * In this function, the final "int *offs" argument is used for
2314  * different purposes than elsewhere:
2315  * Input: *offs == 0: caller wants to discard arguments following \}
2316  *        *offs == 1: caller wants to preserve text following \}
2317  * Output: *offs = 0: tell caller to discard input line
2318  *         *offs = 1: tell caller to use input line
2319  */
2320 static int
roff_cond_checkend(ROFF_ARGS)2321 roff_cond_checkend(ROFF_ARGS)
2322 {
2323 	char		*ep;
2324 	int		 endloop, irc, rr;
2325 
2326 	irc = ROFF_IGN;
2327 	rr = r->last->rule;
2328 	endloop = tok != ROFF_while ? ROFF_IGN :
2329 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2330 	if (roffnode_cleanscope(r))
2331 		irc |= endloop;
2332 
2333 	/*
2334 	 * If "\}" occurs on a macro line without a preceding macro or
2335 	 * a text line contains nothing else, drop the line completely.
2336 	 */
2337 
2338 	ep = buf->buf + pos;
2339 	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2340 		rr = 0;
2341 
2342 	/*
2343 	 * The closing delimiter "\}" rewinds the conditional scope
2344 	 * but is otherwise ignored when interpreting the line.
2345 	 */
2346 
2347 	while ((ep = strchr(ep, '\\')) != NULL) {
2348 		switch (ep[1]) {
2349 		case '}':
2350 			if (ep[2] == '\0')
2351 				ep[0] = '\0';
2352 			else if (rr)
2353 				ep[1] = '&';
2354 			else
2355 				memmove(ep, ep + 2, strlen(ep + 2) + 1);
2356 			if (roff_ccond(r, ln, ep - buf->buf))
2357 				irc |= endloop;
2358 			break;
2359 		case '\0':
2360 			++ep;
2361 			break;
2362 		default:
2363 			ep += 2;
2364 			break;
2365 		}
2366 	}
2367 	*offs = rr;
2368 	return irc;
2369 }
2370 
2371 /*
2372  * Parse and process a request or macro line in conditional scope.
2373  */
2374 static int
roff_cond_sub(ROFF_ARGS)2375 roff_cond_sub(ROFF_ARGS)
2376 {
2377 	struct roffnode	*bl;
2378 	int		 irc, rr, spos;
2379 	enum roff_tok	 t;
2380 
2381 	rr = 0;  /* If arguments follow "\}", skip them. */
2382 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2383 	spos = pos;
2384 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2385 
2386 	/*
2387 	 * Handle requests and macros if the conditional evaluated
2388 	 * to true or if they are structurally required.
2389 	 * The .break request is always handled specially.
2390 	 */
2391 
2392 	if (t == ROFF_break) {
2393 		if (irc & ROFF_LOOPMASK)
2394 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2395 		else if (rr) {
2396 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2397 				bl->rule = 0;
2398 				if (bl->tok == ROFF_while)
2399 					break;
2400 			}
2401 		}
2402 	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2403 		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2404 		if (irc & ROFF_WHILE)
2405 			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2406 	}
2407 	return irc;
2408 }
2409 
2410 /*
2411  * Parse and process a text line in conditional scope.
2412  */
2413 static int
roff_cond_text(ROFF_ARGS)2414 roff_cond_text(ROFF_ARGS)
2415 {
2416 	int	 irc, rr;
2417 
2418 	rr = 1;  /* If arguments follow "\}", preserve them. */
2419 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2420 	if (rr)
2421 		irc |= ROFF_CONT;
2422 	return irc;
2423 }
2424 
2425 /* --- handling of numeric and conditional expressions -------------------- */
2426 
2427 /*
2428  * Parse a single signed integer number.  Stop at the first non-digit.
2429  * If there is at least one digit, return success and advance the
2430  * parse point, else return failure and let the parse point unchanged.
2431  * Ignore overflows, treat them just like the C language.
2432  */
2433 static int
roff_getnum(const char * v,int * pos,int * res,int flags)2434 roff_getnum(const char *v, int *pos, int *res, int flags)
2435 {
2436 	int	 myres, scaled, n, p;
2437 
2438 	if (NULL == res)
2439 		res = &myres;
2440 
2441 	p = *pos;
2442 	n = v[p] == '-';
2443 	if (n || v[p] == '+')
2444 		p++;
2445 
2446 	if (flags & ROFFNUM_WHITE)
2447 		while (isspace((unsigned char)v[p]))
2448 			p++;
2449 
2450 	for (*res = 0; isdigit((unsigned char)v[p]); p++)
2451 		*res = 10 * *res + v[p] - '0';
2452 	if (p == *pos + n)
2453 		return 0;
2454 
2455 	if (n)
2456 		*res = -*res;
2457 
2458 	/* Each number may be followed by one optional scaling unit. */
2459 
2460 	switch (v[p]) {
2461 	case 'f':
2462 		scaled = *res * 65536;
2463 		break;
2464 	case 'i':
2465 		scaled = *res * 240;
2466 		break;
2467 	case 'c':
2468 		scaled = *res * 240 / 2.54;
2469 		break;
2470 	case 'v':
2471 	case 'P':
2472 		scaled = *res * 40;
2473 		break;
2474 	case 'm':
2475 	case 'n':
2476 		scaled = *res * 24;
2477 		break;
2478 	case 'p':
2479 		scaled = *res * 10 / 3;
2480 		break;
2481 	case 'u':
2482 		scaled = *res;
2483 		break;
2484 	case 'M':
2485 		scaled = *res * 6 / 25;
2486 		break;
2487 	default:
2488 		scaled = *res;
2489 		p--;
2490 		break;
2491 	}
2492 	if (flags & ROFFNUM_SCALE)
2493 		*res = scaled;
2494 
2495 	*pos = p + 1;
2496 	return 1;
2497 }
2498 
/*
 * Evaluate a string comparison condition.
 * The character at v[*pos] is the delimiter.
 * Return 1 if the string between its first and second occurrence
 * equals the string between its second and third occurrence,
 * or 0 otherwise, including when a delimiter is missing.
 * Advance *pos after the third occurrence of the delimiter
 * or lacking that, to the end of the line.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* initial delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* middle delimiter, if any */

	if (rhs != NULL) {
		/* Compare the two strings character by character. */
		for (rhs++; *rhs != '\0'; rhs++, lhs++) {
			if (*lhs != *rhs) {
				/* Mismatch: look for the final delimiter. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended at the same time. */
				equal = 1;
				break;
			}
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2541 
2542 /*
2543  * Evaluate an optionally negated single character, numerical,
2544  * or string condition.
2545  */
2546 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2547 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2548 {
2549 	const char	*start, *end;
2550 	char		*cp, *name;
2551 	size_t		 sz;
2552 	int		 deftype, len, number, savepos, istrue, wanttrue;
2553 
2554 	if ('!' == v[*pos]) {
2555 		wanttrue = 0;
2556 		(*pos)++;
2557 	} else
2558 		wanttrue = 1;
2559 
2560 	switch (v[*pos]) {
2561 	case '\0':
2562 		return 0;
2563 	case 'n':
2564 	case 'o':
2565 		(*pos)++;
2566 		return wanttrue;
2567 	case 'e':
2568 	case 't':
2569 	case 'v':
2570 		(*pos)++;
2571 		return !wanttrue;
2572 	case 'c':
2573 		do {
2574 			(*pos)++;
2575 		} while (v[*pos] == ' ');
2576 
2577 		/*
2578 		 * Quirk for groff compatibility:
2579 		 * The horizontal tab is neither available nor unavailable.
2580 		 */
2581 
2582 		if (v[*pos] == '\t') {
2583 			(*pos)++;
2584 			return 0;
2585 		}
2586 
2587 		/* Printable ASCII characters are available. */
2588 
2589 		if (v[*pos] != '\\') {
2590 			(*pos)++;
2591 			return wanttrue;
2592 		}
2593 
2594 		end = v + ++*pos;
2595 		switch (mandoc_escape(&end, &start, &len)) {
2596 		case ESCAPE_SPECIAL:
2597 			istrue = mchars_spec2cp(start, len) != -1;
2598 			break;
2599 		case ESCAPE_UNICODE:
2600 			istrue = 1;
2601 			break;
2602 		case ESCAPE_NUMBERED:
2603 			istrue = mchars_num2char(start, len) != -1;
2604 			break;
2605 		default:
2606 			istrue = !wanttrue;
2607 			break;
2608 		}
2609 		*pos = end - v;
2610 		return istrue == wanttrue;
2611 	case 'd':
2612 	case 'r':
2613 		cp = v + *pos + 1;
2614 		while (*cp == ' ')
2615 			cp++;
2616 		name = cp;
2617 		sz = roff_getname(r, &cp, ln, cp - v);
2618 		if (sz == 0)
2619 			istrue = 0;
2620 		else if (v[*pos] == 'r')
2621 			istrue = roff_hasregn(r, name, sz);
2622 		else {
2623 			deftype = ROFFDEF_ANY;
2624 		        roff_getstrn(r, name, sz, &deftype);
2625 			istrue = !!deftype;
2626 		}
2627 		*pos = (name + sz) - v;
2628 		return istrue == wanttrue;
2629 	default:
2630 		break;
2631 	}
2632 
2633 	savepos = *pos;
2634 	if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2635 		return (number > 0) == wanttrue;
2636 	else if (*pos == savepos)
2637 		return roff_evalstrcond(v, pos) == wanttrue;
2638 	else
2639 		return 0;
2640 }
2641 
2642 static int
roff_line_ignore(ROFF_ARGS)2643 roff_line_ignore(ROFF_ARGS)
2644 {
2645 
2646 	return ROFF_IGN;
2647 }
2648 
2649 static int
roff_insec(ROFF_ARGS)2650 roff_insec(ROFF_ARGS)
2651 {
2652 
2653 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2654 	return ROFF_IGN;
2655 }
2656 
2657 static int
roff_unsupp(ROFF_ARGS)2658 roff_unsupp(ROFF_ARGS)
2659 {
2660 
2661 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2662 	return ROFF_IGN;
2663 }
2664 
2665 static int
roff_cond(ROFF_ARGS)2666 roff_cond(ROFF_ARGS)
2667 {
2668 	int	 irc;
2669 
2670 	roffnode_push(r, tok, NULL, ln, ppos);
2671 
2672 	/*
2673 	 * An `.el' has no conditional body: it will consume the value
2674 	 * of the current rstack entry set in prior `ie' calls or
2675 	 * defaults to DENY.
2676 	 *
2677 	 * If we're not an `el', however, then evaluate the conditional.
2678 	 */
2679 
2680 	r->last->rule = tok == ROFF_el ?
2681 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2682 	    roff_evalcond(r, ln, buf->buf, &pos);
2683 
2684 	/*
2685 	 * An if-else will put the NEGATION of the current evaluated
2686 	 * conditional into the stack of rules.
2687 	 */
2688 
2689 	if (tok == ROFF_ie) {
2690 		if (r->rstackpos + 1 == r->rstacksz) {
2691 			r->rstacksz += 16;
2692 			r->rstack = mandoc_reallocarray(r->rstack,
2693 			    r->rstacksz, sizeof(int));
2694 		}
2695 		r->rstack[++r->rstackpos] = !r->last->rule;
2696 	}
2697 
2698 	/* If the parent has false as its rule, then so do we. */
2699 
2700 	if (r->last->parent && !r->last->parent->rule)
2701 		r->last->rule = 0;
2702 
2703 	/*
2704 	 * Determine scope.
2705 	 * If there is nothing on the line after the conditional,
2706 	 * not even whitespace, use next-line scope.
2707 	 * Except that .while does not support next-line scope.
2708 	 */
2709 
2710 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2711 		r->last->endspan = 2;
2712 		goto out;
2713 	}
2714 
2715 	while (buf->buf[pos] == ' ')
2716 		pos++;
2717 
2718 	/* An opening brace requests multiline scope. */
2719 
2720 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2721 		r->last->endspan = -1;
2722 		pos += 2;
2723 		while (buf->buf[pos] == ' ')
2724 			pos++;
2725 		goto out;
2726 	}
2727 
2728 	/*
2729 	 * Anything else following the conditional causes
2730 	 * single-line scope.  Warn if the scope contains
2731 	 * nothing but trailing whitespace.
2732 	 */
2733 
2734 	if (buf->buf[pos] == '\0')
2735 		mandoc_msg(MANDOCERR_COND_EMPTY,
2736 		    ln, ppos, "%s", roff_name[tok]);
2737 
2738 	r->last->endspan = 1;
2739 
2740 out:
2741 	*offs = pos;
2742 	irc = ROFF_RERUN;
2743 	if (tok == ROFF_while)
2744 		irc |= ROFF_WHILE;
2745 	return irc;
2746 }
2747 
2748 static int
roff_ds(ROFF_ARGS)2749 roff_ds(ROFF_ARGS)
2750 {
2751 	char		*string;
2752 	const char	*name;
2753 	size_t		 namesz;
2754 
2755 	/* Ignore groff compatibility mode for now. */
2756 
2757 	if (tok == ROFF_ds1)
2758 		tok = ROFF_ds;
2759 	else if (tok == ROFF_as1)
2760 		tok = ROFF_as;
2761 
2762 	/*
2763 	 * The first word is the name of the string.
2764 	 * If it is empty or terminated by an escape sequence,
2765 	 * abort the `ds' request without defining anything.
2766 	 */
2767 
2768 	name = string = buf->buf + pos;
2769 	if (*name == '\0')
2770 		return ROFF_IGN;
2771 
2772 	namesz = roff_getname(r, &string, ln, pos);
2773 	switch (name[namesz]) {
2774 	case '\\':
2775 		return ROFF_IGN;
2776 	case '\t':
2777 		string = buf->buf + pos + namesz;
2778 		break;
2779 	default:
2780 		break;
2781 	}
2782 
2783 	/* Read past the initial double-quote, if any. */
2784 	if (*string == '"')
2785 		string++;
2786 
2787 	/* The rest is the value. */
2788 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2789 	    ROFF_as == tok);
2790 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2791 	return ROFF_IGN;
2792 }
2793 
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * If the operator is recognized, store its one-character code
 * in *res, advance the parse point, and return the code.
 * Two-character operators are encoded as follows:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * Otherwise, return 0 and leave the parse point unchanged;
 * in that case, *res is set to the character at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	char		 op;

	cp = v + *pos;
	op = *cp;
	*res = op;

	switch (op) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		cp++;
		break;
	case '<':
		cp++;
		if (*cp == '=') {
			op = 'l';
			cp++;
		} else if (*cp == '>') {
			op = '!';
			cp++;
		} else if (*cp == '?') {
			op = 'i';
			cp++;
		}
		break;
	case '>':
		cp++;
		if (*cp == '=') {
			op = 'g';
			cp++;
		} else if (*cp == '?') {
			op = 'a';
			cp++;
		}
		break;
	case '=':
		cp++;
		if (*cp == '=')
			cp++;
		break;
	default:
		return 0;
	}

	*res = op;
	*pos = (int)(cp - v);
	return op;
}
2857 
2858 /*
2859  * Evaluate either a parenthesized numeric expression
2860  * or a single signed integer number.
2861  */
2862 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2863 roff_evalpar(struct roff *r, int ln,
2864 	const char *v, int *pos, int *res, int flags)
2865 {
2866 
2867 	if ('(' != v[*pos])
2868 		return roff_getnum(v, pos, res, flags);
2869 
2870 	(*pos)++;
2871 	if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2872 		return 0;
2873 
2874 	/*
2875 	 * Omission of the closing parenthesis
2876 	 * is an error in validation mode,
2877 	 * but ignored in evaluation mode.
2878 	 */
2879 
2880 	if (')' == v[*pos])
2881 		(*pos)++;
2882 	else if (NULL == res)
2883 		return 0;
2884 
2885 	return 1;
2886 }
2887 
2888 /*
2889  * Evaluate a complete numeric expression.
2890  * Proceed left to right, there is no concept of precedence.
2891  */
2892 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2893 roff_evalnum(struct roff *r, int ln, const char *v,
2894 	int *pos, int *res, int flags)
2895 {
2896 	int		 mypos, operand2;
2897 	char		 operator;
2898 
2899 	if (NULL == pos) {
2900 		mypos = 0;
2901 		pos = &mypos;
2902 	}
2903 
2904 	if (flags & ROFFNUM_WHITE)
2905 		while (isspace((unsigned char)v[*pos]))
2906 			(*pos)++;
2907 
2908 	if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2909 		return 0;
2910 
2911 	while (1) {
2912 		if (flags & ROFFNUM_WHITE)
2913 			while (isspace((unsigned char)v[*pos]))
2914 				(*pos)++;
2915 
2916 		if ( ! roff_getop(v, pos, &operator))
2917 			break;
2918 
2919 		if (flags & ROFFNUM_WHITE)
2920 			while (isspace((unsigned char)v[*pos]))
2921 				(*pos)++;
2922 
2923 		if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2924 			return 0;
2925 
2926 		if (flags & ROFFNUM_WHITE)
2927 			while (isspace((unsigned char)v[*pos]))
2928 				(*pos)++;
2929 
2930 		if (NULL == res)
2931 			continue;
2932 
2933 		switch (operator) {
2934 		case '+':
2935 			*res += operand2;
2936 			break;
2937 		case '-':
2938 			*res -= operand2;
2939 			break;
2940 		case '*':
2941 			*res *= operand2;
2942 			break;
2943 		case '/':
2944 			if (operand2 == 0) {
2945 				mandoc_msg(MANDOCERR_DIVZERO,
2946 					ln, *pos, "%s", v);
2947 				*res = 0;
2948 				break;
2949 			}
2950 			*res /= operand2;
2951 			break;
2952 		case '%':
2953 			if (operand2 == 0) {
2954 				mandoc_msg(MANDOCERR_DIVZERO,
2955 					ln, *pos, "%s", v);
2956 				*res = 0;
2957 				break;
2958 			}
2959 			*res %= operand2;
2960 			break;
2961 		case '<':
2962 			*res = *res < operand2;
2963 			break;
2964 		case '>':
2965 			*res = *res > operand2;
2966 			break;
2967 		case 'l':
2968 			*res = *res <= operand2;
2969 			break;
2970 		case 'g':
2971 			*res = *res >= operand2;
2972 			break;
2973 		case '=':
2974 			*res = *res == operand2;
2975 			break;
2976 		case '!':
2977 			*res = *res != operand2;
2978 			break;
2979 		case '&':
2980 			*res = *res && operand2;
2981 			break;
2982 		case ':':
2983 			*res = *res || operand2;
2984 			break;
2985 		case 'i':
2986 			if (operand2 < *res)
2987 				*res = operand2;
2988 			break;
2989 		case 'a':
2990 			if (operand2 > *res)
2991 				*res = operand2;
2992 			break;
2993 		default:
2994 			abort();
2995 		}
2996 	}
2997 	return 1;
2998 }
2999 
3000 /* --- register management ------------------------------------------------ */
3001 
/*
 * Set the register with the NUL-terminated name
 * without touching its step value.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
3007 
3008 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3009 roff_setregn(struct roff *r, const char *name, size_t len,
3010     int val, char sign, int step)
3011 {
3012 	struct roffreg	*reg;
3013 
3014 	/* Search for an existing register with the same name. */
3015 	reg = r->regtab;
3016 
3017 	while (reg != NULL && (reg->key.sz != len ||
3018 	    strncmp(reg->key.p, name, len) != 0))
3019 		reg = reg->next;
3020 
3021 	if (NULL == reg) {
3022 		/* Create a new register. */
3023 		reg = mandoc_malloc(sizeof(struct roffreg));
3024 		reg->key.p = mandoc_strndup(name, len);
3025 		reg->key.sz = len;
3026 		reg->val = 0;
3027 		reg->step = 0;
3028 		reg->next = r->regtab;
3029 		r->regtab = reg;
3030 	}
3031 
3032 	if ('+' == sign)
3033 		reg->val += val;
3034 	else if ('-' == sign)
3035 		reg->val -= val;
3036 	else
3037 		reg->val = val;
3038 	if (step != INT_MIN)
3039 		reg->step = step;
3040 }
3041 
3042 /*
3043  * Handle some predefined read-only number registers.
3044  * For now, return -1 if the requested register is not predefined;
3045  * in case a predefined read-only register having the value -1
3046  * were to turn up, another special value would have to be chosen.
3047  */
3048 static int
roff_getregro(const struct roff * r,const char * name)3049 roff_getregro(const struct roff *r, const char *name)
3050 {
3051 
3052 	switch (*name) {
3053 	case '$':  /* Number of arguments of the last macro evaluated. */
3054 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3055 	case 'A':  /* ASCII approximation mode is always off. */
3056 		return 0;
3057 	case 'g':  /* Groff compatibility mode is always on. */
3058 		return 1;
3059 	case 'H':  /* Fixed horizontal resolution. */
3060 		return 24;
3061 	case 'j':  /* Always adjust left margin only. */
3062 		return 0;
3063 	case 'T':  /* Some output device is always defined. */
3064 		return 1;
3065 	case 'V':  /* Fixed vertical resolution. */
3066 		return 40;
3067 	default:
3068 		return -1;
3069 	}
3070 }
3071 
/*
 * Return the value of the register with the NUL-terminated name,
 * without applying an auto-increment or auto-decrement.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
3077 
3078 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3079 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3080 {
3081 	struct roffreg	*reg;
3082 	int		 val;
3083 
3084 	if ('.' == name[0] && 2 == len) {
3085 		val = roff_getregro(r, name + 1);
3086 		if (-1 != val)
3087 			return val;
3088 	}
3089 
3090 	for (reg = r->regtab; reg; reg = reg->next) {
3091 		if (len == reg->key.sz &&
3092 		    0 == strncmp(name, reg->key.p, len)) {
3093 			switch (sign) {
3094 			case '+':
3095 				reg->val += reg->step;
3096 				break;
3097 			case '-':
3098 				reg->val -= reg->step;
3099 				break;
3100 			default:
3101 				break;
3102 			}
3103 			return reg->val;
3104 		}
3105 	}
3106 
3107 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3108 	return 0;
3109 }
3110 
3111 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3112 roff_hasregn(const struct roff *r, const char *name, size_t len)
3113 {
3114 	struct roffreg	*reg;
3115 	int		 val;
3116 
3117 	if ('.' == name[0] && 2 == len) {
3118 		val = roff_getregro(r, name + 1);
3119 		if (-1 != val)
3120 			return 1;
3121 	}
3122 
3123 	for (reg = r->regtab; reg; reg = reg->next)
3124 		if (len == reg->key.sz &&
3125 		    0 == strncmp(name, reg->key.p, len))
3126 			return 1;
3127 
3128 	return 0;
3129 }
3130 
3131 static void
roff_freereg(struct roffreg * reg)3132 roff_freereg(struct roffreg *reg)
3133 {
3134 	struct roffreg	*old_reg;
3135 
3136 	while (NULL != reg) {
3137 		free(reg->key.p);
3138 		old_reg = reg;
3139 		reg = reg->next;
3140 		free(old_reg);
3141 	}
3142 }
3143 
3144 static int
roff_nr(ROFF_ARGS)3145 roff_nr(ROFF_ARGS)
3146 {
3147 	char		*key, *val, *step;
3148 	size_t		 keysz;
3149 	int		 iv, is, len;
3150 	char		 sign;
3151 
3152 	key = val = buf->buf + pos;
3153 	if (*key == '\0')
3154 		return ROFF_IGN;
3155 
3156 	keysz = roff_getname(r, &val, ln, pos);
3157 	if (key[keysz] == '\\' || key[keysz] == '\t')
3158 		return ROFF_IGN;
3159 
3160 	sign = *val;
3161 	if (sign == '+' || sign == '-')
3162 		val++;
3163 
3164 	len = 0;
3165 	if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3166 		return ROFF_IGN;
3167 
3168 	step = val + len;
3169 	while (isspace((unsigned char)*step))
3170 		step++;
3171 	if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3172 		is = INT_MIN;
3173 
3174 	roff_setregn(r, key, keysz, iv, sign, is);
3175 	return ROFF_IGN;
3176 }
3177 
3178 static int
roff_rr(ROFF_ARGS)3179 roff_rr(ROFF_ARGS)
3180 {
3181 	struct roffreg	*reg, **prev;
3182 	char		*name, *cp;
3183 	size_t		 namesz;
3184 
3185 	name = cp = buf->buf + pos;
3186 	if (*name == '\0')
3187 		return ROFF_IGN;
3188 	namesz = roff_getname(r, &cp, ln, pos);
3189 	name[namesz] = '\0';
3190 
3191 	prev = &r->regtab;
3192 	while (1) {
3193 		reg = *prev;
3194 		if (reg == NULL || !strcmp(name, reg->key.p))
3195 			break;
3196 		prev = &reg->next;
3197 	}
3198 	if (reg != NULL) {
3199 		*prev = reg->next;
3200 		free(reg->key.p);
3201 		free(reg);
3202 	}
3203 	return ROFF_IGN;
3204 }
3205 
3206 /* --- handler functions for roff requests -------------------------------- */
3207 
3208 static int
roff_rm(ROFF_ARGS)3209 roff_rm(ROFF_ARGS)
3210 {
3211 	const char	 *name;
3212 	char		 *cp;
3213 	size_t		  namesz;
3214 
3215 	cp = buf->buf + pos;
3216 	while (*cp != '\0') {
3217 		name = cp;
3218 		namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3219 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3220 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3221 		if (name[namesz] == '\\' || name[namesz] == '\t')
3222 			break;
3223 	}
3224 	return ROFF_IGN;
3225 }
3226 
3227 static int
roff_it(ROFF_ARGS)3228 roff_it(ROFF_ARGS)
3229 {
3230 	int		 iv;
3231 
3232 	/* Parse the number of lines. */
3233 
3234 	if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3235 		mandoc_msg(MANDOCERR_IT_NONUM,
3236 		    ln, ppos, "%s", buf->buf + 1);
3237 		return ROFF_IGN;
3238 	}
3239 
3240 	while (isspace((unsigned char)buf->buf[pos]))
3241 		pos++;
3242 
3243 	/*
3244 	 * Arm the input line trap.
3245 	 * Special-casing "an-trap" is an ugly workaround to cope
3246 	 * with DocBook stupidly fiddling with man(7) internals.
3247 	 */
3248 
3249 	roffit_lines = iv;
3250 	roffit_macro = mandoc_strdup(iv != 1 ||
3251 	    strcmp(buf->buf + pos, "an-trap") ?
3252 	    buf->buf + pos : "br");
3253 	return ROFF_IGN;
3254 }
3255 
3256 static int
roff_Dd(ROFF_ARGS)3257 roff_Dd(ROFF_ARGS)
3258 {
3259 	int		 mask;
3260 	enum roff_tok	 t, te;
3261 
3262 	switch (tok) {
3263 	case ROFF_Dd:
3264 		tok = MDOC_Dd;
3265 		te = MDOC_MAX;
3266 		if (r->format == 0)
3267 			r->format = MPARSE_MDOC;
3268 		mask = MPARSE_MDOC | MPARSE_QUICK;
3269 		break;
3270 	case ROFF_TH:
3271 		tok = MAN_TH;
3272 		te = MAN_MAX;
3273 		if (r->format == 0)
3274 			r->format = MPARSE_MAN;
3275 		mask = MPARSE_QUICK;
3276 		break;
3277 	default:
3278 		abort();
3279 	}
3280 	if ((r->options & mask) == 0)
3281 		for (t = tok; t < te; t++)
3282 			roff_setstr(r, roff_name[t], NULL, 0);
3283 	return ROFF_CONT;
3284 }
3285 
3286 static int
roff_TE(ROFF_ARGS)3287 roff_TE(ROFF_ARGS)
3288 {
3289 	r->man->flags &= ~ROFF_NONOFILL;
3290 	if (r->tbl == NULL) {
3291 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3292 		return ROFF_IGN;
3293 	}
3294 	if (tbl_end(r->tbl, 0) == 0) {
3295 		r->tbl = NULL;
3296 		free(buf->buf);
3297 		buf->buf = mandoc_strdup(".sp");
3298 		buf->sz = 4;
3299 		*offs = 0;
3300 		return ROFF_REPARSE;
3301 	}
3302 	r->tbl = NULL;
3303 	return ROFF_IGN;
3304 }
3305 
3306 static int
roff_T_(ROFF_ARGS)3307 roff_T_(ROFF_ARGS)
3308 {
3309 
3310 	if (NULL == r->tbl)
3311 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3312 	else
3313 		tbl_restart(ln, ppos, r->tbl);
3314 
3315 	return ROFF_IGN;
3316 }
3317 
3318 /*
3319  * Handle in-line equation delimiters.
3320  */
3321 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3322 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3323 {
3324 	char		*cp1, *cp2;
3325 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3326 
3327 	/*
3328 	 * Outside equations, look for an opening delimiter.
3329 	 * If we are inside an equation, we already know it is
3330 	 * in-line, or this function wouldn't have been called;
3331 	 * so look for a closing delimiter.
3332 	 */
3333 
3334 	cp1 = buf->buf + pos;
3335 	cp2 = strchr(cp1, r->eqn == NULL ?
3336 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3337 	if (cp2 == NULL)
3338 		return ROFF_CONT;
3339 
3340 	*cp2++ = '\0';
3341 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3342 
3343 	/* Handle preceding text, protecting whitespace. */
3344 
3345 	if (*buf->buf != '\0') {
3346 		if (r->eqn == NULL)
3347 			bef_pr = "\\&";
3348 		bef_nl = "\n";
3349 	}
3350 
3351 	/*
3352 	 * Prepare replacing the delimiter with an equation macro
3353 	 * and drop leading white space from the equation.
3354 	 */
3355 
3356 	if (r->eqn == NULL) {
3357 		while (*cp2 == ' ')
3358 			cp2++;
3359 		mac = ".EQ";
3360 	} else
3361 		mac = ".EN";
3362 
3363 	/* Handle following text, protecting whitespace. */
3364 
3365 	if (*cp2 != '\0') {
3366 		aft_nl = "\n";
3367 		if (r->eqn != NULL)
3368 			aft_pr = "\\&";
3369 	}
3370 
3371 	/* Do the actual replacement. */
3372 
3373 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3374 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3375 	free(buf->buf);
3376 	buf->buf = cp1;
3377 
3378 	/* Toggle the in-line state of the eqn subsystem. */
3379 
3380 	r->eqn_inline = r->eqn == NULL;
3381 	return ROFF_REPARSE;
3382 }
3383 
3384 static int
roff_EQ(ROFF_ARGS)3385 roff_EQ(ROFF_ARGS)
3386 {
3387 	struct roff_node	*n;
3388 
3389 	if (r->man->meta.macroset == MACROSET_MAN)
3390 		man_breakscope(r->man, ROFF_EQ);
3391 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3392 	if (ln > r->man->last->line)
3393 		n->flags |= NODE_LINE;
3394 	n->eqn = eqn_box_new();
3395 	roff_node_append(r->man, n);
3396 	r->man->next = ROFF_NEXT_SIBLING;
3397 
3398 	assert(r->eqn == NULL);
3399 	if (r->last_eqn == NULL)
3400 		r->last_eqn = eqn_alloc();
3401 	else
3402 		eqn_reset(r->last_eqn);
3403 	r->eqn = r->last_eqn;
3404 	r->eqn->node = n;
3405 
3406 	if (buf->buf[pos] != '\0')
3407 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3408 		    ".EQ %s", buf->buf + pos);
3409 
3410 	return ROFF_IGN;
3411 }
3412 
3413 static int
roff_EN(ROFF_ARGS)3414 roff_EN(ROFF_ARGS)
3415 {
3416 	if (r->eqn != NULL) {
3417 		eqn_parse(r->eqn);
3418 		r->eqn = NULL;
3419 	} else
3420 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3421 	if (buf->buf[pos] != '\0')
3422 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3423 		    "EN %s", buf->buf + pos);
3424 	return ROFF_IGN;
3425 }
3426 
3427 static int
roff_TS(ROFF_ARGS)3428 roff_TS(ROFF_ARGS)
3429 {
3430 	if (r->tbl != NULL) {
3431 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3432 		tbl_end(r->tbl, 0);
3433 	}
3434 	r->man->flags |= ROFF_NONOFILL;
3435 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3436 	if (r->last_tbl == NULL)
3437 		r->first_tbl = r->tbl;
3438 	r->last_tbl = r->tbl;
3439 	return ROFF_IGN;
3440 }
3441 
3442 static int
roff_noarg(ROFF_ARGS)3443 roff_noarg(ROFF_ARGS)
3444 {
3445 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3446 		man_breakscope(r->man, tok);
3447 	if (tok == ROFF_brp)
3448 		tok = ROFF_br;
3449 	roff_elem_alloc(r->man, ln, ppos, tok);
3450 	if (buf->buf[pos] != '\0')
3451 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3452 		   "%s %s", roff_name[tok], buf->buf + pos);
3453 	if (tok == ROFF_nf)
3454 		r->man->flags |= ROFF_NOFILL;
3455 	else if (tok == ROFF_fi)
3456 		r->man->flags &= ~ROFF_NOFILL;
3457 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3458 	r->man->next = ROFF_NEXT_SIBLING;
3459 	return ROFF_IGN;
3460 }
3461 
3462 static int
roff_onearg(ROFF_ARGS)3463 roff_onearg(ROFF_ARGS)
3464 {
3465 	struct roff_node	*n;
3466 	char			*cp;
3467 	int			 npos;
3468 
3469 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3470 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3471 	     tok == ROFF_ti))
3472 		man_breakscope(r->man, tok);
3473 
3474 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3475 		r->man->last = roffce_node;
3476 		r->man->next = ROFF_NEXT_SIBLING;
3477 	}
3478 
3479 	roff_elem_alloc(r->man, ln, ppos, tok);
3480 	n = r->man->last;
3481 
3482 	cp = buf->buf + pos;
3483 	if (*cp != '\0') {
3484 		while (*cp != '\0' && *cp != ' ')
3485 			cp++;
3486 		while (*cp == ' ')
3487 			*cp++ = '\0';
3488 		if (*cp != '\0')
3489 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3490 			    ln, (int)(cp - buf->buf),
3491 			    "%s ... %s", roff_name[tok], cp);
3492 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3493 	}
3494 
3495 	if (tok == ROFF_ce || tok == ROFF_rj) {
3496 		if (r->man->last->type == ROFFT_ELEM) {
3497 			roff_word_alloc(r->man, ln, pos, "1");
3498 			r->man->last->flags |= NODE_NOSRC;
3499 		}
3500 		npos = 0;
3501 		if (roff_evalnum(r, ln, r->man->last->string, &npos,
3502 		    &roffce_lines, 0) == 0) {
3503 			mandoc_msg(MANDOCERR_CE_NONUM,
3504 			    ln, pos, "ce %s", buf->buf + pos);
3505 			roffce_lines = 1;
3506 		}
3507 		if (roffce_lines < 1) {
3508 			r->man->last = r->man->last->parent;
3509 			roffce_node = NULL;
3510 			roffce_lines = 0;
3511 		} else
3512 			roffce_node = r->man->last->parent;
3513 	} else {
3514 		n->flags |= NODE_VALID | NODE_ENDED;
3515 		r->man->last = n;
3516 	}
3517 	n->flags |= NODE_LINE;
3518 	r->man->next = ROFF_NEXT_SIBLING;
3519 	return ROFF_IGN;
3520 }
3521 
3522 static int
roff_manyarg(ROFF_ARGS)3523 roff_manyarg(ROFF_ARGS)
3524 {
3525 	struct roff_node	*n;
3526 	char			*sp, *ep;
3527 
3528 	roff_elem_alloc(r->man, ln, ppos, tok);
3529 	n = r->man->last;
3530 
3531 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3532 		while (*ep != '\0' && *ep != ' ')
3533 			ep++;
3534 		while (*ep == ' ')
3535 			*ep++ = '\0';
3536 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3537 	}
3538 
3539 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3540 	r->man->last = n;
3541 	r->man->next = ROFF_NEXT_SIBLING;
3542 	return ROFF_IGN;
3543 }
3544 
3545 static int
roff_als(ROFF_ARGS)3546 roff_als(ROFF_ARGS)
3547 {
3548 	char		*oldn, *newn, *end, *value;
3549 	size_t		 oldsz, newsz, valsz;
3550 
3551 	newn = oldn = buf->buf + pos;
3552 	if (*newn == '\0')
3553 		return ROFF_IGN;
3554 
3555 	newsz = roff_getname(r, &oldn, ln, pos);
3556 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3557 		return ROFF_IGN;
3558 
3559 	end = oldn;
3560 	oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3561 	if (oldsz == 0)
3562 		return ROFF_IGN;
3563 
3564 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3565 	    (int)oldsz, oldn);
3566 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3567 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3568 	free(value);
3569 	return ROFF_IGN;
3570 }
3571 
3572 /*
3573  * The .break request only makes sense inside conditionals,
3574  * and that case is already handled in roff_cond_sub().
3575  */
3576 static int
roff_break(ROFF_ARGS)3577 roff_break(ROFF_ARGS)
3578 {
3579 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3580 	return ROFF_IGN;
3581 }
3582 
3583 static int
roff_cc(ROFF_ARGS)3584 roff_cc(ROFF_ARGS)
3585 {
3586 	const char	*p;
3587 
3588 	p = buf->buf + pos;
3589 
3590 	if (*p == '\0' || (r->control = *p++) == '.')
3591 		r->control = '\0';
3592 
3593 	if (*p != '\0')
3594 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3595 		    ln, p - buf->buf, "cc ... %s", p);
3596 
3597 	return ROFF_IGN;
3598 }
3599 
3600 static int
roff_char(ROFF_ARGS)3601 roff_char(ROFF_ARGS)
3602 {
3603 	const char	*p, *kp, *vp;
3604 	size_t		 ksz, vsz;
3605 	int		 font;
3606 
3607 	/* Parse the character to be replaced. */
3608 
3609 	kp = buf->buf + pos;
3610 	p = kp + 1;
3611 	if (*kp == '\0' || (*kp == '\\' &&
3612 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3613 	    (*p != ' ' && *p != '\0')) {
3614 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3615 		return ROFF_IGN;
3616 	}
3617 	ksz = p - kp;
3618 	while (*p == ' ')
3619 		p++;
3620 
3621 	/*
3622 	 * If the replacement string contains a font escape sequence,
3623 	 * we have to restore the font at the end.
3624 	 */
3625 
3626 	vp = p;
3627 	vsz = strlen(p);
3628 	font = 0;
3629 	while (*p != '\0') {
3630 		if (*p++ != '\\')
3631 			continue;
3632 		switch (mandoc_escape(&p, NULL, NULL)) {
3633 		case ESCAPE_FONT:
3634 		case ESCAPE_FONTROMAN:
3635 		case ESCAPE_FONTITALIC:
3636 		case ESCAPE_FONTBOLD:
3637 		case ESCAPE_FONTBI:
3638 		case ESCAPE_FONTCR:
3639 		case ESCAPE_FONTCB:
3640 		case ESCAPE_FONTCI:
3641 		case ESCAPE_FONTPREV:
3642 			font++;
3643 			break;
3644 		default:
3645 			break;
3646 		}
3647 	}
3648 	if (font > 1)
3649 		mandoc_msg(MANDOCERR_CHAR_FONT,
3650 		    ln, (int)(vp - buf->buf), "%s", vp);
3651 
3652 	/*
3653 	 * Approximate the effect of .char using the .tr tables.
3654 	 * XXX In groff, .char and .tr interact differently.
3655 	 */
3656 
3657 	if (ksz == 1) {
3658 		if (r->xtab == NULL)
3659 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3660 		assert((unsigned int)*kp < 128);
3661 		free(r->xtab[(int)*kp].p);
3662 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3663 		    "%s%s", vp, font ? "\fP" : "");
3664 	} else {
3665 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3666 		if (font)
3667 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3668 	}
3669 	return ROFF_IGN;
3670 }
3671 
3672 static int
roff_ec(ROFF_ARGS)3673 roff_ec(ROFF_ARGS)
3674 {
3675 	const char	*p;
3676 
3677 	p = buf->buf + pos;
3678 	if (*p == '\0')
3679 		r->escape = '\\';
3680 	else {
3681 		r->escape = *p;
3682 		if (*++p != '\0')
3683 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3684 			    (int)(p - buf->buf), "ec ... %s", p);
3685 	}
3686 	return ROFF_IGN;
3687 }
3688 
3689 static int
roff_eo(ROFF_ARGS)3690 roff_eo(ROFF_ARGS)
3691 {
3692 	r->escape = '\0';
3693 	if (buf->buf[pos] != '\0')
3694 		mandoc_msg(MANDOCERR_ARG_SKIP,
3695 		    ln, pos, "eo %s", buf->buf + pos);
3696 	return ROFF_IGN;
3697 }
3698 
3699 static int
roff_mc(ROFF_ARGS)3700 roff_mc(ROFF_ARGS)
3701 {
3702 	struct roff_node	*n;
3703 	char			*cp;
3704 
3705 	/* Parse the first argument. */
3706 
3707 	cp = buf->buf + pos;
3708 	if (*cp != '\0')
3709 		cp++;
3710 	if (buf->buf[pos] == '\\') {
3711 		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3712 		case ESCAPE_SPECIAL:
3713 		case ESCAPE_UNICODE:
3714 		case ESCAPE_NUMBERED:
3715 			break;
3716 		default:
3717 			*cp = '\0';
3718 			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3719 			    "mc %s", buf->buf + pos);
3720 			buf->buf[pos] = '\0';
3721 			break;
3722 		}
3723 	}
3724 
3725 	/* Ignore additional arguments. */
3726 
3727 	while (*cp == ' ')
3728 		*cp++ = '\0';
3729 	if (*cp != '\0') {
3730 		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3731 		    "mc ... %s", cp);
3732 		*cp = '\0';
3733 	}
3734 
3735 	/* Create the .mc node. */
3736 
3737 	roff_elem_alloc(r->man, ln, ppos, tok);
3738 	n = r->man->last;
3739 	if (buf->buf[pos] != '\0')
3740 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3741 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3742 	r->man->last = n;
3743 	r->man->next = ROFF_NEXT_SIBLING;
3744 	return ROFF_IGN;
3745 }
3746 
3747 static int
roff_nop(ROFF_ARGS)3748 roff_nop(ROFF_ARGS)
3749 {
3750 	while (buf->buf[pos] == ' ')
3751 		pos++;
3752 	*offs = pos;
3753 	return ROFF_RERUN;
3754 }
3755 
3756 static int
roff_tr(ROFF_ARGS)3757 roff_tr(ROFF_ARGS)
3758 {
3759 	const char	*p, *first, *second;
3760 	size_t		 fsz, ssz;
3761 
3762 	p = buf->buf + pos;
3763 
3764 	if (*p == '\0') {
3765 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3766 		return ROFF_IGN;
3767 	}
3768 
3769 	while (*p != '\0') {
3770 		fsz = ssz = 1;
3771 
3772 		first = p++;
3773 		if (*first == '\\') {
3774 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3775 				return ROFF_IGN;
3776 			fsz = (size_t)(p - first);
3777 		}
3778 
3779 		second = p++;
3780 		if (*second == '\\') {
3781 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3782 				return ROFF_IGN;
3783 			ssz = (size_t)(p - second);
3784 		} else if (*second == '\0') {
3785 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3786 			    (int)(first - buf->buf), "tr %s", first);
3787 			second = " ";
3788 			p--;
3789 		}
3790 
3791 		if (fsz > 1) {
3792 			roff_setstrn(&r->xmbtab, first, fsz,
3793 			    second, ssz, 0);
3794 			continue;
3795 		}
3796 
3797 		if (r->xtab == NULL)
3798 			r->xtab = mandoc_calloc(128,
3799 			    sizeof(struct roffstr));
3800 
3801 		free(r->xtab[(int)*first].p);
3802 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3803 		r->xtab[(int)*first].sz = ssz;
3804 	}
3805 
3806 	return ROFF_IGN;
3807 }
3808 
3809 /*
3810  * Implementation of the .return request.
3811  * There is no need to call roff_userret() from here.
3812  * The read module will call that after rewinding the reader stack
3813  * to the place from where the current macro was called.
3814  */
3815 static int
roff_return(ROFF_ARGS)3816 roff_return(ROFF_ARGS)
3817 {
3818 	if (r->mstackpos >= 0)
3819 		return ROFF_IGN | ROFF_USERRET;
3820 
3821 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3822 	return ROFF_IGN;
3823 }
3824 
3825 static int
roff_rn(ROFF_ARGS)3826 roff_rn(ROFF_ARGS)
3827 {
3828 	const char	*value;
3829 	char		*oldn, *newn, *end;
3830 	size_t		 oldsz, newsz;
3831 	int		 deftype;
3832 
3833 	oldn = newn = buf->buf + pos;
3834 	if (*oldn == '\0')
3835 		return ROFF_IGN;
3836 
3837 	oldsz = roff_getname(r, &newn, ln, pos);
3838 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3839 		return ROFF_IGN;
3840 
3841 	end = newn;
3842 	newsz = roff_getname(r, &end, ln, newn - buf->buf);
3843 	if (newsz == 0)
3844 		return ROFF_IGN;
3845 
3846 	deftype = ROFFDEF_ANY;
3847 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3848 	switch (deftype) {
3849 	case ROFFDEF_USER:
3850 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3851 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3852 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3853 		break;
3854 	case ROFFDEF_PRE:
3855 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3856 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3857 		break;
3858 	case ROFFDEF_REN:
3859 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3860 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3861 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862 		break;
3863 	case ROFFDEF_STD:
3864 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3865 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3866 		break;
3867 	default:
3868 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3869 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3870 		break;
3871 	}
3872 	return ROFF_IGN;
3873 }
3874 
3875 static int
roff_shift(ROFF_ARGS)3876 roff_shift(ROFF_ARGS)
3877 {
3878 	struct mctx	*ctx;
3879 	int		 argpos, levels, i;
3880 
3881 	argpos = pos;
3882 	levels = 1;
3883 	if (buf->buf[pos] != '\0' &&
3884 	    roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3885 		mandoc_msg(MANDOCERR_CE_NONUM,
3886 		    ln, pos, "shift %s", buf->buf + pos);
3887 		levels = 1;
3888 	}
3889 	if (r->mstackpos < 0) {
3890 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3891 		return ROFF_IGN;
3892 	}
3893 	ctx = r->mstack + r->mstackpos;
3894 	if (levels > ctx->argc) {
3895 		mandoc_msg(MANDOCERR_SHIFT,
3896 		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3897 		levels = ctx->argc;
3898 	}
3899 	if (levels < 0) {
3900 		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3901 		levels = 0;
3902 	}
3903 	if (levels == 0)
3904 		return ROFF_IGN;
3905 	for (i = 0; i < levels; i++)
3906 		free(ctx->argv[i]);
3907 	ctx->argc -= levels;
3908 	for (i = 0; i < ctx->argc; i++)
3909 		ctx->argv[i] = ctx->argv[i + levels];
3910 	return ROFF_IGN;
3911 }
3912 
3913 static int
roff_so(ROFF_ARGS)3914 roff_so(ROFF_ARGS)
3915 {
3916 	char *name, *cp;
3917 
3918 	name = buf->buf + pos;
3919 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3920 
3921 	/*
3922 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3923 	 * opening anything that's not in our cwd or anything beneath
3924 	 * it.  Thus, explicitly disallow traversing up the file-system
3925 	 * or using absolute paths.
3926 	 */
3927 
3928 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3929 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3930 		buf->sz = mandoc_asprintf(&cp,
3931 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3932 		free(buf->buf);
3933 		buf->buf = cp;
3934 		*offs = 0;
3935 		return ROFF_REPARSE;
3936 	}
3937 
3938 	*offs = pos;
3939 	return ROFF_SO;
3940 }
3941 
3942 /* --- user defined strings and macros ------------------------------------ */
3943 
3944 static int
roff_userdef(ROFF_ARGS)3945 roff_userdef(ROFF_ARGS)
3946 {
3947 	struct mctx	 *ctx;
3948 	char		 *arg, *ap, *dst, *src;
3949 	size_t		  sz;
3950 
3951 	/* If the macro is empty, ignore it altogether. */
3952 
3953 	if (*r->current_string == '\0')
3954 		return ROFF_IGN;
3955 
3956 	/* Initialize a new macro stack context. */
3957 
3958 	if (++r->mstackpos == r->mstacksz) {
3959 		r->mstack = mandoc_recallocarray(r->mstack,
3960 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3961 		r->mstacksz += 8;
3962 	}
3963 	ctx = r->mstack + r->mstackpos;
3964 	ctx->argc = 0;
3965 
3966 	/*
3967 	 * Collect pointers to macro argument strings,
3968 	 * NUL-terminating them and escaping quotes.
3969 	 */
3970 
3971 	src = buf->buf + pos;
3972 	while (*src != '\0') {
3973 		if (ctx->argc == ctx->argsz) {
3974 			ctx->argsz += 8;
3975 			ctx->argv = mandoc_reallocarray(ctx->argv,
3976 			    ctx->argsz, sizeof(*ctx->argv));
3977 		}
3978 		arg = roff_getarg(r, &src, ln, &pos);
3979 		sz = 1;  /* For the terminating NUL. */
3980 		for (ap = arg; *ap != '\0'; ap++)
3981 			sz += *ap == '"' ? 4 : 1;
3982 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3983 		for (ap = arg; *ap != '\0'; ap++) {
3984 			if (*ap == '"') {
3985 				memcpy(dst, "\\(dq", 4);
3986 				dst += 4;
3987 			} else
3988 				*dst++ = *ap;
3989 		}
3990 		*dst = '\0';
3991 		free(arg);
3992 	}
3993 
3994 	/* Replace the macro invocation by the macro definition. */
3995 
3996 	free(buf->buf);
3997 	buf->buf = mandoc_strdup(r->current_string);
3998 	buf->sz = strlen(buf->buf) + 1;
3999 	*offs = 0;
4000 
4001 	return buf->buf[buf->sz - 2] == '\n' ?
4002 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4003 }
4004 
4005 /*
4006  * Calling a high-level macro that was renamed with .rn.
4007  * r->current_string has already been set up by roff_parse().
4008  */
4009 static int
roff_renamed(ROFF_ARGS)4010 roff_renamed(ROFF_ARGS)
4011 {
4012 	char	*nbuf;
4013 
4014 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4015 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4016 	free(buf->buf);
4017 	buf->buf = nbuf;
4018 	*offs = 0;
4019 	return ROFF_CONT;
4020 }
4021 
4022 /*
4023  * Measure the length in bytes of the roff identifier at *cpp
4024  * and advance the pointer to the next word.
4025  */
4026 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)4027 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4028 {
4029 	char	 *name, *cp;
4030 	int	  namesz, inam, iend;
4031 
4032 	name = *cpp;
4033 	if (*name == '\0')
4034 		return 0;
4035 
4036 	/* Advance cp to the byte after the end of the name. */
4037 
4038 	cp = name;
4039 	namesz = 0;
4040 	for (;;) {
4041 		if (*cp == '\0')
4042 			break;
4043 		if (*cp == ' ' || *cp == '\t') {
4044 			cp++;
4045 			break;
4046 		}
4047 		if (*cp != '\\') {
4048 			if (name + namesz < cp) {
4049 				name[namesz] = *cp;
4050 				*cp = ' ';
4051 			}
4052 			namesz++;
4053 			cp++;
4054 			continue;
4055 		}
4056 		if (cp[1] == '{' || cp[1] == '}')
4057 			break;
4058 		if (roff_escape(cp, 0, 0, NULL, &inam,
4059 		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
4060 			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4061 			    "%.*s%.*s", namesz, name, iend, cp);
4062 			cp += iend;
4063 			break;
4064 		}
4065 
4066 		/*
4067 		 * In an identifier, \\, \., \G and so on
4068 		 * are reduced to \, ., G and so on,
4069 		 * vaguely similar to copy mode.
4070 		 */
4071 
4072 		name[namesz++] = cp[inam];
4073 		while (iend--) {
4074 			if (cp >= name + namesz)
4075 				*cp = ' ';
4076 			cp++;
4077 		}
4078 	}
4079 
4080 	/* Read past spaces. */
4081 
4082 	while (*cp == ' ')
4083 		cp++;
4084 
4085 	*cpp = cp;
4086 	return namesz;
4087 }
4088 
4089 /*
4090  * Store *string into the user-defined string called *name.
4091  * To clear an existing entry, call with (*r, *name, NULL, 0).
4092  * append == 0: replace mode
4093  * append == 1: single-line append mode
4094  * append == 2: multiline append mode, append '\n' after each call
4095  */
4096 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4097 roff_setstr(struct roff *r, const char *name, const char *string,
4098 	int append)
4099 {
4100 	size_t	 namesz;
4101 
4102 	namesz = strlen(name);
4103 	roff_setstrn(&r->strtab, name, namesz, string,
4104 	    string ? strlen(string) : 0, append);
4105 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4106 }
4107 
4108 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4109 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4110 		const char *string, size_t stringsz, int append)
4111 {
4112 	struct roffkv	*n;
4113 	char		*c;
4114 	int		 i;
4115 	size_t		 oldch, newch;
4116 
4117 	/* Search for an existing string with the same name. */
4118 	n = *r;
4119 
4120 	while (n && (namesz != n->key.sz ||
4121 			strncmp(n->key.p, name, namesz)))
4122 		n = n->next;
4123 
4124 	if (NULL == n) {
4125 		/* Create a new string table entry. */
4126 		n = mandoc_malloc(sizeof(struct roffkv));
4127 		n->key.p = mandoc_strndup(name, namesz);
4128 		n->key.sz = namesz;
4129 		n->val.p = NULL;
4130 		n->val.sz = 0;
4131 		n->next = *r;
4132 		*r = n;
4133 	} else if (0 == append) {
4134 		free(n->val.p);
4135 		n->val.p = NULL;
4136 		n->val.sz = 0;
4137 	}
4138 
4139 	if (NULL == string)
4140 		return;
4141 
4142 	/*
4143 	 * One additional byte for the '\n' in multiline mode,
4144 	 * and one for the terminating '\0'.
4145 	 */
4146 	newch = stringsz + (1 < append ? 2u : 1u);
4147 
4148 	if (NULL == n->val.p) {
4149 		n->val.p = mandoc_malloc(newch);
4150 		*n->val.p = '\0';
4151 		oldch = 0;
4152 	} else {
4153 		oldch = n->val.sz;
4154 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4155 	}
4156 
4157 	/* Skip existing content in the destination buffer. */
4158 	c = n->val.p + (int)oldch;
4159 
4160 	/* Append new content to the destination buffer. */
4161 	i = 0;
4162 	while (i < (int)stringsz) {
4163 		/*
4164 		 * Rudimentary roff copy mode:
4165 		 * Handle escaped backslashes.
4166 		 */
4167 		if ('\\' == string[i] && '\\' == string[i + 1])
4168 			i++;
4169 		*c++ = string[i++];
4170 	}
4171 
4172 	/* Append terminating bytes. */
4173 	if (1 < append)
4174 		*c++ = '\n';
4175 
4176 	*c = '\0';
4177 	n->val.sz = (int)(c - n->val.p);
4178 }
4179 
4180 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4181 roff_getstrn(struct roff *r, const char *name, size_t len,
4182     int *deftype)
4183 {
4184 	const struct roffkv	*n;
4185 	int			 found, i;
4186 	enum roff_tok		 tok;
4187 
4188 	found = 0;
4189 	for (n = r->strtab; n != NULL; n = n->next) {
4190 		if (strncmp(name, n->key.p, len) != 0 ||
4191 		    n->key.p[len] != '\0' || n->val.p == NULL)
4192 			continue;
4193 		if (*deftype & ROFFDEF_USER) {
4194 			*deftype = ROFFDEF_USER;
4195 			return n->val.p;
4196 		} else {
4197 			found = 1;
4198 			break;
4199 		}
4200 	}
4201 	for (n = r->rentab; n != NULL; n = n->next) {
4202 		if (strncmp(name, n->key.p, len) != 0 ||
4203 		    n->key.p[len] != '\0' || n->val.p == NULL)
4204 			continue;
4205 		if (*deftype & ROFFDEF_REN) {
4206 			*deftype = ROFFDEF_REN;
4207 			return n->val.p;
4208 		} else {
4209 			found = 1;
4210 			break;
4211 		}
4212 	}
4213 	for (i = 0; i < PREDEFS_MAX; i++) {
4214 		if (strncmp(name, predefs[i].name, len) != 0 ||
4215 		    predefs[i].name[len] != '\0')
4216 			continue;
4217 		if (*deftype & ROFFDEF_PRE) {
4218 			*deftype = ROFFDEF_PRE;
4219 			return predefs[i].str;
4220 		} else {
4221 			found = 1;
4222 			break;
4223 		}
4224 	}
4225 	if (r->man->meta.macroset != MACROSET_MAN) {
4226 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4227 			if (strncmp(name, roff_name[tok], len) != 0 ||
4228 			    roff_name[tok][len] != '\0')
4229 				continue;
4230 			if (*deftype & ROFFDEF_STD) {
4231 				*deftype = ROFFDEF_STD;
4232 				return NULL;
4233 			} else {
4234 				found = 1;
4235 				break;
4236 			}
4237 		}
4238 	}
4239 	if (r->man->meta.macroset != MACROSET_MDOC) {
4240 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4241 			if (strncmp(name, roff_name[tok], len) != 0 ||
4242 			    roff_name[tok][len] != '\0')
4243 				continue;
4244 			if (*deftype & ROFFDEF_STD) {
4245 				*deftype = ROFFDEF_STD;
4246 				return NULL;
4247 			} else {
4248 				found = 1;
4249 				break;
4250 			}
4251 		}
4252 	}
4253 
4254 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4255 		if (*deftype & ROFFDEF_REN) {
4256 			/*
4257 			 * This might still be a request,
4258 			 * so do not treat it as undefined yet.
4259 			 */
4260 			*deftype = ROFFDEF_UNDEF;
4261 			return NULL;
4262 		}
4263 
4264 		/* Using an undefined string defines it to be empty. */
4265 
4266 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4267 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4268 	}
4269 
4270 	*deftype = 0;
4271 	return NULL;
4272 }
4273 
4274 static void
roff_freestr(struct roffkv * r)4275 roff_freestr(struct roffkv *r)
4276 {
4277 	struct roffkv	 *n, *nn;
4278 
4279 	for (n = r; n; n = nn) {
4280 		free(n->key.p);
4281 		free(n->val.p);
4282 		nn = n->next;
4283 		free(n);
4284 	}
4285 }
4286 
4287 /* --- accessors and utility functions ------------------------------------ */
4288 
4289 /*
4290  * Duplicate an input string, making the appropriate character
4291  * conversations (as stipulated by `tr') along the way.
4292  * Returns a heap-allocated string with all the replacements made.
4293  */
4294 char *
roff_strdup(const struct roff * r,const char * p)4295 roff_strdup(const struct roff *r, const char *p)
4296 {
4297 	const struct roffkv *cp;
4298 	char		*res;
4299 	const char	*pp;
4300 	size_t		 ssz, sz;
4301 	enum mandoc_esc	 esc;
4302 
4303 	if (NULL == r->xmbtab && NULL == r->xtab)
4304 		return mandoc_strdup(p);
4305 	else if ('\0' == *p)
4306 		return mandoc_strdup("");
4307 
4308 	/*
4309 	 * Step through each character looking for term matches
4310 	 * (remember that a `tr' can be invoked with an escape, which is
4311 	 * a glyph but the escape is multi-character).
4312 	 * We only do this if the character hash has been initialised
4313 	 * and the string is >0 length.
4314 	 */
4315 
4316 	res = NULL;
4317 	ssz = 0;
4318 
4319 	while ('\0' != *p) {
4320 		assert((unsigned int)*p < 128);
4321 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4322 			sz = r->xtab[(int)*p].sz;
4323 			res = mandoc_realloc(res, ssz + sz + 1);
4324 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4325 			ssz += sz;
4326 			p++;
4327 			continue;
4328 		} else if ('\\' != *p) {
4329 			res = mandoc_realloc(res, ssz + 2);
4330 			res[ssz++] = *p++;
4331 			continue;
4332 		}
4333 
4334 		/* Search for term matches. */
4335 		for (cp = r->xmbtab; cp; cp = cp->next)
4336 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4337 				break;
4338 
4339 		if (NULL != cp) {
4340 			/*
4341 			 * A match has been found.
4342 			 * Append the match to the array and move
4343 			 * forward by its keysize.
4344 			 */
4345 			res = mandoc_realloc(res,
4346 			    ssz + cp->val.sz + 1);
4347 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4348 			ssz += cp->val.sz;
4349 			p += (int)cp->key.sz;
4350 			continue;
4351 		}
4352 
4353 		/*
4354 		 * Handle escapes carefully: we need to copy
4355 		 * over just the escape itself, or else we might
4356 		 * do replacements within the escape itself.
4357 		 * Make sure to pass along the bogus string.
4358 		 */
4359 		pp = p++;
4360 		esc = mandoc_escape(&p, NULL, NULL);
4361 		if (ESCAPE_ERROR == esc) {
4362 			sz = strlen(pp);
4363 			res = mandoc_realloc(res, ssz + sz + 1);
4364 			memcpy(res + ssz, pp, sz);
4365 			break;
4366 		}
4367 		/*
4368 		 * We bail out on bad escapes.
4369 		 * No need to warn: we already did so when
4370 		 * roff_expand() was called.
4371 		 */
4372 		sz = (int)(p - pp);
4373 		res = mandoc_realloc(res, ssz + sz + 1);
4374 		memcpy(res + ssz, pp, sz);
4375 		ssz += sz;
4376 	}
4377 
4378 	res[(int)ssz] = '\0';
4379 	return res;
4380 }
4381 
4382 int
roff_getformat(const struct roff * r)4383 roff_getformat(const struct roff *r)
4384 {
4385 
4386 	return r->format;
4387 }
4388 
4389 /*
4390  * Find out whether a line is a macro line or not.
4391  * If it is, adjust the current position and return one; if it isn't,
4392  * return zero and don't change the current position.
4393  * If the control character has been set with `.cc', then let that grain
4394  * precedence.
4395  * This is slightly contrary to groff, where using the non-breaking
4396  * control character when `cc' has been invoked will cause the
4397  * non-breaking macro contents to be printed verbatim.
4398  */
4399 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4400 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4401 {
4402 	int		pos;
4403 
4404 	pos = *ppos;
4405 
4406 	if (r->control != '\0' && cp[pos] == r->control)
4407 		pos++;
4408 	else if (r->control != '\0')
4409 		return 0;
4410 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4411 		pos += 2;
4412 	else if ('.' == cp[pos] || '\'' == cp[pos])
4413 		pos++;
4414 	else
4415 		return 0;
4416 
4417 	while (' ' == cp[pos] || '\t' == cp[pos])
4418 		pos++;
4419 
4420 	*ppos = pos;
4421 	return 1;
4422 }
4423