xref: /openbsd/usr.bin/mandoc/roff.c (revision 7c5f3f4e)
1 /* $OpenBSD: roff.c,v 1.276 2025/01/06 18:48:13 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2015, 2017-2025 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the roff(7) parser for mandoc(1).
19  */
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40 
/*
 * Upper bound on the number of string expansions performed for one
 * input line; reaching it breaks what would be an infinite loop.
 */
#define	EXPAND_LIMIT	1000

/*
 * Bit flags describing the kind of definition of a macro or string.
 */
#define	ROFFDEF_USER	(1 << 1)	/* defined by the user */
#define	ROFFDEF_PRE	(1 << 2)	/* predefined */
#define	ROFFDEF_REN	(1 << 3)	/* standard macro that was renamed */
#define	ROFFDEF_STD	(1 << 4)	/* mdoc(7) or man(7) macro */
#define	ROFFDEF_UNDEF	(1 << 5)	/* completely undefined */

/* Union of all flags that denote an existing definition. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
52 
53 /* --- data types --------------------------------------------------------- */
54 
/*
 * Minimal string container: a buffer together with its cached length.
 */
struct	roffstr {
	char	*p;	/* buffer, always NUL-terminated */
	size_t	 sz;	/* length of p as reported by strlen() */
};
62 
63 /*
64  * A key-value roffstr pair as part of a singly-linked list.
65  */
66 struct	roffkv {
67 	struct roffstr	 key;
68 	struct roffstr	 val;
69 	struct roffkv	*next; /* next in list */
70 };
71 
72 /*
73  * A single number register as part of a singly-linked list.
74  */
75 struct	roffreg {
76 	struct roffstr	 key;
77 	int		 val;
78 	int		 step;
79 	struct roffreg	*next;
80 };
81 
82 /*
83  * Association of request and macro names with token IDs.
84  */
85 struct	roffreq {
86 	enum roff_tok	 tok;
87 	char		 name[];
88 };
89 
/*
 * Context of one macro invocation.
 * Nested macro calls need one context per nesting level.
 */
struct	mctx {
	char		**argv;		/* argument vector */
	int		 argc;		/* NOTE(review): presumably the */
					/* number of arguments in use */
	int		 argsz;		/* NOTE(review): presumably the */
					/* allocated size of argv */
};
99 
/*
 * Complete state of one roff(7) parser instance.
 */
struct	roff {
	/* Associated high-level parser and block stack. */
	struct roff_man	*man;		/* mdoc or man parser */
	struct roffnode	*last;		/* leaf of the block stack */

	/* Dynamically sized stacks; sizes and positions are below. */
	struct mctx	*mstack;	/* stack of macro contexts */
	int		*rstack;	/* stack of inverted `ie' values */

	/* Lookup tables. */
	struct ohash	*reqtab;	/* request lookup table */
	struct roffreg	*regtab;	/* number registers */
	struct roffkv	*strtab;	/* user-defined strings & macros */
	struct roffkv	*rentab;	/* renamed strings & macros */
	struct roffkv	*xmbtab;	/* multi-byte trans table (`tr') */
	struct roffstr	*xtab;		/* single-byte trans table (`tr') */

	const char	*current_string; /* value of last called user macro */

	/* tbl(7) and eqn(7) sub-parsers. */
	struct tbl_node	*first_tbl;	/* first table parsed */
	struct tbl_node	*last_tbl;	/* last table parsed */
	struct tbl_node	*tbl;		/* current table being parsed */
	struct eqn_node	*last_eqn;	/* equation parser */
	struct eqn_node	*eqn;		/* active equation parser */
	int		 eqn_inline;	/* current equation is inline */

	/* Scalar state. */
	int		 options;	/* parse options */
	int		 mstacksz;	/* current size of mstack */
	int		 mstackpos;	/* position in mstack */
	int		 rstacksz;	/* current size limit of rstack */
	int		 rstackpos;	/* position in rstack */
	int		 format;	/* current file in mdoc or man format */
	char		 control;	/* control character */
	char		 escape;	/* escape character */
};
127 
128 /*
129  * A macro definition, condition, or ignored block.
130  */
131 struct	roffnode {
132 	enum roff_tok	 tok; /* type of node */
133 	struct roffnode	*parent; /* up one in stack */
134 	int		 line; /* parse line */
135 	int		 col; /* parse col */
136 	char		*name; /* node name, e.g. macro name */
137 	char		*end; /* custom end macro of the block */
138 	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
139 	int		 rule; /* content is: 1=evaluated 0=skipped */
140 };
141 
/*
 * Parameter list shared by all request handlers:
 *   r     parse context
 *   tok   token of the macro
 *   buf   input buffer
 *   ln    parse line
 *   ppos  original position in the buffer
 *   pos   current position in the buffer
 *   offs  reset offset of the buffer data
 */
#define	ROFF_ARGS	 struct roff *r, \
			 enum roff_tok tok, \
			 struct buf *buf, \
			 int ln, \
			 int ppos, \
			 int pos, \
			 int *offs

/* Pointer to a request handler. */
typedef	int (*roffproc)(ROFF_ARGS);
151 
152 struct	roffmac {
153 	roffproc	 proc; /* process new macro */
154 	roffproc	 text; /* process as child text of macro */
155 	roffproc	 sub; /* process as child of macro */
156 	int		 flags;
157 #define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
158 };
159 
/*
 * A predefined string: the name used in the input
 * and the text it is replaced with.
 */
struct	predef {
	const char	*name;	/* predefined input name */
	const char	*str;	/* replacement symbol */
};

/*
 * Expands to one initializer of a struct predef array,
 * including the trailing comma.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167 
168 /* --- function prototypes ------------------------------------------------ */
169 
/*
 * Forward declarations of all file-local functions.
 * Those declared with ROFF_ARGS are request handlers
 * compatible with the roffproc function pointer type.
 */
static int roffnode_cleanscope(struct roff *);
static int roffnode_pop(struct roff *);
static void roffnode_push(struct roff *, enum roff_tok, const char *,
    int, int);
static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
static int roff_als(ROFF_ARGS);
static int roff_block(ROFF_ARGS);
static int roff_block_text(ROFF_ARGS);
static int roff_block_sub(ROFF_ARGS);
static int roff_break(ROFF_ARGS);
static int roff_cblock(ROFF_ARGS);
static int roff_cc(ROFF_ARGS);
static int roff_ccond(struct roff *, int, int);
static int roff_char(ROFF_ARGS);
static int roff_cond(ROFF_ARGS);
static int roff_cond_checkend(ROFF_ARGS);
static int roff_cond_text(ROFF_ARGS);
static int roff_cond_sub(ROFF_ARGS);
static int roff_ds(ROFF_ARGS);
static int roff_ec(ROFF_ARGS);
static int roff_eo(ROFF_ARGS);
static int roff_eqndelim(struct roff *, struct buf *, int);
static int roff_evalcond(struct roff *, int, char *, int *);
static int roff_evalpar(int, const char *, int *, int *, char, int);
static int roff_evalstrcond(const char *, int *);
static int roff_expand(struct roff *, struct buf *, int, int, char);
static void roff_expand_patch(struct buf *, int, const char *, int);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
static size_t roff_getname(char **, int, int);
static int roff_getnum(const char *, int *, int *, char, int);
static int roff_getop(const char *, int *, char *);
static int roff_getregn(struct roff *, const char *, size_t, char);
static int roff_getregro(const struct roff *, const char *name);
static const char *roff_getstrn(struct roff *, const char *, size_t,
    int *);
static int roff_hasregn(const struct roff *, const char *, size_t);
static int roff_insec(ROFF_ARGS);
static int roff_it(ROFF_ARGS);
static int roff_line_ignore(ROFF_ARGS);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static int roff_manyarg(ROFF_ARGS);
static int roff_mc(ROFF_ARGS);
static int roff_noarg(ROFF_ARGS);
static int roff_nop(ROFF_ARGS);
static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static enum roff_tok roff_parse(struct roff *, char *, int *, int, int);
static int roff_parse_comment(struct roff *, struct buf *, int, int,
    char);
static int roff_parsetext(struct roff *, struct buf *, int, int *);
static int roff_renamed(ROFF_ARGS);
static int roff_req_or_macro(ROFF_ARGS);
static int roff_return(ROFF_ARGS);
static int roff_rm(ROFF_ARGS);
static int roff_rn(ROFF_ARGS);
static int roff_rr(ROFF_ARGS);
static void roff_setregn(struct roff *, const char *, size_t, int, char,
    int);
static void roff_setstr(struct roff *, const char *, const char *, int);
static void roff_setstrn(struct roffkv **, const char *, size_t,
    const char *, size_t, int);
static int roff_shift(ROFF_ARGS);
static int roff_so(ROFF_ARGS);
static int roff_tr(ROFF_ARGS);
static int roff_Dd(ROFF_ARGS);
static int roff_TE(ROFF_ARGS);
static int roff_TS(ROFF_ARGS);
static int roff_EQ(ROFF_ARGS);
static int roff_EN(ROFF_ARGS);
static int roff_T_(ROFF_ARGS);
static int roff_unsupp(ROFF_ARGS);
static int roff_userdef(ROFF_ARGS);
254 
255 /* --- constant data ------------------------------------------------------ */
256 
/*
 * Names of all requests and macros, in positional order.
 * roffhash_alloc() reads this table through the roff_name alias,
 * indexing it with enum roff_tok values, and skips every slot that
 * holds NULL, so NULL marks a token that cannot be looked up by name.
 * The table has MAN_MAX + 1 slots.
 * NOTE(review): the entries must stay in exactly the order of the
 * token enumeration, which is declared outside this file - confirm
 * against that declaration before inserting or removing a name.
 */
257 const char *__roff_name[MAN_MAX + 1] = {
258 	"br",		"ce",		"fi",		"ft",
259 	"ll",		"mc",		"nf",
260 	"po",		"rj",		"sp",
261 	"ta",		"ti",		NULL,
262 	"ab",		"ad",		"af",		"aln",
263 	"als",		"am",		"am1",		"ami",
264 	"ami1",		"as",		"as1",		"asciify",
265 	"backtrace",	"bd",		"bleedat",	"blm",
266         "box",		"boxa",		"bp",		"BP",
267 	"break",	"breakchar",	"brnl",		"brp",
268 	"brpnl",	"c2",		"cc",
269 	"cf",		"cflags",	"ch",		"char",
270 	"chop",		"class",	"close",	"CL",
271 	"color",	"composite",	"continue",	"cp",
272 	"cropat",	"cs",		"cu",		"da",
273 	"dch",		"Dd",		"de",		"de1",
274 	"defcolor",	"dei",		"dei1",		"device",
275 	"devicem",	"di",		"do",		"ds",
276 	"ds1",		"dwh",		"dt",		"ec",
277 	"ecr",		"ecs",		"el",		"em",
278 	"EN",		"eo",		"EP",		"EQ",
279 	"errprint",	"ev",		"evc",		"ex",
280 	"fallback",	"fam",		"fc",		"fchar",
281 	"fcolor",	"fdeferlig",	"feature",	"fkern",
282 	"fl",		"flig",		"fp",		"fps",
283 	"fschar",	"fspacewidth",	"fspecial",	"ftr",
284 	"fzoom",	"gcolor",	"hc",		"hcode",
285 	"hidechar",	"hla",		"hlm",		"hpf",
286 	"hpfa",		"hpfcode",	"hw",		"hy",
287 	"hylang",	"hylen",	"hym",		"hypp",
288 	"hys",		"ie",		"if",		"ig",
289 	"index",	"it",		"itc",		"IX",
290 	"kern",		"kernafter",	"kernbefore",	"kernpair",
291 	"lc",		"lc_ctype",	"lds",		"length",
292 	"letadj",	"lf",		"lg",		"lhang",
293 	"linetabs",	"lnr",		"lnrf",		"lpfx",
294 	"ls",		"lsm",		"lt",
295 	"mediasize",	"minss",	"mk",		"mso",
296 	"na",		"ne",		"nh",		"nhychar",
297 	"nm",		"nn",		"nop",		"nr",
298 	"nrf",		"nroff",	"ns",		"nx",
299 	"open",		"opena",	"os",		"output",
300 	"padj",		"papersize",	"pc",		"pev",
301 	"pi",		"PI",		"pl",		"pm",
302 	"pn",		"pnr",		"ps",
303 	"psbb",		"pshape",	"pso",		"ptr",
304 	"pvs",		"rchar",	"rd",		"recursionlimit",
305 	"return",	"rfschar",	"rhang",
306 	"rm",		"rn",		"rnn",		"rr",
307 	"rs",		"rt",		"schar",	"sentchar",
308 	"shc",		"shift",	"sizes",	"so",
309 	"spacewidth",	"special",	"spreadwarn",	"ss",
310 	"sty",		"substring",	"sv",		"sy",
311 	"T&",		"tc",		"TE",
312 	"TH",		"tkf",		"tl",
313 	"tm",		"tm1",		"tmc",		"tr",
314 	"track",	"transchar",	"trf",		"trimat",
315 	"trin",		"trnt",		"troff",	"TS",
316 	"uf",		"ul",		"unformat",	"unwatch",
317 	"unwatchn",	"vpt",		"vs",		"warn",
318 	"warnscale",	"watch",	"watchlength",	"watchn",
319 	"wh",		"while",	"write",	"writec",
320 	"writem",	"xflag",	".",		NULL,
321 	NULL,		"text",
322 	"Dd",		"Dt",		"Os",		"Sh",
323 	"Ss",		"Pp",		"D1",		"Dl",
324 	"Bd",		"Ed",		"Bl",		"El",
325 	"It",		"Ad",		"An",		"Ap",
326 	"Ar",		"Cd",		"Cm",		"Dv",
327 	"Er",		"Ev",		"Ex",		"Fa",
328 	"Fd",		"Fl",		"Fn",		"Ft",
329 	"Ic",		"In",		"Li",		"Nd",
330 	"Nm",		"Op",		"Ot",		"Pa",
331 	"Rv",		"St",		"Va",		"Vt",
332 	"Xr",		"%A",		"%B",		"%D",
333 	"%I",		"%J",		"%N",		"%O",
334 	"%P",		"%R",		"%T",		"%V",
335 	"Ac",		"Ao",		"Aq",		"At",
336 	"Bc",		"Bf",		"Bo",		"Bq",
337 	"Bsx",		"Bx",		"Db",		"Dc",
338 	"Do",		"Dq",		"Ec",		"Ef",
339 	"Em",		"Eo",		"Fx",		"Ms",
340 	"No",		"Ns",		"Nx",		"Ox",
341 	"Pc",		"Pf",		"Po",		"Pq",
342 	"Qc",		"Ql",		"Qo",		"Qq",
343 	"Re",		"Rs",		"Sc",		"So",
344 	"Sq",		"Sm",		"Sx",		"Sy",
345 	"Tn",		"Ux",		"Xc",		"Xo",
346 	"Fo",		"Fc",		"Oo",		"Oc",
347 	"Bk",		"Ek",		"Bt",		"Hf",
348 	"Fr",		"Ud",		"Lb",		"Lp",
349 	"Lk",		"Mt",		"Brq",		"Bro",
350 	"Brc",		"%C",		"Es",		"En",
351 	"Dx",		"%Q",		"%U",		"Ta",
352 	"Tg",		NULL,
353 	"TH",		"SH",		"SS",		"TP",
354 	"TQ",
355 	"LP",		"PP",		"P",		"IP",
356 	"HP",		"SM",		"SB",		"BI",
357 	"IB",		"BR",		"RB",		"R",
358 	"B",		"I",		"IR",		"RI",
359 	"RE",		"RS",		"DT",		"UC",
360 	"PD",		"AT",		"in",
361 	"SY",		"YS",		"OP",
362 	"EX",		"EE",		"UR",
363 	"UE",		"MT",		"ME",		"MR",
364 	NULL
365 };
/* Read-only view of the name table; this is what other code indexes. */
366 const	char *const *roff_name = __roff_name;
367 
/*
 * Handler table with TOKEN_NONE slots, one struct roffmac per slot:
 * { proc, text, sub, flags }.  The trailing comment on each line
 * names the request the slot belongs to; the sequence of these names
 * is the same as at the beginning of the __roff_name table.
 * Only the conditional requests (el, ie, if, while) carry
 * ROFFMAC_STRUCT, and only they and the block requests
 * (am*, de*, ig) provide text and sub handlers.
 * NOTE(review): presumably indexed by enum roff_tok; the code that
 * reads this table is not part of this section - confirm there.
 */
368 static	struct roffmac	 roffs[TOKEN_NONE] = {
369 	{ roff_noarg, NULL, NULL, 0 },  /* br */
370 	{ roff_onearg, NULL, NULL, 0 },  /* ce */
371 	{ roff_noarg, NULL, NULL, 0 },  /* fi */
372 	{ roff_onearg, NULL, NULL, 0 },  /* ft */
373 	{ roff_onearg, NULL, NULL, 0 },  /* ll */
374 	{ roff_mc, NULL, NULL, 0 },  /* mc */
375 	{ roff_noarg, NULL, NULL, 0 },  /* nf */
376 	{ roff_onearg, NULL, NULL, 0 },  /* po */
377 	{ roff_onearg, NULL, NULL, 0 },  /* rj */
378 	{ roff_onearg, NULL, NULL, 0 },  /* sp */
379 	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
380 	{ roff_onearg, NULL, NULL, 0 },  /* ti */
381 	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
382 	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
383 	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
384 	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
385 	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
386 	{ roff_als, NULL, NULL, 0 },  /* als */
387 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
388 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
389 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
390 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
391 	{ roff_ds, NULL, NULL, 0 },  /* as */
392 	{ roff_ds, NULL, NULL, 0 },  /* as1 */
393 	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
394 	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
395 	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
396 	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
397 	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
398 	{ roff_unsupp, NULL, NULL, 0 },  /* box */
399 	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
400 	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
401 	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
402 	{ roff_break, NULL, NULL, 0 },  /* break */
403 	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
404 	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
405 	{ roff_noarg, NULL, NULL, 0 },  /* brp */
406 	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
407 	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
408 	{ roff_cc, NULL, NULL, 0 },  /* cc */
409 	{ roff_insec, NULL, NULL, 0 },  /* cf */
410 	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
411 	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
412 	{ roff_char, NULL, NULL, 0 },  /* char */
413 	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
414 	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
415 	{ roff_insec, NULL, NULL, 0 },  /* close */
416 	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
417 	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
418 	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
419 	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
420 	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
421 	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
422 	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
423 	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
424 	{ roff_unsupp, NULL, NULL, 0 },  /* da */
425 	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
426 	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
427 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
428 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
429 	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
430 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
431 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
432 	{ roff_unsupp, NULL, NULL, 0 },  /* device */
433 	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
434 	{ roff_unsupp, NULL, NULL, 0 },  /* di */
435 	{ roff_unsupp, NULL, NULL, 0 },  /* do */
436 	{ roff_ds, NULL, NULL, 0 },  /* ds */
437 	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
438 	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
439 	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
440 	{ roff_ec, NULL, NULL, 0 },  /* ec */
441 	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
442 	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
443 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
444 	{ roff_unsupp, NULL, NULL, 0 },  /* em */
445 	{ roff_EN, NULL, NULL, 0 },  /* EN */
446 	{ roff_eo, NULL, NULL, 0 },  /* eo */
447 	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
448 	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
449 	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
450 	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
451 	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
452 	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
453 	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
454 	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
455 	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
456 	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
457 	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
458 	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
459 	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
460 	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
461 	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
462 	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
463 	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
464 	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
465 	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
466 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
467 	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
468 	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
469 	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
470 	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
471 	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
472 	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
473 	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
474 	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
475 	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
476 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
477 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
478 	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
479 	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
480 	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
481 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
482 	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
483 	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
484 	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
485 	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
486 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
487 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
488 	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
489 	{ roff_unsupp, NULL, NULL, 0 },  /* index */
490 	{ roff_it, NULL, NULL, 0 },  /* it */
491 	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
492 	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
493 	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
494 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
495 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
496 	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
497 	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
498 	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
499 	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
500 	{ roff_unsupp, NULL, NULL, 0 },  /* length */
501 	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
502 	{ roff_insec, NULL, NULL, 0 },  /* lf */
503 	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
504 	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
505 	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
506 	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
507 	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
508 	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
509 	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
510 	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
511 	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
512 	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
513 	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
514 	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
515 	{ roff_insec, NULL, NULL, 0 },  /* mso */
516 	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
517 	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
518 	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
519 	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
520 	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
521 	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
522 	{ roff_nop, NULL, NULL, 0 },  /* nop */
523 	{ roff_nr, NULL, NULL, 0 },  /* nr */
524 	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
525 	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
526 	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
527 	{ roff_insec, NULL, NULL, 0 },  /* nx */
528 	{ roff_insec, NULL, NULL, 0 },  /* open */
529 	{ roff_insec, NULL, NULL, 0 },  /* opena */
530 	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
531 	{ roff_unsupp, NULL, NULL, 0 },  /* output */
532 	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
533 	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
534 	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
535 	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
536 	{ roff_insec, NULL, NULL, 0 },  /* pi */
537 	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
538 	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
539 	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
540 	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
541 	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
542 	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
543 	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
544 	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
545 	{ roff_insec, NULL, NULL, 0 },  /* pso */
546 	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
547 	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
548 	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
549 	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
550 	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
551 	{ roff_return, NULL, NULL, 0 },  /* return */
552 	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
553 	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
554 	{ roff_rm, NULL, NULL, 0 },  /* rm */
555 	{ roff_rn, NULL, NULL, 0 },  /* rn */
556 	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
557 	{ roff_rr, NULL, NULL, 0 },  /* rr */
558 	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
559 	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
560 	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
561 	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
562 	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
563 	{ roff_shift, NULL, NULL, 0 },  /* shift */
564 	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
565 	{ roff_so, NULL, NULL, 0 },  /* so */
566 	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
567 	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
568 	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
569 	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
570 	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
571 	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
572 	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
573 	{ roff_insec, NULL, NULL, 0 },  /* sy */
574 	{ roff_T_, NULL, NULL, 0 },  /* T& */
575 	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
576 	{ roff_TE, NULL, NULL, 0 },  /* TE */
577 	{ roff_Dd, NULL, NULL, 0 },  /* TH */
578 	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
579 	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
580 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
581 	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
582 	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
583 	{ roff_tr, NULL, NULL, 0 },  /* tr */
584 	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
585 	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
586 	{ roff_insec, NULL, NULL, 0 },  /* trf */
587 	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
588 	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
589 	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
590 	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
591 	{ roff_TS, NULL, NULL, 0 },  /* TS */
592 	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
593 	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
594 	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
595 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
596 	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
597 	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
598 	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
599 	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
600 	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
601 	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
602 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
603 	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
604 	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
605 	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
606 	{ roff_insec, NULL, NULL, 0 },  /* write */
607 	{ roff_insec, NULL, NULL, 0 },  /* writec */
608 	{ roff_insec, NULL, NULL, 0 },  /* writem */
609 	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
610 	{ roff_cblock, NULL, NULL, 0 },  /* . */
	/* The last two slots have no name in __roff_name (NULL there). */
611 	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED */
612 	{ roff_userdef, NULL, NULL, 0 }  /* presumably user-defined macros */
613 };
614 
615 /* Array of injected predefined strings. */
616 #define	PREDEFS_MAX	 38
617 static	const struct predef predefs[PREDEFS_MAX] = {
618 #include "predefs.in"
619 };
620 
/* State of the centering request: */
static	int		 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;	/* active request */

/* State of the input line trap: */
static	int		 roffit_lines;	/* number of lines to delay */
static	char		*roffit_macro;	/* nil-terminated macro line */
625 
626 
627 /* --- request table ------------------------------------------------------ */
628 
629 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)630 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
631 {
632 	struct ohash	*htab;
633 	struct roffreq	*req;
634 	enum roff_tok	 tok;
635 	size_t		 sz;
636 	unsigned int	 slot;
637 
638 	htab = mandoc_malloc(sizeof(*htab));
639 	mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
640 
641 	for (tok = mintok; tok < maxtok; tok++) {
642 		if (roff_name[tok] == NULL)
643 			continue;
644 		sz = strlen(roff_name[tok]);
645 		req = mandoc_malloc(sizeof(*req) + sz + 1);
646 		req->tok = tok;
647 		memcpy(req->name, roff_name[tok], sz + 1);
648 		slot = ohash_qlookup(htab, req->name);
649 		ohash_insert(htab, slot, req);
650 	}
651 	return htab;
652 }
653 
/*
 * Destroy a table built by roffhash_alloc(): release every entry,
 * then the table itself.  Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
668 
669 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)670 roffhash_find(struct ohash *htab, const char *name, size_t sz)
671 {
672 	struct roffreq	*req;
673 	const char	*end;
674 
675 	if (sz) {
676 		end = name + sz;
677 		req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
678 	} else
679 		req = ohash_find(htab, ohash_qlookup(htab, name));
680 	return req == NULL ? TOKEN_NONE : req->tok;
681 }
682 
683 /* --- stack of request blocks -------------------------------------------- */
684 
685 /*
686  * Pop the current node off of the stack of roff instructions currently
687  * pending.  Return 1 if it is a loop or 0 otherwise.
688  */
689 static int
roffnode_pop(struct roff * r)690 roffnode_pop(struct roff *r)
691 {
692 	struct roffnode	*p;
693 	int		 inloop;
694 
695 	p = r->last;
696 	inloop = p->tok == ROFF_while;
697 	r->last = p->parent;
698 	free(p->name);
699 	free(p->end);
700 	free(p);
701 	return inloop;
702 }
703 
704 /*
705  * Push a roff node onto the instruction stack.  This must later be
706  * removed with roffnode_pop().
707  */
708 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)709 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
710 		int line, int col)
711 {
712 	struct roffnode	*p;
713 
714 	p = mandoc_calloc(1, sizeof(struct roffnode));
715 	p->tok = tok;
716 	if (name)
717 		p->name = mandoc_strdup(name);
718 	p->parent = r->last;
719 	p->line = line;
720 	p->col = col;
721 	p->rule = p->parent ? p->parent->rule : 0;
722 
723 	r->last = p;
724 }
725 
726 /* --- roff parser state data management ---------------------------------- */
727 
728 static void
roff_free1(struct roff * r)729 roff_free1(struct roff *r)
730 {
731 	int		 i;
732 
733 	tbl_free(r->first_tbl);
734 	r->first_tbl = r->last_tbl = r->tbl = NULL;
735 
736 	eqn_free(r->last_eqn);
737 	r->last_eqn = r->eqn = NULL;
738 
739 	while (r->mstackpos >= 0)
740 		roff_userret(r);
741 
742 	while (r->last)
743 		roffnode_pop(r);
744 
745 	free (r->rstack);
746 	r->rstack = NULL;
747 	r->rstacksz = 0;
748 	r->rstackpos = -1;
749 
750 	roff_freereg(r->regtab);
751 	r->regtab = NULL;
752 
753 	roff_freestr(r->strtab);
754 	roff_freestr(r->rentab);
755 	roff_freestr(r->xmbtab);
756 	r->strtab = r->rentab = r->xmbtab = NULL;
757 
758 	if (r->xtab)
759 		for (i = 0; i < 128; i++)
760 			free(r->xtab[i].p);
761 	free(r->xtab);
762 	r->xtab = NULL;
763 }
764 
765 void
roff_reset(struct roff * r)766 roff_reset(struct roff *r)
767 {
768 	roff_free1(r);
769 	r->options |= MPARSE_COMMENT;
770 	r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
771 	r->control = '\0';
772 	r->escape = '\\';
773 	roffce_lines = 0;
774 	roffce_node = NULL;
775 	roffit_lines = 0;
776 	roffit_macro = NULL;
777 }
778 
779 void
roff_free(struct roff * r)780 roff_free(struct roff *r)
781 {
782 	int		 i;
783 
784 	roff_free1(r);
785 	for (i = 0; i < r->mstacksz; i++)
786 		free(r->mstack[i].argv);
787 	free(r->mstack);
788 	roffhash_free(r->reqtab);
789 	free(r);
790 }
791 
792 struct roff *
roff_alloc(int options)793 roff_alloc(int options)
794 {
795 	struct roff	*r;
796 
797 	r = mandoc_calloc(1, sizeof(struct roff));
798 	r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
799 	r->options = options | MPARSE_COMMENT;
800 	r->format = options & (MPARSE_MDOC | MPARSE_MAN);
801 	r->mstackpos = -1;
802 	r->rstackpos = -1;
803 	r->escape = '\\';
804 	return r;
805 }
806 
807 /* --- syntax tree state data management ---------------------------------- */
808 
809 static void
roff_man_free1(struct roff_man * man)810 roff_man_free1(struct roff_man *man)
811 {
812 	if (man->meta.first != NULL)
813 		roff_node_delete(man, man->meta.first);
814 	free(man->meta.msec);
815 	free(man->meta.vol);
816 	free(man->meta.os);
817 	free(man->meta.arch);
818 	free(man->meta.title);
819 	free(man->meta.name);
820 	free(man->meta.date);
821 	free(man->meta.sodest);
822 }
823 
824 void
roff_state_reset(struct roff_man * man)825 roff_state_reset(struct roff_man *man)
826 {
827 	man->last = man->meta.first;
828 	man->last_es = NULL;
829 	man->flags = 0;
830 	man->lastsec = man->lastnamed = SEC_NONE;
831 	man->next = ROFF_NEXT_CHILD;
832 	roff_setreg(man->roff, "nS", 0, '=');
833 }
834 
835 static void
roff_man_alloc1(struct roff_man * man)836 roff_man_alloc1(struct roff_man *man)
837 {
838 	memset(&man->meta, 0, sizeof(man->meta));
839 	man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
840 	man->meta.first->type = ROFFT_ROOT;
841 	man->meta.macroset = MACROSET_NONE;
842 	roff_state_reset(man);
843 }
844 
/*
 * Throw away the current syntax tree and meta data
 * and set up fresh, empty ones in the same object.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Free first: the allocation step overwrites man->meta. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
851 
852 void
roff_man_free(struct roff_man * man)853 roff_man_free(struct roff_man *man)
854 {
855 	roff_man_free1(man);
856 	free(man->os_r);
857 	free(man);
858 }
859 
860 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)861 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
862 {
863 	struct roff_man *man;
864 
865 	man = mandoc_calloc(1, sizeof(*man));
866 	man->roff = roff;
867 	man->os_s = os_s;
868 	man->quick = quick;
869 	roff_man_alloc1(man);
870 	roff->man = man;
871 	return man;
872 }
873 
874 /* --- syntax tree handling ----------------------------------------------- */
875 
876 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)877 roff_node_alloc(struct roff_man *man, int line, int pos,
878 	enum roff_type type, int tok)
879 {
880 	struct roff_node	*n;
881 
882 	n = mandoc_calloc(1, sizeof(*n));
883 	n->line = line;
884 	n->pos = pos;
885 	n->tok = tok;
886 	n->type = type;
887 	n->sec = man->lastsec;
888 
889 	if (man->flags & MDOC_SYNOPSIS)
890 		n->flags |= NODE_SYNPRETTY;
891 	else
892 		n->flags &= ~NODE_SYNPRETTY;
893 	if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
894 		n->flags |= NODE_NOFILL;
895 	else
896 		n->flags &= ~NODE_NOFILL;
897 	if (man->flags & MDOC_NEWLINE)
898 		n->flags |= NODE_LINE;
899 	man->flags &= ~MDOC_NEWLINE;
900 
901 	return n;
902 }
903 
904 void
roff_node_append(struct roff_man * man,struct roff_node * n)905 roff_node_append(struct roff_man *man, struct roff_node *n)
906 {
907 
908 	switch (man->next) {
909 	case ROFF_NEXT_SIBLING:
910 		if (man->last->next != NULL) {
911 			n->next = man->last->next;
912 			man->last->next->prev = n;
913 		} else
914 			man->last->parent->last = n;
915 		man->last->next = n;
916 		n->prev = man->last;
917 		n->parent = man->last->parent;
918 		break;
919 	case ROFF_NEXT_CHILD:
920 		if (man->last->child != NULL) {
921 			n->next = man->last->child;
922 			man->last->child->prev = n;
923 		} else
924 			man->last->last = n;
925 		man->last->child = n;
926 		n->parent = man->last;
927 		break;
928 	default:
929 		abort();
930 	}
931 	man->last = n;
932 
933 	switch (n->type) {
934 	case ROFFT_HEAD:
935 		n->parent->head = n;
936 		break;
937 	case ROFFT_BODY:
938 		if (n->end != ENDBODY_NOT)
939 			return;
940 		n->parent->body = n;
941 		break;
942 	case ROFFT_TAIL:
943 		n->parent->tail = n;
944 		break;
945 	default:
946 		return;
947 	}
948 
949 	/*
950 	 * Copy over the normalised-data pointer of our parent.  Not
951 	 * everybody has one, but copying a null pointer is fine.
952 	 */
953 
954 	n->norm = n->parent->norm;
955 	assert(n->parent->type == ROFFT_BLOCK);
956 }
957 
958 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)959 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
960 {
961 	struct roff_node	*n;
962 
963 	n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
964 	n->string = roff_strdup(man->roff, word);
965 	roff_node_append(man, n);
966 	n->flags |= NODE_VALID | NODE_ENDED;
967 	man->next = ROFF_NEXT_SIBLING;
968 }
969 
970 void
roff_word_append(struct roff_man * man,const char * word)971 roff_word_append(struct roff_man *man, const char *word)
972 {
973 	struct roff_node	*n;
974 	char			*addstr, *newstr;
975 
976 	n = man->last;
977 	addstr = roff_strdup(man->roff, word);
978 	mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
979 	free(addstr);
980 	free(n->string);
981 	n->string = newstr;
982 	man->next = ROFF_NEXT_SIBLING;
983 }
984 
985 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)986 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
987 {
988 	struct roff_node	*n;
989 
990 	n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
991 	roff_node_append(man, n);
992 	man->next = ROFF_NEXT_CHILD;
993 }
994 
995 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)996 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
997 {
998 	struct roff_node	*n;
999 
1000 	n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1001 	roff_node_append(man, n);
1002 	man->next = ROFF_NEXT_CHILD;
1003 	return n;
1004 }
1005 
1006 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1007 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1008 {
1009 	struct roff_node	*n;
1010 
1011 	n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1012 	roff_node_append(man, n);
1013 	man->next = ROFF_NEXT_CHILD;
1014 	return n;
1015 }
1016 
1017 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1018 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1019 {
1020 	struct roff_node	*n;
1021 
1022 	n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1023 	roff_node_append(man, n);
1024 	man->next = ROFF_NEXT_CHILD;
1025 	return n;
1026 }
1027 
1028 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1029 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1030 {
1031 	struct roff_node	*n;
1032 	struct tbl_span		*span;
1033 
1034 	if (man->meta.macroset == MACROSET_MAN)
1035 		man_breakscope(man, ROFF_TS);
1036 	while ((span = tbl_span(tbl)) != NULL) {
1037 		n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1038 		n->span = span;
1039 		roff_node_append(man, n);
1040 		n->flags |= NODE_VALID | NODE_ENDED;
1041 		man->next = ROFF_NEXT_SIBLING;
1042 	}
1043 }
1044 
1045 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1046 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1047 {
1048 
1049 	/* Adjust siblings. */
1050 
1051 	if (n->prev)
1052 		n->prev->next = n->next;
1053 	if (n->next)
1054 		n->next->prev = n->prev;
1055 
1056 	/* Adjust parent. */
1057 
1058 	if (n->parent != NULL) {
1059 		if (n->parent->child == n)
1060 			n->parent->child = n->next;
1061 		if (n->parent->last == n)
1062 			n->parent->last = n->prev;
1063 	}
1064 
1065 	/* Adjust parse point. */
1066 
1067 	if (man == NULL)
1068 		return;
1069 	if (man->last == n) {
1070 		if (n->prev == NULL) {
1071 			man->last = n->parent;
1072 			man->next = ROFF_NEXT_CHILD;
1073 		} else {
1074 			man->last = n->prev;
1075 			man->next = ROFF_NEXT_SIBLING;
1076 		}
1077 	}
1078 	if (man->meta.first == n)
1079 		man->meta.first = NULL;
1080 }
1081 
1082 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1083 roff_node_relink(struct roff_man *man, struct roff_node *n)
1084 {
1085 	roff_node_unlink(man, n);
1086 	n->prev = n->next = NULL;
1087 	roff_node_append(man, n);
1088 }
1089 
1090 void
roff_node_free(struct roff_node * n)1091 roff_node_free(struct roff_node *n)
1092 {
1093 
1094 	if (n->args != NULL)
1095 		mdoc_argv_free(n->args);
1096 	if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1097 		free(n->norm);
1098 	eqn_box_free(n->eqn);
1099 	free(n->string);
1100 	free(n->tag);
1101 	free(n);
1102 }
1103 
1104 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1105 roff_node_delete(struct roff_man *man, struct roff_node *n)
1106 {
1107 
1108 	while (n->child != NULL)
1109 		roff_node_delete(man, n->child);
1110 	roff_node_unlink(man, n);
1111 	roff_node_free(n);
1112 }
1113 
1114 int
roff_node_transparent(struct roff_node * n)1115 roff_node_transparent(struct roff_node *n)
1116 {
1117 	if (n == NULL)
1118 		return 0;
1119 	if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1120 		return 1;
1121 	return roff_tok_transparent(n->tok);
1122 }
1123 
1124 int
roff_tok_transparent(enum roff_tok tok)1125 roff_tok_transparent(enum roff_tok tok)
1126 {
1127 	switch (tok) {
1128 	case ROFF_ft:
1129 	case ROFF_ll:
1130 	case ROFF_mc:
1131 	case ROFF_po:
1132 	case ROFF_ta:
1133 	case MDOC_Db:
1134 	case MDOC_Es:
1135 	case MDOC_Sm:
1136 	case MDOC_Tg:
1137 	case MAN_DT:
1138 	case MAN_UC:
1139 	case MAN_PD:
1140 	case MAN_AT:
1141 		return 1;
1142 	default:
1143 		return 0;
1144 	}
1145 }
1146 
1147 struct roff_node *
roff_node_child(struct roff_node * n)1148 roff_node_child(struct roff_node *n)
1149 {
1150 	for (n = n->child; roff_node_transparent(n); n = n->next)
1151 		continue;
1152 	return n;
1153 }
1154 
1155 struct roff_node *
roff_node_prev(struct roff_node * n)1156 roff_node_prev(struct roff_node *n)
1157 {
1158 	do {
1159 		n = n->prev;
1160 	} while (roff_node_transparent(n));
1161 	return n;
1162 }
1163 
1164 struct roff_node *
roff_node_next(struct roff_node * n)1165 roff_node_next(struct roff_node *n)
1166 {
1167 	do {
1168 		n = n->next;
1169 	} while (roff_node_transparent(n));
1170 	return n;
1171 }
1172 
1173 void
deroff(char ** dest,const struct roff_node * n)1174 deroff(char **dest, const struct roff_node *n)
1175 {
1176 	char	*cp;
1177 	size_t	 sz;
1178 
1179 	if (n->string == NULL) {
1180 		for (n = n->child; n != NULL; n = n->next)
1181 			deroff(dest, n);
1182 		return;
1183 	}
1184 
1185 	/* Skip leading whitespace. */
1186 
1187 	for (cp = n->string; *cp != '\0'; cp++) {
1188 		if (cp[0] == '\\' && cp[1] != '\0' &&
1189 		    strchr(" %&0^|~", cp[1]) != NULL)
1190 			cp++;
1191 		else if ( ! isspace((unsigned char)*cp))
1192 			break;
1193 	}
1194 
1195 	/* Skip trailing backslash. */
1196 
1197 	sz = strlen(cp);
1198 	if (sz > 0 && cp[sz - 1] == '\\')
1199 		sz--;
1200 
1201 	/* Skip trailing whitespace. */
1202 
1203 	for (; sz; sz--)
1204 		if ( ! isspace((unsigned char)cp[sz-1]))
1205 			break;
1206 
1207 	/* Skip empty strings. */
1208 
1209 	if (sz == 0)
1210 		return;
1211 
1212 	if (*dest == NULL) {
1213 		*dest = mandoc_strndup(cp, sz);
1214 		return;
1215 	}
1216 
1217 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1218 	free(*dest);
1219 	*dest = cp;
1220 }
1221 
1222 /* --- main functions of the roff parser ---------------------------------- */
1223 
1224 /*
1225  * Save comments preceding the title macro, for example in order to
1226  * preserve Copyright and license headers in HTML output,
1227  * provide diagnostics about RCS ids and trailing whitespace in comments,
1228  * then discard comments including preceding whitespace.
1229  * This function also handles input line continuation.
1230  */
/*
 * Parameters:
 *   r    parser state; r->options decides whether comments are saved,
 *        and r->man receives the RCS id flags and the comment nodes
 *   buf  the input line, which is modified in place: it is cut off
 *        at the comment or at a trailing escape character
 *   ln   input line number for messages and for saved nodes
 *   pos  offset in buf->buf where scanning starts
 *   ec   the escape character to look for
 * Returns ROFF_CONT if the line, possibly shortened, is complete,
 * or ROFF_IGN | ROFF_APPEND if line continuation was requested.
 */
1231 static int
roff_parse_comment(struct roff * r,struct buf * buf,int ln,int pos,char ec)1232 roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1233 {
1234 	struct roff_node *n;	/* used for header comments */
1235 	const char	*start;	/* start of the string to process */
1236 	const char	*cp;	/* for RCS id parsing */
1237 	char		*stesc;	/* start of an escape sequence ('\\') */
1238 	char		*ep;	/* end of comment string */
1239 	int		 rcsid;	/* kind of RCS id seen */
1240 
	/*
	 * Scan for the first escape character that starts a comment.
	 * Every other outcome returns from inside the loop.
	 */
1241 	for (start = stesc = buf->buf + pos;; stesc++) {
1242 		/*
1243 		 * XXX Ugly hack: Remove the newline character that
1244 		 * mparse_buf_r() appended to mark the end of input
1245 		 * if it is not preceded by an escape character.
1246 		 */
1247 		if (stesc[0] == '\n') {
1248 			assert(stesc[1] == '\0');
1249 			stesc[0] = '\0';
1250 		}
1251 
1252 		/* The line ends without continuation or comment. */
1253 		if (stesc[0] == '\0')
1254 			return ROFF_CONT;
1255 
1256 		/* Unescaped byte: skip it. */
1257 		if (stesc[0] != ec)
1258 			continue;
1259 
1260 		/*
1261 		 * XXX Ugly hack: Do not attempt to append another line
1262 		 * if the function mparse_buf_r() appended a newline
1263 		 * character to indicate the end of input.
1264 		 */
1265 		if (stesc[1] == '\n') {
1266 			assert(stesc[2] == '\0');
1267 			stesc[0] = '\0';
1268 			return ROFF_CONT;
1269 		}
1270 
1271 		/*
1272 		 * An escape character at the end of an input line
1273 		 * requests line continuation.
1274 		 */
1275 		if (stesc[1] == '\0') {
1276 			stesc[0] = '\0';
1277 			return ROFF_IGN | ROFF_APPEND;
1278 		}
1279 
1280 		/* Found a comment: process it. */
1281 		if (stesc[1] == '"' || stesc[1] == '#')
1282 			break;
1283 
1284 		/* Escaped escape character: skip them both. */
1285 		if (stesc[1] == ec)
1286 			stesc++;
1287 	}
1288 
1289 	/* Look for an RCS id in the comment. */
1290 
	/*
	 * The search strings are split into two literals each,
	 * and cp is advanced past the matched keyword.
	 */
1291 	rcsid = 0;
1292 	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
1293 		rcsid = 1 << MANDOC_OS_OPENBSD;
1294 		cp += 8;
1295 	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
1296 		rcsid = 1 << MANDOC_OS_NETBSD;
1297 		cp += 7;
1298 	}
	/*
	 * Only accept the keyword if it is not followed by an
	 * alphanumeric byte and another '$' follows later.
	 * Seeing the same kind of id a second time is reported.
	 */
1299 	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
1300 	    strchr(cp, '$') != NULL) {
1301 		if (r->man->meta.rcsids & rcsid)
1302 			mandoc_msg(MANDOCERR_RCS_REP, ln,
1303 			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
1304 		r->man->meta.rcsids |= rcsid;
1305 	}
1306 
1307 	/* Warn about trailing whitespace at the end of the comment. */
1308 
1309 	ep = strchr(stesc + 2, '\0') - 1;
1310 	if (*ep == '\n')
1311 		*ep-- = '\0';
1312 	if (*ep == ' ' || *ep == '\t')
1313 		mandoc_msg(MANDOCERR_SPACE_EOL,
1314 		    ln, (int)(ep - buf->buf), NULL);
1315 
1316 	/* Save comments preceding the title macro in the syntax tree. */
1317 
1318 	if (r->options & MPARSE_COMMENT) {
		/* Cut off trailing blanks and tabs before copying. */
1319 		while (*ep == ' ' || *ep == '\t')
1320 			ep--;
1321 		ep[1] = '\0';
1322 		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
1323 		    ROFFT_COMMENT, TOKEN_NONE);
1324 		n->string = mandoc_strdup(stesc + 2);
1325 		roff_node_append(r->man, n);
1326 		n->flags |= NODE_VALID | NODE_ENDED;
1327 		r->man->next = ROFF_NEXT_SIBLING;
1328 	}
1329 
1330 	/* The comment requests line continuation. */
1331 
1332 	if (stesc[1] == '#') {
1333 		*stesc = '\0';
1334 		return ROFF_IGN | ROFF_APPEND;
1335 	}
1336 
1337 	/* Discard the comment including preceding whitespace. */
1338 
	/*
	 * A blank directly preceded by a backslash is kept;
	 * the start of the processed string is never crossed.
	 */
1339 	while (stesc > start && stesc[-1] == ' ' &&
1340 	    (stesc == start + 1 || stesc[-2] != '\\'))
1341 		stesc--;
1342 	*stesc = '\0';
1343 	return ROFF_CONT;
1344 }
1345 
1346 /*
1347  * In the current line, expand escape sequences that produce parsable
1348  * input text.  Also check the syntax of the remaining escape sequences,
1349  * which typically produce output glyphs or change formatter state.
1350  */
/*
 * Parameters:
 *   r    parser state; strings, registers, and the macro argument
 *        stack (r->mstack, r->mstackpos) are looked up through it
 *   buf  the line buffer; buf->buf may be reallocated and buf->sz
 *        changed whenever a replacement is pasted
 *   ln   input line number for messages
 *   pos  offset in buf->buf where processing starts
 *   ec   the current escape character
 * Returns ROFF_CONT once the end of the buffer is reached, or
 * ROFF_IGN if more than EXPAND_LIMIT expansions were needed or
 * if the buffer size plus the replacement would exceed SHRT_MAX.
 */
1351 static int
roff_expand(struct roff * r,struct buf * buf,int ln,int pos,char ec)1352 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
1353 {
1354 	char		 ubuf[24];	/* buffer to print a number */
1355 	struct mctx	*ctx;		/* current macro call context */
1356 	const char	*res;		/* the string to be pasted */
1357 	const char	*src;		/* source for copying */
1358 	char		*dst;		/* destination for copying */
1359 	enum mandoc_esc	 subtype;	/* return value from roff_escape */
1360 	int		 iesc;		/* index of leading escape char */
1361 	int		 inam;		/* index of the escape name */
1362 	int		 iarg;		/* index beginning the argument */
1363 	int		 iendarg;	/* index right after the argument */
1364 	int		 iend;		/* index right after the sequence */
1365 	int		 isrc, idst;	/* to reduce \\ and \. in names */
1366 	int		 deftype;	/* type of definition to paste */
1367 	int		 argi;		/* macro argument index */
1368 	int		 quote_args;	/* true for \\$@, false for \\$* */
1369 	int		 asz;		/* length of the replacement */
1370 	int		 rsz;		/* length of the rest of the string */
1371 	int		 npos;		/* position in numeric expression */
1372 	int		 expand_count;	/* to avoid infinite loops */
1373 
1374 	expand_count = 0;
1375 	while (buf->buf[pos] != '\0') {
1376 
1377 		/*
1378 		 * Skip plain ASCII characters.
1379 		 * If we have a non-standard escape character,
1380 		 * escape literal backslashes because all processing in
1381 		 * subsequent functions uses the standard escaping rules.
1382 		 */
1383 
1384 		if (buf->buf[pos] != ec) {
1385 			if (buf->buf[pos] == '\\') {
1386 				roff_expand_patch(buf, pos, "\\e", pos + 1);
1387 				pos++;
1388 			}
1389 			pos++;
1390 			continue;
1391 		}
1392 
1393 		/*
1394 		 * Parse escape sequences,
1395 		 * issue diagnostic messages when appropriate,
1396 		 * and skip sequences that do not need expansion.
1397 		 * If we have a non-standard escape character, translate
1398 		 * it to backslashes and translate backslashes to \e.
1399 		 */
1400 
1401 		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
1402 		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
1403 			while (pos < iend) {
1404 				if (buf->buf[pos] == ec) {
1405 					buf->buf[pos] = '\\';
1406 					if (pos + 1 < iend)
1407 						pos++;
1408 				} else if (buf->buf[pos] == '\\') {
					/* The patch inserts one byte. */
1409 					roff_expand_patch(buf,
1410 					    pos, "\\e", pos + 1);
1411 					pos++;
1412 					iend++;
1413 				}
1414 				pos++;
1415 			}
1416 			continue;
1417 		}
1418 
1419 		/* Reduce \\ and \. in names. */
1420 
		/*
		 * The name is compacted in place, and iendarg shrinks
		 * by the number of backslashes that were dropped.
		 */
1421 		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
1422 			isrc = idst = iarg;
1423 			while (isrc < iendarg) {
1424 				if (isrc + 1 < iendarg &&
1425 				    buf->buf[isrc] == '\\' &&
1426 				    (buf->buf[isrc + 1] == '\\' ||
1427 				     buf->buf[isrc + 1] == '.'))
1428 					isrc++;
1429 				buf->buf[idst++] = buf->buf[isrc++];
1430 			}
1431 			iendarg -= isrc - idst;
1432 		}
1433 
1434 		/* Handle expansion. */
1435 
		/*
		 * Most cases only select the replacement string res;
		 * it is pasted by the common code after the switch.
		 * A case that ends with "continue" has already
		 * rewritten the buffer itself.
		 */
1436 		res = NULL;
1437 		switch (buf->buf[inam]) {
		/* Look up a string by name. */
1438 		case '*':
1439 			if (iendarg == iarg)
1440 				break;
1441 			deftype = ROFFDEF_USER | ROFFDEF_PRE;
1442 			if ((res = roff_getstrn(r, buf->buf + iarg,
1443 			    iendarg - iarg, &deftype)) != NULL)
1444 				break;
1445 
1446 			/*
1447 			 * If not overridden,
1448 			 * let \*(.T through to the formatters.
1449 			 */
1450 
1451 			if (iendarg - iarg == 2 &&
1452 			    buf->buf[iarg] == '.' &&
1453 			    buf->buf[iarg + 1] == 'T') {
1454 				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
1455 				pos = iend;
1456 				continue;
1457 			}
1458 
1459 			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
1460 			    "%.*s", iendarg - iarg, buf->buf + iarg);
1461 			break;
1462 
		/* Arguments of the macro that is currently executing. */
1463 		case '$':
1464 			if (r->mstackpos < 0) {
1465 				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
1466 				    "%.*s", iend - iesc, buf->buf + iesc);
1467 				break;
1468 			}
1469 			ctx = r->mstack + r->mstackpos;
			/* Map the digits '1' to '9' to the indices 0 to 8. */
1470 			argi = buf->buf[iarg] - '1';
1471 			if (argi >= 0 && argi <= 8) {
1472 				if (argi < ctx->argc)
1473 					res = ctx->argv[argi];
1474 				break;
1475 			}
1476 			if (buf->buf[iarg] == '*')
1477 				quote_args = 0;
1478 			else if (buf->buf[iarg] == '@')
1479 				quote_args = 1;
1480 			else {
1481 				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
1482 				    "%.*s", iend - iesc, buf->buf + iesc);
1483 				break;
1484 			}
			/* Compute the total length of all arguments. */
1485 			asz = 0;
1486 			for (argi = 0; argi < ctx->argc; argi++) {
1487 				if (argi)
1488 					asz++;  /* blank */
1489 				if (quote_args)
1490 					asz += 2;  /* quotes */
1491 				asz += strlen(ctx->argv[argi]);
1492 			}
			/*
			 * Resize the gap to fit.  When shrinking, move
			 * the rest of the line before the reallocation;
			 * when growing, move it afterwards.
			 */
1493 			if (asz != iend - iesc) {
1494 				rsz = buf->sz - iend;
1495 				if (asz < iend - iesc)
1496 					memmove(buf->buf + iesc + asz,
1497 					    buf->buf + iend, rsz);
1498 				buf->sz = iesc + asz + rsz;
1499 				buf->buf = mandoc_realloc(buf->buf, buf->sz);
1500 				if (asz > iend - iesc)
1501 					memmove(buf->buf + iesc + asz,
1502 					    buf->buf + iend, rsz);
1503 			}
			/* Write the arguments over the escape sequence. */
1504 			dst = buf->buf + iesc;
1505 			for (argi = 0; argi < ctx->argc; argi++) {
1506 				if (argi)
1507 					*dst++ = ' ';
1508 				if (quote_args)
1509 					*dst++ = '"';
1510 				src = ctx->argv[argi];
1511 				while (*src != '\0')
1512 					*dst++ = *src++;
1513 				if (quote_args)
1514 					*dst++ = '"';
1515 			}
			/*
			 * The common patching code below is skipped,
			 * and pos was not changed in this case.
			 */
1516 			continue;
		/* Yields "1" for a non-empty argument, "0" otherwise. */
1517 		case 'A':
1518 			ubuf[0] = iendarg > iarg ? '1' : '0';
1519 			ubuf[1] = '\0';
1520 			res = ubuf;
1521 			break;
		/*
		 * Yields "1" only if the argument is non-empty and
		 * followed by more bytes of the sequence, and if
		 * roff_evalnum() succeeds and consumes all of it.
		 */
1522 		case 'B':
1523 			npos = 0;
1524 			ubuf[0] = iendarg > iarg && iend > iendarg &&
1525 			    roff_evalnum(ln, buf->buf + iarg, &npos,
1526 					 NULL, 'u', 0) &&
1527 			    npos == iendarg - iarg ? '1' : '0';
1528 			ubuf[1] = '\0';
1529 			res = ubuf;
1530 			break;
		/*
		 * Unsupported: rewrite the sequence to "${" argument "}".
		 * The end is patched first such that the indices
		 * iesc and iarg remain valid for the second patch.
		 */
1531 		case 'V':
1532 			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
1533 			    "%.*s", iend - iesc, buf->buf + iesc);
1534 			roff_expand_patch(buf, iendarg, "}", iend);
1535 			roff_expand_patch(buf, iesc, "${", iarg);
1536 			continue;
		/* Replaced by the empty string. */
1537 		case 'g':
1538 			break;
		/*
		 * Print the value that roff_getregn() returns.
		 * NOTE(review): the byte after the 'n' is passed
		 * along, presumably as a sign; confirm at the callee.
		 */
1539 		case 'n':
1540 			if (iendarg > iarg)
1541 				(void)snprintf(ubuf, sizeof(ubuf), "%d",
1542 				    roff_getregn(r, buf->buf + iarg,
1543 				    iendarg - iarg, buf->buf[inam + 1]));
1544 			else
1545 				ubuf[0] = '\0';
1546 			res = ubuf;
1547 			break;
		/*
		 * Count the units in the argument: one per plain byte
		 * and per escape of the kinds listed below, eight for
		 * a device escape, none for other escapes.  A unit
		 * directly following an ESCAPE_SKIPCHAR counts zero.
		 * The count is multiplied by 24.
		 */
1548 		case 'w':
1549 			rsz = 0;
1550 			subtype = ESCAPE_UNDEF;
1551 			while (iarg < iendarg) {
1552 				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
1553 				if (buf->buf[iarg] != '\\') {
1554 					rsz += asz;
1555 					iarg++;
1556 					continue;
1557 				}
1558 				switch ((subtype = roff_escape(buf->buf, 0,
1559 				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
1560 				case ESCAPE_SPECIAL:
1561 				case ESCAPE_NUMBERED:
1562 				case ESCAPE_UNICODE:
1563 				case ESCAPE_OVERSTRIKE:
1564 				case ESCAPE_UNDEF:
1565 					break;
1566 				case ESCAPE_DEVICE:
1567 					asz *= 8;
1568 					break;
1569 				case ESCAPE_EXPAND:
1570 					abort();
1571 				default:
1572 					continue;
1573 				}
1574 				rsz += asz;
1575 			}
1576 			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
1577 			res = ubuf;
1578 			break;
1579 		default:
1580 			break;
1581 		}
		/* Without a replacement, the sequence is just deleted. */
1582 		if (res == NULL)
1583 			res = "";
		/* Give up on runaway expansion and on oversized lines. */
1584 		if (++expand_count > EXPAND_LIMIT ||
1585 		    buf->sz + strlen(res) > SHRT_MAX) {
1586 			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
1587 			return ROFF_IGN;
1588 		}
1589 		roff_expand_patch(buf, iesc, res, iend);
1590 	}
1591 	return ROFF_CONT;
1592 }
1593 
1594 /*
1595  * Replace the substring from the start position (inclusive)
1596  * to end position (exclusive) with the repl(acement) string.
1597  */
1598 static void
roff_expand_patch(struct buf * buf,int start,const char * repl,int end)1599 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1600 {
1601 	char	*nbuf;
1602 
1603 	buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1604 	    repl, buf->buf + end) + 1;
1605 	free(buf->buf);
1606 	buf->buf = nbuf;
1607 }
1608 
1609 /*
1610  * Parse a quoted or unquoted roff-style request or macro argument.
1611  * Return a pointer to the parsed argument, which is either the original
1612  * pointer or advanced by one byte in case the argument is quoted.
1613  * NUL-terminate the argument in place.
1614  * Collapse pairs of quotes inside quoted arguments.
1615  * Advance the argument pointer to the next argument,
1616  * or to the NUL byte terminating the argument line.
1617  */
1618 char *
roff_getarg(struct roff * r,char ** cpp,int ln,int * pos)1619 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1620 {
1621 	struct buf	 buf;
1622 	char		*cp, *start;
1623 	int		 newesc, pairs, quoted, white;
1624 
1625 	/* Quoting can only start with a new word. */
1626 	start = *cpp;
1627 	quoted = 0;
1628 	if ('"' == *start) {
1629 		quoted = 1;
1630 		start++;
1631 	}
1632 
1633 	newesc = pairs = white = 0;
1634 	for (cp = start; '\0' != *cp; cp++) {
1635 
1636 		/*
1637 		 * Move the following text left
1638 		 * after quoted quotes and after "\\" and "\t".
1639 		 */
1640 		if (pairs)
1641 			cp[-pairs] = cp[0];
1642 
1643 		if ('\\' == cp[0]) {
1644 			/*
1645 			 * In copy mode, translate double to single
1646 			 * backslashes and backslash-t to literal tabs.
1647 			 */
1648 			switch (cp[1]) {
1649 			case 'a':
1650 			case 't':
1651 				cp[-pairs] = '\t';
1652 				pairs++;
1653 				cp++;
1654 				break;
1655 			case '\\':
1656 				cp[-pairs] = '\\';
1657 				newesc = 1;
1658 				pairs++;
1659 				cp++;
1660 				break;
1661 			case ' ':
1662 				/* Skip escaped blanks. */
1663 				if (0 == quoted)
1664 					cp++;
1665 				break;
1666 			default:
1667 				break;
1668 			}
1669 		} else if (0 == quoted) {
1670 			if (' ' == cp[0]) {
1671 				/* Unescaped blanks end unquoted args. */
1672 				white = 1;
1673 				break;
1674 			}
1675 		} else if ('"' == cp[0]) {
1676 			if ('"' == cp[1]) {
1677 				/* Quoted quotes collapse. */
1678 				pairs++;
1679 				cp++;
1680 			} else {
1681 				/* Unquoted quotes end quoted args. */
1682 				quoted = 2;
1683 				break;
1684 			}
1685 		}
1686 	}
1687 
1688 	/* Quoted argument without a closing quote. */
1689 	if (1 == quoted)
1690 		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1691 
1692 	/* NUL-terminate this argument and move to the next one. */
1693 	if (pairs)
1694 		cp[-pairs] = '\0';
1695 	if ('\0' != *cp) {
1696 		*cp++ = '\0';
1697 		while (' ' == *cp)
1698 			cp++;
1699 	}
1700 	*pos += (int)(cp - start) + (quoted ? 1 : 0);
1701 	*cpp = cp;
1702 
1703 	if ('\0' == *cp && (white || ' ' == cp[-1]))
1704 		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1705 
1706 	start = mandoc_strdup(start);
1707 	if (newesc == 0)
1708 		return start;
1709 
1710 	buf.buf = start;
1711 	buf.sz = strlen(start) + 1;
1712 	buf.next = NULL;
1713 	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
1714 		free(buf.buf);
1715 		buf.buf = mandoc_strdup("");
1716 	}
1717 	return buf.buf;
1718 }
1719 
1720 
1721 /*
1722  * Process text streams.
1723  */
1724 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1725 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1726 {
1727 	size_t		 sz;
1728 	const char	*start;
1729 	char		*p;
1730 	int		 isz;
1731 	enum mandoc_esc	 esc;
1732 
1733 	/* Spring the input line trap. */
1734 
1735 	if (roffit_lines == 1) {
1736 		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1737 		free(buf->buf);
1738 		buf->buf = p;
1739 		buf->sz = isz + 1;
1740 		*offs = 0;
1741 		free(roffit_macro);
1742 		roffit_lines = 0;
1743 		return ROFF_REPARSE;
1744 	} else if (roffit_lines > 1)
1745 		--roffit_lines;
1746 
1747 	if (roffce_node != NULL && buf->buf[pos] != '\0') {
1748 		if (roffce_lines < 1) {
1749 			r->man->last = roffce_node;
1750 			r->man->next = ROFF_NEXT_SIBLING;
1751 			roffce_lines = 0;
1752 			roffce_node = NULL;
1753 		} else
1754 			roffce_lines--;
1755 	}
1756 
1757 	/* Convert all breakable hyphens into ASCII_HYPH. */
1758 
1759 	start = p = buf->buf + pos;
1760 
1761 	while (*p != '\0') {
1762 		sz = strcspn(p, "-\\");
1763 		p += sz;
1764 
1765 		if (*p == '\0')
1766 			break;
1767 
1768 		if (*p == '\\') {
1769 			/* Skip over escapes. */
1770 			p++;
1771 			esc = mandoc_escape((const char **)&p, NULL, NULL);
1772 			if (esc == ESCAPE_ERROR)
1773 				break;
1774 			while (*p == '-')
1775 				p++;
1776 			continue;
1777 		} else if (p == start) {
1778 			p++;
1779 			continue;
1780 		}
1781 
1782 		if (isalpha((unsigned char)p[-1]) &&
1783 		    isalpha((unsigned char)p[1]))
1784 			*p = ASCII_HYPH;
1785 		p++;
1786 	}
1787 	return ROFF_CONT;
1788 }
1789 
1790 int
roff_parseln(struct roff * r,int ln,struct buf * buf,int * offs,size_t len)1791 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
1792 {
1793 	enum roff_tok	 t;
1794 	int		 e;
1795 	int		 pos;	/* parse point */
1796 	int		 spos;	/* saved parse point for messages */
1797 	int		 ppos;	/* original offset in buf->buf */
1798 	int		 ctl;	/* macro line (boolean) */
1799 
1800 	ppos = pos = *offs;
1801 
1802 	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
1803 	    (r->man->flags & ROFF_NOFILL) == 0 &&
1804 	    strchr(" .\\", buf->buf[pos]) == NULL &&
1805 	    buf->buf[pos] != r->control &&
1806 	    strcspn(buf->buf, " ") < 80)
1807 		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
1808 		    "%.20s...", buf->buf + pos);
1809 
1810 	/* Handle in-line equation delimiters. */
1811 
1812 	if (r->tbl == NULL &&
1813 	    r->last_eqn != NULL && r->last_eqn->delim &&
1814 	    (r->eqn == NULL || r->eqn_inline)) {
1815 		e = roff_eqndelim(r, buf, pos);
1816 		if (e == ROFF_REPARSE)
1817 			return e;
1818 		assert(e == ROFF_CONT);
1819 	}
1820 
1821 	/* Handle comments and escape sequences. */
1822 
1823 	e = roff_parse_comment(r, buf, ln, pos, r->escape);
1824 	if ((e & ROFF_MASK) == ROFF_IGN)
1825 		return e;
1826 	assert(e == ROFF_CONT);
1827 
1828 	e = roff_expand(r, buf, ln, pos, r->escape);
1829 	if ((e & ROFF_MASK) == ROFF_IGN)
1830 		return e;
1831 	assert(e == ROFF_CONT);
1832 
1833 	ctl = roff_getcontrol(r, buf->buf, &pos);
1834 
1835 	/*
1836 	 * First, if a scope is open and we're not a macro, pass the
1837 	 * text through the macro's filter.
1838 	 * Equations process all content themselves.
1839 	 * Tables process almost all content themselves, but we want
1840 	 * to warn about macros before passing it there.
1841 	 */
1842 
1843 	if (r->last != NULL && ! ctl) {
1844 		t = r->last->tok;
1845 		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
1846 		if ((e & ROFF_MASK) == ROFF_IGN)
1847 			return e;
1848 		e &= ~ROFF_MASK;
1849 	} else
1850 		e = ROFF_IGN;
1851 	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
1852 		eqn_read(r->eqn, buf->buf + ppos);
1853 		return e;
1854 	}
1855 	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
1856 		tbl_read(r->tbl, ln, buf->buf, ppos);
1857 		roff_addtbl(r->man, ln, r->tbl);
1858 		return e;
1859 	}
1860 	if ( ! ctl) {
1861 		r->options &= ~MPARSE_COMMENT;
1862 		return roff_parsetext(r, buf, pos, offs) | e;
1863 	}
1864 
1865 	/* Skip empty request lines. */
1866 
1867 	if (buf->buf[pos] == '"') {
1868 		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
1869 		return ROFF_IGN;
1870 	} else if (buf->buf[pos] == '\0')
1871 		return ROFF_IGN;
1872 
1873 	/*
1874 	 * If a scope is open, go to the child handler for that macro,
1875 	 * as it may want to preprocess before doing anything with it.
1876 	 */
1877 
1878 	if (r->last) {
1879 		t = r->last->tok;
1880 		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
1881 	}
1882 
1883 	r->options &= ~MPARSE_COMMENT;
1884 	spos = pos;
1885 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
1886 	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
1887 }
1888 
1889 /*
1890  * Handle a new request or macro.
1891  * May be called outside any scope or from inside a conditional scope.
1892  */
1893 static int
roff_req_or_macro(ROFF_ARGS)1894 roff_req_or_macro(ROFF_ARGS) {
1895 
1896 	/* For now, tables ignore most macros and some request. */
1897 
1898 	if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1899 	    tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1900 	    tok == ROFF_sp)) {
1901 		mandoc_msg(MANDOCERR_TBLMACRO,
1902 		    ln, ppos, "%s", buf->buf + ppos);
1903 		if (tok != TOKEN_NONE)
1904 			return ROFF_IGN;
1905 		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1906 			pos++;
1907 		while (buf->buf[pos] == ' ')
1908 			pos++;
1909 		tbl_read(r->tbl, ln, buf->buf, pos);
1910 		roff_addtbl(r->man, ln, r->tbl);
1911 		return ROFF_IGN;
1912 	}
1913 
1914 	/* For now, let high level macros abort .ce mode. */
1915 
1916 	if (roffce_node != NULL &&
1917 	    (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1918 	     tok == ROFF_TH || tok == ROFF_TS)) {
1919 		r->man->last = roffce_node;
1920 		r->man->next = ROFF_NEXT_SIBLING;
1921 		roffce_lines = 0;
1922 		roffce_node = NULL;
1923 	}
1924 
1925 	/*
1926 	 * This is neither a roff request nor a user-defined macro.
1927 	 * Let the standard macro set parsers handle it.
1928 	 */
1929 
1930 	if (tok == TOKEN_NONE)
1931 		return ROFF_CONT;
1932 
1933 	/* Execute a roff request or a user-defined macro. */
1934 
1935 	return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1936 }
1937 
1938 /*
1939  * Internal interface function to tell the roff parser that execution
1940  * of the current macro ended.  This is required because macro
1941  * definitions usually do not end with a .return request.
1942  */
1943 void
roff_userret(struct roff * r)1944 roff_userret(struct roff *r)
1945 {
1946 	struct mctx	*ctx;
1947 	int		 i;
1948 
1949 	assert(r->mstackpos >= 0);
1950 	ctx = r->mstack + r->mstackpos;
1951 	for (i = 0; i < ctx->argc; i++)
1952 		free(ctx->argv[i]);
1953 	ctx->argc = 0;
1954 	r->mstackpos--;
1955 }
1956 
1957 void
roff_endparse(struct roff * r)1958 roff_endparse(struct roff *r)
1959 {
1960 	if (r->last != NULL)
1961 		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1962 		    r->last->col, "%s", roff_name[r->last->tok]);
1963 
1964 	if (r->eqn != NULL) {
1965 		mandoc_msg(MANDOCERR_BLK_NOEND,
1966 		    r->eqn->node->line, r->eqn->node->pos, "EQ");
1967 		eqn_parse(r->eqn);
1968 		r->eqn = NULL;
1969 	}
1970 
1971 	if (r->tbl != NULL) {
1972 		tbl_end(r->tbl, 1);
1973 		r->tbl = NULL;
1974 	}
1975 }
1976 
1977 /*
1978  * Parse the request or macro name at buf[*pos].
1979  * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1980  * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1981  * As a side effect, set r->current_string to the definition or to NULL.
1982  */
1983 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1984 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1985 {
1986 	char		*cp;
1987 	const char	*mac;
1988 	size_t		 maclen;
1989 	int		 deftype;
1990 	enum roff_tok	 t;
1991 
1992 	cp = buf + *pos;
1993 
1994 	if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1995 		return TOKEN_NONE;
1996 
1997 	mac = cp;
1998 	maclen = roff_getname(&cp, ln, ppos);
1999 
2000 	deftype = ROFFDEF_USER | ROFFDEF_REN;
2001 	r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2002 	switch (deftype) {
2003 	case ROFFDEF_USER:
2004 		t = ROFF_USERDEF;
2005 		break;
2006 	case ROFFDEF_REN:
2007 		t = ROFF_RENAMED;
2008 		break;
2009 	default:
2010 		t = roffhash_find(r->reqtab, mac, maclen);
2011 		break;
2012 	}
2013 	if (t != TOKEN_NONE)
2014 		*pos = cp - buf;
2015 	else if (deftype == ROFFDEF_UNDEF) {
2016 		/* Using an undefined macro defines it to be empty. */
2017 		roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2018 		roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2019 	}
2020 	return t;
2021 }
2022 
2023 /* --- handling of request blocks ----------------------------------------- */
2024 
2025 /*
2026  * Close a macro definition block or an "ignore" block.
2027  */
2028 static int
roff_cblock(ROFF_ARGS)2029 roff_cblock(ROFF_ARGS)
2030 {
2031 	int	 rr;
2032 
2033 	if (r->last == NULL) {
2034 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2035 		return ROFF_IGN;
2036 	}
2037 
2038 	switch (r->last->tok) {
2039 	case ROFF_am:
2040 	case ROFF_ami:
2041 	case ROFF_de:
2042 	case ROFF_dei:
2043 	case ROFF_ig:
2044 		break;
2045 	case ROFF_am1:
2046 	case ROFF_de1:
2047 		/* Remapped in roff_block(). */
2048 		abort();
2049 	default:
2050 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2051 		return ROFF_IGN;
2052 	}
2053 
2054 	roffnode_pop(r);
2055 	roffnode_cleanscope(r);
2056 
2057 	/*
2058 	 * If a conditional block with braces is still open,
2059 	 * check for "\}" block end markers.
2060 	 */
2061 
2062 	if (r->last != NULL && r->last->endspan < 0) {
2063 		rr = 1;  /* If arguments follow "\}", warn about them. */
2064 		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2065 	}
2066 
2067 	if (buf->buf[pos] != '\0')
2068 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2069 		    ".. %s", buf->buf + pos);
2070 
2071 	return ROFF_IGN;
2072 }
2073 
2074 /*
2075  * Pop all nodes ending at the end of the current input line.
2076  * Return the number of loops ended.
2077  */
2078 static int
roffnode_cleanscope(struct roff * r)2079 roffnode_cleanscope(struct roff *r)
2080 {
2081 	int inloop;
2082 
2083 	inloop = 0;
2084 	while (r->last != NULL && r->last->endspan > 0) {
2085 		if (--r->last->endspan != 0)
2086 			break;
2087 		inloop += roffnode_pop(r);
2088 	}
2089 	return inloop;
2090 }
2091 
2092 /*
2093  * Handle the closing "\}" of a conditional block.
2094  * Apart from generating warnings, this only pops nodes.
2095  * Return the number of loops ended.
2096  */
2097 static int
roff_ccond(struct roff * r,int ln,int ppos)2098 roff_ccond(struct roff *r, int ln, int ppos)
2099 {
2100 	if (NULL == r->last) {
2101 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2102 		return 0;
2103 	}
2104 
2105 	switch (r->last->tok) {
2106 	case ROFF_el:
2107 	case ROFF_ie:
2108 	case ROFF_if:
2109 	case ROFF_while:
2110 		break;
2111 	default:
2112 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2113 		return 0;
2114 	}
2115 
2116 	if (r->last->endspan > -1) {
2117 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2118 		return 0;
2119 	}
2120 
2121 	return roffnode_pop(r) + roffnode_cleanscope(r);
2122 }
2123 
2124 static int
roff_block(ROFF_ARGS)2125 roff_block(ROFF_ARGS)
2126 {
2127 	const char	*name, *value;
2128 	char		*call, *cp, *iname, *rname;
2129 	size_t		 csz, namesz, rsz;
2130 	int		 deftype;
2131 
2132 	/* Ignore groff compatibility mode for now. */
2133 
2134 	if (tok == ROFF_de1)
2135 		tok = ROFF_de;
2136 	else if (tok == ROFF_dei1)
2137 		tok = ROFF_dei;
2138 	else if (tok == ROFF_am1)
2139 		tok = ROFF_am;
2140 	else if (tok == ROFF_ami1)
2141 		tok = ROFF_ami;
2142 
2143 	/* Parse the macro name argument. */
2144 
2145 	cp = buf->buf + pos;
2146 	if (tok == ROFF_ig) {
2147 		iname = NULL;
2148 		namesz = 0;
2149 	} else {
2150 		iname = cp;
2151 		namesz = roff_getname(&cp, ln, ppos);
2152 		iname[namesz] = '\0';
2153 	}
2154 
2155 	/* Resolve the macro name argument if it is indirect. */
2156 
2157 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2158 		deftype = ROFFDEF_USER;
2159 		name = roff_getstrn(r, iname, namesz, &deftype);
2160 		if (name == NULL) {
2161 			mandoc_msg(MANDOCERR_STR_UNDEF,
2162 			    ln, (int)(iname - buf->buf),
2163 			    "%.*s", (int)namesz, iname);
2164 			namesz = 0;
2165 		} else
2166 			namesz = strlen(name);
2167 	} else
2168 		name = iname;
2169 
2170 	if (namesz == 0 && tok != ROFF_ig) {
2171 		mandoc_msg(MANDOCERR_REQ_EMPTY,
2172 		    ln, ppos, "%s", roff_name[tok]);
2173 		return ROFF_IGN;
2174 	}
2175 
2176 	roffnode_push(r, tok, name, ln, ppos);
2177 
2178 	/*
2179 	 * At the beginning of a `de' macro, clear the existing string
2180 	 * with the same name, if there is one.  New content will be
2181 	 * appended from roff_block_text() in multiline mode.
2182 	 */
2183 
2184 	if (tok == ROFF_de || tok == ROFF_dei) {
2185 		roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2186 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2187 	} else if (tok == ROFF_am || tok == ROFF_ami) {
2188 		deftype = ROFFDEF_ANY;
2189 		value = roff_getstrn(r, iname, namesz, &deftype);
2190 		switch (deftype) {  /* Before appending, ... */
2191 		case ROFFDEF_PRE: /* copy predefined to user-defined. */
2192 			roff_setstrn(&r->strtab, name, namesz,
2193 			    value, strlen(value), 0);
2194 			break;
2195 		case ROFFDEF_REN: /* call original standard macro. */
2196 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2197 			    (int)strlen(value), value);
2198 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2199 			roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2200 			free(call);
2201 			break;
2202 		case ROFFDEF_STD:  /* rename and call standard macro. */
2203 			rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2204 			roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2205 			csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2206 			    (int)rsz, rname);
2207 			roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2208 			free(call);
2209 			free(rname);
2210 			break;
2211 		default:
2212 			break;
2213 		}
2214 	}
2215 
2216 	if (*cp == '\0')
2217 		return ROFF_IGN;
2218 
2219 	/* Get the custom end marker. */
2220 
2221 	iname = cp;
2222 	namesz = roff_getname(&cp, ln, ppos);
2223 
2224 	/* Resolve the end marker if it is indirect. */
2225 
2226 	if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2227 		deftype = ROFFDEF_USER;
2228 		name = roff_getstrn(r, iname, namesz, &deftype);
2229 		if (name == NULL) {
2230 			mandoc_msg(MANDOCERR_STR_UNDEF,
2231 			    ln, (int)(iname - buf->buf),
2232 			    "%.*s", (int)namesz, iname);
2233 			namesz = 0;
2234 		} else
2235 			namesz = strlen(name);
2236 	} else
2237 		name = iname;
2238 
2239 	if (namesz)
2240 		r->last->end = mandoc_strndup(name, namesz);
2241 
2242 	if (*cp != '\0')
2243 		mandoc_msg(MANDOCERR_ARG_EXCESS,
2244 		    ln, pos, ".%s ... %s", roff_name[tok], cp);
2245 
2246 	return ROFF_IGN;
2247 }
2248 
2249 static int
roff_block_sub(ROFF_ARGS)2250 roff_block_sub(ROFF_ARGS)
2251 {
2252 	enum roff_tok	t;
2253 	int		i, j;
2254 
2255 	/*
2256 	 * If a custom end marker is a user-defined or predefined macro
2257 	 * or a request, interpret it.
2258 	 */
2259 
2260 	if (r->last->end) {
2261 		for (i = pos, j = 0; r->last->end[j]; j++, i++)
2262 			if (buf->buf[i] != r->last->end[j])
2263 				break;
2264 
2265 		if (r->last->end[j] == '\0' &&
2266 		    (buf->buf[i] == '\0' ||
2267 		     buf->buf[i] == ' ' ||
2268 		     buf->buf[i] == '\t')) {
2269 			roffnode_pop(r);
2270 			roffnode_cleanscope(r);
2271 
2272 			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2273 				i++;
2274 
2275 			pos = i;
2276 			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2277 			    TOKEN_NONE)
2278 				return ROFF_RERUN;
2279 			return ROFF_IGN;
2280 		}
2281 	}
2282 
2283 	/* Handle the standard end marker. */
2284 
2285 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2286 	if (t == ROFF_cblock)
2287 		return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2288 
2289 	/* Not an end marker, so append the line to the block. */
2290 
2291 	if (tok != ROFF_ig)
2292 		roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2293 	return ROFF_IGN;
2294 }
2295 
2296 static int
roff_block_text(ROFF_ARGS)2297 roff_block_text(ROFF_ARGS)
2298 {
2299 
2300 	if (tok != ROFF_ig)
2301 		roff_setstr(r, r->last->name, buf->buf + pos, 2);
2302 
2303 	return ROFF_IGN;
2304 }
2305 
2306 /*
2307  * Check for a closing "\}" and handle it.
2308  * In this function, the final "int *offs" argument is used for
2309  * different purposes than elsewhere:
2310  * Input: *offs == 0: caller wants to discard arguments following \}
2311  *        *offs == 1: caller wants to preserve text following \}
2312  * Output: *offs = 0: tell caller to discard input line
2313  *         *offs = 1: tell caller to use input line
2314  */
2315 static int
roff_cond_checkend(ROFF_ARGS)2316 roff_cond_checkend(ROFF_ARGS)
2317 {
2318 	char		*ep;
2319 	int		 endloop, irc, rr;
2320 
2321 	irc = ROFF_IGN;
2322 	rr = r->last->rule;
2323 	endloop = tok != ROFF_while ? ROFF_IGN :
2324 	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2325 	if (roffnode_cleanscope(r))
2326 		irc |= endloop;
2327 
2328 	/*
2329 	 * If "\}" occurs on a macro line without a preceding macro or
2330 	 * a text line contains nothing else, drop the line completely.
2331 	 */
2332 
2333 	ep = buf->buf + pos;
2334 	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2335 		rr = 0;
2336 
2337 	/*
2338 	 * The closing delimiter "\}" rewinds the conditional scope
2339 	 * but is otherwise ignored when interpreting the line.
2340 	 */
2341 
2342 	while ((ep = strchr(ep, '\\')) != NULL) {
2343 		switch (ep[1]) {
2344 		case '}':
2345 			if (ep[2] == '\0')
2346 				ep[0] = '\0';
2347 			else if (rr)
2348 				ep[1] = '&';
2349 			else
2350 				memmove(ep, ep + 2, strlen(ep + 2) + 1);
2351 			if (roff_ccond(r, ln, ep - buf->buf))
2352 				irc |= endloop;
2353 			break;
2354 		case '\0':
2355 			++ep;
2356 			break;
2357 		default:
2358 			ep += 2;
2359 			break;
2360 		}
2361 	}
2362 	*offs = rr;
2363 	return irc;
2364 }
2365 
2366 /*
2367  * Parse and process a request or macro line in conditional scope.
2368  */
2369 static int
roff_cond_sub(ROFF_ARGS)2370 roff_cond_sub(ROFF_ARGS)
2371 {
2372 	struct roffnode	*bl;
2373 	int		 irc, rr, spos;
2374 	enum roff_tok	 t;
2375 
2376 	rr = 0;  /* If arguments follow "\}", skip them. */
2377 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2378 	spos = pos;
2379 	t = roff_parse(r, buf->buf, &pos, ln, ppos);
2380 
2381 	/*
2382 	 * Handle requests and macros if the conditional evaluated
2383 	 * to true or if they are structurally required.
2384 	 * The .break request is always handled specially.
2385 	 */
2386 
2387 	if (t == ROFF_break) {
2388 		if (irc & ROFF_LOOPMASK)
2389 			irc = ROFF_IGN | ROFF_LOOPEXIT;
2390 		else if (rr) {
2391 			for (bl = r->last; bl != NULL; bl = bl->parent) {
2392 				bl->rule = 0;
2393 				if (bl->tok == ROFF_while)
2394 					break;
2395 			}
2396 		}
2397 	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2398 		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2399 		if (irc & ROFF_WHILE)
2400 			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2401 	}
2402 	return irc;
2403 }
2404 
2405 /*
2406  * Parse and process a text line in conditional scope.
2407  */
2408 static int
roff_cond_text(ROFF_ARGS)2409 roff_cond_text(ROFF_ARGS)
2410 {
2411 	int	 irc, rr;
2412 
2413 	rr = 1;  /* If arguments follow "\}", preserve them. */
2414 	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2415 	if (rr)
2416 		irc |= ROFF_CONT;
2417 	return irc;
2418 }
2419 
2420 /* --- handling of numeric and conditional expressions -------------------- */
2421 
/*
 * Parse a single signed decimal number, optionally with a fraction
 * and one scaling unit, starting at v[*pos].
 * "unit" is the default scaling unit; if it is '\0', scaling is
 * disabled and a scaling unit in the input is skipped but ignored.
 * If "skipspace" is set, white space after the sign is skipped.
 * On success, return 1, advance *pos behind the number, and store
 * the scaled value, truncated toward zero, in *res unless res
 * is NULL.  Values that do not fit into an int are clamped to
 * INT_MIN or INT_MAX.
 * On failure, return 0 and leave *pos unchanged.
 *
 * NOTE(review): failure is only detected when nothing at all, or
 * nothing but a '-', was consumed; a lone '+' or '.' parses as 0.
 * Confirm whether that is intended.
 */
static int
roff_getnum(const char *v, int *pos, int *res, char unit, int skipspace)
{
	double	 frac, myres;
	int	 n, p;

	p = *pos;
	n = v[p] == '-';
	if (n || v[p] == '+')
		p++;

	if (skipspace)
		while (isspace((unsigned char)v[p]))
			p++;

	/* Integer part, then an optional fractional part. */

	for (myres = 0.0; isdigit((unsigned char)v[p]); p++)
		myres = myres * 10.0 + (v[p] - '0');
	if (v[p] == '.')
		for (frac = 0.1; isdigit((unsigned char)v[++p]); frac *= 0.1)
			myres += frac * (v[p] - '0');

	if (p == *pos + n)
		return 0;

	if (n)
		myres *= -1.0;

	/* Each number may be followed by one optional scaling unit. */

	if (v[p] != '\0' && strchr("ficvPmnpuM", v[p]) != NULL) {
		if (unit != '\0')
			unit = v[p];
		p++;
	}

	/* Convert to basic units. */

	switch (unit) {
	case 'f':
		myres *= 65536.0;
		break;
	case 'i':
		myres *= 240.0;
		break;
	case 'c':
		myres *= 240.0 / 2.54;
		break;
	case 'v':
	case 'P':
		myres *= 40.0;
		break;
	case 'm':
	case 'n':
		myres *= 24.0;
		break;
	case 'p':
		myres *= 40.0 / 12.0;
		break;
	case 'u':
		break;
	case 'M':
		myres *= 24.0 / 100.0;
		break;
	default:
		break;
	}

	/*
	 * Converting a double that is out of range for int is
	 * undefined behaviour, so saturate instead.
	 */

	if (res != NULL) {
		if (myres >= (double)INT_MAX)
			*res = INT_MAX;
		else if (myres <= (double)INT_MIN)
			*res = INT_MIN;
		else
			*res = (int)myres;
	}
	*pos = p;
	return 1;
}
2497 
/*
 * Evaluate a string comparison condition of the form 'abc'abc',
 * where the character at v[*pos] serves as the delimiter.
 * The condition holds if the text between the first and second
 * delimiter equals the text between the second and third one.
 * Afterwards, *pos is just behind the third delimiter or,
 * if a delimiter is missing, at the end of the line.
 * Returns 1 on a match, 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*lhs, *rhs, *stop;
	char		 delim;
	int		 equal;

	delim = v[*pos];
	lhs = v + *pos + 1;		/* cursor in the first string */
	rhs = strchr(lhs, delim);	/* cursor in the second string */
	equal = 0;

	if (rhs != NULL) {
		for (;;) {
			rhs++;
			if (*rhs == '\0')	/* no final delimiter */
				break;
			if (*lhs != *rhs) {	/* mismatch */
				rhs = strchr(rhs, delim);
				break;
			}
			if (*rhs == delim) {	/* both strings ended */
				equal = 1;
				break;
			}
			lhs++;
		}
	}

	if (rhs == NULL)
		stop = strchr(lhs, '\0');
	else if (*rhs != '\0')
		stop = rhs + 1;
	else
		stop = rhs;
	*pos = stop - v;
	return equal;
}
2540 
2541 /*
2542  * Evaluate an optionally negated single character, numerical,
2543  * or string condition.
2544  */
2545 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2546 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2547 {
2548 	const char	*start, *end;
2549 	char		*cp, *name;
2550 	size_t		 sz;
2551 	int		 deftype, len, number, savepos, istrue, wanttrue;
2552 
2553 	if ('!' == v[*pos]) {
2554 		wanttrue = 0;
2555 		(*pos)++;
2556 	} else
2557 		wanttrue = 1;
2558 
2559 	switch (v[*pos]) {
2560 	case '\0':
2561 		return 0;
2562 	case 'n':
2563 	case 'o':
2564 		(*pos)++;
2565 		return wanttrue;
2566 	case 'e':
2567 	case 't':
2568 	case 'v':
2569 		(*pos)++;
2570 		return !wanttrue;
2571 	case 'c':
2572 		do {
2573 			(*pos)++;
2574 		} while (v[*pos] == ' ');
2575 
2576 		/*
2577 		 * Quirk for groff compatibility:
2578 		 * The horizontal tab is neither available nor unavailable.
2579 		 */
2580 
2581 		if (v[*pos] == '\t') {
2582 			(*pos)++;
2583 			return 0;
2584 		}
2585 
2586 		/* Printable ASCII characters are available. */
2587 
2588 		if (v[*pos] != '\\') {
2589 			(*pos)++;
2590 			return wanttrue;
2591 		}
2592 
2593 		end = v + ++*pos;
2594 		switch (mandoc_escape(&end, &start, &len)) {
2595 		case ESCAPE_SPECIAL:
2596 			istrue = mchars_spec2cp(start, len) != -1;
2597 			break;
2598 		case ESCAPE_UNICODE:
2599 			istrue = 1;
2600 			break;
2601 		case ESCAPE_NUMBERED:
2602 			istrue = mchars_num2char(start, len) != -1;
2603 			break;
2604 		default:
2605 			istrue = !wanttrue;
2606 			break;
2607 		}
2608 		*pos = end - v;
2609 		return istrue == wanttrue;
2610 	case 'd':
2611 	case 'r':
2612 		cp = v + *pos + 1;
2613 		while (*cp == ' ')
2614 			cp++;
2615 		name = cp;
2616 		sz = roff_getname(&cp, ln, cp - v);
2617 		if (sz == 0)
2618 			istrue = 0;
2619 		else if (v[*pos] == 'r')
2620 			istrue = roff_hasregn(r, name, sz);
2621 		else {
2622 			deftype = ROFFDEF_ANY;
2623 		        roff_getstrn(r, name, sz, &deftype);
2624 			istrue = !!deftype;
2625 		}
2626 		*pos = (name + sz) - v;
2627 		return istrue == wanttrue;
2628 	default:
2629 		break;
2630 	}
2631 
2632 	savepos = *pos;
2633 	if (roff_evalnum(ln, v, pos, &number, 'u', 0))
2634 		return (number > 0) == wanttrue;
2635 	else if (*pos == savepos)
2636 		return roff_evalstrcond(v, pos) == wanttrue;
2637 	else
2638 		return 0;
2639 }
2640 
2641 static int
roff_line_ignore(ROFF_ARGS)2642 roff_line_ignore(ROFF_ARGS)
2643 {
2644 
2645 	return ROFF_IGN;
2646 }
2647 
2648 static int
roff_insec(ROFF_ARGS)2649 roff_insec(ROFF_ARGS)
2650 {
2651 
2652 	mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2653 	return ROFF_IGN;
2654 }
2655 
2656 static int
roff_unsupp(ROFF_ARGS)2657 roff_unsupp(ROFF_ARGS)
2658 {
2659 
2660 	mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2661 	return ROFF_IGN;
2662 }
2663 
2664 static int
roff_cond(ROFF_ARGS)2665 roff_cond(ROFF_ARGS)
2666 {
2667 	int	 irc;
2668 
2669 	roffnode_push(r, tok, NULL, ln, ppos);
2670 
2671 	/*
2672 	 * An `.el' has no conditional body: it will consume the value
2673 	 * of the current rstack entry set in prior `ie' calls or
2674 	 * defaults to DENY.
2675 	 *
2676 	 * If we're not an `el', however, then evaluate the conditional.
2677 	 */
2678 
2679 	r->last->rule = tok == ROFF_el ?
2680 	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2681 	    roff_evalcond(r, ln, buf->buf, &pos);
2682 
2683 	/*
2684 	 * An if-else will put the NEGATION of the current evaluated
2685 	 * conditional into the stack of rules.
2686 	 */
2687 
2688 	if (tok == ROFF_ie) {
2689 		if (r->rstackpos + 1 == r->rstacksz) {
2690 			r->rstacksz += 16;
2691 			r->rstack = mandoc_reallocarray(r->rstack,
2692 			    r->rstacksz, sizeof(int));
2693 		}
2694 		r->rstack[++r->rstackpos] = !r->last->rule;
2695 	}
2696 
2697 	/* If the parent has false as its rule, then so do we. */
2698 
2699 	if (r->last->parent && !r->last->parent->rule)
2700 		r->last->rule = 0;
2701 
2702 	/*
2703 	 * Determine scope.
2704 	 * If there is nothing on the line after the conditional,
2705 	 * not even whitespace, use next-line scope.
2706 	 * Except that .while does not support next-line scope.
2707 	 */
2708 
2709 	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2710 		r->last->endspan = 2;
2711 		goto out;
2712 	}
2713 
2714 	while (buf->buf[pos] == ' ')
2715 		pos++;
2716 
2717 	/* An opening brace requests multiline scope. */
2718 
2719 	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2720 		r->last->endspan = -1;
2721 		pos += 2;
2722 		while (buf->buf[pos] == ' ')
2723 			pos++;
2724 		goto out;
2725 	}
2726 
2727 	/*
2728 	 * Anything else following the conditional causes
2729 	 * single-line scope.  Warn if the scope contains
2730 	 * nothing but trailing whitespace.
2731 	 */
2732 
2733 	if (buf->buf[pos] == '\0')
2734 		mandoc_msg(MANDOCERR_COND_EMPTY,
2735 		    ln, ppos, "%s", roff_name[tok]);
2736 
2737 	r->last->endspan = 1;
2738 
2739 out:
2740 	*offs = pos;
2741 	irc = ROFF_RERUN;
2742 	if (tok == ROFF_while)
2743 		irc |= ROFF_WHILE;
2744 	return irc;
2745 }
2746 
2747 static int
roff_ds(ROFF_ARGS)2748 roff_ds(ROFF_ARGS)
2749 {
2750 	char		*string;
2751 	const char	*name;
2752 	size_t		 namesz;
2753 
2754 	/* Ignore groff compatibility mode for now. */
2755 
2756 	if (tok == ROFF_ds1)
2757 		tok = ROFF_ds;
2758 	else if (tok == ROFF_as1)
2759 		tok = ROFF_as;
2760 
2761 	/*
2762 	 * The first word is the name of the string.
2763 	 * If it is empty or terminated by an escape sequence,
2764 	 * abort the `ds' request without defining anything.
2765 	 */
2766 
2767 	name = string = buf->buf + pos;
2768 	if (*name == '\0')
2769 		return ROFF_IGN;
2770 
2771 	namesz = roff_getname(&string, ln, pos);
2772 	switch (name[namesz]) {
2773 	case '\\':
2774 		return ROFF_IGN;
2775 	case '\t':
2776 		string = buf->buf + pos + namesz;
2777 		break;
2778 	default:
2779 		break;
2780 	}
2781 
2782 	/* Read past the initial double-quote, if any. */
2783 	if (*string == '"')
2784 		string++;
2785 
2786 	/* The rest is the value. */
2787 	roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2788 	    ROFF_as == tok);
2789 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2790 	return ROFF_IGN;
2791 }
2792 
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * Two-character operators are reported with a one-letter code:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * If an operator is recognized, store its code in *res, advance
 * *pos behind it, and return the code.  Otherwise, return 0 and
 * leave *pos unchanged; *res then holds the offending character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	char		 op;
	int		 width;

	cp = v + *pos;
	op = cp[0];
	*res = op;
	width = 1;

	if (op == '<') {
		if (cp[1] == '=') {
			op = 'l';
			width = 2;
		} else if (cp[1] == '>') {
			op = '!';
			width = 2;
		} else if (cp[1] == '?') {
			op = 'i';
			width = 2;
		}
	} else if (op == '>') {
		if (cp[1] == '=') {
			op = 'g';
			width = 2;
		} else if (cp[1] == '?') {
			op = 'a';
			width = 2;
		}
	} else if (op == '=') {
		if (cp[1] == '=')
			width = 2;
	} else if (op != '+' && op != '-' && op != '*' && op != '/' &&
	    op != '%' && op != '&' && op != ':') {
		return 0;
	}

	*res = op;
	*pos += width;
	return *res;
}
2856 
/*
 * Evaluate one operand of a numeric expression: either a
 * parenthesized numeric expression or a single signed number.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_evalpar(int ln, const char *v, int *pos, int *res, char unit,
    int skipspace)
{
	if (v[*pos] == '(') {
		/* White space is always skipped inside parentheses. */
		++*pos;
		if (roff_evalnum(ln, v, pos, res, unit, 1) == 0)
			return 0;

		if (v[*pos] == ')') {
			++*pos;
			return 1;
		}

		/*
		 * Omission of the closing parenthesis
		 * is an error in validation mode (res == NULL),
		 * but ignored in evaluation mode.
		 */
		return res != NULL;
	}
	return roff_getnum(v, pos, res, unit, skipspace);
}
2886 
2887 /*
2888  * Evaluate a complete numeric expression.
2889  * Proceed left to right, there is no concept of precedence.
2890  */
2891 int
roff_evalnum(int ln,const char * v,int * pos,int * res,char unit,int skipspace)2892 roff_evalnum(int ln, const char *v, int *pos, int *res, char unit,
2893     int skipspace)
2894 {
2895 	int		 mypos, operand2;
2896 	char		 operator;
2897 
2898 	if (NULL == pos) {
2899 		mypos = 0;
2900 		pos = &mypos;
2901 	}
2902 
2903 	if (skipspace)
2904 		while (isspace((unsigned char)v[*pos]))
2905 			(*pos)++;
2906 
2907 	if ( ! roff_evalpar(ln, v, pos, res, unit, skipspace))
2908 		return 0;
2909 
2910 	while (1) {
2911 		if (skipspace)
2912 			while (isspace((unsigned char)v[*pos]))
2913 				(*pos)++;
2914 
2915 		if ( ! roff_getop(v, pos, &operator))
2916 			break;
2917 
2918 		if (skipspace)
2919 			while (isspace((unsigned char)v[*pos]))
2920 				(*pos)++;
2921 
2922 		if ( ! roff_evalpar(ln, v, pos, &operand2, unit, skipspace))
2923 			return 0;
2924 
2925 		if (skipspace)
2926 			while (isspace((unsigned char)v[*pos]))
2927 				(*pos)++;
2928 
2929 		if (NULL == res)
2930 			continue;
2931 
2932 		switch (operator) {
2933 		case '+':
2934 			*res += operand2;
2935 			break;
2936 		case '-':
2937 			*res -= operand2;
2938 			break;
2939 		case '*':
2940 			*res *= operand2;
2941 			break;
2942 		case '/':
2943 			if (operand2 == 0) {
2944 				mandoc_msg(MANDOCERR_DIVZERO,
2945 					ln, *pos, "%s", v);
2946 				*res = 0;
2947 				break;
2948 			}
2949 			*res /= operand2;
2950 			break;
2951 		case '%':
2952 			if (operand2 == 0) {
2953 				mandoc_msg(MANDOCERR_DIVZERO,
2954 					ln, *pos, "%s", v);
2955 				*res = 0;
2956 				break;
2957 			}
2958 			*res %= operand2;
2959 			break;
2960 		case '<':
2961 			*res = *res < operand2;
2962 			break;
2963 		case '>':
2964 			*res = *res > operand2;
2965 			break;
2966 		case 'l':
2967 			*res = *res <= operand2;
2968 			break;
2969 		case 'g':
2970 			*res = *res >= operand2;
2971 			break;
2972 		case '=':
2973 			*res = *res == operand2;
2974 			break;
2975 		case '!':
2976 			*res = *res != operand2;
2977 			break;
2978 		case '&':
2979 			*res = *res && operand2;
2980 			break;
2981 		case ':':
2982 			*res = *res || operand2;
2983 			break;
2984 		case 'i':
2985 			if (operand2 < *res)
2986 				*res = operand2;
2987 			break;
2988 		case 'a':
2989 			if (operand2 > *res)
2990 				*res = operand2;
2991 			break;
2992 		default:
2993 			abort();
2994 		}
2995 	}
2996 	return 1;
2997 }
2998 
2999 /* --- register management ------------------------------------------------ */
3000 
/*
 * Set, increment ('+'), or decrement ('-') the number register
 * with the NUL-terminated name, leaving its step size alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 len;

	len = strlen(name);
	roff_setregn(r, name, len, val, sign, INT_MIN);
}
3006 
3007 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3008 roff_setregn(struct roff *r, const char *name, size_t len,
3009     int val, char sign, int step)
3010 {
3011 	struct roffreg	*reg;
3012 
3013 	/* Search for an existing register with the same name. */
3014 	reg = r->regtab;
3015 
3016 	while (reg != NULL && (reg->key.sz != len ||
3017 	    strncmp(reg->key.p, name, len) != 0))
3018 		reg = reg->next;
3019 
3020 	if (NULL == reg) {
3021 		/* Create a new register. */
3022 		reg = mandoc_malloc(sizeof(struct roffreg));
3023 		reg->key.p = mandoc_strndup(name, len);
3024 		reg->key.sz = len;
3025 		reg->val = 0;
3026 		reg->step = 0;
3027 		reg->next = r->regtab;
3028 		r->regtab = reg;
3029 	}
3030 
3031 	if ('+' == sign)
3032 		reg->val += val;
3033 	else if ('-' == sign)
3034 		reg->val -= val;
3035 	else
3036 		reg->val = val;
3037 	if (step != INT_MIN)
3038 		reg->step = step;
3039 }
3040 
3041 /*
3042  * Handle some predefined read-only number registers.
3043  * For now, return -1 if the requested register is not predefined;
3044  * in case a predefined read-only register having the value -1
3045  * were to turn up, another special value would have to be chosen.
3046  */
3047 static int
roff_getregro(const struct roff * r,const char * name)3048 roff_getregro(const struct roff *r, const char *name)
3049 {
3050 
3051 	switch (*name) {
3052 	case '$':  /* Number of arguments of the last macro evaluated. */
3053 		return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3054 	case 'A':  /* ASCII approximation mode is always off. */
3055 		return 0;
3056 	case 'g':  /* Groff compatibility mode is always on. */
3057 		return 1;
3058 	case 'H':  /* Fixed horizontal resolution. */
3059 		return 24;
3060 	case 'j':  /* Always adjust left margin only. */
3061 		return 0;
3062 	case 'l':  /* Fixed line width for DocBook. */
3063 		return 78 * 24;
3064 	case 'T':  /* Some output device is always defined. */
3065 		return 1;
3066 	case 'V':  /* Fixed vertical resolution. */
3067 		return 40;
3068 	default:
3069 		return -1;
3070 	}
3071 }
3072 
/*
 * Return the value of the number register with the
 * NUL-terminated name, without applying its step size.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 len;

	len = strlen(name);
	return roff_getregn(r, name, len, '\0');
}
3078 
3079 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3080 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3081 {
3082 	struct roffreg	*reg;
3083 	int		 val;
3084 
3085 	if ('.' == name[0] && 2 == len) {
3086 		val = roff_getregro(r, name + 1);
3087 		if (-1 != val)
3088 			return val;
3089 	}
3090 
3091 	for (reg = r->regtab; reg; reg = reg->next) {
3092 		if (len == reg->key.sz &&
3093 		    0 == strncmp(name, reg->key.p, len)) {
3094 			switch (sign) {
3095 			case '+':
3096 				reg->val += reg->step;
3097 				break;
3098 			case '-':
3099 				reg->val -= reg->step;
3100 				break;
3101 			default:
3102 				break;
3103 			}
3104 			return reg->val;
3105 		}
3106 	}
3107 
3108 	roff_setregn(r, name, len, 0, '\0', INT_MIN);
3109 	return 0;
3110 }
3111 
3112 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3113 roff_hasregn(const struct roff *r, const char *name, size_t len)
3114 {
3115 	struct roffreg	*reg;
3116 	int		 val;
3117 
3118 	if ('.' == name[0] && 2 == len) {
3119 		val = roff_getregro(r, name + 1);
3120 		if (-1 != val)
3121 			return 1;
3122 	}
3123 
3124 	for (reg = r->regtab; reg; reg = reg->next)
3125 		if (len == reg->key.sz &&
3126 		    0 == strncmp(name, reg->key.p, len))
3127 			return 1;
3128 
3129 	return 0;
3130 }
3131 
3132 static void
roff_freereg(struct roffreg * reg)3133 roff_freereg(struct roffreg *reg)
3134 {
3135 	struct roffreg	*old_reg;
3136 
3137 	while (NULL != reg) {
3138 		free(reg->key.p);
3139 		old_reg = reg;
3140 		reg = reg->next;
3141 		free(old_reg);
3142 	}
3143 }
3144 
3145 static int
roff_nr(ROFF_ARGS)3146 roff_nr(ROFF_ARGS)
3147 {
3148 	char		*key, *val, *step;
3149 	size_t		 keysz;
3150 	int		 iv, is, len;
3151 	char		 sign;
3152 
3153 	key = val = buf->buf + pos;
3154 	if (*key == '\0')
3155 		return ROFF_IGN;
3156 
3157 	keysz = roff_getname(&val, ln, pos);
3158 	if (key[keysz] == '\\' || key[keysz] == '\t')
3159 		return ROFF_IGN;
3160 
3161 	sign = *val;
3162 	if (sign == '+' || sign == '-')
3163 		val++;
3164 
3165 	len = 0;
3166 	if (roff_evalnum(ln, val, &len, &iv, 'u', 0) == 0)
3167 		return ROFF_IGN;
3168 
3169 	step = val + len;
3170 	while (isspace((unsigned char)*step))
3171 		step++;
3172 	if (roff_evalnum(ln, step, NULL, &is, '\0', 0) == 0)
3173 		is = INT_MIN;
3174 
3175 	roff_setregn(r, key, keysz, iv, sign, is);
3176 	return ROFF_IGN;
3177 }
3178 
3179 static int
roff_rr(ROFF_ARGS)3180 roff_rr(ROFF_ARGS)
3181 {
3182 	struct roffreg	*reg, **prev;
3183 	char		*name, *cp;
3184 	size_t		 namesz;
3185 
3186 	name = cp = buf->buf + pos;
3187 	if (*name == '\0')
3188 		return ROFF_IGN;
3189 	namesz = roff_getname(&cp, ln, pos);
3190 	name[namesz] = '\0';
3191 
3192 	prev = &r->regtab;
3193 	while (1) {
3194 		reg = *prev;
3195 		if (reg == NULL || !strcmp(name, reg->key.p))
3196 			break;
3197 		prev = &reg->next;
3198 	}
3199 	if (reg != NULL) {
3200 		*prev = reg->next;
3201 		free(reg->key.p);
3202 		free(reg);
3203 	}
3204 	return ROFF_IGN;
3205 }
3206 
3207 /* --- handler functions for roff requests -------------------------------- */
3208 
3209 static int
roff_rm(ROFF_ARGS)3210 roff_rm(ROFF_ARGS)
3211 {
3212 	const char	 *name;
3213 	char		 *cp;
3214 	size_t		  namesz;
3215 
3216 	cp = buf->buf + pos;
3217 	while (*cp != '\0') {
3218 		name = cp;
3219 		namesz = roff_getname(&cp, ln, (int)(cp - buf->buf));
3220 		roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3221 		roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3222 		if (name[namesz] == '\\' || name[namesz] == '\t')
3223 			break;
3224 	}
3225 	return ROFF_IGN;
3226 }
3227 
3228 static int
roff_it(ROFF_ARGS)3229 roff_it(ROFF_ARGS)
3230 {
3231 	int		 iv;
3232 
3233 	/* Parse the number of lines. */
3234 
3235 	if ( ! roff_evalnum(ln, buf->buf, &pos, &iv, '\0', 0)) {
3236 		mandoc_msg(MANDOCERR_IT_NONUM,
3237 		    ln, ppos, "%s", buf->buf + 1);
3238 		return ROFF_IGN;
3239 	}
3240 
3241 	while (isspace((unsigned char)buf->buf[pos]))
3242 		pos++;
3243 
3244 	/*
3245 	 * Arm the input line trap.
3246 	 * Special-casing "an-trap" is an ugly workaround to cope
3247 	 * with DocBook stupidly fiddling with man(7) internals.
3248 	 */
3249 
3250 	roffit_lines = iv;
3251 	roffit_macro = mandoc_strdup(iv != 1 ||
3252 	    strcmp(buf->buf + pos, "an-trap") ?
3253 	    buf->buf + pos : "br");
3254 	return ROFF_IGN;
3255 }
3256 
3257 static int
roff_Dd(ROFF_ARGS)3258 roff_Dd(ROFF_ARGS)
3259 {
3260 	int		 mask;
3261 	enum roff_tok	 t, te;
3262 
3263 	switch (tok) {
3264 	case ROFF_Dd:
3265 		tok = MDOC_Dd;
3266 		te = MDOC_MAX;
3267 		if (r->format == 0)
3268 			r->format = MPARSE_MDOC;
3269 		mask = MPARSE_MDOC | MPARSE_QUICK;
3270 		break;
3271 	case ROFF_TH:
3272 		tok = MAN_TH;
3273 		te = MAN_MAX;
3274 		if (r->format == 0)
3275 			r->format = MPARSE_MAN;
3276 		mask = MPARSE_QUICK;
3277 		break;
3278 	default:
3279 		abort();
3280 	}
3281 	if ((r->options & mask) == 0)
3282 		for (t = tok; t < te; t++)
3283 			roff_setstr(r, roff_name[t], NULL, 0);
3284 	return ROFF_CONT;
3285 }
3286 
3287 static int
roff_TE(ROFF_ARGS)3288 roff_TE(ROFF_ARGS)
3289 {
3290 	r->man->flags &= ~ROFF_NONOFILL;
3291 	if (r->tbl == NULL) {
3292 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3293 		return ROFF_IGN;
3294 	}
3295 	if (tbl_end(r->tbl, 0) == 0) {
3296 		r->tbl = NULL;
3297 		free(buf->buf);
3298 		buf->buf = mandoc_strdup(".sp");
3299 		buf->sz = 4;
3300 		*offs = 0;
3301 		return ROFF_REPARSE;
3302 	}
3303 	r->tbl = NULL;
3304 	return ROFF_IGN;
3305 }
3306 
3307 static int
roff_T_(ROFF_ARGS)3308 roff_T_(ROFF_ARGS)
3309 {
3310 
3311 	if (NULL == r->tbl)
3312 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3313 	else
3314 		tbl_restart(ln, ppos, r->tbl);
3315 
3316 	return ROFF_IGN;
3317 }
3318 
3319 /*
3320  * Handle in-line equation delimiters.
3321  */
3322 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3323 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3324 {
3325 	char		*cp1, *cp2;
3326 	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3327 
3328 	/*
3329 	 * Outside equations, look for an opening delimiter.
3330 	 * If we are inside an equation, we already know it is
3331 	 * in-line, or this function wouldn't have been called;
3332 	 * so look for a closing delimiter.
3333 	 */
3334 
3335 	cp1 = buf->buf + pos;
3336 	cp2 = strchr(cp1, r->eqn == NULL ?
3337 	    r->last_eqn->odelim : r->last_eqn->cdelim);
3338 	if (cp2 == NULL)
3339 		return ROFF_CONT;
3340 
3341 	*cp2++ = '\0';
3342 	bef_pr = bef_nl = aft_nl = aft_pr = "";
3343 
3344 	/* Handle preceding text, protecting whitespace. */
3345 
3346 	if (*buf->buf != '\0') {
3347 		if (r->eqn == NULL)
3348 			bef_pr = "\\&";
3349 		bef_nl = "\n";
3350 	}
3351 
3352 	/*
3353 	 * Prepare replacing the delimiter with an equation macro
3354 	 * and drop leading white space from the equation.
3355 	 */
3356 
3357 	if (r->eqn == NULL) {
3358 		while (*cp2 == ' ')
3359 			cp2++;
3360 		mac = ".EQ";
3361 	} else
3362 		mac = ".EN";
3363 
3364 	/* Handle following text, protecting whitespace. */
3365 
3366 	if (*cp2 != '\0') {
3367 		aft_nl = "\n";
3368 		if (r->eqn != NULL)
3369 			aft_pr = "\\&";
3370 	}
3371 
3372 	/* Do the actual replacement. */
3373 
3374 	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3375 	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3376 	free(buf->buf);
3377 	buf->buf = cp1;
3378 
3379 	/* Toggle the in-line state of the eqn subsystem. */
3380 
3381 	r->eqn_inline = r->eqn == NULL;
3382 	return ROFF_REPARSE;
3383 }
3384 
3385 static int
roff_EQ(ROFF_ARGS)3386 roff_EQ(ROFF_ARGS)
3387 {
3388 	struct roff_node	*n;
3389 
3390 	if (r->man->meta.macroset == MACROSET_MAN)
3391 		man_breakscope(r->man, ROFF_EQ);
3392 	n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3393 	if (ln > r->man->last->line)
3394 		n->flags |= NODE_LINE;
3395 	n->eqn = eqn_box_new();
3396 	roff_node_append(r->man, n);
3397 	r->man->next = ROFF_NEXT_SIBLING;
3398 
3399 	assert(r->eqn == NULL);
3400 	if (r->last_eqn == NULL)
3401 		r->last_eqn = eqn_alloc();
3402 	else
3403 		eqn_reset(r->last_eqn);
3404 	r->eqn = r->last_eqn;
3405 	r->eqn->node = n;
3406 
3407 	if (buf->buf[pos] != '\0')
3408 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3409 		    ".EQ %s", buf->buf + pos);
3410 
3411 	return ROFF_IGN;
3412 }
3413 
3414 static int
roff_EN(ROFF_ARGS)3415 roff_EN(ROFF_ARGS)
3416 {
3417 	if (r->eqn != NULL) {
3418 		eqn_parse(r->eqn);
3419 		r->eqn = NULL;
3420 	} else
3421 		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3422 	if (buf->buf[pos] != '\0')
3423 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3424 		    "EN %s", buf->buf + pos);
3425 	return ROFF_IGN;
3426 }
3427 
3428 static int
roff_TS(ROFF_ARGS)3429 roff_TS(ROFF_ARGS)
3430 {
3431 	if (r->tbl != NULL) {
3432 		mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3433 		tbl_end(r->tbl, 0);
3434 	}
3435 	r->man->flags |= ROFF_NONOFILL;
3436 	r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3437 	if (r->last_tbl == NULL)
3438 		r->first_tbl = r->tbl;
3439 	r->last_tbl = r->tbl;
3440 	return ROFF_IGN;
3441 }
3442 
3443 static int
roff_noarg(ROFF_ARGS)3444 roff_noarg(ROFF_ARGS)
3445 {
3446 	if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3447 		man_breakscope(r->man, tok);
3448 	if (tok == ROFF_brp)
3449 		tok = ROFF_br;
3450 	roff_elem_alloc(r->man, ln, ppos, tok);
3451 	if (buf->buf[pos] != '\0')
3452 		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3453 		   "%s %s", roff_name[tok], buf->buf + pos);
3454 	if (tok == ROFF_nf)
3455 		r->man->flags |= ROFF_NOFILL;
3456 	else if (tok == ROFF_fi)
3457 		r->man->flags &= ~ROFF_NOFILL;
3458 	r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3459 	r->man->next = ROFF_NEXT_SIBLING;
3460 	return ROFF_IGN;
3461 }
3462 
3463 static int
roff_onearg(ROFF_ARGS)3464 roff_onearg(ROFF_ARGS)
3465 {
3466 	struct roff_node	*n;
3467 	char			*cp;
3468 	int			 npos;
3469 
3470 	if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3471 	    (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3472 	     tok == ROFF_ti))
3473 		man_breakscope(r->man, tok);
3474 
3475 	if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3476 		r->man->last = roffce_node;
3477 		r->man->next = ROFF_NEXT_SIBLING;
3478 	}
3479 
3480 	roff_elem_alloc(r->man, ln, ppos, tok);
3481 	n = r->man->last;
3482 
3483 	cp = buf->buf + pos;
3484 	if (*cp != '\0') {
3485 		while (*cp != '\0' && *cp != ' ')
3486 			cp++;
3487 		while (*cp == ' ')
3488 			*cp++ = '\0';
3489 		if (*cp != '\0')
3490 			mandoc_msg(MANDOCERR_ARG_EXCESS,
3491 			    ln, (int)(cp - buf->buf),
3492 			    "%s ... %s", roff_name[tok], cp);
3493 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3494 	}
3495 
3496 	if (tok == ROFF_ce || tok == ROFF_rj) {
3497 		if (r->man->last->type == ROFFT_ELEM) {
3498 			roff_word_alloc(r->man, ln, pos, "1");
3499 			r->man->last->flags |= NODE_NOSRC;
3500 		}
3501 		npos = 0;
3502 		if (roff_evalnum(ln, r->man->last->string, &npos,
3503 		    &roffce_lines, '\0', 0) == 0) {
3504 			mandoc_msg(MANDOCERR_CE_NONUM,
3505 			    ln, pos, "ce %s", buf->buf + pos);
3506 			roffce_lines = 1;
3507 		}
3508 		if (roffce_lines < 1) {
3509 			r->man->last = r->man->last->parent;
3510 			roffce_node = NULL;
3511 			roffce_lines = 0;
3512 		} else
3513 			roffce_node = r->man->last->parent;
3514 	} else {
3515 		n->flags |= NODE_VALID | NODE_ENDED;
3516 		r->man->last = n;
3517 	}
3518 	n->flags |= NODE_LINE;
3519 	r->man->next = ROFF_NEXT_SIBLING;
3520 	return ROFF_IGN;
3521 }
3522 
3523 static int
roff_manyarg(ROFF_ARGS)3524 roff_manyarg(ROFF_ARGS)
3525 {
3526 	struct roff_node	*n;
3527 	char			*sp, *ep;
3528 
3529 	roff_elem_alloc(r->man, ln, ppos, tok);
3530 	n = r->man->last;
3531 
3532 	for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3533 		while (*ep != '\0' && *ep != ' ')
3534 			ep++;
3535 		while (*ep == ' ')
3536 			*ep++ = '\0';
3537 		roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3538 	}
3539 
3540 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3541 	r->man->last = n;
3542 	r->man->next = ROFF_NEXT_SIBLING;
3543 	return ROFF_IGN;
3544 }
3545 
3546 static int
roff_als(ROFF_ARGS)3547 roff_als(ROFF_ARGS)
3548 {
3549 	char		*oldn, *newn, *end, *value;
3550 	size_t		 oldsz, newsz, valsz;
3551 
3552 	newn = oldn = buf->buf + pos;
3553 	if (*newn == '\0')
3554 		return ROFF_IGN;
3555 
3556 	newsz = roff_getname(&oldn, ln, pos);
3557 	if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3558 		return ROFF_IGN;
3559 
3560 	end = oldn;
3561 	oldsz = roff_getname(&end, ln, oldn - buf->buf);
3562 	if (oldsz == 0)
3563 		return ROFF_IGN;
3564 
3565 	valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3566 	    (int)oldsz, oldn);
3567 	roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3568 	roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3569 	free(value);
3570 	return ROFF_IGN;
3571 }
3572 
3573 /*
3574  * The .break request only makes sense inside conditionals,
3575  * and that case is already handled in roff_cond_sub().
3576  */
3577 static int
roff_break(ROFF_ARGS)3578 roff_break(ROFF_ARGS)
3579 {
3580 	mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3581 	return ROFF_IGN;
3582 }
3583 
3584 static int
roff_cc(ROFF_ARGS)3585 roff_cc(ROFF_ARGS)
3586 {
3587 	const char	*p;
3588 
3589 	p = buf->buf + pos;
3590 
3591 	if (*p == '\0' || (r->control = *p++) == '.')
3592 		r->control = '\0';
3593 
3594 	if (*p != '\0')
3595 		mandoc_msg(MANDOCERR_ARG_EXCESS,
3596 		    ln, p - buf->buf, "cc ... %s", p);
3597 
3598 	return ROFF_IGN;
3599 }
3600 
3601 static int
roff_char(ROFF_ARGS)3602 roff_char(ROFF_ARGS)
3603 {
3604 	const char	*p, *kp, *vp;
3605 	size_t		 ksz, vsz;
3606 	int		 font;
3607 
3608 	/* Parse the character to be replaced. */
3609 
3610 	kp = buf->buf + pos;
3611 	p = kp + 1;
3612 	if (*kp == '\0' || (*kp == '\\' &&
3613 	     mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3614 	    (*p != ' ' && *p != '\0')) {
3615 		mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3616 		return ROFF_IGN;
3617 	}
3618 	ksz = p - kp;
3619 	while (*p == ' ')
3620 		p++;
3621 
3622 	/*
3623 	 * If the replacement string contains a font escape sequence,
3624 	 * we have to restore the font at the end.
3625 	 */
3626 
3627 	vp = p;
3628 	vsz = strlen(p);
3629 	font = 0;
3630 	while (*p != '\0') {
3631 		if (*p++ != '\\')
3632 			continue;
3633 		switch (mandoc_escape(&p, NULL, NULL)) {
3634 		case ESCAPE_FONT:
3635 		case ESCAPE_FONTROMAN:
3636 		case ESCAPE_FONTITALIC:
3637 		case ESCAPE_FONTBOLD:
3638 		case ESCAPE_FONTBI:
3639 		case ESCAPE_FONTCR:
3640 		case ESCAPE_FONTCB:
3641 		case ESCAPE_FONTCI:
3642 		case ESCAPE_FONTPREV:
3643 			font++;
3644 			break;
3645 		default:
3646 			break;
3647 		}
3648 	}
3649 	if (font > 1)
3650 		mandoc_msg(MANDOCERR_CHAR_FONT,
3651 		    ln, (int)(vp - buf->buf), "%s", vp);
3652 
3653 	/*
3654 	 * Approximate the effect of .char using the .tr tables.
3655 	 * XXX In groff, .char and .tr interact differently.
3656 	 */
3657 
3658 	if (ksz == 1) {
3659 		if (r->xtab == NULL)
3660 			r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3661 		assert((unsigned int)*kp < 128);
3662 		free(r->xtab[(int)*kp].p);
3663 		r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3664 		    "%s%s", vp, font ? "\fP" : "");
3665 	} else {
3666 		roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3667 		if (font)
3668 			roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3669 	}
3670 	return ROFF_IGN;
3671 }
3672 
3673 static int
roff_ec(ROFF_ARGS)3674 roff_ec(ROFF_ARGS)
3675 {
3676 	const char	*p;
3677 
3678 	p = buf->buf + pos;
3679 	if (*p == '\0')
3680 		r->escape = '\\';
3681 	else {
3682 		r->escape = *p;
3683 		if (*++p != '\0')
3684 			mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3685 			    (int)(p - buf->buf), "ec ... %s", p);
3686 	}
3687 	return ROFF_IGN;
3688 }
3689 
3690 static int
roff_eo(ROFF_ARGS)3691 roff_eo(ROFF_ARGS)
3692 {
3693 	r->escape = '\0';
3694 	if (buf->buf[pos] != '\0')
3695 		mandoc_msg(MANDOCERR_ARG_SKIP,
3696 		    ln, pos, "eo %s", buf->buf + pos);
3697 	return ROFF_IGN;
3698 }
3699 
3700 static int
roff_mc(ROFF_ARGS)3701 roff_mc(ROFF_ARGS)
3702 {
3703 	struct roff_node	*n;
3704 	char			*cp;
3705 
3706 	/* Parse the first argument. */
3707 
3708 	cp = buf->buf + pos;
3709 	if (*cp != '\0')
3710 		cp++;
3711 	if (buf->buf[pos] == '\\') {
3712 		switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3713 		case ESCAPE_SPECIAL:
3714 		case ESCAPE_UNICODE:
3715 		case ESCAPE_NUMBERED:
3716 			break;
3717 		default:
3718 			*cp = '\0';
3719 			mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3720 			    "mc %s", buf->buf + pos);
3721 			buf->buf[pos] = '\0';
3722 			break;
3723 		}
3724 	}
3725 
3726 	/* Ignore additional arguments. */
3727 
3728 	while (*cp == ' ')
3729 		*cp++ = '\0';
3730 	if (*cp != '\0') {
3731 		mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3732 		    "mc ... %s", cp);
3733 		*cp = '\0';
3734 	}
3735 
3736 	/* Create the .mc node. */
3737 
3738 	roff_elem_alloc(r->man, ln, ppos, tok);
3739 	n = r->man->last;
3740 	if (buf->buf[pos] != '\0')
3741 		roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3742 	n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3743 	r->man->last = n;
3744 	r->man->next = ROFF_NEXT_SIBLING;
3745 	return ROFF_IGN;
3746 }
3747 
3748 static int
roff_nop(ROFF_ARGS)3749 roff_nop(ROFF_ARGS)
3750 {
3751 	while (buf->buf[pos] == ' ')
3752 		pos++;
3753 	*offs = pos;
3754 	return ROFF_RERUN;
3755 }
3756 
3757 static int
roff_tr(ROFF_ARGS)3758 roff_tr(ROFF_ARGS)
3759 {
3760 	const char	*p, *first, *second;
3761 	size_t		 fsz, ssz;
3762 
3763 	p = buf->buf + pos;
3764 
3765 	if (*p == '\0') {
3766 		mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3767 		return ROFF_IGN;
3768 	}
3769 
3770 	while (*p != '\0') {
3771 		fsz = ssz = 1;
3772 
3773 		first = p++;
3774 		if (*first == '\\') {
3775 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3776 				return ROFF_IGN;
3777 			fsz = (size_t)(p - first);
3778 		}
3779 
3780 		second = p++;
3781 		if (*second == '\\') {
3782 			if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3783 				return ROFF_IGN;
3784 			ssz = (size_t)(p - second);
3785 		} else if (*second == '\0') {
3786 			mandoc_msg(MANDOCERR_TR_ODD, ln,
3787 			    (int)(first - buf->buf), "tr %s", first);
3788 			second = " ";
3789 			p--;
3790 		}
3791 
3792 		if (fsz > 1) {
3793 			roff_setstrn(&r->xmbtab, first, fsz,
3794 			    second, ssz, 0);
3795 			continue;
3796 		}
3797 
3798 		if (r->xtab == NULL)
3799 			r->xtab = mandoc_calloc(128,
3800 			    sizeof(struct roffstr));
3801 
3802 		free(r->xtab[(int)*first].p);
3803 		r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3804 		r->xtab[(int)*first].sz = ssz;
3805 	}
3806 
3807 	return ROFF_IGN;
3808 }
3809 
3810 /*
3811  * Implementation of the .return request.
3812  * There is no need to call roff_userret() from here.
3813  * The read module will call that after rewinding the reader stack
3814  * to the place from where the current macro was called.
3815  */
3816 static int
roff_return(ROFF_ARGS)3817 roff_return(ROFF_ARGS)
3818 {
3819 	if (r->mstackpos >= 0)
3820 		return ROFF_IGN | ROFF_USERRET;
3821 
3822 	mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3823 	return ROFF_IGN;
3824 }
3825 
3826 static int
roff_rn(ROFF_ARGS)3827 roff_rn(ROFF_ARGS)
3828 {
3829 	const char	*value;
3830 	char		*oldn, *newn, *end;
3831 	size_t		 oldsz, newsz;
3832 	int		 deftype;
3833 
3834 	oldn = newn = buf->buf + pos;
3835 	if (*oldn == '\0')
3836 		return ROFF_IGN;
3837 
3838 	oldsz = roff_getname(&newn, ln, pos);
3839 	if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3840 		return ROFF_IGN;
3841 
3842 	end = newn;
3843 	newsz = roff_getname(&end, ln, newn - buf->buf);
3844 	if (newsz == 0)
3845 		return ROFF_IGN;
3846 
3847 	deftype = ROFFDEF_ANY;
3848 	value = roff_getstrn(r, oldn, oldsz, &deftype);
3849 	switch (deftype) {
3850 	case ROFFDEF_USER:
3851 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3852 		roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3853 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3854 		break;
3855 	case ROFFDEF_PRE:
3856 		roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3857 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3858 		break;
3859 	case ROFFDEF_REN:
3860 		roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3861 		roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3862 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3863 		break;
3864 	case ROFFDEF_STD:
3865 		roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3866 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3867 		break;
3868 	default:
3869 		roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3870 		roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3871 		break;
3872 	}
3873 	return ROFF_IGN;
3874 }
3875 
3876 static int
roff_shift(ROFF_ARGS)3877 roff_shift(ROFF_ARGS)
3878 {
3879 	struct mctx	*ctx;
3880 	int		 argpos, levels, i;
3881 
3882 	argpos = pos;
3883 	levels = 1;
3884 	if (buf->buf[pos] != '\0' &&
3885 	    roff_evalnum(ln, buf->buf, &pos, &levels, '\0', 0) == 0) {
3886 		mandoc_msg(MANDOCERR_CE_NONUM,
3887 		    ln, pos, "shift %s", buf->buf + pos);
3888 		levels = 1;
3889 	}
3890 	if (r->mstackpos < 0) {
3891 		mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3892 		return ROFF_IGN;
3893 	}
3894 	ctx = r->mstack + r->mstackpos;
3895 	if (levels > ctx->argc) {
3896 		mandoc_msg(MANDOCERR_SHIFT,
3897 		    ln, argpos, "%d, but max is %d", levels, ctx->argc);
3898 		levels = ctx->argc;
3899 	}
3900 	if (levels < 0) {
3901 		mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3902 		levels = 0;
3903 	}
3904 	if (levels == 0)
3905 		return ROFF_IGN;
3906 	for (i = 0; i < levels; i++)
3907 		free(ctx->argv[i]);
3908 	ctx->argc -= levels;
3909 	for (i = 0; i < ctx->argc; i++)
3910 		ctx->argv[i] = ctx->argv[i + levels];
3911 	return ROFF_IGN;
3912 }
3913 
3914 static int
roff_so(ROFF_ARGS)3915 roff_so(ROFF_ARGS)
3916 {
3917 	char *name, *cp;
3918 
3919 	name = buf->buf + pos;
3920 	mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3921 
3922 	/*
3923 	 * Handle `so'.  Be EXTREMELY careful, as we shouldn't be
3924 	 * opening anything that's not in our cwd or anything beneath
3925 	 * it.  Thus, explicitly disallow traversing up the file-system
3926 	 * or using absolute paths.
3927 	 */
3928 
3929 	if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3930 		mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3931 		buf->sz = mandoc_asprintf(&cp,
3932 		    ".sp\nSee the file %s.\n.sp", name) + 1;
3933 		free(buf->buf);
3934 		buf->buf = cp;
3935 		*offs = 0;
3936 		return ROFF_REPARSE;
3937 	}
3938 
3939 	*offs = pos;
3940 	return ROFF_SO;
3941 }
3942 
3943 /* --- user defined strings and macros ------------------------------------ */
3944 
3945 static int
roff_userdef(ROFF_ARGS)3946 roff_userdef(ROFF_ARGS)
3947 {
3948 	struct mctx	 *ctx;
3949 	char		 *arg, *ap, *dst, *src;
3950 	size_t		  sz;
3951 
3952 	/* If the macro is empty, ignore it altogether. */
3953 
3954 	if (*r->current_string == '\0')
3955 		return ROFF_IGN;
3956 
3957 	/* Initialize a new macro stack context. */
3958 
3959 	if (++r->mstackpos == r->mstacksz) {
3960 		r->mstack = mandoc_recallocarray(r->mstack,
3961 		    r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3962 		r->mstacksz += 8;
3963 	}
3964 	ctx = r->mstack + r->mstackpos;
3965 	ctx->argc = 0;
3966 
3967 	/*
3968 	 * Collect pointers to macro argument strings,
3969 	 * NUL-terminating them and escaping quotes.
3970 	 */
3971 
3972 	src = buf->buf + pos;
3973 	while (*src != '\0') {
3974 		if (ctx->argc == ctx->argsz) {
3975 			ctx->argsz += 8;
3976 			ctx->argv = mandoc_reallocarray(ctx->argv,
3977 			    ctx->argsz, sizeof(*ctx->argv));
3978 		}
3979 		arg = roff_getarg(r, &src, ln, &pos);
3980 		sz = 1;  /* For the terminating NUL. */
3981 		for (ap = arg; *ap != '\0'; ap++)
3982 			sz += *ap == '"' ? 4 : 1;
3983 		ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3984 		for (ap = arg; *ap != '\0'; ap++) {
3985 			if (*ap == '"') {
3986 				memcpy(dst, "\\(dq", 4);
3987 				dst += 4;
3988 			} else
3989 				*dst++ = *ap;
3990 		}
3991 		*dst = '\0';
3992 		free(arg);
3993 	}
3994 
3995 	/* Replace the macro invocation by the macro definition. */
3996 
3997 	free(buf->buf);
3998 	buf->buf = mandoc_strdup(r->current_string);
3999 	buf->sz = strlen(buf->buf) + 1;
4000 	*offs = 0;
4001 
4002 	return buf->buf[buf->sz - 2] == '\n' ?
4003 	    ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4004 }
4005 
4006 /*
4007  * Calling a high-level macro that was renamed with .rn.
4008  * r->current_string has already been set up by roff_parse().
4009  */
4010 static int
roff_renamed(ROFF_ARGS)4011 roff_renamed(ROFF_ARGS)
4012 {
4013 	char	*nbuf;
4014 
4015 	buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4016 	    buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4017 	free(buf->buf);
4018 	buf->buf = nbuf;
4019 	*offs = 0;
4020 	return ROFF_CONT;
4021 }
4022 
4023 /*
4024  * Measure the length in bytes of the roff identifier at *cpp
4025  * and advance the pointer to the next word.
4026  */
4027 static size_t
roff_getname(char ** cpp,int ln,int pos)4028 roff_getname(char **cpp, int ln, int pos)
4029 {
4030 	char	 *name, *cp;
4031 	int	  namesz, inam, iend;
4032 
4033 	name = *cpp;
4034 	if (*name == '\0')
4035 		return 0;
4036 
4037 	/* Advance cp to the byte after the end of the name. */
4038 
4039 	cp = name;
4040 	namesz = 0;
4041 	for (;;) {
4042 		if (*cp == '\0')
4043 			break;
4044 		if (*cp == ' ' || *cp == '\t') {
4045 			cp++;
4046 			break;
4047 		}
4048 		if (*cp != '\\') {
4049 			if (name + namesz < cp) {
4050 				name[namesz] = *cp;
4051 				*cp = ' ';
4052 			}
4053 			namesz++;
4054 			cp++;
4055 			continue;
4056 		}
4057 		if (cp[1] == '{' || cp[1] == '}')
4058 			break;
4059 		if (roff_escape(cp, 0, 0, NULL, &inam,
4060 		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
4061 			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4062 			    "%.*s%.*s", namesz, name, iend, cp);
4063 			cp += iend;
4064 			break;
4065 		}
4066 
4067 		/*
4068 		 * In an identifier, \\, \., \G and so on
4069 		 * are reduced to \, ., G and so on,
4070 		 * vaguely similar to copy mode.
4071 		 */
4072 
4073 		name[namesz++] = cp[inam];
4074 		while (iend--) {
4075 			if (cp >= name + namesz)
4076 				*cp = ' ';
4077 			cp++;
4078 		}
4079 	}
4080 
4081 	/* Read past spaces. */
4082 
4083 	while (*cp == ' ')
4084 		cp++;
4085 
4086 	*cpp = cp;
4087 	return namesz;
4088 }
4089 
4090 /*
4091  * Store *string into the user-defined string called *name.
4092  * To clear an existing entry, call with (*r, *name, NULL, 0).
4093  * append == 0: replace mode
4094  * append == 1: single-line append mode
4095  * append == 2: multiline append mode, append '\n' after each call
4096  */
4097 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4098 roff_setstr(struct roff *r, const char *name, const char *string,
4099 	int append)
4100 {
4101 	size_t	 namesz;
4102 
4103 	namesz = strlen(name);
4104 	roff_setstrn(&r->strtab, name, namesz, string,
4105 	    string ? strlen(string) : 0, append);
4106 	roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4107 }
4108 
4109 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4110 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4111 		const char *string, size_t stringsz, int append)
4112 {
4113 	struct roffkv	*n;
4114 	char		*c;
4115 	int		 i;
4116 	size_t		 oldch, newch;
4117 
4118 	/* Search for an existing string with the same name. */
4119 	n = *r;
4120 
4121 	while (n && (namesz != n->key.sz ||
4122 			strncmp(n->key.p, name, namesz)))
4123 		n = n->next;
4124 
4125 	if (NULL == n) {
4126 		/* Create a new string table entry. */
4127 		n = mandoc_malloc(sizeof(struct roffkv));
4128 		n->key.p = mandoc_strndup(name, namesz);
4129 		n->key.sz = namesz;
4130 		n->val.p = NULL;
4131 		n->val.sz = 0;
4132 		n->next = *r;
4133 		*r = n;
4134 	} else if (0 == append) {
4135 		free(n->val.p);
4136 		n->val.p = NULL;
4137 		n->val.sz = 0;
4138 	}
4139 
4140 	if (NULL == string)
4141 		return;
4142 
4143 	/*
4144 	 * One additional byte for the '\n' in multiline mode,
4145 	 * and one for the terminating '\0'.
4146 	 */
4147 	newch = stringsz + (1 < append ? 2u : 1u);
4148 
4149 	if (NULL == n->val.p) {
4150 		n->val.p = mandoc_malloc(newch);
4151 		*n->val.p = '\0';
4152 		oldch = 0;
4153 	} else {
4154 		oldch = n->val.sz;
4155 		n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4156 	}
4157 
4158 	/* Skip existing content in the destination buffer. */
4159 	c = n->val.p + (int)oldch;
4160 
4161 	/* Append new content to the destination buffer. */
4162 	i = 0;
4163 	while (i < (int)stringsz) {
4164 		/*
4165 		 * Rudimentary roff copy mode:
4166 		 * Handle escaped backslashes.
4167 		 */
4168 		if ('\\' == string[i] && '\\' == string[i + 1])
4169 			i++;
4170 		*c++ = string[i++];
4171 	}
4172 
4173 	/* Append terminating bytes. */
4174 	if (1 < append)
4175 		*c++ = '\n';
4176 
4177 	*c = '\0';
4178 	n->val.sz = (int)(c - n->val.p);
4179 }
4180 
4181 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4182 roff_getstrn(struct roff *r, const char *name, size_t len,
4183     int *deftype)
4184 {
4185 	const struct roffkv	*n;
4186 	int			 found, i;
4187 	enum roff_tok		 tok;
4188 
4189 	found = 0;
4190 	for (n = r->strtab; n != NULL; n = n->next) {
4191 		if (strncmp(name, n->key.p, len) != 0 ||
4192 		    n->key.p[len] != '\0' || n->val.p == NULL)
4193 			continue;
4194 		if (*deftype & ROFFDEF_USER) {
4195 			*deftype = ROFFDEF_USER;
4196 			return n->val.p;
4197 		} else {
4198 			found = 1;
4199 			break;
4200 		}
4201 	}
4202 	for (n = r->rentab; n != NULL; n = n->next) {
4203 		if (strncmp(name, n->key.p, len) != 0 ||
4204 		    n->key.p[len] != '\0' || n->val.p == NULL)
4205 			continue;
4206 		if (*deftype & ROFFDEF_REN) {
4207 			*deftype = ROFFDEF_REN;
4208 			return n->val.p;
4209 		} else {
4210 			found = 1;
4211 			break;
4212 		}
4213 	}
4214 	for (i = 0; i < PREDEFS_MAX; i++) {
4215 		if (strncmp(name, predefs[i].name, len) != 0 ||
4216 		    predefs[i].name[len] != '\0')
4217 			continue;
4218 		if (*deftype & ROFFDEF_PRE) {
4219 			*deftype = ROFFDEF_PRE;
4220 			return predefs[i].str;
4221 		} else {
4222 			found = 1;
4223 			break;
4224 		}
4225 	}
4226 	if (r->man->meta.macroset != MACROSET_MAN) {
4227 		for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4228 			if (strncmp(name, roff_name[tok], len) != 0 ||
4229 			    roff_name[tok][len] != '\0')
4230 				continue;
4231 			if (*deftype & ROFFDEF_STD) {
4232 				*deftype = ROFFDEF_STD;
4233 				return NULL;
4234 			} else {
4235 				found = 1;
4236 				break;
4237 			}
4238 		}
4239 	}
4240 	if (r->man->meta.macroset != MACROSET_MDOC) {
4241 		for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4242 			if (strncmp(name, roff_name[tok], len) != 0 ||
4243 			    roff_name[tok][len] != '\0')
4244 				continue;
4245 			if (*deftype & ROFFDEF_STD) {
4246 				*deftype = ROFFDEF_STD;
4247 				return NULL;
4248 			} else {
4249 				found = 1;
4250 				break;
4251 			}
4252 		}
4253 	}
4254 
4255 	if (found == 0 && *deftype != ROFFDEF_ANY) {
4256 		if (*deftype & ROFFDEF_REN) {
4257 			/*
4258 			 * This might still be a request,
4259 			 * so do not treat it as undefined yet.
4260 			 */
4261 			*deftype = ROFFDEF_UNDEF;
4262 			return NULL;
4263 		}
4264 
4265 		/* Using an undefined string defines it to be empty. */
4266 
4267 		roff_setstrn(&r->strtab, name, len, "", 0, 0);
4268 		roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4269 	}
4270 
4271 	*deftype = 0;
4272 	return NULL;
4273 }
4274 
4275 static void
roff_freestr(struct roffkv * r)4276 roff_freestr(struct roffkv *r)
4277 {
4278 	struct roffkv	 *n, *nn;
4279 
4280 	for (n = r; n; n = nn) {
4281 		free(n->key.p);
4282 		free(n->val.p);
4283 		nn = n->next;
4284 		free(n);
4285 	}
4286 }
4287 
4288 /* --- accessors and utility functions ------------------------------------ */
4289 
4290 /*
4291  * Duplicate an input string, making the appropriate character
4292  * conversations (as stipulated by `tr') along the way.
4293  * Returns a heap-allocated string with all the replacements made.
4294  */
4295 char *
roff_strdup(const struct roff * r,const char * p)4296 roff_strdup(const struct roff *r, const char *p)
4297 {
4298 	const struct roffkv *cp;
4299 	char		*res;
4300 	const char	*pp;
4301 	size_t		 ssz, sz;
4302 	enum mandoc_esc	 esc;
4303 
4304 	if (NULL == r->xmbtab && NULL == r->xtab)
4305 		return mandoc_strdup(p);
4306 	else if ('\0' == *p)
4307 		return mandoc_strdup("");
4308 
4309 	/*
4310 	 * Step through each character looking for term matches
4311 	 * (remember that a `tr' can be invoked with an escape, which is
4312 	 * a glyph but the escape is multi-character).
4313 	 * We only do this if the character hash has been initialised
4314 	 * and the string is >0 length.
4315 	 */
4316 
4317 	res = NULL;
4318 	ssz = 0;
4319 
4320 	while ('\0' != *p) {
4321 		assert((unsigned int)*p < 128);
4322 		if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4323 			sz = r->xtab[(int)*p].sz;
4324 			res = mandoc_realloc(res, ssz + sz + 1);
4325 			memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4326 			ssz += sz;
4327 			p++;
4328 			continue;
4329 		} else if ('\\' != *p) {
4330 			res = mandoc_realloc(res, ssz + 2);
4331 			res[ssz++] = *p++;
4332 			continue;
4333 		}
4334 
4335 		/* Search for term matches. */
4336 		for (cp = r->xmbtab; cp; cp = cp->next)
4337 			if (0 == strncmp(p, cp->key.p, cp->key.sz))
4338 				break;
4339 
4340 		if (NULL != cp) {
4341 			/*
4342 			 * A match has been found.
4343 			 * Append the match to the array and move
4344 			 * forward by its keysize.
4345 			 */
4346 			res = mandoc_realloc(res,
4347 			    ssz + cp->val.sz + 1);
4348 			memcpy(res + ssz, cp->val.p, cp->val.sz);
4349 			ssz += cp->val.sz;
4350 			p += (int)cp->key.sz;
4351 			continue;
4352 		}
4353 
4354 		/*
4355 		 * Handle escapes carefully: we need to copy
4356 		 * over just the escape itself, or else we might
4357 		 * do replacements within the escape itself.
4358 		 * Make sure to pass along the bogus string.
4359 		 */
4360 		pp = p++;
4361 		esc = mandoc_escape(&p, NULL, NULL);
4362 		if (ESCAPE_ERROR == esc) {
4363 			sz = strlen(pp);
4364 			res = mandoc_realloc(res, ssz + sz + 1);
4365 			memcpy(res + ssz, pp, sz);
4366 			break;
4367 		}
4368 		/*
4369 		 * We bail out on bad escapes.
4370 		 * No need to warn: we already did so when
4371 		 * roff_expand() was called.
4372 		 */
4373 		sz = (int)(p - pp);
4374 		res = mandoc_realloc(res, ssz + sz + 1);
4375 		memcpy(res + ssz, pp, sz);
4376 		ssz += sz;
4377 	}
4378 
4379 	res[(int)ssz] = '\0';
4380 	return res;
4381 }
4382 
4383 int
roff_getformat(const struct roff * r)4384 roff_getformat(const struct roff *r)
4385 {
4386 
4387 	return r->format;
4388 }
4389 
4390 /*
4391  * Find out whether a line is a macro line or not.
4392  * If it is, adjust the current position and return one; if it isn't,
4393  * return zero and don't change the current position.
4394  * If the control character has been set with `.cc', then let that grain
4395  * precedence.
4396  * This is slightly contrary to groff, where using the non-breaking
4397  * control character when `cc' has been invoked will cause the
4398  * non-breaking macro contents to be printed verbatim.
4399  */
4400 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4401 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4402 {
4403 	int		pos;
4404 
4405 	pos = *ppos;
4406 
4407 	if (r->control != '\0' && cp[pos] == r->control)
4408 		pos++;
4409 	else if (r->control != '\0')
4410 		return 0;
4411 	else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4412 		pos += 2;
4413 	else if ('.' == cp[pos] || '\'' == cp[pos])
4414 		pos++;
4415 	else
4416 		return 0;
4417 
4418 	while (' ' == cp[pos] || '\t' == cp[pos])
4419 		pos++;
4420 
4421 	*ppos = pos;
4422 	return 1;
4423 }
4424