1 /* $OpenBSD: roff.c,v 1.276 2025/01/06 18:48:13 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2025 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
/*
 * Maximum number of string expansions per line, to break infinite loops.
 * NOTE(review): the code that enforces this limit is outside this view.
 */
#define	EXPAND_LIMIT	1000
43
/*
 * Types of definitions of macros and strings.
 * Every type occupies a bit of its own, so that several types
 * can be combined into one mask.  ROFFDEF_ANY is the union of the
 * four kinds of existing definitions; ROFFDEF_UNDEF is not part of it.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
52
53 /* --- data types --------------------------------------------------------- */
54
/*
 * An incredibly-simple string buffer:
 * a pointer to the characters together with their cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
62
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * Used for the strtab, rentab, and xmbtab lists in struct roff.
 */
struct	roffkv {
	struct roffstr	 key; /* name used for the lookup */
	struct roffstr	 val; /* content stored under that name */
	struct roffkv	*next; /* next in list */
};
71
/*
 * A single number register as part of a singly-linked list.
 * The list head is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* presumably the current value - TODO confirm */
	int		 step; /* presumably the auto-increment - TODO confirm */
	struct roffreg	*next; /* next in list */
};
81
/*
 * Association of request and macro names with token IDs.
 * Objects of this type are the entries of the hash tables built by
 * roffhash_alloc(), which allocates room for the name behind the
 * structure and passes the offset of the name member to
 * mandoc_ohash_init().
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID belonging to the name */
	char		 name[]; /* nil-terminated name (flexible array) */
};
89
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts form the mstack array of struct roff, and
 * roff_free() releases the argv member of every array element.
 */
struct	mctx {
	char		**argv; /* presumably the macro arguments - TODO confirm */
	int		 argc; /* presumably the number of arguments in use */
	int		 argsz; /* presumably the number of slots allocated */
};
99
/*
 * The complete state of one roff parser.
 * Objects are created by roff_alloc(), returned to their
 * initial state by roff_reset(), and destroyed by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
127
/*
 * A macro definition, condition, or ignored block.
 * Nodes are linked into a stack by way of the parent pointer;
 * they are created by roffnode_push() and destroyed by roffnode_pop(),
 * which also frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
141
/*
 * The parameter list shared by all request handler functions,
 * and the type of a pointer to such a handler.
 * Keeping the list in a macro guarantees that the prototypes,
 * the definitions, and the function pointer type agree.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

typedef	int (*roffproc)(ROFF_ARGS);
151
/*
 * The handlers for one request: one element of the roffs[] table.
 * The member order matters because roffs[] is initialized positionally.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* zero or ROFFMAC_STRUCT */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
159
/*
 * One predefined string: one element of the predefs[] table.
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand to the initializer of one struct predef, including the
 * trailing comma that separates it from the next array element.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167
168 /* --- function prototypes ------------------------------------------------ */
169
/*
 * Forward declarations of the functions private to this file.
 * Functions declared with ROFF_ARGS are request handlers that
 * can be stored in the roffs[] dispatch table.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalpar(int, const char *, int *, int *,
				char, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_expand_patch(struct buf *, int,
				const char *, int);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, char, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_mc(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parse_comment(struct roff *, struct buf *,
				int, int, char);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_req_or_macro(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
254
255 /* --- constant data ------------------------------------------------------ */
256
/*
 * The names of all requests and macros, indexed by token ID:
 * roffhash_alloc() reads roff_name[tok] for each token it registers.
 * NULL entries mark token IDs that have no name; roffhash_alloc()
 * skips them.  The table is purely positional.
 * NOTE(review): the order has to agree with enum roff_tok, which is
 * declared outside this file - confirm before inserting anything.
 * NOTE(review): identifiers containing a double underscore are
 * reserved in ISO C; the array has external linkage, so check for
 * other users before renaming it.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* The first part is parallel to the roffs[] handler table. */
	"br",		"ce",		"fi",		"ft",
	"ll",		"mc",		"nf",
	"po",		"rj",		"sp",
	"ta",		"ti",		NULL,
	"ab",		"ad",		"af",		"aln",
	"als",		"am",		"am1",		"ami",
	"ami1",		"as",		"as1",		"asciify",
	"backtrace",	"bd",		"bleedat",	"blm",
	"box",		"boxa",		"bp",		"BP",
	"break",	"breakchar",	"brnl",		"brp",
	"brpnl",	"c2",		"cc",
	"cf",		"cflags",	"ch",		"char",
	"chop",		"class",	"close",	"CL",
	"color",	"composite",	"continue",	"cp",
	"cropat",	"cs",		"cu",		"da",
	"dch",		"Dd",		"de",		"de1",
	"defcolor",	"dei",		"dei1",		"device",
	"devicem",	"di",		"do",		"ds",
	"ds1",		"dwh",		"dt",		"ec",
	"ecr",		"ecs",		"el",		"em",
	"EN",		"eo",		"EP",		"EQ",
	"errprint",	"ev",		"evc",		"ex",
	"fallback",	"fam",		"fc",		"fchar",
	"fcolor",	"fdeferlig",	"feature",	"fkern",
	"fl",		"flig",		"fp",		"fps",
	"fschar",	"fspacewidth",	"fspecial",	"ftr",
	"fzoom",	"gcolor",	"hc",		"hcode",
	"hidechar",	"hla",		"hlm",		"hpf",
	"hpfa",		"hpfcode",	"hw",		"hy",
	"hylang",	"hylen",	"hym",		"hypp",
	"hys",		"ie",		"if",		"ig",
	"index",	"it",		"itc",		"IX",
	"kern",		"kernafter",	"kernbefore",	"kernpair",
	"lc",		"lc_ctype",	"lds",		"length",
	"letadj",	"lf",		"lg",		"lhang",
	"linetabs",	"lnr",		"lnrf",		"lpfx",
	"ls",		"lsm",		"lt",
	"mediasize",	"minss",	"mk",		"mso",
	"na",		"ne",		"nh",		"nhychar",
	"nm",		"nn",		"nop",		"nr",
	"nrf",		"nroff",	"ns",		"nx",
	"open",		"opena",	"os",		"output",
	"padj",		"papersize",	"pc",		"pev",
	"pi",		"PI",		"pl",		"pm",
	"pn",		"pnr",		"ps",
	"psbb",		"pshape",	"pso",		"ptr",
	"pvs",		"rchar",	"rd",		"recursionlimit",
	"return",	"rfschar",	"rhang",
	"rm",		"rn",		"rnn",		"rr",
	"rs",		"rt",		"schar",	"sentchar",
	"shc",		"shift",	"sizes",	"so",
	"spacewidth",	"special",	"spreadwarn",	"ss",
	"sty",		"substring",	"sv",		"sy",
	"T&",		"tc",		"TE",
	"TH",		"tkf",		"tl",
	"tm",		"tm1",		"tmc",		"tr",
	"track",	"transchar",	"trf",		"trimat",
	"trin",		"trnt",		"troff",	"TS",
	"uf",		"ul",		"unformat",	"unwatch",
	"unwatchn",	"vpt",		"vs",		"warn",
	"warnscale",	"watch",	"watchlength",	"watchn",
	"wh",		"while",	"write",	"writec",
	"writem",	"xflag",	".",		NULL,
	NULL,		"text",
	/* Presumably the mdoc(7) macros start here - TODO confirm. */
	"Dd",		"Dt",		"Os",		"Sh",
	"Ss",		"Pp",		"D1",		"Dl",
	"Bd",		"Ed",		"Bl",		"El",
	"It",		"Ad",		"An",		"Ap",
	"Ar",		"Cd",		"Cm",		"Dv",
	"Er",		"Ev",		"Ex",		"Fa",
	"Fd",		"Fl",		"Fn",		"Ft",
	"Ic",		"In",		"Li",		"Nd",
	"Nm",		"Op",		"Ot",		"Pa",
	"Rv",		"St",		"Va",		"Vt",
	"Xr",		"%A",		"%B",		"%D",
	"%I",		"%J",		"%N",		"%O",
	"%P",		"%R",		"%T",		"%V",
	"Ac",		"Ao",		"Aq",		"At",
	"Bc",		"Bf",		"Bo",		"Bq",
	"Bsx",		"Bx",		"Db",		"Dc",
	"Do",		"Dq",		"Ec",		"Ef",
	"Em",		"Eo",		"Fx",		"Ms",
	"No",		"Ns",		"Nx",		"Ox",
	"Pc",		"Pf",		"Po",		"Pq",
	"Qc",		"Ql",		"Qo",		"Qq",
	"Re",		"Rs",		"Sc",		"So",
	"Sq",		"Sm",		"Sx",		"Sy",
	"Tn",		"Ux",		"Xc",		"Xo",
	"Fo",		"Fc",		"Oo",		"Oc",
	"Bk",		"Ek",		"Bt",		"Hf",
	"Fr",		"Ud",		"Lb",		"Lp",
	"Lk",		"Mt",		"Brq",		"Bro",
	"Brc",		"%C",		"Es",		"En",
	"Dx",		"%Q",		"%U",		"Ta",
	"Tg",		NULL,
	/* Presumably the man(7) macros start here - TODO confirm. */
	"TH",		"SH",		"SS",		"TP",
	"TQ",
	"LP",		"PP",		"P",		"IP",
	"HP",		"SM",		"SB",		"BI",
	"IB",		"BR",		"RB",		"R",
	"B",		"I",		"IR",		"RI",
	"RE",		"RS",		"DT",		"UC",
	"PD",		"AT",		"in",
	"SY",		"YS",		"OP",
	"EX",		"EE",		"UR",
	"UE",		"MT",		"ME",		"MR",
	NULL
};
/* Read-only view of the table above for use by the rest of the program. */
const	char *const *roff_name = __roff_name;
367
/*
 * The dispatch table for roff requests, one struct roffmac per token.
 * The members of each element are, in this order: proc, text, sub, flags.
 * The table is purely positional: the comment behind each element gives
 * the name found at the same position in the __roff_name[] table.
 * NOTE(review): the order has to agree with enum roff_tok, which is
 * declared outside this file - confirm before inserting anything.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_mc, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* while */
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* no name; presumably renamed macros */
	{ roff_userdef, NULL, NULL, 0 }  /* no name; presumably user-defined macros */
};
614
/*
 * Array of injected predefined strings.
 * The initializers are pulled in from the file predefs.in,
 * presumably written in terms of the PREDEF() macro - TODO confirm.
 * NOTE(review): PREDEFS_MAX is hard-coded; if predefs.in provides
 * fewer elements, the remaining ones are zero-initialized.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
620
/*
 * Parser state kept in file-scope variables rather than in struct roff,
 * so it is shared by all parsers in the process.
 * All four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
625
626
627 /* --- request table ------------------------------------------------------ */
628
629 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)630 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
631 {
632 struct ohash *htab;
633 struct roffreq *req;
634 enum roff_tok tok;
635 size_t sz;
636 unsigned int slot;
637
638 htab = mandoc_malloc(sizeof(*htab));
639 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
640
641 for (tok = mintok; tok < maxtok; tok++) {
642 if (roff_name[tok] == NULL)
643 continue;
644 sz = strlen(roff_name[tok]);
645 req = mandoc_malloc(sizeof(*req) + sz + 1);
646 req->tok = tok;
647 memcpy(req->name, roff_name[tok], sz + 1);
648 slot = ohash_qlookup(htab, req->name);
649 ohash_insert(htab, slot, req);
650 }
651 return htab;
652 }
653
/*
 * Destroy a hash table built by roffhash_alloc():
 * free every entry, then the table itself.
 * Passing a NULL pointer is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
668
669 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)670 roffhash_find(struct ohash *htab, const char *name, size_t sz)
671 {
672 struct roffreq *req;
673 const char *end;
674
675 if (sz) {
676 end = name + sz;
677 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
678 } else
679 req = ohash_find(htab, ohash_qlookup(htab, name));
680 return req == NULL ? TOKEN_NONE : req->tok;
681 }
682
683 /* --- stack of request blocks -------------------------------------------- */
684
685 /*
686 * Pop the current node off of the stack of roff instructions currently
687 * pending. Return 1 if it is a loop or 0 otherwise.
688 */
689 static int
roffnode_pop(struct roff * r)690 roffnode_pop(struct roff *r)
691 {
692 struct roffnode *p;
693 int inloop;
694
695 p = r->last;
696 inloop = p->tok == ROFF_while;
697 r->last = p->parent;
698 free(p->name);
699 free(p->end);
700 free(p);
701 return inloop;
702 }
703
704 /*
705 * Push a roff node onto the instruction stack. This must later be
706 * removed with roffnode_pop().
707 */
708 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)709 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
710 int line, int col)
711 {
712 struct roffnode *p;
713
714 p = mandoc_calloc(1, sizeof(struct roffnode));
715 p->tok = tok;
716 if (name)
717 p->name = mandoc_strdup(name);
718 p->parent = r->last;
719 p->line = line;
720 p->col = col;
721 p->rule = p->parent ? p->parent->rule : 0;
722
723 r->last = p;
724 }
725
726 /* --- roff parser state data management ---------------------------------- */
727
728 static void
roff_free1(struct roff * r)729 roff_free1(struct roff *r)
730 {
731 int i;
732
733 tbl_free(r->first_tbl);
734 r->first_tbl = r->last_tbl = r->tbl = NULL;
735
736 eqn_free(r->last_eqn);
737 r->last_eqn = r->eqn = NULL;
738
739 while (r->mstackpos >= 0)
740 roff_userret(r);
741
742 while (r->last)
743 roffnode_pop(r);
744
745 free (r->rstack);
746 r->rstack = NULL;
747 r->rstacksz = 0;
748 r->rstackpos = -1;
749
750 roff_freereg(r->regtab);
751 r->regtab = NULL;
752
753 roff_freestr(r->strtab);
754 roff_freestr(r->rentab);
755 roff_freestr(r->xmbtab);
756 r->strtab = r->rentab = r->xmbtab = NULL;
757
758 if (r->xtab)
759 for (i = 0; i < 128; i++)
760 free(r->xtab[i].p);
761 free(r->xtab);
762 r->xtab = NULL;
763 }
764
765 void
roff_reset(struct roff * r)766 roff_reset(struct roff *r)
767 {
768 roff_free1(r);
769 r->options |= MPARSE_COMMENT;
770 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
771 r->control = '\0';
772 r->escape = '\\';
773 roffce_lines = 0;
774 roffce_node = NULL;
775 roffit_lines = 0;
776 roffit_macro = NULL;
777 }
778
779 void
roff_free(struct roff * r)780 roff_free(struct roff *r)
781 {
782 int i;
783
784 roff_free1(r);
785 for (i = 0; i < r->mstacksz; i++)
786 free(r->mstack[i].argv);
787 free(r->mstack);
788 roffhash_free(r->reqtab);
789 free(r);
790 }
791
792 struct roff *
roff_alloc(int options)793 roff_alloc(int options)
794 {
795 struct roff *r;
796
797 r = mandoc_calloc(1, sizeof(struct roff));
798 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
799 r->options = options | MPARSE_COMMENT;
800 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
801 r->mstackpos = -1;
802 r->rstackpos = -1;
803 r->escape = '\\';
804 return r;
805 }
806
807 /* --- syntax tree state data management ---------------------------------- */
808
809 static void
roff_man_free1(struct roff_man * man)810 roff_man_free1(struct roff_man *man)
811 {
812 if (man->meta.first != NULL)
813 roff_node_delete(man, man->meta.first);
814 free(man->meta.msec);
815 free(man->meta.vol);
816 free(man->meta.os);
817 free(man->meta.arch);
818 free(man->meta.title);
819 free(man->meta.name);
820 free(man->meta.date);
821 free(man->meta.sodest);
822 }
823
824 void
roff_state_reset(struct roff_man * man)825 roff_state_reset(struct roff_man *man)
826 {
827 man->last = man->meta.first;
828 man->last_es = NULL;
829 man->flags = 0;
830 man->lastsec = man->lastnamed = SEC_NONE;
831 man->next = ROFF_NEXT_CHILD;
832 roff_setreg(man->roff, "nS", 0, '=');
833 }
834
835 static void
roff_man_alloc1(struct roff_man * man)836 roff_man_alloc1(struct roff_man *man)
837 {
838 memset(&man->meta, 0, sizeof(man->meta));
839 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
840 man->meta.first->type = ROFFT_ROOT;
841 man->meta.macroset = MACROSET_NONE;
842 roff_state_reset(man);
843 }
844
/*
 * Discard the syntax tree and the meta data of the last file parsed
 * and set up an empty tree for parsing the next one.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Release the old content first, then start from scratch. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
851
852 void
roff_man_free(struct roff_man * man)853 roff_man_free(struct roff_man *man)
854 {
855 roff_man_free1(man);
856 free(man->os_r);
857 free(man);
858 }
859
860 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)861 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
862 {
863 struct roff_man *man;
864
865 man = mandoc_calloc(1, sizeof(*man));
866 man->roff = roff;
867 man->os_s = os_s;
868 man->quick = quick;
869 roff_man_alloc1(man);
870 roff->man = man;
871 return man;
872 }
873
874 /* --- syntax tree handling ----------------------------------------------- */
875
876 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)877 roff_node_alloc(struct roff_man *man, int line, int pos,
878 enum roff_type type, int tok)
879 {
880 struct roff_node *n;
881
882 n = mandoc_calloc(1, sizeof(*n));
883 n->line = line;
884 n->pos = pos;
885 n->tok = tok;
886 n->type = type;
887 n->sec = man->lastsec;
888
889 if (man->flags & MDOC_SYNOPSIS)
890 n->flags |= NODE_SYNPRETTY;
891 else
892 n->flags &= ~NODE_SYNPRETTY;
893 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
894 n->flags |= NODE_NOFILL;
895 else
896 n->flags &= ~NODE_NOFILL;
897 if (man->flags & MDOC_NEWLINE)
898 n->flags |= NODE_LINE;
899 man->flags &= ~MDOC_NEWLINE;
900
901 return n;
902 }
903
904 void
roff_node_append(struct roff_man * man,struct roff_node * n)905 roff_node_append(struct roff_man *man, struct roff_node *n)
906 {
907
908 switch (man->next) {
909 case ROFF_NEXT_SIBLING:
910 if (man->last->next != NULL) {
911 n->next = man->last->next;
912 man->last->next->prev = n;
913 } else
914 man->last->parent->last = n;
915 man->last->next = n;
916 n->prev = man->last;
917 n->parent = man->last->parent;
918 break;
919 case ROFF_NEXT_CHILD:
920 if (man->last->child != NULL) {
921 n->next = man->last->child;
922 man->last->child->prev = n;
923 } else
924 man->last->last = n;
925 man->last->child = n;
926 n->parent = man->last;
927 break;
928 default:
929 abort();
930 }
931 man->last = n;
932
933 switch (n->type) {
934 case ROFFT_HEAD:
935 n->parent->head = n;
936 break;
937 case ROFFT_BODY:
938 if (n->end != ENDBODY_NOT)
939 return;
940 n->parent->body = n;
941 break;
942 case ROFFT_TAIL:
943 n->parent->tail = n;
944 break;
945 default:
946 return;
947 }
948
949 /*
950 * Copy over the normalised-data pointer of our parent. Not
951 * everybody has one, but copying a null pointer is fine.
952 */
953
954 n->norm = n->parent->norm;
955 assert(n->parent->type == ROFFT_BLOCK);
956 }
957
958 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)959 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
960 {
961 struct roff_node *n;
962
963 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
964 n->string = roff_strdup(man->roff, word);
965 roff_node_append(man, n);
966 n->flags |= NODE_VALID | NODE_ENDED;
967 man->next = ROFF_NEXT_SIBLING;
968 }
969
970 void
roff_word_append(struct roff_man * man,const char * word)971 roff_word_append(struct roff_man *man, const char *word)
972 {
973 struct roff_node *n;
974 char *addstr, *newstr;
975
976 n = man->last;
977 addstr = roff_strdup(man->roff, word);
978 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
979 free(addstr);
980 free(n->string);
981 n->string = newstr;
982 man->next = ROFF_NEXT_SIBLING;
983 }
984
985 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)986 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
987 {
988 struct roff_node *n;
989
990 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
991 roff_node_append(man, n);
992 man->next = ROFF_NEXT_CHILD;
993 }
994
995 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)996 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
997 {
998 struct roff_node *n;
999
1000 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1001 roff_node_append(man, n);
1002 man->next = ROFF_NEXT_CHILD;
1003 return n;
1004 }
1005
1006 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1007 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1008 {
1009 struct roff_node *n;
1010
1011 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1012 roff_node_append(man, n);
1013 man->next = ROFF_NEXT_CHILD;
1014 return n;
1015 }
1016
1017 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1018 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1019 {
1020 struct roff_node *n;
1021
1022 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1023 roff_node_append(man, n);
1024 man->next = ROFF_NEXT_CHILD;
1025 return n;
1026 }
1027
1028 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1029 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1030 {
1031 struct roff_node *n;
1032 struct tbl_span *span;
1033
1034 if (man->meta.macroset == MACROSET_MAN)
1035 man_breakscope(man, ROFF_TS);
1036 while ((span = tbl_span(tbl)) != NULL) {
1037 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1038 n->span = span;
1039 roff_node_append(man, n);
1040 n->flags |= NODE_VALID | NODE_ENDED;
1041 man->next = ROFF_NEXT_SIBLING;
1042 }
1043 }
1044
1045 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1046 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1047 {
1048
1049 /* Adjust siblings. */
1050
1051 if (n->prev)
1052 n->prev->next = n->next;
1053 if (n->next)
1054 n->next->prev = n->prev;
1055
1056 /* Adjust parent. */
1057
1058 if (n->parent != NULL) {
1059 if (n->parent->child == n)
1060 n->parent->child = n->next;
1061 if (n->parent->last == n)
1062 n->parent->last = n->prev;
1063 }
1064
1065 /* Adjust parse point. */
1066
1067 if (man == NULL)
1068 return;
1069 if (man->last == n) {
1070 if (n->prev == NULL) {
1071 man->last = n->parent;
1072 man->next = ROFF_NEXT_CHILD;
1073 } else {
1074 man->last = n->prev;
1075 man->next = ROFF_NEXT_SIBLING;
1076 }
1077 }
1078 if (man->meta.first == n)
1079 man->meta.first = NULL;
1080 }
1081
1082 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1083 roff_node_relink(struct roff_man *man, struct roff_node *n)
1084 {
1085 roff_node_unlink(man, n);
1086 n->prev = n->next = NULL;
1087 roff_node_append(man, n);
1088 }
1089
1090 void
roff_node_free(struct roff_node * n)1091 roff_node_free(struct roff_node *n)
1092 {
1093
1094 if (n->args != NULL)
1095 mdoc_argv_free(n->args);
1096 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1097 free(n->norm);
1098 eqn_box_free(n->eqn);
1099 free(n->string);
1100 free(n->tag);
1101 free(n);
1102 }
1103
1104 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1105 roff_node_delete(struct roff_man *man, struct roff_node *n)
1106 {
1107
1108 while (n->child != NULL)
1109 roff_node_delete(man, n->child);
1110 roff_node_unlink(man, n);
1111 roff_node_free(n);
1112 }
1113
1114 int
roff_node_transparent(struct roff_node * n)1115 roff_node_transparent(struct roff_node *n)
1116 {
1117 if (n == NULL)
1118 return 0;
1119 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1120 return 1;
1121 return roff_tok_transparent(n->tok);
1122 }
1123
1124 int
roff_tok_transparent(enum roff_tok tok)1125 roff_tok_transparent(enum roff_tok tok)
1126 {
1127 switch (tok) {
1128 case ROFF_ft:
1129 case ROFF_ll:
1130 case ROFF_mc:
1131 case ROFF_po:
1132 case ROFF_ta:
1133 case MDOC_Db:
1134 case MDOC_Es:
1135 case MDOC_Sm:
1136 case MDOC_Tg:
1137 case MAN_DT:
1138 case MAN_UC:
1139 case MAN_PD:
1140 case MAN_AT:
1141 return 1;
1142 default:
1143 return 0;
1144 }
1145 }
1146
1147 struct roff_node *
roff_node_child(struct roff_node * n)1148 roff_node_child(struct roff_node *n)
1149 {
1150 for (n = n->child; roff_node_transparent(n); n = n->next)
1151 continue;
1152 return n;
1153 }
1154
1155 struct roff_node *
roff_node_prev(struct roff_node * n)1156 roff_node_prev(struct roff_node *n)
1157 {
1158 do {
1159 n = n->prev;
1160 } while (roff_node_transparent(n));
1161 return n;
1162 }
1163
1164 struct roff_node *
roff_node_next(struct roff_node * n)1165 roff_node_next(struct roff_node *n)
1166 {
1167 do {
1168 n = n->next;
1169 } while (roff_node_transparent(n));
1170 return n;
1171 }
1172
1173 void
deroff(char ** dest,const struct roff_node * n)1174 deroff(char **dest, const struct roff_node *n)
1175 {
1176 char *cp;
1177 size_t sz;
1178
1179 if (n->string == NULL) {
1180 for (n = n->child; n != NULL; n = n->next)
1181 deroff(dest, n);
1182 return;
1183 }
1184
1185 /* Skip leading whitespace. */
1186
1187 for (cp = n->string; *cp != '\0'; cp++) {
1188 if (cp[0] == '\\' && cp[1] != '\0' &&
1189 strchr(" %&0^|~", cp[1]) != NULL)
1190 cp++;
1191 else if ( ! isspace((unsigned char)*cp))
1192 break;
1193 }
1194
1195 /* Skip trailing backslash. */
1196
1197 sz = strlen(cp);
1198 if (sz > 0 && cp[sz - 1] == '\\')
1199 sz--;
1200
1201 /* Skip trailing whitespace. */
1202
1203 for (; sz; sz--)
1204 if ( ! isspace((unsigned char)cp[sz-1]))
1205 break;
1206
1207 /* Skip empty strings. */
1208
1209 if (sz == 0)
1210 return;
1211
1212 if (*dest == NULL) {
1213 *dest = mandoc_strndup(cp, sz);
1214 return;
1215 }
1216
1217 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1218 free(*dest);
1219 *dest = cp;
1220 }
1221
1222 /* --- main functions of the roff parser ---------------------------------- */
1223
/*
 * Save comments preceding the title macro, for example in order to
 * preserve Copyright and license headers in HTML output,
 * provide diagnostics about RCS ids and trailing whitespace in comments,
 * then discard comments including preceding whitespace.
 * This function also handles input line continuation.
 *
 * The line in buf is scanned starting at offset pos and modified
 * in place; ec is the escape character currently in force and
 * ln is the input line number used for messages.
 * Returns ROFF_CONT if the (possibly truncated) line is to be
 * processed further, or ROFF_IGN | ROFF_APPEND if the line ends
 * with a continuation request, either a trailing escape character
 * or a comment introduced by the escape character followed by '#'.
 */
static int
roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	const char	*cp;	/* for RCS id parsing */
	char		*stesc;	/* start of an escape sequence ('\\') */
	char		*ep;	/* end of comment string */
	int		 rcsid;	/* kind of RCS id seen */

	/*
	 * Find the first comment on the line.
	 * All the return statements inside this loop
	 * handle lines that do not contain any comment.
	 */
	for (start = stesc = buf->buf + pos;; stesc++) {
		/*
		 * XXX Ugly hack: Remove the newline character that
		 * mparse_buf_r() appended to mark the end of input
		 * if it is not preceded by an escape character.
		 */
		if (stesc[0] == '\n') {
			assert(stesc[1] == '\0');
			stesc[0] = '\0';
		}

		/* The line ends without continuation or comment. */
		if (stesc[0] == '\0')
			return ROFF_CONT;

		/* Unescaped byte: skip it. */
		if (stesc[0] != ec)
			continue;

		/*
		 * XXX Ugly hack: Do not attempt to append another line
		 * if the function mparse_buf_r() appended a newline
		 * character to indicate the end of input.
		 */
		if (stesc[1] == '\n') {
			assert(stesc[2] == '\0');
			stesc[0] = '\0';
			return ROFF_CONT;
		}

		/*
		 * An escape character at the end of an input line
		 * requests line continuation.
		 */
		if (stesc[1] == '\0') {
			stesc[0] = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Found a comment: process it. */
		if (stesc[1] == '"' || stesc[1] == '#')
			break;

		/* Escaped escape character: skip them both. */
		if (stesc[1] == ec)
			stesc++;
	}

	/*
	 * Look for an RCS id in the comment.
	 * The string literals are split such that version control
	 * does not mistake them for keywords to be expanded in
	 * this file (presumably; the code works either way).
	 * After the keyword name, cp points to the byte following it.
	 */

	rcsid = 0;
	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_OPENBSD;
		cp += 8;
	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_NETBSD;
		cp += 7;
	}
	/*
	 * Only accept the keyword if it is not merely the prefix of
	 * a longer word and if a closing dollar sign follows.
	 * Seeing the same kind of id twice triggers a message.
	 */
	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
	    strchr(cp, '$') != NULL) {
		if (r->man->meta.rcsids & rcsid)
			mandoc_msg(MANDOCERR_RCS_REP, ln,
			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
		r->man->meta.rcsids |= rcsid;
	}

	/* Warn about trailing whitespace at the end of the comment. */

	ep = strchr(stesc + 2, '\0') - 1;
	if (*ep == '\n')
		*ep-- = '\0';
	if (*ep == ' ' || *ep == '\t')
		mandoc_msg(MANDOCERR_SPACE_EOL,
		    ln, (int)(ep - buf->buf), NULL);

	/*
	 * Save comments preceding the title macro in the syntax tree.
	 * The comment text is stored without the two bytes
	 * introducing the comment and without trailing whitespace.
	 */

	if (r->options & MPARSE_COMMENT) {
		while (*ep == ' ' || *ep == '\t')
			ep--;
		ep[1] = '\0';
		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
		    ROFFT_COMMENT, TOKEN_NONE);
		n->string = mandoc_strdup(stesc + 2);
		roff_node_append(r->man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	/* The comment requests line continuation. */

	if (stesc[1] == '#') {
		*stesc = '\0';
		return ROFF_IGN | ROFF_APPEND;
	}

	/*
	 * Discard the comment including preceding whitespace.
	 * A blank preceded by a backslash is kept.
	 */

	while (stesc > start && stesc[-1] == ' ' &&
	    (stesc == start + 1 || stesc[-2] != '\\'))
		stesc--;
	*stesc = '\0';
	return ROFF_CONT;
}
1345
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The line in buf is processed starting at offset pos; buf->buf may
 * be reallocated and buf->sz changed.  The escape character currently
 * in force is ec, and ln is the input line number used for messages.
 * Text resulting from an expansion is scanned again.
 * Returns ROFF_CONT on success or ROFF_IGN if there were more than
 * EXPAND_LIMIT expansions or the line would grow beyond SHRT_MAX.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	char		 ubuf[24];	/* buffer to print a number */
	struct mctx	*ctx;		/* current macro call context */
	const char	*res;		/* the string to be pasted */
	const char	*src;		/* source for copying */
	char		*dst;		/* destination for copying */
	enum mandoc_esc	 subtype;	/* return value from roff_escape */
	int		 iesc;		/* index of leading escape char */
	int		 inam;		/* index of the escape name */
	int		 iarg;		/* index beginning the argument */
	int		 iendarg;	/* index right after the argument */
	int		 iend;		/* index right after the sequence */
	int		 isrc, idst;	/* to reduce \\ and \. in names */
	int		 deftype;	/* type of definition to paste */
	int		 argi;		/* macro argument index */
	int		 quote_args;	/* true for \\$@, false for \\$* */
	int		 asz;		/* length of the replacement */
	int		 rsz;		/* length of the rest of the string */
	int		 npos;		/* position in numeric expression */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;
	while (buf->buf[pos] != '\0') {

		/*
		 * Skip plain ASCII characters.
		 * If we have a non-standard escape character,
		 * escape literal backslashes because all processing in
		 * subsequent functions uses the standard escaping rules.
		 */

		if (buf->buf[pos] != ec) {
			if (buf->buf[pos] == '\\') {
				roff_expand_patch(buf, pos, "\\e", pos + 1);
				pos++;
			}
			pos++;
			continue;
		}

		/*
		 * Parse escape sequences,
		 * issue diagnostic messages when appropriate,
		 * and skip sequences that do not need expansion.
		 * If we have a non-standard escape character, translate
		 * it to backslashes and translate backslashes to \e.
		 */

		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
			while (pos < iend) {
				if (buf->buf[pos] == ec) {
					buf->buf[pos] = '\\';
					if (pos + 1 < iend)
						pos++;
				} else if (buf->buf[pos] == '\\') {
					roff_expand_patch(buf,
					    pos, "\\e", pos + 1);
					pos++;
					/* The sequence grew by one byte. */
					iend++;
				}
				pos++;
			}
			continue;
		}

		/* Reduce \\ and \. in names. */

		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
			isrc = idst = iarg;
			while (isrc < iendarg) {
				if (isrc + 1 < iendarg &&
				    buf->buf[isrc] == '\\' &&
				    (buf->buf[isrc + 1] == '\\' ||
				     buf->buf[isrc + 1] == '.'))
					isrc++;
				buf->buf[idst++] = buf->buf[isrc++];
			}
			/* The name is shorter by the bytes dropped. */
			iendarg -= isrc - idst;
		}

		/*
		 * Handle expansion.
		 * Cases ending in "break" paste res, or the empty
		 * string if res remains NULL, at the bottom of the loop.
		 * Cases ending in "continue" have already
		 * dealt with the buffer themselves.
		 */

		res = NULL;
		switch (buf->buf[inam]) {
		case '*':
			/* Paste a user-defined or predefined string. */
			if (iendarg == iarg)
				break;
			deftype = ROFFDEF_USER | ROFFDEF_PRE;
			if ((res = roff_getstrn(r, buf->buf + iarg,
			    iendarg - iarg, &deftype)) != NULL)
				break;

			/*
			 * If not overridden,
			 * let \*(.T through to the formatters.
			 */

			if (iendarg - iarg == 2 &&
			    buf->buf[iarg] == '.' &&
			    buf->buf[iarg + 1] == 'T') {
				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
				pos = iend;
				continue;
			}

			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
			    "%.*s", iendarg - iarg, buf->buf + iarg);
			break;

		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* The digits 1 to 9 select a single argument. */
			argi = buf->buf[iarg] - '1';
			if (argi >= 0 && argi <= 8) {
				if (argi < ctx->argc)
					res = ctx->argv[argi];
				break;
			}
			if (buf->buf[iarg] == '*')
				quote_args = 0;
			else if (buf->buf[iarg] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			/*
			 * Paste all arguments, separated by blanks.
			 * First, calculate the length of the replacement.
			 */
			asz = 0;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[argi]);
			}
			/*
			 * Resize the buffer and move the rest of the line.
			 * When shrinking, the rest has to be moved before
			 * the reallocation; when growing, afterwards.
			 */
			if (asz != iend - iesc) {
				rsz = buf->sz - iend;
				if (asz < iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
				buf->sz = iesc + asz + rsz;
				buf->buf = mandoc_realloc(buf->buf, buf->sz);
				if (asz > iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
			}
			/* Copy the arguments into the gap. */
			dst = buf->buf + iesc;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					*dst++ = ' ';
				if (quote_args)
					*dst++ = '"';
				src = ctx->argv[argi];
				while (*src != '\0')
					*dst++ = *src++;
				if (quote_args)
					*dst++ = '"';
			}
			continue;
		case 'A':
			/* Expands to 1 unless the argument is empty. */
			ubuf[0] = iendarg > iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'B':
			/*
			 * Expands to 1 if the argument is not empty,
			 * if the sequence extends beyond the argument,
			 * and if roff_evalnum() succeeds and consumes
			 * the complete argument.
			 */
			npos = 0;
			ubuf[0] = iendarg > iarg && iend > iendarg &&
			    roff_evalnum(ln, buf->buf + iarg, &npos,
					 NULL, 'u', 0) &&
			    npos == iendarg - iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'V':
			/*
			 * Unsupported: complain and rewrite the sequence
			 * to the argument enclosed in "${" and "}".
			 * The end is patched first such that the
			 * indices near the beginning remain valid.
			 */
			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
			    "%.*s", iend - iesc, buf->buf + iesc);
			roff_expand_patch(buf, iendarg, "}", iend);
			roff_expand_patch(buf, iesc, "${", iarg);
			continue;
		case 'g':
			/* Expands to nothing. */
			break;
		case 'n':
			/* Paste the value returned by roff_getregn(). */
			if (iendarg > iarg)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, buf->buf + iarg,
				    iendarg - iarg, buf->buf[inam + 1]));
			else
				ubuf[0] = '\0';
			res = ubuf;
			break;
		case 'w':
			/*
			 * Count the characters in the argument:
			 * one for each byte outside escape sequences
			 * and for each of the escape types listed below,
			 * eight for ESCAPE_DEVICE, none for other escapes,
			 * and none for whatever follows ESCAPE_SKIPCHAR.
			 * The result is the count multiplied by 24.
			 */
			rsz = 0;
			subtype = ESCAPE_UNDEF;
			while (iarg < iendarg) {
				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
				if (buf->buf[iarg] != '\\') {
					rsz += asz;
					iarg++;
					continue;
				}
				switch ((subtype = roff_escape(buf->buf, 0,
				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
				case ESCAPE_SPECIAL:
				case ESCAPE_NUMBERED:
				case ESCAPE_UNICODE:
				case ESCAPE_OVERSTRIKE:
				case ESCAPE_UNDEF:
					break;
				case ESCAPE_DEVICE:
					asz *= 8;
					break;
				case ESCAPE_EXPAND:
					abort();
				default:
					continue;
				}
				rsz += asz;
			}
			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
			res = ubuf;
			break;
		default:
			break;
		}
		if (res == NULL)
			res = "";
		/*
		 * Give up on the line rather than looping forever
		 * or letting the line grow without bounds.
		 */
		if (++expand_count > EXPAND_LIMIT ||
		    buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
			return ROFF_IGN;
		}
		/*
		 * The parse position is not advanced here,
		 * so the pasted text is scanned again.
		 */
		roff_expand_patch(buf, iesc, res, iend);
	}
	return ROFF_CONT;
}
1593
1594 /*
1595 * Replace the substring from the start position (inclusive)
1596 * to end position (exclusive) with the repl(acement) string.
1597 */
1598 static void
roff_expand_patch(struct buf * buf,int start,const char * repl,int end)1599 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1600 {
1601 char *nbuf;
1602
1603 buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1604 repl, buf->buf + end) + 1;
1605 free(buf->buf);
1606 buf->buf = nbuf;
1607 }
1608
1609 /*
1610 * Parse a quoted or unquoted roff-style request or macro argument.
1611 * Return a pointer to the parsed argument, which is either the original
1612 * pointer or advanced by one byte in case the argument is quoted.
1613 * NUL-terminate the argument in place.
1614 * Collapse pairs of quotes inside quoted arguments.
1615 * Advance the argument pointer to the next argument,
1616 * or to the NUL byte terminating the argument line.
1617 */
1618 char *
roff_getarg(struct roff * r,char ** cpp,int ln,int * pos)1619 roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
1620 {
1621 struct buf buf;
1622 char *cp, *start;
1623 int newesc, pairs, quoted, white;
1624
1625 /* Quoting can only start with a new word. */
1626 start = *cpp;
1627 quoted = 0;
1628 if ('"' == *start) {
1629 quoted = 1;
1630 start++;
1631 }
1632
1633 newesc = pairs = white = 0;
1634 for (cp = start; '\0' != *cp; cp++) {
1635
1636 /*
1637 * Move the following text left
1638 * after quoted quotes and after "\\" and "\t".
1639 */
1640 if (pairs)
1641 cp[-pairs] = cp[0];
1642
1643 if ('\\' == cp[0]) {
1644 /*
1645 * In copy mode, translate double to single
1646 * backslashes and backslash-t to literal tabs.
1647 */
1648 switch (cp[1]) {
1649 case 'a':
1650 case 't':
1651 cp[-pairs] = '\t';
1652 pairs++;
1653 cp++;
1654 break;
1655 case '\\':
1656 cp[-pairs] = '\\';
1657 newesc = 1;
1658 pairs++;
1659 cp++;
1660 break;
1661 case ' ':
1662 /* Skip escaped blanks. */
1663 if (0 == quoted)
1664 cp++;
1665 break;
1666 default:
1667 break;
1668 }
1669 } else if (0 == quoted) {
1670 if (' ' == cp[0]) {
1671 /* Unescaped blanks end unquoted args. */
1672 white = 1;
1673 break;
1674 }
1675 } else if ('"' == cp[0]) {
1676 if ('"' == cp[1]) {
1677 /* Quoted quotes collapse. */
1678 pairs++;
1679 cp++;
1680 } else {
1681 /* Unquoted quotes end quoted args. */
1682 quoted = 2;
1683 break;
1684 }
1685 }
1686 }
1687
1688 /* Quoted argument without a closing quote. */
1689 if (1 == quoted)
1690 mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);
1691
1692 /* NUL-terminate this argument and move to the next one. */
1693 if (pairs)
1694 cp[-pairs] = '\0';
1695 if ('\0' != *cp) {
1696 *cp++ = '\0';
1697 while (' ' == *cp)
1698 cp++;
1699 }
1700 *pos += (int)(cp - start) + (quoted ? 1 : 0);
1701 *cpp = cp;
1702
1703 if ('\0' == *cp && (white || ' ' == cp[-1]))
1704 mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);
1705
1706 start = mandoc_strdup(start);
1707 if (newesc == 0)
1708 return start;
1709
1710 buf.buf = start;
1711 buf.sz = strlen(start) + 1;
1712 buf.next = NULL;
1713 if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
1714 free(buf.buf);
1715 buf.buf = mandoc_strdup("");
1716 }
1717 return buf.buf;
1718 }
1719
1720
1721 /*
1722 * Process text streams.
1723 */
1724 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1725 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1726 {
1727 size_t sz;
1728 const char *start;
1729 char *p;
1730 int isz;
1731 enum mandoc_esc esc;
1732
1733 /* Spring the input line trap. */
1734
1735 if (roffit_lines == 1) {
1736 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1737 free(buf->buf);
1738 buf->buf = p;
1739 buf->sz = isz + 1;
1740 *offs = 0;
1741 free(roffit_macro);
1742 roffit_lines = 0;
1743 return ROFF_REPARSE;
1744 } else if (roffit_lines > 1)
1745 --roffit_lines;
1746
1747 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1748 if (roffce_lines < 1) {
1749 r->man->last = roffce_node;
1750 r->man->next = ROFF_NEXT_SIBLING;
1751 roffce_lines = 0;
1752 roffce_node = NULL;
1753 } else
1754 roffce_lines--;
1755 }
1756
1757 /* Convert all breakable hyphens into ASCII_HYPH. */
1758
1759 start = p = buf->buf + pos;
1760
1761 while (*p != '\0') {
1762 sz = strcspn(p, "-\\");
1763 p += sz;
1764
1765 if (*p == '\0')
1766 break;
1767
1768 if (*p == '\\') {
1769 /* Skip over escapes. */
1770 p++;
1771 esc = mandoc_escape((const char **)&p, NULL, NULL);
1772 if (esc == ESCAPE_ERROR)
1773 break;
1774 while (*p == '-')
1775 p++;
1776 continue;
1777 } else if (p == start) {
1778 p++;
1779 continue;
1780 }
1781
1782 if (isalpha((unsigned char)p[-1]) &&
1783 isalpha((unsigned char)p[1]))
1784 *p = ASCII_HYPH;
1785 p++;
1786 }
1787 return ROFF_CONT;
1788 }
1789
/*
 * Process one input line at the roff level.
 * The line is contained in buf, which may be modified or replaced,
 * and parsing starts at the offset *offs.  The input line number ln
 * is used for messages, and len is used for the check for overly
 * long text lines (presumably the length of the input line; to be
 * confirmed against the caller).
 * Returns a combination of ROFF_* values telling the caller
 * how to proceed with the line.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but only in fill mode, outside
	 * tables and equations, and only if the line contains a blank
	 * among its first 80 bytes.  Lines starting with a blank, a dot,
	 * a backslash, or the control character are exempt.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Handle comments and escape sequences. */

	e = roff_parse_comment(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* From here on, pos may differ from ppos. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep the bits outside ROFF_MASK for the final result. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, every line except .EN is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* Inside a table, text lines and empty requests are table input. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	/* The first text line ends the saving of header comments. */
	if ( ! ctl) {
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* The first request or macro ends the saving of header comments. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1888
1889 /*
1890 * Handle a new request or macro.
1891 * May be called outside any scope or from inside a conditional scope.
1892 */
1893 static int
roff_req_or_macro(ROFF_ARGS)1894 roff_req_or_macro(ROFF_ARGS) {
1895
1896 /* For now, tables ignore most macros and some request. */
1897
1898 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1899 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1900 tok == ROFF_sp)) {
1901 mandoc_msg(MANDOCERR_TBLMACRO,
1902 ln, ppos, "%s", buf->buf + ppos);
1903 if (tok != TOKEN_NONE)
1904 return ROFF_IGN;
1905 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1906 pos++;
1907 while (buf->buf[pos] == ' ')
1908 pos++;
1909 tbl_read(r->tbl, ln, buf->buf, pos);
1910 roff_addtbl(r->man, ln, r->tbl);
1911 return ROFF_IGN;
1912 }
1913
1914 /* For now, let high level macros abort .ce mode. */
1915
1916 if (roffce_node != NULL &&
1917 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1918 tok == ROFF_TH || tok == ROFF_TS)) {
1919 r->man->last = roffce_node;
1920 r->man->next = ROFF_NEXT_SIBLING;
1921 roffce_lines = 0;
1922 roffce_node = NULL;
1923 }
1924
1925 /*
1926 * This is neither a roff request nor a user-defined macro.
1927 * Let the standard macro set parsers handle it.
1928 */
1929
1930 if (tok == TOKEN_NONE)
1931 return ROFF_CONT;
1932
1933 /* Execute a roff request or a user-defined macro. */
1934
1935 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1936 }
1937
1938 /*
1939 * Internal interface function to tell the roff parser that execution
1940 * of the current macro ended. This is required because macro
1941 * definitions usually do not end with a .return request.
1942 */
1943 void
roff_userret(struct roff * r)1944 roff_userret(struct roff *r)
1945 {
1946 struct mctx *ctx;
1947 int i;
1948
1949 assert(r->mstackpos >= 0);
1950 ctx = r->mstack + r->mstackpos;
1951 for (i = 0; i < ctx->argc; i++)
1952 free(ctx->argv[i]);
1953 ctx->argc = 0;
1954 r->mstackpos--;
1955 }
1956
1957 void
roff_endparse(struct roff * r)1958 roff_endparse(struct roff *r)
1959 {
1960 if (r->last != NULL)
1961 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1962 r->last->col, "%s", roff_name[r->last->tok]);
1963
1964 if (r->eqn != NULL) {
1965 mandoc_msg(MANDOCERR_BLK_NOEND,
1966 r->eqn->node->line, r->eqn->node->pos, "EQ");
1967 eqn_parse(r->eqn);
1968 r->eqn = NULL;
1969 }
1970
1971 if (r->tbl != NULL) {
1972 tbl_end(r->tbl, 1);
1973 r->tbl = NULL;
1974 }
1975 }
1976
1977 /*
1978 * Parse the request or macro name at buf[*pos].
1979 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1980 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1981 * As a side effect, set r->current_string to the definition or to NULL.
1982 */
1983 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1984 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1985 {
1986 char *cp;
1987 const char *mac;
1988 size_t maclen;
1989 int deftype;
1990 enum roff_tok t;
1991
1992 cp = buf + *pos;
1993
1994 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
1995 return TOKEN_NONE;
1996
1997 mac = cp;
1998 maclen = roff_getname(&cp, ln, ppos);
1999
2000 deftype = ROFFDEF_USER | ROFFDEF_REN;
2001 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2002 switch (deftype) {
2003 case ROFFDEF_USER:
2004 t = ROFF_USERDEF;
2005 break;
2006 case ROFFDEF_REN:
2007 t = ROFF_RENAMED;
2008 break;
2009 default:
2010 t = roffhash_find(r->reqtab, mac, maclen);
2011 break;
2012 }
2013 if (t != TOKEN_NONE)
2014 *pos = cp - buf;
2015 else if (deftype == ROFFDEF_UNDEF) {
2016 /* Using an undefined macro defines it to be empty. */
2017 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2018 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2019 }
2020 return t;
2021 }
2022
2023 /* --- handling of request blocks ----------------------------------------- */
2024
2025 /*
2026 * Close a macro definition block or an "ignore" block.
2027 */
2028 static int
roff_cblock(ROFF_ARGS)2029 roff_cblock(ROFF_ARGS)
2030 {
2031 int rr;
2032
2033 if (r->last == NULL) {
2034 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2035 return ROFF_IGN;
2036 }
2037
2038 switch (r->last->tok) {
2039 case ROFF_am:
2040 case ROFF_ami:
2041 case ROFF_de:
2042 case ROFF_dei:
2043 case ROFF_ig:
2044 break;
2045 case ROFF_am1:
2046 case ROFF_de1:
2047 /* Remapped in roff_block(). */
2048 abort();
2049 default:
2050 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2051 return ROFF_IGN;
2052 }
2053
2054 roffnode_pop(r);
2055 roffnode_cleanscope(r);
2056
2057 /*
2058 * If a conditional block with braces is still open,
2059 * check for "\}" block end markers.
2060 */
2061
2062 if (r->last != NULL && r->last->endspan < 0) {
2063 rr = 1; /* If arguments follow "\}", warn about them. */
2064 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2065 }
2066
2067 if (buf->buf[pos] != '\0')
2068 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2069 ".. %s", buf->buf + pos);
2070
2071 return ROFF_IGN;
2072 }
2073
2074 /*
2075 * Pop all nodes ending at the end of the current input line.
2076 * Return the number of loops ended.
2077 */
2078 static int
roffnode_cleanscope(struct roff * r)2079 roffnode_cleanscope(struct roff *r)
2080 {
2081 int inloop;
2082
2083 inloop = 0;
2084 while (r->last != NULL && r->last->endspan > 0) {
2085 if (--r->last->endspan != 0)
2086 break;
2087 inloop += roffnode_pop(r);
2088 }
2089 return inloop;
2090 }
2091
2092 /*
2093 * Handle the closing "\}" of a conditional block.
2094 * Apart from generating warnings, this only pops nodes.
2095 * Return the number of loops ended.
2096 */
2097 static int
roff_ccond(struct roff * r,int ln,int ppos)2098 roff_ccond(struct roff *r, int ln, int ppos)
2099 {
2100 if (NULL == r->last) {
2101 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2102 return 0;
2103 }
2104
2105 switch (r->last->tok) {
2106 case ROFF_el:
2107 case ROFF_ie:
2108 case ROFF_if:
2109 case ROFF_while:
2110 break;
2111 default:
2112 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2113 return 0;
2114 }
2115
2116 if (r->last->endspan > -1) {
2117 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2118 return 0;
2119 }
2120
2121 return roffnode_pop(r) + roffnode_cleanscope(r);
2122 }
2123
2124 static int
roff_block(ROFF_ARGS)2125 roff_block(ROFF_ARGS)
2126 {
2127 const char *name, *value;
2128 char *call, *cp, *iname, *rname;
2129 size_t csz, namesz, rsz;
2130 int deftype;
2131
2132 /* Ignore groff compatibility mode for now. */
2133
2134 if (tok == ROFF_de1)
2135 tok = ROFF_de;
2136 else if (tok == ROFF_dei1)
2137 tok = ROFF_dei;
2138 else if (tok == ROFF_am1)
2139 tok = ROFF_am;
2140 else if (tok == ROFF_ami1)
2141 tok = ROFF_ami;
2142
2143 /* Parse the macro name argument. */
2144
2145 cp = buf->buf + pos;
2146 if (tok == ROFF_ig) {
2147 iname = NULL;
2148 namesz = 0;
2149 } else {
2150 iname = cp;
2151 namesz = roff_getname(&cp, ln, ppos);
2152 iname[namesz] = '\0';
2153 }
2154
2155 /* Resolve the macro name argument if it is indirect. */
2156
2157 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2158 deftype = ROFFDEF_USER;
2159 name = roff_getstrn(r, iname, namesz, &deftype);
2160 if (name == NULL) {
2161 mandoc_msg(MANDOCERR_STR_UNDEF,
2162 ln, (int)(iname - buf->buf),
2163 "%.*s", (int)namesz, iname);
2164 namesz = 0;
2165 } else
2166 namesz = strlen(name);
2167 } else
2168 name = iname;
2169
2170 if (namesz == 0 && tok != ROFF_ig) {
2171 mandoc_msg(MANDOCERR_REQ_EMPTY,
2172 ln, ppos, "%s", roff_name[tok]);
2173 return ROFF_IGN;
2174 }
2175
2176 roffnode_push(r, tok, name, ln, ppos);
2177
2178 /*
2179 * At the beginning of a `de' macro, clear the existing string
2180 * with the same name, if there is one. New content will be
2181 * appended from roff_block_text() in multiline mode.
2182 */
2183
2184 if (tok == ROFF_de || tok == ROFF_dei) {
2185 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2186 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2187 } else if (tok == ROFF_am || tok == ROFF_ami) {
2188 deftype = ROFFDEF_ANY;
2189 value = roff_getstrn(r, iname, namesz, &deftype);
2190 switch (deftype) { /* Before appending, ... */
2191 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2192 roff_setstrn(&r->strtab, name, namesz,
2193 value, strlen(value), 0);
2194 break;
2195 case ROFFDEF_REN: /* call original standard macro. */
2196 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2197 (int)strlen(value), value);
2198 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2199 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2200 free(call);
2201 break;
2202 case ROFFDEF_STD: /* rename and call standard macro. */
2203 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2204 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2205 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2206 (int)rsz, rname);
2207 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2208 free(call);
2209 free(rname);
2210 break;
2211 default:
2212 break;
2213 }
2214 }
2215
2216 if (*cp == '\0')
2217 return ROFF_IGN;
2218
2219 /* Get the custom end marker. */
2220
2221 iname = cp;
2222 namesz = roff_getname(&cp, ln, ppos);
2223
2224 /* Resolve the end marker if it is indirect. */
2225
2226 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2227 deftype = ROFFDEF_USER;
2228 name = roff_getstrn(r, iname, namesz, &deftype);
2229 if (name == NULL) {
2230 mandoc_msg(MANDOCERR_STR_UNDEF,
2231 ln, (int)(iname - buf->buf),
2232 "%.*s", (int)namesz, iname);
2233 namesz = 0;
2234 } else
2235 namesz = strlen(name);
2236 } else
2237 name = iname;
2238
2239 if (namesz)
2240 r->last->end = mandoc_strndup(name, namesz);
2241
2242 if (*cp != '\0')
2243 mandoc_msg(MANDOCERR_ARG_EXCESS,
2244 ln, pos, ".%s ... %s", roff_name[tok], cp);
2245
2246 return ROFF_IGN;
2247 }
2248
2249 static int
roff_block_sub(ROFF_ARGS)2250 roff_block_sub(ROFF_ARGS)
2251 {
2252 enum roff_tok t;
2253 int i, j;
2254
2255 /*
2256 * If a custom end marker is a user-defined or predefined macro
2257 * or a request, interpret it.
2258 */
2259
2260 if (r->last->end) {
2261 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2262 if (buf->buf[i] != r->last->end[j])
2263 break;
2264
2265 if (r->last->end[j] == '\0' &&
2266 (buf->buf[i] == '\0' ||
2267 buf->buf[i] == ' ' ||
2268 buf->buf[i] == '\t')) {
2269 roffnode_pop(r);
2270 roffnode_cleanscope(r);
2271
2272 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2273 i++;
2274
2275 pos = i;
2276 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2277 TOKEN_NONE)
2278 return ROFF_RERUN;
2279 return ROFF_IGN;
2280 }
2281 }
2282
2283 /* Handle the standard end marker. */
2284
2285 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2286 if (t == ROFF_cblock)
2287 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2288
2289 /* Not an end marker, so append the line to the block. */
2290
2291 if (tok != ROFF_ig)
2292 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2293 return ROFF_IGN;
2294 }
2295
2296 static int
roff_block_text(ROFF_ARGS)2297 roff_block_text(ROFF_ARGS)
2298 {
2299
2300 if (tok != ROFF_ig)
2301 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2302
2303 return ROFF_IGN;
2304 }
2305
/*
 * Check for a closing "\}" and handle it.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 *
 * The input buffer is edited in place: every "\}" found at or after
 * pos is cut out, truncated, or rewritten, see below.
 *
 * Returns ROFF_IGN, with ROFF_LOOPCONT or ROFF_LOOPEXIT ORed in
 * when tok is ROFF_while and at least one loop ended on this line.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char	*ep;
	int	 endloop, irc, rr;

	irc = ROFF_IGN;

	/* Sample the rule before any node gets popped. */
	rr = r->last->rule;

	/* What to report if a loop ends: only relevant for .while. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			if (ep[2] == '\0')
				/* At the end of the line: truncate. */
				ep[0] = '\0';
			else if (rr)
				/* Line is kept: turn "\}" into "\&". */
				ep[1] = '&';
			else
				/* Line is dropped: cut the "\}" out. */
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the NUL. */
			++ep;
			break;
		default:
			/* Some other escape: skip both characters. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2365
/*
 * Parse and process a request or macro line in conditional scope.
 * Closing "\}" markers on the line are handled first, then the
 * request or macro is dispatched unless the conditional is false.
 * Returns the flags from roff_cond_checkend(), combined with those
 * of the dispatched handler, if any.
 */
static int
roff_cond_sub(ROFF_ARGS)
{
	struct roffnode	*bl;
	int		 irc, rr, spos;
	enum roff_tok	 t;

	rr = 0;  /* If arguments follow "\}", skip them. */
	irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);

	/* Remember where the request name starts; roff_parse() moves pos. */
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Handle requests and macros if the conditional evaluated
	 * to true or if they are structurally required.
	 * The .break request is always handled specially.
	 */

	if (t == ROFF_break) {
		if (irc & ROFF_LOOPMASK)
			/* A loop ended on this very line: leave it. */
			irc = ROFF_IGN | ROFF_LOOPEXIT;
		else if (rr) {
			/*
			 * Set the rule to false for all enclosing
			 * nodes up to and including the innermost
			 * .while node.
			 */
			for (bl = r->last; bl != NULL; bl = bl->parent) {
				bl->rule = 0;
				if (bl->tok == ROFF_while)
					break;
			}
		}
	} else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
		irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);

		/* A nested .while opened here replaces the loop state. */
		if (irc & ROFF_WHILE)
			irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
	}
	return irc;
}
2404
2405 /*
2406 * Parse and process a text line in conditional scope.
2407 */
2408 static int
roff_cond_text(ROFF_ARGS)2409 roff_cond_text(ROFF_ARGS)
2410 {
2411 int irc, rr;
2412
2413 rr = 1; /* If arguments follow "\}", preserve them. */
2414 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2415 if (rr)
2416 irc |= ROFF_CONT;
2417 return irc;
2418 }
2419
2420 /* --- handling of numeric and conditional expressions -------------------- */
2421
/*
 * Parse a single signed decimal number.  Stop at the first non-digit.
 * If there is at least one digit, return success and advance the
 * parse point, else return failure and let the parse point unchanged.
 *
 * v:         string to parse
 * pos:       in/out parse point, an index into v
 * res:       receives the scaled value, truncated to an integer;
 *            may be NULL to only advance the parse point
 * unit:      default scaling unit; if it is '\0', no scaling takes
 *            place, and a unit following the number is consumed
 *            but ignored
 * skipspace: if non-zero, skip white space after the optional sign
 *
 * Values too large or too small for an int saturate at INT_MAX
 * or INT_MIN, respectively: converting an out-of-range double
 * to int would be undefined behaviour.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_getnum(const char *v, int *pos, int *res, char unit, int skipspace)
{
	double	 frac, myres;
	int	 n, p;

	p = *pos;
	n = v[p] == '-';
	if (n || v[p] == '+')
		p++;

	if (skipspace)
		while (isspace((unsigned char)v[p]))
			p++;

	/* Integer part, then the optional fractional part. */

	for (myres = 0.0; isdigit((unsigned char)v[p]); p++)
		myres = myres * 10.0 + (v[p] - '0');
	if (v[p] == '.')
		for (frac = 0.1; isdigit((unsigned char)v[++p]); frac *= 0.1)
			myres += frac * (v[p] - '0');

	if (p == *pos + n)
		return 0;

	if (n)
		myres *= -1.0;

	/* Each number may be followed by one optional scaling unit. */

	if (v[p] != '\0' && strchr("ficvPmnpuM", v[p]) != NULL) {
		if (unit != '\0')
			unit = v[p];
		p++;
	}

	switch (unit) {
	case 'f':
		myres *= 65536.0;
		break;
	case 'i':
		myres *= 240.0;
		break;
	case 'c':
		myres *= 240.0 / 2.54;
		break;
	case 'v':
	case 'P':
		myres *= 40.0;
		break;
	case 'm':
	case 'n':
		myres *= 24.0;
		break;
	case 'p':
		myres *= 40.0 / 12.0;
		break;
	case 'u':
		break;
	case 'M':
		myres *= 24.0 / 100.0;
		break;
	default:
		break;
	}

	/* Saturate instead of invoking undefined behaviour. */

	if (res != NULL) {
		if (myres >= (double)INT_MAX)
			*res = INT_MAX;
		else if (myres <= (double)INT_MIN)
			*res = INT_MIN;
		else
			*res = (int)myres;
	}
	*pos = p;
	return 1;
}
2497
/*
 * Evaluate a string comparison condition.
 * The character at the parse point is the delimiter.
 * The condition holds if the text between the first and second
 * delimiter equals the text between the second and third one.
 * Afterwards, the parse point is just behind the third delimiter,
 * or at the end of the line if there is none.
 * Returns 1 if the strings match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*left, *right;
	char		 delim;
	int		 match;

	delim = v[*pos];
	left = v + *pos + 1;		/* scans the first string */
	right = strchr(left, delim);	/* scans the second string */
	match = 0;

	if (right != NULL) {
		for (right++; *right != '\0'; right++, left++) {
			if (*left != *right) {
				/* Mismatch: look for the final delimiter. */
				right = strchr(right, delim);
				break;
			}
			if (*right == delim) {
				/* Both strings ended simultaneously. */
				match = 1;
				break;
			}
		}
	}

	if (right == NULL)
		right = strchr(left, '\0');
	else if (*right != '\0')
		right++;
	*pos = right - v;
	return match;
}
2540
2541 /*
2542 * Evaluate an optionally negated single character, numerical,
2543 * or string condition.
2544 */
2545 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2546 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2547 {
2548 const char *start, *end;
2549 char *cp, *name;
2550 size_t sz;
2551 int deftype, len, number, savepos, istrue, wanttrue;
2552
2553 if ('!' == v[*pos]) {
2554 wanttrue = 0;
2555 (*pos)++;
2556 } else
2557 wanttrue = 1;
2558
2559 switch (v[*pos]) {
2560 case '\0':
2561 return 0;
2562 case 'n':
2563 case 'o':
2564 (*pos)++;
2565 return wanttrue;
2566 case 'e':
2567 case 't':
2568 case 'v':
2569 (*pos)++;
2570 return !wanttrue;
2571 case 'c':
2572 do {
2573 (*pos)++;
2574 } while (v[*pos] == ' ');
2575
2576 /*
2577 * Quirk for groff compatibility:
2578 * The horizontal tab is neither available nor unavailable.
2579 */
2580
2581 if (v[*pos] == '\t') {
2582 (*pos)++;
2583 return 0;
2584 }
2585
2586 /* Printable ASCII characters are available. */
2587
2588 if (v[*pos] != '\\') {
2589 (*pos)++;
2590 return wanttrue;
2591 }
2592
2593 end = v + ++*pos;
2594 switch (mandoc_escape(&end, &start, &len)) {
2595 case ESCAPE_SPECIAL:
2596 istrue = mchars_spec2cp(start, len) != -1;
2597 break;
2598 case ESCAPE_UNICODE:
2599 istrue = 1;
2600 break;
2601 case ESCAPE_NUMBERED:
2602 istrue = mchars_num2char(start, len) != -1;
2603 break;
2604 default:
2605 istrue = !wanttrue;
2606 break;
2607 }
2608 *pos = end - v;
2609 return istrue == wanttrue;
2610 case 'd':
2611 case 'r':
2612 cp = v + *pos + 1;
2613 while (*cp == ' ')
2614 cp++;
2615 name = cp;
2616 sz = roff_getname(&cp, ln, cp - v);
2617 if (sz == 0)
2618 istrue = 0;
2619 else if (v[*pos] == 'r')
2620 istrue = roff_hasregn(r, name, sz);
2621 else {
2622 deftype = ROFFDEF_ANY;
2623 roff_getstrn(r, name, sz, &deftype);
2624 istrue = !!deftype;
2625 }
2626 *pos = (name + sz) - v;
2627 return istrue == wanttrue;
2628 default:
2629 break;
2630 }
2631
2632 savepos = *pos;
2633 if (roff_evalnum(ln, v, pos, &number, 'u', 0))
2634 return (number > 0) == wanttrue;
2635 else if (*pos == savepos)
2636 return roff_evalstrcond(v, pos) == wanttrue;
2637 else
2638 return 0;
2639 }
2640
/*
 * Request handler that does nothing at all:
 * it looks at none of its arguments and returns ROFF_IGN.
 */
static int
roff_line_ignore(ROFF_ARGS)
{

	return ROFF_IGN;
}
2647
2648 static int
roff_insec(ROFF_ARGS)2649 roff_insec(ROFF_ARGS)
2650 {
2651
2652 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2653 return ROFF_IGN;
2654 }
2655
2656 static int
roff_unsupp(ROFF_ARGS)2657 roff_unsupp(ROFF_ARGS)
2658 {
2659
2660 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2661 return ROFF_IGN;
2662 }
2663
/*
 * Handle the conditional requests el, ie, if, and while.
 * Push a new node, determine whether its condition holds,
 * determine the scope of the conditional (next line, rest of
 * the line, or multiple lines in braces), and store the offset
 * where the body begins in *offs.
 * Returns ROFF_RERUN, with ROFF_WHILE ORed in for the while request.
 */
static int
roff_cond(ROFF_ARGS)
{
	int	 irc;

	roffnode_push(r, tok, NULL, ln, ppos);

	/*
	 * An `.el' has no conditional body: it will consume the value
	 * of the current rstack entry set in prior `ie' calls or
	 * defaults to DENY.
	 *
	 * If we're not an `el', however, then evaluate the conditional.
	 */

	r->last->rule = tok == ROFF_el ?
	    (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
	    roff_evalcond(r, ln, buf->buf, &pos);

	/*
	 * An if-else will put the NEGATION of the current evaluated
	 * conditional into the stack of rules.
	 */

	if (tok == ROFF_ie) {
		/* Grow the stack in steps of 16 entries when full. */
		if (r->rstackpos + 1 == r->rstacksz) {
			r->rstacksz += 16;
			r->rstack = mandoc_reallocarray(r->rstack,
			    r->rstacksz, sizeof(int));
		}
		r->rstack[++r->rstackpos] = !r->last->rule;
	}

	/* If the parent has false as its rule, then so do we. */

	if (r->last->parent && !r->last->parent->rule)
		r->last->rule = 0;

	/*
	 * Determine scope.
	 * If there is nothing on the line after the conditional,
	 * not even whitespace, use next-line scope.
	 * Except that .while does not support next-line scope.
	 */

	if (buf->buf[pos] == '\0' && tok != ROFF_while) {
		/* Counted down by roffnode_cleanscope(), once per line. */
		r->last->endspan = 2;
		goto out;
	}

	while (buf->buf[pos] == ' ')
		pos++;

	/* An opening brace requests multiline scope. */

	if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
		/* A negative endspan marks a scope closed by "\}". */
		r->last->endspan = -1;
		pos += 2;
		while (buf->buf[pos] == ' ')
			pos++;
		goto out;
	}

	/*
	 * Anything else following the conditional causes
	 * single-line scope.  Warn if the scope contains
	 * nothing but trailing whitespace.
	 */

	if (buf->buf[pos] == '\0')
		mandoc_msg(MANDOCERR_COND_EMPTY,
		    ln, ppos, "%s", roff_name[tok]);

	r->last->endspan = 1;

out:
	/* Tell the caller where the body of the conditional starts. */
	*offs = pos;
	irc = ROFF_RERUN;
	if (tok == ROFF_while)
		irc |= ROFF_WHILE;
	return irc;
}
2746
2747 static int
roff_ds(ROFF_ARGS)2748 roff_ds(ROFF_ARGS)
2749 {
2750 char *string;
2751 const char *name;
2752 size_t namesz;
2753
2754 /* Ignore groff compatibility mode for now. */
2755
2756 if (tok == ROFF_ds1)
2757 tok = ROFF_ds;
2758 else if (tok == ROFF_as1)
2759 tok = ROFF_as;
2760
2761 /*
2762 * The first word is the name of the string.
2763 * If it is empty or terminated by an escape sequence,
2764 * abort the `ds' request without defining anything.
2765 */
2766
2767 name = string = buf->buf + pos;
2768 if (*name == '\0')
2769 return ROFF_IGN;
2770
2771 namesz = roff_getname(&string, ln, pos);
2772 switch (name[namesz]) {
2773 case '\\':
2774 return ROFF_IGN;
2775 case '\t':
2776 string = buf->buf + pos + namesz;
2777 break;
2778 default:
2779 break;
2780 }
2781
2782 /* Read past the initial double-quote, if any. */
2783 if (*string == '"')
2784 string++;
2785
2786 /* The rest is the value. */
2787 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2788 ROFF_as == tok);
2789 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2790 return ROFF_IGN;
2791 }
2792
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, return success and advance the
 * parse point, else return failure and let the parse point unchanged.
 *
 * Operators of a single character are stored in *res as they are.
 * Those of two characters are stored as one letter each:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * On success, the return value is the character stored in *res;
 * on failure, it is 0, and *res holds the offending character.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	char	 first, second;
	int	 width;

	first = v[*pos];
	*res = first;
	width = 1;

	switch (first) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		second = v[*pos + 1];
		if (second == '=') {
			*res = 'l';
			width = 2;
		} else if (second == '>') {
			*res = '!';
			width = 2;
		} else if (second == '?') {
			*res = 'i';
			width = 2;
		}
		break;
	case '>':
		second = v[*pos + 1];
		if (second == '=') {
			*res = 'g';
			width = 2;
		} else if (second == '?') {
			*res = 'a';
			width = 2;
		}
		break;
	case '=':
		if (v[*pos + 1] == '=')
			width = 2;
		break;
	default:
		return 0;
	}

	*pos += width;
	return *res;
}
2856
/*
 * Evaluate either a parenthesized numeric expression
 * or a single signed integer number.
 * Inside parentheses, white space is always skipped.
 * Returns 1 on success and 0 on failure.
 */
static int
roff_evalpar(int ln, const char *v, int *pos, int *res, char unit,
    int skipspace)
{
	if (v[*pos] != '(')
		return roff_getnum(v, pos, res, unit, skipspace);

	++*pos;
	if (roff_evalnum(ln, v, pos, res, unit, 1) == 0)
		return 0;

	if (v[*pos] == ')') {
		++*pos;
		return 1;
	}

	/*
	 * Omission of the closing parenthesis
	 * is an error in validation mode,
	 * but ignored in evaluation mode.
	 */

	return res != NULL;
}
2886
2887 /*
2888 * Evaluate a complete numeric expression.
2889 * Proceed left to right, there is no concept of precedence.
2890 */
2891 int
roff_evalnum(int ln,const char * v,int * pos,int * res,char unit,int skipspace)2892 roff_evalnum(int ln, const char *v, int *pos, int *res, char unit,
2893 int skipspace)
2894 {
2895 int mypos, operand2;
2896 char operator;
2897
2898 if (NULL == pos) {
2899 mypos = 0;
2900 pos = &mypos;
2901 }
2902
2903 if (skipspace)
2904 while (isspace((unsigned char)v[*pos]))
2905 (*pos)++;
2906
2907 if ( ! roff_evalpar(ln, v, pos, res, unit, skipspace))
2908 return 0;
2909
2910 while (1) {
2911 if (skipspace)
2912 while (isspace((unsigned char)v[*pos]))
2913 (*pos)++;
2914
2915 if ( ! roff_getop(v, pos, &operator))
2916 break;
2917
2918 if (skipspace)
2919 while (isspace((unsigned char)v[*pos]))
2920 (*pos)++;
2921
2922 if ( ! roff_evalpar(ln, v, pos, &operand2, unit, skipspace))
2923 return 0;
2924
2925 if (skipspace)
2926 while (isspace((unsigned char)v[*pos]))
2927 (*pos)++;
2928
2929 if (NULL == res)
2930 continue;
2931
2932 switch (operator) {
2933 case '+':
2934 *res += operand2;
2935 break;
2936 case '-':
2937 *res -= operand2;
2938 break;
2939 case '*':
2940 *res *= operand2;
2941 break;
2942 case '/':
2943 if (operand2 == 0) {
2944 mandoc_msg(MANDOCERR_DIVZERO,
2945 ln, *pos, "%s", v);
2946 *res = 0;
2947 break;
2948 }
2949 *res /= operand2;
2950 break;
2951 case '%':
2952 if (operand2 == 0) {
2953 mandoc_msg(MANDOCERR_DIVZERO,
2954 ln, *pos, "%s", v);
2955 *res = 0;
2956 break;
2957 }
2958 *res %= operand2;
2959 break;
2960 case '<':
2961 *res = *res < operand2;
2962 break;
2963 case '>':
2964 *res = *res > operand2;
2965 break;
2966 case 'l':
2967 *res = *res <= operand2;
2968 break;
2969 case 'g':
2970 *res = *res >= operand2;
2971 break;
2972 case '=':
2973 *res = *res == operand2;
2974 break;
2975 case '!':
2976 *res = *res != operand2;
2977 break;
2978 case '&':
2979 *res = *res && operand2;
2980 break;
2981 case ':':
2982 *res = *res || operand2;
2983 break;
2984 case 'i':
2985 if (operand2 < *res)
2986 *res = operand2;
2987 break;
2988 case 'a':
2989 if (operand2 > *res)
2990 *res = operand2;
2991 break;
2992 default:
2993 abort();
2994 }
2995 }
2996 return 1;
2997 }
2998
2999 /* --- register management ------------------------------------------------ */
3000
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, according to sign, see roff_setregn().
 * The step size of the register is left alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3006
3007 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3008 roff_setregn(struct roff *r, const char *name, size_t len,
3009 int val, char sign, int step)
3010 {
3011 struct roffreg *reg;
3012
3013 /* Search for an existing register with the same name. */
3014 reg = r->regtab;
3015
3016 while (reg != NULL && (reg->key.sz != len ||
3017 strncmp(reg->key.p, name, len) != 0))
3018 reg = reg->next;
3019
3020 if (NULL == reg) {
3021 /* Create a new register. */
3022 reg = mandoc_malloc(sizeof(struct roffreg));
3023 reg->key.p = mandoc_strndup(name, len);
3024 reg->key.sz = len;
3025 reg->val = 0;
3026 reg->step = 0;
3027 reg->next = r->regtab;
3028 r->regtab = reg;
3029 }
3030
3031 if ('+' == sign)
3032 reg->val += val;
3033 else if ('-' == sign)
3034 reg->val -= val;
3035 else
3036 reg->val = val;
3037 if (step != INT_MIN)
3038 reg->step = step;
3039 }
3040
3041 /*
3042 * Handle some predefined read-only number registers.
3043 * For now, return -1 if the requested register is not predefined;
3044 * in case a predefined read-only register having the value -1
3045 * were to turn up, another special value would have to be chosen.
3046 */
3047 static int
roff_getregro(const struct roff * r,const char * name)3048 roff_getregro(const struct roff *r, const char *name)
3049 {
3050
3051 switch (*name) {
3052 case '$': /* Number of arguments of the last macro evaluated. */
3053 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3054 case 'A': /* ASCII approximation mode is always off. */
3055 return 0;
3056 case 'g': /* Groff compatibility mode is always on. */
3057 return 1;
3058 case 'H': /* Fixed horizontal resolution. */
3059 return 24;
3060 case 'j': /* Always adjust left margin only. */
3061 return 0;
3062 case 'l': /* Fixed line width for DocBook. */
3063 return 78 * 24;
3064 case 'T': /* Some output device is always defined. */
3065 return 1;
3066 case 'V': /* Fixed vertical resolution. */
3067 return 40;
3068 default:
3069 return -1;
3070 }
3071 }
3072
/*
 * Return the value of the number register with the
 * NUL-terminated name, without incrementing or decrementing it,
 * see roff_getregn().
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3078
3079 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3080 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3081 {
3082 struct roffreg *reg;
3083 int val;
3084
3085 if ('.' == name[0] && 2 == len) {
3086 val = roff_getregro(r, name + 1);
3087 if (-1 != val)
3088 return val;
3089 }
3090
3091 for (reg = r->regtab; reg; reg = reg->next) {
3092 if (len == reg->key.sz &&
3093 0 == strncmp(name, reg->key.p, len)) {
3094 switch (sign) {
3095 case '+':
3096 reg->val += reg->step;
3097 break;
3098 case '-':
3099 reg->val -= reg->step;
3100 break;
3101 default:
3102 break;
3103 }
3104 return reg->val;
3105 }
3106 }
3107
3108 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3109 return 0;
3110 }
3111
3112 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3113 roff_hasregn(const struct roff *r, const char *name, size_t len)
3114 {
3115 struct roffreg *reg;
3116 int val;
3117
3118 if ('.' == name[0] && 2 == len) {
3119 val = roff_getregro(r, name + 1);
3120 if (-1 != val)
3121 return 1;
3122 }
3123
3124 for (reg = r->regtab; reg; reg = reg->next)
3125 if (len == reg->key.sz &&
3126 0 == strncmp(name, reg->key.p, len))
3127 return 1;
3128
3129 return 0;
3130 }
3131
3132 static void
roff_freereg(struct roffreg * reg)3133 roff_freereg(struct roffreg *reg)
3134 {
3135 struct roffreg *old_reg;
3136
3137 while (NULL != reg) {
3138 free(reg->key.p);
3139 old_reg = reg;
3140 reg = reg->next;
3141 free(old_reg);
3142 }
3143 }
3144
3145 static int
roff_nr(ROFF_ARGS)3146 roff_nr(ROFF_ARGS)
3147 {
3148 char *key, *val, *step;
3149 size_t keysz;
3150 int iv, is, len;
3151 char sign;
3152
3153 key = val = buf->buf + pos;
3154 if (*key == '\0')
3155 return ROFF_IGN;
3156
3157 keysz = roff_getname(&val, ln, pos);
3158 if (key[keysz] == '\\' || key[keysz] == '\t')
3159 return ROFF_IGN;
3160
3161 sign = *val;
3162 if (sign == '+' || sign == '-')
3163 val++;
3164
3165 len = 0;
3166 if (roff_evalnum(ln, val, &len, &iv, 'u', 0) == 0)
3167 return ROFF_IGN;
3168
3169 step = val + len;
3170 while (isspace((unsigned char)*step))
3171 step++;
3172 if (roff_evalnum(ln, step, NULL, &is, '\0', 0) == 0)
3173 is = INT_MIN;
3174
3175 roff_setregn(r, key, keysz, iv, sign, is);
3176 return ROFF_IGN;
3177 }
3178
3179 static int
roff_rr(ROFF_ARGS)3180 roff_rr(ROFF_ARGS)
3181 {
3182 struct roffreg *reg, **prev;
3183 char *name, *cp;
3184 size_t namesz;
3185
3186 name = cp = buf->buf + pos;
3187 if (*name == '\0')
3188 return ROFF_IGN;
3189 namesz = roff_getname(&cp, ln, pos);
3190 name[namesz] = '\0';
3191
3192 prev = &r->regtab;
3193 while (1) {
3194 reg = *prev;
3195 if (reg == NULL || !strcmp(name, reg->key.p))
3196 break;
3197 prev = ®->next;
3198 }
3199 if (reg != NULL) {
3200 *prev = reg->next;
3201 free(reg->key.p);
3202 free(reg);
3203 }
3204 return ROFF_IGN;
3205 }
3206
3207 /* --- handler functions for roff requests -------------------------------- */
3208
3209 static int
roff_rm(ROFF_ARGS)3210 roff_rm(ROFF_ARGS)
3211 {
3212 const char *name;
3213 char *cp;
3214 size_t namesz;
3215
3216 cp = buf->buf + pos;
3217 while (*cp != '\0') {
3218 name = cp;
3219 namesz = roff_getname(&cp, ln, (int)(cp - buf->buf));
3220 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3221 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3222 if (name[namesz] == '\\' || name[namesz] == '\t')
3223 break;
3224 }
3225 return ROFF_IGN;
3226 }
3227
3228 static int
roff_it(ROFF_ARGS)3229 roff_it(ROFF_ARGS)
3230 {
3231 int iv;
3232
3233 /* Parse the number of lines. */
3234
3235 if ( ! roff_evalnum(ln, buf->buf, &pos, &iv, '\0', 0)) {
3236 mandoc_msg(MANDOCERR_IT_NONUM,
3237 ln, ppos, "%s", buf->buf + 1);
3238 return ROFF_IGN;
3239 }
3240
3241 while (isspace((unsigned char)buf->buf[pos]))
3242 pos++;
3243
3244 /*
3245 * Arm the input line trap.
3246 * Special-casing "an-trap" is an ugly workaround to cope
3247 * with DocBook stupidly fiddling with man(7) internals.
3248 */
3249
3250 roffit_lines = iv;
3251 roffit_macro = mandoc_strdup(iv != 1 ||
3252 strcmp(buf->buf + pos, "an-trap") ?
3253 buf->buf + pos : "br");
3254 return ROFF_IGN;
3255 }
3256
3257 static int
roff_Dd(ROFF_ARGS)3258 roff_Dd(ROFF_ARGS)
3259 {
3260 int mask;
3261 enum roff_tok t, te;
3262
3263 switch (tok) {
3264 case ROFF_Dd:
3265 tok = MDOC_Dd;
3266 te = MDOC_MAX;
3267 if (r->format == 0)
3268 r->format = MPARSE_MDOC;
3269 mask = MPARSE_MDOC | MPARSE_QUICK;
3270 break;
3271 case ROFF_TH:
3272 tok = MAN_TH;
3273 te = MAN_MAX;
3274 if (r->format == 0)
3275 r->format = MPARSE_MAN;
3276 mask = MPARSE_QUICK;
3277 break;
3278 default:
3279 abort();
3280 }
3281 if ((r->options & mask) == 0)
3282 for (t = tok; t < te; t++)
3283 roff_setstr(r, roff_name[t], NULL, 0);
3284 return ROFF_CONT;
3285 }
3286
3287 static int
roff_TE(ROFF_ARGS)3288 roff_TE(ROFF_ARGS)
3289 {
3290 r->man->flags &= ~ROFF_NONOFILL;
3291 if (r->tbl == NULL) {
3292 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3293 return ROFF_IGN;
3294 }
3295 if (tbl_end(r->tbl, 0) == 0) {
3296 r->tbl = NULL;
3297 free(buf->buf);
3298 buf->buf = mandoc_strdup(".sp");
3299 buf->sz = 4;
3300 *offs = 0;
3301 return ROFF_REPARSE;
3302 }
3303 r->tbl = NULL;
3304 return ROFF_IGN;
3305 }
3306
3307 static int
roff_T_(ROFF_ARGS)3308 roff_T_(ROFF_ARGS)
3309 {
3310
3311 if (NULL == r->tbl)
3312 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3313 else
3314 tbl_restart(ln, ppos, r->tbl);
3315
3316 return ROFF_IGN;
3317 }
3318
/*
 * Handle in-line equation delimiters.
 * Search the input buffer, starting at pos, for the next delimiter.
 * If there is none, return ROFF_CONT and leave the buffer alone.
 * Otherwise, replace the buffer with a newly allocated one where
 * the delimiter is replaced by an .EQ or .EN macro on a line of
 * its own, and return ROFF_REPARSE.
 */
static int
roff_eqndelim(struct roff *r, struct buf *buf, int pos)
{
	char		*cp1, *cp2;
	const char	*bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;

	/*
	 * Outside equations, look for an opening delimiter.
	 * If we are inside an equation, we already know it is
	 * in-line, or this function wouldn't have been called;
	 * so look for a closing delimiter.
	 */

	cp1 = buf->buf + pos;
	cp2 = strchr(cp1, r->eqn == NULL ?
	    r->last_eqn->odelim : r->last_eqn->cdelim);
	if (cp2 == NULL)
		return ROFF_CONT;

	/*
	 * Split the buffer at the delimiter: buf->buf now ends
	 * before it, and cp2 points to the text following it.
	 */

	*cp2++ = '\0';
	bef_pr = bef_nl = aft_nl = aft_pr = "";

	/* Handle preceding text, protecting whitespace. */

	if (*buf->buf != '\0') {
		if (r->eqn == NULL)
			bef_pr = "\\&";
		bef_nl = "\n";
	}

	/*
	 * Prepare replacing the delimiter with an equation macro
	 * and drop leading white space from the equation.
	 */

	if (r->eqn == NULL) {
		while (*cp2 == ' ')
			cp2++;
		mac = ".EQ";
	} else
		mac = ".EN";

	/* Handle following text, protecting whitespace. */

	if (*cp2 != '\0') {
		aft_nl = "\n";
		if (r->eqn != NULL)
			aft_pr = "\\&";
	}

	/*
	 * Do the actual replacement.
	 * The buffer size is the length returned by
	 * mandoc_asprintf() plus one.
	 */

	buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
	    bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
	free(buf->buf);
	buf->buf = cp1;

	/* Toggle the in-line state of the eqn subsystem. */

	r->eqn_inline = r->eqn == NULL;
	return ROFF_REPARSE;
}
3384
3385 static int
roff_EQ(ROFF_ARGS)3386 roff_EQ(ROFF_ARGS)
3387 {
3388 struct roff_node *n;
3389
3390 if (r->man->meta.macroset == MACROSET_MAN)
3391 man_breakscope(r->man, ROFF_EQ);
3392 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3393 if (ln > r->man->last->line)
3394 n->flags |= NODE_LINE;
3395 n->eqn = eqn_box_new();
3396 roff_node_append(r->man, n);
3397 r->man->next = ROFF_NEXT_SIBLING;
3398
3399 assert(r->eqn == NULL);
3400 if (r->last_eqn == NULL)
3401 r->last_eqn = eqn_alloc();
3402 else
3403 eqn_reset(r->last_eqn);
3404 r->eqn = r->last_eqn;
3405 r->eqn->node = n;
3406
3407 if (buf->buf[pos] != '\0')
3408 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3409 ".EQ %s", buf->buf + pos);
3410
3411 return ROFF_IGN;
3412 }
3413
3414 static int
roff_EN(ROFF_ARGS)3415 roff_EN(ROFF_ARGS)
3416 {
3417 if (r->eqn != NULL) {
3418 eqn_parse(r->eqn);
3419 r->eqn = NULL;
3420 } else
3421 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3422 if (buf->buf[pos] != '\0')
3423 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3424 "EN %s", buf->buf + pos);
3425 return ROFF_IGN;
3426 }
3427
3428 static int
roff_TS(ROFF_ARGS)3429 roff_TS(ROFF_ARGS)
3430 {
3431 if (r->tbl != NULL) {
3432 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3433 tbl_end(r->tbl, 0);
3434 }
3435 r->man->flags |= ROFF_NONOFILL;
3436 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3437 if (r->last_tbl == NULL)
3438 r->first_tbl = r->tbl;
3439 r->last_tbl = r->tbl;
3440 return ROFF_IGN;
3441 }
3442
3443 static int
roff_noarg(ROFF_ARGS)3444 roff_noarg(ROFF_ARGS)
3445 {
3446 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3447 man_breakscope(r->man, tok);
3448 if (tok == ROFF_brp)
3449 tok = ROFF_br;
3450 roff_elem_alloc(r->man, ln, ppos, tok);
3451 if (buf->buf[pos] != '\0')
3452 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3453 "%s %s", roff_name[tok], buf->buf + pos);
3454 if (tok == ROFF_nf)
3455 r->man->flags |= ROFF_NOFILL;
3456 else if (tok == ROFF_fi)
3457 r->man->flags &= ~ROFF_NOFILL;
3458 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3459 r->man->next = ROFF_NEXT_SIBLING;
3460 return ROFF_IGN;
3461 }
3462
3463 static int
roff_onearg(ROFF_ARGS)3464 roff_onearg(ROFF_ARGS)
3465 {
3466 struct roff_node *n;
3467 char *cp;
3468 int npos;
3469
3470 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3471 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3472 tok == ROFF_ti))
3473 man_breakscope(r->man, tok);
3474
3475 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3476 r->man->last = roffce_node;
3477 r->man->next = ROFF_NEXT_SIBLING;
3478 }
3479
3480 roff_elem_alloc(r->man, ln, ppos, tok);
3481 n = r->man->last;
3482
3483 cp = buf->buf + pos;
3484 if (*cp != '\0') {
3485 while (*cp != '\0' && *cp != ' ')
3486 cp++;
3487 while (*cp == ' ')
3488 *cp++ = '\0';
3489 if (*cp != '\0')
3490 mandoc_msg(MANDOCERR_ARG_EXCESS,
3491 ln, (int)(cp - buf->buf),
3492 "%s ... %s", roff_name[tok], cp);
3493 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3494 }
3495
3496 if (tok == ROFF_ce || tok == ROFF_rj) {
3497 if (r->man->last->type == ROFFT_ELEM) {
3498 roff_word_alloc(r->man, ln, pos, "1");
3499 r->man->last->flags |= NODE_NOSRC;
3500 }
3501 npos = 0;
3502 if (roff_evalnum(ln, r->man->last->string, &npos,
3503 &roffce_lines, '\0', 0) == 0) {
3504 mandoc_msg(MANDOCERR_CE_NONUM,
3505 ln, pos, "ce %s", buf->buf + pos);
3506 roffce_lines = 1;
3507 }
3508 if (roffce_lines < 1) {
3509 r->man->last = r->man->last->parent;
3510 roffce_node = NULL;
3511 roffce_lines = 0;
3512 } else
3513 roffce_node = r->man->last->parent;
3514 } else {
3515 n->flags |= NODE_VALID | NODE_ENDED;
3516 r->man->last = n;
3517 }
3518 n->flags |= NODE_LINE;
3519 r->man->next = ROFF_NEXT_SIBLING;
3520 return ROFF_IGN;
3521 }
3522
3523 static int
roff_manyarg(ROFF_ARGS)3524 roff_manyarg(ROFF_ARGS)
3525 {
3526 struct roff_node *n;
3527 char *sp, *ep;
3528
3529 roff_elem_alloc(r->man, ln, ppos, tok);
3530 n = r->man->last;
3531
3532 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3533 while (*ep != '\0' && *ep != ' ')
3534 ep++;
3535 while (*ep == ' ')
3536 *ep++ = '\0';
3537 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3538 }
3539
3540 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3541 r->man->last = n;
3542 r->man->next = ROFF_NEXT_SIBLING;
3543 return ROFF_IGN;
3544 }
3545
3546 static int
roff_als(ROFF_ARGS)3547 roff_als(ROFF_ARGS)
3548 {
3549 char *oldn, *newn, *end, *value;
3550 size_t oldsz, newsz, valsz;
3551
3552 newn = oldn = buf->buf + pos;
3553 if (*newn == '\0')
3554 return ROFF_IGN;
3555
3556 newsz = roff_getname(&oldn, ln, pos);
3557 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3558 return ROFF_IGN;
3559
3560 end = oldn;
3561 oldsz = roff_getname(&end, ln, oldn - buf->buf);
3562 if (oldsz == 0)
3563 return ROFF_IGN;
3564
3565 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3566 (int)oldsz, oldn);
3567 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3568 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3569 free(value);
3570 return ROFF_IGN;
3571 }
3572
3573 /*
3574 * The .break request only makes sense inside conditionals,
3575 * and that case is already handled in roff_cond_sub().
3576 */
3577 static int
roff_break(ROFF_ARGS)3578 roff_break(ROFF_ARGS)
3579 {
3580 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3581 return ROFF_IGN;
3582 }
3583
3584 static int
roff_cc(ROFF_ARGS)3585 roff_cc(ROFF_ARGS)
3586 {
3587 const char *p;
3588
3589 p = buf->buf + pos;
3590
3591 if (*p == '\0' || (r->control = *p++) == '.')
3592 r->control = '\0';
3593
3594 if (*p != '\0')
3595 mandoc_msg(MANDOCERR_ARG_EXCESS,
3596 ln, p - buf->buf, "cc ... %s", p);
3597
3598 return ROFF_IGN;
3599 }
3600
3601 static int
roff_char(ROFF_ARGS)3602 roff_char(ROFF_ARGS)
3603 {
3604 const char *p, *kp, *vp;
3605 size_t ksz, vsz;
3606 int font;
3607
3608 /* Parse the character to be replaced. */
3609
3610 kp = buf->buf + pos;
3611 p = kp + 1;
3612 if (*kp == '\0' || (*kp == '\\' &&
3613 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3614 (*p != ' ' && *p != '\0')) {
3615 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3616 return ROFF_IGN;
3617 }
3618 ksz = p - kp;
3619 while (*p == ' ')
3620 p++;
3621
3622 /*
3623 * If the replacement string contains a font escape sequence,
3624 * we have to restore the font at the end.
3625 */
3626
3627 vp = p;
3628 vsz = strlen(p);
3629 font = 0;
3630 while (*p != '\0') {
3631 if (*p++ != '\\')
3632 continue;
3633 switch (mandoc_escape(&p, NULL, NULL)) {
3634 case ESCAPE_FONT:
3635 case ESCAPE_FONTROMAN:
3636 case ESCAPE_FONTITALIC:
3637 case ESCAPE_FONTBOLD:
3638 case ESCAPE_FONTBI:
3639 case ESCAPE_FONTCR:
3640 case ESCAPE_FONTCB:
3641 case ESCAPE_FONTCI:
3642 case ESCAPE_FONTPREV:
3643 font++;
3644 break;
3645 default:
3646 break;
3647 }
3648 }
3649 if (font > 1)
3650 mandoc_msg(MANDOCERR_CHAR_FONT,
3651 ln, (int)(vp - buf->buf), "%s", vp);
3652
3653 /*
3654 * Approximate the effect of .char using the .tr tables.
3655 * XXX In groff, .char and .tr interact differently.
3656 */
3657
3658 if (ksz == 1) {
3659 if (r->xtab == NULL)
3660 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3661 assert((unsigned int)*kp < 128);
3662 free(r->xtab[(int)*kp].p);
3663 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3664 "%s%s", vp, font ? "\fP" : "");
3665 } else {
3666 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3667 if (font)
3668 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3669 }
3670 return ROFF_IGN;
3671 }
3672
3673 static int
roff_ec(ROFF_ARGS)3674 roff_ec(ROFF_ARGS)
3675 {
3676 const char *p;
3677
3678 p = buf->buf + pos;
3679 if (*p == '\0')
3680 r->escape = '\\';
3681 else {
3682 r->escape = *p;
3683 if (*++p != '\0')
3684 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3685 (int)(p - buf->buf), "ec ... %s", p);
3686 }
3687 return ROFF_IGN;
3688 }
3689
3690 static int
roff_eo(ROFF_ARGS)3691 roff_eo(ROFF_ARGS)
3692 {
3693 r->escape = '\0';
3694 if (buf->buf[pos] != '\0')
3695 mandoc_msg(MANDOCERR_ARG_SKIP,
3696 ln, pos, "eo %s", buf->buf + pos);
3697 return ROFF_IGN;
3698 }
3699
3700 static int
roff_mc(ROFF_ARGS)3701 roff_mc(ROFF_ARGS)
3702 {
3703 struct roff_node *n;
3704 char *cp;
3705
3706 /* Parse the first argument. */
3707
3708 cp = buf->buf + pos;
3709 if (*cp != '\0')
3710 cp++;
3711 if (buf->buf[pos] == '\\') {
3712 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3713 case ESCAPE_SPECIAL:
3714 case ESCAPE_UNICODE:
3715 case ESCAPE_NUMBERED:
3716 break;
3717 default:
3718 *cp = '\0';
3719 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3720 "mc %s", buf->buf + pos);
3721 buf->buf[pos] = '\0';
3722 break;
3723 }
3724 }
3725
3726 /* Ignore additional arguments. */
3727
3728 while (*cp == ' ')
3729 *cp++ = '\0';
3730 if (*cp != '\0') {
3731 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3732 "mc ... %s", cp);
3733 *cp = '\0';
3734 }
3735
3736 /* Create the .mc node. */
3737
3738 roff_elem_alloc(r->man, ln, ppos, tok);
3739 n = r->man->last;
3740 if (buf->buf[pos] != '\0')
3741 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3742 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3743 r->man->last = n;
3744 r->man->next = ROFF_NEXT_SIBLING;
3745 return ROFF_IGN;
3746 }
3747
3748 static int
roff_nop(ROFF_ARGS)3749 roff_nop(ROFF_ARGS)
3750 {
3751 while (buf->buf[pos] == ' ')
3752 pos++;
3753 *offs = pos;
3754 return ROFF_RERUN;
3755 }
3756
3757 static int
roff_tr(ROFF_ARGS)3758 roff_tr(ROFF_ARGS)
3759 {
3760 const char *p, *first, *second;
3761 size_t fsz, ssz;
3762
3763 p = buf->buf + pos;
3764
3765 if (*p == '\0') {
3766 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3767 return ROFF_IGN;
3768 }
3769
3770 while (*p != '\0') {
3771 fsz = ssz = 1;
3772
3773 first = p++;
3774 if (*first == '\\') {
3775 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3776 return ROFF_IGN;
3777 fsz = (size_t)(p - first);
3778 }
3779
3780 second = p++;
3781 if (*second == '\\') {
3782 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3783 return ROFF_IGN;
3784 ssz = (size_t)(p - second);
3785 } else if (*second == '\0') {
3786 mandoc_msg(MANDOCERR_TR_ODD, ln,
3787 (int)(first - buf->buf), "tr %s", first);
3788 second = " ";
3789 p--;
3790 }
3791
3792 if (fsz > 1) {
3793 roff_setstrn(&r->xmbtab, first, fsz,
3794 second, ssz, 0);
3795 continue;
3796 }
3797
3798 if (r->xtab == NULL)
3799 r->xtab = mandoc_calloc(128,
3800 sizeof(struct roffstr));
3801
3802 free(r->xtab[(int)*first].p);
3803 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3804 r->xtab[(int)*first].sz = ssz;
3805 }
3806
3807 return ROFF_IGN;
3808 }
3809
3810 /*
3811 * Implementation of the .return request.
3812 * There is no need to call roff_userret() from here.
3813 * The read module will call that after rewinding the reader stack
3814 * to the place from where the current macro was called.
3815 */
3816 static int
roff_return(ROFF_ARGS)3817 roff_return(ROFF_ARGS)
3818 {
3819 if (r->mstackpos >= 0)
3820 return ROFF_IGN | ROFF_USERRET;
3821
3822 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3823 return ROFF_IGN;
3824 }
3825
3826 static int
roff_rn(ROFF_ARGS)3827 roff_rn(ROFF_ARGS)
3828 {
3829 const char *value;
3830 char *oldn, *newn, *end;
3831 size_t oldsz, newsz;
3832 int deftype;
3833
3834 oldn = newn = buf->buf + pos;
3835 if (*oldn == '\0')
3836 return ROFF_IGN;
3837
3838 oldsz = roff_getname(&newn, ln, pos);
3839 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3840 return ROFF_IGN;
3841
3842 end = newn;
3843 newsz = roff_getname(&end, ln, newn - buf->buf);
3844 if (newsz == 0)
3845 return ROFF_IGN;
3846
3847 deftype = ROFFDEF_ANY;
3848 value = roff_getstrn(r, oldn, oldsz, &deftype);
3849 switch (deftype) {
3850 case ROFFDEF_USER:
3851 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3852 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3853 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3854 break;
3855 case ROFFDEF_PRE:
3856 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3857 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3858 break;
3859 case ROFFDEF_REN:
3860 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3861 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3862 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3863 break;
3864 case ROFFDEF_STD:
3865 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3866 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3867 break;
3868 default:
3869 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3870 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3871 break;
3872 }
3873 return ROFF_IGN;
3874 }
3875
3876 static int
roff_shift(ROFF_ARGS)3877 roff_shift(ROFF_ARGS)
3878 {
3879 struct mctx *ctx;
3880 int argpos, levels, i;
3881
3882 argpos = pos;
3883 levels = 1;
3884 if (buf->buf[pos] != '\0' &&
3885 roff_evalnum(ln, buf->buf, &pos, &levels, '\0', 0) == 0) {
3886 mandoc_msg(MANDOCERR_CE_NONUM,
3887 ln, pos, "shift %s", buf->buf + pos);
3888 levels = 1;
3889 }
3890 if (r->mstackpos < 0) {
3891 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3892 return ROFF_IGN;
3893 }
3894 ctx = r->mstack + r->mstackpos;
3895 if (levels > ctx->argc) {
3896 mandoc_msg(MANDOCERR_SHIFT,
3897 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3898 levels = ctx->argc;
3899 }
3900 if (levels < 0) {
3901 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3902 levels = 0;
3903 }
3904 if (levels == 0)
3905 return ROFF_IGN;
3906 for (i = 0; i < levels; i++)
3907 free(ctx->argv[i]);
3908 ctx->argc -= levels;
3909 for (i = 0; i < ctx->argc; i++)
3910 ctx->argv[i] = ctx->argv[i + levels];
3911 return ROFF_IGN;
3912 }
3913
3914 static int
roff_so(ROFF_ARGS)3915 roff_so(ROFF_ARGS)
3916 {
3917 char *name, *cp;
3918
3919 name = buf->buf + pos;
3920 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3921
3922 /*
3923 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3924 * opening anything that's not in our cwd or anything beneath
3925 * it. Thus, explicitly disallow traversing up the file-system
3926 * or using absolute paths.
3927 */
3928
3929 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3930 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3931 buf->sz = mandoc_asprintf(&cp,
3932 ".sp\nSee the file %s.\n.sp", name) + 1;
3933 free(buf->buf);
3934 buf->buf = cp;
3935 *offs = 0;
3936 return ROFF_REPARSE;
3937 }
3938
3939 *offs = pos;
3940 return ROFF_SO;
3941 }
3942
3943 /* --- user defined strings and macros ------------------------------------ */
3944
3945 static int
roff_userdef(ROFF_ARGS)3946 roff_userdef(ROFF_ARGS)
3947 {
3948 struct mctx *ctx;
3949 char *arg, *ap, *dst, *src;
3950 size_t sz;
3951
3952 /* If the macro is empty, ignore it altogether. */
3953
3954 if (*r->current_string == '\0')
3955 return ROFF_IGN;
3956
3957 /* Initialize a new macro stack context. */
3958
3959 if (++r->mstackpos == r->mstacksz) {
3960 r->mstack = mandoc_recallocarray(r->mstack,
3961 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3962 r->mstacksz += 8;
3963 }
3964 ctx = r->mstack + r->mstackpos;
3965 ctx->argc = 0;
3966
3967 /*
3968 * Collect pointers to macro argument strings,
3969 * NUL-terminating them and escaping quotes.
3970 */
3971
3972 src = buf->buf + pos;
3973 while (*src != '\0') {
3974 if (ctx->argc == ctx->argsz) {
3975 ctx->argsz += 8;
3976 ctx->argv = mandoc_reallocarray(ctx->argv,
3977 ctx->argsz, sizeof(*ctx->argv));
3978 }
3979 arg = roff_getarg(r, &src, ln, &pos);
3980 sz = 1; /* For the terminating NUL. */
3981 for (ap = arg; *ap != '\0'; ap++)
3982 sz += *ap == '"' ? 4 : 1;
3983 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3984 for (ap = arg; *ap != '\0'; ap++) {
3985 if (*ap == '"') {
3986 memcpy(dst, "\\(dq", 4);
3987 dst += 4;
3988 } else
3989 *dst++ = *ap;
3990 }
3991 *dst = '\0';
3992 free(arg);
3993 }
3994
3995 /* Replace the macro invocation by the macro definition. */
3996
3997 free(buf->buf);
3998 buf->buf = mandoc_strdup(r->current_string);
3999 buf->sz = strlen(buf->buf) + 1;
4000 *offs = 0;
4001
4002 return buf->buf[buf->sz - 2] == '\n' ?
4003 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4004 }
4005
4006 /*
4007 * Calling a high-level macro that was renamed with .rn.
4008 * r->current_string has already been set up by roff_parse().
4009 */
4010 static int
roff_renamed(ROFF_ARGS)4011 roff_renamed(ROFF_ARGS)
4012 {
4013 char *nbuf;
4014
4015 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4016 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4017 free(buf->buf);
4018 buf->buf = nbuf;
4019 *offs = 0;
4020 return ROFF_CONT;
4021 }
4022
/*
 * Measure the length in bytes of the roff identifier at *cpp
 * and advance the pointer to the next word.
 *
 * The buffer is modified in place: where escape sequences inside
 * the name are reduced to single bytes, the name is compacted
 * towards its beginning, such that on return, the name occupies
 * the returned number of bytes starting at the original *cpp,
 * and bytes vacated by the compaction are overwritten with spaces.
 *
 * The arguments ln and pos are only used for reporting
 * MANDOCERR_NAMESC.  Returns 0 if *cpp points to a NUL byte.
 */
static size_t
roff_getname(char **cpp, int ln, int pos)
{
	char	 *name, *cp;
	int	  namesz, inam, iend;

	name = *cpp;
	if (*name == '\0')
		return 0;

	/* Advance cp to the byte after the end of the name. */

	cp = name;
	namesz = 0;
	for (;;) {
		if (*cp == '\0')
			break;

		/* White space ends the name and is consumed. */

		if (*cp == ' ' || *cp == '\t') {
			cp++;
			break;
		}

		/*
		 * An ordinary byte is part of the name.
		 * If earlier escape sequences left a gap, move the
		 * byte down and blank out its old position.
		 */

		if (*cp != '\\') {
			if (name + namesz < cp) {
				name[namesz] = *cp;
				*cp = ' ';
			}
			namesz++;
			cp++;
			continue;
		}

		/* \{ and \} end the name without being consumed. */

		if (cp[1] == '{' || cp[1] == '}')
			break;

		/*
		 * Any escape sequence that roff_escape() does not
		 * classify as ESCAPE_UNDEF ends the name: report it
		 * and skip the iend bytes roff_escape() reported.
		 */

		if (roff_escape(cp, 0, 0, NULL, &inam,
		    NULL, NULL, &iend) != ESCAPE_UNDEF) {
			mandoc_msg(MANDOCERR_NAMESC, ln, pos,
			    "%.*s%.*s", namesz, name, iend, cp);
			cp += iend;
			break;
		}

		/*
		 * In an identifier, \\, \., \G and so on
		 * are reduced to \, ., G and so on,
		 * vaguely similar to copy mode.
		 */

		/*
		 * Append the byte at offset inam to the name, then step
		 * over the iend bytes of the escape sequence, blanking
		 * those that are not part of the compacted name.
		 */

		name[namesz++] = cp[inam];
		while (iend--) {
			if (cp >= name + namesz)
				*cp = ' ';
			cp++;
		}
	}

	/* Read past spaces. */

	while (*cp == ' ')
		cp++;

	*cpp = cp;
	return namesz;
}
4089
4090 /*
4091 * Store *string into the user-defined string called *name.
4092 * To clear an existing entry, call with (*r, *name, NULL, 0).
4093 * append == 0: replace mode
4094 * append == 1: single-line append mode
4095 * append == 2: multiline append mode, append '\n' after each call
4096 */
4097 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4098 roff_setstr(struct roff *r, const char *name, const char *string,
4099 int append)
4100 {
4101 size_t namesz;
4102
4103 namesz = strlen(name);
4104 roff_setstrn(&r->strtab, name, namesz, string,
4105 string ? strlen(string) : 0, append);
4106 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4107 }
4108
4109 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4110 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4111 const char *string, size_t stringsz, int append)
4112 {
4113 struct roffkv *n;
4114 char *c;
4115 int i;
4116 size_t oldch, newch;
4117
4118 /* Search for an existing string with the same name. */
4119 n = *r;
4120
4121 while (n && (namesz != n->key.sz ||
4122 strncmp(n->key.p, name, namesz)))
4123 n = n->next;
4124
4125 if (NULL == n) {
4126 /* Create a new string table entry. */
4127 n = mandoc_malloc(sizeof(struct roffkv));
4128 n->key.p = mandoc_strndup(name, namesz);
4129 n->key.sz = namesz;
4130 n->val.p = NULL;
4131 n->val.sz = 0;
4132 n->next = *r;
4133 *r = n;
4134 } else if (0 == append) {
4135 free(n->val.p);
4136 n->val.p = NULL;
4137 n->val.sz = 0;
4138 }
4139
4140 if (NULL == string)
4141 return;
4142
4143 /*
4144 * One additional byte for the '\n' in multiline mode,
4145 * and one for the terminating '\0'.
4146 */
4147 newch = stringsz + (1 < append ? 2u : 1u);
4148
4149 if (NULL == n->val.p) {
4150 n->val.p = mandoc_malloc(newch);
4151 *n->val.p = '\0';
4152 oldch = 0;
4153 } else {
4154 oldch = n->val.sz;
4155 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4156 }
4157
4158 /* Skip existing content in the destination buffer. */
4159 c = n->val.p + (int)oldch;
4160
4161 /* Append new content to the destination buffer. */
4162 i = 0;
4163 while (i < (int)stringsz) {
4164 /*
4165 * Rudimentary roff copy mode:
4166 * Handle escaped backslashes.
4167 */
4168 if ('\\' == string[i] && '\\' == string[i + 1])
4169 i++;
4170 *c++ = string[i++];
4171 }
4172
4173 /* Append terminating bytes. */
4174 if (1 < append)
4175 *c++ = '\n';
4176
4177 *c = '\0';
4178 n->val.sz = (int)(c - n->val.p);
4179 }
4180
4181 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4182 roff_getstrn(struct roff *r, const char *name, size_t len,
4183 int *deftype)
4184 {
4185 const struct roffkv *n;
4186 int found, i;
4187 enum roff_tok tok;
4188
4189 found = 0;
4190 for (n = r->strtab; n != NULL; n = n->next) {
4191 if (strncmp(name, n->key.p, len) != 0 ||
4192 n->key.p[len] != '\0' || n->val.p == NULL)
4193 continue;
4194 if (*deftype & ROFFDEF_USER) {
4195 *deftype = ROFFDEF_USER;
4196 return n->val.p;
4197 } else {
4198 found = 1;
4199 break;
4200 }
4201 }
4202 for (n = r->rentab; n != NULL; n = n->next) {
4203 if (strncmp(name, n->key.p, len) != 0 ||
4204 n->key.p[len] != '\0' || n->val.p == NULL)
4205 continue;
4206 if (*deftype & ROFFDEF_REN) {
4207 *deftype = ROFFDEF_REN;
4208 return n->val.p;
4209 } else {
4210 found = 1;
4211 break;
4212 }
4213 }
4214 for (i = 0; i < PREDEFS_MAX; i++) {
4215 if (strncmp(name, predefs[i].name, len) != 0 ||
4216 predefs[i].name[len] != '\0')
4217 continue;
4218 if (*deftype & ROFFDEF_PRE) {
4219 *deftype = ROFFDEF_PRE;
4220 return predefs[i].str;
4221 } else {
4222 found = 1;
4223 break;
4224 }
4225 }
4226 if (r->man->meta.macroset != MACROSET_MAN) {
4227 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4228 if (strncmp(name, roff_name[tok], len) != 0 ||
4229 roff_name[tok][len] != '\0')
4230 continue;
4231 if (*deftype & ROFFDEF_STD) {
4232 *deftype = ROFFDEF_STD;
4233 return NULL;
4234 } else {
4235 found = 1;
4236 break;
4237 }
4238 }
4239 }
4240 if (r->man->meta.macroset != MACROSET_MDOC) {
4241 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4242 if (strncmp(name, roff_name[tok], len) != 0 ||
4243 roff_name[tok][len] != '\0')
4244 continue;
4245 if (*deftype & ROFFDEF_STD) {
4246 *deftype = ROFFDEF_STD;
4247 return NULL;
4248 } else {
4249 found = 1;
4250 break;
4251 }
4252 }
4253 }
4254
4255 if (found == 0 && *deftype != ROFFDEF_ANY) {
4256 if (*deftype & ROFFDEF_REN) {
4257 /*
4258 * This might still be a request,
4259 * so do not treat it as undefined yet.
4260 */
4261 *deftype = ROFFDEF_UNDEF;
4262 return NULL;
4263 }
4264
4265 /* Using an undefined string defines it to be empty. */
4266
4267 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4268 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4269 }
4270
4271 *deftype = 0;
4272 return NULL;
4273 }
4274
4275 static void
roff_freestr(struct roffkv * r)4276 roff_freestr(struct roffkv *r)
4277 {
4278 struct roffkv *n, *nn;
4279
4280 for (n = r; n; n = nn) {
4281 free(n->key.p);
4282 free(n->val.p);
4283 nn = n->next;
4284 free(n);
4285 }
4286 }
4287
4288 /* --- accessors and utility functions ------------------------------------ */
4289
4290 /*
4291 * Duplicate an input string, making the appropriate character
4292 * conversations (as stipulated by `tr') along the way.
4293 * Returns a heap-allocated string with all the replacements made.
4294 */
4295 char *
roff_strdup(const struct roff * r,const char * p)4296 roff_strdup(const struct roff *r, const char *p)
4297 {
4298 const struct roffkv *cp;
4299 char *res;
4300 const char *pp;
4301 size_t ssz, sz;
4302 enum mandoc_esc esc;
4303
4304 if (NULL == r->xmbtab && NULL == r->xtab)
4305 return mandoc_strdup(p);
4306 else if ('\0' == *p)
4307 return mandoc_strdup("");
4308
4309 /*
4310 * Step through each character looking for term matches
4311 * (remember that a `tr' can be invoked with an escape, which is
4312 * a glyph but the escape is multi-character).
4313 * We only do this if the character hash has been initialised
4314 * and the string is >0 length.
4315 */
4316
4317 res = NULL;
4318 ssz = 0;
4319
4320 while ('\0' != *p) {
4321 assert((unsigned int)*p < 128);
4322 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4323 sz = r->xtab[(int)*p].sz;
4324 res = mandoc_realloc(res, ssz + sz + 1);
4325 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4326 ssz += sz;
4327 p++;
4328 continue;
4329 } else if ('\\' != *p) {
4330 res = mandoc_realloc(res, ssz + 2);
4331 res[ssz++] = *p++;
4332 continue;
4333 }
4334
4335 /* Search for term matches. */
4336 for (cp = r->xmbtab; cp; cp = cp->next)
4337 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4338 break;
4339
4340 if (NULL != cp) {
4341 /*
4342 * A match has been found.
4343 * Append the match to the array and move
4344 * forward by its keysize.
4345 */
4346 res = mandoc_realloc(res,
4347 ssz + cp->val.sz + 1);
4348 memcpy(res + ssz, cp->val.p, cp->val.sz);
4349 ssz += cp->val.sz;
4350 p += (int)cp->key.sz;
4351 continue;
4352 }
4353
4354 /*
4355 * Handle escapes carefully: we need to copy
4356 * over just the escape itself, or else we might
4357 * do replacements within the escape itself.
4358 * Make sure to pass along the bogus string.
4359 */
4360 pp = p++;
4361 esc = mandoc_escape(&p, NULL, NULL);
4362 if (ESCAPE_ERROR == esc) {
4363 sz = strlen(pp);
4364 res = mandoc_realloc(res, ssz + sz + 1);
4365 memcpy(res + ssz, pp, sz);
4366 break;
4367 }
4368 /*
4369 * We bail out on bad escapes.
4370 * No need to warn: we already did so when
4371 * roff_expand() was called.
4372 */
4373 sz = (int)(p - pp);
4374 res = mandoc_realloc(res, ssz + sz + 1);
4375 memcpy(res + ssz, pp, sz);
4376 ssz += sz;
4377 }
4378
4379 res[(int)ssz] = '\0';
4380 return res;
4381 }
4382
4383 int
roff_getformat(const struct roff * r)4384 roff_getformat(const struct roff *r)
4385 {
4386
4387 return r->format;
4388 }
4389
4390 /*
4391 * Find out whether a line is a macro line or not.
4392 * If it is, adjust the current position and return one; if it isn't,
4393 * return zero and don't change the current position.
4394 * If the control character has been set with `.cc', then let that grain
4395 * precedence.
4396 * This is slightly contrary to groff, where using the non-breaking
4397 * control character when `cc' has been invoked will cause the
4398 * non-breaking macro contents to be printed verbatim.
4399 */
4400 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4401 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4402 {
4403 int pos;
4404
4405 pos = *ppos;
4406
4407 if (r->control != '\0' && cp[pos] == r->control)
4408 pos++;
4409 else if (r->control != '\0')
4410 return 0;
4411 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4412 pos += 2;
4413 else if ('.' == cp[pos] || '\'' == cp[pos])
4414 pos++;
4415 else
4416 return 0;
4417
4418 while (' ' == cp[pos] || '\t' == cp[pos])
4419 pos++;
4420
4421 *ppos = pos;
4422 return 1;
4423 }
4424