1 /* $OpenBSD: roff.c,v 1.272 2023/10/24 20:30:49 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "mandoc_aux.h"
32 #include "mandoc_ohash.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mandoc_parse.h"
36 #include "libmandoc.h"
37 #include "roff_int.h"
38 #include "tbl_parse.h"
39 #include "eqn_parse.h"
40
/* Maximum number of string expansions per line, to break infinite loops. */
#define EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Each type occupies a bit of its own, so several of them
 * can be combined into one mask with bitwise or.
 */
#define ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
/* Mask matching any of the four types above. */
#define ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
52
53 /* --- data types --------------------------------------------------------- */
54
/*
 * An incredibly-simple string buffer:
 * a pointer to the text together with its cached length.
 */
struct roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
62
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * The strtab, rentab, and xmbtab members of struct roff
 * are lists of this type.
 */
struct roffkv {
	struct roffstr	 key; /* lookup key */
	struct roffstr	 val; /* value associated with the key */
	struct roffkv	*next; /* next in list */
};
71
/*
 * A single number register as part of a singly-linked list.
 * The regtab member of struct roff is the head of that list.
 */
struct roffreg {
	struct roffstr	 key; /* name of the register */
	int		 val; /* value of the register */
	/* NOTE(review): presumably the auto-increment step - confirm. */
	int		 step;
	struct roffreg	*next; /* next in list */
};
81
/*
 * Association of request and macro names with token IDs.
 * roffhash_alloc() creates one of these for each named token
 * and stores it in the hash table, keyed by the name.
 */
struct roffreq {
	enum roff_tok	 tok; /* token ID */
	char		 name[]; /* nil-terminated name, the hash key */
};
89
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * NOTE(review): the field comments below are inferred from the
 * field names only - confirm against the code using them.
 */
struct mctx {
	char	**argv; /* presumably the arguments of the macro call */
	int	 argc; /* presumably the number of arguments in use */
	int	 argsz; /* presumably the allocated size of argv */
};
99
/*
 * The state of the roff parser.
 * Allocated by roff_alloc(), cleared for reuse by roff_reset(),
 * and released by roff_free().
 */
struct roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack, -1 when empty */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack, -1 when empty */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
127
/*
 * A macro definition, condition, or ignored block.
 * The nodes form a stack linked through their parent pointers;
 * the last member of struct roff points to the top of that stack.
 */
struct roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
141
/*
 * The parameter list shared by all request handler functions.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function. */
typedef	int (*roffproc)(ROFF_ARGS);
151
/*
 * The handler functions and flags of one request.
 * The roffs[] array contains one of these per token.
 */
struct roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise or of the ROFFMAC_* flags */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
159
/*
 * One predefined string: its name and the text it is replaced with.
 */
struct predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expands to the initializer of one struct predef,
 * including the trailing comma.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
167
168 /* --- function prototypes ------------------------------------------------ */
169
/*
 * Forward declarations of the file-local functions.
 * Those declared with ROFF_ARGS have the signature required
 * by the roffproc function pointer type.
 */
static	int		 roffnode_cleanscope(struct roff *);
static	int		 roffnode_pop(struct roff *);
static	void		 roffnode_push(struct roff *, enum roff_tok,
				const char *, int, int);
static	void		 roff_addtbl(struct roff_man *, int, struct tbl_node *);
static	int		 roff_als(ROFF_ARGS);
static	int		 roff_block(ROFF_ARGS);
static	int		 roff_block_text(ROFF_ARGS);
static	int		 roff_block_sub(ROFF_ARGS);
static	int		 roff_break(ROFF_ARGS);
static	int		 roff_cblock(ROFF_ARGS);
static	int		 roff_cc(ROFF_ARGS);
static	int		 roff_ccond(struct roff *, int, int);
static	int		 roff_char(ROFF_ARGS);
static	int		 roff_cond(ROFF_ARGS);
static	int		 roff_cond_checkend(ROFF_ARGS);
static	int		 roff_cond_text(ROFF_ARGS);
static	int		 roff_cond_sub(ROFF_ARGS);
static	int		 roff_ds(ROFF_ARGS);
static	int		 roff_ec(ROFF_ARGS);
static	int		 roff_eo(ROFF_ARGS);
static	int		 roff_eqndelim(struct roff *, struct buf *, int);
static	int		 roff_evalcond(struct roff *, int, char *, int *);
static	int		 roff_evalnum(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalpar(struct roff *, int,
				const char *, int *, int *, int);
static	int		 roff_evalstrcond(const char *, int *);
static	int		 roff_expand(struct roff *, struct buf *,
				int, int, char);
static	void		 roff_expand_patch(struct buf *, int,
				const char *, int);
static	void		 roff_free1(struct roff *);
static	void		 roff_freereg(struct roffreg *);
static	void		 roff_freestr(struct roffkv *);
static	size_t		 roff_getname(struct roff *, char **, int, int);
static	int		 roff_getnum(const char *, int *, int *, int);
static	int		 roff_getop(const char *, int *, char *);
static	int		 roff_getregn(struct roff *,
				const char *, size_t, char);
static	int		 roff_getregro(const struct roff *,
				const char *name);
static	const char	*roff_getstrn(struct roff *,
				const char *, size_t, int *);
static	int		 roff_hasregn(const struct roff *,
				const char *, size_t);
static	int		 roff_insec(ROFF_ARGS);
static	int		 roff_it(ROFF_ARGS);
static	int		 roff_line_ignore(ROFF_ARGS);
static	void		 roff_man_alloc1(struct roff_man *);
static	void		 roff_man_free1(struct roff_man *);
static	int		 roff_manyarg(ROFF_ARGS);
static	int		 roff_mc(ROFF_ARGS);
static	int		 roff_noarg(ROFF_ARGS);
static	int		 roff_nop(ROFF_ARGS);
static	int		 roff_nr(ROFF_ARGS);
static	int		 roff_onearg(ROFF_ARGS);
static	enum roff_tok	 roff_parse(struct roff *, char *, int *,
				int, int);
static	int		 roff_parse_comment(struct roff *, struct buf *,
				int, int, char);
static	int		 roff_parsetext(struct roff *, struct buf *,
				int, int *);
static	int		 roff_renamed(ROFF_ARGS);
static	int		 roff_req_or_macro(ROFF_ARGS);
static	int		 roff_return(ROFF_ARGS);
static	int		 roff_rm(ROFF_ARGS);
static	int		 roff_rn(ROFF_ARGS);
static	int		 roff_rr(ROFF_ARGS);
static	void		 roff_setregn(struct roff *, const char *,
				size_t, int, char, int);
static	void		 roff_setstr(struct roff *,
				const char *, const char *, int);
static	void		 roff_setstrn(struct roffkv **, const char *,
				size_t, const char *, size_t, int);
static	int		 roff_shift(ROFF_ARGS);
static	int		 roff_so(ROFF_ARGS);
static	int		 roff_tr(ROFF_ARGS);
static	int		 roff_Dd(ROFF_ARGS);
static	int		 roff_TE(ROFF_ARGS);
static	int		 roff_TS(ROFF_ARGS);
static	int		 roff_EQ(ROFF_ARGS);
static	int		 roff_EN(ROFF_ARGS);
static	int		 roff_T_(ROFF_ARGS);
static	int		 roff_unsupp(ROFF_ARGS);
static	int		 roff_userdef(ROFF_ARGS);
256
257 /* --- constant data ------------------------------------------------------ */
258
/* Flag bits controlling the number parsing functions. */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
261
/*
 * The names of all requests and macros, indexed by token.
 * A NULL entry marks a token that has no name; roffhash_alloc()
 * skips such entries when building a lookup table.
 * The table is made available under the name roff_name,
 * through which the pointers in it cannot be modified.
 */
const char *__roff_name[MAN_MAX + 1] = {
	"br", "ce", "fi", "ft",
	"ll", "mc", "nf",
	"po", "rj", "sp",
	"ta", "ti", NULL,
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	"wh", "while", "write", "writec",
	"writem", "xflag", ".", NULL,
	NULL, "text",
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	"Tg", NULL,
	"TH", "SH", "SS", "TP",
	"TQ",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"SY", "YS", "OP",
	"EX", "EE", "UR",
	"UE", "MT", "ME", "MR",
	NULL
};
const	char *const *roff_name = __roff_name;
372
/*
 * The handlers of all roff requests, one entry per token;
 * the comment at the end of each line names the request.
 * Only the entries handled by roff_block() and roff_cond()
 * provide text and sub handlers; all others leave them NULL.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 }, /* br */
	{ roff_onearg, NULL, NULL, 0 }, /* ce */
	{ roff_noarg, NULL, NULL, 0 }, /* fi */
	{ roff_onearg, NULL, NULL, 0 }, /* ft */
	{ roff_onearg, NULL, NULL, 0 }, /* ll */
	{ roff_mc, NULL, NULL, 0 }, /* mc */
	{ roff_noarg, NULL, NULL, 0 }, /* nf */
	{ roff_onearg, NULL, NULL, 0 }, /* po */
	{ roff_onearg, NULL, NULL, 0 }, /* rj */
	{ roff_onearg, NULL, NULL, 0 }, /* sp */
	{ roff_manyarg, NULL, NULL, 0 }, /* ta */
	{ roff_onearg, NULL, NULL, 0 }, /* ti */
	{ NULL, NULL, NULL, 0 }, /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 }, /* ab */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ad */
	{ roff_line_ignore, NULL, NULL, 0 }, /* af */
	{ roff_unsupp, NULL, NULL, 0 }, /* aln */
	{ roff_als, NULL, NULL, 0 }, /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */
	{ roff_ds, NULL, NULL, 0 }, /* as */
	{ roff_ds, NULL, NULL, 0 }, /* as1 */
	{ roff_unsupp, NULL, NULL, 0 }, /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 }, /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 }, /* bd */
	{ roff_line_ignore, NULL, NULL, 0 }, /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 }, /* blm */
	{ roff_unsupp, NULL, NULL, 0 }, /* box */
	{ roff_unsupp, NULL, NULL, 0 }, /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 }, /* bp */
	{ roff_unsupp, NULL, NULL, 0 }, /* BP */
	{ roff_break, NULL, NULL, 0 }, /* break */
	{ roff_line_ignore, NULL, NULL, 0 }, /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* brnl */
	{ roff_noarg, NULL, NULL, 0 }, /* brp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 }, /* c2 */
	{ roff_cc, NULL, NULL, 0 }, /* cc */
	{ roff_insec, NULL, NULL, 0 }, /* cf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ch */
	{ roff_char, NULL, NULL, 0 }, /* char */
	{ roff_unsupp, NULL, NULL, 0 }, /* chop */
	{ roff_line_ignore, NULL, NULL, 0 }, /* class */
	{ roff_insec, NULL, NULL, 0 }, /* close */
	{ roff_unsupp, NULL, NULL, 0 }, /* CL */
	{ roff_line_ignore, NULL, NULL, 0 }, /* color */
	{ roff_unsupp, NULL, NULL, 0 }, /* composite */
	{ roff_unsupp, NULL, NULL, 0 }, /* continue */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cs */
	{ roff_line_ignore, NULL, NULL, 0 }, /* cu */
	{ roff_unsupp, NULL, NULL, 0 }, /* da */
	{ roff_unsupp, NULL, NULL, 0 }, /* dch */
	{ roff_Dd, NULL, NULL, 0 }, /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 }, /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 }, /* device */
	{ roff_unsupp, NULL, NULL, 0 }, /* devicem */
	{ roff_unsupp, NULL, NULL, 0 }, /* di */
	{ roff_unsupp, NULL, NULL, 0 }, /* do */
	{ roff_ds, NULL, NULL, 0 }, /* ds */
	{ roff_ds, NULL, NULL, 0 }, /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 }, /* dwh */
	{ roff_unsupp, NULL, NULL, 0 }, /* dt */
	{ roff_ec, NULL, NULL, 0 }, /* ec */
	{ roff_unsupp, NULL, NULL, 0 }, /* ecr */
	{ roff_unsupp, NULL, NULL, 0 }, /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */
	{ roff_unsupp, NULL, NULL, 0 }, /* em */
	{ roff_EN, NULL, NULL, 0 }, /* EN */
	{ roff_eo, NULL, NULL, 0 }, /* eo */
	{ roff_unsupp, NULL, NULL, 0 }, /* EP */
	{ roff_EQ, NULL, NULL, 0 }, /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 }, /* errprint */
	{ roff_unsupp, NULL, NULL, 0 }, /* ev */
	{ roff_unsupp, NULL, NULL, 0 }, /* evc */
	{ roff_unsupp, NULL, NULL, 0 }, /* ex */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fam */
	{ roff_unsupp, NULL, NULL, 0 }, /* fc */
	{ roff_unsupp, NULL, NULL, 0 }, /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 }, /* feature */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fl */
	{ roff_line_ignore, NULL, NULL, 0 }, /* flig */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fps */
	{ roff_unsupp, NULL, NULL, 0 }, /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 }, /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hc */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hla */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hw */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hy */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hym */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 }, /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */
	{ roff_unsupp, NULL, NULL, 0 }, /* index */
	{ roff_it, NULL, NULL, 0 }, /* it */
	{ roff_unsupp, NULL, NULL, 0 }, /* itc */
	{ roff_line_ignore, NULL, NULL, 0 }, /* IX */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kern */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 }, /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 }, /* lc */
	{ roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 }, /* lds */
	{ roff_unsupp, NULL, NULL, 0 }, /* length */
	{ roff_line_ignore, NULL, NULL, 0 }, /* letadj */
	{ roff_insec, NULL, NULL, 0 }, /* lf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* lg */
	{ roff_line_ignore, NULL, NULL, 0 }, /* lhang */
	{ roff_unsupp, NULL, NULL, 0 }, /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 }, /* lnr */
	{ roff_unsupp, NULL, NULL, 0 }, /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 }, /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ls */
	{ roff_unsupp, NULL, NULL, 0 }, /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* lt */
	{ roff_line_ignore, NULL, NULL, 0 }, /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 }, /* minss */
	{ roff_line_ignore, NULL, NULL, 0 }, /* mk */
	{ roff_insec, NULL, NULL, 0 }, /* mso */
	{ roff_line_ignore, NULL, NULL, 0 }, /* na */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ne */
	{ roff_line_ignore, NULL, NULL, 0 }, /* nh */
	{ roff_line_ignore, NULL, NULL, 0 }, /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 }, /* nm */
	{ roff_unsupp, NULL, NULL, 0 }, /* nn */
	{ roff_nop, NULL, NULL, 0 }, /* nop */
	{ roff_nr, NULL, NULL, 0 }, /* nr */
	{ roff_unsupp, NULL, NULL, 0 }, /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ns */
	{ roff_insec, NULL, NULL, 0 }, /* nx */
	{ roff_insec, NULL, NULL, 0 }, /* open */
	{ roff_insec, NULL, NULL, 0 }, /* opena */
	{ roff_line_ignore, NULL, NULL, 0 }, /* os */
	{ roff_unsupp, NULL, NULL, 0 }, /* output */
	{ roff_line_ignore, NULL, NULL, 0 }, /* padj */
	{ roff_line_ignore, NULL, NULL, 0 }, /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pc */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pev */
	{ roff_insec, NULL, NULL, 0 }, /* pi */
	{ roff_unsupp, NULL, NULL, 0 }, /* PI */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pl */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ps */
	{ roff_unsupp, NULL, NULL, 0 }, /* psbb */
	{ roff_unsupp, NULL, NULL, 0 }, /* pshape */
	{ roff_insec, NULL, NULL, 0 }, /* pso */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* pvs */
	{ roff_unsupp, NULL, NULL, 0 }, /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rd */
	{ roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */
	{ roff_return, NULL, NULL, 0 }, /* return */
	{ roff_unsupp, NULL, NULL, 0 }, /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rhang */
	{ roff_rm, NULL, NULL, 0 }, /* rm */
	{ roff_rn, NULL, NULL, 0 }, /* rn */
	{ roff_unsupp, NULL, NULL, 0 }, /* rnn */
	{ roff_rr, NULL, NULL, 0 }, /* rr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rs */
	{ roff_line_ignore, NULL, NULL, 0 }, /* rt */
	{ roff_unsupp, NULL, NULL, 0 }, /* schar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 }, /* shc */
	{ roff_shift, NULL, NULL, 0 }, /* shift */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sizes */
	{ roff_so, NULL, NULL, 0 }, /* so */
	{ roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 }, /* special */
	{ roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ss */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sty */
	{ roff_unsupp, NULL, NULL, 0 }, /* substring */
	{ roff_line_ignore, NULL, NULL, 0 }, /* sv */
	{ roff_insec, NULL, NULL, 0 }, /* sy */
	{ roff_T_, NULL, NULL, 0 }, /* T& */
	{ roff_unsupp, NULL, NULL, 0 }, /* tc */
	{ roff_TE, NULL, NULL, 0 }, /* TE */
	{ roff_Dd, NULL, NULL, 0 }, /* TH */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tkf */
	{ roff_unsupp, NULL, NULL, 0 }, /* tl */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tm */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 }, /* tmc */
	{ roff_tr, NULL, NULL, 0 }, /* tr */
	{ roff_line_ignore, NULL, NULL, 0 }, /* track */
	{ roff_line_ignore, NULL, NULL, 0 }, /* transchar */
	{ roff_insec, NULL, NULL, 0 }, /* trf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* trimat */
	{ roff_unsupp, NULL, NULL, 0 }, /* trin */
	{ roff_unsupp, NULL, NULL, 0 }, /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 }, /* troff */
	{ roff_TS, NULL, NULL, 0 }, /* TS */
	{ roff_line_ignore, NULL, NULL, 0 }, /* uf */
	{ roff_line_ignore, NULL, NULL, 0 }, /* ul */
	{ roff_unsupp, NULL, NULL, 0 }, /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 }, /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 }, /* vs */
	{ roff_line_ignore, NULL, NULL, 0 }, /* warn */
	{ roff_line_ignore, NULL, NULL, 0 }, /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 }, /* watch */
	{ roff_line_ignore, NULL, NULL, 0 }, /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 }, /* watchn */
	{ roff_unsupp, NULL, NULL, 0 }, /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 }, /* write */
	{ roff_insec, NULL, NULL, 0 }, /* writec */
	{ roff_insec, NULL, NULL, 0 }, /* writem */
	{ roff_line_ignore, NULL, NULL, 0 }, /* xflag */
	{ roff_cblock, NULL, NULL, 0 }, /* . */
	/*
	 * NOTE(review): the two entries below have no request name;
	 * presumably they serve renamed requests and user-defined
	 * macros, respectively - confirm against enum roff_tok.
	 */
	{ roff_renamed, NULL, NULL, 0 },
	{ roff_userdef, NULL, NULL, 0 }
};
619
/*
 * Array of injected predefined strings.
 * The initializers are taken from the file predefs.in.
 * NOTE(review): presumably that file consists of PREDEF() invocations
 * and PREDEFS_MAX has to match their number - confirm when editing it.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
625
/*
 * Parser state kept at file scope rather than in struct roff.
 * All four variables are cleared by roff_reset().
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
630
631
632 /* --- request table ------------------------------------------------------ */
633
634 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)635 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
636 {
637 struct ohash *htab;
638 struct roffreq *req;
639 enum roff_tok tok;
640 size_t sz;
641 unsigned int slot;
642
643 htab = mandoc_malloc(sizeof(*htab));
644 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
645
646 for (tok = mintok; tok < maxtok; tok++) {
647 if (roff_name[tok] == NULL)
648 continue;
649 sz = strlen(roff_name[tok]);
650 req = mandoc_malloc(sizeof(*req) + sz + 1);
651 req->tok = tok;
652 memcpy(req->name, roff_name[tok], sz + 1);
653 slot = ohash_qlookup(htab, req->name);
654 ohash_insert(htab, slot, req);
655 }
656 return htab;
657 }
658
/*
 * Release a request table: free every entry stored in it,
 * then the table itself.  A NULL pointer is silently accepted.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 idx;

	if (htab != NULL) {
		entry = ohash_first(htab, &idx);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &idx);
		}
		ohash_delete(htab);
		free(htab);
	}
}
673
674 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)675 roffhash_find(struct ohash *htab, const char *name, size_t sz)
676 {
677 struct roffreq *req;
678 const char *end;
679
680 if (sz) {
681 end = name + sz;
682 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
683 } else
684 req = ohash_find(htab, ohash_qlookup(htab, name));
685 return req == NULL ? TOKEN_NONE : req->tok;
686 }
687
688 /* --- stack of request blocks -------------------------------------------- */
689
690 /*
691 * Pop the current node off of the stack of roff instructions currently
692 * pending. Return 1 if it is a loop or 0 otherwise.
693 */
694 static int
roffnode_pop(struct roff * r)695 roffnode_pop(struct roff *r)
696 {
697 struct roffnode *p;
698 int inloop;
699
700 p = r->last;
701 inloop = p->tok == ROFF_while;
702 r->last = p->parent;
703 free(p->name);
704 free(p->end);
705 free(p);
706 return inloop;
707 }
708
709 /*
710 * Push a roff node onto the instruction stack. This must later be
711 * removed with roffnode_pop().
712 */
713 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)714 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
715 int line, int col)
716 {
717 struct roffnode *p;
718
719 p = mandoc_calloc(1, sizeof(struct roffnode));
720 p->tok = tok;
721 if (name)
722 p->name = mandoc_strdup(name);
723 p->parent = r->last;
724 p->line = line;
725 p->col = col;
726 p->rule = p->parent ? p->parent->rule : 0;
727
728 r->last = p;
729 }
730
731 /* --- roff parser state data management ---------------------------------- */
732
733 static void
roff_free1(struct roff * r)734 roff_free1(struct roff *r)
735 {
736 int i;
737
738 tbl_free(r->first_tbl);
739 r->first_tbl = r->last_tbl = r->tbl = NULL;
740
741 eqn_free(r->last_eqn);
742 r->last_eqn = r->eqn = NULL;
743
744 while (r->mstackpos >= 0)
745 roff_userret(r);
746
747 while (r->last)
748 roffnode_pop(r);
749
750 free (r->rstack);
751 r->rstack = NULL;
752 r->rstacksz = 0;
753 r->rstackpos = -1;
754
755 roff_freereg(r->regtab);
756 r->regtab = NULL;
757
758 roff_freestr(r->strtab);
759 roff_freestr(r->rentab);
760 roff_freestr(r->xmbtab);
761 r->strtab = r->rentab = r->xmbtab = NULL;
762
763 if (r->xtab)
764 for (i = 0; i < 128; i++)
765 free(r->xtab[i].p);
766 free(r->xtab);
767 r->xtab = NULL;
768 }
769
770 void
roff_reset(struct roff * r)771 roff_reset(struct roff *r)
772 {
773 roff_free1(r);
774 r->options |= MPARSE_COMMENT;
775 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
776 r->control = '\0';
777 r->escape = '\\';
778 roffce_lines = 0;
779 roffce_node = NULL;
780 roffit_lines = 0;
781 roffit_macro = NULL;
782 }
783
784 void
roff_free(struct roff * r)785 roff_free(struct roff *r)
786 {
787 int i;
788
789 roff_free1(r);
790 for (i = 0; i < r->mstacksz; i++)
791 free(r->mstack[i].argv);
792 free(r->mstack);
793 roffhash_free(r->reqtab);
794 free(r);
795 }
796
797 struct roff *
roff_alloc(int options)798 roff_alloc(int options)
799 {
800 struct roff *r;
801
802 r = mandoc_calloc(1, sizeof(struct roff));
803 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
804 r->options = options | MPARSE_COMMENT;
805 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
806 r->mstackpos = -1;
807 r->rstackpos = -1;
808 r->escape = '\\';
809 return r;
810 }
811
812 /* --- syntax tree state data management ---------------------------------- */
813
814 static void
roff_man_free1(struct roff_man * man)815 roff_man_free1(struct roff_man *man)
816 {
817 if (man->meta.first != NULL)
818 roff_node_delete(man, man->meta.first);
819 free(man->meta.msec);
820 free(man->meta.vol);
821 free(man->meta.os);
822 free(man->meta.arch);
823 free(man->meta.title);
824 free(man->meta.name);
825 free(man->meta.date);
826 free(man->meta.sodest);
827 }
828
829 void
roff_state_reset(struct roff_man * man)830 roff_state_reset(struct roff_man *man)
831 {
832 man->last = man->meta.first;
833 man->last_es = NULL;
834 man->flags = 0;
835 man->lastsec = man->lastnamed = SEC_NONE;
836 man->next = ROFF_NEXT_CHILD;
837 roff_setreg(man->roff, "nS", 0, '=');
838 }
839
840 static void
roff_man_alloc1(struct roff_man * man)841 roff_man_alloc1(struct roff_man *man)
842 {
843 memset(&man->meta, 0, sizeof(man->meta));
844 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
845 man->meta.first->type = ROFFT_ROOT;
846 man->meta.macroset = MACROSET_NONE;
847 roff_state_reset(man);
848 }
849
/*
 * Discard the syntax tree and metadata of the previous document
 * and set up fresh, empty ones in the same struct roff_man.
 */
void
roff_man_reset(struct roff_man *man)
{
	roff_man_free1(man);	/* release the old tree and strings */
	roff_man_alloc1(man);	/* start over with a new root node */
}
856
857 void
roff_man_free(struct roff_man * man)858 roff_man_free(struct roff_man *man)
859 {
860 roff_man_free1(man);
861 free(man->os_r);
862 free(man);
863 }
864
865 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)866 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
867 {
868 struct roff_man *man;
869
870 man = mandoc_calloc(1, sizeof(*man));
871 man->roff = roff;
872 man->os_s = os_s;
873 man->quick = quick;
874 roff_man_alloc1(man);
875 roff->man = man;
876 return man;
877 }
878
879 /* --- syntax tree handling ----------------------------------------------- */
880
881 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)882 roff_node_alloc(struct roff_man *man, int line, int pos,
883 enum roff_type type, int tok)
884 {
885 struct roff_node *n;
886
887 n = mandoc_calloc(1, sizeof(*n));
888 n->line = line;
889 n->pos = pos;
890 n->tok = tok;
891 n->type = type;
892 n->sec = man->lastsec;
893
894 if (man->flags & MDOC_SYNOPSIS)
895 n->flags |= NODE_SYNPRETTY;
896 else
897 n->flags &= ~NODE_SYNPRETTY;
898 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
899 n->flags |= NODE_NOFILL;
900 else
901 n->flags &= ~NODE_NOFILL;
902 if (man->flags & MDOC_NEWLINE)
903 n->flags |= NODE_LINE;
904 man->flags &= ~MDOC_NEWLINE;
905
906 return n;
907 }
908
909 void
roff_node_append(struct roff_man * man,struct roff_node * n)910 roff_node_append(struct roff_man *man, struct roff_node *n)
911 {
912
913 switch (man->next) {
914 case ROFF_NEXT_SIBLING:
915 if (man->last->next != NULL) {
916 n->next = man->last->next;
917 man->last->next->prev = n;
918 } else
919 man->last->parent->last = n;
920 man->last->next = n;
921 n->prev = man->last;
922 n->parent = man->last->parent;
923 break;
924 case ROFF_NEXT_CHILD:
925 if (man->last->child != NULL) {
926 n->next = man->last->child;
927 man->last->child->prev = n;
928 } else
929 man->last->last = n;
930 man->last->child = n;
931 n->parent = man->last;
932 break;
933 default:
934 abort();
935 }
936 man->last = n;
937
938 switch (n->type) {
939 case ROFFT_HEAD:
940 n->parent->head = n;
941 break;
942 case ROFFT_BODY:
943 if (n->end != ENDBODY_NOT)
944 return;
945 n->parent->body = n;
946 break;
947 case ROFFT_TAIL:
948 n->parent->tail = n;
949 break;
950 default:
951 return;
952 }
953
954 /*
955 * Copy over the normalised-data pointer of our parent. Not
956 * everybody has one, but copying a null pointer is fine.
957 */
958
959 n->norm = n->parent->norm;
960 assert(n->parent->type == ROFFT_BLOCK);
961 }
962
963 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)964 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
965 {
966 struct roff_node *n;
967
968 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
969 n->string = roff_strdup(man->roff, word);
970 roff_node_append(man, n);
971 n->flags |= NODE_VALID | NODE_ENDED;
972 man->next = ROFF_NEXT_SIBLING;
973 }
974
975 void
roff_word_append(struct roff_man * man,const char * word)976 roff_word_append(struct roff_man *man, const char *word)
977 {
978 struct roff_node *n;
979 char *addstr, *newstr;
980
981 n = man->last;
982 addstr = roff_strdup(man->roff, word);
983 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
984 free(addstr);
985 free(n->string);
986 n->string = newstr;
987 man->next = ROFF_NEXT_SIBLING;
988 }
989
990 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)991 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
992 {
993 struct roff_node *n;
994
995 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
996 roff_node_append(man, n);
997 man->next = ROFF_NEXT_CHILD;
998 }
999
1000 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)1001 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1002 {
1003 struct roff_node *n;
1004
1005 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1006 roff_node_append(man, n);
1007 man->next = ROFF_NEXT_CHILD;
1008 return n;
1009 }
1010
1011 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1012 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1013 {
1014 struct roff_node *n;
1015
1016 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1017 roff_node_append(man, n);
1018 man->next = ROFF_NEXT_CHILD;
1019 return n;
1020 }
1021
1022 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1023 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1024 {
1025 struct roff_node *n;
1026
1027 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1028 roff_node_append(man, n);
1029 man->next = ROFF_NEXT_CHILD;
1030 return n;
1031 }
1032
1033 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1034 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1035 {
1036 struct roff_node *n;
1037 struct tbl_span *span;
1038
1039 if (man->meta.macroset == MACROSET_MAN)
1040 man_breakscope(man, ROFF_TS);
1041 while ((span = tbl_span(tbl)) != NULL) {
1042 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1043 n->span = span;
1044 roff_node_append(man, n);
1045 n->flags |= NODE_VALID | NODE_ENDED;
1046 man->next = ROFF_NEXT_SIBLING;
1047 }
1048 }
1049
1050 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1051 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1052 {
1053
1054 /* Adjust siblings. */
1055
1056 if (n->prev)
1057 n->prev->next = n->next;
1058 if (n->next)
1059 n->next->prev = n->prev;
1060
1061 /* Adjust parent. */
1062
1063 if (n->parent != NULL) {
1064 if (n->parent->child == n)
1065 n->parent->child = n->next;
1066 if (n->parent->last == n)
1067 n->parent->last = n->prev;
1068 }
1069
1070 /* Adjust parse point. */
1071
1072 if (man == NULL)
1073 return;
1074 if (man->last == n) {
1075 if (n->prev == NULL) {
1076 man->last = n->parent;
1077 man->next = ROFF_NEXT_CHILD;
1078 } else {
1079 man->last = n->prev;
1080 man->next = ROFF_NEXT_SIBLING;
1081 }
1082 }
1083 if (man->meta.first == n)
1084 man->meta.first = NULL;
1085 }
1086
1087 void
roff_node_relink(struct roff_man * man,struct roff_node * n)1088 roff_node_relink(struct roff_man *man, struct roff_node *n)
1089 {
1090 roff_node_unlink(man, n);
1091 n->prev = n->next = NULL;
1092 roff_node_append(man, n);
1093 }
1094
1095 void
roff_node_free(struct roff_node * n)1096 roff_node_free(struct roff_node *n)
1097 {
1098
1099 if (n->args != NULL)
1100 mdoc_argv_free(n->args);
1101 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1102 free(n->norm);
1103 eqn_box_free(n->eqn);
1104 free(n->string);
1105 free(n->tag);
1106 free(n);
1107 }
1108
1109 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1110 roff_node_delete(struct roff_man *man, struct roff_node *n)
1111 {
1112
1113 while (n->child != NULL)
1114 roff_node_delete(man, n->child);
1115 roff_node_unlink(man, n);
1116 roff_node_free(n);
1117 }
1118
1119 int
roff_node_transparent(struct roff_node * n)1120 roff_node_transparent(struct roff_node *n)
1121 {
1122 if (n == NULL)
1123 return 0;
1124 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1125 return 1;
1126 return roff_tok_transparent(n->tok);
1127 }
1128
1129 int
roff_tok_transparent(enum roff_tok tok)1130 roff_tok_transparent(enum roff_tok tok)
1131 {
1132 switch (tok) {
1133 case ROFF_ft:
1134 case ROFF_ll:
1135 case ROFF_mc:
1136 case ROFF_po:
1137 case ROFF_ta:
1138 case MDOC_Db:
1139 case MDOC_Es:
1140 case MDOC_Sm:
1141 case MDOC_Tg:
1142 case MAN_DT:
1143 case MAN_UC:
1144 case MAN_PD:
1145 case MAN_AT:
1146 return 1;
1147 default:
1148 return 0;
1149 }
1150 }
1151
1152 struct roff_node *
roff_node_child(struct roff_node * n)1153 roff_node_child(struct roff_node *n)
1154 {
1155 for (n = n->child; roff_node_transparent(n); n = n->next)
1156 continue;
1157 return n;
1158 }
1159
1160 struct roff_node *
roff_node_prev(struct roff_node * n)1161 roff_node_prev(struct roff_node *n)
1162 {
1163 do {
1164 n = n->prev;
1165 } while (roff_node_transparent(n));
1166 return n;
1167 }
1168
1169 struct roff_node *
roff_node_next(struct roff_node * n)1170 roff_node_next(struct roff_node *n)
1171 {
1172 do {
1173 n = n->next;
1174 } while (roff_node_transparent(n));
1175 return n;
1176 }
1177
1178 void
deroff(char ** dest,const struct roff_node * n)1179 deroff(char **dest, const struct roff_node *n)
1180 {
1181 char *cp;
1182 size_t sz;
1183
1184 if (n->string == NULL) {
1185 for (n = n->child; n != NULL; n = n->next)
1186 deroff(dest, n);
1187 return;
1188 }
1189
1190 /* Skip leading whitespace. */
1191
1192 for (cp = n->string; *cp != '\0'; cp++) {
1193 if (cp[0] == '\\' && cp[1] != '\0' &&
1194 strchr(" %&0^|~", cp[1]) != NULL)
1195 cp++;
1196 else if ( ! isspace((unsigned char)*cp))
1197 break;
1198 }
1199
1200 /* Skip trailing backslash. */
1201
1202 sz = strlen(cp);
1203 if (sz > 0 && cp[sz - 1] == '\\')
1204 sz--;
1205
1206 /* Skip trailing whitespace. */
1207
1208 for (; sz; sz--)
1209 if ( ! isspace((unsigned char)cp[sz-1]))
1210 break;
1211
1212 /* Skip empty strings. */
1213
1214 if (sz == 0)
1215 return;
1216
1217 if (*dest == NULL) {
1218 *dest = mandoc_strndup(cp, sz);
1219 return;
1220 }
1221
1222 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1223 free(*dest);
1224 *dest = cp;
1225 }
1226
1227 /* --- main functions of the roff parser ---------------------------------- */
1228
/*
 * Save comments preceding the title macro, for example in order to
 * preserve Copyright and license headers in HTML output,
 * provide diagnostics about RCS ids and trailing whitespace in comments,
 * then discard comments including preceding whitespace.
 * This function also handles input line continuation.
 *
 * Scanning starts at buf->buf + pos; ec is the escape character,
 * and ln is the input line number used in diagnostics.
 * The buffer is truncated in place where a comment, a continuation
 * escape, or the trailing newline marker is found.
 * Return ROFF_CONT if the remaining line is to be processed further,
 * or ROFF_IGN | ROFF_APPEND if the line ends with a lone escape
 * character or a \# comment, requesting line continuation.
 */
static int
roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	struct roff_node *n;	/* used for header comments */
	const char	 *start;	/* start of the string to process */
	const char	 *cp;	/* for RCS id parsing */
	char		 *stesc;	/* start of an escape sequence ('\\') */
	char		 *ep;	/* end of comment string */
	int		  rcsid;	/* kind of RCS id seen */

	for (start = stesc = buf->buf + pos;; stesc++) {
		/*
		 * XXX Ugly hack: Remove the newline character that
		 * mparse_buf_r() appended to mark the end of input
		 * if it is not preceded by an escape character.
		 */
		if (stesc[0] == '\n') {
			assert(stesc[1] == '\0');
			stesc[0] = '\0';
		}

		/* The line ends without continuation or comment. */
		if (stesc[0] == '\0')
			return ROFF_CONT;

		/* Unescaped byte: skip it. */
		if (stesc[0] != ec)
			continue;

		/*
		 * XXX Ugly hack: Do not attempt to append another line
		 * if the function mparse_buf_r() appended a newline
		 * character to indicate the end of input.
		 */
		if (stesc[1] == '\n') {
			assert(stesc[2] == '\0');
			stesc[0] = '\0';
			return ROFF_CONT;
		}

		/*
		 * An escape character at the end of an input line
		 * requests line continuation.
		 */
		if (stesc[1] == '\0') {
			stesc[0] = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/* Found a comment: process it. */
		if (stesc[1] == '"' || stesc[1] == '#')
			break;

		/* Escaped escape character: skip them both. */
		if (stesc[1] == ec)
			stesc++;
	}

	/*
	 * From here on, stesc points to the escape character
	 * starting the comment and stesc + 2 to the comment text.
	 */

	/* Look for an RCS id in the comment. */

	rcsid = 0;
	if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_OPENBSD;
		cp += 8;	/* length of the keyword including '$' */
	} else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
		rcsid = 1 << MANDOC_OS_NETBSD;
		cp += 7;	/* length of the keyword including '$' */
	}
	/*
	 * Only accept the keyword if it is not the prefix of
	 * a longer word and if a closing '$' follows somewhere.
	 * Complain if an id of the same kind was already seen.
	 */
	if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
	    strchr(cp, '$') != NULL) {
		if (r->man->meta.rcsids & rcsid)
			mandoc_msg(MANDOCERR_RCS_REP, ln,
			    (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
		r->man->meta.rcsids |= rcsid;
	}

	/* Warn about trailing whitespace at the end of the comment. */

	ep = strchr(stesc + 2, '\0') - 1;	/* last byte of the line */
	if (*ep == '\n')
		*ep-- = '\0';
	if (*ep == ' ' || *ep == '\t')
		mandoc_msg(MANDOCERR_SPACE_EOL,
		    ln, (int)(ep - buf->buf), NULL);

	/* Save comments preceding the title macro in the syntax tree. */

	if (r->options & MPARSE_COMMENT) {
		/* Strip trailing blanks and tabs before saving. */
		while (*ep == ' ' || *ep == '\t')
			ep--;
		ep[1] = '\0';
		n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
		    ROFFT_COMMENT, TOKEN_NONE);
		n->string = mandoc_strdup(stesc + 2);
		roff_node_append(r->man, n);
		n->flags |= NODE_VALID | NODE_ENDED;
		r->man->next = ROFF_NEXT_SIBLING;
	}

	/* The comment requests line continuation. */

	if (stesc[1] == '#') {
		*stesc = '\0';
		return ROFF_IGN | ROFF_APPEND;
	}

	/*
	 * Discard the comment including preceding whitespace,
	 * but keep a blank that is itself preceded by a backslash.
	 */

	while (stesc > start && stesc[-1] == ' ' &&
	    (stesc == start + 1 || stesc[-2] != '\\'))
		stesc--;
	*stesc = '\0';
	return ROFF_CONT;
}
1350
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * Processing starts at buf->buf + pos; ec is the escape character,
 * and ln is the input line number used in diagnostics.
 * The buffer may be reallocated, so pointers into buf->buf
 * obtained before the call must not be used afterwards.
 * Return ROFF_CONT on success, or ROFF_IGN after reporting
 * MANDOCERR_ROFFLOOP when more than EXPAND_LIMIT expansions occur
 * or the line would grow beyond SHRT_MAX bytes.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
	char		 ubuf[24];	/* buffer to print a number */
	struct mctx	*ctx;		/* current macro call context */
	const char	*res;		/* the string to be pasted */
	const char	*src;		/* source for copying */
	char		*dst;		/* destination for copying */
	enum mandoc_esc	 subtype;	/* return value from roff_escape */
	int		 iesc;		/* index of leading escape char */
	int		 inam;		/* index of the escape name */
	int		 iarg;		/* index beginning the argument */
	int		 iendarg;	/* index right after the argument */
	int		 iend;		/* index right after the sequence */
	int		 isrc, idst;	/* to reduce \\ and \. in names */
	int		 deftype;	/* type of definition to paste */
	int		 argi;		/* macro argument index */
	int		 quote_args;	/* true for \\$@, false for \\$* */
	int		 asz;		/* length of the replacement */
	int		 rsz;		/* length of the rest of the string */
	int		 npos;		/* position in numeric expression */
	int		 expand_count;	/* to avoid infinite loops */

	expand_count = 0;
	while (buf->buf[pos] != '\0') {

		/*
		 * Skip plain ASCII characters.
		 * If we have a non-standard escape character,
		 * escape literal backslashes because all processing in
		 * subsequent functions uses the standard escaping rules.
		 */

		if (buf->buf[pos] != ec) {
			if (buf->buf[pos] == '\\') {
				roff_expand_patch(buf, pos, "\\e", pos + 1);
				pos++;
			}
			pos++;
			continue;
		}

		/*
		 * Parse escape sequences,
		 * issue diagnostic messages when appropriate,
		 * and skip sequences that do not need expansion.
		 * If we have a non-standard escape character, translate
		 * it to backslashes and translate backslashes to \e.
		 */

		if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
		    &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
			while (pos < iend) {
				if (buf->buf[pos] == ec) {
					buf->buf[pos] = '\\';
					if (pos + 1 < iend)
						pos++;
				} else if (buf->buf[pos] == '\\') {
					roff_expand_patch(buf,
					    pos, "\\e", pos + 1);
					pos++;
					/* The patch made the sequence longer. */
					iend++;
				}
				pos++;
			}
			continue;
		}

		/* Reduce \\ and \. in names. */

		if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
			isrc = idst = iarg;
			while (isrc < iendarg) {
				if (isrc + 1 < iendarg &&
				    buf->buf[isrc] == '\\' &&
				    (buf->buf[isrc + 1] == '\\' ||
				     buf->buf[isrc + 1] == '.'))
					isrc++;
				buf->buf[idst++] = buf->buf[isrc++];
			}
			/* The name shrank by the number of bytes dropped. */
			iendarg -= isrc - idst;
		}

		/* Handle expansion. */

		res = NULL;
		switch (buf->buf[inam]) {
		case '*':
			/* String interpolation; an empty name expands to nothing. */
			if (iendarg == iarg)
				break;
			deftype = ROFFDEF_USER | ROFFDEF_PRE;
			if ((res = roff_getstrn(r, buf->buf + iarg,
			    iendarg - iarg, &deftype)) != NULL)
				break;

			/*
			 * If not overridden,
			 * let \*(.T through to the formatters.
			 */

			if (iendarg - iarg == 2 &&
			    buf->buf[iarg] == '.' &&
			    buf->buf[iarg + 1] == 'T') {
				roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
				pos = iend;
				continue;
			}

			mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
			    "%.*s", iendarg - iarg, buf->buf + iarg);
			break;

		case '$':
			/* Macro arguments are only defined inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* The digits '1' to '9' select argv[0] to argv[8]. */
			argi = buf->buf[iarg] - '1';
			if (argi >= 0 && argi <= 8) {
				if (argi < ctx->argc)
					res = ctx->argv[argi];
				break;
			}
			if (buf->buf[iarg] == '*')
				quote_args = 0;
			else if (buf->buf[iarg] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
				    "%.*s", iend - iesc, buf->buf + iesc);
				break;
			}
			/* Compute the length of all arguments joined by blanks. */
			asz = 0;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[argi]);
			}
			/*
			 * Resize the gap occupied by the escape sequence.
			 * When shrinking, move the rest before the
			 * reallocation; when growing, move it afterwards.
			 */
			if (asz != iend - iesc) {
				rsz = buf->sz - iend;
				if (asz < iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
				buf->sz = iesc + asz + rsz;
				buf->buf = mandoc_realloc(buf->buf, buf->sz);
				if (asz > iend - iesc)
					memmove(buf->buf + iesc + asz,
					    buf->buf + iend, rsz);
			}
			/* Copy the arguments into the gap. */
			dst = buf->buf + iesc;
			for (argi = 0; argi < ctx->argc; argi++) {
				if (argi)
					*dst++ = ' ';
				if (quote_args)
					*dst++ = '"';
				src = ctx->argv[argi];
				while (*src != '\0')
					*dst++ = *src++;
				if (quote_args)
					*dst++ = '"';
			}
			/* pos is unchanged, so the pasted text is rescanned. */
			continue;
		case 'A':
			/* Expands to "1" for a non-empty argument, else "0". */
			ubuf[0] = iendarg > iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'B':
			/*
			 * Expands to "1" if the delimited, non-empty argument
			 * is completely consumed by roff_evalnum(), else "0".
			 */
			npos = 0;
			ubuf[0] = iendarg > iarg && iend > iendarg &&
			    roff_evalnum(r, ln, buf->buf + iarg, &npos,
					 NULL, ROFFNUM_SCALE) &&
			    npos == iendarg - iarg ? '1' : '0';
			ubuf[1] = '\0';
			res = ubuf;
			break;
		case 'V':
			/*
			 * Unsupported: rewrite as ${argument}.  Patch the
			 * end first such that the start indices stay valid.
			 */
			mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
			    "%.*s", iend - iesc, buf->buf + iesc);
			roff_expand_patch(buf, iendarg, "}", iend);
			roff_expand_patch(buf, iesc, "${", iarg);
			continue;
		case 'g':
			/* res remains NULL: replaced by the empty string. */
			break;
		case 'n':
			/* Number register interpolation. */
			if (iendarg > iarg)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, buf->buf + iarg,
				    iendarg - iarg, buf->buf[inam + 1]));
			else
				ubuf[0] = '\0';
			res = ubuf;
			break;
		case 'w':
			/*
			 * Count the characters in the argument, letting
			 * nested escape sequences count as shown below,
			 * and expand to 24 times that count.
			 */
			rsz = 0;
			subtype = ESCAPE_UNDEF;
			while (iarg < iendarg) {
				/* Nothing counts right after ESCAPE_SKIPCHAR. */
				asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
				if (buf->buf[iarg] != '\\') {
					rsz += asz;
					iarg++;
					continue;
				}
				switch ((subtype = roff_escape(buf->buf, 0,
				    iarg, NULL, NULL, NULL, NULL, &iarg))) {
				case ESCAPE_SPECIAL:
				case ESCAPE_NUMBERED:
				case ESCAPE_UNICODE:
				case ESCAPE_OVERSTRIKE:
				case ESCAPE_UNDEF:
					break;
				case ESCAPE_DEVICE:
					asz *= 8;
					break;
				case ESCAPE_EXPAND:
					abort();
				default:
					/* Other sequences do not count at all. */
					continue;
				}
				rsz += asz;
			}
			(void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
			res = ubuf;
			break;
		default:
			break;
		}
		if (res == NULL)
			res = "";
		/* Guard against endless or runaway expansion. */
		if (++expand_count > EXPAND_LIMIT ||
		    buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
			return ROFF_IGN;
		}
		/* pos is unchanged, so the pasted text is rescanned. */
		roff_expand_patch(buf, iesc, res, iend);
	}
	return ROFF_CONT;
}
1598
1599 /*
1600 * Replace the substring from the start position (inclusive)
1601 * to end position (exclusive) with the repl(acement) string.
1602 */
1603 static void
roff_expand_patch(struct buf * buf,int start,const char * repl,int end)1604 roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
1605 {
1606 char *nbuf;
1607
1608 buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
1609 repl, buf->buf + end) + 1;
1610 free(buf->buf);
1611 buf->buf = nbuf;
1612 }
1613
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance *pos by the number of bytes consumed.
 *
 * Return a newly allocated copy of the parsed argument.
 * If double backslashes were reduced to single ones, the copy is
 * additionally passed through roff_expand(); if that returns
 * ROFF_IGN, an allocated empty string is returned instead.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * pairs counts the bytes dropped so far, that is, how far
	 * the remaining text has to be shifted to the left.
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				cp[-pairs] = '\\';
				/* Remember to expand the result again. */
				newesc = 1;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	/* Account for the opening quote skipped at the beginning. */
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	/* Warn about blanks at the end of the argument line. */
	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/* Expand escape sequences uncovered by reducing backslashes. */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1724
1725
1726 /*
1727 * Process text streams.
1728 */
1729 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1730 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1731 {
1732 size_t sz;
1733 const char *start;
1734 char *p;
1735 int isz;
1736 enum mandoc_esc esc;
1737
1738 /* Spring the input line trap. */
1739
1740 if (roffit_lines == 1) {
1741 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1742 free(buf->buf);
1743 buf->buf = p;
1744 buf->sz = isz + 1;
1745 *offs = 0;
1746 free(roffit_macro);
1747 roffit_lines = 0;
1748 return ROFF_REPARSE;
1749 } else if (roffit_lines > 1)
1750 --roffit_lines;
1751
1752 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1753 if (roffce_lines < 1) {
1754 r->man->last = roffce_node;
1755 r->man->next = ROFF_NEXT_SIBLING;
1756 roffce_lines = 0;
1757 roffce_node = NULL;
1758 } else
1759 roffce_lines--;
1760 }
1761
1762 /* Convert all breakable hyphens into ASCII_HYPH. */
1763
1764 start = p = buf->buf + pos;
1765
1766 while (*p != '\0') {
1767 sz = strcspn(p, "-\\");
1768 p += sz;
1769
1770 if (*p == '\0')
1771 break;
1772
1773 if (*p == '\\') {
1774 /* Skip over escapes. */
1775 p++;
1776 esc = mandoc_escape((const char **)&p, NULL, NULL);
1777 if (esc == ESCAPE_ERROR)
1778 break;
1779 while (*p == '-')
1780 p++;
1781 continue;
1782 } else if (p == start) {
1783 p++;
1784 continue;
1785 }
1786
1787 if (isalpha((unsigned char)p[-1]) &&
1788 isalpha((unsigned char)p[1]))
1789 *p = ASCII_HYPH;
1790 p++;
1791 }
1792 return ROFF_CONT;
1793 }
1794
/*
 * Main entry point of the roff parser: process one input line.
 *
 * The line is in buf->buf, starting at offset *offs; ln is the
 * input line number, and len is only used to warn about long
 * text lines.  Comments are stripped and escape sequences expanded
 * first; then the line is handed to the handler of an open roff
 * block, to the equation or table parser, to the text parser,
 * or to the request and macro dispatcher, in that order.
 * The return value is a ROFF_* code, possibly combined with
 * flag bits outside ROFF_MASK, as returned by these handlers.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but only in fill mode, outside
	 * tables and equations, for lines not starting with a blank,
	 * dot, backslash, or the control character, and only if the
	 * first blank occurs within the first 80 bytes of the buffer.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Handle comments and escape sequences. */

	e = roff_parse_comment(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	/* This may advance pos beyond the control character. */

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the flag bits for combining them below. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything except .EN is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* Inside a table, text lines and empty request lines are data. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* Comments after the first text line are not saved. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* Comments after the first request or macro are not saved. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);
	return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
}
1893
1894 /*
1895 * Handle a new request or macro.
1896 * May be called outside any scope or from inside a conditional scope.
1897 */
1898 static int
roff_req_or_macro(ROFF_ARGS)1899 roff_req_or_macro(ROFF_ARGS) {
1900
1901 /* For now, tables ignore most macros and some request. */
1902
1903 if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
1904 tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
1905 tok == ROFF_sp)) {
1906 mandoc_msg(MANDOCERR_TBLMACRO,
1907 ln, ppos, "%s", buf->buf + ppos);
1908 if (tok != TOKEN_NONE)
1909 return ROFF_IGN;
1910 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
1911 pos++;
1912 while (buf->buf[pos] == ' ')
1913 pos++;
1914 tbl_read(r->tbl, ln, buf->buf, pos);
1915 roff_addtbl(r->man, ln, r->tbl);
1916 return ROFF_IGN;
1917 }
1918
1919 /* For now, let high level macros abort .ce mode. */
1920
1921 if (roffce_node != NULL &&
1922 (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
1923 tok == ROFF_TH || tok == ROFF_TS)) {
1924 r->man->last = roffce_node;
1925 r->man->next = ROFF_NEXT_SIBLING;
1926 roffce_lines = 0;
1927 roffce_node = NULL;
1928 }
1929
1930 /*
1931 * This is neither a roff request nor a user-defined macro.
1932 * Let the standard macro set parsers handle it.
1933 */
1934
1935 if (tok == TOKEN_NONE)
1936 return ROFF_CONT;
1937
1938 /* Execute a roff request or a user-defined macro. */
1939
1940 return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
1941 }
1942
1943 /*
1944 * Internal interface function to tell the roff parser that execution
1945 * of the current macro ended. This is required because macro
1946 * definitions usually do not end with a .return request.
1947 */
1948 void
roff_userret(struct roff * r)1949 roff_userret(struct roff *r)
1950 {
1951 struct mctx *ctx;
1952 int i;
1953
1954 assert(r->mstackpos >= 0);
1955 ctx = r->mstack + r->mstackpos;
1956 for (i = 0; i < ctx->argc; i++)
1957 free(ctx->argv[i]);
1958 ctx->argc = 0;
1959 r->mstackpos--;
1960 }
1961
1962 void
roff_endparse(struct roff * r)1963 roff_endparse(struct roff *r)
1964 {
1965 if (r->last != NULL)
1966 mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
1967 r->last->col, "%s", roff_name[r->last->tok]);
1968
1969 if (r->eqn != NULL) {
1970 mandoc_msg(MANDOCERR_BLK_NOEND,
1971 r->eqn->node->line, r->eqn->node->pos, "EQ");
1972 eqn_parse(r->eqn);
1973 r->eqn = NULL;
1974 }
1975
1976 if (r->tbl != NULL) {
1977 tbl_end(r->tbl, 1);
1978 r->tbl = NULL;
1979 }
1980 }
1981
1982 /*
1983 * Parse the request or macro name at buf[*pos].
1984 * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
1985 * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
1986 * As a side effect, set r->current_string to the definition or to NULL.
1987 */
1988 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)1989 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
1990 {
1991 char *cp;
1992 const char *mac;
1993 size_t maclen;
1994 int deftype;
1995 enum roff_tok t;
1996
1997 cp = buf + *pos;
1998
1999 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2000 return TOKEN_NONE;
2001
2002 mac = cp;
2003 maclen = roff_getname(r, &cp, ln, ppos);
2004
2005 deftype = ROFFDEF_USER | ROFFDEF_REN;
2006 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2007 switch (deftype) {
2008 case ROFFDEF_USER:
2009 t = ROFF_USERDEF;
2010 break;
2011 case ROFFDEF_REN:
2012 t = ROFF_RENAMED;
2013 break;
2014 default:
2015 t = roffhash_find(r->reqtab, mac, maclen);
2016 break;
2017 }
2018 if (t != TOKEN_NONE)
2019 *pos = cp - buf;
2020 else if (deftype == ROFFDEF_UNDEF) {
2021 /* Using an undefined macro defines it to be empty. */
2022 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2023 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2024 }
2025 return t;
2026 }
2027
2028 /* --- handling of request blocks ----------------------------------------- */
2029
2030 /*
2031 * Close a macro definition block or an "ignore" block.
2032 */
2033 static int
roff_cblock(ROFF_ARGS)2034 roff_cblock(ROFF_ARGS)
2035 {
2036 int rr;
2037
2038 if (r->last == NULL) {
2039 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2040 return ROFF_IGN;
2041 }
2042
2043 switch (r->last->tok) {
2044 case ROFF_am:
2045 case ROFF_ami:
2046 case ROFF_de:
2047 case ROFF_dei:
2048 case ROFF_ig:
2049 break;
2050 case ROFF_am1:
2051 case ROFF_de1:
2052 /* Remapped in roff_block(). */
2053 abort();
2054 default:
2055 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
2056 return ROFF_IGN;
2057 }
2058
2059 roffnode_pop(r);
2060 roffnode_cleanscope(r);
2061
2062 /*
2063 * If a conditional block with braces is still open,
2064 * check for "\}" block end markers.
2065 */
2066
2067 if (r->last != NULL && r->last->endspan < 0) {
2068 rr = 1; /* If arguments follow "\}", warn about them. */
2069 roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2070 }
2071
2072 if (buf->buf[pos] != '\0')
2073 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
2074 ".. %s", buf->buf + pos);
2075
2076 return ROFF_IGN;
2077 }
2078
2079 /*
2080 * Pop all nodes ending at the end of the current input line.
2081 * Return the number of loops ended.
2082 */
2083 static int
roffnode_cleanscope(struct roff * r)2084 roffnode_cleanscope(struct roff *r)
2085 {
2086 int inloop;
2087
2088 inloop = 0;
2089 while (r->last != NULL && r->last->endspan > 0) {
2090 if (--r->last->endspan != 0)
2091 break;
2092 inloop += roffnode_pop(r);
2093 }
2094 return inloop;
2095 }
2096
2097 /*
2098 * Handle the closing "\}" of a conditional block.
2099 * Apart from generating warnings, this only pops nodes.
2100 * Return the number of loops ended.
2101 */
2102 static int
roff_ccond(struct roff * r,int ln,int ppos)2103 roff_ccond(struct roff *r, int ln, int ppos)
2104 {
2105 if (NULL == r->last) {
2106 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2107 return 0;
2108 }
2109
2110 switch (r->last->tok) {
2111 case ROFF_el:
2112 case ROFF_ie:
2113 case ROFF_if:
2114 case ROFF_while:
2115 break;
2116 default:
2117 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2118 return 0;
2119 }
2120
2121 if (r->last->endspan > -1) {
2122 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123 return 0;
2124 }
2125
2126 return roffnode_pop(r) + roffnode_cleanscope(r);
2127 }
2128
2129 static int
roff_block(ROFF_ARGS)2130 roff_block(ROFF_ARGS)
2131 {
2132 const char *name, *value;
2133 char *call, *cp, *iname, *rname;
2134 size_t csz, namesz, rsz;
2135 int deftype;
2136
2137 /* Ignore groff compatibility mode for now. */
2138
2139 if (tok == ROFF_de1)
2140 tok = ROFF_de;
2141 else if (tok == ROFF_dei1)
2142 tok = ROFF_dei;
2143 else if (tok == ROFF_am1)
2144 tok = ROFF_am;
2145 else if (tok == ROFF_ami1)
2146 tok = ROFF_ami;
2147
2148 /* Parse the macro name argument. */
2149
2150 cp = buf->buf + pos;
2151 if (tok == ROFF_ig) {
2152 iname = NULL;
2153 namesz = 0;
2154 } else {
2155 iname = cp;
2156 namesz = roff_getname(r, &cp, ln, ppos);
2157 iname[namesz] = '\0';
2158 }
2159
2160 /* Resolve the macro name argument if it is indirect. */
2161
2162 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2163 deftype = ROFFDEF_USER;
2164 name = roff_getstrn(r, iname, namesz, &deftype);
2165 if (name == NULL) {
2166 mandoc_msg(MANDOCERR_STR_UNDEF,
2167 ln, (int)(iname - buf->buf),
2168 "%.*s", (int)namesz, iname);
2169 namesz = 0;
2170 } else
2171 namesz = strlen(name);
2172 } else
2173 name = iname;
2174
2175 if (namesz == 0 && tok != ROFF_ig) {
2176 mandoc_msg(MANDOCERR_REQ_EMPTY,
2177 ln, ppos, "%s", roff_name[tok]);
2178 return ROFF_IGN;
2179 }
2180
2181 roffnode_push(r, tok, name, ln, ppos);
2182
2183 /*
2184 * At the beginning of a `de' macro, clear the existing string
2185 * with the same name, if there is one. New content will be
2186 * appended from roff_block_text() in multiline mode.
2187 */
2188
2189 if (tok == ROFF_de || tok == ROFF_dei) {
2190 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2191 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2192 } else if (tok == ROFF_am || tok == ROFF_ami) {
2193 deftype = ROFFDEF_ANY;
2194 value = roff_getstrn(r, iname, namesz, &deftype);
2195 switch (deftype) { /* Before appending, ... */
2196 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2197 roff_setstrn(&r->strtab, name, namesz,
2198 value, strlen(value), 0);
2199 break;
2200 case ROFFDEF_REN: /* call original standard macro. */
2201 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2202 (int)strlen(value), value);
2203 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2204 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2205 free(call);
2206 break;
2207 case ROFFDEF_STD: /* rename and call standard macro. */
2208 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2209 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2210 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2211 (int)rsz, rname);
2212 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2213 free(call);
2214 free(rname);
2215 break;
2216 default:
2217 break;
2218 }
2219 }
2220
2221 if (*cp == '\0')
2222 return ROFF_IGN;
2223
2224 /* Get the custom end marker. */
2225
2226 iname = cp;
2227 namesz = roff_getname(r, &cp, ln, ppos);
2228
2229 /* Resolve the end marker if it is indirect. */
2230
2231 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2232 deftype = ROFFDEF_USER;
2233 name = roff_getstrn(r, iname, namesz, &deftype);
2234 if (name == NULL) {
2235 mandoc_msg(MANDOCERR_STR_UNDEF,
2236 ln, (int)(iname - buf->buf),
2237 "%.*s", (int)namesz, iname);
2238 namesz = 0;
2239 } else
2240 namesz = strlen(name);
2241 } else
2242 name = iname;
2243
2244 if (namesz)
2245 r->last->end = mandoc_strndup(name, namesz);
2246
2247 if (*cp != '\0')
2248 mandoc_msg(MANDOCERR_ARG_EXCESS,
2249 ln, pos, ".%s ... %s", roff_name[tok], cp);
2250
2251 return ROFF_IGN;
2252 }
2253
2254 static int
roff_block_sub(ROFF_ARGS)2255 roff_block_sub(ROFF_ARGS)
2256 {
2257 enum roff_tok t;
2258 int i, j;
2259
2260 /*
2261 * If a custom end marker is a user-defined or predefined macro
2262 * or a request, interpret it.
2263 */
2264
2265 if (r->last->end) {
2266 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2267 if (buf->buf[i] != r->last->end[j])
2268 break;
2269
2270 if (r->last->end[j] == '\0' &&
2271 (buf->buf[i] == '\0' ||
2272 buf->buf[i] == ' ' ||
2273 buf->buf[i] == '\t')) {
2274 roffnode_pop(r);
2275 roffnode_cleanscope(r);
2276
2277 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2278 i++;
2279
2280 pos = i;
2281 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2282 TOKEN_NONE)
2283 return ROFF_RERUN;
2284 return ROFF_IGN;
2285 }
2286 }
2287
2288 /* Handle the standard end marker. */
2289
2290 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2291 if (t == ROFF_cblock)
2292 return roff_cblock(r, t, buf, ln, ppos, pos, offs);
2293
2294 /* Not an end marker, so append the line to the block. */
2295
2296 if (tok != ROFF_ig)
2297 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2298 return ROFF_IGN;
2299 }
2300
2301 static int
roff_block_text(ROFF_ARGS)2302 roff_block_text(ROFF_ARGS)
2303 {
2304
2305 if (tok != ROFF_ig)
2306 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2307
2308 return ROFF_IGN;
2309 }
2310
/*
 * Check for a closing "\}" and handle it.
 * Scan the input line starting at pos for escape sequences,
 * close one conditional scope with roff_ccond() for each "\}" found,
 * and neutralize the "\}" in the buffer.
 * In this function, the final "int *offs" argument is used for
 * different purposes than elsewhere:
 * Input: *offs == 0: caller wants to discard arguments following \}
 *        *offs == 1: caller wants to preserve text following \}
 * Output: *offs = 0: tell caller to discard input line
 *         *offs = 1: tell caller to use input line
 * Returns ROFF_IGN, with ROFF_LOOPCONT or ROFF_LOOPEXIT or'ed in
 * if tok is ROFF_while and a loop was ended.
 */
static int
roff_cond_checkend(ROFF_ARGS)
{
	char *ep;
	int endloop, irc, rr;

	irc = ROFF_IGN;
	/* Remember the rule before any scope gets popped below. */
	rr = r->last->rule;
	/* What to report if a loop ends: continue if the rule is true. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If "\}" occurs on a macro line without a preceding macro or
	 * a text line contains nothing else, drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
		rr = 0;

	/*
	 * The closing delimiter "\}" rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
	 */

	while ((ep = strchr(ep, '\\')) != NULL) {
		switch (ep[1]) {
		case '}':
			if (ep[2] == '\0')
				/* At the end of the line, truncate. */
				ep[0] = '\0';
			else if (rr)
				/* Line is kept: "\}" becomes "\&". */
				ep[1] = '&';
			else
				/* Line is dropped: cut the "\}" out. */
				memmove(ep, ep + 2, strlen(ep + 2) + 1);
			if (roff_ccond(r, ln, ep - buf->buf))
				irc |= endloop;
			break;
		case '\0':
			/* Trailing backslash: step onto the terminator. */
			++ep;
			break;
		default:
			/* Skip the backslash and the character after it. */
			ep += 2;
			break;
		}
	}
	*offs = rr;
	return irc;
}
2370
2371 /*
2372 * Parse and process a request or macro line in conditional scope.
2373 */
2374 static int
roff_cond_sub(ROFF_ARGS)2375 roff_cond_sub(ROFF_ARGS)
2376 {
2377 struct roffnode *bl;
2378 int irc, rr, spos;
2379 enum roff_tok t;
2380
2381 rr = 0; /* If arguments follow "\}", skip them. */
2382 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2383 spos = pos;
2384 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2385
2386 /*
2387 * Handle requests and macros if the conditional evaluated
2388 * to true or if they are structurally required.
2389 * The .break request is always handled specially.
2390 */
2391
2392 if (t == ROFF_break) {
2393 if (irc & ROFF_LOOPMASK)
2394 irc = ROFF_IGN | ROFF_LOOPEXIT;
2395 else if (rr) {
2396 for (bl = r->last; bl != NULL; bl = bl->parent) {
2397 bl->rule = 0;
2398 if (bl->tok == ROFF_while)
2399 break;
2400 }
2401 }
2402 } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
2403 irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
2404 if (irc & ROFF_WHILE)
2405 irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
2406 }
2407 return irc;
2408 }
2409
2410 /*
2411 * Parse and process a text line in conditional scope.
2412 */
2413 static int
roff_cond_text(ROFF_ARGS)2414 roff_cond_text(ROFF_ARGS)
2415 {
2416 int irc, rr;
2417
2418 rr = 1; /* If arguments follow "\}", preserve them. */
2419 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2420 if (rr)
2421 irc |= ROFF_CONT;
2422 return irc;
2423 }
2424
2425 /* --- handling of numeric and conditional expressions -------------------- */
2426
2427 /*
2428 * Parse a single signed integer number. Stop at the first non-digit.
2429 * If there is at least one digit, return success and advance the
2430 * parse point, else return failure and let the parse point unchanged.
2431 * Ignore overflows, treat them just like the C language.
2432 */
2433 static int
roff_getnum(const char * v,int * pos,int * res,int flags)2434 roff_getnum(const char *v, int *pos, int *res, int flags)
2435 {
2436 int myres, scaled, n, p;
2437
2438 if (NULL == res)
2439 res = &myres;
2440
2441 p = *pos;
2442 n = v[p] == '-';
2443 if (n || v[p] == '+')
2444 p++;
2445
2446 if (flags & ROFFNUM_WHITE)
2447 while (isspace((unsigned char)v[p]))
2448 p++;
2449
2450 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2451 *res = 10 * *res + v[p] - '0';
2452 if (p == *pos + n)
2453 return 0;
2454
2455 if (n)
2456 *res = -*res;
2457
2458 /* Each number may be followed by one optional scaling unit. */
2459
2460 switch (v[p]) {
2461 case 'f':
2462 scaled = *res * 65536;
2463 break;
2464 case 'i':
2465 scaled = *res * 240;
2466 break;
2467 case 'c':
2468 scaled = *res * 240 / 2.54;
2469 break;
2470 case 'v':
2471 case 'P':
2472 scaled = *res * 40;
2473 break;
2474 case 'm':
2475 case 'n':
2476 scaled = *res * 24;
2477 break;
2478 case 'p':
2479 scaled = *res * 10 / 3;
2480 break;
2481 case 'u':
2482 scaled = *res;
2483 break;
2484 case 'M':
2485 scaled = *res * 6 / 25;
2486 break;
2487 default:
2488 scaled = *res;
2489 p--;
2490 break;
2491 }
2492 if (flags & ROFFNUM_SCALE)
2493 *res = scaled;
2494
2495 *pos = p + 1;
2496 return 1;
2497 }
2498
/*
 * Evaluate a string comparison condition of the form 'one'two',
 * where the character at v[*pos] serves as the delimiter.
 * Return 1 if the text between the first and second delimiter
 * equals the text between the second and third one, else 0.
 * Move *pos behind the third delimiter, or to the end of the
 * line if the condition is not properly terminated.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*delim, *lhs, *rhs;
	int		 equal;

	equal = 0;
	delim = v + *pos;		/* the opening delimiter */
	lhs = delim + 1;		/* cursor in the first string */
	rhs = strchr(lhs, *delim);	/* cursor in the second string */

	/* Without a middle delimiter, there is nothing to compare. */

	if (rhs != NULL) {
		for (;;) {
			rhs++;
			if (*rhs == '\0')
				break;
			if (*lhs != *rhs) {
				/* Mismatch: look for the end anyway. */
				rhs = strchr(rhs, *delim);
				break;
			}
			if (*rhs == *delim) {
				/* Both strings ended simultaneously. */
				equal = 1;
				break;
			}
			lhs++;
		}
	}

	if (rhs == NULL)
		rhs = strchr(lhs, '\0');
	else if (*rhs != '\0')
		rhs++;
	*pos = rhs - v;
	return equal;
}
2541
2542 /*
2543 * Evaluate an optionally negated single character, numerical,
2544 * or string condition.
2545 */
2546 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2547 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2548 {
2549 const char *start, *end;
2550 char *cp, *name;
2551 size_t sz;
2552 int deftype, len, number, savepos, istrue, wanttrue;
2553
2554 if ('!' == v[*pos]) {
2555 wanttrue = 0;
2556 (*pos)++;
2557 } else
2558 wanttrue = 1;
2559
2560 switch (v[*pos]) {
2561 case '\0':
2562 return 0;
2563 case 'n':
2564 case 'o':
2565 (*pos)++;
2566 return wanttrue;
2567 case 'e':
2568 case 't':
2569 case 'v':
2570 (*pos)++;
2571 return !wanttrue;
2572 case 'c':
2573 do {
2574 (*pos)++;
2575 } while (v[*pos] == ' ');
2576
2577 /*
2578 * Quirk for groff compatibility:
2579 * The horizontal tab is neither available nor unavailable.
2580 */
2581
2582 if (v[*pos] == '\t') {
2583 (*pos)++;
2584 return 0;
2585 }
2586
2587 /* Printable ASCII characters are available. */
2588
2589 if (v[*pos] != '\\') {
2590 (*pos)++;
2591 return wanttrue;
2592 }
2593
2594 end = v + ++*pos;
2595 switch (mandoc_escape(&end, &start, &len)) {
2596 case ESCAPE_SPECIAL:
2597 istrue = mchars_spec2cp(start, len) != -1;
2598 break;
2599 case ESCAPE_UNICODE:
2600 istrue = 1;
2601 break;
2602 case ESCAPE_NUMBERED:
2603 istrue = mchars_num2char(start, len) != -1;
2604 break;
2605 default:
2606 istrue = !wanttrue;
2607 break;
2608 }
2609 *pos = end - v;
2610 return istrue == wanttrue;
2611 case 'd':
2612 case 'r':
2613 cp = v + *pos + 1;
2614 while (*cp == ' ')
2615 cp++;
2616 name = cp;
2617 sz = roff_getname(r, &cp, ln, cp - v);
2618 if (sz == 0)
2619 istrue = 0;
2620 else if (v[*pos] == 'r')
2621 istrue = roff_hasregn(r, name, sz);
2622 else {
2623 deftype = ROFFDEF_ANY;
2624 roff_getstrn(r, name, sz, &deftype);
2625 istrue = !!deftype;
2626 }
2627 *pos = (name + sz) - v;
2628 return istrue == wanttrue;
2629 default:
2630 break;
2631 }
2632
2633 savepos = *pos;
2634 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2635 return (number > 0) == wanttrue;
2636 else if (*pos == savepos)
2637 return roff_evalstrcond(v, pos) == wanttrue;
2638 else
2639 return 0;
2640 }
2641
2642 static int
roff_line_ignore(ROFF_ARGS)2643 roff_line_ignore(ROFF_ARGS)
2644 {
2645
2646 return ROFF_IGN;
2647 }
2648
2649 static int
roff_insec(ROFF_ARGS)2650 roff_insec(ROFF_ARGS)
2651 {
2652
2653 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2654 return ROFF_IGN;
2655 }
2656
2657 static int
roff_unsupp(ROFF_ARGS)2658 roff_unsupp(ROFF_ARGS)
2659 {
2660
2661 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2662 return ROFF_IGN;
2663 }
2664
2665 static int
roff_cond(ROFF_ARGS)2666 roff_cond(ROFF_ARGS)
2667 {
2668 int irc;
2669
2670 roffnode_push(r, tok, NULL, ln, ppos);
2671
2672 /*
2673 * An `.el' has no conditional body: it will consume the value
2674 * of the current rstack entry set in prior `ie' calls or
2675 * defaults to DENY.
2676 *
2677 * If we're not an `el', however, then evaluate the conditional.
2678 */
2679
2680 r->last->rule = tok == ROFF_el ?
2681 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2682 roff_evalcond(r, ln, buf->buf, &pos);
2683
2684 /*
2685 * An if-else will put the NEGATION of the current evaluated
2686 * conditional into the stack of rules.
2687 */
2688
2689 if (tok == ROFF_ie) {
2690 if (r->rstackpos + 1 == r->rstacksz) {
2691 r->rstacksz += 16;
2692 r->rstack = mandoc_reallocarray(r->rstack,
2693 r->rstacksz, sizeof(int));
2694 }
2695 r->rstack[++r->rstackpos] = !r->last->rule;
2696 }
2697
2698 /* If the parent has false as its rule, then so do we. */
2699
2700 if (r->last->parent && !r->last->parent->rule)
2701 r->last->rule = 0;
2702
2703 /*
2704 * Determine scope.
2705 * If there is nothing on the line after the conditional,
2706 * not even whitespace, use next-line scope.
2707 * Except that .while does not support next-line scope.
2708 */
2709
2710 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2711 r->last->endspan = 2;
2712 goto out;
2713 }
2714
2715 while (buf->buf[pos] == ' ')
2716 pos++;
2717
2718 /* An opening brace requests multiline scope. */
2719
2720 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2721 r->last->endspan = -1;
2722 pos += 2;
2723 while (buf->buf[pos] == ' ')
2724 pos++;
2725 goto out;
2726 }
2727
2728 /*
2729 * Anything else following the conditional causes
2730 * single-line scope. Warn if the scope contains
2731 * nothing but trailing whitespace.
2732 */
2733
2734 if (buf->buf[pos] == '\0')
2735 mandoc_msg(MANDOCERR_COND_EMPTY,
2736 ln, ppos, "%s", roff_name[tok]);
2737
2738 r->last->endspan = 1;
2739
2740 out:
2741 *offs = pos;
2742 irc = ROFF_RERUN;
2743 if (tok == ROFF_while)
2744 irc |= ROFF_WHILE;
2745 return irc;
2746 }
2747
2748 static int
roff_ds(ROFF_ARGS)2749 roff_ds(ROFF_ARGS)
2750 {
2751 char *string;
2752 const char *name;
2753 size_t namesz;
2754
2755 /* Ignore groff compatibility mode for now. */
2756
2757 if (tok == ROFF_ds1)
2758 tok = ROFF_ds;
2759 else if (tok == ROFF_as1)
2760 tok = ROFF_as;
2761
2762 /*
2763 * The first word is the name of the string.
2764 * If it is empty or terminated by an escape sequence,
2765 * abort the `ds' request without defining anything.
2766 */
2767
2768 name = string = buf->buf + pos;
2769 if (*name == '\0')
2770 return ROFF_IGN;
2771
2772 namesz = roff_getname(r, &string, ln, pos);
2773 switch (name[namesz]) {
2774 case '\\':
2775 return ROFF_IGN;
2776 case '\t':
2777 string = buf->buf + pos + namesz;
2778 break;
2779 default:
2780 break;
2781 }
2782
2783 /* Read past the initial double-quote, if any. */
2784 if (*string == '"')
2785 string++;
2786
2787 /* The rest is the value. */
2788 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2789 ROFF_as == tok);
2790 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2791 return ROFF_IGN;
2792 }
2793
/*
 * Parse a single operator, one or two characters long, at v[*pos].
 * Two-character operators are reported with a one-letter code:
 * "<=" as 'l', "<>" as '!', "<?" as 'i', ">=" as 'g', ">?" as 'a',
 * and "==" as '='.
 * If the operator is recognized, store its code in *res, advance
 * *pos behind it, and return the code.  Otherwise, return 0 and
 * leave *pos alone; *res then holds the character found at v[*pos].
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	*res = cp[0];
	oplen = 1;

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2857
2858 /*
2859 * Evaluate either a parenthesized numeric expression
2860 * or a single signed integer number.
2861 */
2862 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2863 roff_evalpar(struct roff *r, int ln,
2864 const char *v, int *pos, int *res, int flags)
2865 {
2866
2867 if ('(' != v[*pos])
2868 return roff_getnum(v, pos, res, flags);
2869
2870 (*pos)++;
2871 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2872 return 0;
2873
2874 /*
2875 * Omission of the closing parenthesis
2876 * is an error in validation mode,
2877 * but ignored in evaluation mode.
2878 */
2879
2880 if (')' == v[*pos])
2881 (*pos)++;
2882 else if (NULL == res)
2883 return 0;
2884
2885 return 1;
2886 }
2887
2888 /*
2889 * Evaluate a complete numeric expression.
2890 * Proceed left to right, there is no concept of precedence.
2891 */
2892 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2893 roff_evalnum(struct roff *r, int ln, const char *v,
2894 int *pos, int *res, int flags)
2895 {
2896 int mypos, operand2;
2897 char operator;
2898
2899 if (NULL == pos) {
2900 mypos = 0;
2901 pos = &mypos;
2902 }
2903
2904 if (flags & ROFFNUM_WHITE)
2905 while (isspace((unsigned char)v[*pos]))
2906 (*pos)++;
2907
2908 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2909 return 0;
2910
2911 while (1) {
2912 if (flags & ROFFNUM_WHITE)
2913 while (isspace((unsigned char)v[*pos]))
2914 (*pos)++;
2915
2916 if ( ! roff_getop(v, pos, &operator))
2917 break;
2918
2919 if (flags & ROFFNUM_WHITE)
2920 while (isspace((unsigned char)v[*pos]))
2921 (*pos)++;
2922
2923 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2924 return 0;
2925
2926 if (flags & ROFFNUM_WHITE)
2927 while (isspace((unsigned char)v[*pos]))
2928 (*pos)++;
2929
2930 if (NULL == res)
2931 continue;
2932
2933 switch (operator) {
2934 case '+':
2935 *res += operand2;
2936 break;
2937 case '-':
2938 *res -= operand2;
2939 break;
2940 case '*':
2941 *res *= operand2;
2942 break;
2943 case '/':
2944 if (operand2 == 0) {
2945 mandoc_msg(MANDOCERR_DIVZERO,
2946 ln, *pos, "%s", v);
2947 *res = 0;
2948 break;
2949 }
2950 *res /= operand2;
2951 break;
2952 case '%':
2953 if (operand2 == 0) {
2954 mandoc_msg(MANDOCERR_DIVZERO,
2955 ln, *pos, "%s", v);
2956 *res = 0;
2957 break;
2958 }
2959 *res %= operand2;
2960 break;
2961 case '<':
2962 *res = *res < operand2;
2963 break;
2964 case '>':
2965 *res = *res > operand2;
2966 break;
2967 case 'l':
2968 *res = *res <= operand2;
2969 break;
2970 case 'g':
2971 *res = *res >= operand2;
2972 break;
2973 case '=':
2974 *res = *res == operand2;
2975 break;
2976 case '!':
2977 *res = *res != operand2;
2978 break;
2979 case '&':
2980 *res = *res && operand2;
2981 break;
2982 case ':':
2983 *res = *res || operand2;
2984 break;
2985 case 'i':
2986 if (operand2 < *res)
2987 *res = operand2;
2988 break;
2989 case 'a':
2990 if (operand2 > *res)
2991 *res = operand2;
2992 break;
2993 default:
2994 abort();
2995 }
2996 }
2997 return 1;
2998 }
2999
3000 /* --- register management ------------------------------------------------ */
3001
/*
 * Set, increment, or decrement the number register with the
 * NUL-terminated name, leaving its step size alone.
 * See roff_setregn() for the meaning of val and sign.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3007
3008 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3009 roff_setregn(struct roff *r, const char *name, size_t len,
3010 int val, char sign, int step)
3011 {
3012 struct roffreg *reg;
3013
3014 /* Search for an existing register with the same name. */
3015 reg = r->regtab;
3016
3017 while (reg != NULL && (reg->key.sz != len ||
3018 strncmp(reg->key.p, name, len) != 0))
3019 reg = reg->next;
3020
3021 if (NULL == reg) {
3022 /* Create a new register. */
3023 reg = mandoc_malloc(sizeof(struct roffreg));
3024 reg->key.p = mandoc_strndup(name, len);
3025 reg->key.sz = len;
3026 reg->val = 0;
3027 reg->step = 0;
3028 reg->next = r->regtab;
3029 r->regtab = reg;
3030 }
3031
3032 if ('+' == sign)
3033 reg->val += val;
3034 else if ('-' == sign)
3035 reg->val -= val;
3036 else
3037 reg->val = val;
3038 if (step != INT_MIN)
3039 reg->step = step;
3040 }
3041
3042 /*
3043 * Handle some predefined read-only number registers.
3044 * For now, return -1 if the requested register is not predefined;
3045 * in case a predefined read-only register having the value -1
3046 * were to turn up, another special value would have to be chosen.
3047 */
3048 static int
roff_getregro(const struct roff * r,const char * name)3049 roff_getregro(const struct roff *r, const char *name)
3050 {
3051
3052 switch (*name) {
3053 case '$': /* Number of arguments of the last macro evaluated. */
3054 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3055 case 'A': /* ASCII approximation mode is always off. */
3056 return 0;
3057 case 'g': /* Groff compatibility mode is always on. */
3058 return 1;
3059 case 'H': /* Fixed horizontal resolution. */
3060 return 24;
3061 case 'j': /* Always adjust left margin only. */
3062 return 0;
3063 case 'T': /* Some output device is always defined. */
3064 return 1;
3065 case 'V': /* Fixed vertical resolution. */
3066 return 40;
3067 default:
3068 return -1;
3069 }
3070 }
3071
/*
 * Return the value of the number register with the NUL-terminated
 * name, without auto-incrementing or auto-decrementing it.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3077
3078 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3079 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3080 {
3081 struct roffreg *reg;
3082 int val;
3083
3084 if ('.' == name[0] && 2 == len) {
3085 val = roff_getregro(r, name + 1);
3086 if (-1 != val)
3087 return val;
3088 }
3089
3090 for (reg = r->regtab; reg; reg = reg->next) {
3091 if (len == reg->key.sz &&
3092 0 == strncmp(name, reg->key.p, len)) {
3093 switch (sign) {
3094 case '+':
3095 reg->val += reg->step;
3096 break;
3097 case '-':
3098 reg->val -= reg->step;
3099 break;
3100 default:
3101 break;
3102 }
3103 return reg->val;
3104 }
3105 }
3106
3107 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3108 return 0;
3109 }
3110
3111 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3112 roff_hasregn(const struct roff *r, const char *name, size_t len)
3113 {
3114 struct roffreg *reg;
3115 int val;
3116
3117 if ('.' == name[0] && 2 == len) {
3118 val = roff_getregro(r, name + 1);
3119 if (-1 != val)
3120 return 1;
3121 }
3122
3123 for (reg = r->regtab; reg; reg = reg->next)
3124 if (len == reg->key.sz &&
3125 0 == strncmp(name, reg->key.p, len))
3126 return 1;
3127
3128 return 0;
3129 }
3130
3131 static void
roff_freereg(struct roffreg * reg)3132 roff_freereg(struct roffreg *reg)
3133 {
3134 struct roffreg *old_reg;
3135
3136 while (NULL != reg) {
3137 free(reg->key.p);
3138 old_reg = reg;
3139 reg = reg->next;
3140 free(old_reg);
3141 }
3142 }
3143
3144 static int
roff_nr(ROFF_ARGS)3145 roff_nr(ROFF_ARGS)
3146 {
3147 char *key, *val, *step;
3148 size_t keysz;
3149 int iv, is, len;
3150 char sign;
3151
3152 key = val = buf->buf + pos;
3153 if (*key == '\0')
3154 return ROFF_IGN;
3155
3156 keysz = roff_getname(r, &val, ln, pos);
3157 if (key[keysz] == '\\' || key[keysz] == '\t')
3158 return ROFF_IGN;
3159
3160 sign = *val;
3161 if (sign == '+' || sign == '-')
3162 val++;
3163
3164 len = 0;
3165 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3166 return ROFF_IGN;
3167
3168 step = val + len;
3169 while (isspace((unsigned char)*step))
3170 step++;
3171 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3172 is = INT_MIN;
3173
3174 roff_setregn(r, key, keysz, iv, sign, is);
3175 return ROFF_IGN;
3176 }
3177
3178 static int
roff_rr(ROFF_ARGS)3179 roff_rr(ROFF_ARGS)
3180 {
3181 struct roffreg *reg, **prev;
3182 char *name, *cp;
3183 size_t namesz;
3184
3185 name = cp = buf->buf + pos;
3186 if (*name == '\0')
3187 return ROFF_IGN;
3188 namesz = roff_getname(r, &cp, ln, pos);
3189 name[namesz] = '\0';
3190
3191 prev = &r->regtab;
3192 while (1) {
3193 reg = *prev;
3194 if (reg == NULL || !strcmp(name, reg->key.p))
3195 break;
3196 prev = ®->next;
3197 }
3198 if (reg != NULL) {
3199 *prev = reg->next;
3200 free(reg->key.p);
3201 free(reg);
3202 }
3203 return ROFF_IGN;
3204 }
3205
3206 /* --- handler functions for roff requests -------------------------------- */
3207
3208 static int
roff_rm(ROFF_ARGS)3209 roff_rm(ROFF_ARGS)
3210 {
3211 const char *name;
3212 char *cp;
3213 size_t namesz;
3214
3215 cp = buf->buf + pos;
3216 while (*cp != '\0') {
3217 name = cp;
3218 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3219 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3220 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3221 if (name[namesz] == '\\' || name[namesz] == '\t')
3222 break;
3223 }
3224 return ROFF_IGN;
3225 }
3226
3227 static int
roff_it(ROFF_ARGS)3228 roff_it(ROFF_ARGS)
3229 {
3230 int iv;
3231
3232 /* Parse the number of lines. */
3233
3234 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3235 mandoc_msg(MANDOCERR_IT_NONUM,
3236 ln, ppos, "%s", buf->buf + 1);
3237 return ROFF_IGN;
3238 }
3239
3240 while (isspace((unsigned char)buf->buf[pos]))
3241 pos++;
3242
3243 /*
3244 * Arm the input line trap.
3245 * Special-casing "an-trap" is an ugly workaround to cope
3246 * with DocBook stupidly fiddling with man(7) internals.
3247 */
3248
3249 roffit_lines = iv;
3250 roffit_macro = mandoc_strdup(iv != 1 ||
3251 strcmp(buf->buf + pos, "an-trap") ?
3252 buf->buf + pos : "br");
3253 return ROFF_IGN;
3254 }
3255
3256 static int
roff_Dd(ROFF_ARGS)3257 roff_Dd(ROFF_ARGS)
3258 {
3259 int mask;
3260 enum roff_tok t, te;
3261
3262 switch (tok) {
3263 case ROFF_Dd:
3264 tok = MDOC_Dd;
3265 te = MDOC_MAX;
3266 if (r->format == 0)
3267 r->format = MPARSE_MDOC;
3268 mask = MPARSE_MDOC | MPARSE_QUICK;
3269 break;
3270 case ROFF_TH:
3271 tok = MAN_TH;
3272 te = MAN_MAX;
3273 if (r->format == 0)
3274 r->format = MPARSE_MAN;
3275 mask = MPARSE_QUICK;
3276 break;
3277 default:
3278 abort();
3279 }
3280 if ((r->options & mask) == 0)
3281 for (t = tok; t < te; t++)
3282 roff_setstr(r, roff_name[t], NULL, 0);
3283 return ROFF_CONT;
3284 }
3285
3286 static int
roff_TE(ROFF_ARGS)3287 roff_TE(ROFF_ARGS)
3288 {
3289 r->man->flags &= ~ROFF_NONOFILL;
3290 if (r->tbl == NULL) {
3291 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3292 return ROFF_IGN;
3293 }
3294 if (tbl_end(r->tbl, 0) == 0) {
3295 r->tbl = NULL;
3296 free(buf->buf);
3297 buf->buf = mandoc_strdup(".sp");
3298 buf->sz = 4;
3299 *offs = 0;
3300 return ROFF_REPARSE;
3301 }
3302 r->tbl = NULL;
3303 return ROFF_IGN;
3304 }
3305
3306 static int
roff_T_(ROFF_ARGS)3307 roff_T_(ROFF_ARGS)
3308 {
3309
3310 if (NULL == r->tbl)
3311 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3312 else
3313 tbl_restart(ln, ppos, r->tbl);
3314
3315 return ROFF_IGN;
3316 }
3317
3318 /*
3319 * Handle in-line equation delimiters.
3320 */
3321 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3322 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3323 {
3324 char *cp1, *cp2;
3325 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3326
3327 /*
3328 * Outside equations, look for an opening delimiter.
3329 * If we are inside an equation, we already know it is
3330 * in-line, or this function wouldn't have been called;
3331 * so look for a closing delimiter.
3332 */
3333
3334 cp1 = buf->buf + pos;
3335 cp2 = strchr(cp1, r->eqn == NULL ?
3336 r->last_eqn->odelim : r->last_eqn->cdelim);
3337 if (cp2 == NULL)
3338 return ROFF_CONT;
3339
3340 *cp2++ = '\0';
3341 bef_pr = bef_nl = aft_nl = aft_pr = "";
3342
3343 /* Handle preceding text, protecting whitespace. */
3344
3345 if (*buf->buf != '\0') {
3346 if (r->eqn == NULL)
3347 bef_pr = "\\&";
3348 bef_nl = "\n";
3349 }
3350
3351 /*
3352 * Prepare replacing the delimiter with an equation macro
3353 * and drop leading white space from the equation.
3354 */
3355
3356 if (r->eqn == NULL) {
3357 while (*cp2 == ' ')
3358 cp2++;
3359 mac = ".EQ";
3360 } else
3361 mac = ".EN";
3362
3363 /* Handle following text, protecting whitespace. */
3364
3365 if (*cp2 != '\0') {
3366 aft_nl = "\n";
3367 if (r->eqn != NULL)
3368 aft_pr = "\\&";
3369 }
3370
3371 /* Do the actual replacement. */
3372
3373 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3374 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3375 free(buf->buf);
3376 buf->buf = cp1;
3377
3378 /* Toggle the in-line state of the eqn subsystem. */
3379
3380 r->eqn_inline = r->eqn == NULL;
3381 return ROFF_REPARSE;
3382 }
3383
3384 static int
roff_EQ(ROFF_ARGS)3385 roff_EQ(ROFF_ARGS)
3386 {
3387 struct roff_node *n;
3388
3389 if (r->man->meta.macroset == MACROSET_MAN)
3390 man_breakscope(r->man, ROFF_EQ);
3391 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3392 if (ln > r->man->last->line)
3393 n->flags |= NODE_LINE;
3394 n->eqn = eqn_box_new();
3395 roff_node_append(r->man, n);
3396 r->man->next = ROFF_NEXT_SIBLING;
3397
3398 assert(r->eqn == NULL);
3399 if (r->last_eqn == NULL)
3400 r->last_eqn = eqn_alloc();
3401 else
3402 eqn_reset(r->last_eqn);
3403 r->eqn = r->last_eqn;
3404 r->eqn->node = n;
3405
3406 if (buf->buf[pos] != '\0')
3407 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3408 ".EQ %s", buf->buf + pos);
3409
3410 return ROFF_IGN;
3411 }
3412
3413 static int
roff_EN(ROFF_ARGS)3414 roff_EN(ROFF_ARGS)
3415 {
3416 if (r->eqn != NULL) {
3417 eqn_parse(r->eqn);
3418 r->eqn = NULL;
3419 } else
3420 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3421 if (buf->buf[pos] != '\0')
3422 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3423 "EN %s", buf->buf + pos);
3424 return ROFF_IGN;
3425 }
3426
3427 static int
roff_TS(ROFF_ARGS)3428 roff_TS(ROFF_ARGS)
3429 {
3430 if (r->tbl != NULL) {
3431 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3432 tbl_end(r->tbl, 0);
3433 }
3434 r->man->flags |= ROFF_NONOFILL;
3435 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3436 if (r->last_tbl == NULL)
3437 r->first_tbl = r->tbl;
3438 r->last_tbl = r->tbl;
3439 return ROFF_IGN;
3440 }
3441
3442 static int
roff_noarg(ROFF_ARGS)3443 roff_noarg(ROFF_ARGS)
3444 {
3445 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3446 man_breakscope(r->man, tok);
3447 if (tok == ROFF_brp)
3448 tok = ROFF_br;
3449 roff_elem_alloc(r->man, ln, ppos, tok);
3450 if (buf->buf[pos] != '\0')
3451 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3452 "%s %s", roff_name[tok], buf->buf + pos);
3453 if (tok == ROFF_nf)
3454 r->man->flags |= ROFF_NOFILL;
3455 else if (tok == ROFF_fi)
3456 r->man->flags &= ~ROFF_NOFILL;
3457 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3458 r->man->next = ROFF_NEXT_SIBLING;
3459 return ROFF_IGN;
3460 }
3461
3462 static int
roff_onearg(ROFF_ARGS)3463 roff_onearg(ROFF_ARGS)
3464 {
3465 struct roff_node *n;
3466 char *cp;
3467 int npos;
3468
3469 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3470 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3471 tok == ROFF_ti))
3472 man_breakscope(r->man, tok);
3473
3474 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3475 r->man->last = roffce_node;
3476 r->man->next = ROFF_NEXT_SIBLING;
3477 }
3478
3479 roff_elem_alloc(r->man, ln, ppos, tok);
3480 n = r->man->last;
3481
3482 cp = buf->buf + pos;
3483 if (*cp != '\0') {
3484 while (*cp != '\0' && *cp != ' ')
3485 cp++;
3486 while (*cp == ' ')
3487 *cp++ = '\0';
3488 if (*cp != '\0')
3489 mandoc_msg(MANDOCERR_ARG_EXCESS,
3490 ln, (int)(cp - buf->buf),
3491 "%s ... %s", roff_name[tok], cp);
3492 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3493 }
3494
3495 if (tok == ROFF_ce || tok == ROFF_rj) {
3496 if (r->man->last->type == ROFFT_ELEM) {
3497 roff_word_alloc(r->man, ln, pos, "1");
3498 r->man->last->flags |= NODE_NOSRC;
3499 }
3500 npos = 0;
3501 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3502 &roffce_lines, 0) == 0) {
3503 mandoc_msg(MANDOCERR_CE_NONUM,
3504 ln, pos, "ce %s", buf->buf + pos);
3505 roffce_lines = 1;
3506 }
3507 if (roffce_lines < 1) {
3508 r->man->last = r->man->last->parent;
3509 roffce_node = NULL;
3510 roffce_lines = 0;
3511 } else
3512 roffce_node = r->man->last->parent;
3513 } else {
3514 n->flags |= NODE_VALID | NODE_ENDED;
3515 r->man->last = n;
3516 }
3517 n->flags |= NODE_LINE;
3518 r->man->next = ROFF_NEXT_SIBLING;
3519 return ROFF_IGN;
3520 }
3521
3522 static int
roff_manyarg(ROFF_ARGS)3523 roff_manyarg(ROFF_ARGS)
3524 {
3525 struct roff_node *n;
3526 char *sp, *ep;
3527
3528 roff_elem_alloc(r->man, ln, ppos, tok);
3529 n = r->man->last;
3530
3531 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3532 while (*ep != '\0' && *ep != ' ')
3533 ep++;
3534 while (*ep == ' ')
3535 *ep++ = '\0';
3536 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3537 }
3538
3539 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3540 r->man->last = n;
3541 r->man->next = ROFF_NEXT_SIBLING;
3542 return ROFF_IGN;
3543 }
3544
3545 static int
roff_als(ROFF_ARGS)3546 roff_als(ROFF_ARGS)
3547 {
3548 char *oldn, *newn, *end, *value;
3549 size_t oldsz, newsz, valsz;
3550
3551 newn = oldn = buf->buf + pos;
3552 if (*newn == '\0')
3553 return ROFF_IGN;
3554
3555 newsz = roff_getname(r, &oldn, ln, pos);
3556 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3557 return ROFF_IGN;
3558
3559 end = oldn;
3560 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3561 if (oldsz == 0)
3562 return ROFF_IGN;
3563
3564 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3565 (int)oldsz, oldn);
3566 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3567 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3568 free(value);
3569 return ROFF_IGN;
3570 }
3571
3572 /*
3573 * The .break request only makes sense inside conditionals,
3574 * and that case is already handled in roff_cond_sub().
3575 */
3576 static int
roff_break(ROFF_ARGS)3577 roff_break(ROFF_ARGS)
3578 {
3579 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3580 return ROFF_IGN;
3581 }
3582
3583 static int
roff_cc(ROFF_ARGS)3584 roff_cc(ROFF_ARGS)
3585 {
3586 const char *p;
3587
3588 p = buf->buf + pos;
3589
3590 if (*p == '\0' || (r->control = *p++) == '.')
3591 r->control = '\0';
3592
3593 if (*p != '\0')
3594 mandoc_msg(MANDOCERR_ARG_EXCESS,
3595 ln, p - buf->buf, "cc ... %s", p);
3596
3597 return ROFF_IGN;
3598 }
3599
3600 static int
roff_char(ROFF_ARGS)3601 roff_char(ROFF_ARGS)
3602 {
3603 const char *p, *kp, *vp;
3604 size_t ksz, vsz;
3605 int font;
3606
3607 /* Parse the character to be replaced. */
3608
3609 kp = buf->buf + pos;
3610 p = kp + 1;
3611 if (*kp == '\0' || (*kp == '\\' &&
3612 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3613 (*p != ' ' && *p != '\0')) {
3614 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3615 return ROFF_IGN;
3616 }
3617 ksz = p - kp;
3618 while (*p == ' ')
3619 p++;
3620
3621 /*
3622 * If the replacement string contains a font escape sequence,
3623 * we have to restore the font at the end.
3624 */
3625
3626 vp = p;
3627 vsz = strlen(p);
3628 font = 0;
3629 while (*p != '\0') {
3630 if (*p++ != '\\')
3631 continue;
3632 switch (mandoc_escape(&p, NULL, NULL)) {
3633 case ESCAPE_FONT:
3634 case ESCAPE_FONTROMAN:
3635 case ESCAPE_FONTITALIC:
3636 case ESCAPE_FONTBOLD:
3637 case ESCAPE_FONTBI:
3638 case ESCAPE_FONTCR:
3639 case ESCAPE_FONTCB:
3640 case ESCAPE_FONTCI:
3641 case ESCAPE_FONTPREV:
3642 font++;
3643 break;
3644 default:
3645 break;
3646 }
3647 }
3648 if (font > 1)
3649 mandoc_msg(MANDOCERR_CHAR_FONT,
3650 ln, (int)(vp - buf->buf), "%s", vp);
3651
3652 /*
3653 * Approximate the effect of .char using the .tr tables.
3654 * XXX In groff, .char and .tr interact differently.
3655 */
3656
3657 if (ksz == 1) {
3658 if (r->xtab == NULL)
3659 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3660 assert((unsigned int)*kp < 128);
3661 free(r->xtab[(int)*kp].p);
3662 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3663 "%s%s", vp, font ? "\fP" : "");
3664 } else {
3665 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3666 if (font)
3667 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3668 }
3669 return ROFF_IGN;
3670 }
3671
3672 static int
roff_ec(ROFF_ARGS)3673 roff_ec(ROFF_ARGS)
3674 {
3675 const char *p;
3676
3677 p = buf->buf + pos;
3678 if (*p == '\0')
3679 r->escape = '\\';
3680 else {
3681 r->escape = *p;
3682 if (*++p != '\0')
3683 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3684 (int)(p - buf->buf), "ec ... %s", p);
3685 }
3686 return ROFF_IGN;
3687 }
3688
3689 static int
roff_eo(ROFF_ARGS)3690 roff_eo(ROFF_ARGS)
3691 {
3692 r->escape = '\0';
3693 if (buf->buf[pos] != '\0')
3694 mandoc_msg(MANDOCERR_ARG_SKIP,
3695 ln, pos, "eo %s", buf->buf + pos);
3696 return ROFF_IGN;
3697 }
3698
3699 static int
roff_mc(ROFF_ARGS)3700 roff_mc(ROFF_ARGS)
3701 {
3702 struct roff_node *n;
3703 char *cp;
3704
3705 /* Parse the first argument. */
3706
3707 cp = buf->buf + pos;
3708 if (*cp != '\0')
3709 cp++;
3710 if (buf->buf[pos] == '\\') {
3711 switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
3712 case ESCAPE_SPECIAL:
3713 case ESCAPE_UNICODE:
3714 case ESCAPE_NUMBERED:
3715 break;
3716 default:
3717 *cp = '\0';
3718 mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
3719 "mc %s", buf->buf + pos);
3720 buf->buf[pos] = '\0';
3721 break;
3722 }
3723 }
3724
3725 /* Ignore additional arguments. */
3726
3727 while (*cp == ' ')
3728 *cp++ = '\0';
3729 if (*cp != '\0') {
3730 mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
3731 "mc ... %s", cp);
3732 *cp = '\0';
3733 }
3734
3735 /* Create the .mc node. */
3736
3737 roff_elem_alloc(r->man, ln, ppos, tok);
3738 n = r->man->last;
3739 if (buf->buf[pos] != '\0')
3740 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3741 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3742 r->man->last = n;
3743 r->man->next = ROFF_NEXT_SIBLING;
3744 return ROFF_IGN;
3745 }
3746
3747 static int
roff_nop(ROFF_ARGS)3748 roff_nop(ROFF_ARGS)
3749 {
3750 while (buf->buf[pos] == ' ')
3751 pos++;
3752 *offs = pos;
3753 return ROFF_RERUN;
3754 }
3755
3756 static int
roff_tr(ROFF_ARGS)3757 roff_tr(ROFF_ARGS)
3758 {
3759 const char *p, *first, *second;
3760 size_t fsz, ssz;
3761
3762 p = buf->buf + pos;
3763
3764 if (*p == '\0') {
3765 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3766 return ROFF_IGN;
3767 }
3768
3769 while (*p != '\0') {
3770 fsz = ssz = 1;
3771
3772 first = p++;
3773 if (*first == '\\') {
3774 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3775 return ROFF_IGN;
3776 fsz = (size_t)(p - first);
3777 }
3778
3779 second = p++;
3780 if (*second == '\\') {
3781 if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
3782 return ROFF_IGN;
3783 ssz = (size_t)(p - second);
3784 } else if (*second == '\0') {
3785 mandoc_msg(MANDOCERR_TR_ODD, ln,
3786 (int)(first - buf->buf), "tr %s", first);
3787 second = " ";
3788 p--;
3789 }
3790
3791 if (fsz > 1) {
3792 roff_setstrn(&r->xmbtab, first, fsz,
3793 second, ssz, 0);
3794 continue;
3795 }
3796
3797 if (r->xtab == NULL)
3798 r->xtab = mandoc_calloc(128,
3799 sizeof(struct roffstr));
3800
3801 free(r->xtab[(int)*first].p);
3802 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3803 r->xtab[(int)*first].sz = ssz;
3804 }
3805
3806 return ROFF_IGN;
3807 }
3808
3809 /*
3810 * Implementation of the .return request.
3811 * There is no need to call roff_userret() from here.
3812 * The read module will call that after rewinding the reader stack
3813 * to the place from where the current macro was called.
3814 */
3815 static int
roff_return(ROFF_ARGS)3816 roff_return(ROFF_ARGS)
3817 {
3818 if (r->mstackpos >= 0)
3819 return ROFF_IGN | ROFF_USERRET;
3820
3821 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3822 return ROFF_IGN;
3823 }
3824
3825 static int
roff_rn(ROFF_ARGS)3826 roff_rn(ROFF_ARGS)
3827 {
3828 const char *value;
3829 char *oldn, *newn, *end;
3830 size_t oldsz, newsz;
3831 int deftype;
3832
3833 oldn = newn = buf->buf + pos;
3834 if (*oldn == '\0')
3835 return ROFF_IGN;
3836
3837 oldsz = roff_getname(r, &newn, ln, pos);
3838 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3839 return ROFF_IGN;
3840
3841 end = newn;
3842 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3843 if (newsz == 0)
3844 return ROFF_IGN;
3845
3846 deftype = ROFFDEF_ANY;
3847 value = roff_getstrn(r, oldn, oldsz, &deftype);
3848 switch (deftype) {
3849 case ROFFDEF_USER:
3850 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3851 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3852 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3853 break;
3854 case ROFFDEF_PRE:
3855 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3856 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3857 break;
3858 case ROFFDEF_REN:
3859 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3860 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3861 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862 break;
3863 case ROFFDEF_STD:
3864 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3865 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3866 break;
3867 default:
3868 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3869 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3870 break;
3871 }
3872 return ROFF_IGN;
3873 }
3874
3875 static int
roff_shift(ROFF_ARGS)3876 roff_shift(ROFF_ARGS)
3877 {
3878 struct mctx *ctx;
3879 int argpos, levels, i;
3880
3881 argpos = pos;
3882 levels = 1;
3883 if (buf->buf[pos] != '\0' &&
3884 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3885 mandoc_msg(MANDOCERR_CE_NONUM,
3886 ln, pos, "shift %s", buf->buf + pos);
3887 levels = 1;
3888 }
3889 if (r->mstackpos < 0) {
3890 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3891 return ROFF_IGN;
3892 }
3893 ctx = r->mstack + r->mstackpos;
3894 if (levels > ctx->argc) {
3895 mandoc_msg(MANDOCERR_SHIFT,
3896 ln, argpos, "%d, but max is %d", levels, ctx->argc);
3897 levels = ctx->argc;
3898 }
3899 if (levels < 0) {
3900 mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
3901 levels = 0;
3902 }
3903 if (levels == 0)
3904 return ROFF_IGN;
3905 for (i = 0; i < levels; i++)
3906 free(ctx->argv[i]);
3907 ctx->argc -= levels;
3908 for (i = 0; i < ctx->argc; i++)
3909 ctx->argv[i] = ctx->argv[i + levels];
3910 return ROFF_IGN;
3911 }
3912
3913 static int
roff_so(ROFF_ARGS)3914 roff_so(ROFF_ARGS)
3915 {
3916 char *name, *cp;
3917
3918 name = buf->buf + pos;
3919 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3920
3921 /*
3922 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3923 * opening anything that's not in our cwd or anything beneath
3924 * it. Thus, explicitly disallow traversing up the file-system
3925 * or using absolute paths.
3926 */
3927
3928 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3929 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3930 buf->sz = mandoc_asprintf(&cp,
3931 ".sp\nSee the file %s.\n.sp", name) + 1;
3932 free(buf->buf);
3933 buf->buf = cp;
3934 *offs = 0;
3935 return ROFF_REPARSE;
3936 }
3937
3938 *offs = pos;
3939 return ROFF_SO;
3940 }
3941
3942 /* --- user defined strings and macros ------------------------------------ */
3943
3944 static int
roff_userdef(ROFF_ARGS)3945 roff_userdef(ROFF_ARGS)
3946 {
3947 struct mctx *ctx;
3948 char *arg, *ap, *dst, *src;
3949 size_t sz;
3950
3951 /* If the macro is empty, ignore it altogether. */
3952
3953 if (*r->current_string == '\0')
3954 return ROFF_IGN;
3955
3956 /* Initialize a new macro stack context. */
3957
3958 if (++r->mstackpos == r->mstacksz) {
3959 r->mstack = mandoc_recallocarray(r->mstack,
3960 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3961 r->mstacksz += 8;
3962 }
3963 ctx = r->mstack + r->mstackpos;
3964 ctx->argc = 0;
3965
3966 /*
3967 * Collect pointers to macro argument strings,
3968 * NUL-terminating them and escaping quotes.
3969 */
3970
3971 src = buf->buf + pos;
3972 while (*src != '\0') {
3973 if (ctx->argc == ctx->argsz) {
3974 ctx->argsz += 8;
3975 ctx->argv = mandoc_reallocarray(ctx->argv,
3976 ctx->argsz, sizeof(*ctx->argv));
3977 }
3978 arg = roff_getarg(r, &src, ln, &pos);
3979 sz = 1; /* For the terminating NUL. */
3980 for (ap = arg; *ap != '\0'; ap++)
3981 sz += *ap == '"' ? 4 : 1;
3982 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3983 for (ap = arg; *ap != '\0'; ap++) {
3984 if (*ap == '"') {
3985 memcpy(dst, "\\(dq", 4);
3986 dst += 4;
3987 } else
3988 *dst++ = *ap;
3989 }
3990 *dst = '\0';
3991 free(arg);
3992 }
3993
3994 /* Replace the macro invocation by the macro definition. */
3995
3996 free(buf->buf);
3997 buf->buf = mandoc_strdup(r->current_string);
3998 buf->sz = strlen(buf->buf) + 1;
3999 *offs = 0;
4000
4001 return buf->buf[buf->sz - 2] == '\n' ?
4002 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
4003 }
4004
4005 /*
4006 * Calling a high-level macro that was renamed with .rn.
4007 * r->current_string has already been set up by roff_parse().
4008 */
4009 static int
roff_renamed(ROFF_ARGS)4010 roff_renamed(ROFF_ARGS)
4011 {
4012 char *nbuf;
4013
4014 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4015 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4016 free(buf->buf);
4017 buf->buf = nbuf;
4018 *offs = 0;
4019 return ROFF_CONT;
4020 }
4021
4022 /*
4023 * Measure the length in bytes of the roff identifier at *cpp
4024 * and advance the pointer to the next word.
4025 */
4026 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)4027 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4028 {
4029 char *name, *cp;
4030 int namesz, inam, iend;
4031
4032 name = *cpp;
4033 if (*name == '\0')
4034 return 0;
4035
4036 /* Advance cp to the byte after the end of the name. */
4037
4038 cp = name;
4039 namesz = 0;
4040 for (;;) {
4041 if (*cp == '\0')
4042 break;
4043 if (*cp == ' ' || *cp == '\t') {
4044 cp++;
4045 break;
4046 }
4047 if (*cp != '\\') {
4048 if (name + namesz < cp) {
4049 name[namesz] = *cp;
4050 *cp = ' ';
4051 }
4052 namesz++;
4053 cp++;
4054 continue;
4055 }
4056 if (cp[1] == '{' || cp[1] == '}')
4057 break;
4058 if (roff_escape(cp, 0, 0, NULL, &inam,
4059 NULL, NULL, &iend) != ESCAPE_UNDEF) {
4060 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4061 "%.*s%.*s", namesz, name, iend, cp);
4062 cp += iend;
4063 break;
4064 }
4065
4066 /*
4067 * In an identifier, \\, \., \G and so on
4068 * are reduced to \, ., G and so on,
4069 * vaguely similar to copy mode.
4070 */
4071
4072 name[namesz++] = cp[inam];
4073 while (iend--) {
4074 if (cp >= name + namesz)
4075 *cp = ' ';
4076 cp++;
4077 }
4078 }
4079
4080 /* Read past spaces. */
4081
4082 while (*cp == ' ')
4083 cp++;
4084
4085 *cpp = cp;
4086 return namesz;
4087 }
4088
4089 /*
4090 * Store *string into the user-defined string called *name.
4091 * To clear an existing entry, call with (*r, *name, NULL, 0).
4092 * append == 0: replace mode
4093 * append == 1: single-line append mode
4094 * append == 2: multiline append mode, append '\n' after each call
4095 */
4096 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4097 roff_setstr(struct roff *r, const char *name, const char *string,
4098 int append)
4099 {
4100 size_t namesz;
4101
4102 namesz = strlen(name);
4103 roff_setstrn(&r->strtab, name, namesz, string,
4104 string ? strlen(string) : 0, append);
4105 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4106 }
4107
4108 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4109 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4110 const char *string, size_t stringsz, int append)
4111 {
4112 struct roffkv *n;
4113 char *c;
4114 int i;
4115 size_t oldch, newch;
4116
4117 /* Search for an existing string with the same name. */
4118 n = *r;
4119
4120 while (n && (namesz != n->key.sz ||
4121 strncmp(n->key.p, name, namesz)))
4122 n = n->next;
4123
4124 if (NULL == n) {
4125 /* Create a new string table entry. */
4126 n = mandoc_malloc(sizeof(struct roffkv));
4127 n->key.p = mandoc_strndup(name, namesz);
4128 n->key.sz = namesz;
4129 n->val.p = NULL;
4130 n->val.sz = 0;
4131 n->next = *r;
4132 *r = n;
4133 } else if (0 == append) {
4134 free(n->val.p);
4135 n->val.p = NULL;
4136 n->val.sz = 0;
4137 }
4138
4139 if (NULL == string)
4140 return;
4141
4142 /*
4143 * One additional byte for the '\n' in multiline mode,
4144 * and one for the terminating '\0'.
4145 */
4146 newch = stringsz + (1 < append ? 2u : 1u);
4147
4148 if (NULL == n->val.p) {
4149 n->val.p = mandoc_malloc(newch);
4150 *n->val.p = '\0';
4151 oldch = 0;
4152 } else {
4153 oldch = n->val.sz;
4154 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4155 }
4156
4157 /* Skip existing content in the destination buffer. */
4158 c = n->val.p + (int)oldch;
4159
4160 /* Append new content to the destination buffer. */
4161 i = 0;
4162 while (i < (int)stringsz) {
4163 /*
4164 * Rudimentary roff copy mode:
4165 * Handle escaped backslashes.
4166 */
4167 if ('\\' == string[i] && '\\' == string[i + 1])
4168 i++;
4169 *c++ = string[i++];
4170 }
4171
4172 /* Append terminating bytes. */
4173 if (1 < append)
4174 *c++ = '\n';
4175
4176 *c = '\0';
4177 n->val.sz = (int)(c - n->val.p);
4178 }
4179
4180 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4181 roff_getstrn(struct roff *r, const char *name, size_t len,
4182 int *deftype)
4183 {
4184 const struct roffkv *n;
4185 int found, i;
4186 enum roff_tok tok;
4187
4188 found = 0;
4189 for (n = r->strtab; n != NULL; n = n->next) {
4190 if (strncmp(name, n->key.p, len) != 0 ||
4191 n->key.p[len] != '\0' || n->val.p == NULL)
4192 continue;
4193 if (*deftype & ROFFDEF_USER) {
4194 *deftype = ROFFDEF_USER;
4195 return n->val.p;
4196 } else {
4197 found = 1;
4198 break;
4199 }
4200 }
4201 for (n = r->rentab; n != NULL; n = n->next) {
4202 if (strncmp(name, n->key.p, len) != 0 ||
4203 n->key.p[len] != '\0' || n->val.p == NULL)
4204 continue;
4205 if (*deftype & ROFFDEF_REN) {
4206 *deftype = ROFFDEF_REN;
4207 return n->val.p;
4208 } else {
4209 found = 1;
4210 break;
4211 }
4212 }
4213 for (i = 0; i < PREDEFS_MAX; i++) {
4214 if (strncmp(name, predefs[i].name, len) != 0 ||
4215 predefs[i].name[len] != '\0')
4216 continue;
4217 if (*deftype & ROFFDEF_PRE) {
4218 *deftype = ROFFDEF_PRE;
4219 return predefs[i].str;
4220 } else {
4221 found = 1;
4222 break;
4223 }
4224 }
4225 if (r->man->meta.macroset != MACROSET_MAN) {
4226 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4227 if (strncmp(name, roff_name[tok], len) != 0 ||
4228 roff_name[tok][len] != '\0')
4229 continue;
4230 if (*deftype & ROFFDEF_STD) {
4231 *deftype = ROFFDEF_STD;
4232 return NULL;
4233 } else {
4234 found = 1;
4235 break;
4236 }
4237 }
4238 }
4239 if (r->man->meta.macroset != MACROSET_MDOC) {
4240 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4241 if (strncmp(name, roff_name[tok], len) != 0 ||
4242 roff_name[tok][len] != '\0')
4243 continue;
4244 if (*deftype & ROFFDEF_STD) {
4245 *deftype = ROFFDEF_STD;
4246 return NULL;
4247 } else {
4248 found = 1;
4249 break;
4250 }
4251 }
4252 }
4253
4254 if (found == 0 && *deftype != ROFFDEF_ANY) {
4255 if (*deftype & ROFFDEF_REN) {
4256 /*
4257 * This might still be a request,
4258 * so do not treat it as undefined yet.
4259 */
4260 *deftype = ROFFDEF_UNDEF;
4261 return NULL;
4262 }
4263
4264 /* Using an undefined string defines it to be empty. */
4265
4266 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4267 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4268 }
4269
4270 *deftype = 0;
4271 return NULL;
4272 }
4273
4274 static void
roff_freestr(struct roffkv * r)4275 roff_freestr(struct roffkv *r)
4276 {
4277 struct roffkv *n, *nn;
4278
4279 for (n = r; n; n = nn) {
4280 free(n->key.p);
4281 free(n->val.p);
4282 nn = n->next;
4283 free(n);
4284 }
4285 }
4286
4287 /* --- accessors and utility functions ------------------------------------ */
4288
4289 /*
4290 * Duplicate an input string, making the appropriate character
4291 * conversations (as stipulated by `tr') along the way.
4292 * Returns a heap-allocated string with all the replacements made.
4293 */
4294 char *
roff_strdup(const struct roff * r,const char * p)4295 roff_strdup(const struct roff *r, const char *p)
4296 {
4297 const struct roffkv *cp;
4298 char *res;
4299 const char *pp;
4300 size_t ssz, sz;
4301 enum mandoc_esc esc;
4302
4303 if (NULL == r->xmbtab && NULL == r->xtab)
4304 return mandoc_strdup(p);
4305 else if ('\0' == *p)
4306 return mandoc_strdup("");
4307
4308 /*
4309 * Step through each character looking for term matches
4310 * (remember that a `tr' can be invoked with an escape, which is
4311 * a glyph but the escape is multi-character).
4312 * We only do this if the character hash has been initialised
4313 * and the string is >0 length.
4314 */
4315
4316 res = NULL;
4317 ssz = 0;
4318
4319 while ('\0' != *p) {
4320 assert((unsigned int)*p < 128);
4321 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4322 sz = r->xtab[(int)*p].sz;
4323 res = mandoc_realloc(res, ssz + sz + 1);
4324 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4325 ssz += sz;
4326 p++;
4327 continue;
4328 } else if ('\\' != *p) {
4329 res = mandoc_realloc(res, ssz + 2);
4330 res[ssz++] = *p++;
4331 continue;
4332 }
4333
4334 /* Search for term matches. */
4335 for (cp = r->xmbtab; cp; cp = cp->next)
4336 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4337 break;
4338
4339 if (NULL != cp) {
4340 /*
4341 * A match has been found.
4342 * Append the match to the array and move
4343 * forward by its keysize.
4344 */
4345 res = mandoc_realloc(res,
4346 ssz + cp->val.sz + 1);
4347 memcpy(res + ssz, cp->val.p, cp->val.sz);
4348 ssz += cp->val.sz;
4349 p += (int)cp->key.sz;
4350 continue;
4351 }
4352
4353 /*
4354 * Handle escapes carefully: we need to copy
4355 * over just the escape itself, or else we might
4356 * do replacements within the escape itself.
4357 * Make sure to pass along the bogus string.
4358 */
4359 pp = p++;
4360 esc = mandoc_escape(&p, NULL, NULL);
4361 if (ESCAPE_ERROR == esc) {
4362 sz = strlen(pp);
4363 res = mandoc_realloc(res, ssz + sz + 1);
4364 memcpy(res + ssz, pp, sz);
4365 break;
4366 }
4367 /*
4368 * We bail out on bad escapes.
4369 * No need to warn: we already did so when
4370 * roff_expand() was called.
4371 */
4372 sz = (int)(p - pp);
4373 res = mandoc_realloc(res, ssz + sz + 1);
4374 memcpy(res + ssz, pp, sz);
4375 ssz += sz;
4376 }
4377
4378 res[(int)ssz] = '\0';
4379 return res;
4380 }
4381
4382 int
roff_getformat(const struct roff * r)4383 roff_getformat(const struct roff *r)
4384 {
4385
4386 return r->format;
4387 }
4388
4389 /*
4390 * Find out whether a line is a macro line or not.
4391 * If it is, adjust the current position and return one; if it isn't,
4392 * return zero and don't change the current position.
4393 * If the control character has been set with `.cc', then let that grain
4394 * precedence.
4395 * This is slightly contrary to groff, where using the non-breaking
4396 * control character when `cc' has been invoked will cause the
4397 * non-breaking macro contents to be printed verbatim.
4398 */
4399 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4400 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4401 {
4402 int pos;
4403
4404 pos = *ppos;
4405
4406 if (r->control != '\0' && cp[pos] == r->control)
4407 pos++;
4408 else if (r->control != '\0')
4409 return 0;
4410 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4411 pos += 2;
4412 else if ('.' == cp[pos] || '\'' == cp[pos])
4413 pos++;
4414 else
4415 return 0;
4416
4417 while (' ' == cp[pos] || '\t' == cp[pos])
4418 pos++;
4419
4420 *ppos = pos;
4421 return 1;
4422 }
4423