1 /* $Id: roff.c,v 1.378 2021/08/10 12:55:04 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Implementation of the roff(7) parser for mandoc(1).
19 */
20 #include "config.h"
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <limits.h>
27 #include <stddef.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc_ohash.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mandoc_parse.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 #include "tbl_parse.h"
41 #include "eqn_parse.h"
42
/*
 * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
 * that an escape sequence resulted from copy-in processing and
 * needs to be checked or interpolated.  As it is used nowhere
 * else, it is defined here rather than in a header file.
 * The value 27 is the code of the ASCII ESC control character.
 */
#define	ASCII_ESC	27

/* Maximum number of string expansions per line, to break infinite loops. */
#define	EXPAND_LIMIT	1000

/*
 * Types of definitions of macros and strings.
 * Every type occupies its own bit, so several types can be OR'ed
 * together into one mask; bit 0 is not used by any of them.
 */
#define	ROFFDEF_USER	(1 << 1)  /* User-defined. */
#define	ROFFDEF_PRE	(1 << 2)  /* Predefined. */
#define	ROFFDEF_REN	(1 << 3)  /* Renamed standard macro. */
#define	ROFFDEF_STD	(1 << 4)  /* mdoc(7) or man(7) macro. */
/* Mask matching any of the four kinds of definitions above. */
#define	ROFFDEF_ANY	(ROFFDEF_USER | ROFFDEF_PRE | \
			 ROFFDEF_REN | ROFFDEF_STD)
#define	ROFFDEF_UNDEF	(1 << 5)  /* Completely undefined. */
62
63 /* --- data types --------------------------------------------------------- */
64
/*
 * An incredibly-simple string buffer:
 * a character pointer stored together with its cached length.
 */
struct	roffstr {
	char		*p; /* nil-terminated buffer */
	size_t		 sz; /* saved strlen(p) */
};
72
/*
 * A key-value roffstr pair as part of a singly-linked list.
 * struct roff keeps several such lists: strtab, rentab, and xmbtab.
 */
struct	roffkv {
	struct roffstr	 key; /* name the entry is stored under */
	struct roffstr	 val; /* contents associated with the key */
	struct roffkv	*next; /* next in list */
};
81
/*
 * A single number register as part of a singly-linked list.
 * The head of the list is the regtab member of struct roff.
 */
struct	roffreg {
	struct roffstr	 key; /* register name */
	int		 val; /* integer value of the register */
	int		 step; /* NOTE(review): presumably the auto-increment
				 amount - confirm in roff_setregn() */
	struct roffreg	*next; /* next in list */
};
91
/*
 * Association of request and macro names with token IDs.
 * These are the entries stored in the hash tables built by
 * roffhash_alloc(), which allocates each entry with enough
 * extra room for the name and its terminating nil byte.
 */
struct	roffreq {
	enum roff_tok	 tok; /* token ID the name maps to */
	char		 name[]; /* nil-terminated name; the hash key */
};
99
/*
 * A macro processing context.
 * More than one is needed when macro calls are nested.
 * The contexts live in the mstack array of struct roff;
 * roff_free() releases the argv member of every context.
 */
struct	mctx {
	char		**argv; /* heap-allocated argument vector */
	int		 argc; /* NOTE(review): presumably the number of
				 arguments in use - confirm */
	int		 argsz; /* NOTE(review): presumably the number of
				  slots allocated in argv - confirm */
};
109
/*
 * State of the roff(7) parser.
 * Allocated by roff_alloc(), brought back to its initial state by
 * roff_reset(), and destroyed by roff_free().
 * Both stack positions are -1 while the respective stack is empty.
 */
struct	roff {
	struct roff_man	*man; /* mdoc or man parser */
	struct roffnode	*last; /* leaf of stack */
	struct mctx	*mstack; /* stack of macro contexts */
	int		*rstack; /* stack of inverted `ie' values */
	struct ohash	*reqtab; /* request lookup table */
	struct roffreg	*regtab; /* number registers */
	struct roffkv	*strtab; /* user-defined strings & macros */
	struct roffkv	*rentab; /* renamed strings & macros */
	struct roffkv	*xmbtab; /* multi-byte trans table (`tr') */
	struct roffstr	*xtab; /* single-byte trans table (`tr') */
	const char	*current_string; /* value of last called user macro */
	struct tbl_node	*first_tbl; /* first table parsed */
	struct tbl_node	*last_tbl; /* last table parsed */
	struct tbl_node	*tbl; /* current table being parsed */
	struct eqn_node	*last_eqn; /* equation parser */
	struct eqn_node	*eqn; /* active equation parser */
	int		 eqn_inline; /* current equation is inline */
	int		 options; /* parse options */
	int		 mstacksz; /* current size of mstack */
	int		 mstackpos; /* position in mstack */
	int		 rstacksz; /* current size limit of rstack */
	int		 rstackpos; /* position in rstack */
	int		 format; /* current file in mdoc or man format */
	char		 control; /* control character */
	char		 escape; /* escape character */
};
137
/*
 * A macro definition, condition, or ignored block.
 * Nodes form a stack linked through the parent pointer, with the
 * innermost node in the last member of struct roff.  They are created
 * by roffnode_push() and destroyed by roffnode_pop(), which also
 * frees the name and end strings.
 */
struct	roffnode {
	enum roff_tok	 tok; /* type of node */
	struct roffnode	*parent; /* up one in stack */
	int		 line; /* parse line */
	int		 col; /* parse col */
	char		*name; /* node name, e.g. macro name */
	char		*end; /* custom end macro of the block */
	int		 endspan; /* scope to: 1=eol 2=next line -1=\} */
	int		 rule; /* content is: 1=evaluated 0=skipped */
};
151
/*
 * Parameter list shared by all request handler functions.
 * Spelling it as a macro keeps the prototypes and the definitions
 * of the handlers in sync.
 */
#define	ROFF_ARGS	 struct roff *r, /* parse ctx */ \
			 enum roff_tok tok, /* tok of macro */ \
			 struct buf *buf, /* input buffer */ \
			 int ln, /* parse line */ \
			 int ppos, /* original pos in buffer */ \
			 int pos, /* current pos in buffer */ \
			 int *offs /* reset offset of buffer data */

/* Pointer to a request handler function taking ROFF_ARGS. */
typedef	int (*roffproc)(ROFF_ARGS);
161
/*
 * Handlers and flags of one request; the element type of roffs[].
 * The rows of roffs[] use positional initializers, so the order
 * of the members must not be changed.
 */
struct	roffmac {
	roffproc	 proc; /* process new macro */
	roffproc	 text; /* process as child text of macro */
	roffproc	 sub; /* process as child of macro */
	int		 flags; /* bitwise OR of ROFFMAC_* values, or 0 */
#define	ROFFMAC_STRUCT	(1 << 0) /* always interpret */
};
169
/*
 * One predefined string; the element type of predefs[].
 */
struct	predef {
	const char	*name; /* predefined input name */
	const char	*str; /* replacement symbol */
};

/*
 * Expand to one struct predef initializer including the trailing comma.
 * NOTE(review): presumably invoked once per entry by predefs.in,
 * which is included into the predefs[] initializer - confirm.
 */
#define	PREDEF(__name, __str) \
	{ (__name), (__str) },
177
178 /* --- function prototypes ------------------------------------------------ */
179
180 static int roffnode_cleanscope(struct roff *);
181 static int roffnode_pop(struct roff *);
182 static void roffnode_push(struct roff *, enum roff_tok,
183 const char *, int, int);
184 static void roff_addtbl(struct roff_man *, int, struct tbl_node *);
185 static int roff_als(ROFF_ARGS);
186 static int roff_block(ROFF_ARGS);
187 static int roff_block_text(ROFF_ARGS);
188 static int roff_block_sub(ROFF_ARGS);
189 static int roff_break(ROFF_ARGS);
190 static int roff_cblock(ROFF_ARGS);
191 static int roff_cc(ROFF_ARGS);
192 static int roff_ccond(struct roff *, int, int);
193 static int roff_char(ROFF_ARGS);
194 static int roff_cond(ROFF_ARGS);
195 static int roff_cond_checkend(ROFF_ARGS);
196 static int roff_cond_text(ROFF_ARGS);
197 static int roff_cond_sub(ROFF_ARGS);
198 static int roff_ds(ROFF_ARGS);
199 static int roff_ec(ROFF_ARGS);
200 static int roff_eo(ROFF_ARGS);
201 static int roff_eqndelim(struct roff *, struct buf *, int);
202 static int roff_evalcond(struct roff *, int, char *, int *);
203 static int roff_evalnum(struct roff *, int,
204 const char *, int *, int *, int);
205 static int roff_evalpar(struct roff *, int,
206 const char *, int *, int *, int);
207 static int roff_evalstrcond(const char *, int *);
208 static int roff_expand(struct roff *, struct buf *,
209 int, int, char);
210 static void roff_free1(struct roff *);
211 static void roff_freereg(struct roffreg *);
212 static void roff_freestr(struct roffkv *);
213 static size_t roff_getname(struct roff *, char **, int, int);
214 static int roff_getnum(const char *, int *, int *, int);
215 static int roff_getop(const char *, int *, char *);
216 static int roff_getregn(struct roff *,
217 const char *, size_t, char);
218 static int roff_getregro(const struct roff *,
219 const char *name);
220 static const char *roff_getstrn(struct roff *,
221 const char *, size_t, int *);
222 static int roff_hasregn(const struct roff *,
223 const char *, size_t);
224 static int roff_insec(ROFF_ARGS);
225 static int roff_it(ROFF_ARGS);
226 static int roff_line_ignore(ROFF_ARGS);
227 static void roff_man_alloc1(struct roff_man *);
228 static void roff_man_free1(struct roff_man *);
229 static int roff_manyarg(ROFF_ARGS);
230 static int roff_noarg(ROFF_ARGS);
231 static int roff_nop(ROFF_ARGS);
232 static int roff_nr(ROFF_ARGS);
233 static int roff_onearg(ROFF_ARGS);
234 static enum roff_tok roff_parse(struct roff *, char *, int *,
235 int, int);
236 static int roff_parsetext(struct roff *, struct buf *,
237 int, int *);
238 static int roff_renamed(ROFF_ARGS);
239 static int roff_return(ROFF_ARGS);
240 static int roff_rm(ROFF_ARGS);
241 static int roff_rn(ROFF_ARGS);
242 static int roff_rr(ROFF_ARGS);
243 static void roff_setregn(struct roff *, const char *,
244 size_t, int, char, int);
245 static void roff_setstr(struct roff *,
246 const char *, const char *, int);
247 static void roff_setstrn(struct roffkv **, const char *,
248 size_t, const char *, size_t, int);
249 static int roff_shift(ROFF_ARGS);
250 static int roff_so(ROFF_ARGS);
251 static int roff_tr(ROFF_ARGS);
252 static int roff_Dd(ROFF_ARGS);
253 static int roff_TE(ROFF_ARGS);
254 static int roff_TS(ROFF_ARGS);
255 static int roff_EQ(ROFF_ARGS);
256 static int roff_EN(ROFF_ARGS);
257 static int roff_T_(ROFF_ARGS);
258 static int roff_unsupp(ROFF_ARGS);
259 static int roff_userdef(ROFF_ARGS);
260
261 /* --- constant data ------------------------------------------------------ */
262
/*
 * Flag bits for numeric parsing.
 * NOTE(review): presumably passed in the trailing int argument
 * of roff_getnum(), roff_evalnum(), and roff_evalpar() - confirm.
 */
#define	ROFFNUM_SCALE	(1 << 0)  /* Honour scaling in roff_getnum(). */
#define	ROFFNUM_WHITE	(1 << 1)  /* Skip whitespace in roff_evalnum(). */
265
/*
 * Names of all requests and macros, looked up by token ID:
 * roffhash_alloc() reads roff_name[tok] and skips NULL entries.
 * The array holds MAN_MAX + 1 elements.
 * NOTE(review): the order presumably has to match enum roff_tok,
 * which is declared outside this file - confirm before inserting,
 * removing, or reordering names.
 */
const char *__roff_name[MAN_MAX + 1] = {
	/* Requests; the NULL ending this group lines up with ROFF_MAX in roffs[]. */
	"br", "ce", "fi", "ft",
	"ll", "mc", "nf",
	"po", "rj", "sp",
	"ta", "ti", NULL,
	/* Further requests, in the same order as the rows of roffs[]. */
	"ab", "ad", "af", "aln",
	"als", "am", "am1", "ami",
	"ami1", "as", "as1", "asciify",
	"backtrace", "bd", "bleedat", "blm",
	"box", "boxa", "bp", "BP",
	"break", "breakchar", "brnl", "brp",
	"brpnl", "c2", "cc",
	"cf", "cflags", "ch", "char",
	"chop", "class", "close", "CL",
	"color", "composite", "continue", "cp",
	"cropat", "cs", "cu", "da",
	"dch", "Dd", "de", "de1",
	"defcolor", "dei", "dei1", "device",
	"devicem", "di", "do", "ds",
	"ds1", "dwh", "dt", "ec",
	"ecr", "ecs", "el", "em",
	"EN", "eo", "EP", "EQ",
	"errprint", "ev", "evc", "ex",
	"fallback", "fam", "fc", "fchar",
	"fcolor", "fdeferlig", "feature", "fkern",
	"fl", "flig", "fp", "fps",
	"fschar", "fspacewidth", "fspecial", "ftr",
	"fzoom", "gcolor", "hc", "hcode",
	"hidechar", "hla", "hlm", "hpf",
	"hpfa", "hpfcode", "hw", "hy",
	"hylang", "hylen", "hym", "hypp",
	"hys", "ie", "if", "ig",
	"index", "it", "itc", "IX",
	"kern", "kernafter", "kernbefore", "kernpair",
	"lc", "lc_ctype", "lds", "length",
	"letadj", "lf", "lg", "lhang",
	"linetabs", "lnr", "lnrf", "lpfx",
	"ls", "lsm", "lt",
	"mediasize", "minss", "mk", "mso",
	"na", "ne", "nh", "nhychar",
	"nm", "nn", "nop", "nr",
	"nrf", "nroff", "ns", "nx",
	"open", "opena", "os", "output",
	"padj", "papersize", "pc", "pev",
	"pi", "PI", "pl", "pm",
	"pn", "pnr", "ps",
	"psbb", "pshape", "pso", "ptr",
	"pvs", "rchar", "rd", "recursionlimit",
	"return", "rfschar", "rhang",
	"rm", "rn", "rnn", "rr",
	"rs", "rt", "schar", "sentchar",
	"shc", "shift", "sizes", "so",
	"spacewidth", "special", "spreadwarn", "ss",
	"sty", "substring", "sv", "sy",
	"T&", "tc", "TE",
	"TH", "tkf", "tl",
	"tm", "tm1", "tmc", "tr",
	"track", "transchar", "trf", "trimat",
	"trin", "trnt", "troff", "TS",
	"uf", "ul", "unformat", "unwatch",
	"unwatchn", "vpt", "vs", "warn",
	"warnscale", "watch", "watchlength", "watchn",
	"wh", "while", "write", "writec",
	/*
	 * The two nameless slots after "." line up with the
	 * roff_renamed and roff_userdef rows ending roffs[].
	 */
	"writem", "xflag", ".", NULL,
	NULL, "text",
	/* NOTE(review): presumably the mdoc(7) macros - confirm. */
	"Dd", "Dt", "Os", "Sh",
	"Ss", "Pp", "D1", "Dl",
	"Bd", "Ed", "Bl", "El",
	"It", "Ad", "An", "Ap",
	"Ar", "Cd", "Cm", "Dv",
	"Er", "Ev", "Ex", "Fa",
	"Fd", "Fl", "Fn", "Ft",
	"Ic", "In", "Li", "Nd",
	"Nm", "Op", "Ot", "Pa",
	"Rv", "St", "Va", "Vt",
	"Xr", "%A", "%B", "%D",
	"%I", "%J", "%N", "%O",
	"%P", "%R", "%T", "%V",
	"Ac", "Ao", "Aq", "At",
	"Bc", "Bf", "Bo", "Bq",
	"Bsx", "Bx", "Db", "Dc",
	"Do", "Dq", "Ec", "Ef",
	"Em", "Eo", "Fx", "Ms",
	"No", "Ns", "Nx", "Ox",
	"Pc", "Pf", "Po", "Pq",
	"Qc", "Ql", "Qo", "Qq",
	"Re", "Rs", "Sc", "So",
	"Sq", "Sm", "Sx", "Sy",
	"Tn", "Ux", "Xc", "Xo",
	"Fo", "Fc", "Oo", "Oc",
	"Bk", "Ek", "Bt", "Hf",
	"Fr", "Ud", "Lb", "Lp",
	"Lk", "Mt", "Brq", "Bro",
	"Brc", "%C", "Es", "En",
	"Dx", "%Q", "%U", "Ta",
	"Tg", NULL,
	/* NOTE(review): presumably the man(7) macros - confirm. */
	"TH", "SH", "SS", "TP",
	"TQ",
	"LP", "PP", "P", "IP",
	"HP", "SM", "SB", "BI",
	"IB", "BR", "RB", "R",
	"B", "I", "IR", "RI",
	"RE", "RS", "DT", "UC",
	"PD", "AT", "in",
	"SY", "YS", "OP",
	"EX", "EE", "UR",
	"UE", "MT", "ME", NULL
};
/* Read-only view of the name table; this is what roffhash_alloc() reads. */
const char *const *roff_name = __roff_name;
375
/*
 * Handlers of the roff requests, one row per request.
 * The rows appear in the same order as the request names at the
 * beginning of __roff_name[]; the comment on each row names the
 * request it serves.  Each row lists, in the member order of
 * struct roffmac: proc, text, sub, flags.
 * NOTE(review): the table is presumably indexed by enum roff_tok,
 * which is declared outside this file - confirm before inserting,
 * removing, or reordering rows.
 */
static	struct roffmac	 roffs[TOKEN_NONE] = {
	{ roff_noarg, NULL, NULL, 0 },  /* br */
	{ roff_onearg, NULL, NULL, 0 },  /* ce */
	{ roff_noarg, NULL, NULL, 0 },  /* fi */
	{ roff_onearg, NULL, NULL, 0 },  /* ft */
	{ roff_onearg, NULL, NULL, 0 },  /* ll */
	{ roff_onearg, NULL, NULL, 0 },  /* mc */
	{ roff_noarg, NULL, NULL, 0 },  /* nf */
	{ roff_onearg, NULL, NULL, 0 },  /* po */
	{ roff_onearg, NULL, NULL, 0 },  /* rj */
	{ roff_onearg, NULL, NULL, 0 },  /* sp */
	{ roff_manyarg, NULL, NULL, 0 },  /* ta */
	{ roff_onearg, NULL, NULL, 0 },  /* ti */
	{ NULL, NULL, NULL, 0 },  /* ROFF_MAX */
	{ roff_unsupp, NULL, NULL, 0 },  /* ab */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ad */
	{ roff_line_ignore, NULL, NULL, 0 },  /* af */
	{ roff_unsupp, NULL, NULL, 0 },  /* aln */
	{ roff_als, NULL, NULL, 0 },  /* als */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* am1 */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ami1 */
	{ roff_ds, NULL, NULL, 0 },  /* as */
	{ roff_ds, NULL, NULL, 0 },  /* as1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* asciify */
	{ roff_line_ignore, NULL, NULL, 0 },  /* backtrace */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bleedat */
	{ roff_unsupp, NULL, NULL, 0 },  /* blm */
	{ roff_unsupp, NULL, NULL, 0 },  /* box */
	{ roff_unsupp, NULL, NULL, 0 },  /* boxa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* bp */
	{ roff_unsupp, NULL, NULL, 0 },  /* BP */
	{ roff_break, NULL, NULL, 0 },  /* break */
	{ roff_line_ignore, NULL, NULL, 0 },  /* breakchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brnl */
	{ roff_noarg, NULL, NULL, 0 },  /* brp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* brpnl */
	{ roff_unsupp, NULL, NULL, 0 },  /* c2 */
	{ roff_cc, NULL, NULL, 0 },  /* cc */
	{ roff_insec, NULL, NULL, 0 },  /* cf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cflags */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ch */
	{ roff_char, NULL, NULL, 0 },  /* char */
	{ roff_unsupp, NULL, NULL, 0 },  /* chop */
	{ roff_line_ignore, NULL, NULL, 0 },  /* class */
	{ roff_insec, NULL, NULL, 0 },  /* close */
	{ roff_unsupp, NULL, NULL, 0 },  /* CL */
	{ roff_line_ignore, NULL, NULL, 0 },  /* color */
	{ roff_unsupp, NULL, NULL, 0 },  /* composite */
	{ roff_unsupp, NULL, NULL, 0 },  /* continue */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cropat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* cu */
	{ roff_unsupp, NULL, NULL, 0 },  /* da */
	{ roff_unsupp, NULL, NULL, 0 },  /* dch */
	{ roff_Dd, NULL, NULL, 0 },  /* Dd */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* de1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* defcolor */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* dei1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* device */
	{ roff_unsupp, NULL, NULL, 0 },  /* devicem */
	{ roff_unsupp, NULL, NULL, 0 },  /* di */
	{ roff_unsupp, NULL, NULL, 0 },  /* do */
	{ roff_ds, NULL, NULL, 0 },  /* ds */
	{ roff_ds, NULL, NULL, 0 },  /* ds1 */
	{ roff_unsupp, NULL, NULL, 0 },  /* dwh */
	{ roff_unsupp, NULL, NULL, 0 },  /* dt */
	{ roff_ec, NULL, NULL, 0 },  /* ec */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecr */
	{ roff_unsupp, NULL, NULL, 0 },  /* ecs */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* el */
	{ roff_unsupp, NULL, NULL, 0 },  /* em */
	{ roff_EN, NULL, NULL, 0 },  /* EN */
	{ roff_eo, NULL, NULL, 0 },  /* eo */
	{ roff_unsupp, NULL, NULL, 0 },  /* EP */
	{ roff_EQ, NULL, NULL, 0 },  /* EQ */
	{ roff_line_ignore, NULL, NULL, 0 },  /* errprint */
	{ roff_unsupp, NULL, NULL, 0 },  /* ev */
	{ roff_unsupp, NULL, NULL, 0 },  /* evc */
	{ roff_unsupp, NULL, NULL, 0 },  /* ex */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fallback */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fam */
	{ roff_unsupp, NULL, NULL, 0 },  /* fc */
	{ roff_unsupp, NULL, NULL, 0 },  /* fchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fdeferlig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* feature */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fkern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* flig */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fps */
	{ roff_unsupp, NULL, NULL, 0 },  /* fschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fspecial */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ftr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* fzoom */
	{ roff_line_ignore, NULL, NULL, 0 },  /* gcolor */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hidechar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hla */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hlm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfa */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hpfcode */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hw */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hy */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylang */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hylen */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hym */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hypp */
	{ roff_line_ignore, NULL, NULL, 0 },  /* hys */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* ie */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT },  /* if */
	{ roff_block, roff_block_text, roff_block_sub, 0 },  /* ig */
	{ roff_unsupp, NULL, NULL, 0 },  /* index */
	{ roff_it, NULL, NULL, 0 },  /* it */
	{ roff_unsupp, NULL, NULL, 0 },  /* itc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* IX */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kern */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernafter */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernbefore */
	{ roff_line_ignore, NULL, NULL, 0 },  /* kernpair */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc */
	{ roff_unsupp, NULL, NULL, 0 },  /* lc_ctype */
	{ roff_unsupp, NULL, NULL, 0 },  /* lds */
	{ roff_unsupp, NULL, NULL, 0 },  /* length */
	{ roff_line_ignore, NULL, NULL, 0 },  /* letadj */
	{ roff_insec, NULL, NULL, 0 },  /* lf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lg */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lhang */
	{ roff_unsupp, NULL, NULL, 0 },  /* linetabs */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnr */
	{ roff_unsupp, NULL, NULL, 0 },  /* lnrf */
	{ roff_unsupp, NULL, NULL, 0 },  /* lpfx */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ls */
	{ roff_unsupp, NULL, NULL, 0 },  /* lsm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* lt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mediasize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* minss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* mk */
	{ roff_insec, NULL, NULL, 0 },  /* mso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* na */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ne */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nh */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nhychar */
	{ roff_unsupp, NULL, NULL, 0 },  /* nm */
	{ roff_unsupp, NULL, NULL, 0 },  /* nn */
	{ roff_nop, NULL, NULL, 0 },  /* nop */
	{ roff_nr, NULL, NULL, 0 },  /* nr */
	{ roff_unsupp, NULL, NULL, 0 },  /* nrf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* nroff */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ns */
	{ roff_insec, NULL, NULL, 0 },  /* nx */
	{ roff_insec, NULL, NULL, 0 },  /* open */
	{ roff_insec, NULL, NULL, 0 },  /* opena */
	{ roff_line_ignore, NULL, NULL, 0 },  /* os */
	{ roff_unsupp, NULL, NULL, 0 },  /* output */
	{ roff_line_ignore, NULL, NULL, 0 },  /* padj */
	{ roff_line_ignore, NULL, NULL, 0 },  /* papersize */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pc */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pev */
	{ roff_insec, NULL, NULL, 0 },  /* pi */
	{ roff_unsupp, NULL, NULL, 0 },  /* PI */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pnr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ps */
	{ roff_unsupp, NULL, NULL, 0 },  /* psbb */
	{ roff_unsupp, NULL, NULL, 0 },  /* pshape */
	{ roff_insec, NULL, NULL, 0 },  /* pso */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ptr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* pvs */
	{ roff_unsupp, NULL, NULL, 0 },  /* rchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rd */
	{ roff_line_ignore, NULL, NULL, 0 },  /* recursionlimit */
	{ roff_return, NULL, NULL, 0 },  /* return */
	{ roff_unsupp, NULL, NULL, 0 },  /* rfschar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rhang */
	{ roff_rm, NULL, NULL, 0 },  /* rm */
	{ roff_rn, NULL, NULL, 0 },  /* rn */
	{ roff_unsupp, NULL, NULL, 0 },  /* rnn */
	{ roff_rr, NULL, NULL, 0 },  /* rr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* rt */
	{ roff_unsupp, NULL, NULL, 0 },  /* schar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sentchar */
	{ roff_line_ignore, NULL, NULL, 0 },  /* shc */
	{ roff_shift, NULL, NULL, 0 },  /* shift */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sizes */
	{ roff_so, NULL, NULL, 0 },  /* so */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spacewidth */
	{ roff_line_ignore, NULL, NULL, 0 },  /* special */
	{ roff_line_ignore, NULL, NULL, 0 },  /* spreadwarn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ss */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sty */
	{ roff_unsupp, NULL, NULL, 0 },  /* substring */
	{ roff_line_ignore, NULL, NULL, 0 },  /* sv */
	{ roff_insec, NULL, NULL, 0 },  /* sy */
	{ roff_T_, NULL, NULL, 0 },  /* T& */
	{ roff_unsupp, NULL, NULL, 0 },  /* tc */
	{ roff_TE, NULL, NULL, 0 },  /* TE */
	{ roff_Dd, NULL, NULL, 0 },  /* TH */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tkf */
	{ roff_unsupp, NULL, NULL, 0 },  /* tl */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tm1 */
	{ roff_line_ignore, NULL, NULL, 0 },  /* tmc */
	{ roff_tr, NULL, NULL, 0 },  /* tr */
	{ roff_line_ignore, NULL, NULL, 0 },  /* track */
	{ roff_line_ignore, NULL, NULL, 0 },  /* transchar */
	{ roff_insec, NULL, NULL, 0 },  /* trf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* trimat */
	{ roff_unsupp, NULL, NULL, 0 },  /* trin */
	{ roff_unsupp, NULL, NULL, 0 },  /* trnt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* troff */
	{ roff_TS, NULL, NULL, 0 },  /* TS */
	{ roff_line_ignore, NULL, NULL, 0 },  /* uf */
	{ roff_line_ignore, NULL, NULL, 0 },  /* ul */
	{ roff_unsupp, NULL, NULL, 0 },  /* unformat */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* unwatchn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vpt */
	{ roff_line_ignore, NULL, NULL, 0 },  /* vs */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warn */
	{ roff_line_ignore, NULL, NULL, 0 },  /* warnscale */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watch */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchlength */
	{ roff_line_ignore, NULL, NULL, 0 },  /* watchn */
	{ roff_unsupp, NULL, NULL, 0 },  /* wh */
	{ roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/
	{ roff_insec, NULL, NULL, 0 },  /* write */
	{ roff_insec, NULL, NULL, 0 },  /* writec */
	{ roff_insec, NULL, NULL, 0 },  /* writem */
	{ roff_line_ignore, NULL, NULL, 0 },  /* xflag */
	{ roff_cblock, NULL, NULL, 0 },  /* . */
	{ roff_renamed, NULL, NULL, 0 },  /* presumably ROFF_RENAMED - confirm */
	{ roff_userdef, NULL, NULL, 0 }  /* presumably ROFF_USERDEF - confirm */
};
622
/*
 * Array of injected predefined strings.
 * The entries come from the included file predefs.in.
 * NOTE(review): PREDEFS_MAX is hard-coded; it presumably has to
 * equal the number of entries in predefs.in - confirm whenever
 * that file changes.
 */
#define	PREDEFS_MAX	 38
static	const struct predef predefs[PREDEFS_MAX] = {
#include "predefs.in"
};
628
/*
 * File-scope parser state.  These variables are not members of
 * struct roff, so they are shared by all parsers in the process.
 * roff_reset() sets all four back to zero or NULL.
 */
static	int	 roffce_lines;	/* number of input lines to center */
static	struct roff_node *roffce_node;  /* active request */
static	int	 roffit_lines;  /* number of lines to delay */
static	char	*roffit_macro;  /* nil-terminated macro line */
633
634
635 /* --- request table ------------------------------------------------------ */
636
637 struct ohash *
roffhash_alloc(enum roff_tok mintok,enum roff_tok maxtok)638 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok)
639 {
640 struct ohash *htab;
641 struct roffreq *req;
642 enum roff_tok tok;
643 size_t sz;
644 unsigned int slot;
645
646 htab = mandoc_malloc(sizeof(*htab));
647 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name));
648
649 for (tok = mintok; tok < maxtok; tok++) {
650 if (roff_name[tok] == NULL)
651 continue;
652 sz = strlen(roff_name[tok]);
653 req = mandoc_malloc(sizeof(*req) + sz + 1);
654 req->tok = tok;
655 memcpy(req->name, roff_name[tok], sz + 1);
656 slot = ohash_qlookup(htab, req->name);
657 ohash_insert(htab, slot, req);
658 }
659 return htab;
660 }
661
/*
 * Destroy a table built by roffhash_alloc():
 * free every entry, then the table contents, then the table itself.
 * Passing NULL is allowed and does nothing.
 */
void
roffhash_free(struct ohash *htab)
{
	struct roffreq	*entry;
	unsigned int	 iter;

	if (htab != NULL) {
		entry = ohash_first(htab, &iter);
		while (entry != NULL) {
			free(entry);
			entry = ohash_next(htab, &iter);
		}
		ohash_delete(htab);
		free(htab);
	}
}
676
677 enum roff_tok
roffhash_find(struct ohash * htab,const char * name,size_t sz)678 roffhash_find(struct ohash *htab, const char *name, size_t sz)
679 {
680 struct roffreq *req;
681 const char *end;
682
683 if (sz) {
684 end = name + sz;
685 req = ohash_find(htab, ohash_qlookupi(htab, name, &end));
686 } else
687 req = ohash_find(htab, ohash_qlookup(htab, name));
688 return req == NULL ? TOKEN_NONE : req->tok;
689 }
690
691 /* --- stack of request blocks -------------------------------------------- */
692
693 /*
694 * Pop the current node off of the stack of roff instructions currently
695 * pending. Return 1 if it is a loop or 0 otherwise.
696 */
697 static int
roffnode_pop(struct roff * r)698 roffnode_pop(struct roff *r)
699 {
700 struct roffnode *p;
701 int inloop;
702
703 p = r->last;
704 inloop = p->tok == ROFF_while;
705 r->last = p->parent;
706 free(p->name);
707 free(p->end);
708 free(p);
709 return inloop;
710 }
711
712 /*
713 * Push a roff node onto the instruction stack. This must later be
714 * removed with roffnode_pop().
715 */
716 static void
roffnode_push(struct roff * r,enum roff_tok tok,const char * name,int line,int col)717 roffnode_push(struct roff *r, enum roff_tok tok, const char *name,
718 int line, int col)
719 {
720 struct roffnode *p;
721
722 p = mandoc_calloc(1, sizeof(struct roffnode));
723 p->tok = tok;
724 if (name)
725 p->name = mandoc_strdup(name);
726 p->parent = r->last;
727 p->line = line;
728 p->col = col;
729 p->rule = p->parent ? p->parent->rule : 0;
730
731 r->last = p;
732 }
733
734 /* --- roff parser state data management ---------------------------------- */
735
736 static void
roff_free1(struct roff * r)737 roff_free1(struct roff *r)
738 {
739 int i;
740
741 tbl_free(r->first_tbl);
742 r->first_tbl = r->last_tbl = r->tbl = NULL;
743
744 eqn_free(r->last_eqn);
745 r->last_eqn = r->eqn = NULL;
746
747 while (r->mstackpos >= 0)
748 roff_userret(r);
749
750 while (r->last)
751 roffnode_pop(r);
752
753 free (r->rstack);
754 r->rstack = NULL;
755 r->rstacksz = 0;
756 r->rstackpos = -1;
757
758 roff_freereg(r->regtab);
759 r->regtab = NULL;
760
761 roff_freestr(r->strtab);
762 roff_freestr(r->rentab);
763 roff_freestr(r->xmbtab);
764 r->strtab = r->rentab = r->xmbtab = NULL;
765
766 if (r->xtab)
767 for (i = 0; i < 128; i++)
768 free(r->xtab[i].p);
769 free(r->xtab);
770 r->xtab = NULL;
771 }
772
773 void
roff_reset(struct roff * r)774 roff_reset(struct roff *r)
775 {
776 roff_free1(r);
777 r->options |= MPARSE_COMMENT;
778 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN);
779 r->control = '\0';
780 r->escape = '\\';
781 roffce_lines = 0;
782 roffce_node = NULL;
783 roffit_lines = 0;
784 roffit_macro = NULL;
785 }
786
787 void
roff_free(struct roff * r)788 roff_free(struct roff *r)
789 {
790 int i;
791
792 roff_free1(r);
793 for (i = 0; i < r->mstacksz; i++)
794 free(r->mstack[i].argv);
795 free(r->mstack);
796 roffhash_free(r->reqtab);
797 free(r);
798 }
799
800 struct roff *
roff_alloc(int options)801 roff_alloc(int options)
802 {
803 struct roff *r;
804
805 r = mandoc_calloc(1, sizeof(struct roff));
806 r->reqtab = roffhash_alloc(0, ROFF_RENAMED);
807 r->options = options | MPARSE_COMMENT;
808 r->format = options & (MPARSE_MDOC | MPARSE_MAN);
809 r->mstackpos = -1;
810 r->rstackpos = -1;
811 r->escape = '\\';
812 return r;
813 }
814
815 /* --- syntax tree state data management ---------------------------------- */
816
817 static void
roff_man_free1(struct roff_man * man)818 roff_man_free1(struct roff_man *man)
819 {
820 if (man->meta.first != NULL)
821 roff_node_delete(man, man->meta.first);
822 free(man->meta.msec);
823 free(man->meta.vol);
824 free(man->meta.os);
825 free(man->meta.arch);
826 free(man->meta.title);
827 free(man->meta.name);
828 free(man->meta.date);
829 free(man->meta.sodest);
830 }
831
832 void
roff_state_reset(struct roff_man * man)833 roff_state_reset(struct roff_man *man)
834 {
835 man->last = man->meta.first;
836 man->last_es = NULL;
837 man->flags = 0;
838 man->lastsec = man->lastnamed = SEC_NONE;
839 man->next = ROFF_NEXT_CHILD;
840 roff_setreg(man->roff, "nS", 0, '=');
841 }
842
843 static void
roff_man_alloc1(struct roff_man * man)844 roff_man_alloc1(struct roff_man *man)
845 {
846 memset(&man->meta, 0, sizeof(man->meta));
847 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first));
848 man->meta.first->type = ROFFT_ROOT;
849 man->meta.macroset = MACROSET_NONE;
850 roff_state_reset(man);
851 }
852
/*
 * Discard the syntax tree and the meta data of a syntax tree
 * state and set it up again with nothing but an empty root node.
 */
void
roff_man_reset(struct roff_man *man)
{
	/* Tear down first, then rebuild from scratch. */
	roff_man_free1(man);
	roff_man_alloc1(man);
}
859
/*
 * Destroy a syntax tree state allocated with roff_man_alloc():
 * first what its meta data owns, then the struct itself.
 */
void
roff_man_free(struct roff_man *man)
{
	/* The contents have to go before the container. */
	roff_man_free1(man);
	free(man);
}
866
867 struct roff_man *
roff_man_alloc(struct roff * roff,const char * os_s,int quick)868 roff_man_alloc(struct roff *roff, const char *os_s, int quick)
869 {
870 struct roff_man *man;
871
872 man = mandoc_calloc(1, sizeof(*man));
873 man->roff = roff;
874 man->os_s = os_s;
875 man->quick = quick;
876 roff_man_alloc1(man);
877 roff->man = man;
878 return man;
879 }
880
881 /* --- syntax tree handling ----------------------------------------------- */
882
883 struct roff_node *
roff_node_alloc(struct roff_man * man,int line,int pos,enum roff_type type,int tok)884 roff_node_alloc(struct roff_man *man, int line, int pos,
885 enum roff_type type, int tok)
886 {
887 struct roff_node *n;
888
889 n = mandoc_calloc(1, sizeof(*n));
890 n->line = line;
891 n->pos = pos;
892 n->tok = tok;
893 n->type = type;
894 n->sec = man->lastsec;
895
896 if (man->flags & MDOC_SYNOPSIS)
897 n->flags |= NODE_SYNPRETTY;
898 else
899 n->flags &= ~NODE_SYNPRETTY;
900 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL)
901 n->flags |= NODE_NOFILL;
902 else
903 n->flags &= ~NODE_NOFILL;
904 if (man->flags & MDOC_NEWLINE)
905 n->flags |= NODE_LINE;
906 man->flags &= ~MDOC_NEWLINE;
907
908 return n;
909 }
910
911 void
roff_node_append(struct roff_man * man,struct roff_node * n)912 roff_node_append(struct roff_man *man, struct roff_node *n)
913 {
914
915 switch (man->next) {
916 case ROFF_NEXT_SIBLING:
917 if (man->last->next != NULL) {
918 n->next = man->last->next;
919 man->last->next->prev = n;
920 } else
921 man->last->parent->last = n;
922 man->last->next = n;
923 n->prev = man->last;
924 n->parent = man->last->parent;
925 break;
926 case ROFF_NEXT_CHILD:
927 if (man->last->child != NULL) {
928 n->next = man->last->child;
929 man->last->child->prev = n;
930 } else
931 man->last->last = n;
932 man->last->child = n;
933 n->parent = man->last;
934 break;
935 default:
936 abort();
937 }
938 man->last = n;
939
940 switch (n->type) {
941 case ROFFT_HEAD:
942 n->parent->head = n;
943 break;
944 case ROFFT_BODY:
945 if (n->end != ENDBODY_NOT)
946 return;
947 n->parent->body = n;
948 break;
949 case ROFFT_TAIL:
950 n->parent->tail = n;
951 break;
952 default:
953 return;
954 }
955
956 /*
957 * Copy over the normalised-data pointer of our parent. Not
958 * everybody has one, but copying a null pointer is fine.
959 */
960
961 n->norm = n->parent->norm;
962 assert(n->parent->type == ROFFT_BLOCK);
963 }
964
965 void
roff_word_alloc(struct roff_man * man,int line,int pos,const char * word)966 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word)
967 {
968 struct roff_node *n;
969
970 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE);
971 n->string = roff_strdup(man->roff, word);
972 roff_node_append(man, n);
973 n->flags |= NODE_VALID | NODE_ENDED;
974 man->next = ROFF_NEXT_SIBLING;
975 }
976
977 void
roff_word_append(struct roff_man * man,const char * word)978 roff_word_append(struct roff_man *man, const char *word)
979 {
980 struct roff_node *n;
981 char *addstr, *newstr;
982
983 n = man->last;
984 addstr = roff_strdup(man->roff, word);
985 mandoc_asprintf(&newstr, "%s %s", n->string, addstr);
986 free(addstr);
987 free(n->string);
988 n->string = newstr;
989 man->next = ROFF_NEXT_SIBLING;
990 }
991
992 void
roff_elem_alloc(struct roff_man * man,int line,int pos,int tok)993 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok)
994 {
995 struct roff_node *n;
996
997 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok);
998 roff_node_append(man, n);
999 man->next = ROFF_NEXT_CHILD;
1000 }
1001
1002 struct roff_node *
roff_block_alloc(struct roff_man * man,int line,int pos,int tok)1003 roff_block_alloc(struct roff_man *man, int line, int pos, int tok)
1004 {
1005 struct roff_node *n;
1006
1007 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok);
1008 roff_node_append(man, n);
1009 man->next = ROFF_NEXT_CHILD;
1010 return n;
1011 }
1012
1013 struct roff_node *
roff_head_alloc(struct roff_man * man,int line,int pos,int tok)1014 roff_head_alloc(struct roff_man *man, int line, int pos, int tok)
1015 {
1016 struct roff_node *n;
1017
1018 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok);
1019 roff_node_append(man, n);
1020 man->next = ROFF_NEXT_CHILD;
1021 return n;
1022 }
1023
1024 struct roff_node *
roff_body_alloc(struct roff_man * man,int line,int pos,int tok)1025 roff_body_alloc(struct roff_man *man, int line, int pos, int tok)
1026 {
1027 struct roff_node *n;
1028
1029 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok);
1030 roff_node_append(man, n);
1031 man->next = ROFF_NEXT_CHILD;
1032 return n;
1033 }
1034
1035 static void
roff_addtbl(struct roff_man * man,int line,struct tbl_node * tbl)1036 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl)
1037 {
1038 struct roff_node *n;
1039 struct tbl_span *span;
1040
1041 if (man->meta.macroset == MACROSET_MAN)
1042 man_breakscope(man, ROFF_TS);
1043 while ((span = tbl_span(tbl)) != NULL) {
1044 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE);
1045 n->span = span;
1046 roff_node_append(man, n);
1047 n->flags |= NODE_VALID | NODE_ENDED;
1048 man->next = ROFF_NEXT_SIBLING;
1049 }
1050 }
1051
1052 void
roff_node_unlink(struct roff_man * man,struct roff_node * n)1053 roff_node_unlink(struct roff_man *man, struct roff_node *n)
1054 {
1055
1056 /* Adjust siblings. */
1057
1058 if (n->prev)
1059 n->prev->next = n->next;
1060 if (n->next)
1061 n->next->prev = n->prev;
1062
1063 /* Adjust parent. */
1064
1065 if (n->parent != NULL) {
1066 if (n->parent->child == n)
1067 n->parent->child = n->next;
1068 if (n->parent->last == n)
1069 n->parent->last = n->prev;
1070 }
1071
1072 /* Adjust parse point. */
1073
1074 if (man == NULL)
1075 return;
1076 if (man->last == n) {
1077 if (n->prev == NULL) {
1078 man->last = n->parent;
1079 man->next = ROFF_NEXT_CHILD;
1080 } else {
1081 man->last = n->prev;
1082 man->next = ROFF_NEXT_SIBLING;
1083 }
1084 }
1085 if (man->meta.first == n)
1086 man->meta.first = NULL;
1087 }
1088
/*
 * Detach the node n from its current place in the tree and insert
 * it again at the current parse point (see man->last and man->next).
 */
void
roff_node_relink(struct roff_man *man, struct roff_node *n)
{
	roff_node_unlink(man, n);
	/* Unlinking leaves the old sibling pointers in n; clear them. */
	n->prev = n->next = NULL;
	roff_node_append(man, n);
}
1096
1097 void
roff_node_free(struct roff_node * n)1098 roff_node_free(struct roff_node *n)
1099 {
1100
1101 if (n->args != NULL)
1102 mdoc_argv_free(n->args);
1103 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM)
1104 free(n->norm);
1105 eqn_box_free(n->eqn);
1106 free(n->string);
1107 free(n->tag);
1108 free(n);
1109 }
1110
1111 void
roff_node_delete(struct roff_man * man,struct roff_node * n)1112 roff_node_delete(struct roff_man *man, struct roff_node *n)
1113 {
1114
1115 while (n->child != NULL)
1116 roff_node_delete(man, n->child);
1117 roff_node_unlink(man, n);
1118 roff_node_free(n);
1119 }
1120
1121 int
roff_node_transparent(struct roff_node * n)1122 roff_node_transparent(struct roff_node *n)
1123 {
1124 if (n == NULL)
1125 return 0;
1126 if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
1127 return 1;
1128 return roff_tok_transparent(n->tok);
1129 }
1130
1131 int
roff_tok_transparent(enum roff_tok tok)1132 roff_tok_transparent(enum roff_tok tok)
1133 {
1134 switch (tok) {
1135 case ROFF_ft:
1136 case ROFF_ll:
1137 case ROFF_mc:
1138 case ROFF_po:
1139 case ROFF_ta:
1140 case MDOC_Db:
1141 case MDOC_Es:
1142 case MDOC_Sm:
1143 case MDOC_Tg:
1144 case MAN_DT:
1145 case MAN_UC:
1146 case MAN_PD:
1147 case MAN_AT:
1148 return 1;
1149 default:
1150 return 0;
1151 }
1152 }
1153
1154 struct roff_node *
roff_node_child(struct roff_node * n)1155 roff_node_child(struct roff_node *n)
1156 {
1157 for (n = n->child; roff_node_transparent(n); n = n->next)
1158 continue;
1159 return n;
1160 }
1161
1162 struct roff_node *
roff_node_prev(struct roff_node * n)1163 roff_node_prev(struct roff_node *n)
1164 {
1165 do {
1166 n = n->prev;
1167 } while (roff_node_transparent(n));
1168 return n;
1169 }
1170
1171 struct roff_node *
roff_node_next(struct roff_node * n)1172 roff_node_next(struct roff_node *n)
1173 {
1174 do {
1175 n = n->next;
1176 } while (roff_node_transparent(n));
1177 return n;
1178 }
1179
1180 void
deroff(char ** dest,const struct roff_node * n)1181 deroff(char **dest, const struct roff_node *n)
1182 {
1183 char *cp;
1184 size_t sz;
1185
1186 if (n->string == NULL) {
1187 for (n = n->child; n != NULL; n = n->next)
1188 deroff(dest, n);
1189 return;
1190 }
1191
1192 /* Skip leading whitespace. */
1193
1194 for (cp = n->string; *cp != '\0'; cp++) {
1195 if (cp[0] == '\\' && cp[1] != '\0' &&
1196 strchr(" %&0^|~", cp[1]) != NULL)
1197 cp++;
1198 else if ( ! isspace((unsigned char)*cp))
1199 break;
1200 }
1201
1202 /* Skip trailing backslash. */
1203
1204 sz = strlen(cp);
1205 if (sz > 0 && cp[sz - 1] == '\\')
1206 sz--;
1207
1208 /* Skip trailing whitespace. */
1209
1210 for (; sz; sz--)
1211 if ( ! isspace((unsigned char)cp[sz-1]))
1212 break;
1213
1214 /* Skip empty strings. */
1215
1216 if (sz == 0)
1217 return;
1218
1219 if (*dest == NULL) {
1220 *dest = mandoc_strndup(cp, sz);
1221 return;
1222 }
1223
1224 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
1225 free(*dest);
1226 *dest = cp;
1227 }
1228
1229 /* --- main functions of the roff parser ---------------------------------- */
1230
/*
 * In the current line, expand escape sequences that produce parsable
 * input text.  Also check the syntax of the remaining escape sequences,
 * which typically produce output glyphs or change formatter state.
 *
 * The text to process starts at buf->buf + pos, and newesc is the
 * character regarded as introducing escape sequences (ASCII_ESC when
 * called from roff_getarg()).  The buffer may be replaced by a new
 * allocation, in which case buf->buf and buf->sz are updated.
 *
 * Return ROFF_CONT when the line is ready for further parsing,
 * ROFF_IGN | ROFF_APPEND when the line is to be continued (after a
 * "\#" comment or a trailing escape character that is not followed
 * by a newline), or ROFF_IGN when expansion was given up because of
 * too many replacements or too large a result.
 */
static int
roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
{
	struct mctx	*ctx;	/* current macro call context */
	char		 ubuf[24]; /* buffer to print the number */
	struct roff_node *n;	/* used for header comments */
	const char	*start;	/* start of the string to process */
	char		*stesc;	/* start of an escape sequence ('\\') */
	const char	*esct;	/* type of escape sequence */
	char		*ep;	/* end of comment string */
	const char	*stnam;	/* start of the name, after "[(*" */
	const char	*cp;	/* end of the name, e.g. before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/* Comment found, look for RCS id. */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * The post-decrement moves stesc back from the comment
		 * character to the escape character introducing it.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with the blanks
		 * in front of them unless such a blank is escaped.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}

	/* Nothing remains in front of the end of the text. */

	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* From here on, scan backwards from the end of the text. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * After the loop, cp points to the byte in front of
		 * the run of escape characters ending at stesc, so an
		 * even run length means that this one is escaped.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the very end of the text. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: only check the syntax. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			/* \B and \w: skip the letter and the delimiter. */
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/*
		 * Advance to the end of the name.
		 * With maxl == 0, the name extends up to the
		 * terminating character rather than having a
		 * fixed length.
		 */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/* Inside \w, some escapes count as one character. */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			/* Macro arguments only exist inside a macro call. */
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Measure all arguments joined by blanks. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * Resize the buffer such that the three bytes of
			 * the escape sequence make room for asz bytes.
			 * When shrinking, the rest has to be moved before
			 * the reallocation, when growing, after it.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Write the arguments into the gap. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/* '1' if the whole argument is a valid number. */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * Continue at the end of the pasted string, such that
		 * escape sequences contained in it are processed, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}
1643
/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * NUL-terminate the argument in place in the input buffer.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer *cpp to the next argument,
 * or to the NUL byte terminating the argument line,
 * and advance the column *pos accordingly.
 * Return a newly allocated copy of the parsed argument, made with
 * mandoc_strdup().  If the argument contained a double backslash,
 * the copy is also passed through roff_expand() with ASCII_ESC as
 * the escape character, and a newly allocated empty string is
 * returned instead if roff_expand() asks to ignore the result.
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	/*
	 * pairs counts the bytes dropped so far, which is the
	 * distance the remaining text has to be shifted left.
	 */
	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/*
				 * Mark the resulting escape character
				 * for roff_expand(), called below.
				 */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args. */
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	/* For quoted arguments, also count the opening quote. */
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* From here on, work on a private copy of the argument. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}
1754
1755
1756 /*
1757 * Process text streams.
1758 */
1759 static int
roff_parsetext(struct roff * r,struct buf * buf,int pos,int * offs)1760 roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
1761 {
1762 size_t sz;
1763 const char *start;
1764 char *p;
1765 int isz;
1766 enum mandoc_esc esc;
1767
1768 /* Spring the input line trap. */
1769
1770 if (roffit_lines == 1) {
1771 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
1772 free(buf->buf);
1773 buf->buf = p;
1774 buf->sz = isz + 1;
1775 *offs = 0;
1776 free(roffit_macro);
1777 roffit_lines = 0;
1778 return ROFF_REPARSE;
1779 } else if (roffit_lines > 1)
1780 --roffit_lines;
1781
1782 if (roffce_node != NULL && buf->buf[pos] != '\0') {
1783 if (roffce_lines < 1) {
1784 r->man->last = roffce_node;
1785 r->man->next = ROFF_NEXT_SIBLING;
1786 roffce_lines = 0;
1787 roffce_node = NULL;
1788 } else
1789 roffce_lines--;
1790 }
1791
1792 /* Convert all breakable hyphens into ASCII_HYPH. */
1793
1794 start = p = buf->buf + pos;
1795
1796 while (*p != '\0') {
1797 sz = strcspn(p, "-\\");
1798 p += sz;
1799
1800 if (*p == '\0')
1801 break;
1802
1803 if (*p == '\\') {
1804 /* Skip over escapes. */
1805 p++;
1806 esc = mandoc_escape((const char **)&p, NULL, NULL);
1807 if (esc == ESCAPE_ERROR)
1808 break;
1809 while (*p == '-')
1810 p++;
1811 continue;
1812 } else if (p == start) {
1813 p++;
1814 continue;
1815 }
1816
1817 if (isalpha((unsigned char)p[-1]) &&
1818 isalpha((unsigned char)p[1]))
1819 *p = ASCII_HYPH;
1820 p++;
1821 }
1822 return ROFF_CONT;
1823 }
1824
/*
 * Parse one input line at the roff level.
 * The line is contained in buf->buf and starts at the offset *offs;
 * ln is the line number used for messages, and len is the length
 * used for the warning about long text lines.
 * After expanding escape sequences, the line is handed to the text
 * or sub handler of an open roff block, to the eqn or tbl reader,
 * to roff_parsetext(), or to the handler of the request or
 * user-defined macro it calls.
 * Return the ROFF_* code resulting from that processing; in
 * particular, ROFF_CONT is returned for a macro line that is neither
 * a roff request nor a user-defined macro, and ROFF_IGN in the
 * places below where a line has been completely dealt with.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/*
	 * Warn about long text lines, but only in fill mode outside
	 * tables and equations, and not for lines starting with a
	 * blank, a dot, a backslash or the control character.
	 */

	if (len > 80 && r->tbl == NULL && r->eqn == NULL &&
	    (r->man->flags & ROFF_NOFILL) == 0 &&
	    strchr(" .\\", buf->buf[pos]) == NULL &&
	    buf->buf[pos] != r->control &&
	    strcspn(buf->buf, " ") < 80)
		mandoc_msg(MANDOCERR_TEXT_LONG, ln, (int)len - 1,
		    "%.20s...", buf->buf + pos);

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK for later. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* In an equation, every line except ".EN" is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	/* In a table, text lines and empty request lines are table data. */
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl) {
		/* After the first text line, stop saving comments. */
		r->options &= ~MPARSE_COMMENT;
		return roff_parsetext(r, buf, pos, offs) | e;
	}

	/*
	 * Skip empty request lines, and report a comment
	 * character directly following the control character.
	 */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	r->options &= ~MPARSE_COMMENT;
	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/*
		 * For an unknown macro, skip its name and the
		 * following blanks and treat the rest as table data.
		 */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}
1960
1961 /*
1962 * Internal interface function to tell the roff parser that execution
1963 * of the current macro ended. This is required because macro
1964 * definitions usually do not end with a .return request.
1965 */
1966 void
roff_userret(struct roff * r)1967 roff_userret(struct roff *r)
1968 {
1969 struct mctx *ctx;
1970 int i;
1971
1972 assert(r->mstackpos >= 0);
1973 ctx = r->mstack + r->mstackpos;
1974 for (i = 0; i < ctx->argc; i++)
1975 free(ctx->argv[i]);
1976 ctx->argc = 0;
1977 r->mstackpos--;
1978 }
1979
/*
 * Finish parsing at the end of the input.
 * Report a roff block or an equation that is still open,
 * parse the open equation, end an open table,
 * and clear the equation and table pointers.
 */
void
roff_endparse(struct roff *r)
{
	/* A roff block is still open: report it, but leave it alone. */
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	/* An equation is still open: report it and parse what we have. */
	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	/* A table is still open: end it. */
	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}
1999
2000 /*
2001 * Parse a roff node's type from the input buffer. This must be in the
2002 * form of ".foo xxx" in the usual way.
2003 */
2004 static enum roff_tok
roff_parse(struct roff * r,char * buf,int * pos,int ln,int ppos)2005 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
2006 {
2007 char *cp;
2008 const char *mac;
2009 size_t maclen;
2010 int deftype;
2011 enum roff_tok t;
2012
2013 cp = buf + *pos;
2014
2015 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp)
2016 return TOKEN_NONE;
2017
2018 mac = cp;
2019 maclen = roff_getname(r, &cp, ln, ppos);
2020
2021 deftype = ROFFDEF_USER | ROFFDEF_REN;
2022 r->current_string = roff_getstrn(r, mac, maclen, &deftype);
2023 switch (deftype) {
2024 case ROFFDEF_USER:
2025 t = ROFF_USERDEF;
2026 break;
2027 case ROFFDEF_REN:
2028 t = ROFF_RENAMED;
2029 break;
2030 default:
2031 t = roffhash_find(r->reqtab, mac, maclen);
2032 break;
2033 }
2034 if (t != TOKEN_NONE)
2035 *pos = cp - buf;
2036 else if (deftype == ROFFDEF_UNDEF) {
2037 /* Using an undefined macro defines it to be empty. */
2038 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0);
2039 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0);
2040 }
2041 return t;
2042 }
2043
2044 /* --- handling of request blocks ----------------------------------------- */
2045
/*
 * Close a macro definition block or an "ignore" block.
 * If no such block is open, report that and do nothing else.
 * Otherwise, pop the block and all nodes ending on the same line,
 * check for a closing "\}" if a conditional block with braces is
 * open, and warn about arguments following the "..".
 * Always return ROFF_IGN.
 */
static int
roff_cblock(ROFF_ARGS)
{
	int	 rr;

	if (r->last == NULL) {
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
		return ROFF_IGN;
	}

	/* Only these kinds of blocks are closed by "..". */

	switch (r->last->tok) {
	case ROFF_am:
	case ROFF_ami:
	case ROFF_de:
	case ROFF_dei:
	case ROFF_ig:
		break;
	case ROFF_am1:
	case ROFF_de1:
		/* Remapped in roff_block(). */
		abort();
	default:
		mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "..");
		return ROFF_IGN;
	}

	roffnode_pop(r);
	roffnode_cleanscope(r);

	/*
	 * If a conditional block with braces is still open,
	 * check for "\}" block end markers.
	 */

	if (r->last != NULL && r->last->endspan < 0) {
		rr = 1;  /* If arguments follow "\}", warn about them. */
		roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
	}

	if (buf->buf[pos] != '\0')
		mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
		    ".. %s", buf->buf + pos);

	return ROFF_IGN;
}
2094
2095 /*
2096 * Pop all nodes ending at the end of the current input line.
2097 * Return the number of loops ended.
2098 */
2099 static int
roffnode_cleanscope(struct roff * r)2100 roffnode_cleanscope(struct roff *r)
2101 {
2102 int inloop;
2103
2104 inloop = 0;
2105 while (r->last != NULL && r->last->endspan > 0) {
2106 if (--r->last->endspan != 0)
2107 break;
2108 inloop += roffnode_pop(r);
2109 }
2110 return inloop;
2111 }
2112
2113 /*
2114 * Handle the closing "\}" of a conditional block.
2115 * Apart from generating warnings, this only pops nodes.
2116 * Return the number of loops ended.
2117 */
2118 static int
roff_ccond(struct roff * r,int ln,int ppos)2119 roff_ccond(struct roff *r, int ln, int ppos)
2120 {
2121 if (NULL == r->last) {
2122 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2123 return 0;
2124 }
2125
2126 switch (r->last->tok) {
2127 case ROFF_el:
2128 case ROFF_ie:
2129 case ROFF_if:
2130 case ROFF_while:
2131 break;
2132 default:
2133 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2134 return 0;
2135 }
2136
2137 if (r->last->endspan > -1) {
2138 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}");
2139 return 0;
2140 }
2141
2142 return roffnode_pop(r) + roffnode_cleanscope(r);
2143 }
2144
2145 static int
roff_block(ROFF_ARGS)2146 roff_block(ROFF_ARGS)
2147 {
2148 const char *name, *value;
2149 char *call, *cp, *iname, *rname;
2150 size_t csz, namesz, rsz;
2151 int deftype;
2152
2153 /* Ignore groff compatibility mode for now. */
2154
2155 if (tok == ROFF_de1)
2156 tok = ROFF_de;
2157 else if (tok == ROFF_dei1)
2158 tok = ROFF_dei;
2159 else if (tok == ROFF_am1)
2160 tok = ROFF_am;
2161 else if (tok == ROFF_ami1)
2162 tok = ROFF_ami;
2163
2164 /* Parse the macro name argument. */
2165
2166 cp = buf->buf + pos;
2167 if (tok == ROFF_ig) {
2168 iname = NULL;
2169 namesz = 0;
2170 } else {
2171 iname = cp;
2172 namesz = roff_getname(r, &cp, ln, ppos);
2173 iname[namesz] = '\0';
2174 }
2175
2176 /* Resolve the macro name argument if it is indirect. */
2177
2178 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2179 deftype = ROFFDEF_USER;
2180 name = roff_getstrn(r, iname, namesz, &deftype);
2181 if (name == NULL) {
2182 mandoc_msg(MANDOCERR_STR_UNDEF,
2183 ln, (int)(iname - buf->buf),
2184 "%.*s", (int)namesz, iname);
2185 namesz = 0;
2186 } else
2187 namesz = strlen(name);
2188 } else
2189 name = iname;
2190
2191 if (namesz == 0 && tok != ROFF_ig) {
2192 mandoc_msg(MANDOCERR_REQ_EMPTY,
2193 ln, ppos, "%s", roff_name[tok]);
2194 return ROFF_IGN;
2195 }
2196
2197 roffnode_push(r, tok, name, ln, ppos);
2198
2199 /*
2200 * At the beginning of a `de' macro, clear the existing string
2201 * with the same name, if there is one. New content will be
2202 * appended from roff_block_text() in multiline mode.
2203 */
2204
2205 if (tok == ROFF_de || tok == ROFF_dei) {
2206 roff_setstrn(&r->strtab, name, namesz, "", 0, 0);
2207 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2208 } else if (tok == ROFF_am || tok == ROFF_ami) {
2209 deftype = ROFFDEF_ANY;
2210 value = roff_getstrn(r, iname, namesz, &deftype);
2211 switch (deftype) { /* Before appending, ... */
2212 case ROFFDEF_PRE: /* copy predefined to user-defined. */
2213 roff_setstrn(&r->strtab, name, namesz,
2214 value, strlen(value), 0);
2215 break;
2216 case ROFFDEF_REN: /* call original standard macro. */
2217 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2218 (int)strlen(value), value);
2219 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2220 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2221 free(call);
2222 break;
2223 case ROFFDEF_STD: /* rename and call standard macro. */
2224 rsz = mandoc_asprintf(&rname, "__%s_renamed", name);
2225 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0);
2226 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n",
2227 (int)rsz, rname);
2228 roff_setstrn(&r->strtab, name, namesz, call, csz, 0);
2229 free(call);
2230 free(rname);
2231 break;
2232 default:
2233 break;
2234 }
2235 }
2236
2237 if (*cp == '\0')
2238 return ROFF_IGN;
2239
2240 /* Get the custom end marker. */
2241
2242 iname = cp;
2243 namesz = roff_getname(r, &cp, ln, ppos);
2244
2245 /* Resolve the end marker if it is indirect. */
2246
2247 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) {
2248 deftype = ROFFDEF_USER;
2249 name = roff_getstrn(r, iname, namesz, &deftype);
2250 if (name == NULL) {
2251 mandoc_msg(MANDOCERR_STR_UNDEF,
2252 ln, (int)(iname - buf->buf),
2253 "%.*s", (int)namesz, iname);
2254 namesz = 0;
2255 } else
2256 namesz = strlen(name);
2257 } else
2258 name = iname;
2259
2260 if (namesz)
2261 r->last->end = mandoc_strndup(name, namesz);
2262
2263 if (*cp != '\0')
2264 mandoc_msg(MANDOCERR_ARG_EXCESS,
2265 ln, pos, ".%s ... %s", roff_name[tok], cp);
2266
2267 return ROFF_IGN;
2268 }
2269
2270 static int
roff_block_sub(ROFF_ARGS)2271 roff_block_sub(ROFF_ARGS)
2272 {
2273 enum roff_tok t;
2274 int i, j;
2275
2276 /*
2277 * First check whether a custom macro exists at this level. If
2278 * it does, then check against it. This is some of groff's
2279 * stranger behaviours. If we encountered a custom end-scope
2280 * tag and that tag also happens to be a "real" macro, then we
2281 * need to try interpreting it again as a real macro. If it's
2282 * not, then return ignore. Else continue.
2283 */
2284
2285 if (r->last->end) {
2286 for (i = pos, j = 0; r->last->end[j]; j++, i++)
2287 if (buf->buf[i] != r->last->end[j])
2288 break;
2289
2290 if (r->last->end[j] == '\0' &&
2291 (buf->buf[i] == '\0' ||
2292 buf->buf[i] == ' ' ||
2293 buf->buf[i] == '\t')) {
2294 roffnode_pop(r);
2295 roffnode_cleanscope(r);
2296
2297 while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
2298 i++;
2299
2300 pos = i;
2301 if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
2302 TOKEN_NONE)
2303 return ROFF_RERUN;
2304 return ROFF_IGN;
2305 }
2306 }
2307
2308 /*
2309 * If we have no custom end-query or lookup failed, then try
2310 * pulling it out of the hashtable.
2311 */
2312
2313 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2314
2315 if (t != ROFF_cblock) {
2316 if (tok != ROFF_ig)
2317 roff_setstr(r, r->last->name, buf->buf + ppos, 2);
2318 return ROFF_IGN;
2319 }
2320
2321 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2322 }
2323
2324 static int
roff_block_text(ROFF_ARGS)2325 roff_block_text(ROFF_ARGS)
2326 {
2327
2328 if (tok != ROFF_ig)
2329 roff_setstr(r, r->last->name, buf->buf + pos, 2);
2330
2331 return ROFF_IGN;
2332 }
2333
2334 /*
2335 * Check for a closing "\}" and handle it.
2336 * In this function, the final "int *offs" argument is used for
2337 * different purposes than elsewhere:
2338 * Input: *offs == 0: caller wants to discard arguments following \}
2339 * *offs == 1: caller wants to preserve text following \}
2340 * Output: *offs = 0: tell caller to discard input line
2341 * *offs = 1: tell caller to use input line
2342 */
2343 static int
roff_cond_checkend(ROFF_ARGS)2344 roff_cond_checkend(ROFF_ARGS)
2345 {
2346 char *ep;
2347 int endloop, irc, rr;
2348
2349 irc = ROFF_IGN;
2350 rr = r->last->rule;
2351 endloop = tok != ROFF_while ? ROFF_IGN :
2352 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
2353 if (roffnode_cleanscope(r))
2354 irc |= endloop;
2355
2356 /*
2357 * If "\}" occurs on a macro line without a preceding macro or
2358 * a text line contains nothing else, drop the line completely.
2359 */
2360
2361 ep = buf->buf + pos;
2362 if (ep[0] == '\\' && ep[1] == '}' && (ep[2] == '\0' || *offs == 0))
2363 rr = 0;
2364
2365 /*
2366 * The closing delimiter "\}" rewinds the conditional scope
2367 * but is otherwise ignored when interpreting the line.
2368 */
2369
2370 while ((ep = strchr(ep, '\\')) != NULL) {
2371 switch (ep[1]) {
2372 case '}':
2373 if (ep[2] == '\0')
2374 ep[0] = '\0';
2375 else if (rr)
2376 ep[1] = '&';
2377 else
2378 memmove(ep, ep + 2, strlen(ep + 2) + 1);
2379 if (roff_ccond(r, ln, ep - buf->buf))
2380 irc |= endloop;
2381 break;
2382 case '\0':
2383 ++ep;
2384 break;
2385 default:
2386 ep += 2;
2387 break;
2388 }
2389 }
2390 *offs = rr;
2391 return irc;
2392 }
2393
2394 /*
2395 * Parse and process a request or macro line in conditional scope.
2396 */
2397 static int
roff_cond_sub(ROFF_ARGS)2398 roff_cond_sub(ROFF_ARGS)
2399 {
2400 struct roffnode *bl;
2401 int irc, rr;
2402 enum roff_tok t;
2403
2404 rr = 0; /* If arguments follow "\}", skip them. */
2405 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2406 t = roff_parse(r, buf->buf, &pos, ln, ppos);
2407
2408 /* For now, let high level macros abort .ce mode. */
2409
2410 if (roffce_node != NULL &&
2411 (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
2412 t == ROFF_TH || t == ROFF_TS)) {
2413 r->man->last = roffce_node;
2414 r->man->next = ROFF_NEXT_SIBLING;
2415 roffce_lines = 0;
2416 roffce_node = NULL;
2417 }
2418
2419 /*
2420 * Fully handle known macros when they are structurally
2421 * required or when the conditional evaluated to true.
2422 */
2423
2424 if (t == ROFF_break) {
2425 if (irc & ROFF_LOOPMASK)
2426 irc = ROFF_IGN | ROFF_LOOPEXIT;
2427 else if (rr) {
2428 for (bl = r->last; bl != NULL; bl = bl->parent) {
2429 bl->rule = 0;
2430 if (bl->tok == ROFF_while)
2431 break;
2432 }
2433 }
2434 } else if (t != TOKEN_NONE &&
2435 (rr || roffs[t].flags & ROFFMAC_STRUCT))
2436 irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
2437 else
2438 irc |= rr ? ROFF_CONT : ROFF_IGN;
2439 return irc;
2440 }
2441
2442 /*
2443 * Parse and process a text line in conditional scope.
2444 */
2445 static int
roff_cond_text(ROFF_ARGS)2446 roff_cond_text(ROFF_ARGS)
2447 {
2448 int irc, rr;
2449
2450 rr = 1; /* If arguments follow "\}", preserve them. */
2451 irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
2452 if (rr)
2453 irc |= ROFF_CONT;
2454 return irc;
2455 }
2456
2457 /* --- handling of numeric and conditional expressions -------------------- */
2458
2459 /*
2460 * Parse a single signed integer number. Stop at the first non-digit.
2461 * If there is at least one digit, return success and advance the
2462 * parse point, else return failure and let the parse point unchanged.
2463 * Ignore overflows, treat them just like the C language.
2464 */
2465 static int
roff_getnum(const char * v,int * pos,int * res,int flags)2466 roff_getnum(const char *v, int *pos, int *res, int flags)
2467 {
2468 int myres, scaled, n, p;
2469
2470 if (NULL == res)
2471 res = &myres;
2472
2473 p = *pos;
2474 n = v[p] == '-';
2475 if (n || v[p] == '+')
2476 p++;
2477
2478 if (flags & ROFFNUM_WHITE)
2479 while (isspace((unsigned char)v[p]))
2480 p++;
2481
2482 for (*res = 0; isdigit((unsigned char)v[p]); p++)
2483 *res = 10 * *res + v[p] - '0';
2484 if (p == *pos + n)
2485 return 0;
2486
2487 if (n)
2488 *res = -*res;
2489
2490 /* Each number may be followed by one optional scaling unit. */
2491
2492 switch (v[p]) {
2493 case 'f':
2494 scaled = *res * 65536;
2495 break;
2496 case 'i':
2497 scaled = *res * 240;
2498 break;
2499 case 'c':
2500 scaled = *res * 240 / 2.54;
2501 break;
2502 case 'v':
2503 case 'P':
2504 scaled = *res * 40;
2505 break;
2506 case 'm':
2507 case 'n':
2508 scaled = *res * 24;
2509 break;
2510 case 'p':
2511 scaled = *res * 10 / 3;
2512 break;
2513 case 'u':
2514 scaled = *res;
2515 break;
2516 case 'M':
2517 scaled = *res * 6 / 25;
2518 break;
2519 default:
2520 scaled = *res;
2521 p--;
2522 break;
2523 }
2524 if (flags & ROFFNUM_SCALE)
2525 *res = scaled;
2526
2527 *pos = p + 1;
2528 return 1;
2529 }
2530
/*
 * Evaluate a string comparison condition.
 * The first character is the delimiter.
 * Succeed if the string up to its second occurrence
 * matches the string up to its third occurrence.
 * Advance the cursor after the third occurrence
 * or lacking that, to the end of the line.
 * If the cursor already is at the end of the line, there is
 * no delimiter: fail and leave the cursor where it is.
 * Return 1 for a match and 0 otherwise.
 */
static int
roff_evalstrcond(const char *v, int *pos)
{
	const char	*s1, *s2, *s3;
	int		 match;

	match = 0;
	s1 = v + *pos;		/* initial delimiter */

	/* Never scan beyond the terminating NUL character. */

	if (*s1 == '\0')
		return 0;

	s2 = s1 + 1;		/* for scanning the first string */
	s3 = strchr(s2, *s1);	/* for scanning the second string */

	if (s3 == NULL)		/* found no middle delimiter */
		goto out;

	while (*++s3 != '\0') {
		if (*s2 != *s3) {  /* mismatch */
			s3 = strchr(s3, *s1);
			break;
		}
		if (*s3 == *s1) {  /* found the final delimiter */
			match = 1;
			break;
		}
		s2++;
	}

out:
	if (s3 == NULL)
		s3 = strchr(s2, '\0');
	else if (*s3 != '\0')
		s3++;
	*pos = (int)(s3 - v);
	return match;
}
2573
2574 /*
2575 * Evaluate an optionally negated single character, numerical,
2576 * or string condition.
2577 */
2578 static int
roff_evalcond(struct roff * r,int ln,char * v,int * pos)2579 roff_evalcond(struct roff *r, int ln, char *v, int *pos)
2580 {
2581 const char *start, *end;
2582 char *cp, *name;
2583 size_t sz;
2584 int deftype, len, number, savepos, istrue, wanttrue;
2585
2586 if ('!' == v[*pos]) {
2587 wanttrue = 0;
2588 (*pos)++;
2589 } else
2590 wanttrue = 1;
2591
2592 switch (v[*pos]) {
2593 case '\0':
2594 return 0;
2595 case 'n':
2596 case 'o':
2597 (*pos)++;
2598 return wanttrue;
2599 case 'e':
2600 case 't':
2601 case 'v':
2602 (*pos)++;
2603 return !wanttrue;
2604 case 'c':
2605 do {
2606 (*pos)++;
2607 } while (v[*pos] == ' ');
2608
2609 /*
2610 * Quirk for groff compatibility:
2611 * The horizontal tab is neither available nor unavailable.
2612 */
2613
2614 if (v[*pos] == '\t') {
2615 (*pos)++;
2616 return 0;
2617 }
2618
2619 /* Printable ASCII characters are available. */
2620
2621 if (v[*pos] != '\\') {
2622 (*pos)++;
2623 return wanttrue;
2624 }
2625
2626 end = v + ++*pos;
2627 switch (mandoc_escape(&end, &start, &len)) {
2628 case ESCAPE_SPECIAL:
2629 istrue = mchars_spec2cp(start, len) != -1;
2630 break;
2631 case ESCAPE_UNICODE:
2632 istrue = 1;
2633 break;
2634 case ESCAPE_NUMBERED:
2635 istrue = mchars_num2char(start, len) != -1;
2636 break;
2637 default:
2638 istrue = !wanttrue;
2639 break;
2640 }
2641 *pos = end - v;
2642 return istrue == wanttrue;
2643 case 'd':
2644 case 'r':
2645 cp = v + *pos + 1;
2646 while (*cp == ' ')
2647 cp++;
2648 name = cp;
2649 sz = roff_getname(r, &cp, ln, cp - v);
2650 if (sz == 0)
2651 istrue = 0;
2652 else if (v[*pos] == 'r')
2653 istrue = roff_hasregn(r, name, sz);
2654 else {
2655 deftype = ROFFDEF_ANY;
2656 roff_getstrn(r, name, sz, &deftype);
2657 istrue = !!deftype;
2658 }
2659 *pos = (name + sz) - v;
2660 return istrue == wanttrue;
2661 default:
2662 break;
2663 }
2664
2665 savepos = *pos;
2666 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE))
2667 return (number > 0) == wanttrue;
2668 else if (*pos == savepos)
2669 return roff_evalstrcond(v, pos) == wanttrue;
2670 else
2671 return 0;
2672 }
2673
2674 static int
roff_line_ignore(ROFF_ARGS)2675 roff_line_ignore(ROFF_ARGS)
2676 {
2677
2678 return ROFF_IGN;
2679 }
2680
2681 static int
roff_insec(ROFF_ARGS)2682 roff_insec(ROFF_ARGS)
2683 {
2684
2685 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]);
2686 return ROFF_IGN;
2687 }
2688
2689 static int
roff_unsupp(ROFF_ARGS)2690 roff_unsupp(ROFF_ARGS)
2691 {
2692
2693 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]);
2694 return ROFF_IGN;
2695 }
2696
2697 static int
roff_cond(ROFF_ARGS)2698 roff_cond(ROFF_ARGS)
2699 {
2700 int irc;
2701
2702 roffnode_push(r, tok, NULL, ln, ppos);
2703
2704 /*
2705 * An `.el' has no conditional body: it will consume the value
2706 * of the current rstack entry set in prior `ie' calls or
2707 * defaults to DENY.
2708 *
2709 * If we're not an `el', however, then evaluate the conditional.
2710 */
2711
2712 r->last->rule = tok == ROFF_el ?
2713 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) :
2714 roff_evalcond(r, ln, buf->buf, &pos);
2715
2716 /*
2717 * An if-else will put the NEGATION of the current evaluated
2718 * conditional into the stack of rules.
2719 */
2720
2721 if (tok == ROFF_ie) {
2722 if (r->rstackpos + 1 == r->rstacksz) {
2723 r->rstacksz += 16;
2724 r->rstack = mandoc_reallocarray(r->rstack,
2725 r->rstacksz, sizeof(int));
2726 }
2727 r->rstack[++r->rstackpos] = !r->last->rule;
2728 }
2729
2730 /* If the parent has false as its rule, then so do we. */
2731
2732 if (r->last->parent && !r->last->parent->rule)
2733 r->last->rule = 0;
2734
2735 /*
2736 * Determine scope.
2737 * If there is nothing on the line after the conditional,
2738 * not even whitespace, use next-line scope.
2739 * Except that .while does not support next-line scope.
2740 */
2741
2742 if (buf->buf[pos] == '\0' && tok != ROFF_while) {
2743 r->last->endspan = 2;
2744 goto out;
2745 }
2746
2747 while (buf->buf[pos] == ' ')
2748 pos++;
2749
2750 /* An opening brace requests multiline scope. */
2751
2752 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') {
2753 r->last->endspan = -1;
2754 pos += 2;
2755 while (buf->buf[pos] == ' ')
2756 pos++;
2757 goto out;
2758 }
2759
2760 /*
2761 * Anything else following the conditional causes
2762 * single-line scope. Warn if the scope contains
2763 * nothing but trailing whitespace.
2764 */
2765
2766 if (buf->buf[pos] == '\0')
2767 mandoc_msg(MANDOCERR_COND_EMPTY,
2768 ln, ppos, "%s", roff_name[tok]);
2769
2770 r->last->endspan = 1;
2771
2772 out:
2773 *offs = pos;
2774 irc = ROFF_RERUN;
2775 if (tok == ROFF_while)
2776 irc |= ROFF_WHILE;
2777 return irc;
2778 }
2779
2780 static int
roff_ds(ROFF_ARGS)2781 roff_ds(ROFF_ARGS)
2782 {
2783 char *string;
2784 const char *name;
2785 size_t namesz;
2786
2787 /* Ignore groff compatibility mode for now. */
2788
2789 if (tok == ROFF_ds1)
2790 tok = ROFF_ds;
2791 else if (tok == ROFF_as1)
2792 tok = ROFF_as;
2793
2794 /*
2795 * The first word is the name of the string.
2796 * If it is empty or terminated by an escape sequence,
2797 * abort the `ds' request without defining anything.
2798 */
2799
2800 name = string = buf->buf + pos;
2801 if (*name == '\0')
2802 return ROFF_IGN;
2803
2804 namesz = roff_getname(r, &string, ln, pos);
2805 switch (name[namesz]) {
2806 case '\\':
2807 return ROFF_IGN;
2808 case '\t':
2809 string = buf->buf + pos + namesz;
2810 break;
2811 default:
2812 break;
2813 }
2814
2815 /* Read past the initial double-quote, if any. */
2816 if (*string == '"')
2817 string++;
2818
2819 /* The rest is the value. */
2820 roff_setstrn(&r->strtab, name, namesz, string, strlen(string),
2821 ROFF_as == tok);
2822 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
2823 return ROFF_IGN;
2824 }
2825
/*
 * Parse a single operator, one or two characters long.
 * If the operator is recognized, store its code in *res, advance
 * the parse point, and return the code, which is never zero.
 * Two-character operators are encoded as single letters:
 * "<=" as 'l', ">=" as 'g', "<>" as '!', "<?" as 'i', ">?" as 'a',
 * and "==" as '='.
 * Otherwise, return 0 and leave the parse point unchanged;
 * *res then contains the character found at the parse point.
 */
static int
roff_getop(const char *v, int *pos, char *res)
{
	const char	*cp;
	int		 oplen;

	cp = v + *pos;
	*res = cp[0];
	oplen = 1;

	switch (cp[0]) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case ':':
		break;
	case '<':
		if (cp[1] == '=') {
			*res = 'l';
			oplen = 2;
		} else if (cp[1] == '>') {
			*res = '!';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'i';
			oplen = 2;
		}
		break;
	case '>':
		if (cp[1] == '=') {
			*res = 'g';
			oplen = 2;
		} else if (cp[1] == '?') {
			*res = 'a';
			oplen = 2;
		}
		break;
	case '=':
		if (cp[1] == '=')
			oplen = 2;
		break;
	default:
		return 0;
	}

	*pos += oplen;
	return *res;
}
2889
2890 /*
2891 * Evaluate either a parenthesized numeric expression
2892 * or a single signed integer number.
2893 */
2894 static int
roff_evalpar(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2895 roff_evalpar(struct roff *r, int ln,
2896 const char *v, int *pos, int *res, int flags)
2897 {
2898
2899 if ('(' != v[*pos])
2900 return roff_getnum(v, pos, res, flags);
2901
2902 (*pos)++;
2903 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE))
2904 return 0;
2905
2906 /*
2907 * Omission of the closing parenthesis
2908 * is an error in validation mode,
2909 * but ignored in evaluation mode.
2910 */
2911
2912 if (')' == v[*pos])
2913 (*pos)++;
2914 else if (NULL == res)
2915 return 0;
2916
2917 return 1;
2918 }
2919
2920 /*
2921 * Evaluate a complete numeric expression.
2922 * Proceed left to right, there is no concept of precedence.
2923 */
2924 static int
roff_evalnum(struct roff * r,int ln,const char * v,int * pos,int * res,int flags)2925 roff_evalnum(struct roff *r, int ln, const char *v,
2926 int *pos, int *res, int flags)
2927 {
2928 int mypos, operand2;
2929 char operator;
2930
2931 if (NULL == pos) {
2932 mypos = 0;
2933 pos = &mypos;
2934 }
2935
2936 if (flags & ROFFNUM_WHITE)
2937 while (isspace((unsigned char)v[*pos]))
2938 (*pos)++;
2939
2940 if ( ! roff_evalpar(r, ln, v, pos, res, flags))
2941 return 0;
2942
2943 while (1) {
2944 if (flags & ROFFNUM_WHITE)
2945 while (isspace((unsigned char)v[*pos]))
2946 (*pos)++;
2947
2948 if ( ! roff_getop(v, pos, &operator))
2949 break;
2950
2951 if (flags & ROFFNUM_WHITE)
2952 while (isspace((unsigned char)v[*pos]))
2953 (*pos)++;
2954
2955 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags))
2956 return 0;
2957
2958 if (flags & ROFFNUM_WHITE)
2959 while (isspace((unsigned char)v[*pos]))
2960 (*pos)++;
2961
2962 if (NULL == res)
2963 continue;
2964
2965 switch (operator) {
2966 case '+':
2967 *res += operand2;
2968 break;
2969 case '-':
2970 *res -= operand2;
2971 break;
2972 case '*':
2973 *res *= operand2;
2974 break;
2975 case '/':
2976 if (operand2 == 0) {
2977 mandoc_msg(MANDOCERR_DIVZERO,
2978 ln, *pos, "%s", v);
2979 *res = 0;
2980 break;
2981 }
2982 *res /= operand2;
2983 break;
2984 case '%':
2985 if (operand2 == 0) {
2986 mandoc_msg(MANDOCERR_DIVZERO,
2987 ln, *pos, "%s", v);
2988 *res = 0;
2989 break;
2990 }
2991 *res %= operand2;
2992 break;
2993 case '<':
2994 *res = *res < operand2;
2995 break;
2996 case '>':
2997 *res = *res > operand2;
2998 break;
2999 case 'l':
3000 *res = *res <= operand2;
3001 break;
3002 case 'g':
3003 *res = *res >= operand2;
3004 break;
3005 case '=':
3006 *res = *res == operand2;
3007 break;
3008 case '!':
3009 *res = *res != operand2;
3010 break;
3011 case '&':
3012 *res = *res && operand2;
3013 break;
3014 case ':':
3015 *res = *res || operand2;
3016 break;
3017 case 'i':
3018 if (operand2 < *res)
3019 *res = operand2;
3020 break;
3021 case 'a':
3022 if (operand2 > *res)
3023 *res = operand2;
3024 break;
3025 default:
3026 abort();
3027 }
3028 }
3029 return 1;
3030 }
3031
3032 /* --- register management ------------------------------------------------ */
3033
/*
 * Set the register with the NUL-terminated name to val, or, if
 * sign is '+' or '-', add val to it or subtract val from it.
 * The step of the register is left alone.
 */
void
roff_setreg(struct roff *r, const char *name, int val, char sign)
{
	size_t	 namesz;

	namesz = strlen(name);
	roff_setregn(r, name, namesz, val, sign, INT_MIN);
}
3039
3040 static void
roff_setregn(struct roff * r,const char * name,size_t len,int val,char sign,int step)3041 roff_setregn(struct roff *r, const char *name, size_t len,
3042 int val, char sign, int step)
3043 {
3044 struct roffreg *reg;
3045
3046 /* Search for an existing register with the same name. */
3047 reg = r->regtab;
3048
3049 while (reg != NULL && (reg->key.sz != len ||
3050 strncmp(reg->key.p, name, len) != 0))
3051 reg = reg->next;
3052
3053 if (NULL == reg) {
3054 /* Create a new register. */
3055 reg = mandoc_malloc(sizeof(struct roffreg));
3056 reg->key.p = mandoc_strndup(name, len);
3057 reg->key.sz = len;
3058 reg->val = 0;
3059 reg->step = 0;
3060 reg->next = r->regtab;
3061 r->regtab = reg;
3062 }
3063
3064 if ('+' == sign)
3065 reg->val += val;
3066 else if ('-' == sign)
3067 reg->val -= val;
3068 else
3069 reg->val = val;
3070 if (step != INT_MIN)
3071 reg->step = step;
3072 }
3073
3074 /*
3075 * Handle some predefined read-only number registers.
3076 * For now, return -1 if the requested register is not predefined;
3077 * in case a predefined read-only register having the value -1
3078 * were to turn up, another special value would have to be chosen.
3079 */
3080 static int
roff_getregro(const struct roff * r,const char * name)3081 roff_getregro(const struct roff *r, const char *name)
3082 {
3083
3084 switch (*name) {
3085 case '$': /* Number of arguments of the last macro evaluated. */
3086 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc;
3087 case 'A': /* ASCII approximation mode is always off. */
3088 return 0;
3089 case 'g': /* Groff compatibility mode is always on. */
3090 return 1;
3091 case 'H': /* Fixed horizontal resolution. */
3092 return 24;
3093 case 'j': /* Always adjust left margin only. */
3094 return 0;
3095 case 'T': /* Some output device is always defined. */
3096 return 1;
3097 case 'V': /* Fixed vertical resolution. */
3098 return 40;
3099 default:
3100 return -1;
3101 }
3102 }
3103
/*
 * Return the value of the register with the NUL-terminated name.
 * No sign is passed on, so the register is not stepped.
 */
int
roff_getreg(struct roff *r, const char *name)
{
	size_t	 namesz;

	namesz = strlen(name);
	return roff_getregn(r, name, namesz, '\0');
}
3109
3110 static int
roff_getregn(struct roff * r,const char * name,size_t len,char sign)3111 roff_getregn(struct roff *r, const char *name, size_t len, char sign)
3112 {
3113 struct roffreg *reg;
3114 int val;
3115
3116 if ('.' == name[0] && 2 == len) {
3117 val = roff_getregro(r, name + 1);
3118 if (-1 != val)
3119 return val;
3120 }
3121
3122 for (reg = r->regtab; reg; reg = reg->next) {
3123 if (len == reg->key.sz &&
3124 0 == strncmp(name, reg->key.p, len)) {
3125 switch (sign) {
3126 case '+':
3127 reg->val += reg->step;
3128 break;
3129 case '-':
3130 reg->val -= reg->step;
3131 break;
3132 default:
3133 break;
3134 }
3135 return reg->val;
3136 }
3137 }
3138
3139 roff_setregn(r, name, len, 0, '\0', INT_MIN);
3140 return 0;
3141 }
3142
3143 static int
roff_hasregn(const struct roff * r,const char * name,size_t len)3144 roff_hasregn(const struct roff *r, const char *name, size_t len)
3145 {
3146 struct roffreg *reg;
3147 int val;
3148
3149 if ('.' == name[0] && 2 == len) {
3150 val = roff_getregro(r, name + 1);
3151 if (-1 != val)
3152 return 1;
3153 }
3154
3155 for (reg = r->regtab; reg; reg = reg->next)
3156 if (len == reg->key.sz &&
3157 0 == strncmp(name, reg->key.p, len))
3158 return 1;
3159
3160 return 0;
3161 }
3162
3163 static void
roff_freereg(struct roffreg * reg)3164 roff_freereg(struct roffreg *reg)
3165 {
3166 struct roffreg *old_reg;
3167
3168 while (NULL != reg) {
3169 free(reg->key.p);
3170 old_reg = reg;
3171 reg = reg->next;
3172 free(old_reg);
3173 }
3174 }
3175
3176 static int
roff_nr(ROFF_ARGS)3177 roff_nr(ROFF_ARGS)
3178 {
3179 char *key, *val, *step;
3180 size_t keysz;
3181 int iv, is, len;
3182 char sign;
3183
3184 key = val = buf->buf + pos;
3185 if (*key == '\0')
3186 return ROFF_IGN;
3187
3188 keysz = roff_getname(r, &val, ln, pos);
3189 if (key[keysz] == '\\' || key[keysz] == '\t')
3190 return ROFF_IGN;
3191
3192 sign = *val;
3193 if (sign == '+' || sign == '-')
3194 val++;
3195
3196 len = 0;
3197 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0)
3198 return ROFF_IGN;
3199
3200 step = val + len;
3201 while (isspace((unsigned char)*step))
3202 step++;
3203 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0)
3204 is = INT_MIN;
3205
3206 roff_setregn(r, key, keysz, iv, sign, is);
3207 return ROFF_IGN;
3208 }
3209
3210 static int
roff_rr(ROFF_ARGS)3211 roff_rr(ROFF_ARGS)
3212 {
3213 struct roffreg *reg, **prev;
3214 char *name, *cp;
3215 size_t namesz;
3216
3217 name = cp = buf->buf + pos;
3218 if (*name == '\0')
3219 return ROFF_IGN;
3220 namesz = roff_getname(r, &cp, ln, pos);
3221 name[namesz] = '\0';
3222
3223 prev = &r->regtab;
3224 while (1) {
3225 reg = *prev;
3226 if (reg == NULL || !strcmp(name, reg->key.p))
3227 break;
3228 prev = ®->next;
3229 }
3230 if (reg != NULL) {
3231 *prev = reg->next;
3232 free(reg->key.p);
3233 free(reg);
3234 }
3235 return ROFF_IGN;
3236 }
3237
3238 /* --- handler functions for roff requests -------------------------------- */
3239
3240 static int
roff_rm(ROFF_ARGS)3241 roff_rm(ROFF_ARGS)
3242 {
3243 const char *name;
3244 char *cp;
3245 size_t namesz;
3246
3247 cp = buf->buf + pos;
3248 while (*cp != '\0') {
3249 name = cp;
3250 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf));
3251 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0);
3252 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
3253 if (name[namesz] == '\\' || name[namesz] == '\t')
3254 break;
3255 }
3256 return ROFF_IGN;
3257 }
3258
3259 static int
roff_it(ROFF_ARGS)3260 roff_it(ROFF_ARGS)
3261 {
3262 int iv;
3263
3264 /* Parse the number of lines. */
3265
3266 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) {
3267 mandoc_msg(MANDOCERR_IT_NONUM,
3268 ln, ppos, "%s", buf->buf + 1);
3269 return ROFF_IGN;
3270 }
3271
3272 while (isspace((unsigned char)buf->buf[pos]))
3273 pos++;
3274
3275 /*
3276 * Arm the input line trap.
3277 * Special-casing "an-trap" is an ugly workaround to cope
3278 * with DocBook stupidly fiddling with man(7) internals.
3279 */
3280
3281 roffit_lines = iv;
3282 roffit_macro = mandoc_strdup(iv != 1 ||
3283 strcmp(buf->buf + pos, "an-trap") ?
3284 buf->buf + pos : "br");
3285 return ROFF_IGN;
3286 }
3287
3288 static int
roff_Dd(ROFF_ARGS)3289 roff_Dd(ROFF_ARGS)
3290 {
3291 int mask;
3292 enum roff_tok t, te;
3293
3294 switch (tok) {
3295 case ROFF_Dd:
3296 tok = MDOC_Dd;
3297 te = MDOC_MAX;
3298 if (r->format == 0)
3299 r->format = MPARSE_MDOC;
3300 mask = MPARSE_MDOC | MPARSE_QUICK;
3301 break;
3302 case ROFF_TH:
3303 tok = MAN_TH;
3304 te = MAN_MAX;
3305 if (r->format == 0)
3306 r->format = MPARSE_MAN;
3307 mask = MPARSE_QUICK;
3308 break;
3309 default:
3310 abort();
3311 }
3312 if ((r->options & mask) == 0)
3313 for (t = tok; t < te; t++)
3314 roff_setstr(r, roff_name[t], NULL, 0);
3315 return ROFF_CONT;
3316 }
3317
3318 static int
roff_TE(ROFF_ARGS)3319 roff_TE(ROFF_ARGS)
3320 {
3321 r->man->flags &= ~ROFF_NONOFILL;
3322 if (r->tbl == NULL) {
3323 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE");
3324 return ROFF_IGN;
3325 }
3326 if (tbl_end(r->tbl, 0) == 0) {
3327 r->tbl = NULL;
3328 free(buf->buf);
3329 buf->buf = mandoc_strdup(".sp");
3330 buf->sz = 4;
3331 *offs = 0;
3332 return ROFF_REPARSE;
3333 }
3334 r->tbl = NULL;
3335 return ROFF_IGN;
3336 }
3337
3338 static int
roff_T_(ROFF_ARGS)3339 roff_T_(ROFF_ARGS)
3340 {
3341
3342 if (NULL == r->tbl)
3343 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&");
3344 else
3345 tbl_restart(ln, ppos, r->tbl);
3346
3347 return ROFF_IGN;
3348 }
3349
3350 /*
3351 * Handle in-line equation delimiters.
3352 */
3353 static int
roff_eqndelim(struct roff * r,struct buf * buf,int pos)3354 roff_eqndelim(struct roff *r, struct buf *buf, int pos)
3355 {
3356 char *cp1, *cp2;
3357 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr;
3358
3359 /*
3360 * Outside equations, look for an opening delimiter.
3361 * If we are inside an equation, we already know it is
3362 * in-line, or this function wouldn't have been called;
3363 * so look for a closing delimiter.
3364 */
3365
3366 cp1 = buf->buf + pos;
3367 cp2 = strchr(cp1, r->eqn == NULL ?
3368 r->last_eqn->odelim : r->last_eqn->cdelim);
3369 if (cp2 == NULL)
3370 return ROFF_CONT;
3371
3372 *cp2++ = '\0';
3373 bef_pr = bef_nl = aft_nl = aft_pr = "";
3374
3375 /* Handle preceding text, protecting whitespace. */
3376
3377 if (*buf->buf != '\0') {
3378 if (r->eqn == NULL)
3379 bef_pr = "\\&";
3380 bef_nl = "\n";
3381 }
3382
3383 /*
3384 * Prepare replacing the delimiter with an equation macro
3385 * and drop leading white space from the equation.
3386 */
3387
3388 if (r->eqn == NULL) {
3389 while (*cp2 == ' ')
3390 cp2++;
3391 mac = ".EQ";
3392 } else
3393 mac = ".EN";
3394
3395 /* Handle following text, protecting whitespace. */
3396
3397 if (*cp2 != '\0') {
3398 aft_nl = "\n";
3399 if (r->eqn != NULL)
3400 aft_pr = "\\&";
3401 }
3402
3403 /* Do the actual replacement. */
3404
3405 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf,
3406 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1;
3407 free(buf->buf);
3408 buf->buf = cp1;
3409
3410 /* Toggle the in-line state of the eqn subsystem. */
3411
3412 r->eqn_inline = r->eqn == NULL;
3413 return ROFF_REPARSE;
3414 }
3415
3416 static int
roff_EQ(ROFF_ARGS)3417 roff_EQ(ROFF_ARGS)
3418 {
3419 struct roff_node *n;
3420
3421 if (r->man->meta.macroset == MACROSET_MAN)
3422 man_breakscope(r->man, ROFF_EQ);
3423 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE);
3424 if (ln > r->man->last->line)
3425 n->flags |= NODE_LINE;
3426 n->eqn = eqn_box_new();
3427 roff_node_append(r->man, n);
3428 r->man->next = ROFF_NEXT_SIBLING;
3429
3430 assert(r->eqn == NULL);
3431 if (r->last_eqn == NULL)
3432 r->last_eqn = eqn_alloc();
3433 else
3434 eqn_reset(r->last_eqn);
3435 r->eqn = r->last_eqn;
3436 r->eqn->node = n;
3437
3438 if (buf->buf[pos] != '\0')
3439 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3440 ".EQ %s", buf->buf + pos);
3441
3442 return ROFF_IGN;
3443 }
3444
3445 static int
roff_EN(ROFF_ARGS)3446 roff_EN(ROFF_ARGS)
3447 {
3448 if (r->eqn != NULL) {
3449 eqn_parse(r->eqn);
3450 r->eqn = NULL;
3451 } else
3452 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN");
3453 if (buf->buf[pos] != '\0')
3454 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3455 "EN %s", buf->buf + pos);
3456 return ROFF_IGN;
3457 }
3458
3459 static int
roff_TS(ROFF_ARGS)3460 roff_TS(ROFF_ARGS)
3461 {
3462 if (r->tbl != NULL) {
3463 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS");
3464 tbl_end(r->tbl, 0);
3465 }
3466 r->man->flags |= ROFF_NONOFILL;
3467 r->tbl = tbl_alloc(ppos, ln, r->last_tbl);
3468 if (r->last_tbl == NULL)
3469 r->first_tbl = r->tbl;
3470 r->last_tbl = r->tbl;
3471 return ROFF_IGN;
3472 }
3473
3474 static int
roff_noarg(ROFF_ARGS)3475 roff_noarg(ROFF_ARGS)
3476 {
3477 if (r->man->flags & (MAN_BLINE | MAN_ELINE))
3478 man_breakscope(r->man, tok);
3479 if (tok == ROFF_brp)
3480 tok = ROFF_br;
3481 roff_elem_alloc(r->man, ln, ppos, tok);
3482 if (buf->buf[pos] != '\0')
3483 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos,
3484 "%s %s", roff_name[tok], buf->buf + pos);
3485 if (tok == ROFF_nf)
3486 r->man->flags |= ROFF_NOFILL;
3487 else if (tok == ROFF_fi)
3488 r->man->flags &= ~ROFF_NOFILL;
3489 r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3490 r->man->next = ROFF_NEXT_SIBLING;
3491 return ROFF_IGN;
3492 }
3493
3494 static int
roff_onearg(ROFF_ARGS)3495 roff_onearg(ROFF_ARGS)
3496 {
3497 struct roff_node *n;
3498 char *cp;
3499 int npos;
3500
3501 if (r->man->flags & (MAN_BLINE | MAN_ELINE) &&
3502 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp ||
3503 tok == ROFF_ti))
3504 man_breakscope(r->man, tok);
3505
3506 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) {
3507 r->man->last = roffce_node;
3508 r->man->next = ROFF_NEXT_SIBLING;
3509 }
3510
3511 roff_elem_alloc(r->man, ln, ppos, tok);
3512 n = r->man->last;
3513
3514 cp = buf->buf + pos;
3515 if (*cp != '\0') {
3516 while (*cp != '\0' && *cp != ' ')
3517 cp++;
3518 while (*cp == ' ')
3519 *cp++ = '\0';
3520 if (*cp != '\0')
3521 mandoc_msg(MANDOCERR_ARG_EXCESS,
3522 ln, (int)(cp - buf->buf),
3523 "%s ... %s", roff_name[tok], cp);
3524 roff_word_alloc(r->man, ln, pos, buf->buf + pos);
3525 }
3526
3527 if (tok == ROFF_ce || tok == ROFF_rj) {
3528 if (r->man->last->type == ROFFT_ELEM) {
3529 roff_word_alloc(r->man, ln, pos, "1");
3530 r->man->last->flags |= NODE_NOSRC;
3531 }
3532 npos = 0;
3533 if (roff_evalnum(r, ln, r->man->last->string, &npos,
3534 &roffce_lines, 0) == 0) {
3535 mandoc_msg(MANDOCERR_CE_NONUM,
3536 ln, pos, "ce %s", buf->buf + pos);
3537 roffce_lines = 1;
3538 }
3539 if (roffce_lines < 1) {
3540 r->man->last = r->man->last->parent;
3541 roffce_node = NULL;
3542 roffce_lines = 0;
3543 } else
3544 roffce_node = r->man->last->parent;
3545 } else {
3546 n->flags |= NODE_VALID | NODE_ENDED;
3547 r->man->last = n;
3548 }
3549 n->flags |= NODE_LINE;
3550 r->man->next = ROFF_NEXT_SIBLING;
3551 return ROFF_IGN;
3552 }
3553
3554 static int
roff_manyarg(ROFF_ARGS)3555 roff_manyarg(ROFF_ARGS)
3556 {
3557 struct roff_node *n;
3558 char *sp, *ep;
3559
3560 roff_elem_alloc(r->man, ln, ppos, tok);
3561 n = r->man->last;
3562
3563 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) {
3564 while (*ep != '\0' && *ep != ' ')
3565 ep++;
3566 while (*ep == ' ')
3567 *ep++ = '\0';
3568 roff_word_alloc(r->man, ln, sp - buf->buf, sp);
3569 }
3570
3571 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
3572 r->man->last = n;
3573 r->man->next = ROFF_NEXT_SIBLING;
3574 return ROFF_IGN;
3575 }
3576
3577 static int
roff_als(ROFF_ARGS)3578 roff_als(ROFF_ARGS)
3579 {
3580 char *oldn, *newn, *end, *value;
3581 size_t oldsz, newsz, valsz;
3582
3583 newn = oldn = buf->buf + pos;
3584 if (*newn == '\0')
3585 return ROFF_IGN;
3586
3587 newsz = roff_getname(r, &oldn, ln, pos);
3588 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0')
3589 return ROFF_IGN;
3590
3591 end = oldn;
3592 oldsz = roff_getname(r, &end, ln, oldn - buf->buf);
3593 if (oldsz == 0)
3594 return ROFF_IGN;
3595
3596 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n",
3597 (int)oldsz, oldn);
3598 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0);
3599 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3600 free(value);
3601 return ROFF_IGN;
3602 }
3603
3604 /*
3605 * The .break request only makes sense inside conditionals,
3606 * and that case is already handled in roff_cond_sub().
3607 */
3608 static int
roff_break(ROFF_ARGS)3609 roff_break(ROFF_ARGS)
3610 {
3611 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, pos, "break");
3612 return ROFF_IGN;
3613 }
3614
3615 static int
roff_cc(ROFF_ARGS)3616 roff_cc(ROFF_ARGS)
3617 {
3618 const char *p;
3619
3620 p = buf->buf + pos;
3621
3622 if (*p == '\0' || (r->control = *p++) == '.')
3623 r->control = '\0';
3624
3625 if (*p != '\0')
3626 mandoc_msg(MANDOCERR_ARG_EXCESS,
3627 ln, p - buf->buf, "cc ... %s", p);
3628
3629 return ROFF_IGN;
3630 }
3631
3632 static int
roff_char(ROFF_ARGS)3633 roff_char(ROFF_ARGS)
3634 {
3635 const char *p, *kp, *vp;
3636 size_t ksz, vsz;
3637 int font;
3638
3639 /* Parse the character to be replaced. */
3640
3641 kp = buf->buf + pos;
3642 p = kp + 1;
3643 if (*kp == '\0' || (*kp == '\\' &&
3644 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) ||
3645 (*p != ' ' && *p != '\0')) {
3646 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp);
3647 return ROFF_IGN;
3648 }
3649 ksz = p - kp;
3650 while (*p == ' ')
3651 p++;
3652
3653 /*
3654 * If the replacement string contains a font escape sequence,
3655 * we have to restore the font at the end.
3656 */
3657
3658 vp = p;
3659 vsz = strlen(p);
3660 font = 0;
3661 while (*p != '\0') {
3662 if (*p++ != '\\')
3663 continue;
3664 switch (mandoc_escape(&p, NULL, NULL)) {
3665 case ESCAPE_FONT:
3666 case ESCAPE_FONTROMAN:
3667 case ESCAPE_FONTITALIC:
3668 case ESCAPE_FONTBOLD:
3669 case ESCAPE_FONTBI:
3670 case ESCAPE_FONTCR:
3671 case ESCAPE_FONTCB:
3672 case ESCAPE_FONTCI:
3673 case ESCAPE_FONTPREV:
3674 font++;
3675 break;
3676 default:
3677 break;
3678 }
3679 }
3680 if (font > 1)
3681 mandoc_msg(MANDOCERR_CHAR_FONT,
3682 ln, (int)(vp - buf->buf), "%s", vp);
3683
3684 /*
3685 * Approximate the effect of .char using the .tr tables.
3686 * XXX In groff, .char and .tr interact differently.
3687 */
3688
3689 if (ksz == 1) {
3690 if (r->xtab == NULL)
3691 r->xtab = mandoc_calloc(128, sizeof(*r->xtab));
3692 assert((unsigned int)*kp < 128);
3693 free(r->xtab[(int)*kp].p);
3694 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p,
3695 "%s%s", vp, font ? "\fP" : "");
3696 } else {
3697 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0);
3698 if (font)
3699 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1);
3700 }
3701 return ROFF_IGN;
3702 }
3703
3704 static int
roff_ec(ROFF_ARGS)3705 roff_ec(ROFF_ARGS)
3706 {
3707 const char *p;
3708
3709 p = buf->buf + pos;
3710 if (*p == '\0')
3711 r->escape = '\\';
3712 else {
3713 r->escape = *p;
3714 if (*++p != '\0')
3715 mandoc_msg(MANDOCERR_ARG_EXCESS, ln,
3716 (int)(p - buf->buf), "ec ... %s", p);
3717 }
3718 return ROFF_IGN;
3719 }
3720
3721 static int
roff_eo(ROFF_ARGS)3722 roff_eo(ROFF_ARGS)
3723 {
3724 r->escape = '\0';
3725 if (buf->buf[pos] != '\0')
3726 mandoc_msg(MANDOCERR_ARG_SKIP,
3727 ln, pos, "eo %s", buf->buf + pos);
3728 return ROFF_IGN;
3729 }
3730
3731 static int
roff_nop(ROFF_ARGS)3732 roff_nop(ROFF_ARGS)
3733 {
3734 while (buf->buf[pos] == ' ')
3735 pos++;
3736 *offs = pos;
3737 return ROFF_RERUN;
3738 }
3739
3740 static int
roff_tr(ROFF_ARGS)3741 roff_tr(ROFF_ARGS)
3742 {
3743 const char *p, *first, *second;
3744 size_t fsz, ssz;
3745 enum mandoc_esc esc;
3746
3747 p = buf->buf + pos;
3748
3749 if (*p == '\0') {
3750 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr");
3751 return ROFF_IGN;
3752 }
3753
3754 while (*p != '\0') {
3755 fsz = ssz = 1;
3756
3757 first = p++;
3758 if (*first == '\\') {
3759 esc = mandoc_escape(&p, NULL, NULL);
3760 if (esc == ESCAPE_ERROR) {
3761 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3762 (int)(p - buf->buf), "%s", first);
3763 return ROFF_IGN;
3764 }
3765 fsz = (size_t)(p - first);
3766 }
3767
3768 second = p++;
3769 if (*second == '\\') {
3770 esc = mandoc_escape(&p, NULL, NULL);
3771 if (esc == ESCAPE_ERROR) {
3772 mandoc_msg(MANDOCERR_ESC_BAD, ln,
3773 (int)(p - buf->buf), "%s", second);
3774 return ROFF_IGN;
3775 }
3776 ssz = (size_t)(p - second);
3777 } else if (*second == '\0') {
3778 mandoc_msg(MANDOCERR_TR_ODD, ln,
3779 (int)(first - buf->buf), "tr %s", first);
3780 second = " ";
3781 p--;
3782 }
3783
3784 if (fsz > 1) {
3785 roff_setstrn(&r->xmbtab, first, fsz,
3786 second, ssz, 0);
3787 continue;
3788 }
3789
3790 if (r->xtab == NULL)
3791 r->xtab = mandoc_calloc(128,
3792 sizeof(struct roffstr));
3793
3794 free(r->xtab[(int)*first].p);
3795 r->xtab[(int)*first].p = mandoc_strndup(second, ssz);
3796 r->xtab[(int)*first].sz = ssz;
3797 }
3798
3799 return ROFF_IGN;
3800 }
3801
3802 /*
3803 * Implementation of the .return request.
3804 * There is no need to call roff_userret() from here.
3805 * The read module will call that after rewinding the reader stack
3806 * to the place from where the current macro was called.
3807 */
3808 static int
roff_return(ROFF_ARGS)3809 roff_return(ROFF_ARGS)
3810 {
3811 if (r->mstackpos >= 0)
3812 return ROFF_IGN | ROFF_USERRET;
3813
3814 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return");
3815 return ROFF_IGN;
3816 }
3817
3818 static int
roff_rn(ROFF_ARGS)3819 roff_rn(ROFF_ARGS)
3820 {
3821 const char *value;
3822 char *oldn, *newn, *end;
3823 size_t oldsz, newsz;
3824 int deftype;
3825
3826 oldn = newn = buf->buf + pos;
3827 if (*oldn == '\0')
3828 return ROFF_IGN;
3829
3830 oldsz = roff_getname(r, &newn, ln, pos);
3831 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0')
3832 return ROFF_IGN;
3833
3834 end = newn;
3835 newsz = roff_getname(r, &end, ln, newn - buf->buf);
3836 if (newsz == 0)
3837 return ROFF_IGN;
3838
3839 deftype = ROFFDEF_ANY;
3840 value = roff_getstrn(r, oldn, oldsz, &deftype);
3841 switch (deftype) {
3842 case ROFFDEF_USER:
3843 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3844 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0);
3845 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3846 break;
3847 case ROFFDEF_PRE:
3848 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0);
3849 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3850 break;
3851 case ROFFDEF_REN:
3852 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0);
3853 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0);
3854 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3855 break;
3856 case ROFFDEF_STD:
3857 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0);
3858 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3859 break;
3860 default:
3861 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0);
3862 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0);
3863 break;
3864 }
3865 return ROFF_IGN;
3866 }
3867
3868 static int
roff_shift(ROFF_ARGS)3869 roff_shift(ROFF_ARGS)
3870 {
3871 struct mctx *ctx;
3872 int levels, i;
3873
3874 levels = 1;
3875 if (buf->buf[pos] != '\0' &&
3876 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
3877 mandoc_msg(MANDOCERR_CE_NONUM,
3878 ln, pos, "shift %s", buf->buf + pos);
3879 levels = 1;
3880 }
3881 if (r->mstackpos < 0) {
3882 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift");
3883 return ROFF_IGN;
3884 }
3885 ctx = r->mstack + r->mstackpos;
3886 if (levels > ctx->argc) {
3887 mandoc_msg(MANDOCERR_SHIFT,
3888 ln, pos, "%d, but max is %d", levels, ctx->argc);
3889 levels = ctx->argc;
3890 }
3891 if (levels == 0)
3892 return ROFF_IGN;
3893 for (i = 0; i < levels; i++)
3894 free(ctx->argv[i]);
3895 ctx->argc -= levels;
3896 for (i = 0; i < ctx->argc; i++)
3897 ctx->argv[i] = ctx->argv[i + levels];
3898 return ROFF_IGN;
3899 }
3900
3901 static int
roff_so(ROFF_ARGS)3902 roff_so(ROFF_ARGS)
3903 {
3904 char *name, *cp;
3905
3906 name = buf->buf + pos;
3907 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name);
3908
3909 /*
3910 * Handle `so'. Be EXTREMELY careful, as we shouldn't be
3911 * opening anything that's not in our cwd or anything beneath
3912 * it. Thus, explicitly disallow traversing up the file-system
3913 * or using absolute paths.
3914 */
3915
3916 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) {
3917 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name);
3918 buf->sz = mandoc_asprintf(&cp,
3919 ".sp\nSee the file %s.\n.sp", name) + 1;
3920 free(buf->buf);
3921 buf->buf = cp;
3922 *offs = 0;
3923 return ROFF_REPARSE;
3924 }
3925
3926 *offs = pos;
3927 return ROFF_SO;
3928 }
3929
3930 /* --- user defined strings and macros ------------------------------------ */
3931
3932 static int
roff_userdef(ROFF_ARGS)3933 roff_userdef(ROFF_ARGS)
3934 {
3935 struct mctx *ctx;
3936 char *arg, *ap, *dst, *src;
3937 size_t sz;
3938
3939 /* If the macro is empty, ignore it altogether. */
3940
3941 if (*r->current_string == '\0')
3942 return ROFF_IGN;
3943
3944 /* Initialize a new macro stack context. */
3945
3946 if (++r->mstackpos == r->mstacksz) {
3947 r->mstack = mandoc_recallocarray(r->mstack,
3948 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack));
3949 r->mstacksz += 8;
3950 }
3951 ctx = r->mstack + r->mstackpos;
3952 ctx->argsz = 0;
3953 ctx->argc = 0;
3954 ctx->argv = NULL;
3955
3956 /*
3957 * Collect pointers to macro argument strings,
3958 * NUL-terminating them and escaping quotes.
3959 */
3960
3961 src = buf->buf + pos;
3962 while (*src != '\0') {
3963 if (ctx->argc == ctx->argsz) {
3964 ctx->argsz += 8;
3965 ctx->argv = mandoc_reallocarray(ctx->argv,
3966 ctx->argsz, sizeof(*ctx->argv));
3967 }
3968 arg = roff_getarg(r, &src, ln, &pos);
3969 sz = 1; /* For the terminating NUL. */
3970 for (ap = arg; *ap != '\0'; ap++)
3971 sz += *ap == '"' ? 4 : 1;
3972 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz);
3973 for (ap = arg; *ap != '\0'; ap++) {
3974 if (*ap == '"') {
3975 memcpy(dst, "\\(dq", 4);
3976 dst += 4;
3977 } else
3978 *dst++ = *ap;
3979 }
3980 *dst = '\0';
3981 free(arg);
3982 }
3983
3984 /* Replace the macro invocation by the macro definition. */
3985
3986 free(buf->buf);
3987 buf->buf = mandoc_strdup(r->current_string);
3988 buf->sz = strlen(buf->buf) + 1;
3989 *offs = 0;
3990
3991 return buf->buf[buf->sz - 2] == '\n' ?
3992 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND;
3993 }
3994
3995 /*
3996 * Calling a high-level macro that was renamed with .rn.
3997 * r->current_string has already been set up by roff_parse().
3998 */
3999 static int
roff_renamed(ROFF_ARGS)4000 roff_renamed(ROFF_ARGS)
4001 {
4002 char *nbuf;
4003
4004 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string,
4005 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1;
4006 free(buf->buf);
4007 buf->buf = nbuf;
4008 *offs = 0;
4009 return ROFF_CONT;
4010 }
4011
4012 /*
4013 * Measure the length in bytes of the roff identifier at *cpp
4014 * and advance the pointer to the next word.
4015 */
4016 static size_t
roff_getname(struct roff * r,char ** cpp,int ln,int pos)4017 roff_getname(struct roff *r, char **cpp, int ln, int pos)
4018 {
4019 char *name, *cp;
4020 size_t namesz;
4021
4022 name = *cpp;
4023 if (*name == '\0')
4024 return 0;
4025
4026 /* Advance cp to the byte after the end of the name. */
4027
4028 for (cp = name; 1; cp++) {
4029 namesz = cp - name;
4030 if (*cp == '\0')
4031 break;
4032 if (*cp == ' ' || *cp == '\t') {
4033 cp++;
4034 break;
4035 }
4036 if (*cp != '\\')
4037 continue;
4038 if (cp[1] == '{' || cp[1] == '}')
4039 break;
4040 if (*++cp == '\\')
4041 continue;
4042 mandoc_msg(MANDOCERR_NAMESC, ln, pos,
4043 "%.*s", (int)(cp - name + 1), name);
4044 mandoc_escape((const char **)&cp, NULL, NULL);
4045 break;
4046 }
4047
4048 /* Read past spaces. */
4049
4050 while (*cp == ' ')
4051 cp++;
4052
4053 *cpp = cp;
4054 return namesz;
4055 }
4056
4057 /*
4058 * Store *string into the user-defined string called *name.
4059 * To clear an existing entry, call with (*r, *name, NULL, 0).
4060 * append == 0: replace mode
4061 * append == 1: single-line append mode
4062 * append == 2: multiline append mode, append '\n' after each call
4063 */
4064 static void
roff_setstr(struct roff * r,const char * name,const char * string,int append)4065 roff_setstr(struct roff *r, const char *name, const char *string,
4066 int append)
4067 {
4068 size_t namesz;
4069
4070 namesz = strlen(name);
4071 roff_setstrn(&r->strtab, name, namesz, string,
4072 string ? strlen(string) : 0, append);
4073 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0);
4074 }
4075
4076 static void
roff_setstrn(struct roffkv ** r,const char * name,size_t namesz,const char * string,size_t stringsz,int append)4077 roff_setstrn(struct roffkv **r, const char *name, size_t namesz,
4078 const char *string, size_t stringsz, int append)
4079 {
4080 struct roffkv *n;
4081 char *c;
4082 int i;
4083 size_t oldch, newch;
4084
4085 /* Search for an existing string with the same name. */
4086 n = *r;
4087
4088 while (n && (namesz != n->key.sz ||
4089 strncmp(n->key.p, name, namesz)))
4090 n = n->next;
4091
4092 if (NULL == n) {
4093 /* Create a new string table entry. */
4094 n = mandoc_malloc(sizeof(struct roffkv));
4095 n->key.p = mandoc_strndup(name, namesz);
4096 n->key.sz = namesz;
4097 n->val.p = NULL;
4098 n->val.sz = 0;
4099 n->next = *r;
4100 *r = n;
4101 } else if (0 == append) {
4102 free(n->val.p);
4103 n->val.p = NULL;
4104 n->val.sz = 0;
4105 }
4106
4107 if (NULL == string)
4108 return;
4109
4110 /*
4111 * One additional byte for the '\n' in multiline mode,
4112 * and one for the terminating '\0'.
4113 */
4114 newch = stringsz + (1 < append ? 2u : 1u);
4115
4116 if (NULL == n->val.p) {
4117 n->val.p = mandoc_malloc(newch);
4118 *n->val.p = '\0';
4119 oldch = 0;
4120 } else {
4121 oldch = n->val.sz;
4122 n->val.p = mandoc_realloc(n->val.p, oldch + newch);
4123 }
4124
4125 /* Skip existing content in the destination buffer. */
4126 c = n->val.p + (int)oldch;
4127
4128 /* Append new content to the destination buffer. */
4129 i = 0;
4130 while (i < (int)stringsz) {
4131 /*
4132 * Rudimentary roff copy mode:
4133 * Handle escaped backslashes.
4134 */
4135 if ('\\' == string[i] && '\\' == string[i + 1])
4136 i++;
4137 *c++ = string[i++];
4138 }
4139
4140 /* Append terminating bytes. */
4141 if (1 < append)
4142 *c++ = '\n';
4143
4144 *c = '\0';
4145 n->val.sz = (int)(c - n->val.p);
4146 }
4147
4148 static const char *
roff_getstrn(struct roff * r,const char * name,size_t len,int * deftype)4149 roff_getstrn(struct roff *r, const char *name, size_t len,
4150 int *deftype)
4151 {
4152 const struct roffkv *n;
4153 int found, i;
4154 enum roff_tok tok;
4155
4156 found = 0;
4157 for (n = r->strtab; n != NULL; n = n->next) {
4158 if (strncmp(name, n->key.p, len) != 0 ||
4159 n->key.p[len] != '\0' || n->val.p == NULL)
4160 continue;
4161 if (*deftype & ROFFDEF_USER) {
4162 *deftype = ROFFDEF_USER;
4163 return n->val.p;
4164 } else {
4165 found = 1;
4166 break;
4167 }
4168 }
4169 for (n = r->rentab; n != NULL; n = n->next) {
4170 if (strncmp(name, n->key.p, len) != 0 ||
4171 n->key.p[len] != '\0' || n->val.p == NULL)
4172 continue;
4173 if (*deftype & ROFFDEF_REN) {
4174 *deftype = ROFFDEF_REN;
4175 return n->val.p;
4176 } else {
4177 found = 1;
4178 break;
4179 }
4180 }
4181 for (i = 0; i < PREDEFS_MAX; i++) {
4182 if (strncmp(name, predefs[i].name, len) != 0 ||
4183 predefs[i].name[len] != '\0')
4184 continue;
4185 if (*deftype & ROFFDEF_PRE) {
4186 *deftype = ROFFDEF_PRE;
4187 return predefs[i].str;
4188 } else {
4189 found = 1;
4190 break;
4191 }
4192 }
4193 if (r->man->meta.macroset != MACROSET_MAN) {
4194 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) {
4195 if (strncmp(name, roff_name[tok], len) != 0 ||
4196 roff_name[tok][len] != '\0')
4197 continue;
4198 if (*deftype & ROFFDEF_STD) {
4199 *deftype = ROFFDEF_STD;
4200 return NULL;
4201 } else {
4202 found = 1;
4203 break;
4204 }
4205 }
4206 }
4207 if (r->man->meta.macroset != MACROSET_MDOC) {
4208 for (tok = MAN_TH; tok < MAN_MAX; tok++) {
4209 if (strncmp(name, roff_name[tok], len) != 0 ||
4210 roff_name[tok][len] != '\0')
4211 continue;
4212 if (*deftype & ROFFDEF_STD) {
4213 *deftype = ROFFDEF_STD;
4214 return NULL;
4215 } else {
4216 found = 1;
4217 break;
4218 }
4219 }
4220 }
4221
4222 if (found == 0 && *deftype != ROFFDEF_ANY) {
4223 if (*deftype & ROFFDEF_REN) {
4224 /*
4225 * This might still be a request,
4226 * so do not treat it as undefined yet.
4227 */
4228 *deftype = ROFFDEF_UNDEF;
4229 return NULL;
4230 }
4231
4232 /* Using an undefined string defines it to be empty. */
4233
4234 roff_setstrn(&r->strtab, name, len, "", 0, 0);
4235 roff_setstrn(&r->rentab, name, len, NULL, 0, 0);
4236 }
4237
4238 *deftype = 0;
4239 return NULL;
4240 }
4241
4242 static void
roff_freestr(struct roffkv * r)4243 roff_freestr(struct roffkv *r)
4244 {
4245 struct roffkv *n, *nn;
4246
4247 for (n = r; n; n = nn) {
4248 free(n->key.p);
4249 free(n->val.p);
4250 nn = n->next;
4251 free(n);
4252 }
4253 }
4254
4255 /* --- accessors and utility functions ------------------------------------ */
4256
4257 /*
4258 * Duplicate an input string, making the appropriate character
4259 * conversations (as stipulated by `tr') along the way.
4260 * Returns a heap-allocated string with all the replacements made.
4261 */
4262 char *
roff_strdup(const struct roff * r,const char * p)4263 roff_strdup(const struct roff *r, const char *p)
4264 {
4265 const struct roffkv *cp;
4266 char *res;
4267 const char *pp;
4268 size_t ssz, sz;
4269 enum mandoc_esc esc;
4270
4271 if (NULL == r->xmbtab && NULL == r->xtab)
4272 return mandoc_strdup(p);
4273 else if ('\0' == *p)
4274 return mandoc_strdup("");
4275
4276 /*
4277 * Step through each character looking for term matches
4278 * (remember that a `tr' can be invoked with an escape, which is
4279 * a glyph but the escape is multi-character).
4280 * We only do this if the character hash has been initialised
4281 * and the string is >0 length.
4282 */
4283
4284 res = NULL;
4285 ssz = 0;
4286
4287 while ('\0' != *p) {
4288 assert((unsigned int)*p < 128);
4289 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) {
4290 sz = r->xtab[(int)*p].sz;
4291 res = mandoc_realloc(res, ssz + sz + 1);
4292 memcpy(res + ssz, r->xtab[(int)*p].p, sz);
4293 ssz += sz;
4294 p++;
4295 continue;
4296 } else if ('\\' != *p) {
4297 res = mandoc_realloc(res, ssz + 2);
4298 res[ssz++] = *p++;
4299 continue;
4300 }
4301
4302 /* Search for term matches. */
4303 for (cp = r->xmbtab; cp; cp = cp->next)
4304 if (0 == strncmp(p, cp->key.p, cp->key.sz))
4305 break;
4306
4307 if (NULL != cp) {
4308 /*
4309 * A match has been found.
4310 * Append the match to the array and move
4311 * forward by its keysize.
4312 */
4313 res = mandoc_realloc(res,
4314 ssz + cp->val.sz + 1);
4315 memcpy(res + ssz, cp->val.p, cp->val.sz);
4316 ssz += cp->val.sz;
4317 p += (int)cp->key.sz;
4318 continue;
4319 }
4320
4321 /*
4322 * Handle escapes carefully: we need to copy
4323 * over just the escape itself, or else we might
4324 * do replacements within the escape itself.
4325 * Make sure to pass along the bogus string.
4326 */
4327 pp = p++;
4328 esc = mandoc_escape(&p, NULL, NULL);
4329 if (ESCAPE_ERROR == esc) {
4330 sz = strlen(pp);
4331 res = mandoc_realloc(res, ssz + sz + 1);
4332 memcpy(res + ssz, pp, sz);
4333 break;
4334 }
4335 /*
4336 * We bail out on bad escapes.
4337 * No need to warn: we already did so when
4338 * roff_expand() was called.
4339 */
4340 sz = (int)(p - pp);
4341 res = mandoc_realloc(res, ssz + sz + 1);
4342 memcpy(res + ssz, pp, sz);
4343 ssz += sz;
4344 }
4345
4346 res[(int)ssz] = '\0';
4347 return res;
4348 }
4349
4350 int
roff_getformat(const struct roff * r)4351 roff_getformat(const struct roff *r)
4352 {
4353
4354 return r->format;
4355 }
4356
4357 /*
4358 * Find out whether a line is a macro line or not.
4359 * If it is, adjust the current position and return one; if it isn't,
4360 * return zero and don't change the current position.
4361 * If the control character has been set with `.cc', then let that grain
4362 * precedence.
4363 * This is slighly contrary to groff, where using the non-breaking
4364 * control character when `cc' has been invoked will cause the
4365 * non-breaking macro contents to be printed verbatim.
4366 */
4367 int
roff_getcontrol(const struct roff * r,const char * cp,int * ppos)4368 roff_getcontrol(const struct roff *r, const char *cp, int *ppos)
4369 {
4370 int pos;
4371
4372 pos = *ppos;
4373
4374 if (r->control != '\0' && cp[pos] == r->control)
4375 pos++;
4376 else if (r->control != '\0')
4377 return 0;
4378 else if ('\\' == cp[pos] && '.' == cp[pos + 1])
4379 pos += 2;
4380 else if ('.' == cp[pos] || '\'' == cp[pos])
4381 pos++;
4382 else
4383 return 0;
4384
4385 while (' ' == cp[pos] || '\t' == cp[pos])
4386 pos++;
4387
4388 *ppos = pos;
4389 return 1;
4390 }
4391