1 /* $Id: roff.c,v 1.363 2019/02/06 21:11:43 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc_ohash.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mandoc_parse.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "tbl_parse.h" 39 #include "eqn_parse.h" 40 41 /* 42 * ASCII_ESC is used to signal from roff_getarg() to roff_expand() 43 * that an escape sequence resulted from copy-in processing and 44 * needs to be checked or interpolated. As it is used nowhere 45 * else, it is defined here rather than in a header file. 46 */ 47 #define ASCII_ESC 27 48 49 /* Maximum number of string expansions per line, to break infinite loops. */ 50 #define EXPAND_LIMIT 1000 51 52 /* Types of definitions of macros and strings. */ 53 #define ROFFDEF_USER (1 << 1) /* User-defined. 
*/ 54 #define ROFFDEF_PRE (1 << 2) /* Predefined. */ 55 #define ROFFDEF_REN (1 << 3) /* Renamed standard macro. */ 56 #define ROFFDEF_STD (1 << 4) /* mdoc(7) or man(7) macro. */ 57 #define ROFFDEF_ANY (ROFFDEF_USER | ROFFDEF_PRE | \ 58 ROFFDEF_REN | ROFFDEF_STD) 59 #define ROFFDEF_UNDEF (1 << 5) /* Completely undefined. */ 60 61 /* --- data types --------------------------------------------------------- */ 62 63 /* 64 * An incredibly-simple string buffer. 65 */ 66 struct roffstr { 67 char *p; /* nil-terminated buffer */ 68 size_t sz; /* saved strlen(p) */ 69 }; 70 71 /* 72 * A key-value roffstr pair as part of a singly-linked list. 73 */ 74 struct roffkv { 75 struct roffstr key; 76 struct roffstr val; 77 struct roffkv *next; /* next in list */ 78 }; 79 80 /* 81 * A single number register as part of a singly-linked list. 82 */ 83 struct roffreg { 84 struct roffstr key; 85 int val; 86 int step; 87 struct roffreg *next; 88 }; 89 90 /* 91 * Association of request and macro names with token IDs. 92 */ 93 struct roffreq { 94 enum roff_tok tok; 95 char name[]; 96 }; 97 98 /* 99 * A macro processing context. 100 * More than one is needed when macro calls are nested. 
101 */ 102 struct mctx { 103 char **argv; 104 int argc; 105 int argsz; 106 }; 107 108 struct roff { 109 struct roff_man *man; /* mdoc or man parser */ 110 struct roffnode *last; /* leaf of stack */ 111 struct mctx *mstack; /* stack of macro contexts */ 112 int *rstack; /* stack of inverted `ie' values */ 113 struct ohash *reqtab; /* request lookup table */ 114 struct roffreg *regtab; /* number registers */ 115 struct roffkv *strtab; /* user-defined strings & macros */ 116 struct roffkv *rentab; /* renamed strings & macros */ 117 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 118 struct roffstr *xtab; /* single-byte trans table (`tr') */ 119 const char *current_string; /* value of last called user macro */ 120 struct tbl_node *first_tbl; /* first table parsed */ 121 struct tbl_node *last_tbl; /* last table parsed */ 122 struct tbl_node *tbl; /* current table being parsed */ 123 struct eqn_node *last_eqn; /* equation parser */ 124 struct eqn_node *eqn; /* active equation parser */ 125 int eqn_inline; /* current equation is inline */ 126 int options; /* parse options */ 127 int mstacksz; /* current size of mstack */ 128 int mstackpos; /* position in mstack */ 129 int rstacksz; /* current size limit of rstack */ 130 int rstackpos; /* position in rstack */ 131 int format; /* current file in mdoc or man format */ 132 char control; /* control character */ 133 char escape; /* escape character */ 134 }; 135 136 struct roffnode { 137 enum roff_tok tok; /* type of node */ 138 struct roffnode *parent; /* up one in stack */ 139 int line; /* parse line */ 140 int col; /* parse col */ 141 char *name; /* node name, e.g. 
macro name */ 142 char *end; /* end-rules: custom token */ 143 int endspan; /* end-rules: next-line or infty */ 144 int rule; /* current evaluation rule */ 145 }; 146 147 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 148 enum roff_tok tok, /* tok of macro */ \ 149 struct buf *buf, /* input buffer */ \ 150 int ln, /* parse line */ \ 151 int ppos, /* original pos in buffer */ \ 152 int pos, /* current pos in buffer */ \ 153 int *offs /* reset offset of buffer data */ 154 155 typedef int (*roffproc)(ROFF_ARGS); 156 157 struct roffmac { 158 roffproc proc; /* process new macro */ 159 roffproc text; /* process as child text of macro */ 160 roffproc sub; /* process as child of macro */ 161 int flags; 162 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 163 }; 164 165 struct predef { 166 const char *name; /* predefined input name */ 167 const char *str; /* replacement symbol */ 168 }; 169 170 #define PREDEF(__name, __str) \ 171 { (__name), (__str) }, 172 173 /* --- function prototypes ------------------------------------------------ */ 174 175 static int roffnode_cleanscope(struct roff *); 176 static int roffnode_pop(struct roff *); 177 static void roffnode_push(struct roff *, enum roff_tok, 178 const char *, int, int); 179 static void roff_addtbl(struct roff_man *, int, struct tbl_node *); 180 static int roff_als(ROFF_ARGS); 181 static int roff_block(ROFF_ARGS); 182 static int roff_block_text(ROFF_ARGS); 183 static int roff_block_sub(ROFF_ARGS); 184 static int roff_cblock(ROFF_ARGS); 185 static int roff_cc(ROFF_ARGS); 186 static int roff_ccond(struct roff *, int, int); 187 static int roff_char(ROFF_ARGS); 188 static int roff_cond(ROFF_ARGS); 189 static int roff_cond_text(ROFF_ARGS); 190 static int roff_cond_sub(ROFF_ARGS); 191 static int roff_ds(ROFF_ARGS); 192 static int roff_ec(ROFF_ARGS); 193 static int roff_eo(ROFF_ARGS); 194 static int roff_eqndelim(struct roff *, struct buf *, int); 195 static int roff_evalcond(struct roff *r, int, char *, int *); 196 
static int roff_evalnum(struct roff *, int, 197 const char *, int *, int *, int); 198 static int roff_evalpar(struct roff *, int, 199 const char *, int *, int *, int); 200 static int roff_evalstrcond(const char *, int *); 201 static int roff_expand(struct roff *, struct buf *, 202 int, int, char); 203 static void roff_free1(struct roff *); 204 static void roff_freereg(struct roffreg *); 205 static void roff_freestr(struct roffkv *); 206 static size_t roff_getname(struct roff *, char **, int, int); 207 static int roff_getnum(const char *, int *, int *, int); 208 static int roff_getop(const char *, int *, char *); 209 static int roff_getregn(struct roff *, 210 const char *, size_t, char); 211 static int roff_getregro(const struct roff *, 212 const char *name); 213 static const char *roff_getstrn(struct roff *, 214 const char *, size_t, int *); 215 static int roff_hasregn(const struct roff *, 216 const char *, size_t); 217 static int roff_insec(ROFF_ARGS); 218 static int roff_it(ROFF_ARGS); 219 static int roff_line_ignore(ROFF_ARGS); 220 static void roff_man_alloc1(struct roff_man *); 221 static void roff_man_free1(struct roff_man *); 222 static int roff_manyarg(ROFF_ARGS); 223 static int roff_noarg(ROFF_ARGS); 224 static int roff_nop(ROFF_ARGS); 225 static int roff_nr(ROFF_ARGS); 226 static int roff_onearg(ROFF_ARGS); 227 static enum roff_tok roff_parse(struct roff *, char *, int *, 228 int, int); 229 static int roff_parsetext(struct roff *, struct buf *, 230 int, int *); 231 static int roff_renamed(ROFF_ARGS); 232 static int roff_return(ROFF_ARGS); 233 static int roff_rm(ROFF_ARGS); 234 static int roff_rn(ROFF_ARGS); 235 static int roff_rr(ROFF_ARGS); 236 static void roff_setregn(struct roff *, const char *, 237 size_t, int, char, int); 238 static void roff_setstr(struct roff *, 239 const char *, const char *, int); 240 static void roff_setstrn(struct roffkv **, const char *, 241 size_t, const char *, size_t, int); 242 static int roff_shift(ROFF_ARGS); 243 static 
int roff_so(ROFF_ARGS); 244 static int roff_tr(ROFF_ARGS); 245 static int roff_Dd(ROFF_ARGS); 246 static int roff_TE(ROFF_ARGS); 247 static int roff_TS(ROFF_ARGS); 248 static int roff_EQ(ROFF_ARGS); 249 static int roff_EN(ROFF_ARGS); 250 static int roff_T_(ROFF_ARGS); 251 static int roff_unsupp(ROFF_ARGS); 252 static int roff_userdef(ROFF_ARGS); 253 254 /* --- constant data ------------------------------------------------------ */ 255 256 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 257 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 258 259 const char *__roff_name[MAN_MAX + 1] = { 260 "br", "ce", "fi", "ft", 261 "ll", "mc", "nf", 262 "po", "rj", "sp", 263 "ta", "ti", NULL, 264 "ab", "ad", "af", "aln", 265 "als", "am", "am1", "ami", 266 "ami1", "as", "as1", "asciify", 267 "backtrace", "bd", "bleedat", "blm", 268 "box", "boxa", "bp", "BP", 269 "break", "breakchar", "brnl", "brp", 270 "brpnl", "c2", "cc", 271 "cf", "cflags", "ch", "char", 272 "chop", "class", "close", "CL", 273 "color", "composite", "continue", "cp", 274 "cropat", "cs", "cu", "da", 275 "dch", "Dd", "de", "de1", 276 "defcolor", "dei", "dei1", "device", 277 "devicem", "di", "do", "ds", 278 "ds1", "dwh", "dt", "ec", 279 "ecr", "ecs", "el", "em", 280 "EN", "eo", "EP", "EQ", 281 "errprint", "ev", "evc", "ex", 282 "fallback", "fam", "fc", "fchar", 283 "fcolor", "fdeferlig", "feature", "fkern", 284 "fl", "flig", "fp", "fps", 285 "fschar", "fspacewidth", "fspecial", "ftr", 286 "fzoom", "gcolor", "hc", "hcode", 287 "hidechar", "hla", "hlm", "hpf", 288 "hpfa", "hpfcode", "hw", "hy", 289 "hylang", "hylen", "hym", "hypp", 290 "hys", "ie", "if", "ig", 291 "index", "it", "itc", "IX", 292 "kern", "kernafter", "kernbefore", "kernpair", 293 "lc", "lc_ctype", "lds", "length", 294 "letadj", "lf", "lg", "lhang", 295 "linetabs", "lnr", "lnrf", "lpfx", 296 "ls", "lsm", "lt", 297 "mediasize", "minss", "mk", "mso", 298 "na", "ne", "nh", "nhychar", 299 "nm", "nn", "nop", "nr", 
300 "nrf", "nroff", "ns", "nx", 301 "open", "opena", "os", "output", 302 "padj", "papersize", "pc", "pev", 303 "pi", "PI", "pl", "pm", 304 "pn", "pnr", "ps", 305 "psbb", "pshape", "pso", "ptr", 306 "pvs", "rchar", "rd", "recursionlimit", 307 "return", "rfschar", "rhang", 308 "rm", "rn", "rnn", "rr", 309 "rs", "rt", "schar", "sentchar", 310 "shc", "shift", "sizes", "so", 311 "spacewidth", "special", "spreadwarn", "ss", 312 "sty", "substring", "sv", "sy", 313 "T&", "tc", "TE", 314 "TH", "tkf", "tl", 315 "tm", "tm1", "tmc", "tr", 316 "track", "transchar", "trf", "trimat", 317 "trin", "trnt", "troff", "TS", 318 "uf", "ul", "unformat", "unwatch", 319 "unwatchn", "vpt", "vs", "warn", 320 "warnscale", "watch", "watchlength", "watchn", 321 "wh", "while", "write", "writec", 322 "writem", "xflag", ".", NULL, 323 NULL, "text", 324 "Dd", "Dt", "Os", "Sh", 325 "Ss", "Pp", "D1", "Dl", 326 "Bd", "Ed", "Bl", "El", 327 "It", "Ad", "An", "Ap", 328 "Ar", "Cd", "Cm", "Dv", 329 "Er", "Ev", "Ex", "Fa", 330 "Fd", "Fl", "Fn", "Ft", 331 "Ic", "In", "Li", "Nd", 332 "Nm", "Op", "Ot", "Pa", 333 "Rv", "St", "Va", "Vt", 334 "Xr", "%A", "%B", "%D", 335 "%I", "%J", "%N", "%O", 336 "%P", "%R", "%T", "%V", 337 "Ac", "Ao", "Aq", "At", 338 "Bc", "Bf", "Bo", "Bq", 339 "Bsx", "Bx", "Db", "Dc", 340 "Do", "Dq", "Ec", "Ef", 341 "Em", "Eo", "Fx", "Ms", 342 "No", "Ns", "Nx", "Ox", 343 "Pc", "Pf", "Po", "Pq", 344 "Qc", "Ql", "Qo", "Qq", 345 "Re", "Rs", "Sc", "So", 346 "Sq", "Sm", "Sx", "Sy", 347 "Tn", "Ux", "Xc", "Xo", 348 "Fo", "Fc", "Oo", "Oc", 349 "Bk", "Ek", "Bt", "Hf", 350 "Fr", "Ud", "Lb", "Lp", 351 "Lk", "Mt", "Brq", "Bro", 352 "Brc", "%C", "Es", "En", 353 "Dx", "%Q", "%U", "Ta", 354 NULL, 355 "TH", "SH", "SS", "TP", 356 "TQ", 357 "LP", "PP", "P", "IP", 358 "HP", "SM", "SB", "BI", 359 "IB", "BR", "RB", "R", 360 "B", "I", "IR", "RI", 361 "RE", "RS", "DT", "UC", 362 "PD", "AT", "in", 363 "SY", "YS", "OP", 364 "EX", "EE", "UR", 365 "UE", "MT", "ME", NULL 366 }; 367 const char *const *roff_name = 
__roff_name; 368 369 static struct roffmac roffs[TOKEN_NONE] = { 370 { roff_noarg, NULL, NULL, 0 }, /* br */ 371 { roff_onearg, NULL, NULL, 0 }, /* ce */ 372 { roff_noarg, NULL, NULL, 0 }, /* fi */ 373 { roff_onearg, NULL, NULL, 0 }, /* ft */ 374 { roff_onearg, NULL, NULL, 0 }, /* ll */ 375 { roff_onearg, NULL, NULL, 0 }, /* mc */ 376 { roff_noarg, NULL, NULL, 0 }, /* nf */ 377 { roff_onearg, NULL, NULL, 0 }, /* po */ 378 { roff_onearg, NULL, NULL, 0 }, /* rj */ 379 { roff_onearg, NULL, NULL, 0 }, /* sp */ 380 { roff_manyarg, NULL, NULL, 0 }, /* ta */ 381 { roff_onearg, NULL, NULL, 0 }, /* ti */ 382 { NULL, NULL, NULL, 0 }, /* ROFF_MAX */ 383 { roff_unsupp, NULL, NULL, 0 }, /* ab */ 384 { roff_line_ignore, NULL, NULL, 0 }, /* ad */ 385 { roff_line_ignore, NULL, NULL, 0 }, /* af */ 386 { roff_unsupp, NULL, NULL, 0 }, /* aln */ 387 { roff_als, NULL, NULL, 0 }, /* als */ 388 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am */ 389 { roff_block, roff_block_text, roff_block_sub, 0 }, /* am1 */ 390 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami */ 391 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ami1 */ 392 { roff_ds, NULL, NULL, 0 }, /* as */ 393 { roff_ds, NULL, NULL, 0 }, /* as1 */ 394 { roff_unsupp, NULL, NULL, 0 }, /* asciify */ 395 { roff_line_ignore, NULL, NULL, 0 }, /* backtrace */ 396 { roff_line_ignore, NULL, NULL, 0 }, /* bd */ 397 { roff_line_ignore, NULL, NULL, 0 }, /* bleedat */ 398 { roff_unsupp, NULL, NULL, 0 }, /* blm */ 399 { roff_unsupp, NULL, NULL, 0 }, /* box */ 400 { roff_unsupp, NULL, NULL, 0 }, /* boxa */ 401 { roff_line_ignore, NULL, NULL, 0 }, /* bp */ 402 { roff_unsupp, NULL, NULL, 0 }, /* BP */ 403 { roff_unsupp, NULL, NULL, 0 }, /* break */ 404 { roff_line_ignore, NULL, NULL, 0 }, /* breakchar */ 405 { roff_line_ignore, NULL, NULL, 0 }, /* brnl */ 406 { roff_noarg, NULL, NULL, 0 }, /* brp */ 407 { roff_line_ignore, NULL, NULL, 0 }, /* brpnl */ 408 { roff_unsupp, NULL, NULL, 0 }, /* c2 */ 409 { roff_cc, NULL, 
NULL, 0 }, /* cc */ 410 { roff_insec, NULL, NULL, 0 }, /* cf */ 411 { roff_line_ignore, NULL, NULL, 0 }, /* cflags */ 412 { roff_line_ignore, NULL, NULL, 0 }, /* ch */ 413 { roff_char, NULL, NULL, 0 }, /* char */ 414 { roff_unsupp, NULL, NULL, 0 }, /* chop */ 415 { roff_line_ignore, NULL, NULL, 0 }, /* class */ 416 { roff_insec, NULL, NULL, 0 }, /* close */ 417 { roff_unsupp, NULL, NULL, 0 }, /* CL */ 418 { roff_line_ignore, NULL, NULL, 0 }, /* color */ 419 { roff_unsupp, NULL, NULL, 0 }, /* composite */ 420 { roff_unsupp, NULL, NULL, 0 }, /* continue */ 421 { roff_line_ignore, NULL, NULL, 0 }, /* cp */ 422 { roff_line_ignore, NULL, NULL, 0 }, /* cropat */ 423 { roff_line_ignore, NULL, NULL, 0 }, /* cs */ 424 { roff_line_ignore, NULL, NULL, 0 }, /* cu */ 425 { roff_unsupp, NULL, NULL, 0 }, /* da */ 426 { roff_unsupp, NULL, NULL, 0 }, /* dch */ 427 { roff_Dd, NULL, NULL, 0 }, /* Dd */ 428 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de */ 429 { roff_block, roff_block_text, roff_block_sub, 0 }, /* de1 */ 430 { roff_line_ignore, NULL, NULL, 0 }, /* defcolor */ 431 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei */ 432 { roff_block, roff_block_text, roff_block_sub, 0 }, /* dei1 */ 433 { roff_unsupp, NULL, NULL, 0 }, /* device */ 434 { roff_unsupp, NULL, NULL, 0 }, /* devicem */ 435 { roff_unsupp, NULL, NULL, 0 }, /* di */ 436 { roff_unsupp, NULL, NULL, 0 }, /* do */ 437 { roff_ds, NULL, NULL, 0 }, /* ds */ 438 { roff_ds, NULL, NULL, 0 }, /* ds1 */ 439 { roff_unsupp, NULL, NULL, 0 }, /* dwh */ 440 { roff_unsupp, NULL, NULL, 0 }, /* dt */ 441 { roff_ec, NULL, NULL, 0 }, /* ec */ 442 { roff_unsupp, NULL, NULL, 0 }, /* ecr */ 443 { roff_unsupp, NULL, NULL, 0 }, /* ecs */ 444 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* el */ 445 { roff_unsupp, NULL, NULL, 0 }, /* em */ 446 { roff_EN, NULL, NULL, 0 }, /* EN */ 447 { roff_eo, NULL, NULL, 0 }, /* eo */ 448 { roff_unsupp, NULL, NULL, 0 }, /* EP */ 449 { roff_EQ, NULL, NULL, 0 }, /* EQ 
*/ 450 { roff_line_ignore, NULL, NULL, 0 }, /* errprint */ 451 { roff_unsupp, NULL, NULL, 0 }, /* ev */ 452 { roff_unsupp, NULL, NULL, 0 }, /* evc */ 453 { roff_unsupp, NULL, NULL, 0 }, /* ex */ 454 { roff_line_ignore, NULL, NULL, 0 }, /* fallback */ 455 { roff_line_ignore, NULL, NULL, 0 }, /* fam */ 456 { roff_unsupp, NULL, NULL, 0 }, /* fc */ 457 { roff_unsupp, NULL, NULL, 0 }, /* fchar */ 458 { roff_line_ignore, NULL, NULL, 0 }, /* fcolor */ 459 { roff_line_ignore, NULL, NULL, 0 }, /* fdeferlig */ 460 { roff_line_ignore, NULL, NULL, 0 }, /* feature */ 461 { roff_line_ignore, NULL, NULL, 0 }, /* fkern */ 462 { roff_line_ignore, NULL, NULL, 0 }, /* fl */ 463 { roff_line_ignore, NULL, NULL, 0 }, /* flig */ 464 { roff_line_ignore, NULL, NULL, 0 }, /* fp */ 465 { roff_line_ignore, NULL, NULL, 0 }, /* fps */ 466 { roff_unsupp, NULL, NULL, 0 }, /* fschar */ 467 { roff_line_ignore, NULL, NULL, 0 }, /* fspacewidth */ 468 { roff_line_ignore, NULL, NULL, 0 }, /* fspecial */ 469 { roff_line_ignore, NULL, NULL, 0 }, /* ftr */ 470 { roff_line_ignore, NULL, NULL, 0 }, /* fzoom */ 471 { roff_line_ignore, NULL, NULL, 0 }, /* gcolor */ 472 { roff_line_ignore, NULL, NULL, 0 }, /* hc */ 473 { roff_line_ignore, NULL, NULL, 0 }, /* hcode */ 474 { roff_line_ignore, NULL, NULL, 0 }, /* hidechar */ 475 { roff_line_ignore, NULL, NULL, 0 }, /* hla */ 476 { roff_line_ignore, NULL, NULL, 0 }, /* hlm */ 477 { roff_line_ignore, NULL, NULL, 0 }, /* hpf */ 478 { roff_line_ignore, NULL, NULL, 0 }, /* hpfa */ 479 { roff_line_ignore, NULL, NULL, 0 }, /* hpfcode */ 480 { roff_line_ignore, NULL, NULL, 0 }, /* hw */ 481 { roff_line_ignore, NULL, NULL, 0 }, /* hy */ 482 { roff_line_ignore, NULL, NULL, 0 }, /* hylang */ 483 { roff_line_ignore, NULL, NULL, 0 }, /* hylen */ 484 { roff_line_ignore, NULL, NULL, 0 }, /* hym */ 485 { roff_line_ignore, NULL, NULL, 0 }, /* hypp */ 486 { roff_line_ignore, NULL, NULL, 0 }, /* hys */ 487 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* ie */ 488 { 
roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /* if */ 489 { roff_block, roff_block_text, roff_block_sub, 0 }, /* ig */ 490 { roff_unsupp, NULL, NULL, 0 }, /* index */ 491 { roff_it, NULL, NULL, 0 }, /* it */ 492 { roff_unsupp, NULL, NULL, 0 }, /* itc */ 493 { roff_line_ignore, NULL, NULL, 0 }, /* IX */ 494 { roff_line_ignore, NULL, NULL, 0 }, /* kern */ 495 { roff_line_ignore, NULL, NULL, 0 }, /* kernafter */ 496 { roff_line_ignore, NULL, NULL, 0 }, /* kernbefore */ 497 { roff_line_ignore, NULL, NULL, 0 }, /* kernpair */ 498 { roff_unsupp, NULL, NULL, 0 }, /* lc */ 499 { roff_unsupp, NULL, NULL, 0 }, /* lc_ctype */ 500 { roff_unsupp, NULL, NULL, 0 }, /* lds */ 501 { roff_unsupp, NULL, NULL, 0 }, /* length */ 502 { roff_line_ignore, NULL, NULL, 0 }, /* letadj */ 503 { roff_insec, NULL, NULL, 0 }, /* lf */ 504 { roff_line_ignore, NULL, NULL, 0 }, /* lg */ 505 { roff_line_ignore, NULL, NULL, 0 }, /* lhang */ 506 { roff_unsupp, NULL, NULL, 0 }, /* linetabs */ 507 { roff_unsupp, NULL, NULL, 0 }, /* lnr */ 508 { roff_unsupp, NULL, NULL, 0 }, /* lnrf */ 509 { roff_unsupp, NULL, NULL, 0 }, /* lpfx */ 510 { roff_line_ignore, NULL, NULL, 0 }, /* ls */ 511 { roff_unsupp, NULL, NULL, 0 }, /* lsm */ 512 { roff_line_ignore, NULL, NULL, 0 }, /* lt */ 513 { roff_line_ignore, NULL, NULL, 0 }, /* mediasize */ 514 { roff_line_ignore, NULL, NULL, 0 }, /* minss */ 515 { roff_line_ignore, NULL, NULL, 0 }, /* mk */ 516 { roff_insec, NULL, NULL, 0 }, /* mso */ 517 { roff_line_ignore, NULL, NULL, 0 }, /* na */ 518 { roff_line_ignore, NULL, NULL, 0 }, /* ne */ 519 { roff_line_ignore, NULL, NULL, 0 }, /* nh */ 520 { roff_line_ignore, NULL, NULL, 0 }, /* nhychar */ 521 { roff_unsupp, NULL, NULL, 0 }, /* nm */ 522 { roff_unsupp, NULL, NULL, 0 }, /* nn */ 523 { roff_nop, NULL, NULL, 0 }, /* nop */ 524 { roff_nr, NULL, NULL, 0 }, /* nr */ 525 { roff_unsupp, NULL, NULL, 0 }, /* nrf */ 526 { roff_line_ignore, NULL, NULL, 0 }, /* nroff */ 527 { roff_line_ignore, NULL, NULL, 0 }, /* 
ns */ 528 { roff_insec, NULL, NULL, 0 }, /* nx */ 529 { roff_insec, NULL, NULL, 0 }, /* open */ 530 { roff_insec, NULL, NULL, 0 }, /* opena */ 531 { roff_line_ignore, NULL, NULL, 0 }, /* os */ 532 { roff_unsupp, NULL, NULL, 0 }, /* output */ 533 { roff_line_ignore, NULL, NULL, 0 }, /* padj */ 534 { roff_line_ignore, NULL, NULL, 0 }, /* papersize */ 535 { roff_line_ignore, NULL, NULL, 0 }, /* pc */ 536 { roff_line_ignore, NULL, NULL, 0 }, /* pev */ 537 { roff_insec, NULL, NULL, 0 }, /* pi */ 538 { roff_unsupp, NULL, NULL, 0 }, /* PI */ 539 { roff_line_ignore, NULL, NULL, 0 }, /* pl */ 540 { roff_line_ignore, NULL, NULL, 0 }, /* pm */ 541 { roff_line_ignore, NULL, NULL, 0 }, /* pn */ 542 { roff_line_ignore, NULL, NULL, 0 }, /* pnr */ 543 { roff_line_ignore, NULL, NULL, 0 }, /* ps */ 544 { roff_unsupp, NULL, NULL, 0 }, /* psbb */ 545 { roff_unsupp, NULL, NULL, 0 }, /* pshape */ 546 { roff_insec, NULL, NULL, 0 }, /* pso */ 547 { roff_line_ignore, NULL, NULL, 0 }, /* ptr */ 548 { roff_line_ignore, NULL, NULL, 0 }, /* pvs */ 549 { roff_unsupp, NULL, NULL, 0 }, /* rchar */ 550 { roff_line_ignore, NULL, NULL, 0 }, /* rd */ 551 { roff_line_ignore, NULL, NULL, 0 }, /* recursionlimit */ 552 { roff_return, NULL, NULL, 0 }, /* return */ 553 { roff_unsupp, NULL, NULL, 0 }, /* rfschar */ 554 { roff_line_ignore, NULL, NULL, 0 }, /* rhang */ 555 { roff_rm, NULL, NULL, 0 }, /* rm */ 556 { roff_rn, NULL, NULL, 0 }, /* rn */ 557 { roff_unsupp, NULL, NULL, 0 }, /* rnn */ 558 { roff_rr, NULL, NULL, 0 }, /* rr */ 559 { roff_line_ignore, NULL, NULL, 0 }, /* rs */ 560 { roff_line_ignore, NULL, NULL, 0 }, /* rt */ 561 { roff_unsupp, NULL, NULL, 0 }, /* schar */ 562 { roff_line_ignore, NULL, NULL, 0 }, /* sentchar */ 563 { roff_line_ignore, NULL, NULL, 0 }, /* shc */ 564 { roff_shift, NULL, NULL, 0 }, /* shift */ 565 { roff_line_ignore, NULL, NULL, 0 }, /* sizes */ 566 { roff_so, NULL, NULL, 0 }, /* so */ 567 { roff_line_ignore, NULL, NULL, 0 }, /* spacewidth */ 568 { roff_line_ignore, NULL, 
NULL, 0 }, /* special */ 569 { roff_line_ignore, NULL, NULL, 0 }, /* spreadwarn */ 570 { roff_line_ignore, NULL, NULL, 0 }, /* ss */ 571 { roff_line_ignore, NULL, NULL, 0 }, /* sty */ 572 { roff_unsupp, NULL, NULL, 0 }, /* substring */ 573 { roff_line_ignore, NULL, NULL, 0 }, /* sv */ 574 { roff_insec, NULL, NULL, 0 }, /* sy */ 575 { roff_T_, NULL, NULL, 0 }, /* T& */ 576 { roff_unsupp, NULL, NULL, 0 }, /* tc */ 577 { roff_TE, NULL, NULL, 0 }, /* TE */ 578 { roff_Dd, NULL, NULL, 0 }, /* TH */ 579 { roff_line_ignore, NULL, NULL, 0 }, /* tkf */ 580 { roff_unsupp, NULL, NULL, 0 }, /* tl */ 581 { roff_line_ignore, NULL, NULL, 0 }, /* tm */ 582 { roff_line_ignore, NULL, NULL, 0 }, /* tm1 */ 583 { roff_line_ignore, NULL, NULL, 0 }, /* tmc */ 584 { roff_tr, NULL, NULL, 0 }, /* tr */ 585 { roff_line_ignore, NULL, NULL, 0 }, /* track */ 586 { roff_line_ignore, NULL, NULL, 0 }, /* transchar */ 587 { roff_insec, NULL, NULL, 0 }, /* trf */ 588 { roff_line_ignore, NULL, NULL, 0 }, /* trimat */ 589 { roff_unsupp, NULL, NULL, 0 }, /* trin */ 590 { roff_unsupp, NULL, NULL, 0 }, /* trnt */ 591 { roff_line_ignore, NULL, NULL, 0 }, /* troff */ 592 { roff_TS, NULL, NULL, 0 }, /* TS */ 593 { roff_line_ignore, NULL, NULL, 0 }, /* uf */ 594 { roff_line_ignore, NULL, NULL, 0 }, /* ul */ 595 { roff_unsupp, NULL, NULL, 0 }, /* unformat */ 596 { roff_line_ignore, NULL, NULL, 0 }, /* unwatch */ 597 { roff_line_ignore, NULL, NULL, 0 }, /* unwatchn */ 598 { roff_line_ignore, NULL, NULL, 0 }, /* vpt */ 599 { roff_line_ignore, NULL, NULL, 0 }, /* vs */ 600 { roff_line_ignore, NULL, NULL, 0 }, /* warn */ 601 { roff_line_ignore, NULL, NULL, 0 }, /* warnscale */ 602 { roff_line_ignore, NULL, NULL, 0 }, /* watch */ 603 { roff_line_ignore, NULL, NULL, 0 }, /* watchlength */ 604 { roff_line_ignore, NULL, NULL, 0 }, /* watchn */ 605 { roff_unsupp, NULL, NULL, 0 }, /* wh */ 606 { roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT }, /*while*/ 607 { roff_insec, NULL, NULL, 0 }, /* write */ 608 { 
roff_insec, NULL, NULL, 0 }, /* writec */ 609 { roff_insec, NULL, NULL, 0 }, /* writem */ 610 { roff_line_ignore, NULL, NULL, 0 }, /* xflag */ 611 { roff_cblock, NULL, NULL, 0 }, /* . */ 612 { roff_renamed, NULL, NULL, 0 }, 613 { roff_userdef, NULL, NULL, 0 } 614 }; 615 616 /* Array of injected predefined strings. */ 617 #define PREDEFS_MAX 38 618 static const struct predef predefs[PREDEFS_MAX] = { 619 #include "predefs.in" 620 }; 621 622 static int roffce_lines; /* number of input lines to center */ 623 static struct roff_node *roffce_node; /* active request */ 624 static int roffit_lines; /* number of lines to delay */ 625 static char *roffit_macro; /* nil-terminated macro line */ 626 627 628 /* --- request table ------------------------------------------------------ */ 629 630 struct ohash * 631 roffhash_alloc(enum roff_tok mintok, enum roff_tok maxtok) 632 { 633 struct ohash *htab; 634 struct roffreq *req; 635 enum roff_tok tok; 636 size_t sz; 637 unsigned int slot; 638 639 htab = mandoc_malloc(sizeof(*htab)); 640 mandoc_ohash_init(htab, 8, offsetof(struct roffreq, name)); 641 642 for (tok = mintok; tok < maxtok; tok++) { 643 if (roff_name[tok] == NULL) 644 continue; 645 sz = strlen(roff_name[tok]); 646 req = mandoc_malloc(sizeof(*req) + sz + 1); 647 req->tok = tok; 648 memcpy(req->name, roff_name[tok], sz + 1); 649 slot = ohash_qlookup(htab, req->name); 650 ohash_insert(htab, slot, req); 651 } 652 return htab; 653 } 654 655 void 656 roffhash_free(struct ohash *htab) 657 { 658 struct roffreq *req; 659 unsigned int slot; 660 661 if (htab == NULL) 662 return; 663 for (req = ohash_first(htab, &slot); req != NULL; 664 req = ohash_next(htab, &slot)) 665 free(req); 666 ohash_delete(htab); 667 free(htab); 668 } 669 670 enum roff_tok 671 roffhash_find(struct ohash *htab, const char *name, size_t sz) 672 { 673 struct roffreq *req; 674 const char *end; 675 676 if (sz) { 677 end = name + sz; 678 req = ohash_find(htab, ohash_qlookupi(htab, name, &end)); 679 } else 680 req 
= ohash_find(htab, ohash_qlookup(htab, name)); 681 return req == NULL ? TOKEN_NONE : req->tok; 682 } 683 684 /* --- stack of request blocks -------------------------------------------- */ 685 686 /* 687 * Pop the current node off of the stack of roff instructions currently 688 * pending. 689 */ 690 static int 691 roffnode_pop(struct roff *r) 692 { 693 struct roffnode *p; 694 int inloop; 695 696 p = r->last; 697 inloop = p->tok == ROFF_while; 698 r->last = p->parent; 699 free(p->name); 700 free(p->end); 701 free(p); 702 return inloop; 703 } 704 705 /* 706 * Push a roff node onto the instruction stack. This must later be 707 * removed with roffnode_pop(). 708 */ 709 static void 710 roffnode_push(struct roff *r, enum roff_tok tok, const char *name, 711 int line, int col) 712 { 713 struct roffnode *p; 714 715 p = mandoc_calloc(1, sizeof(struct roffnode)); 716 p->tok = tok; 717 if (name) 718 p->name = mandoc_strdup(name); 719 p->parent = r->last; 720 p->line = line; 721 p->col = col; 722 p->rule = p->parent ? 
p->parent->rule : 0; 723 724 r->last = p; 725 } 726 727 /* --- roff parser state data management ---------------------------------- */ 728 729 static void 730 roff_free1(struct roff *r) 731 { 732 int i; 733 734 tbl_free(r->first_tbl); 735 r->first_tbl = r->last_tbl = r->tbl = NULL; 736 737 eqn_free(r->last_eqn); 738 r->last_eqn = r->eqn = NULL; 739 740 while (r->mstackpos >= 0) 741 roff_userret(r); 742 743 while (r->last) 744 roffnode_pop(r); 745 746 free (r->rstack); 747 r->rstack = NULL; 748 r->rstacksz = 0; 749 r->rstackpos = -1; 750 751 roff_freereg(r->regtab); 752 r->regtab = NULL; 753 754 roff_freestr(r->strtab); 755 roff_freestr(r->rentab); 756 roff_freestr(r->xmbtab); 757 r->strtab = r->rentab = r->xmbtab = NULL; 758 759 if (r->xtab) 760 for (i = 0; i < 128; i++) 761 free(r->xtab[i].p); 762 free(r->xtab); 763 r->xtab = NULL; 764 } 765 766 void 767 roff_reset(struct roff *r) 768 { 769 roff_free1(r); 770 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 771 r->control = '\0'; 772 r->escape = '\\'; 773 roffce_lines = 0; 774 roffce_node = NULL; 775 roffit_lines = 0; 776 roffit_macro = NULL; 777 } 778 779 void 780 roff_free(struct roff *r) 781 { 782 int i; 783 784 roff_free1(r); 785 for (i = 0; i < r->mstacksz; i++) 786 free(r->mstack[i].argv); 787 free(r->mstack); 788 roffhash_free(r->reqtab); 789 free(r); 790 } 791 792 struct roff * 793 roff_alloc(int options) 794 { 795 struct roff *r; 796 797 r = mandoc_calloc(1, sizeof(struct roff)); 798 r->reqtab = roffhash_alloc(0, ROFF_RENAMED); 799 r->options = options; 800 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 801 r->mstackpos = -1; 802 r->rstackpos = -1; 803 r->escape = '\\'; 804 return r; 805 } 806 807 /* --- syntax tree state data management ---------------------------------- */ 808 809 static void 810 roff_man_free1(struct roff_man *man) 811 { 812 if (man->meta.first != NULL) 813 roff_node_delete(man, man->meta.first); 814 free(man->meta.msec); 815 free(man->meta.vol); 816 free(man->meta.os); 817 
free(man->meta.arch); 818 free(man->meta.title); 819 free(man->meta.name); 820 free(man->meta.date); 821 free(man->meta.sodest); 822 } 823 824 void 825 roff_state_reset(struct roff_man *man) 826 { 827 man->last = man->meta.first; 828 man->last_es = NULL; 829 man->flags = 0; 830 man->lastsec = man->lastnamed = SEC_NONE; 831 man->next = ROFF_NEXT_CHILD; 832 roff_setreg(man->roff, "nS", 0, '='); 833 } 834 835 static void 836 roff_man_alloc1(struct roff_man *man) 837 { 838 memset(&man->meta, 0, sizeof(man->meta)); 839 man->meta.first = mandoc_calloc(1, sizeof(*man->meta.first)); 840 man->meta.first->type = ROFFT_ROOT; 841 man->meta.macroset = MACROSET_NONE; 842 roff_state_reset(man); 843 } 844 845 void 846 roff_man_reset(struct roff_man *man) 847 { 848 roff_man_free1(man); 849 roff_man_alloc1(man); 850 } 851 852 void 853 roff_man_free(struct roff_man *man) 854 { 855 roff_man_free1(man); 856 free(man); 857 } 858 859 struct roff_man * 860 roff_man_alloc(struct roff *roff, const char *os_s, int quick) 861 { 862 struct roff_man *man; 863 864 man = mandoc_calloc(1, sizeof(*man)); 865 man->roff = roff; 866 man->os_s = os_s; 867 man->quick = quick; 868 roff_man_alloc1(man); 869 roff->man = man; 870 return man; 871 } 872 873 /* --- syntax tree handling ----------------------------------------------- */ 874 875 struct roff_node * 876 roff_node_alloc(struct roff_man *man, int line, int pos, 877 enum roff_type type, int tok) 878 { 879 struct roff_node *n; 880 881 n = mandoc_calloc(1, sizeof(*n)); 882 n->line = line; 883 n->pos = pos; 884 n->tok = tok; 885 n->type = type; 886 n->sec = man->lastsec; 887 888 if (man->flags & MDOC_SYNOPSIS) 889 n->flags |= NODE_SYNPRETTY; 890 else 891 n->flags &= ~NODE_SYNPRETTY; 892 if ((man->flags & (ROFF_NOFILL | ROFF_NONOFILL)) == ROFF_NOFILL) 893 n->flags |= NODE_NOFILL; 894 else 895 n->flags &= ~NODE_NOFILL; 896 if (man->flags & MDOC_NEWLINE) 897 n->flags |= NODE_LINE; 898 man->flags &= ~MDOC_NEWLINE; 899 900 return n; 901 } 902 903 void 904 
roff_node_append(struct roff_man *man, struct roff_node *n) 905 { 906 907 switch (man->next) { 908 case ROFF_NEXT_SIBLING: 909 if (man->last->next != NULL) { 910 n->next = man->last->next; 911 man->last->next->prev = n; 912 } else 913 man->last->parent->last = n; 914 man->last->next = n; 915 n->prev = man->last; 916 n->parent = man->last->parent; 917 break; 918 case ROFF_NEXT_CHILD: 919 if (man->last->child != NULL) { 920 n->next = man->last->child; 921 man->last->child->prev = n; 922 } else 923 man->last->last = n; 924 man->last->child = n; 925 n->parent = man->last; 926 break; 927 default: 928 abort(); 929 } 930 man->last = n; 931 932 switch (n->type) { 933 case ROFFT_HEAD: 934 n->parent->head = n; 935 break; 936 case ROFFT_BODY: 937 if (n->end != ENDBODY_NOT) 938 return; 939 n->parent->body = n; 940 break; 941 case ROFFT_TAIL: 942 n->parent->tail = n; 943 break; 944 default: 945 return; 946 } 947 948 /* 949 * Copy over the normalised-data pointer of our parent. Not 950 * everybody has one, but copying a null pointer is fine. 
951 */ 952 953 n->norm = n->parent->norm; 954 assert(n->parent->type == ROFFT_BLOCK); 955 } 956 957 void 958 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 959 { 960 struct roff_node *n; 961 962 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 963 n->string = roff_strdup(man->roff, word); 964 roff_node_append(man, n); 965 n->flags |= NODE_VALID | NODE_ENDED; 966 man->next = ROFF_NEXT_SIBLING; 967 } 968 969 void 970 roff_word_append(struct roff_man *man, const char *word) 971 { 972 struct roff_node *n; 973 char *addstr, *newstr; 974 975 n = man->last; 976 addstr = roff_strdup(man->roff, word); 977 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 978 free(addstr); 979 free(n->string); 980 n->string = newstr; 981 man->next = ROFF_NEXT_SIBLING; 982 } 983 984 void 985 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 986 { 987 struct roff_node *n; 988 989 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 990 roff_node_append(man, n); 991 man->next = ROFF_NEXT_CHILD; 992 } 993 994 struct roff_node * 995 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 996 { 997 struct roff_node *n; 998 999 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1000 roff_node_append(man, n); 1001 man->next = ROFF_NEXT_CHILD; 1002 return n; 1003 } 1004 1005 struct roff_node * 1006 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1007 { 1008 struct roff_node *n; 1009 1010 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1011 roff_node_append(man, n); 1012 man->next = ROFF_NEXT_CHILD; 1013 return n; 1014 } 1015 1016 struct roff_node * 1017 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1018 { 1019 struct roff_node *n; 1020 1021 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1022 roff_node_append(man, n); 1023 man->next = ROFF_NEXT_CHILD; 1024 return n; 1025 } 1026 1027 static void 1028 roff_addtbl(struct roff_man *man, int line, struct tbl_node *tbl) 1029 { 1030 
struct roff_node *n; 1031 struct tbl_span *span; 1032 1033 if (man->meta.macroset == MACROSET_MAN) 1034 man_breakscope(man, ROFF_TS); 1035 while ((span = tbl_span(tbl)) != NULL) { 1036 n = roff_node_alloc(man, line, 0, ROFFT_TBL, TOKEN_NONE); 1037 n->span = span; 1038 roff_node_append(man, n); 1039 n->flags |= NODE_VALID | NODE_ENDED; 1040 man->next = ROFF_NEXT_SIBLING; 1041 } 1042 } 1043 1044 void 1045 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1046 { 1047 1048 /* Adjust siblings. */ 1049 1050 if (n->prev) 1051 n->prev->next = n->next; 1052 if (n->next) 1053 n->next->prev = n->prev; 1054 1055 /* Adjust parent. */ 1056 1057 if (n->parent != NULL) { 1058 if (n->parent->child == n) 1059 n->parent->child = n->next; 1060 if (n->parent->last == n) 1061 n->parent->last = n->prev; 1062 } 1063 1064 /* Adjust parse point. */ 1065 1066 if (man == NULL) 1067 return; 1068 if (man->last == n) { 1069 if (n->prev == NULL) { 1070 man->last = n->parent; 1071 man->next = ROFF_NEXT_CHILD; 1072 } else { 1073 man->last = n->prev; 1074 man->next = ROFF_NEXT_SIBLING; 1075 } 1076 } 1077 if (man->meta.first == n) 1078 man->meta.first = NULL; 1079 } 1080 1081 void 1082 roff_node_relink(struct roff_man *man, struct roff_node *n) 1083 { 1084 roff_node_unlink(man, n); 1085 n->prev = n->next = NULL; 1086 roff_node_append(man, n); 1087 } 1088 1089 void 1090 roff_node_free(struct roff_node *n) 1091 { 1092 1093 if (n->args != NULL) 1094 mdoc_argv_free(n->args); 1095 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1096 free(n->norm); 1097 eqn_box_free(n->eqn); 1098 free(n->string); 1099 free(n); 1100 } 1101 1102 void 1103 roff_node_delete(struct roff_man *man, struct roff_node *n) 1104 { 1105 1106 while (n->child != NULL) 1107 roff_node_delete(man, n->child); 1108 roff_node_unlink(man, n); 1109 roff_node_free(n); 1110 } 1111 1112 void 1113 deroff(char **dest, const struct roff_node *n) 1114 { 1115 char *cp; 1116 size_t sz; 1117 1118 if (n->type != ROFFT_TEXT) { 1119 for (n = 
n->child; n != NULL; n = n->next) 1120 deroff(dest, n); 1121 return; 1122 } 1123 1124 /* Skip leading whitespace. */ 1125 1126 for (cp = n->string; *cp != '\0'; cp++) { 1127 if (cp[0] == '\\' && cp[1] != '\0' && 1128 strchr(" %&0^|~", cp[1]) != NULL) 1129 cp++; 1130 else if ( ! isspace((unsigned char)*cp)) 1131 break; 1132 } 1133 1134 /* Skip trailing backslash. */ 1135 1136 sz = strlen(cp); 1137 if (sz > 0 && cp[sz - 1] == '\\') 1138 sz--; 1139 1140 /* Skip trailing whitespace. */ 1141 1142 for (; sz; sz--) 1143 if ( ! isspace((unsigned char)cp[sz-1])) 1144 break; 1145 1146 /* Skip empty strings. */ 1147 1148 if (sz == 0) 1149 return; 1150 1151 if (*dest == NULL) { 1152 *dest = mandoc_strndup(cp, sz); 1153 return; 1154 } 1155 1156 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1157 free(*dest); 1158 *dest = cp; 1159 } 1160 1161 /* --- main functions of the roff parser ---------------------------------- */ 1162 1163 /* 1164 * In the current line, expand escape sequences that produce parsable 1165 * input text. Also check the syntax of the remaining escape sequences, 1166 * which typically produce output glyphs or change formatter state. 1167 */ 1168 static int 1169 roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc) 1170 { 1171 struct mctx *ctx; /* current macro call context */ 1172 char ubuf[24]; /* buffer to print the number */ 1173 struct roff_node *n; /* used for header comments */ 1174 const char *start; /* start of the string to process */ 1175 char *stesc; /* start of an escape sequence ('\\') */ 1176 const char *esct; /* type of esccape sequence */ 1177 char *ep; /* end of comment string */ 1178 const char *stnam; /* start of the name, after "[(*" */ 1179 const char *cp; /* end of the name, e.g. 
before ']' */
	const char	*res;	/* the string to be substituted */
	char		*nbuf;	/* new buffer to copy buf->buf to */
	size_t		 maxl;  /* expected length of the escape name */
	size_t		 naml;	/* actual length of the escape name */
	size_t		 asz;	/* length of the replacement */
	size_t		 rsz;	/* length of the rest of the string */
	int		 inaml;	/* length returned from mandoc_escape() */
	int		 expand_count;	/* to avoid infinite loops */
	int		 npos;	/* position in numeric expression */
	int		 arg_complete; /* argument not interrupted by eol */
	int		 quote_args; /* true for \\$@, false for \\$* */
	int		 done;	/* no more input available */
	int		 deftype; /* type of definition to paste */
	int		 rcsid;	/* kind of RCS id seen */
	enum mandocerr	 err;	/* for escape sequence problems */
	char		 sign;	/* increment number register */
	char		 term;	/* character terminating the escape */

	/* Search forward for comments. */

	done = 0;
	start = buf->buf + pos;
	for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
		if (stesc[0] != newesc || stesc[1] == '\0')
			continue;
		stesc++;
		if (*stesc != '"' && *stesc != '#')
			continue;

		/*
		 * Comment found, look for RCS id.
		 * The tags are split into two string literals, and cp
		 * is advanced past the tag (8 or 7 bytes).
		 */

		rcsid = 0;
		if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_OPENBSD;
			cp += 8;
		} else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
			rcsid = 1 << MANDOC_OS_NETBSD;
			cp += 7;
		}
		if (cp != NULL &&
		    isalnum((unsigned char)*cp) == 0 &&
		    strchr(cp, '$') != NULL) {
			/* Complain if this kind of id was already seen. */
			if (r->man->meta.rcsids & rcsid)
				mandoc_msg(MANDOCERR_RCS_REP, ln,
				    (int)(stesc - buf->buf) + 1,
				    "%s", stesc + 1);
			r->man->meta.rcsids |= rcsid;
		}

		/*
		 * Handle trailing whitespace.
		 * As a side effect, stesc is moved back
		 * to the escape character starting the comment.
		 */

		ep = strchr(stesc--, '\0') - 1;
		if (*ep == '\n') {
			done = 1;
			ep--;
		}
		if (*ep == ' ' || *ep == '\t')
			mandoc_msg(MANDOCERR_SPACE_EOL,
			    ln, (int)(ep - buf->buf), NULL);

		/*
		 * Save comments preceding the title macro
		 * in the syntax tree.
		 */

		if (newesc != ASCII_ESC && r->format == 0) {
			while (*ep == ' ' || *ep == '\t')
				ep--;
			ep[1] = '\0';
			n = roff_node_alloc(r->man,
			    ln, stesc + 1 - buf->buf,
			    ROFFT_COMMENT, TOKEN_NONE);
			n->string = mandoc_strdup(stesc + 2);
			roff_node_append(r->man, n);
			n->flags |= NODE_VALID | NODE_ENDED;
			r->man->next = ROFF_NEXT_SIBLING;
		}

		/* Line continuation with comment. */

		if (stesc[1] == '#') {
			*stesc = '\0';
			return ROFF_IGN | ROFF_APPEND;
		}

		/*
		 * Discard normal comments, together with
		 * unescaped blanks directly preceding them.
		 */

		while (stesc > start && stesc[-1] == ' ' &&
		    (stesc == start + 1 || stesc[-2] != '\\'))
			stesc--;
		*stesc = '\0';
		break;
	}
	/* Nothing is left to process. */
	if (stesc == start)
		return ROFF_CONT;
	stesc--;

	/* Notice the end of the input. */

	if (*stesc == '\n') {
		*stesc-- = '\0';
		done = 1;
	}

	/* Scan backwards from the end of the line to start. */

	expand_count = 0;
	while (stesc >= start) {
		if (*stesc != newesc) {

			/*
			 * If we have a non-standard escape character,
			 * escape literal backslashes because all
			 * processing in subsequent functions uses
			 * the standard escaping rules.
			 */

			if (newesc != ASCII_ESC && *stesc == '\\') {
				*stesc = '\0';
				buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
				    buf->buf, stesc + 1) + 1;
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				free(buf->buf);
				buf->buf = nbuf;
			}

			/* Search backwards for the next escape. */

			stesc--;
			continue;
		}

		/*
		 * If it is escaped, skip it.
		 * cp is moved to the character before the run of
		 * escape characters that ends at stesc.
		 */

		for (cp = stesc - 1; cp >= start; cp--)
			if (*cp != r->escape)
				break;

		if ((stesc - cp) % 2 == 0) {
			/* Even run: rewrite it with backslashes. */
			while (stesc > cp)
				*stesc-- = '\\';
			continue;
		} else if (stesc[1] != '\0') {
			*stesc = '\\';
		} else {
			/* Escape character at the end of the line. */
			*stesc-- = '\0';
			if (done)
				continue;
			else
				return ROFF_IGN | ROFF_APPEND;
		}

		/* Decide whether to expand or to check only. */

		term = '\0';
		cp = stesc + 1;
		if (*cp == 'E')
			cp++;
		esct = cp;
		switch (*esct) {
		case '*':
		case '$':
			res = NULL;
			break;
		case 'B':
		case 'w':
			term = cp[1];
			/* FALLTHROUGH */
		case 'n':
			sign = cp[1];
			if (sign == '+' || sign == '-')
				cp++;
			res = ubuf;
			break;
		default:
			/* Not expanded here: only check the syntax. */
			err = MANDOCERR_OK;
			switch(mandoc_escape(&cp, &stnam, &inaml)) {
			case ESCAPE_SPECIAL:
				if (mchars_spec2cp(stnam, inaml) >= 0)
					break;
				/* FALLTHROUGH */
			case ESCAPE_ERROR:
				err = MANDOCERR_ESC_BAD;
				break;
			case ESCAPE_UNDEF:
				err = MANDOCERR_ESC_UNDEF;
				break;
			case ESCAPE_UNSUPP:
				err = MANDOCERR_ESC_UNSUPP;
				break;
			default:
				break;
			}
			if (err != MANDOCERR_OK)
				mandoc_msg(err, ln, (int)(stesc - buf->buf),
				    "%.*s", (int)(cp - stesc), stesc);
			stesc--;
			continue;
		}

		if (EXPAND_LIMIT < ++expand_count) {
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/*
		 * The third character decides the length
		 * of the name of the string or register.
		 * Save a pointer to the name.
		 */

		if (term == '\0') {
			switch (*++cp) {
			case '\0':
				maxl = 0;
				break;
			case '(':
				cp++;
				maxl = 2;
				break;
			case '[':
				cp++;
				term = ']';
				maxl = 0;
				break;
			default:
				maxl = 1;
				break;
			}
		} else {
			cp += 2;
			maxl = 0;
		}
		stnam = cp;

		/*
		 * Advance to the end of the name.
		 * A maxl of zero means that the name extends
		 * up to the terminating character term.
		 */

		naml = 0;
		arg_complete = 1;
		while (maxl == 0 || naml < maxl) {
			if (*cp == '\0') {
				mandoc_msg(MANDOCERR_ESC_BAD, ln,
				    (int)(stesc - buf->buf), "%s", stesc);
				arg_complete = 0;
				break;
			}
			if (maxl == 0 && *cp == term) {
				cp++;
				break;
			}
			if (*cp++ != '\\' || *esct != 'w') {
				naml++;
				continue;
			}
			/* Inside \w, some escape sequences count as one. */
			switch (mandoc_escape(&cp, NULL, NULL)) {
			case ESCAPE_SPECIAL:
			case ESCAPE_UNICODE:
			case ESCAPE_NUMBERED:
			case ESCAPE_UNDEF:
			case ESCAPE_OVERSTRIKE:
				naml++;
				break;
			default:
				break;
			}
		}

		/*
		 * Retrieve the replacement string; if it is
		 * undefined, resume searching for escapes.
		 */

		switch (*esct) {
		case '*':
			if (arg_complete) {
				deftype = ROFFDEF_USER | ROFFDEF_PRE;
				res = roff_getstrn(r, stnam, naml, &deftype);

				/*
				 * If not overridden, let \*(.T
				 * through to the formatters.
				 */

				if (res == NULL && naml == 2 &&
				    stnam[0] == '.' && stnam[1] == 'T') {
					roff_setstrn(&r->strtab,
					    ".T", 2, NULL, 0, 0);
					stesc--;
					continue;
				}
			}
			break;
		case '$':
			if (r->mstackpos < 0) {
				mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			ctx = r->mstack + r->mstackpos;
			/* \$1 to \$9 select a single argument. */
			npos = esct[1] - '1';
			if (npos >= 0 && npos <= 8) {
				res = npos < ctx->argc ?
				    ctx->argv[npos] : "";
				break;
			}
			if (esct[1] == '*')
				quote_args = 0;
			else if (esct[1] == '@')
				quote_args = 1;
			else {
				mandoc_msg(MANDOCERR_ARG_NONUM, ln,
				    (int)(stesc - buf->buf), "%.3s", stesc);
				break;
			}
			/* Compute the length of all arguments joined. */
			asz = 0;
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					asz++;  /* blank */
				if (quote_args)
					asz += 2;  /* quotes */
				asz += strlen(ctx->argv[npos]);
			}
			/*
			 * The escape sequence itself occupies three
			 * bytes; resize the buffer and move the rest
			 * of the line unless the lengths agree.
			 */
			if (asz != 3) {
				rsz = buf->sz - (stesc - buf->buf) - 3;
				if (asz < 3)
					memmove(stesc + asz, stesc + 3, rsz);
				buf->sz += asz - 3;
				nbuf = mandoc_realloc(buf->buf, buf->sz);
				start = nbuf + pos;
				stesc = nbuf + (stesc - buf->buf);
				buf->buf = nbuf;
				if (asz > 3)
					memmove(stesc + asz, stesc + 3, rsz);
			}
			/* Write the arguments over the escape sequence. */
			for (npos = 0; npos < ctx->argc; npos++) {
				if (npos)
					*stesc++ = ' ';
				if (quote_args)
					*stesc++ = '"';
				cp = ctx->argv[npos];
				while (*cp != '\0')
					*stesc++ = *cp++;
				if (quote_args)
					*stesc++ = '"';
			}
			continue;
		case 'B':
			/* Yield '1' if the whole name is a valid number. */
			npos = 0;
			ubuf[0] = arg_complete &&
			    roff_evalnum(r, ln, stnam, &npos,
			      NULL, ROFFNUM_SCALE) &&
			    stnam + npos + 1 == cp ? '1' : '0';
			ubuf[1] = '\0';
			break;
		case 'n':
			if (arg_complete)
				(void)snprintf(ubuf, sizeof(ubuf), "%d",
				    roff_getregn(r, stnam, naml, sign));
			else
				ubuf[0] = '\0';
			break;
		case 'w':
			/* use even incomplete args */
			(void)snprintf(ubuf, sizeof(ubuf), "%d",
			    24 * (int)naml);
			break;
		}

		if (res == NULL) {
			if (*esct == '*')
				mandoc_msg(MANDOCERR_STR_UNDEF,
				    ln, (int)(stesc - buf->buf),
				    "%.*s", (int)naml, stnam);
			res = "";
		} else if (buf->sz + strlen(res) > SHRT_MAX) {
			/* Refuse to let the line grow beyond SHRT_MAX. */
			mandoc_msg(MANDOCERR_ROFFLOOP,
			    ln, (int)(stesc - buf->buf), NULL);
			return ROFF_IGN;
		}

		/* Replace the escape sequence by the string. */

		*stesc = '\0';
		buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
		    buf->buf, res, cp) + 1;

		/*
		 * Prepare for the next replacement.
		 * The scan resumes at the end of the inserted string,
		 * such that the replacement is inspected, too.
		 */

		start = nbuf + pos;
		stesc = nbuf + (stesc - buf->buf) + strlen(res);
		free(buf->buf);
		buf->buf = nbuf;
	}
	return ROFF_CONT;
}

/*
 * Parse a quoted or unquoted roff-style request or macro argument.
 * The argument starts at *cpp, or one byte later in case it is quoted.
 * NUL-terminate the argument in place.
 * Collapse pairs of quotes inside quoted arguments.
 * Advance the argument pointer to the next argument,
 * or to the NUL byte terminating the argument line.
 * Return a newly allocated copy of the parsed argument; if it
 * contained a double backslash, the copy is additionally passed
 * through roff_expand().
 */
char *
roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
{
	struct buf	 buf;
	char		*cp, *start;
	int		 newesc, pairs, quoted, white;

	/* Quoting can only start with a new word. */
	start = *cpp;
	quoted = 0;
	if ('"' == *start) {
		quoted = 1;
		start++;
	}

	newesc = pairs = white = 0;
	for (cp = start; '\0' != *cp; cp++) {

		/*
		 * Move the following text left
		 * after quoted quotes and after "\\" and "\t".
		 */
		if (pairs)
			cp[-pairs] = cp[0];

		if ('\\' == cp[0]) {
			/*
			 * In copy mode, translate double to single
			 * backslashes and backslash-t to literal tabs.
			 */
			switch (cp[1]) {
			case 'a':
			case 't':
				cp[-pairs] = '\t';
				pairs++;
				cp++;
				break;
			case '\\':
				/* Leave a marker for roff_expand(). */
				newesc = 1;
				cp[-pairs] = ASCII_ESC;
				pairs++;
				cp++;
				break;
			case ' ':
				/* Skip escaped blanks. */
				if (0 == quoted)
					cp++;
				break;
			default:
				break;
			}
		} else if (0 == quoted) {
			if (' ' == cp[0]) {
				/* Unescaped blanks end unquoted args.
*/
				white = 1;
				break;
			}
		} else if ('"' == cp[0]) {
			if ('"' == cp[1]) {
				/* Quoted quotes collapse. */
				pairs++;
				cp++;
			} else {
				/* Unquoted quotes end quoted args. */
				quoted = 2;
				break;
			}
		}
	}

	/* Quoted argument without a closing quote. */
	if (1 == quoted)
		mandoc_msg(MANDOCERR_ARG_QUOTE, ln, *pos, NULL);

	/* NUL-terminate this argument and move to the next one. */
	if (pairs)
		cp[-pairs] = '\0';
	if ('\0' != *cp) {
		*cp++ = '\0';
		while (' ' == *cp)
			cp++;
	}
	/* Account for the opening quote that start skipped. */
	*pos += (int)(cp - start) + (quoted ? 1 : 0);
	*cpp = cp;

	if ('\0' == *cp && (white || ' ' == cp[-1]))
		mandoc_msg(MANDOCERR_SPACE_EOL, ln, *pos, NULL);

	/* The result is always a fresh copy. */
	start = mandoc_strdup(start);
	if (newesc == 0)
		return start;

	/*
	 * A double backslash was seen: let roff_expand() handle the
	 * ASCII_ESC markers in the copy.  If it asks to ignore the
	 * line, return an empty string instead.
	 */
	buf.buf = start;
	buf.sz = strlen(start) + 1;
	buf.next = NULL;
	if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
		free(buf.buf);
		buf.buf = mandoc_strdup("");
	}
	return buf.buf;
}


/*
 * Process text streams.
 * Handles the input line trap and the line counter of centering mode
 * (roffit_* and roffce_* variables, defined outside this function),
 * then marks breakable hyphens in the text starting at buf->buf + pos.
 * Returns ROFF_REPARSE after springing the trap, else ROFF_CONT.
 */
static int
roff_parsetext(struct roff *r, struct buf *buf, int pos, int *offs)
{
	size_t		 sz;
	const char	*start;
	char		*p;
	int		 isz;
	enum mandoc_esc	 esc;

	/*
	 * Spring the input line trap: append a request line calling
	 * the trap macro to the buffer and have the result reparsed.
	 */

	if (roffit_lines == 1) {
		isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro);
		free(buf->buf);
		buf->buf = p;
		buf->sz = isz + 1;
		*offs = 0;
		free(roffit_macro);
		roffit_lines = 0;
		return ROFF_REPARSE;
	} else if (roffit_lines > 1)
		--roffit_lines;

	/*
	 * Non-empty text lines count down roffce_lines; once it has
	 * dropped below one, return the parse point to roffce_node.
	 */

	if (roffce_node != NULL && buf->buf[pos] != '\0') {
		if (roffce_lines < 1) {
			r->man->last = roffce_node;
			r->man->next = ROFF_NEXT_SIBLING;
			roffce_lines = 0;
			roffce_node = NULL;
		} else
			roffce_lines--;
	}

	/* Convert all breakable hyphens into ASCII_HYPH.
*/

	start = p = buf->buf + pos;

	while (*p != '\0') {
		/* Jump to the next hyphen or backslash. */
		sz = strcspn(p, "-\\");
		p += sz;

		if (*p == '\0')
			break;

		if (*p == '\\') {
			/* Skip over escapes. */
			p++;
			esc = mandoc_escape((const char **)&p, NULL, NULL);
			if (esc == ESCAPE_ERROR)
				break;
			/* Hyphens right after an escape stay as they are. */
			while (*p == '-')
				p++;
			continue;
		} else if (p == start) {
			/* A hyphen at the very start is never converted. */
			p++;
			continue;
		}

		/* Only hyphens between two letters are breakable. */
		if (isalpha((unsigned char)p[-1]) &&
		    isalpha((unsigned char)p[1]))
			*p = ASCII_HYPH;
		p++;
	}
	return ROFF_CONT;
}

/*
 * Process one input line, starting at offset *offs of buf->buf.
 * After handling equation delimiters and expanding escape sequences,
 * the line is passed to the handler of the open roff scope, to the
 * equation or table parser, to the text processor, or to the handler
 * of the request or macro it contains.
 * Returns a combination of ROFF_* values telling the caller
 * what to do with the line.
 */
int
roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs)
{
	enum roff_tok	 t;
	int		 e;
	int		 pos;	/* parse point */
	int		 spos;	/* saved parse point for messages */
	int		 ppos;	/* original offset in buf->buf */
	int		 ctl;	/* macro line (boolean) */

	ppos = pos = *offs;

	/* Handle in-line equation delimiters. */

	if (r->tbl == NULL &&
	    r->last_eqn != NULL && r->last_eqn->delim &&
	    (r->eqn == NULL || r->eqn_inline)) {
		e = roff_eqndelim(r, buf, pos);
		if (e == ROFF_REPARSE)
			return e;
		assert(e == ROFF_CONT);
	}

	/* Expand some escape sequences. */

	e = roff_expand(r, buf, ln, pos, r->escape);
	if ((e & ROFF_MASK) == ROFF_IGN)
		return e;
	assert(e == ROFF_CONT);

	ctl = roff_getcontrol(r, buf->buf, &pos);

	/*
	 * First, if a scope is open and we're not a macro, pass the
	 * text through the macro's filter.
	 * Equations process all content themselves.
	 * Tables process almost all content themselves, but we want
	 * to warn about macros before passing it there.
	 */

	if (r->last != NULL && ! ctl) {
		t = r->last->tok;
		e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs);
		if ((e & ROFF_MASK) == ROFF_IGN)
			return e;
		/* Keep only the bits outside ROFF_MASK. */
		e &= ~ROFF_MASK;
	} else
		e = ROFF_IGN;
	/* Inside an equation, everything except .EN is equation input. */
	if (r->eqn != NULL && strncmp(buf->buf + ppos, ".EN", 3)) {
		eqn_read(r->eqn, buf->buf + ppos);
		return e;
	}
	if (r->tbl != NULL && (ctl == 0 || buf->buf[pos] == '\0')) {
		tbl_read(r->tbl, ln, buf->buf, ppos);
		roff_addtbl(r->man, ln, r->tbl);
		return e;
	}
	if ( ! ctl)
		return roff_parsetext(r, buf, pos, offs) | e;

	/* Skip empty request lines. */

	if (buf->buf[pos] == '"') {
		mandoc_msg(MANDOCERR_COMMENT_BAD, ln, pos, NULL);
		return ROFF_IGN;
	} else if (buf->buf[pos] == '\0')
		return ROFF_IGN;

	/*
	 * If a scope is open, go to the child handler for that macro,
	 * as it may want to preprocess before doing anything with it.
	 * Don't do so if an equation is open.
	 */

	if (r->last) {
		t = r->last->tok;
		return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
	}

	/* No scope is open.  This is a new request or macro. */

	spos = pos;
	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/* Tables ignore most macros. */

	if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
	    t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
		mandoc_msg(MANDOCERR_TBLMACRO,
		    ln, pos, "%s", buf->buf + spos);
		if (t != TOKEN_NONE)
			return ROFF_IGN;
		/* Skip the unknown macro name and feed the rest to tbl. */
		while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
			pos++;
		while (buf->buf[pos] == ' ')
			pos++;
		tbl_read(r->tbl, ln, buf->buf, pos);
		roff_addtbl(r->man, ln, r->tbl);
		return ROFF_IGN;
	}

	/* For now, let high level macros abort .ce mode. */

	if (ctl && roffce_node != NULL &&
	    (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
	     t == ROFF_TH || t == ROFF_TS)) {
		r->man->last = roffce_node;
		r->man->next = ROFF_NEXT_SIBLING;
		roffce_lines = 0;
		roffce_node = NULL;
	}

	/*
	 * This is neither a roff request nor a user-defined macro.
	 * Let the standard macro set parsers handle it.
	 */

	if (t == TOKEN_NONE)
		return ROFF_CONT;

	/* Execute a roff request or a user defined macro. */

	return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
}

/*
 * Internal interface function to tell the roff parser that execution
 * of the current macro ended.  This is required because macro
 * definitions usually do not end with a .return request.
 * Frees the arguments of the innermost macro call context
 * and pops that context off the stack.
 */
void
roff_userret(struct roff *r)
{
	struct mctx	*ctx;
	int		 i;

	assert(r->mstackpos >= 0);
	ctx = r->mstack + r->mstackpos;
	for (i = 0; i < ctx->argc; i++)
		free(ctx->argv[i]);
	ctx->argc = 0;
	r->mstackpos--;
}

/*
 * Finish parsing: report a roff block or an equation that is still
 * open, parse the open equation, and end an open table.
 */
void
roff_endparse(struct roff *r)
{
	if (r->last != NULL)
		mandoc_msg(MANDOCERR_BLK_NOEND, r->last->line,
		    r->last->col, "%s", roff_name[r->last->tok]);

	if (r->eqn != NULL) {
		mandoc_msg(MANDOCERR_BLK_NOEND,
		    r->eqn->node->line, r->eqn->node->pos, "EQ");
		eqn_parse(r->eqn);
		r->eqn = NULL;
	}

	if (r->tbl != NULL) {
		tbl_end(r->tbl, 1);
		r->tbl = NULL;
	}
}

/*
 * Parse a roff node's type from the input buffer.  This must be in the
 * form of ".foo xxx" in the usual way.
1924 */ 1925 static enum roff_tok 1926 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1927 { 1928 char *cp; 1929 const char *mac; 1930 size_t maclen; 1931 int deftype; 1932 enum roff_tok t; 1933 1934 cp = buf + *pos; 1935 1936 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1937 return TOKEN_NONE; 1938 1939 mac = cp; 1940 maclen = roff_getname(r, &cp, ln, ppos); 1941 1942 deftype = ROFFDEF_USER | ROFFDEF_REN; 1943 r->current_string = roff_getstrn(r, mac, maclen, &deftype); 1944 switch (deftype) { 1945 case ROFFDEF_USER: 1946 t = ROFF_USERDEF; 1947 break; 1948 case ROFFDEF_REN: 1949 t = ROFF_RENAMED; 1950 break; 1951 default: 1952 t = roffhash_find(r->reqtab, mac, maclen); 1953 break; 1954 } 1955 if (t != TOKEN_NONE) 1956 *pos = cp - buf; 1957 else if (deftype == ROFFDEF_UNDEF) { 1958 /* Using an undefined macro defines it to be empty. */ 1959 roff_setstrn(&r->strtab, mac, maclen, "", 0, 0); 1960 roff_setstrn(&r->rentab, mac, maclen, NULL, 0, 0); 1961 } 1962 return t; 1963 } 1964 1965 /* --- handling of request blocks ----------------------------------------- */ 1966 1967 static int 1968 roff_cblock(ROFF_ARGS) 1969 { 1970 1971 /* 1972 * A block-close `..' should only be invoked as a child of an 1973 * ignore macro, otherwise raise a warning and just ignore it. 1974 */ 1975 1976 if (r->last == NULL) { 1977 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1978 return ROFF_IGN; 1979 } 1980 1981 switch (r->last->tok) { 1982 case ROFF_am: 1983 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1984 case ROFF_ami: 1985 case ROFF_de: 1986 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1987 case ROFF_dei: 1988 case ROFF_ig: 1989 break; 1990 default: 1991 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, ".."); 1992 return ROFF_IGN; 1993 } 1994 1995 if (buf->buf[pos] != '\0') 1996 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 1997 ".. 
%s", buf->buf + pos); 1998 1999 roffnode_pop(r); 2000 roffnode_cleanscope(r); 2001 return ROFF_IGN; 2002 2003 } 2004 2005 static int 2006 roffnode_cleanscope(struct roff *r) 2007 { 2008 int inloop; 2009 2010 inloop = 0; 2011 while (r->last != NULL) { 2012 if (--r->last->endspan != 0) 2013 break; 2014 inloop += roffnode_pop(r); 2015 } 2016 return inloop; 2017 } 2018 2019 static int 2020 roff_ccond(struct roff *r, int ln, int ppos) 2021 { 2022 if (NULL == r->last) { 2023 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2024 return 0; 2025 } 2026 2027 switch (r->last->tok) { 2028 case ROFF_el: 2029 case ROFF_ie: 2030 case ROFF_if: 2031 case ROFF_while: 2032 break; 2033 default: 2034 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2035 return 0; 2036 } 2037 2038 if (r->last->endspan > -1) { 2039 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "\\}"); 2040 return 0; 2041 } 2042 2043 return roffnode_pop(r) + roffnode_cleanscope(r); 2044 } 2045 2046 static int 2047 roff_block(ROFF_ARGS) 2048 { 2049 const char *name, *value; 2050 char *call, *cp, *iname, *rname; 2051 size_t csz, namesz, rsz; 2052 int deftype; 2053 2054 /* Ignore groff compatibility mode for now. */ 2055 2056 if (tok == ROFF_de1) 2057 tok = ROFF_de; 2058 else if (tok == ROFF_dei1) 2059 tok = ROFF_dei; 2060 else if (tok == ROFF_am1) 2061 tok = ROFF_am; 2062 else if (tok == ROFF_ami1) 2063 tok = ROFF_ami; 2064 2065 /* Parse the macro name argument. */ 2066 2067 cp = buf->buf + pos; 2068 if (tok == ROFF_ig) { 2069 iname = NULL; 2070 namesz = 0; 2071 } else { 2072 iname = cp; 2073 namesz = roff_getname(r, &cp, ln, ppos); 2074 iname[namesz] = '\0'; 2075 } 2076 2077 /* Resolve the macro name argument if it is indirect. 
*/ 2078 2079 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2080 deftype = ROFFDEF_USER; 2081 name = roff_getstrn(r, iname, namesz, &deftype); 2082 if (name == NULL) { 2083 mandoc_msg(MANDOCERR_STR_UNDEF, 2084 ln, (int)(iname - buf->buf), 2085 "%.*s", (int)namesz, iname); 2086 namesz = 0; 2087 } else 2088 namesz = strlen(name); 2089 } else 2090 name = iname; 2091 2092 if (namesz == 0 && tok != ROFF_ig) { 2093 mandoc_msg(MANDOCERR_REQ_EMPTY, 2094 ln, ppos, "%s", roff_name[tok]); 2095 return ROFF_IGN; 2096 } 2097 2098 roffnode_push(r, tok, name, ln, ppos); 2099 2100 /* 2101 * At the beginning of a `de' macro, clear the existing string 2102 * with the same name, if there is one. New content will be 2103 * appended from roff_block_text() in multiline mode. 2104 */ 2105 2106 if (tok == ROFF_de || tok == ROFF_dei) { 2107 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 2108 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2109 } else if (tok == ROFF_am || tok == ROFF_ami) { 2110 deftype = ROFFDEF_ANY; 2111 value = roff_getstrn(r, iname, namesz, &deftype); 2112 switch (deftype) { /* Before appending, ... */ 2113 case ROFFDEF_PRE: /* copy predefined to user-defined. */ 2114 roff_setstrn(&r->strtab, name, namesz, 2115 value, strlen(value), 0); 2116 break; 2117 case ROFFDEF_REN: /* call original standard macro. */ 2118 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2119 (int)strlen(value), value); 2120 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2121 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2122 free(call); 2123 break; 2124 case ROFFDEF_STD: /* rename and call standard macro. 
*/ 2125 rsz = mandoc_asprintf(&rname, "__%s_renamed", name); 2126 roff_setstrn(&r->rentab, rname, rsz, name, namesz, 0); 2127 csz = mandoc_asprintf(&call, ".%.*s \\$* \\\"\n", 2128 (int)rsz, rname); 2129 roff_setstrn(&r->strtab, name, namesz, call, csz, 0); 2130 free(call); 2131 free(rname); 2132 break; 2133 default: 2134 break; 2135 } 2136 } 2137 2138 if (*cp == '\0') 2139 return ROFF_IGN; 2140 2141 /* Get the custom end marker. */ 2142 2143 iname = cp; 2144 namesz = roff_getname(r, &cp, ln, ppos); 2145 2146 /* Resolve the end marker if it is indirect. */ 2147 2148 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 2149 deftype = ROFFDEF_USER; 2150 name = roff_getstrn(r, iname, namesz, &deftype); 2151 if (name == NULL) { 2152 mandoc_msg(MANDOCERR_STR_UNDEF, 2153 ln, (int)(iname - buf->buf), 2154 "%.*s", (int)namesz, iname); 2155 namesz = 0; 2156 } else 2157 namesz = strlen(name); 2158 } else 2159 name = iname; 2160 2161 if (namesz) 2162 r->last->end = mandoc_strndup(name, namesz); 2163 2164 if (*cp != '\0') 2165 mandoc_msg(MANDOCERR_ARG_EXCESS, 2166 ln, pos, ".%s ... %s", roff_name[tok], cp); 2167 2168 return ROFF_IGN; 2169 } 2170 2171 static int 2172 roff_block_sub(ROFF_ARGS) 2173 { 2174 enum roff_tok t; 2175 int i, j; 2176 2177 /* 2178 * First check whether a custom macro exists at this level. If 2179 * it does, then check against it. This is some of groff's 2180 * stranger behaviours. If we encountered a custom end-scope 2181 * tag and that tag also happens to be a "real" macro, then we 2182 * need to try interpreting it again as a real macro. If it's 2183 * not, then return ignore. Else continue. 
*/

	if (r->last->end) {
		/* Compare the text at pos with the custom end marker. */
		for (i = pos, j = 0; r->last->end[j]; j++, i++)
			if (buf->buf[i] != r->last->end[j])
				break;

		/* The marker must match completely and end the word. */
		if (r->last->end[j] == '\0' &&
		    (buf->buf[i] == '\0' ||
		     buf->buf[i] == ' ' ||
		     buf->buf[i] == '\t')) {
			roffnode_pop(r);
			roffnode_cleanscope(r);

			while (buf->buf[i] == ' ' || buf->buf[i] == '\t')
				i++;

			/* Rerun the line if a known macro follows. */
			pos = i;
			if (roff_parse(r, buf->buf, &pos, ln, ppos) !=
			    TOKEN_NONE)
				return ROFF_RERUN;
			return ROFF_IGN;
		}
	}

	/*
	 * If we have no custom end-query or lookup failed, then try
	 * pulling it out of the hashtable.
	 */

	t = roff_parse(r, buf->buf, &pos, ln, ppos);

	/*
	 * Any line except the block closing request is block content:
	 * unless the block is `ig', hand it to roff_setstr() for the
	 * name of the open block, starting at ppos.
	 */

	if (t != ROFF_cblock) {
		if (tok != ROFF_ig)
			roff_setstr(r, r->last->name, buf->buf + ppos, 2);
		return ROFF_IGN;
	}

	return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
}

/*
 * Handle a text line inside a request block: unless the block is
 * `ig', hand the text starting at pos to roff_setstr() for the name
 * of the open block.  Always returns ROFF_IGN.
 */
static int
roff_block_text(ROFF_ARGS)
{

	if (tok != ROFF_ig)
		roff_setstr(r, r->last->name, buf->buf + pos, 2);

	return ROFF_IGN;
}

static int
roff_cond_sub(ROFF_ARGS)
{
	char		*ep;
	int		 endloop, irc, rr;
	enum roff_tok	 t;

	irc = ROFF_IGN;
	/* Read the rule before the scope cleanup can pop the node. */
	rr = r->last->rule;
	/* Value to merge into the result when a scope gets closed. */
	endloop = tok != ROFF_while ? ROFF_IGN :
	    rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT;
	if (roffnode_cleanscope(r))
		irc |= endloop;

	/*
	 * If `\}' occurs on a macro line without a preceding macro,
	 * drop the line completely.
	 */

	ep = buf->buf + pos;
	if (ep[0] == '\\' && ep[1] == '}')
		rr = 0;

	/*
	 * The closing delimiter `\}' rewinds the conditional scope
	 * but is otherwise ignored when interpreting the line.
2261 */ 2262 2263 while ((ep = strchr(ep, '\\')) != NULL) { 2264 switch (ep[1]) { 2265 case '}': 2266 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2267 if (roff_ccond(r, ln, ep - buf->buf)) 2268 irc |= endloop; 2269 break; 2270 case '\0': 2271 ++ep; 2272 break; 2273 default: 2274 ep += 2; 2275 break; 2276 } 2277 } 2278 2279 /* 2280 * Fully handle known macros when they are structurally 2281 * required or when the conditional evaluated to true. 2282 */ 2283 2284 t = roff_parse(r, buf->buf, &pos, ln, ppos); 2285 irc |= t != TOKEN_NONE && (rr || roffs[t].flags & ROFFMAC_STRUCT) ? 2286 (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs) : 2287 rr ? ROFF_CONT : ROFF_IGN; 2288 return irc; 2289 } 2290 2291 static int 2292 roff_cond_text(ROFF_ARGS) 2293 { 2294 char *ep; 2295 int endloop, irc, rr; 2296 2297 irc = ROFF_IGN; 2298 rr = r->last->rule; 2299 endloop = tok != ROFF_while ? ROFF_IGN : 2300 rr ? ROFF_LOOPCONT : ROFF_LOOPEXIT; 2301 if (roffnode_cleanscope(r)) 2302 irc |= endloop; 2303 2304 /* 2305 * If `\}' occurs on a text line with neither preceding 2306 * nor following characters, drop the line completely. 2307 */ 2308 2309 ep = buf->buf + pos; 2310 if (strcmp(ep, "\\}") == 0) 2311 rr = 0; 2312 2313 /* 2314 * The closing delimiter `\}' rewinds the conditional scope 2315 * but is otherwise ignored when interpreting the line. 2316 */ 2317 2318 while ((ep = strchr(ep, '\\')) != NULL) { 2319 switch (ep[1]) { 2320 case '}': 2321 memmove(ep, ep + 2, strlen(ep + 2) + 1); 2322 if (roff_ccond(r, ln, ep - buf->buf)) 2323 irc |= endloop; 2324 break; 2325 case '\0': 2326 ++ep; 2327 break; 2328 default: 2329 ep += 2; 2330 break; 2331 } 2332 } 2333 if (rr) 2334 irc |= ROFF_CONT; 2335 return irc; 2336 } 2337 2338 /* --- handling of numeric and conditional expressions -------------------- */ 2339 2340 /* 2341 * Parse a single signed integer number. Stop at the first non-digit. 
2342 * If there is at least one digit, return success and advance the 2343 * parse point, else return failure and let the parse point unchanged. 2344 * Ignore overflows, treat them just like the C language. 2345 */ 2346 static int 2347 roff_getnum(const char *v, int *pos, int *res, int flags) 2348 { 2349 int myres, scaled, n, p; 2350 2351 if (NULL == res) 2352 res = &myres; 2353 2354 p = *pos; 2355 n = v[p] == '-'; 2356 if (n || v[p] == '+') 2357 p++; 2358 2359 if (flags & ROFFNUM_WHITE) 2360 while (isspace((unsigned char)v[p])) 2361 p++; 2362 2363 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2364 *res = 10 * *res + v[p] - '0'; 2365 if (p == *pos + n) 2366 return 0; 2367 2368 if (n) 2369 *res = -*res; 2370 2371 /* Each number may be followed by one optional scaling unit. */ 2372 2373 switch (v[p]) { 2374 case 'f': 2375 scaled = *res * 65536; 2376 break; 2377 case 'i': 2378 scaled = *res * 240; 2379 break; 2380 case 'c': 2381 scaled = *res * 240 / 2.54; 2382 break; 2383 case 'v': 2384 case 'P': 2385 scaled = *res * 40; 2386 break; 2387 case 'm': 2388 case 'n': 2389 scaled = *res * 24; 2390 break; 2391 case 'p': 2392 scaled = *res * 10 / 3; 2393 break; 2394 case 'u': 2395 scaled = *res; 2396 break; 2397 case 'M': 2398 scaled = *res * 6 / 25; 2399 break; 2400 default: 2401 scaled = *res; 2402 p--; 2403 break; 2404 } 2405 if (flags & ROFFNUM_SCALE) 2406 *res = scaled; 2407 2408 *pos = p + 1; 2409 return 1; 2410 } 2411 2412 /* 2413 * Evaluate a string comparison condition. 2414 * The first character is the delimiter. 2415 * Succeed if the string up to its second occurrence 2416 * matches the string up to its third occurence. 2417 * Advance the cursor after the third occurrence 2418 * or lacking that, to the end of the line. 
2419 */ 2420 static int 2421 roff_evalstrcond(const char *v, int *pos) 2422 { 2423 const char *s1, *s2, *s3; 2424 int match; 2425 2426 match = 0; 2427 s1 = v + *pos; /* initial delimiter */ 2428 s2 = s1 + 1; /* for scanning the first string */ 2429 s3 = strchr(s2, *s1); /* for scanning the second string */ 2430 2431 if (NULL == s3) /* found no middle delimiter */ 2432 goto out; 2433 2434 while ('\0' != *++s3) { 2435 if (*s2 != *s3) { /* mismatch */ 2436 s3 = strchr(s3, *s1); 2437 break; 2438 } 2439 if (*s3 == *s1) { /* found the final delimiter */ 2440 match = 1; 2441 break; 2442 } 2443 s2++; 2444 } 2445 2446 out: 2447 if (NULL == s3) 2448 s3 = strchr(s2, '\0'); 2449 else if (*s3 != '\0') 2450 s3++; 2451 *pos = s3 - v; 2452 return match; 2453 } 2454 2455 /* 2456 * Evaluate an optionally negated single character, numerical, 2457 * or string condition. 2458 */ 2459 static int 2460 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2461 { 2462 const char *start, *end; 2463 char *cp, *name; 2464 size_t sz; 2465 int deftype, len, number, savepos, istrue, wanttrue; 2466 2467 if ('!' == v[*pos]) { 2468 wanttrue = 0; 2469 (*pos)++; 2470 } else 2471 wanttrue = 1; 2472 2473 switch (v[*pos]) { 2474 case '\0': 2475 return 0; 2476 case 'n': 2477 case 'o': 2478 (*pos)++; 2479 return wanttrue; 2480 case 'e': 2481 case 't': 2482 case 'v': 2483 (*pos)++; 2484 return !wanttrue; 2485 case 'c': 2486 do { 2487 (*pos)++; 2488 } while (v[*pos] == ' '); 2489 2490 /* 2491 * Quirk for groff compatibility: 2492 * The horizontal tab is neither available nor unavailable. 2493 */ 2494 2495 if (v[*pos] == '\t') { 2496 (*pos)++; 2497 return 0; 2498 } 2499 2500 /* Printable ASCII characters are available. 
*/ 2501 2502 if (v[*pos] != '\\') { 2503 (*pos)++; 2504 return wanttrue; 2505 } 2506 2507 end = v + ++*pos; 2508 switch (mandoc_escape(&end, &start, &len)) { 2509 case ESCAPE_SPECIAL: 2510 istrue = mchars_spec2cp(start, len) != -1; 2511 break; 2512 case ESCAPE_UNICODE: 2513 istrue = 1; 2514 break; 2515 case ESCAPE_NUMBERED: 2516 istrue = mchars_num2char(start, len) != -1; 2517 break; 2518 default: 2519 istrue = !wanttrue; 2520 break; 2521 } 2522 *pos = end - v; 2523 return istrue == wanttrue; 2524 case 'd': 2525 case 'r': 2526 cp = v + *pos + 1; 2527 while (*cp == ' ') 2528 cp++; 2529 name = cp; 2530 sz = roff_getname(r, &cp, ln, cp - v); 2531 if (sz == 0) 2532 istrue = 0; 2533 else if (v[*pos] == 'r') 2534 istrue = roff_hasregn(r, name, sz); 2535 else { 2536 deftype = ROFFDEF_ANY; 2537 roff_getstrn(r, name, sz, &deftype); 2538 istrue = !!deftype; 2539 } 2540 *pos = (name + sz) - v; 2541 return istrue == wanttrue; 2542 default: 2543 break; 2544 } 2545 2546 savepos = *pos; 2547 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2548 return (number > 0) == wanttrue; 2549 else if (*pos == savepos) 2550 return roff_evalstrcond(v, pos) == wanttrue; 2551 else 2552 return 0; 2553 } 2554 2555 static int 2556 roff_line_ignore(ROFF_ARGS) 2557 { 2558 2559 return ROFF_IGN; 2560 } 2561 2562 static int 2563 roff_insec(ROFF_ARGS) 2564 { 2565 2566 mandoc_msg(MANDOCERR_REQ_INSEC, ln, ppos, "%s", roff_name[tok]); 2567 return ROFF_IGN; 2568 } 2569 2570 static int 2571 roff_unsupp(ROFF_ARGS) 2572 { 2573 2574 mandoc_msg(MANDOCERR_REQ_UNSUPP, ln, ppos, "%s", roff_name[tok]); 2575 return ROFF_IGN; 2576 } 2577 2578 static int 2579 roff_cond(ROFF_ARGS) 2580 { 2581 int irc; 2582 2583 roffnode_push(r, tok, NULL, ln, ppos); 2584 2585 /* 2586 * An `.el' has no conditional body: it will consume the value 2587 * of the current rstack entry set in prior `ie' calls or 2588 * defaults to DENY. 2589 * 2590 * If we're not an `el', however, then evaluate the conditional. 
2591 */ 2592 2593 r->last->rule = tok == ROFF_el ? 2594 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2595 roff_evalcond(r, ln, buf->buf, &pos); 2596 2597 /* 2598 * An if-else will put the NEGATION of the current evaluated 2599 * conditional into the stack of rules. 2600 */ 2601 2602 if (tok == ROFF_ie) { 2603 if (r->rstackpos + 1 == r->rstacksz) { 2604 r->rstacksz += 16; 2605 r->rstack = mandoc_reallocarray(r->rstack, 2606 r->rstacksz, sizeof(int)); 2607 } 2608 r->rstack[++r->rstackpos] = !r->last->rule; 2609 } 2610 2611 /* If the parent has false as its rule, then so do we. */ 2612 2613 if (r->last->parent && !r->last->parent->rule) 2614 r->last->rule = 0; 2615 2616 /* 2617 * Determine scope. 2618 * If there is nothing on the line after the conditional, 2619 * not even whitespace, use next-line scope. 2620 * Except that .while does not support next-line scope. 2621 */ 2622 2623 if (buf->buf[pos] == '\0' && tok != ROFF_while) { 2624 r->last->endspan = 2; 2625 goto out; 2626 } 2627 2628 while (buf->buf[pos] == ' ') 2629 pos++; 2630 2631 /* An opening brace requests multiline scope. */ 2632 2633 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2634 r->last->endspan = -1; 2635 pos += 2; 2636 while (buf->buf[pos] == ' ') 2637 pos++; 2638 goto out; 2639 } 2640 2641 /* 2642 * Anything else following the conditional causes 2643 * single-line scope. Warn if the scope contains 2644 * nothing but trailing whitespace. 2645 */ 2646 2647 if (buf->buf[pos] == '\0') 2648 mandoc_msg(MANDOCERR_COND_EMPTY, 2649 ln, ppos, "%s", roff_name[tok]); 2650 2651 r->last->endspan = 1; 2652 2653 out: 2654 *offs = pos; 2655 irc = ROFF_RERUN; 2656 if (tok == ROFF_while) 2657 irc |= ROFF_WHILE; 2658 return irc; 2659 } 2660 2661 static int 2662 roff_ds(ROFF_ARGS) 2663 { 2664 char *string; 2665 const char *name; 2666 size_t namesz; 2667 2668 /* Ignore groff compatibility mode for now. 
*/ 2669 2670 if (tok == ROFF_ds1) 2671 tok = ROFF_ds; 2672 else if (tok == ROFF_as1) 2673 tok = ROFF_as; 2674 2675 /* 2676 * The first word is the name of the string. 2677 * If it is empty or terminated by an escape sequence, 2678 * abort the `ds' request without defining anything. 2679 */ 2680 2681 name = string = buf->buf + pos; 2682 if (*name == '\0') 2683 return ROFF_IGN; 2684 2685 namesz = roff_getname(r, &string, ln, pos); 2686 switch (name[namesz]) { 2687 case '\\': 2688 return ROFF_IGN; 2689 case '\t': 2690 string = buf->buf + pos + namesz; 2691 break; 2692 default: 2693 break; 2694 } 2695 2696 /* Read past the initial double-quote, if any. */ 2697 if (*string == '"') 2698 string++; 2699 2700 /* The rest is the value. */ 2701 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2702 ROFF_as == tok); 2703 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 2704 return ROFF_IGN; 2705 } 2706 2707 /* 2708 * Parse a single operator, one or two characters long. 2709 * If the operator is recognized, return success and advance the 2710 * parse point, else return failure and let the parse point unchanged. 
2711 */ 2712 static int 2713 roff_getop(const char *v, int *pos, char *res) 2714 { 2715 2716 *res = v[*pos]; 2717 2718 switch (*res) { 2719 case '+': 2720 case '-': 2721 case '*': 2722 case '/': 2723 case '%': 2724 case '&': 2725 case ':': 2726 break; 2727 case '<': 2728 switch (v[*pos + 1]) { 2729 case '=': 2730 *res = 'l'; 2731 (*pos)++; 2732 break; 2733 case '>': 2734 *res = '!'; 2735 (*pos)++; 2736 break; 2737 case '?': 2738 *res = 'i'; 2739 (*pos)++; 2740 break; 2741 default: 2742 break; 2743 } 2744 break; 2745 case '>': 2746 switch (v[*pos + 1]) { 2747 case '=': 2748 *res = 'g'; 2749 (*pos)++; 2750 break; 2751 case '?': 2752 *res = 'a'; 2753 (*pos)++; 2754 break; 2755 default: 2756 break; 2757 } 2758 break; 2759 case '=': 2760 if ('=' == v[*pos + 1]) 2761 (*pos)++; 2762 break; 2763 default: 2764 return 0; 2765 } 2766 (*pos)++; 2767 2768 return *res; 2769 } 2770 2771 /* 2772 * Evaluate either a parenthesized numeric expression 2773 * or a single signed integer number. 2774 */ 2775 static int 2776 roff_evalpar(struct roff *r, int ln, 2777 const char *v, int *pos, int *res, int flags) 2778 { 2779 2780 if ('(' != v[*pos]) 2781 return roff_getnum(v, pos, res, flags); 2782 2783 (*pos)++; 2784 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2785 return 0; 2786 2787 /* 2788 * Omission of the closing parenthesis 2789 * is an error in validation mode, 2790 * but ignored in evaluation mode. 2791 */ 2792 2793 if (')' == v[*pos]) 2794 (*pos)++; 2795 else if (NULL == res) 2796 return 0; 2797 2798 return 1; 2799 } 2800 2801 /* 2802 * Evaluate a complete numeric expression. 2803 * Proceed left to right, there is no concept of precedence. 
2804 */ 2805 static int 2806 roff_evalnum(struct roff *r, int ln, const char *v, 2807 int *pos, int *res, int flags) 2808 { 2809 int mypos, operand2; 2810 char operator; 2811 2812 if (NULL == pos) { 2813 mypos = 0; 2814 pos = &mypos; 2815 } 2816 2817 if (flags & ROFFNUM_WHITE) 2818 while (isspace((unsigned char)v[*pos])) 2819 (*pos)++; 2820 2821 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2822 return 0; 2823 2824 while (1) { 2825 if (flags & ROFFNUM_WHITE) 2826 while (isspace((unsigned char)v[*pos])) 2827 (*pos)++; 2828 2829 if ( ! roff_getop(v, pos, &operator)) 2830 break; 2831 2832 if (flags & ROFFNUM_WHITE) 2833 while (isspace((unsigned char)v[*pos])) 2834 (*pos)++; 2835 2836 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2837 return 0; 2838 2839 if (flags & ROFFNUM_WHITE) 2840 while (isspace((unsigned char)v[*pos])) 2841 (*pos)++; 2842 2843 if (NULL == res) 2844 continue; 2845 2846 switch (operator) { 2847 case '+': 2848 *res += operand2; 2849 break; 2850 case '-': 2851 *res -= operand2; 2852 break; 2853 case '*': 2854 *res *= operand2; 2855 break; 2856 case '/': 2857 if (operand2 == 0) { 2858 mandoc_msg(MANDOCERR_DIVZERO, 2859 ln, *pos, "%s", v); 2860 *res = 0; 2861 break; 2862 } 2863 *res /= operand2; 2864 break; 2865 case '%': 2866 if (operand2 == 0) { 2867 mandoc_msg(MANDOCERR_DIVZERO, 2868 ln, *pos, "%s", v); 2869 *res = 0; 2870 break; 2871 } 2872 *res %= operand2; 2873 break; 2874 case '<': 2875 *res = *res < operand2; 2876 break; 2877 case '>': 2878 *res = *res > operand2; 2879 break; 2880 case 'l': 2881 *res = *res <= operand2; 2882 break; 2883 case 'g': 2884 *res = *res >= operand2; 2885 break; 2886 case '=': 2887 *res = *res == operand2; 2888 break; 2889 case '!': 2890 *res = *res != operand2; 2891 break; 2892 case '&': 2893 *res = *res && operand2; 2894 break; 2895 case ':': 2896 *res = *res || operand2; 2897 break; 2898 case 'i': 2899 if (operand2 < *res) 2900 *res = operand2; 2901 break; 2902 case 'a': 2903 if (operand2 > *res) 2904 *res 
= operand2; 2905 break; 2906 default: 2907 abort(); 2908 } 2909 } 2910 return 1; 2911 } 2912 2913 /* --- register management ------------------------------------------------ */ 2914 2915 void 2916 roff_setreg(struct roff *r, const char *name, int val, char sign) 2917 { 2918 roff_setregn(r, name, strlen(name), val, sign, INT_MIN); 2919 } 2920 2921 static void 2922 roff_setregn(struct roff *r, const char *name, size_t len, 2923 int val, char sign, int step) 2924 { 2925 struct roffreg *reg; 2926 2927 /* Search for an existing register with the same name. */ 2928 reg = r->regtab; 2929 2930 while (reg != NULL && (reg->key.sz != len || 2931 strncmp(reg->key.p, name, len) != 0)) 2932 reg = reg->next; 2933 2934 if (NULL == reg) { 2935 /* Create a new register. */ 2936 reg = mandoc_malloc(sizeof(struct roffreg)); 2937 reg->key.p = mandoc_strndup(name, len); 2938 reg->key.sz = len; 2939 reg->val = 0; 2940 reg->step = 0; 2941 reg->next = r->regtab; 2942 r->regtab = reg; 2943 } 2944 2945 if ('+' == sign) 2946 reg->val += val; 2947 else if ('-' == sign) 2948 reg->val -= val; 2949 else 2950 reg->val = val; 2951 if (step != INT_MIN) 2952 reg->step = step; 2953 } 2954 2955 /* 2956 * Handle some predefined read-only number registers. 2957 * For now, return -1 if the requested register is not predefined; 2958 * in case a predefined read-only register having the value -1 2959 * were to turn up, another special value would have to be chosen. 2960 */ 2961 static int 2962 roff_getregro(const struct roff *r, const char *name) 2963 { 2964 2965 switch (*name) { 2966 case '$': /* Number of arguments of the last macro evaluated. */ 2967 return r->mstackpos < 0 ? 0 : r->mstack[r->mstackpos].argc; 2968 case 'A': /* ASCII approximation mode is always off. */ 2969 return 0; 2970 case 'g': /* Groff compatibility mode is always on. */ 2971 return 1; 2972 case 'H': /* Fixed horizontal resolution. */ 2973 return 24; 2974 case 'j': /* Always adjust left margin only. 
*/ 2975 return 0; 2976 case 'T': /* Some output device is always defined. */ 2977 return 1; 2978 case 'V': /* Fixed vertical resolution. */ 2979 return 40; 2980 default: 2981 return -1; 2982 } 2983 } 2984 2985 int 2986 roff_getreg(struct roff *r, const char *name) 2987 { 2988 return roff_getregn(r, name, strlen(name), '\0'); 2989 } 2990 2991 static int 2992 roff_getregn(struct roff *r, const char *name, size_t len, char sign) 2993 { 2994 struct roffreg *reg; 2995 int val; 2996 2997 if ('.' == name[0] && 2 == len) { 2998 val = roff_getregro(r, name + 1); 2999 if (-1 != val) 3000 return val; 3001 } 3002 3003 for (reg = r->regtab; reg; reg = reg->next) { 3004 if (len == reg->key.sz && 3005 0 == strncmp(name, reg->key.p, len)) { 3006 switch (sign) { 3007 case '+': 3008 reg->val += reg->step; 3009 break; 3010 case '-': 3011 reg->val -= reg->step; 3012 break; 3013 default: 3014 break; 3015 } 3016 return reg->val; 3017 } 3018 } 3019 3020 roff_setregn(r, name, len, 0, '\0', INT_MIN); 3021 return 0; 3022 } 3023 3024 static int 3025 roff_hasregn(const struct roff *r, const char *name, size_t len) 3026 { 3027 struct roffreg *reg; 3028 int val; 3029 3030 if ('.' 
== name[0] && 2 == len) { 3031 val = roff_getregro(r, name + 1); 3032 if (-1 != val) 3033 return 1; 3034 } 3035 3036 for (reg = r->regtab; reg; reg = reg->next) 3037 if (len == reg->key.sz && 3038 0 == strncmp(name, reg->key.p, len)) 3039 return 1; 3040 3041 return 0; 3042 } 3043 3044 static void 3045 roff_freereg(struct roffreg *reg) 3046 { 3047 struct roffreg *old_reg; 3048 3049 while (NULL != reg) { 3050 free(reg->key.p); 3051 old_reg = reg; 3052 reg = reg->next; 3053 free(old_reg); 3054 } 3055 } 3056 3057 static int 3058 roff_nr(ROFF_ARGS) 3059 { 3060 char *key, *val, *step; 3061 size_t keysz; 3062 int iv, is, len; 3063 char sign; 3064 3065 key = val = buf->buf + pos; 3066 if (*key == '\0') 3067 return ROFF_IGN; 3068 3069 keysz = roff_getname(r, &val, ln, pos); 3070 if (key[keysz] == '\\' || key[keysz] == '\t') 3071 return ROFF_IGN; 3072 3073 sign = *val; 3074 if (sign == '+' || sign == '-') 3075 val++; 3076 3077 len = 0; 3078 if (roff_evalnum(r, ln, val, &len, &iv, ROFFNUM_SCALE) == 0) 3079 return ROFF_IGN; 3080 3081 step = val + len; 3082 while (isspace((unsigned char)*step)) 3083 step++; 3084 if (roff_evalnum(r, ln, step, NULL, &is, 0) == 0) 3085 is = INT_MIN; 3086 3087 roff_setregn(r, key, keysz, iv, sign, is); 3088 return ROFF_IGN; 3089 } 3090 3091 static int 3092 roff_rr(ROFF_ARGS) 3093 { 3094 struct roffreg *reg, **prev; 3095 char *name, *cp; 3096 size_t namesz; 3097 3098 name = cp = buf->buf + pos; 3099 if (*name == '\0') 3100 return ROFF_IGN; 3101 namesz = roff_getname(r, &cp, ln, pos); 3102 name[namesz] = '\0'; 3103 3104 prev = &r->regtab; 3105 while (1) { 3106 reg = *prev; 3107 if (reg == NULL || !strcmp(name, reg->key.p)) 3108 break; 3109 prev = ®->next; 3110 } 3111 if (reg != NULL) { 3112 *prev = reg->next; 3113 free(reg->key.p); 3114 free(reg); 3115 } 3116 return ROFF_IGN; 3117 } 3118 3119 /* --- handler functions for roff requests -------------------------------- */ 3120 3121 static int 3122 roff_rm(ROFF_ARGS) 3123 { 3124 const char *name; 3125 
char *cp; 3126 size_t namesz; 3127 3128 cp = buf->buf + pos; 3129 while (*cp != '\0') { 3130 name = cp; 3131 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 3132 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 3133 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3134 if (name[namesz] == '\\' || name[namesz] == '\t') 3135 break; 3136 } 3137 return ROFF_IGN; 3138 } 3139 3140 static int 3141 roff_it(ROFF_ARGS) 3142 { 3143 int iv; 3144 3145 /* Parse the number of lines. */ 3146 3147 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 3148 mandoc_msg(MANDOCERR_IT_NONUM, 3149 ln, ppos, "%s", buf->buf + 1); 3150 return ROFF_IGN; 3151 } 3152 3153 while (isspace((unsigned char)buf->buf[pos])) 3154 pos++; 3155 3156 /* 3157 * Arm the input line trap. 3158 * Special-casing "an-trap" is an ugly workaround to cope 3159 * with DocBook stupidly fiddling with man(7) internals. 3160 */ 3161 3162 roffit_lines = iv; 3163 roffit_macro = mandoc_strdup(iv != 1 || 3164 strcmp(buf->buf + pos, "an-trap") ? 
3165 buf->buf + pos : "br"); 3166 return ROFF_IGN; 3167 } 3168 3169 static int 3170 roff_Dd(ROFF_ARGS) 3171 { 3172 int mask; 3173 enum roff_tok t, te; 3174 3175 switch (tok) { 3176 case ROFF_Dd: 3177 tok = MDOC_Dd; 3178 te = MDOC_MAX; 3179 if (r->format == 0) 3180 r->format = MPARSE_MDOC; 3181 mask = MPARSE_MDOC | MPARSE_QUICK; 3182 break; 3183 case ROFF_TH: 3184 tok = MAN_TH; 3185 te = MAN_MAX; 3186 if (r->format == 0) 3187 r->format = MPARSE_MAN; 3188 mask = MPARSE_QUICK; 3189 break; 3190 default: 3191 abort(); 3192 } 3193 if ((r->options & mask) == 0) 3194 for (t = tok; t < te; t++) 3195 roff_setstr(r, roff_name[t], NULL, 0); 3196 return ROFF_CONT; 3197 } 3198 3199 static int 3200 roff_TE(ROFF_ARGS) 3201 { 3202 r->man->flags &= ~ROFF_NONOFILL; 3203 if (r->tbl == NULL) { 3204 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "TE"); 3205 return ROFF_IGN; 3206 } 3207 if (tbl_end(r->tbl, 0) == 0) { 3208 r->tbl = NULL; 3209 free(buf->buf); 3210 buf->buf = mandoc_strdup(".sp"); 3211 buf->sz = 4; 3212 *offs = 0; 3213 return ROFF_REPARSE; 3214 } 3215 r->tbl = NULL; 3216 return ROFF_IGN; 3217 } 3218 3219 static int 3220 roff_T_(ROFF_ARGS) 3221 { 3222 3223 if (NULL == r->tbl) 3224 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "T&"); 3225 else 3226 tbl_restart(ln, ppos, r->tbl); 3227 3228 return ROFF_IGN; 3229 } 3230 3231 /* 3232 * Handle in-line equation delimiters. 3233 */ 3234 static int 3235 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 3236 { 3237 char *cp1, *cp2; 3238 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 3239 3240 /* 3241 * Outside equations, look for an opening delimiter. 3242 * If we are inside an equation, we already know it is 3243 * in-line, or this function wouldn't have been called; 3244 * so look for a closing delimiter. 3245 */ 3246 3247 cp1 = buf->buf + pos; 3248 cp2 = strchr(cp1, r->eqn == NULL ? 
3249 r->last_eqn->odelim : r->last_eqn->cdelim); 3250 if (cp2 == NULL) 3251 return ROFF_CONT; 3252 3253 *cp2++ = '\0'; 3254 bef_pr = bef_nl = aft_nl = aft_pr = ""; 3255 3256 /* Handle preceding text, protecting whitespace. */ 3257 3258 if (*buf->buf != '\0') { 3259 if (r->eqn == NULL) 3260 bef_pr = "\\&"; 3261 bef_nl = "\n"; 3262 } 3263 3264 /* 3265 * Prepare replacing the delimiter with an equation macro 3266 * and drop leading white space from the equation. 3267 */ 3268 3269 if (r->eqn == NULL) { 3270 while (*cp2 == ' ') 3271 cp2++; 3272 mac = ".EQ"; 3273 } else 3274 mac = ".EN"; 3275 3276 /* Handle following text, protecting whitespace. */ 3277 3278 if (*cp2 != '\0') { 3279 aft_nl = "\n"; 3280 if (r->eqn != NULL) 3281 aft_pr = "\\&"; 3282 } 3283 3284 /* Do the actual replacement. */ 3285 3286 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 3287 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 3288 free(buf->buf); 3289 buf->buf = cp1; 3290 3291 /* Toggle the in-line state of the eqn subsystem. 
*/ 3292 3293 r->eqn_inline = r->eqn == NULL; 3294 return ROFF_REPARSE; 3295 } 3296 3297 static int 3298 roff_EQ(ROFF_ARGS) 3299 { 3300 struct roff_node *n; 3301 3302 if (r->man->meta.macroset == MACROSET_MAN) 3303 man_breakscope(r->man, ROFF_EQ); 3304 n = roff_node_alloc(r->man, ln, ppos, ROFFT_EQN, TOKEN_NONE); 3305 if (ln > r->man->last->line) 3306 n->flags |= NODE_LINE; 3307 n->eqn = eqn_box_new(); 3308 roff_node_append(r->man, n); 3309 r->man->next = ROFF_NEXT_SIBLING; 3310 3311 assert(r->eqn == NULL); 3312 if (r->last_eqn == NULL) 3313 r->last_eqn = eqn_alloc(); 3314 else 3315 eqn_reset(r->last_eqn); 3316 r->eqn = r->last_eqn; 3317 r->eqn->node = n; 3318 3319 if (buf->buf[pos] != '\0') 3320 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3321 ".EQ %s", buf->buf + pos); 3322 3323 return ROFF_IGN; 3324 } 3325 3326 static int 3327 roff_EN(ROFF_ARGS) 3328 { 3329 if (r->eqn != NULL) { 3330 eqn_parse(r->eqn); 3331 r->eqn = NULL; 3332 } else 3333 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ln, ppos, "EN"); 3334 if (buf->buf[pos] != '\0') 3335 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3336 "EN %s", buf->buf + pos); 3337 return ROFF_IGN; 3338 } 3339 3340 static int 3341 roff_TS(ROFF_ARGS) 3342 { 3343 if (r->tbl != NULL) { 3344 mandoc_msg(MANDOCERR_BLK_BROKEN, ln, ppos, "TS breaks TS"); 3345 tbl_end(r->tbl, 0); 3346 } 3347 r->man->flags |= ROFF_NONOFILL; 3348 r->tbl = tbl_alloc(ppos, ln, r->last_tbl); 3349 if (r->last_tbl == NULL) 3350 r->first_tbl = r->tbl; 3351 r->last_tbl = r->tbl; 3352 return ROFF_IGN; 3353 } 3354 3355 static int 3356 roff_noarg(ROFF_ARGS) 3357 { 3358 if (r->man->flags & (MAN_BLINE | MAN_ELINE)) 3359 man_breakscope(r->man, tok); 3360 if (tok == ROFF_brp) 3361 tok = ROFF_br; 3362 roff_elem_alloc(r->man, ln, ppos, tok); 3363 if (buf->buf[pos] != '\0') 3364 mandoc_msg(MANDOCERR_ARG_SKIP, ln, pos, 3365 "%s %s", roff_name[tok], buf->buf + pos); 3366 if (tok == ROFF_nf) 3367 r->man->flags |= ROFF_NOFILL; 3368 else if (tok == ROFF_fi) 3369 r->man->flags &= ~ROFF_NOFILL; 3370 
r->man->last->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3371 r->man->next = ROFF_NEXT_SIBLING; 3372 return ROFF_IGN; 3373 } 3374 3375 static int 3376 roff_onearg(ROFF_ARGS) 3377 { 3378 struct roff_node *n; 3379 char *cp; 3380 int npos; 3381 3382 if (r->man->flags & (MAN_BLINE | MAN_ELINE) && 3383 (tok == ROFF_ce || tok == ROFF_rj || tok == ROFF_sp || 3384 tok == ROFF_ti)) 3385 man_breakscope(r->man, tok); 3386 3387 if (roffce_node != NULL && (tok == ROFF_ce || tok == ROFF_rj)) { 3388 r->man->last = roffce_node; 3389 r->man->next = ROFF_NEXT_SIBLING; 3390 } 3391 3392 roff_elem_alloc(r->man, ln, ppos, tok); 3393 n = r->man->last; 3394 3395 cp = buf->buf + pos; 3396 if (*cp != '\0') { 3397 while (*cp != '\0' && *cp != ' ') 3398 cp++; 3399 while (*cp == ' ') 3400 *cp++ = '\0'; 3401 if (*cp != '\0') 3402 mandoc_msg(MANDOCERR_ARG_EXCESS, 3403 ln, (int)(cp - buf->buf), 3404 "%s ... %s", roff_name[tok], cp); 3405 roff_word_alloc(r->man, ln, pos, buf->buf + pos); 3406 } 3407 3408 if (tok == ROFF_ce || tok == ROFF_rj) { 3409 if (r->man->last->type == ROFFT_ELEM) { 3410 roff_word_alloc(r->man, ln, pos, "1"); 3411 r->man->last->flags |= NODE_NOSRC; 3412 } 3413 npos = 0; 3414 if (roff_evalnum(r, ln, r->man->last->string, &npos, 3415 &roffce_lines, 0) == 0) { 3416 mandoc_msg(MANDOCERR_CE_NONUM, 3417 ln, pos, "ce %s", buf->buf + pos); 3418 roffce_lines = 1; 3419 } 3420 if (roffce_lines < 1) { 3421 r->man->last = r->man->last->parent; 3422 roffce_node = NULL; 3423 roffce_lines = 0; 3424 } else 3425 roffce_node = r->man->last->parent; 3426 } else { 3427 n->flags |= NODE_VALID | NODE_ENDED; 3428 r->man->last = n; 3429 } 3430 n->flags |= NODE_LINE; 3431 r->man->next = ROFF_NEXT_SIBLING; 3432 return ROFF_IGN; 3433 } 3434 3435 static int 3436 roff_manyarg(ROFF_ARGS) 3437 { 3438 struct roff_node *n; 3439 char *sp, *ep; 3440 3441 roff_elem_alloc(r->man, ln, ppos, tok); 3442 n = r->man->last; 3443 3444 for (sp = ep = buf->buf + pos; *sp != '\0'; sp = ep) { 3445 while (*ep != '\0' && 
*ep != ' ') 3446 ep++; 3447 while (*ep == ' ') 3448 *ep++ = '\0'; 3449 roff_word_alloc(r->man, ln, sp - buf->buf, sp); 3450 } 3451 3452 n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED; 3453 r->man->last = n; 3454 r->man->next = ROFF_NEXT_SIBLING; 3455 return ROFF_IGN; 3456 } 3457 3458 static int 3459 roff_als(ROFF_ARGS) 3460 { 3461 char *oldn, *newn, *end, *value; 3462 size_t oldsz, newsz, valsz; 3463 3464 newn = oldn = buf->buf + pos; 3465 if (*newn == '\0') 3466 return ROFF_IGN; 3467 3468 newsz = roff_getname(r, &oldn, ln, pos); 3469 if (newn[newsz] == '\\' || newn[newsz] == '\t' || *oldn == '\0') 3470 return ROFF_IGN; 3471 3472 end = oldn; 3473 oldsz = roff_getname(r, &end, ln, oldn - buf->buf); 3474 if (oldsz == 0) 3475 return ROFF_IGN; 3476 3477 valsz = mandoc_asprintf(&value, ".%.*s \\$@\\\"\n", 3478 (int)oldsz, oldn); 3479 roff_setstrn(&r->strtab, newn, newsz, value, valsz, 0); 3480 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3481 free(value); 3482 return ROFF_IGN; 3483 } 3484 3485 static int 3486 roff_cc(ROFF_ARGS) 3487 { 3488 const char *p; 3489 3490 p = buf->buf + pos; 3491 3492 if (*p == '\0' || (r->control = *p++) == '.') 3493 r->control = '\0'; 3494 3495 if (*p != '\0') 3496 mandoc_msg(MANDOCERR_ARG_EXCESS, 3497 ln, p - buf->buf, "cc ... %s", p); 3498 3499 return ROFF_IGN; 3500 } 3501 3502 static int 3503 roff_char(ROFF_ARGS) 3504 { 3505 const char *p, *kp, *vp; 3506 size_t ksz, vsz; 3507 int font; 3508 3509 /* Parse the character to be replaced. */ 3510 3511 kp = buf->buf + pos; 3512 p = kp + 1; 3513 if (*kp == '\0' || (*kp == '\\' && 3514 mandoc_escape(&p, NULL, NULL) != ESCAPE_SPECIAL) || 3515 (*p != ' ' && *p != '\0')) { 3516 mandoc_msg(MANDOCERR_CHAR_ARG, ln, pos, "char %s", kp); 3517 return ROFF_IGN; 3518 } 3519 ksz = p - kp; 3520 while (*p == ' ') 3521 p++; 3522 3523 /* 3524 * If the replacement string contains a font escape sequence, 3525 * we have to restore the font at the end. 
3526 */ 3527 3528 vp = p; 3529 vsz = strlen(p); 3530 font = 0; 3531 while (*p != '\0') { 3532 if (*p++ != '\\') 3533 continue; 3534 switch (mandoc_escape(&p, NULL, NULL)) { 3535 case ESCAPE_FONT: 3536 case ESCAPE_FONTROMAN: 3537 case ESCAPE_FONTITALIC: 3538 case ESCAPE_FONTBOLD: 3539 case ESCAPE_FONTBI: 3540 case ESCAPE_FONTCW: 3541 case ESCAPE_FONTPREV: 3542 font++; 3543 break; 3544 default: 3545 break; 3546 } 3547 } 3548 if (font > 1) 3549 mandoc_msg(MANDOCERR_CHAR_FONT, 3550 ln, (int)(vp - buf->buf), "%s", vp); 3551 3552 /* 3553 * Approximate the effect of .char using the .tr tables. 3554 * XXX In groff, .char and .tr interact differently. 3555 */ 3556 3557 if (ksz == 1) { 3558 if (r->xtab == NULL) 3559 r->xtab = mandoc_calloc(128, sizeof(*r->xtab)); 3560 assert((unsigned int)*kp < 128); 3561 free(r->xtab[(int)*kp].p); 3562 r->xtab[(int)*kp].sz = mandoc_asprintf(&r->xtab[(int)*kp].p, 3563 "%s%s", vp, font ? "\fP" : ""); 3564 } else { 3565 roff_setstrn(&r->xmbtab, kp, ksz, vp, vsz, 0); 3566 if (font) 3567 roff_setstrn(&r->xmbtab, kp, ksz, "\\fP", 3, 1); 3568 } 3569 return ROFF_IGN; 3570 } 3571 3572 static int 3573 roff_ec(ROFF_ARGS) 3574 { 3575 const char *p; 3576 3577 p = buf->buf + pos; 3578 if (*p == '\0') 3579 r->escape = '\\'; 3580 else { 3581 r->escape = *p; 3582 if (*++p != '\0') 3583 mandoc_msg(MANDOCERR_ARG_EXCESS, ln, 3584 (int)(p - buf->buf), "ec ... 
%s", p); 3585 } 3586 return ROFF_IGN; 3587 } 3588 3589 static int 3590 roff_eo(ROFF_ARGS) 3591 { 3592 r->escape = '\0'; 3593 if (buf->buf[pos] != '\0') 3594 mandoc_msg(MANDOCERR_ARG_SKIP, 3595 ln, pos, "eo %s", buf->buf + pos); 3596 return ROFF_IGN; 3597 } 3598 3599 static int 3600 roff_nop(ROFF_ARGS) 3601 { 3602 while (buf->buf[pos] == ' ') 3603 pos++; 3604 *offs = pos; 3605 return ROFF_RERUN; 3606 } 3607 3608 static int 3609 roff_tr(ROFF_ARGS) 3610 { 3611 const char *p, *first, *second; 3612 size_t fsz, ssz; 3613 enum mandoc_esc esc; 3614 3615 p = buf->buf + pos; 3616 3617 if (*p == '\0') { 3618 mandoc_msg(MANDOCERR_REQ_EMPTY, ln, ppos, "tr"); 3619 return ROFF_IGN; 3620 } 3621 3622 while (*p != '\0') { 3623 fsz = ssz = 1; 3624 3625 first = p++; 3626 if (*first == '\\') { 3627 esc = mandoc_escape(&p, NULL, NULL); 3628 if (esc == ESCAPE_ERROR) { 3629 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3630 (int)(p - buf->buf), "%s", first); 3631 return ROFF_IGN; 3632 } 3633 fsz = (size_t)(p - first); 3634 } 3635 3636 second = p++; 3637 if (*second == '\\') { 3638 esc = mandoc_escape(&p, NULL, NULL); 3639 if (esc == ESCAPE_ERROR) { 3640 mandoc_msg(MANDOCERR_ESC_BAD, ln, 3641 (int)(p - buf->buf), "%s", second); 3642 return ROFF_IGN; 3643 } 3644 ssz = (size_t)(p - second); 3645 } else if (*second == '\0') { 3646 mandoc_msg(MANDOCERR_TR_ODD, ln, 3647 (int)(first - buf->buf), "tr %s", first); 3648 second = " "; 3649 p--; 3650 } 3651 3652 if (fsz > 1) { 3653 roff_setstrn(&r->xmbtab, first, fsz, 3654 second, ssz, 0); 3655 continue; 3656 } 3657 3658 if (r->xtab == NULL) 3659 r->xtab = mandoc_calloc(128, 3660 sizeof(struct roffstr)); 3661 3662 free(r->xtab[(int)*first].p); 3663 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 3664 r->xtab[(int)*first].sz = ssz; 3665 } 3666 3667 return ROFF_IGN; 3668 } 3669 3670 /* 3671 * Implementation of the .return request. 3672 * There is no need to call roff_userret() from here. 
3673 * The read module will call that after rewinding the reader stack 3674 * to the place from where the current macro was called. 3675 */ 3676 static int 3677 roff_return(ROFF_ARGS) 3678 { 3679 if (r->mstackpos >= 0) 3680 return ROFF_IGN | ROFF_USERRET; 3681 3682 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "return"); 3683 return ROFF_IGN; 3684 } 3685 3686 static int 3687 roff_rn(ROFF_ARGS) 3688 { 3689 const char *value; 3690 char *oldn, *newn, *end; 3691 size_t oldsz, newsz; 3692 int deftype; 3693 3694 oldn = newn = buf->buf + pos; 3695 if (*oldn == '\0') 3696 return ROFF_IGN; 3697 3698 oldsz = roff_getname(r, &newn, ln, pos); 3699 if (oldn[oldsz] == '\\' || oldn[oldsz] == '\t' || *newn == '\0') 3700 return ROFF_IGN; 3701 3702 end = newn; 3703 newsz = roff_getname(r, &end, ln, newn - buf->buf); 3704 if (newsz == 0) 3705 return ROFF_IGN; 3706 3707 deftype = ROFFDEF_ANY; 3708 value = roff_getstrn(r, oldn, oldsz, &deftype); 3709 switch (deftype) { 3710 case ROFFDEF_USER: 3711 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3712 roff_setstrn(&r->strtab, oldn, oldsz, NULL, 0, 0); 3713 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3714 break; 3715 case ROFFDEF_PRE: 3716 roff_setstrn(&r->strtab, newn, newsz, value, strlen(value), 0); 3717 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3718 break; 3719 case ROFFDEF_REN: 3720 roff_setstrn(&r->rentab, newn, newsz, value, strlen(value), 0); 3721 roff_setstrn(&r->rentab, oldn, oldsz, NULL, 0, 0); 3722 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3723 break; 3724 case ROFFDEF_STD: 3725 roff_setstrn(&r->rentab, newn, newsz, oldn, oldsz, 0); 3726 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3727 break; 3728 default: 3729 roff_setstrn(&r->strtab, newn, newsz, NULL, 0, 0); 3730 roff_setstrn(&r->rentab, newn, newsz, NULL, 0, 0); 3731 break; 3732 } 3733 return ROFF_IGN; 3734 } 3735 3736 static int 3737 roff_shift(ROFF_ARGS) 3738 { 3739 struct mctx *ctx; 3740 int levels, i; 3741 3742 levels = 1; 
3743 if (buf->buf[pos] != '\0' && 3744 roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) { 3745 mandoc_msg(MANDOCERR_CE_NONUM, 3746 ln, pos, "shift %s", buf->buf + pos); 3747 levels = 1; 3748 } 3749 if (r->mstackpos < 0) { 3750 mandoc_msg(MANDOCERR_REQ_NOMAC, ln, ppos, "shift"); 3751 return ROFF_IGN; 3752 } 3753 ctx = r->mstack + r->mstackpos; 3754 if (levels > ctx->argc) { 3755 mandoc_msg(MANDOCERR_SHIFT, 3756 ln, pos, "%d, but max is %d", levels, ctx->argc); 3757 levels = ctx->argc; 3758 } 3759 if (levels == 0) 3760 return ROFF_IGN; 3761 for (i = 0; i < levels; i++) 3762 free(ctx->argv[i]); 3763 ctx->argc -= levels; 3764 for (i = 0; i < ctx->argc; i++) 3765 ctx->argv[i] = ctx->argv[i + levels]; 3766 return ROFF_IGN; 3767 } 3768 3769 static int 3770 roff_so(ROFF_ARGS) 3771 { 3772 char *name, *cp; 3773 3774 name = buf->buf + pos; 3775 mandoc_msg(MANDOCERR_SO, ln, ppos, "so %s", name); 3776 3777 /* 3778 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3779 * opening anything that's not in our cwd or anything beneath 3780 * it. Thus, explicitly disallow traversing up the file-system 3781 * or using absolute paths. 3782 */ 3783 3784 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3785 mandoc_msg(MANDOCERR_SO_PATH, ln, ppos, ".so %s", name); 3786 buf->sz = mandoc_asprintf(&cp, 3787 ".sp\nSee the file %s.\n.sp", name) + 1; 3788 free(buf->buf); 3789 buf->buf = cp; 3790 *offs = 0; 3791 return ROFF_REPARSE; 3792 } 3793 3794 *offs = pos; 3795 return ROFF_SO; 3796 } 3797 3798 /* --- user defined strings and macros ------------------------------------ */ 3799 3800 static int 3801 roff_userdef(ROFF_ARGS) 3802 { 3803 struct mctx *ctx; 3804 char *arg, *ap, *dst, *src; 3805 size_t sz; 3806 3807 /* Initialize a new macro stack context. 
*/ 3808 3809 if (++r->mstackpos == r->mstacksz) { 3810 r->mstack = mandoc_recallocarray(r->mstack, 3811 r->mstacksz, r->mstacksz + 8, sizeof(*r->mstack)); 3812 r->mstacksz += 8; 3813 } 3814 ctx = r->mstack + r->mstackpos; 3815 ctx->argsz = 0; 3816 ctx->argc = 0; 3817 ctx->argv = NULL; 3818 3819 /* 3820 * Collect pointers to macro argument strings, 3821 * NUL-terminating them and escaping quotes. 3822 */ 3823 3824 src = buf->buf + pos; 3825 while (*src != '\0') { 3826 if (ctx->argc == ctx->argsz) { 3827 ctx->argsz += 8; 3828 ctx->argv = mandoc_reallocarray(ctx->argv, 3829 ctx->argsz, sizeof(*ctx->argv)); 3830 } 3831 arg = roff_getarg(r, &src, ln, &pos); 3832 sz = 1; /* For the terminating NUL. */ 3833 for (ap = arg; *ap != '\0'; ap++) 3834 sz += *ap == '"' ? 4 : 1; 3835 ctx->argv[ctx->argc++] = dst = mandoc_malloc(sz); 3836 for (ap = arg; *ap != '\0'; ap++) { 3837 if (*ap == '"') { 3838 memcpy(dst, "\\(dq", 4); 3839 dst += 4; 3840 } else 3841 *dst++ = *ap; 3842 } 3843 *dst = '\0'; 3844 free(arg); 3845 } 3846 3847 /* Replace the macro invocation by the macro definition. */ 3848 3849 free(buf->buf); 3850 buf->buf = mandoc_strdup(r->current_string); 3851 buf->sz = strlen(buf->buf) + 1; 3852 *offs = 0; 3853 3854 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 3855 ROFF_REPARSE | ROFF_USERCALL : ROFF_IGN | ROFF_APPEND; 3856 } 3857 3858 /* 3859 * Calling a high-level macro that was renamed with .rn. 3860 * r->current_string has already been set up by roff_parse(). 3861 */ 3862 static int 3863 roff_renamed(ROFF_ARGS) 3864 { 3865 char *nbuf; 3866 3867 buf->sz = mandoc_asprintf(&nbuf, ".%s%s%s", r->current_string, 3868 buf->buf[pos] == '\0' ? "" : " ", buf->buf + pos) + 1; 3869 free(buf->buf); 3870 buf->buf = nbuf; 3871 *offs = 0; 3872 return ROFF_CONT; 3873 } 3874 3875 /* 3876 * Measure the length in bytes of the roff identifier at *cpp 3877 * and advance the pointer to the next word. 
3878 */ 3879 static size_t 3880 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3881 { 3882 char *name, *cp; 3883 size_t namesz; 3884 3885 name = *cpp; 3886 if (*name == '\0') 3887 return 0; 3888 3889 /* Advance cp to the byte after the end of the name. */ 3890 3891 for (cp = name; 1; cp++) { 3892 namesz = cp - name; 3893 if (*cp == '\0') 3894 break; 3895 if (*cp == ' ' || *cp == '\t') { 3896 cp++; 3897 break; 3898 } 3899 if (*cp != '\\') 3900 continue; 3901 if (cp[1] == '{' || cp[1] == '}') 3902 break; 3903 if (*++cp == '\\') 3904 continue; 3905 mandoc_msg(MANDOCERR_NAMESC, ln, pos, 3906 "%.*s", (int)(cp - name + 1), name); 3907 mandoc_escape((const char **)&cp, NULL, NULL); 3908 break; 3909 } 3910 3911 /* Read past spaces. */ 3912 3913 while (*cp == ' ') 3914 cp++; 3915 3916 *cpp = cp; 3917 return namesz; 3918 } 3919 3920 /* 3921 * Store *string into the user-defined string called *name. 3922 * To clear an existing entry, call with (*r, *name, NULL, 0). 3923 * append == 0: replace mode 3924 * append == 1: single-line append mode 3925 * append == 2: multiline append mode, append '\n' after each call 3926 */ 3927 static void 3928 roff_setstr(struct roff *r, const char *name, const char *string, 3929 int append) 3930 { 3931 size_t namesz; 3932 3933 namesz = strlen(name); 3934 roff_setstrn(&r->strtab, name, namesz, string, 3935 string ? strlen(string) : 0, append); 3936 roff_setstrn(&r->rentab, name, namesz, NULL, 0, 0); 3937 } 3938 3939 static void 3940 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3941 const char *string, size_t stringsz, int append) 3942 { 3943 struct roffkv *n; 3944 char *c; 3945 int i; 3946 size_t oldch, newch; 3947 3948 /* Search for an existing string with the same name. */ 3949 n = *r; 3950 3951 while (n && (namesz != n->key.sz || 3952 strncmp(n->key.p, name, namesz))) 3953 n = n->next; 3954 3955 if (NULL == n) { 3956 /* Create a new string table entry. 
*/ 3957 n = mandoc_malloc(sizeof(struct roffkv)); 3958 n->key.p = mandoc_strndup(name, namesz); 3959 n->key.sz = namesz; 3960 n->val.p = NULL; 3961 n->val.sz = 0; 3962 n->next = *r; 3963 *r = n; 3964 } else if (0 == append) { 3965 free(n->val.p); 3966 n->val.p = NULL; 3967 n->val.sz = 0; 3968 } 3969 3970 if (NULL == string) 3971 return; 3972 3973 /* 3974 * One additional byte for the '\n' in multiline mode, 3975 * and one for the terminating '\0'. 3976 */ 3977 newch = stringsz + (1 < append ? 2u : 1u); 3978 3979 if (NULL == n->val.p) { 3980 n->val.p = mandoc_malloc(newch); 3981 *n->val.p = '\0'; 3982 oldch = 0; 3983 } else { 3984 oldch = n->val.sz; 3985 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 3986 } 3987 3988 /* Skip existing content in the destination buffer. */ 3989 c = n->val.p + (int)oldch; 3990 3991 /* Append new content to the destination buffer. */ 3992 i = 0; 3993 while (i < (int)stringsz) { 3994 /* 3995 * Rudimentary roff copy mode: 3996 * Handle escaped backslashes. 3997 */ 3998 if ('\\' == string[i] && '\\' == string[i + 1]) 3999 i++; 4000 *c++ = string[i++]; 4001 } 4002 4003 /* Append terminating bytes. 
*/ 4004 if (1 < append) 4005 *c++ = '\n'; 4006 4007 *c = '\0'; 4008 n->val.sz = (int)(c - n->val.p); 4009 } 4010 4011 static const char * 4012 roff_getstrn(struct roff *r, const char *name, size_t len, 4013 int *deftype) 4014 { 4015 const struct roffkv *n; 4016 int found, i; 4017 enum roff_tok tok; 4018 4019 found = 0; 4020 for (n = r->strtab; n != NULL; n = n->next) { 4021 if (strncmp(name, n->key.p, len) != 0 || 4022 n->key.p[len] != '\0' || n->val.p == NULL) 4023 continue; 4024 if (*deftype & ROFFDEF_USER) { 4025 *deftype = ROFFDEF_USER; 4026 return n->val.p; 4027 } else { 4028 found = 1; 4029 break; 4030 } 4031 } 4032 for (n = r->rentab; n != NULL; n = n->next) { 4033 if (strncmp(name, n->key.p, len) != 0 || 4034 n->key.p[len] != '\0' || n->val.p == NULL) 4035 continue; 4036 if (*deftype & ROFFDEF_REN) { 4037 *deftype = ROFFDEF_REN; 4038 return n->val.p; 4039 } else { 4040 found = 1; 4041 break; 4042 } 4043 } 4044 for (i = 0; i < PREDEFS_MAX; i++) { 4045 if (strncmp(name, predefs[i].name, len) != 0 || 4046 predefs[i].name[len] != '\0') 4047 continue; 4048 if (*deftype & ROFFDEF_PRE) { 4049 *deftype = ROFFDEF_PRE; 4050 return predefs[i].str; 4051 } else { 4052 found = 1; 4053 break; 4054 } 4055 } 4056 if (r->man->meta.macroset != MACROSET_MAN) { 4057 for (tok = MDOC_Dd; tok < MDOC_MAX; tok++) { 4058 if (strncmp(name, roff_name[tok], len) != 0 || 4059 roff_name[tok][len] != '\0') 4060 continue; 4061 if (*deftype & ROFFDEF_STD) { 4062 *deftype = ROFFDEF_STD; 4063 return NULL; 4064 } else { 4065 found = 1; 4066 break; 4067 } 4068 } 4069 } 4070 if (r->man->meta.macroset != MACROSET_MDOC) { 4071 for (tok = MAN_TH; tok < MAN_MAX; tok++) { 4072 if (strncmp(name, roff_name[tok], len) != 0 || 4073 roff_name[tok][len] != '\0') 4074 continue; 4075 if (*deftype & ROFFDEF_STD) { 4076 *deftype = ROFFDEF_STD; 4077 return NULL; 4078 } else { 4079 found = 1; 4080 break; 4081 } 4082 } 4083 } 4084 4085 if (found == 0 && *deftype != ROFFDEF_ANY) { 4086 if (*deftype & ROFFDEF_REN) { 
4087 /* 4088 * This might still be a request, 4089 * so do not treat it as undefined yet. 4090 */ 4091 *deftype = ROFFDEF_UNDEF; 4092 return NULL; 4093 } 4094 4095 /* Using an undefined string defines it to be empty. */ 4096 4097 roff_setstrn(&r->strtab, name, len, "", 0, 0); 4098 roff_setstrn(&r->rentab, name, len, NULL, 0, 0); 4099 } 4100 4101 *deftype = 0; 4102 return NULL; 4103 } 4104 4105 static void 4106 roff_freestr(struct roffkv *r) 4107 { 4108 struct roffkv *n, *nn; 4109 4110 for (n = r; n; n = nn) { 4111 free(n->key.p); 4112 free(n->val.p); 4113 nn = n->next; 4114 free(n); 4115 } 4116 } 4117 4118 /* --- accessors and utility functions ------------------------------------ */ 4119 4120 /* 4121 * Duplicate an input string, making the appropriate character 4122 * conversations (as stipulated by `tr') along the way. 4123 * Returns a heap-allocated string with all the replacements made. 4124 */ 4125 char * 4126 roff_strdup(const struct roff *r, const char *p) 4127 { 4128 const struct roffkv *cp; 4129 char *res; 4130 const char *pp; 4131 size_t ssz, sz; 4132 enum mandoc_esc esc; 4133 4134 if (NULL == r->xmbtab && NULL == r->xtab) 4135 return mandoc_strdup(p); 4136 else if ('\0' == *p) 4137 return mandoc_strdup(""); 4138 4139 /* 4140 * Step through each character looking for term matches 4141 * (remember that a `tr' can be invoked with an escape, which is 4142 * a glyph but the escape is multi-character). 4143 * We only do this if the character hash has been initialised 4144 * and the string is >0 length. 
4145 */ 4146 4147 res = NULL; 4148 ssz = 0; 4149 4150 while ('\0' != *p) { 4151 assert((unsigned int)*p < 128); 4152 if ('\\' != *p && r->xtab && r->xtab[(unsigned int)*p].p) { 4153 sz = r->xtab[(int)*p].sz; 4154 res = mandoc_realloc(res, ssz + sz + 1); 4155 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 4156 ssz += sz; 4157 p++; 4158 continue; 4159 } else if ('\\' != *p) { 4160 res = mandoc_realloc(res, ssz + 2); 4161 res[ssz++] = *p++; 4162 continue; 4163 } 4164 4165 /* Search for term matches. */ 4166 for (cp = r->xmbtab; cp; cp = cp->next) 4167 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 4168 break; 4169 4170 if (NULL != cp) { 4171 /* 4172 * A match has been found. 4173 * Append the match to the array and move 4174 * forward by its keysize. 4175 */ 4176 res = mandoc_realloc(res, 4177 ssz + cp->val.sz + 1); 4178 memcpy(res + ssz, cp->val.p, cp->val.sz); 4179 ssz += cp->val.sz; 4180 p += (int)cp->key.sz; 4181 continue; 4182 } 4183 4184 /* 4185 * Handle escapes carefully: we need to copy 4186 * over just the escape itself, or else we might 4187 * do replacements within the escape itself. 4188 * Make sure to pass along the bogus string. 4189 */ 4190 pp = p++; 4191 esc = mandoc_escape(&p, NULL, NULL); 4192 if (ESCAPE_ERROR == esc) { 4193 sz = strlen(pp); 4194 res = mandoc_realloc(res, ssz + sz + 1); 4195 memcpy(res + ssz, pp, sz); 4196 break; 4197 } 4198 /* 4199 * We bail out on bad escapes. 4200 * No need to warn: we already did so when 4201 * roff_expand() was called. 4202 */ 4203 sz = (int)(p - pp); 4204 res = mandoc_realloc(res, ssz + sz + 1); 4205 memcpy(res + ssz, pp, sz); 4206 ssz += sz; 4207 } 4208 4209 res[(int)ssz] = '\0'; 4210 return res; 4211 } 4212 4213 int 4214 roff_getformat(const struct roff *r) 4215 { 4216 4217 return r->format; 4218 } 4219 4220 /* 4221 * Find out whether a line is a macro line or not. 4222 * If it is, adjust the current position and return one; if it isn't, 4223 * return zero and don't change the current position. 
4224 * If the control character has been set with `.cc', then let that grain 4225 * precedence. 4226 * This is slighly contrary to groff, where using the non-breaking 4227 * control character when `cc' has been invoked will cause the 4228 * non-breaking macro contents to be printed verbatim. 4229 */ 4230 int 4231 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 4232 { 4233 int pos; 4234 4235 pos = *ppos; 4236 4237 if (r->control != '\0' && cp[pos] == r->control) 4238 pos++; 4239 else if (r->control != '\0') 4240 return 0; 4241 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 4242 pos += 2; 4243 else if ('.' == cp[pos] || '\'' == cp[pos]) 4244 pos++; 4245 else 4246 return 0; 4247 4248 while (' ' == cp[pos] || '\t' == cp[pos]) 4249 pos++; 4250 4251 *ppos = pos; 4252 return 1; 4253 } 4254