1 /* $OpenBSD: roff.c,v 1.156 2016/01/08 17:48:04 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "mandoc.h" 28 #include "mandoc_aux.h" 29 #include "roff.h" 30 #include "libmandoc.h" 31 #include "roff_int.h" 32 #include "libroff.h" 33 34 /* Maximum number of string expansions per line, to break infinite loops. */ 35 #define EXPAND_LIMIT 1000 36 37 /* --- data types --------------------------------------------------------- */ 38 39 enum rofft { 40 ROFF_ab, 41 ROFF_ad, 42 ROFF_af, 43 ROFF_aln, 44 ROFF_als, 45 ROFF_am, 46 ROFF_am1, 47 ROFF_ami, 48 ROFF_ami1, 49 ROFF_as, 50 ROFF_as1, 51 ROFF_asciify, 52 ROFF_backtrace, 53 ROFF_bd, 54 ROFF_bleedat, 55 ROFF_blm, 56 ROFF_box, 57 ROFF_boxa, 58 ROFF_bp, 59 ROFF_BP, 60 /* MAN_br, MDOC_br */ 61 ROFF_break, 62 ROFF_breakchar, 63 ROFF_brnl, 64 ROFF_brp, 65 ROFF_brpnl, 66 ROFF_c2, 67 ROFF_cc, 68 ROFF_ce, 69 ROFF_cf, 70 ROFF_cflags, 71 ROFF_ch, 72 ROFF_char, 73 ROFF_chop, 74 ROFF_class, 75 ROFF_close, 76 ROFF_CL, 77 ROFF_color, 78 ROFF_composite, 79 ROFF_continue, 80 ROFF_cp, 81 ROFF_cropat, 82 ROFF_cs, 83 ROFF_cu, 84 ROFF_da, 85 ROFF_dch, 86 ROFF_Dd, 87 ROFF_de, 88 ROFF_de1, 89 ROFF_defcolor, 90 ROFF_dei, 91 ROFF_dei1, 92 ROFF_device, 93 ROFF_devicem, 94 ROFF_di, 95 ROFF_do, 96 ROFF_ds, 97 ROFF_ds1, 98 ROFF_dwh, 99 ROFF_dt, 100 ROFF_ec, 101 ROFF_ecr, 102 ROFF_ecs, 103 ROFF_el, 104 ROFF_em, 105 ROFF_EN, 106 ROFF_eo, 107 ROFF_EP, 108 ROFF_EQ, 109 ROFF_errprint, 110 ROFF_ev, 111 ROFF_evc, 112 ROFF_ex, 113 ROFF_fallback, 114 ROFF_fam, 115 ROFF_fc, 116 ROFF_fchar, 117 ROFF_fcolor, 118 ROFF_fdeferlig, 119 ROFF_feature, 120 /* MAN_fi; ignored in mdoc(7) */ 121 ROFF_fkern, 122 ROFF_fl, 123 ROFF_flig, 124 ROFF_fp, 125 ROFF_fps, 126 ROFF_fschar, 127 ROFF_fspacewidth, 128 ROFF_fspecial, 129 /* MAN_ft; ignored in mdoc(7) */ 130 ROFF_ftr, 131 ROFF_fzoom, 132 ROFF_gcolor, 133 ROFF_hc, 134 ROFF_hcode, 135 ROFF_hidechar, 136 ROFF_hla, 137 ROFF_hlm, 138 ROFF_hpf, 139 ROFF_hpfa, 140 ROFF_hpfcode, 141 ROFF_hw, 142 ROFF_hy, 143 ROFF_hylang, 144 ROFF_hylen, 145 ROFF_hym, 146 ROFF_hypp, 147 ROFF_hys, 148 ROFF_ie, 149 ROFF_if, 150 ROFF_ig, 151 /* MAN_in; ignored in mdoc(7) */ 152 ROFF_index, 153 ROFF_it, 154 ROFF_itc, 155 ROFF_IX, 156 ROFF_kern, 157 ROFF_kernafter, 158 ROFF_kernbefore, 159 ROFF_kernpair, 160 ROFF_lc, 161 ROFF_lc_ctype, 162 ROFF_lds, 163 ROFF_length, 164 ROFF_letadj, 165 ROFF_lf, 166 ROFF_lg, 167 ROFF_lhang, 168 ROFF_linetabs, 169 /* MAN_ll, MDOC_ll */ 170 ROFF_lnr, 171 ROFF_lnrf, 172 ROFF_lpfx, 173 ROFF_ls, 174 ROFF_lsm, 175 ROFF_lt, 176 ROFF_mc, 177 ROFF_mediasize, 178 ROFF_minss, 179 ROFF_mk, 180 ROFF_mso, 181 ROFF_na, 182 ROFF_ne, 183 /* MAN_nf; ignored in mdoc(7) */ 184 ROFF_nh, 185 ROFF_nhychar, 186 ROFF_nm, 187 ROFF_nn, 188 ROFF_nop, 189 ROFF_nr, 190 ROFF_nrf, 191 ROFF_nroff, 192 ROFF_ns, 193 ROFF_nx, 194 ROFF_open, 195 ROFF_opena, 196 ROFF_os, 197 ROFF_output, 198 ROFF_padj, 199 ROFF_papersize, 200 ROFF_pc, 201 ROFF_pev, 202 ROFF_pi, 203 ROFF_PI, 204 ROFF_pl, 205 ROFF_pm, 206 ROFF_pn, 207 ROFF_pnr, 208 ROFF_po, 209 ROFF_ps, 210 ROFF_psbb, 211 ROFF_pshape, 212 ROFF_pso, 213 ROFF_ptr, 214 ROFF_pvs, 215 ROFF_rchar, 216 ROFF_rd, 217 ROFF_recursionlimit, 218 ROFF_return, 219 ROFF_rfschar, 220 ROFF_rhang, 221 ROFF_rj, 222 ROFF_rm, 223 ROFF_rn, 224 ROFF_rnn, 225 ROFF_rr, 226 ROFF_rs, 227 ROFF_rt, 228 ROFF_schar, 229 ROFF_sentchar, 230 ROFF_shc, 231 ROFF_shift, 232 ROFF_sizes, 233 ROFF_so, 234 /* MAN_sp, MDOC_sp */ 235 ROFF_spacewidth, 236 ROFF_special, 237 ROFF_spreadwarn, 238 ROFF_ss, 239 ROFF_sty, 240 ROFF_substring, 241 ROFF_sv, 242 ROFF_sy, 243 ROFF_T_, 244 ROFF_ta, 245 ROFF_tc, 246 ROFF_TE, 247 ROFF_TH, 248 ROFF_ti, 249 ROFF_tkf, 250 ROFF_tl, 251 ROFF_tm, 252 ROFF_tm1, 253 ROFF_tmc, 254 ROFF_tr, 255 ROFF_track, 256 ROFF_transchar, 257 ROFF_trf, 258 ROFF_trimat, 259 ROFF_trin, 260 ROFF_trnt, 261 ROFF_troff, 262 ROFF_TS, 263 ROFF_uf, 264 ROFF_ul, 265 ROFF_unformat, 266 ROFF_unwatch, 267 ROFF_unwatchn, 268 ROFF_vpt, 269 ROFF_vs, 270 ROFF_warn, 271 ROFF_warnscale, 272 ROFF_watch, 273 ROFF_watchlength, 274 ROFF_watchn, 275 ROFF_wh, 276 ROFF_while, 277 ROFF_write, 278 ROFF_writec, 279 ROFF_writem, 280 ROFF_xflag, 281 ROFF_cblock, 282 ROFF_USERDEF, 283 ROFF_MAX 284 }; 285 286 /* 287 * An incredibly-simple string buffer. 288 */ 289 struct roffstr { 290 char *p; /* nil-terminated buffer */ 291 size_t sz; /* saved strlen(p) */ 292 }; 293 294 /* 295 * A key-value roffstr pair as part of a singly-linked list. 296 */ 297 struct roffkv { 298 struct roffstr key; 299 struct roffstr val; 300 struct roffkv *next; /* next in list */ 301 }; 302 303 /* 304 * A single number register as part of a singly-linked list. 305 */ 306 struct roffreg { 307 struct roffstr key; 308 int val; 309 struct roffreg *next; 310 }; 311 312 struct roff { 313 struct mparse *parse; /* parse point */ 314 struct roffnode *last; /* leaf of stack */ 315 int *rstack; /* stack of inverted `ie' values */ 316 struct roffreg *regtab; /* number registers */ 317 struct roffkv *strtab; /* user-defined strings & macros */ 318 struct roffkv *xmbtab; /* multi-byte trans table (`tr') */ 319 struct roffstr *xtab; /* single-byte trans table (`tr') */ 320 const char *current_string; /* value of last called user macro */ 321 struct tbl_node *first_tbl; /* first table parsed */ 322 struct tbl_node *last_tbl; /* last table parsed */ 323 struct tbl_node *tbl; /* current table being parsed */ 324 struct eqn_node *last_eqn; /* last equation parsed */ 325 struct eqn_node *first_eqn; /* first equation parsed */ 326 struct eqn_node *eqn; /* current equation being parsed */ 327 int eqn_inline; /* current equation is inline */ 328 int options; /* parse options */ 329 int rstacksz; /* current size limit of rstack */ 330 int rstackpos; /* position in rstack */ 331 int format; /* current file in mdoc or man format */ 332 int argc; /* number of args of the last macro */ 333 char control; /* control character */ 334 }; 335 336 struct roffnode { 337 enum rofft tok; /* type of node */ 338 struct roffnode *parent; /* up one in stack */ 339 int line; /* parse line */ 340 int col; /* parse col */ 341 char *name; /* node name, e.g. macro name */ 342 char *end; /* end-rules: custom token */ 343 int endspan; /* end-rules: next-line or infty */ 344 int rule; /* current evaluation rule */ 345 }; 346 347 #define ROFF_ARGS struct roff *r, /* parse ctx */ \ 348 enum rofft tok, /* tok of macro */ \ 349 struct buf *buf, /* input buffer */ \ 350 int ln, /* parse line */ \ 351 int ppos, /* original pos in buffer */ \ 352 int pos, /* current pos in buffer */ \ 353 int *offs /* reset offset of buffer data */ 354 355 typedef enum rofferr (*roffproc)(ROFF_ARGS); 356 357 struct roffmac { 358 const char *name; /* macro name */ 359 roffproc proc; /* process new macro */ 360 roffproc text; /* process as child text of macro */ 361 roffproc sub; /* process as child of macro */ 362 int flags; 363 #define ROFFMAC_STRUCT (1 << 0) /* always interpret */ 364 struct roffmac *next; 365 }; 366 367 struct predef { 368 const char *name; /* predefined input name */ 369 const char *str; /* replacement symbol */ 370 }; 371 372 #define PREDEF(__name, __str) \ 373 { (__name), (__str) }, 374 375 /* --- function prototypes ------------------------------------------------ */ 376 377 static enum rofft roffhash_find(const char *, size_t); 378 static void roffhash_init(void); 379 static void roffnode_cleanscope(struct roff *); 380 static void roffnode_pop(struct roff *); 381 static void roffnode_push(struct roff *, enum rofft, 382 const char *, int, int); 383 static enum rofferr roff_block(ROFF_ARGS); 384 static enum rofferr roff_block_text(ROFF_ARGS); 385 static enum rofferr roff_block_sub(ROFF_ARGS); 386 static enum rofferr roff_brp(ROFF_ARGS); 387 static enum rofferr roff_cblock(ROFF_ARGS); 388 static enum rofferr roff_cc(ROFF_ARGS); 389 static void roff_ccond(struct roff *, int, int); 390 static enum rofferr roff_cond(ROFF_ARGS); 391 static enum rofferr roff_cond_text(ROFF_ARGS); 392 static enum rofferr roff_cond_sub(ROFF_ARGS); 393 static enum rofferr roff_ds(ROFF_ARGS); 394 static enum rofferr roff_eqndelim(struct roff *, struct buf *, int); 395 static int roff_evalcond(struct roff *r, int, char *, int *); 396 static int roff_evalnum(struct roff *, int, 397 const char *, int *, int *, int); 398 static int roff_evalpar(struct roff *, int, 399 const char *, int *, int *, int); 400 static int roff_evalstrcond(const char *, int *); 401 static void roff_free1(struct roff *); 402 static void roff_freereg(struct roffreg *); 403 static void roff_freestr(struct roffkv *); 404 static size_t roff_getname(struct roff *, char **, int, int); 405 static int roff_getnum(const char *, int *, int *, int); 406 static int roff_getop(const char *, int *, char *); 407 static int roff_getregn(const struct roff *, 408 const char *, size_t); 409 static int roff_getregro(const struct roff *, 410 const char *name); 411 static const char *roff_getstrn(const struct roff *, 412 const char *, size_t); 413 static int roff_hasregn(const struct roff *, 414 const char *, size_t); 415 static enum rofferr roff_insec(ROFF_ARGS); 416 static enum rofferr roff_it(ROFF_ARGS); 417 static enum rofferr roff_line_ignore(ROFF_ARGS); 418 static void roff_man_alloc1(struct roff_man *); 419 static void roff_man_free1(struct roff_man *); 420 static enum rofferr roff_nr(ROFF_ARGS); 421 static enum rofft roff_parse(struct roff *, char *, int *, 422 int, int); 423 static enum rofferr roff_parsetext(struct buf *, int, int *); 424 static enum rofferr roff_res(struct roff *, struct buf *, int, int); 425 static enum rofferr roff_rm(ROFF_ARGS); 426 static enum rofferr roff_rr(ROFF_ARGS); 427 static void roff_setstr(struct roff *, 428 const char *, const char *, int); 429 static void roff_setstrn(struct roffkv **, const char *, 430 size_t, const char *, size_t, int); 431 static enum rofferr roff_so(ROFF_ARGS); 432 static enum rofferr roff_tr(ROFF_ARGS); 433 static enum rofferr roff_Dd(ROFF_ARGS); 434 static enum rofferr roff_TH(ROFF_ARGS); 435 static enum rofferr roff_TE(ROFF_ARGS); 436 static enum rofferr roff_TS(ROFF_ARGS); 437 static enum rofferr roff_EQ(ROFF_ARGS); 438 static enum rofferr roff_EN(ROFF_ARGS); 439 static enum rofferr roff_T_(ROFF_ARGS); 440 static enum rofferr roff_unsupp(ROFF_ARGS); 441 static enum rofferr roff_userdef(ROFF_ARGS); 442 443 /* --- constant data ------------------------------------------------------ */ 444 445 /* See roffhash_find() */ 446 447 #define ASCII_HI 126 448 #define ASCII_LO 33 449 #define HASHWIDTH (ASCII_HI - ASCII_LO + 1) 450 451 #define ROFFNUM_SCALE (1 << 0) /* Honour scaling in roff_getnum(). */ 452 #define ROFFNUM_WHITE (1 << 1) /* Skip whitespace in roff_evalnum(). */ 453 454 static struct roffmac *hash[HASHWIDTH]; 455 456 static struct roffmac roffs[ROFF_MAX] = { 457 { "ab", roff_unsupp, NULL, NULL, 0, NULL }, 458 { "ad", roff_line_ignore, NULL, NULL, 0, NULL }, 459 { "af", roff_line_ignore, NULL, NULL, 0, NULL }, 460 { "aln", roff_unsupp, NULL, NULL, 0, NULL }, 461 { "als", roff_unsupp, NULL, NULL, 0, NULL }, 462 { "am", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 463 { "am1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 464 { "ami", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 465 { "ami1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 466 { "as", roff_ds, NULL, NULL, 0, NULL }, 467 { "as1", roff_ds, NULL, NULL, 0, NULL }, 468 { "asciify", roff_unsupp, NULL, NULL, 0, NULL }, 469 { "backtrace", roff_line_ignore, NULL, NULL, 0, NULL }, 470 { "bd", roff_line_ignore, NULL, NULL, 0, NULL }, 471 { "bleedat", roff_line_ignore, NULL, NULL, 0, NULL }, 472 { "blm", roff_unsupp, NULL, NULL, 0, NULL }, 473 { "box", roff_unsupp, NULL, NULL, 0, NULL }, 474 { "boxa", roff_unsupp, NULL, NULL, 0, NULL }, 475 { "bp", roff_line_ignore, NULL, NULL, 0, NULL }, 476 { "BP", roff_unsupp, NULL, NULL, 0, NULL }, 477 { "break", roff_unsupp, NULL, NULL, 0, NULL }, 478 { "breakchar", roff_line_ignore, NULL, NULL, 0, NULL }, 479 { "brnl", roff_line_ignore, NULL, NULL, 0, NULL }, 480 { "brp", roff_brp, NULL, NULL, 0, NULL }, 481 { "brpnl", roff_line_ignore, NULL, NULL, 0, NULL }, 482 { "c2", roff_unsupp, NULL, NULL, 0, NULL }, 483 { "cc", roff_cc, NULL, NULL, 0, NULL }, 484 { "ce", roff_line_ignore, NULL, NULL, 0, NULL }, 485 { "cf", roff_insec, NULL, NULL, 0, NULL }, 486 { "cflags", roff_line_ignore, NULL, NULL, 0, NULL }, 487 { "ch", roff_line_ignore, NULL, NULL, 0, NULL }, 488 { "char", roff_unsupp, NULL, NULL, 0, NULL }, 489 { "chop", roff_unsupp, NULL, NULL, 0, NULL }, 490 { "class", roff_line_ignore, NULL, NULL, 0, NULL }, 491 { "close", roff_insec, NULL, NULL, 0, NULL }, 492 { "CL", roff_unsupp, NULL, NULL, 0, NULL }, 493 { "color", roff_line_ignore, NULL, NULL, 0, NULL }, 494 { "composite", roff_unsupp, NULL, NULL, 0, NULL }, 495 { "continue", roff_unsupp, NULL, NULL, 0, NULL }, 496 { "cp", roff_line_ignore, NULL, NULL, 0, NULL }, 497 { "cropat", roff_line_ignore, NULL, NULL, 0, NULL }, 498 { "cs", roff_line_ignore, NULL, NULL, 0, NULL }, 499 { "cu", roff_line_ignore, NULL, NULL, 0, NULL }, 500 { "da", roff_unsupp, NULL, NULL, 0, NULL }, 501 { "dch", roff_unsupp, NULL, NULL, 0, NULL }, 502 { "Dd", roff_Dd, NULL, NULL, 0, NULL }, 503 { "de", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 504 { "de1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 505 { "defcolor", roff_line_ignore, NULL, NULL, 0, NULL }, 506 { "dei", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 507 { "dei1", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 508 { "device", roff_unsupp, NULL, NULL, 0, NULL }, 509 { "devicem", roff_unsupp, NULL, NULL, 0, NULL }, 510 { "di", roff_unsupp, NULL, NULL, 0, NULL }, 511 { "do", roff_unsupp, NULL, NULL, 0, NULL }, 512 { "ds", roff_ds, NULL, NULL, 0, NULL }, 513 { "ds1", roff_ds, NULL, NULL, 0, NULL }, 514 { "dwh", roff_unsupp, NULL, NULL, 0, NULL }, 515 { "dt", roff_unsupp, NULL, NULL, 0, NULL }, 516 { "ec", roff_unsupp, NULL, NULL, 0, NULL }, 517 { "ecr", roff_unsupp, NULL, NULL, 0, NULL }, 518 { "ecs", roff_unsupp, NULL, NULL, 0, NULL }, 519 { "el", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 520 { "em", roff_unsupp, NULL, NULL, 0, NULL }, 521 { "EN", roff_EN, NULL, NULL, 0, NULL }, 522 { "eo", roff_unsupp, NULL, NULL, 0, NULL }, 523 { "EP", roff_unsupp, NULL, NULL, 0, NULL }, 524 { "EQ", roff_EQ, NULL, NULL, 0, NULL }, 525 { "errprint", roff_line_ignore, NULL, NULL, 0, NULL }, 526 { "ev", roff_unsupp, NULL, NULL, 0, NULL }, 527 { "evc", roff_unsupp, NULL, NULL, 0, NULL }, 528 { "ex", roff_unsupp, NULL, NULL, 0, NULL }, 529 { "fallback", roff_line_ignore, NULL, NULL, 0, NULL }, 530 { "fam", roff_line_ignore, NULL, NULL, 0, NULL }, 531 { "fc", roff_unsupp, NULL, NULL, 0, NULL }, 532 { "fchar", roff_unsupp, NULL, NULL, 0, NULL }, 533 { "fcolor", roff_line_ignore, NULL, NULL, 0, NULL }, 534 { "fdeferlig", roff_line_ignore, NULL, NULL, 0, NULL }, 535 { "feature", roff_line_ignore, NULL, NULL, 0, NULL }, 536 { "fkern", roff_line_ignore, NULL, NULL, 0, NULL }, 537 { "fl", roff_line_ignore, NULL, NULL, 0, NULL }, 538 { "flig", roff_line_ignore, NULL, NULL, 0, NULL }, 539 { "fp", roff_line_ignore, NULL, NULL, 0, NULL }, 540 { "fps", roff_line_ignore, NULL, NULL, 0, NULL }, 541 { "fschar", roff_unsupp, NULL, NULL, 0, NULL }, 542 { "fspacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, 543 { "fspecial", roff_line_ignore, NULL, NULL, 0, NULL }, 544 { "ftr", roff_line_ignore, NULL, NULL, 0, NULL }, 545 { "fzoom", roff_line_ignore, NULL, NULL, 0, NULL }, 546 { "gcolor", roff_line_ignore, NULL, NULL, 0, NULL }, 547 { "hc", roff_line_ignore, NULL, NULL, 0, NULL }, 548 { "hcode", roff_line_ignore, NULL, NULL, 0, NULL }, 549 { "hidechar", roff_line_ignore, NULL, NULL, 0, NULL }, 550 { "hla", roff_line_ignore, NULL, NULL, 0, NULL }, 551 { "hlm", roff_line_ignore, NULL, NULL, 0, NULL }, 552 { "hpf", roff_line_ignore, NULL, NULL, 0, NULL }, 553 { "hpfa", roff_line_ignore, NULL, NULL, 0, NULL }, 554 { "hpfcode", roff_line_ignore, NULL, NULL, 0, NULL }, 555 { "hw", roff_line_ignore, NULL, NULL, 0, NULL }, 556 { "hy", roff_line_ignore, NULL, NULL, 0, NULL }, 557 { "hylang", roff_line_ignore, NULL, NULL, 0, NULL }, 558 { "hylen", roff_line_ignore, NULL, NULL, 0, NULL }, 559 { "hym", roff_line_ignore, NULL, NULL, 0, NULL }, 560 { "hypp", roff_line_ignore, NULL, NULL, 0, NULL }, 561 { "hys", roff_line_ignore, NULL, NULL, 0, NULL }, 562 { "ie", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 563 { "if", roff_cond, roff_cond_text, roff_cond_sub, ROFFMAC_STRUCT, NULL }, 564 { "ig", roff_block, roff_block_text, roff_block_sub, 0, NULL }, 565 { "index", roff_unsupp, NULL, NULL, 0, NULL }, 566 { "it", roff_it, NULL, NULL, 0, NULL }, 567 { "itc", roff_unsupp, NULL, NULL, 0, NULL }, 568 { "IX", roff_line_ignore, NULL, NULL, 0, NULL }, 569 { "kern", roff_line_ignore, NULL, NULL, 0, NULL }, 570 { "kernafter", roff_line_ignore, NULL, NULL, 0, NULL }, 571 { "kernbefore", roff_line_ignore, NULL, NULL, 0, NULL }, 572 { "kernpair", roff_line_ignore, NULL, NULL, 0, NULL }, 573 { "lc", roff_unsupp, NULL, NULL, 0, NULL }, 574 { "lc_ctype", roff_unsupp, NULL, NULL, 0, NULL }, 575 { "lds", roff_unsupp, NULL, NULL, 0, NULL }, 576 { "length", roff_unsupp, NULL, NULL, 0, NULL }, 577 { "letadj", roff_line_ignore, NULL, NULL, 0, NULL }, 578 { "lf", roff_insec, NULL, NULL, 0, NULL }, 579 { "lg", roff_line_ignore, NULL, NULL, 0, NULL }, 580 { "lhang", roff_line_ignore, NULL, NULL, 0, NULL }, 581 { "linetabs", roff_unsupp, NULL, NULL, 0, NULL }, 582 { "lnr", roff_unsupp, NULL, NULL, 0, NULL }, 583 { "lnrf", roff_unsupp, NULL, NULL, 0, NULL }, 584 { "lpfx", roff_unsupp, NULL, NULL, 0, NULL }, 585 { "ls", roff_line_ignore, NULL, NULL, 0, NULL }, 586 { "lsm", roff_unsupp, NULL, NULL, 0, NULL }, 587 { "lt", roff_line_ignore, NULL, NULL, 0, NULL }, 588 { "mc", roff_line_ignore, NULL, NULL, 0, NULL }, 589 { "mediasize", roff_line_ignore, NULL, NULL, 0, NULL }, 590 { "minss", roff_line_ignore, NULL, NULL, 0, NULL }, 591 { "mk", roff_line_ignore, NULL, NULL, 0, NULL }, 592 { "mso", roff_insec, NULL, NULL, 0, NULL }, 593 { "na", roff_line_ignore, NULL, NULL, 0, NULL }, 594 { "ne", roff_line_ignore, NULL, NULL, 0, NULL }, 595 { "nh", roff_line_ignore, NULL, NULL, 0, NULL }, 596 { "nhychar", roff_line_ignore, NULL, NULL, 0, NULL }, 597 { "nm", roff_unsupp, NULL, NULL, 0, NULL }, 598 { "nn", roff_unsupp, NULL, NULL, 0, NULL }, 599 { "nop", roff_unsupp, NULL, NULL, 0, NULL }, 600 { "nr", roff_nr, NULL, NULL, 0, NULL }, 601 { "nrf", roff_unsupp, NULL, NULL, 0, NULL }, 602 { "nroff", roff_line_ignore, NULL, NULL, 0, NULL }, 603 { "ns", roff_line_ignore, NULL, NULL, 0, NULL }, 604 { "nx", roff_insec, NULL, NULL, 0, NULL }, 605 { "open", roff_insec, NULL, NULL, 0, NULL }, 606 { "opena", roff_insec, NULL, NULL, 0, NULL }, 607 { "os", roff_line_ignore, NULL, NULL, 0, NULL }, 608 { "output", roff_unsupp, NULL, NULL, 0, NULL }, 609 { "padj", roff_line_ignore, NULL, NULL, 0, NULL }, 610 { "papersize", roff_line_ignore, NULL, NULL, 0, NULL }, 611 { "pc", roff_line_ignore, NULL, NULL, 0, NULL }, 612 { "pev", roff_line_ignore, NULL, NULL, 0, NULL }, 613 { "pi", roff_insec, NULL, NULL, 0, NULL }, 614 { "PI", roff_unsupp, NULL, NULL, 0, NULL }, 615 { "pl", roff_line_ignore, NULL, NULL, 0, NULL }, 616 { "pm", roff_line_ignore, NULL, NULL, 0, NULL }, 617 { "pn", roff_line_ignore, NULL, NULL, 0, NULL }, 618 { "pnr", roff_line_ignore, NULL, NULL, 0, NULL }, 619 { "po", roff_line_ignore, NULL, NULL, 0, NULL }, 620 { "ps", roff_line_ignore, NULL, NULL, 0, NULL }, 621 { "psbb", roff_unsupp, NULL, NULL, 0, NULL }, 622 { "pshape", roff_unsupp, NULL, NULL, 0, NULL }, 623 { "pso", roff_insec, NULL, NULL, 0, NULL }, 624 { "ptr", roff_line_ignore, NULL, NULL, 0, NULL }, 625 { "pvs", roff_line_ignore, NULL, NULL, 0, NULL }, 626 { "rchar", roff_unsupp, NULL, NULL, 0, NULL }, 627 { "rd", roff_line_ignore, NULL, NULL, 0, NULL }, 628 { "recursionlimit", roff_line_ignore, NULL, NULL, 0, NULL }, 629 { "return", roff_unsupp, NULL, NULL, 0, NULL }, 630 { "rfschar", roff_unsupp, NULL, NULL, 0, NULL }, 631 { "rhang", roff_line_ignore, NULL, NULL, 0, NULL }, 632 { "rj", roff_line_ignore, NULL, NULL, 0, NULL }, 633 { "rm", roff_rm, NULL, NULL, 0, NULL }, 634 { "rn", roff_unsupp, NULL, NULL, 0, NULL }, 635 { "rnn", roff_unsupp, NULL, NULL, 0, NULL }, 636 { "rr", roff_rr, NULL, NULL, 0, NULL }, 637 { "rs", roff_line_ignore, NULL, NULL, 0, NULL }, 638 { "rt", roff_line_ignore, NULL, NULL, 0, NULL }, 639 { "schar", roff_unsupp, NULL, NULL, 0, NULL }, 640 { "sentchar", roff_line_ignore, NULL, NULL, 0, NULL }, 641 { "shc", roff_line_ignore, NULL, NULL, 0, NULL }, 642 { "shift", roff_unsupp, NULL, NULL, 0, NULL }, 643 { "sizes", roff_line_ignore, NULL, NULL, 0, NULL }, 644 { "so", roff_so, NULL, NULL, 0, NULL }, 645 { "spacewidth", roff_line_ignore, NULL, NULL, 0, NULL }, 646 { "special", roff_line_ignore, NULL, NULL, 0, NULL }, 647 { "spreadwarn", roff_line_ignore, NULL, NULL, 0, NULL }, 648 { "ss", roff_line_ignore, NULL, NULL, 0, NULL }, 649 { "sty", roff_line_ignore, NULL, NULL, 0, NULL }, 650 { "substring", roff_unsupp, NULL, NULL, 0, NULL }, 651 { "sv", roff_line_ignore, NULL, NULL, 0, NULL }, 652 { "sy", roff_insec, NULL, NULL, 0, NULL }, 653 { "T&", roff_T_, NULL, NULL, 0, NULL }, 654 { "ta", roff_unsupp, NULL, NULL, 0, NULL }, 655 { "tc", roff_unsupp, NULL, NULL, 0, NULL }, 656 { "TE", roff_TE, NULL, NULL, 0, NULL }, 657 { "TH", roff_TH, NULL, NULL, 0, NULL }, 658 { "ti", roff_unsupp, NULL, NULL, 0, NULL }, 659 { "tkf", roff_line_ignore, NULL, NULL, 0, NULL }, 660 { "tl", roff_unsupp, NULL, NULL, 0, NULL }, 661 { "tm", roff_line_ignore, NULL, NULL, 0, NULL }, 662 { "tm1", roff_line_ignore, NULL, NULL, 0, NULL }, 663 { "tmc", roff_line_ignore, NULL, NULL, 0, NULL }, 664 { "tr", roff_tr, NULL, NULL, 0, NULL }, 665 { "track", roff_line_ignore, NULL, NULL, 0, NULL }, 666 { "transchar", roff_line_ignore, NULL, NULL, 0, NULL }, 667 { "trf", roff_insec, NULL, NULL, 0, NULL }, 668 { "trimat", roff_line_ignore, NULL, NULL, 0, NULL }, 669 { "trin", roff_unsupp, NULL, NULL, 0, NULL }, 670 { "trnt", roff_unsupp, NULL, NULL, 0, NULL }, 671 { "troff", roff_line_ignore, NULL, NULL, 0, NULL }, 672 { "TS", roff_TS, NULL, NULL, 0, NULL }, 673 { "uf", roff_line_ignore, NULL, NULL, 0, NULL }, 674 { "ul", roff_line_ignore, NULL, NULL, 0, NULL }, 675 { "unformat", roff_unsupp, NULL, NULL, 0, NULL }, 676 { "unwatch", roff_line_ignore, NULL, NULL, 0, NULL }, 677 { "unwatchn", roff_line_ignore, NULL, NULL, 0, NULL }, 678 { "vpt", roff_line_ignore, NULL, NULL, 0, NULL }, 679 { "vs", roff_line_ignore, NULL, NULL, 0, NULL }, 680 { "warn", roff_line_ignore, NULL, NULL, 0, NULL }, 681 { "warnscale", roff_line_ignore, NULL, NULL, 0, NULL }, 682 { "watch", roff_line_ignore, NULL, NULL, 0, NULL }, 683 { "watchlength", roff_line_ignore, NULL, NULL, 0, NULL }, 684 { "watchn", roff_line_ignore, NULL, NULL, 0, NULL }, 685 { "wh", roff_unsupp, NULL, NULL, 0, NULL }, 686 { "while", roff_unsupp, NULL, NULL, 0, NULL }, 687 { "write", roff_insec, NULL, NULL, 0, NULL }, 688 { "writec", roff_insec, NULL, NULL, 0, NULL }, 689 { "writem", roff_insec, NULL, NULL, 0, NULL }, 690 { "xflag", roff_line_ignore, NULL, NULL, 0, NULL }, 691 { ".", roff_cblock, NULL, NULL, 0, NULL }, 692 { NULL, roff_userdef, NULL, NULL, 0, NULL }, 693 }; 694 695 /* not currently implemented: Ds em Eq LP Me PP pp Or Rd Sf SH */ 696 const char *const __mdoc_reserved[] = { 697 "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", 698 "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", 699 "Brc", "Bro", "Brq", "Bsx", "Bt", "Bx", 700 "Cd", "Cm", "Db", "Dc", "Dd", "Dl", "Do", "Dq", 701 "Dt", "Dv", "Dx", "D1", 702 "Ec", "Ed", "Ef", "Ek", "El", "Em", 703 "En", "Eo", "Er", "Es", "Ev", "Ex", 704 "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Fr", "Ft", "Fx", 705 "Hf", "Ic", "In", "It", "Lb", "Li", "Lk", "Lp", 706 "Ms", "Mt", "Nd", "Nm", "No", "Ns", "Nx", 707 "Oc", "Oo", "Op", "Os", "Ot", "Ox", 708 "Pa", "Pc", "Pf", "Po", "Pp", "Pq", 709 "Qc", "Ql", "Qo", "Qq", "Re", "Rs", "Rv", 710 "Sc", "Sh", "Sm", "So", "Sq", 711 "Ss", "St", "Sx", "Sy", 712 "Ta", "Tn", "Ud", "Ux", "Va", "Vt", "Xc", "Xo", "Xr", 713 "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", 714 "%P", "%Q", "%R", "%T", "%U", "%V", 715 NULL 716 }; 717 718 /* not currently implemented: BT DE DS ME MT PT SY TQ YS */ 719 const char *const __man_reserved[] = { 720 "AT", "B", "BI", "BR", "DT", 721 "EE", "EN", "EQ", "EX", "HP", "I", "IB", "IP", "IR", 722 "LP", "OP", "P", "PD", "PP", 723 "R", "RB", "RE", "RI", "RS", "SB", "SH", "SM", "SS", 724 "TE", "TH", "TP", "TS", "T&", "UC", "UE", "UR", 725 NULL 726 }; 727 728 /* Array of injected predefined strings. */ 729 #define PREDEFS_MAX 38 730 static const struct predef predefs[PREDEFS_MAX] = { 731 #include "predefs.in" 732 }; 733 734 /* See roffhash_find() */ 735 #define ROFF_HASH(p) (p[0] - ASCII_LO) 736 737 static int roffit_lines; /* number of lines to delay */ 738 static char *roffit_macro; /* nil-terminated macro line */ 739 740 741 /* --- request table ------------------------------------------------------ */ 742 743 static void 744 roffhash_init(void) 745 { 746 struct roffmac *n; 747 int buc, i; 748 749 for (i = 0; i < (int)ROFF_USERDEF; i++) { 750 assert(roffs[i].name[0] >= ASCII_LO); 751 assert(roffs[i].name[0] <= ASCII_HI); 752 753 buc = ROFF_HASH(roffs[i].name); 754 755 if (NULL != (n = hash[buc])) { 756 for ( ; n->next; n = n->next) 757 /* Do nothing. */ ; 758 n->next = &roffs[i]; 759 } else 760 hash[buc] = &roffs[i]; 761 } 762 } 763 764 /* 765 * Look up a roff token by its name. Returns ROFF_MAX if no macro by 766 * the nil-terminated string name could be found. 767 */ 768 static enum rofft 769 roffhash_find(const char *p, size_t s) 770 { 771 int buc; 772 struct roffmac *n; 773 774 /* 775 * libroff has an extremely simple hashtable, for the time 776 * being, which simply keys on the first character, which must 777 * be printable, then walks a chain. It works well enough until 778 * optimised. 779 */ 780 781 if (p[0] < ASCII_LO || p[0] > ASCII_HI) 782 return ROFF_MAX; 783 784 buc = ROFF_HASH(p); 785 786 if (NULL == (n = hash[buc])) 787 return ROFF_MAX; 788 for ( ; n; n = n->next) 789 if (0 == strncmp(n->name, p, s) && '\0' == n->name[(int)s]) 790 return (enum rofft)(n - roffs); 791 792 return ROFF_MAX; 793 } 794 795 /* --- stack of request blocks -------------------------------------------- */ 796 797 /* 798 * Pop the current node off of the stack of roff instructions currently 799 * pending. 800 */ 801 static void 802 roffnode_pop(struct roff *r) 803 { 804 struct roffnode *p; 805 806 assert(r->last); 807 p = r->last; 808 809 r->last = r->last->parent; 810 free(p->name); 811 free(p->end); 812 free(p); 813 } 814 815 /* 816 * Push a roff node onto the instruction stack. This must later be 817 * removed with roffnode_pop(). 818 */ 819 static void 820 roffnode_push(struct roff *r, enum rofft tok, const char *name, 821 int line, int col) 822 { 823 struct roffnode *p; 824 825 p = mandoc_calloc(1, sizeof(struct roffnode)); 826 p->tok = tok; 827 if (name) 828 p->name = mandoc_strdup(name); 829 p->parent = r->last; 830 p->line = line; 831 p->col = col; 832 p->rule = p->parent ? p->parent->rule : 0; 833 834 r->last = p; 835 } 836 837 /* --- roff parser state data management ---------------------------------- */ 838 839 static void 840 roff_free1(struct roff *r) 841 { 842 struct tbl_node *tbl; 843 struct eqn_node *e; 844 int i; 845 846 while (NULL != (tbl = r->first_tbl)) { 847 r->first_tbl = tbl->next; 848 tbl_free(tbl); 849 } 850 r->first_tbl = r->last_tbl = r->tbl = NULL; 851 852 while (NULL != (e = r->first_eqn)) { 853 r->first_eqn = e->next; 854 eqn_free(e); 855 } 856 r->first_eqn = r->last_eqn = r->eqn = NULL; 857 858 while (r->last) 859 roffnode_pop(r); 860 861 free (r->rstack); 862 r->rstack = NULL; 863 r->rstacksz = 0; 864 r->rstackpos = -1; 865 866 roff_freereg(r->regtab); 867 r->regtab = NULL; 868 869 roff_freestr(r->strtab); 870 roff_freestr(r->xmbtab); 871 r->strtab = r->xmbtab = NULL; 872 873 if (r->xtab) 874 for (i = 0; i < 128; i++) 875 free(r->xtab[i].p); 876 free(r->xtab); 877 r->xtab = NULL; 878 } 879 880 void 881 roff_reset(struct roff *r) 882 { 883 884 roff_free1(r); 885 r->format = r->options & (MPARSE_MDOC | MPARSE_MAN); 886 r->control = 0; 887 } 888 889 void 890 roff_free(struct roff *r) 891 { 892 893 roff_free1(r); 894 free(r); 895 } 896 897 struct roff * 898 roff_alloc(struct mparse *parse, int options) 899 { 900 struct roff *r; 901 902 r = mandoc_calloc(1, sizeof(struct roff)); 903 r->parse = parse; 904 r->options = options; 905 r->format = options & (MPARSE_MDOC | MPARSE_MAN); 906 r->rstackpos = -1; 907 908 roffhash_init(); 909 910 return r; 911 } 912 913 /* --- syntax tree state data management ---------------------------------- */ 914 915 static void 916 roff_man_free1(struct roff_man *man) 917 { 918 919 if (man->first != NULL) 920 roff_node_delete(man, man->first); 921 free(man->meta.msec); 922 free(man->meta.vol); 923 free(man->meta.os); 924 free(man->meta.arch); 925 free(man->meta.title); 926 free(man->meta.name); 927 free(man->meta.date); 928 } 929 930 static void 931 roff_man_alloc1(struct roff_man *man) 932 { 933 934 memset(&man->meta, 0, sizeof(man->meta)); 935 man->first = mandoc_calloc(1, sizeof(*man->first)); 936 man->first->type = ROFFT_ROOT; 937 man->last = man->first; 938 man->last_es = NULL; 939 man->flags = 0; 940 man->macroset = MACROSET_NONE; 941 man->lastsec = man->lastnamed = SEC_NONE; 942 man->next = ROFF_NEXT_CHILD; 943 } 944 945 void 946 roff_man_reset(struct roff_man *man) 947 { 948 949 roff_man_free1(man); 950 roff_man_alloc1(man); 951 } 952 953 void 954 roff_man_free(struct roff_man *man) 955 { 956 957 roff_man_free1(man); 958 free(man); 959 } 960 961 struct roff_man * 962 roff_man_alloc(struct roff *roff, struct mparse *parse, 963 const char *defos, int quick) 964 { 965 struct roff_man *man; 966 967 man = mandoc_calloc(1, sizeof(*man)); 968 man->parse = parse; 969 man->roff = roff; 970 man->defos = defos; 971 man->quick = quick; 972 roff_man_alloc1(man); 973 return man; 974 } 975 976 /* --- syntax tree handling ----------------------------------------------- */ 977 978 struct roff_node * 979 roff_node_alloc(struct roff_man *man, int line, int pos, 980 enum roff_type type, int tok) 981 { 982 struct roff_node *n; 983 984 n = mandoc_calloc(1, sizeof(*n)); 985 n->line = line; 986 n->pos = pos; 987 n->tok = tok; 988 n->type = type; 989 n->sec = man->lastsec; 990 991 if (man->flags & MDOC_SYNOPSIS) 992 n->flags |= MDOC_SYNPRETTY; 993 else 994 n->flags &= ~MDOC_SYNPRETTY; 995 if (man->flags & MDOC_NEWLINE) 996 n->flags |= MDOC_LINE; 997 man->flags &= ~MDOC_NEWLINE; 998 999 return n; 1000 } 1001 1002 void 1003 roff_node_append(struct roff_man *man, struct roff_node *n) 1004 { 1005 1006 switch (man->next) { 1007 case ROFF_NEXT_SIBLING: 1008 if (man->last->next != NULL) { 1009 n->next = man->last->next; 1010 man->last->next->prev = n; 1011 } else 1012 man->last->parent->last = n; 1013 man->last->next = n; 1014 n->prev = man->last; 1015 n->parent = man->last->parent; 1016 break; 1017 case ROFF_NEXT_CHILD: 1018 man->last->child = n; 1019 n->parent = man->last; 1020 n->parent->last = n; 1021 break; 1022 default: 1023 abort(); 1024 } 1025 man->last = n; 1026 1027 switch (n->type) { 1028 case ROFFT_HEAD: 1029 n->parent->head = n; 1030 break; 1031 case ROFFT_BODY: 1032 if (n->end != ENDBODY_NOT) 1033 return; 1034 n->parent->body = n; 1035 break; 1036 case ROFFT_TAIL: 1037 n->parent->tail = n; 1038 break; 1039 default: 1040 return; 1041 } 1042 1043 /* 1044 * Copy over the normalised-data pointer of our parent. Not 1045 * everybody has one, but copying a null pointer is fine. 1046 */ 1047 1048 n->norm = n->parent->norm; 1049 assert(n->parent->type == ROFFT_BLOCK); 1050 } 1051 1052 void 1053 roff_word_alloc(struct roff_man *man, int line, int pos, const char *word) 1054 { 1055 struct roff_node *n; 1056 1057 n = roff_node_alloc(man, line, pos, ROFFT_TEXT, TOKEN_NONE); 1058 n->string = roff_strdup(man->roff, word); 1059 roff_node_append(man, n); 1060 if (man->macroset == MACROSET_MDOC) 1061 n->flags |= MDOC_VALID | MDOC_ENDED; 1062 else 1063 n->flags |= MAN_VALID; 1064 man->next = ROFF_NEXT_SIBLING; 1065 } 1066 1067 void 1068 roff_word_append(struct roff_man *man, const char *word) 1069 { 1070 struct roff_node *n; 1071 char *addstr, *newstr; 1072 1073 n = man->last; 1074 addstr = roff_strdup(man->roff, word); 1075 mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 1076 free(addstr); 1077 free(n->string); 1078 n->string = newstr; 1079 man->next = ROFF_NEXT_SIBLING; 1080 } 1081 1082 void 1083 roff_elem_alloc(struct roff_man *man, int line, int pos, int tok) 1084 { 1085 struct roff_node *n; 1086 1087 n = roff_node_alloc(man, line, pos, ROFFT_ELEM, tok); 1088 roff_node_append(man, n); 1089 man->next = ROFF_NEXT_CHILD; 1090 } 1091 1092 struct roff_node * 1093 roff_block_alloc(struct roff_man *man, int line, int pos, int tok) 1094 { 1095 struct roff_node *n; 1096 1097 n = roff_node_alloc(man, line, pos, ROFFT_BLOCK, tok); 1098 roff_node_append(man, n); 1099 man->next = ROFF_NEXT_CHILD; 1100 return n; 1101 } 1102 1103 struct roff_node * 1104 roff_head_alloc(struct roff_man *man, int line, int pos, int tok) 1105 { 1106 struct roff_node *n; 1107 1108 n = roff_node_alloc(man, line, pos, ROFFT_HEAD, tok); 1109 roff_node_append(man, n); 1110 man->next = ROFF_NEXT_CHILD; 1111 return n; 1112 } 1113 1114 struct roff_node * 1115 roff_body_alloc(struct roff_man *man, int line, int pos, int tok) 1116 { 1117 struct roff_node *n; 1118 1119 n = roff_node_alloc(man, line, pos, ROFFT_BODY, tok); 1120 roff_node_append(man, n); 1121 man->next = ROFF_NEXT_CHILD; 1122 return n; 1123 } 1124 1125 void 1126 roff_addeqn(struct roff_man *man, const struct eqn *eqn) 1127 { 1128 struct roff_node *n; 1129 1130 n = roff_node_alloc(man, eqn->ln, eqn->pos, ROFFT_EQN, TOKEN_NONE); 1131 n->eqn = eqn; 1132 if (eqn->ln > man->last->line) 1133 n->flags |= MDOC_LINE; 1134 roff_node_append(man, n); 1135 man->next = ROFF_NEXT_SIBLING; 1136 } 1137 1138 void 1139 roff_addtbl(struct roff_man *man, const struct tbl_span *tbl) 1140 { 1141 struct roff_node *n; 1142 1143 if (man->macroset == MACROSET_MAN) 1144 man_breakscope(man, TOKEN_NONE); 1145 n = roff_node_alloc(man, tbl->line, 0, ROFFT_TBL, TOKEN_NONE); 1146 n->span = tbl; 1147 roff_node_append(man, n); 1148 if (man->macroset == MACROSET_MDOC) 1149 n->flags |= MDOC_VALID | MDOC_ENDED; 1150 else 1151 n->flags |= MAN_VALID; 1152 man->next = ROFF_NEXT_SIBLING; 1153 } 1154 1155 void 1156 roff_node_unlink(struct roff_man *man, struct roff_node *n) 1157 { 1158 1159 /* Adjust siblings. */ 1160 1161 if (n->prev) 1162 n->prev->next = n->next; 1163 if (n->next) 1164 n->next->prev = n->prev; 1165 1166 /* Adjust parent. */ 1167 1168 if (n->parent != NULL) { 1169 if (n->parent->child == n) 1170 n->parent->child = n->next; 1171 if (n->parent->last == n) 1172 n->parent->last = n->prev; 1173 } 1174 1175 /* Adjust parse point. */ 1176 1177 if (man == NULL) 1178 return; 1179 if (man->last == n) { 1180 if (n->prev == NULL) { 1181 man->last = n->parent; 1182 man->next = ROFF_NEXT_CHILD; 1183 } else { 1184 man->last = n->prev; 1185 man->next = ROFF_NEXT_SIBLING; 1186 } 1187 } 1188 if (man->first == n) 1189 man->first = NULL; 1190 } 1191 1192 void 1193 roff_node_free(struct roff_node *n) 1194 { 1195 1196 if (n->args != NULL) 1197 mdoc_argv_free(n->args); 1198 if (n->type == ROFFT_BLOCK || n->type == ROFFT_ELEM) 1199 free(n->norm); 1200 free(n->string); 1201 free(n); 1202 } 1203 1204 void 1205 roff_node_delete(struct roff_man *man, struct roff_node *n) 1206 { 1207 1208 while (n->child != NULL) 1209 roff_node_delete(man, n->child); 1210 roff_node_unlink(man, n); 1211 roff_node_free(n); 1212 } 1213 1214 void 1215 deroff(char **dest, const struct roff_node *n) 1216 { 1217 char *cp; 1218 size_t sz; 1219 1220 if (n->type != ROFFT_TEXT) { 1221 for (n = n->child; n != NULL; n = n->next) 1222 deroff(dest, n); 1223 return; 1224 } 1225 1226 /* Skip leading whitespace and escape sequences. */ 1227 1228 cp = n->string; 1229 while (*cp != '\0') { 1230 if ('\\' == *cp) { 1231 cp++; 1232 mandoc_escape((const char **)&cp, NULL, NULL); 1233 } else if (isspace((unsigned char)*cp)) 1234 cp++; 1235 else 1236 break; 1237 } 1238 1239 /* Skip trailing whitespace. */ 1240 1241 for (sz = strlen(cp); sz; sz--) 1242 if ( ! isspace((unsigned char)cp[sz-1])) 1243 break; 1244 1245 /* Skip empty strings. */ 1246 1247 if (sz == 0) 1248 return; 1249 1250 if (*dest == NULL) { 1251 *dest = mandoc_strndup(cp, sz); 1252 return; 1253 } 1254 1255 mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 1256 free(*dest); 1257 *dest = cp; 1258 } 1259 1260 /* --- main functions of the roff parser ---------------------------------- */ 1261 1262 /* 1263 * In the current line, expand escape sequences that tend to get 1264 * used in numerical expressions and conditional requests. 1265 * Also check the syntax of the remaining escape sequences. 1266 */ 1267 static enum rofferr 1268 roff_res(struct roff *r, struct buf *buf, int ln, int pos) 1269 { 1270 char ubuf[24]; /* buffer to print the number */ 1271 const char *start; /* start of the string to process */ 1272 char *stesc; /* start of an escape sequence ('\\') */ 1273 const char *stnam; /* start of the name, after "[(*" */ 1274 const char *cp; /* end of the name, e.g. before ']' */ 1275 const char *res; /* the string to be substituted */ 1276 char *nbuf; /* new buffer to copy buf->buf to */ 1277 size_t maxl; /* expected length of the escape name */ 1278 size_t naml; /* actual length of the escape name */ 1279 enum mandoc_esc esc; /* type of the escape sequence */ 1280 int inaml; /* length returned from mandoc_escape() */ 1281 int expand_count; /* to avoid infinite loops */ 1282 int npos; /* position in numeric expression */ 1283 int arg_complete; /* argument not interrupted by eol */ 1284 char term; /* character terminating the escape */ 1285 1286 expand_count = 0; 1287 start = buf->buf + pos; 1288 stesc = strchr(start, '\0') - 1; 1289 while (stesc-- > start) { 1290 1291 /* Search backwards for the next backslash. */ 1292 1293 if (*stesc != '\\') 1294 continue; 1295 1296 /* If it is escaped, skip it. */ 1297 1298 for (cp = stesc - 1; cp >= start; cp--) 1299 if (*cp != '\\') 1300 break; 1301 1302 if ((stesc - cp) % 2 == 0) { 1303 stesc = (char *)cp; 1304 continue; 1305 } 1306 1307 /* Decide whether to expand or to check only. */ 1308 1309 term = '\0'; 1310 cp = stesc + 1; 1311 switch (*cp) { 1312 case '*': 1313 res = NULL; 1314 break; 1315 case 'B': 1316 case 'w': 1317 term = cp[1]; 1318 /* FALLTHROUGH */ 1319 case 'n': 1320 res = ubuf; 1321 break; 1322 default: 1323 esc = mandoc_escape(&cp, &stnam, &inaml); 1324 if (esc == ESCAPE_ERROR || 1325 (esc == ESCAPE_SPECIAL && 1326 mchars_spec2cp(stnam, inaml) < 0)) 1327 mandoc_vmsg(MANDOCERR_ESC_BAD, 1328 r->parse, ln, (int)(stesc - buf->buf), 1329 "%.*s", (int)(cp - stesc), stesc); 1330 continue; 1331 } 1332 1333 if (EXPAND_LIMIT < ++expand_count) { 1334 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1335 ln, (int)(stesc - buf->buf), NULL); 1336 return ROFF_IGN; 1337 } 1338 1339 /* 1340 * The third character decides the length 1341 * of the name of the string or register. 1342 * Save a pointer to the name. 1343 */ 1344 1345 if (term == '\0') { 1346 switch (*++cp) { 1347 case '\0': 1348 maxl = 0; 1349 break; 1350 case '(': 1351 cp++; 1352 maxl = 2; 1353 break; 1354 case '[': 1355 cp++; 1356 term = ']'; 1357 maxl = 0; 1358 break; 1359 default: 1360 maxl = 1; 1361 break; 1362 } 1363 } else { 1364 cp += 2; 1365 maxl = 0; 1366 } 1367 stnam = cp; 1368 1369 /* Advance to the end of the name. */ 1370 1371 naml = 0; 1372 arg_complete = 1; 1373 while (maxl == 0 || naml < maxl) { 1374 if (*cp == '\0') { 1375 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 1376 ln, (int)(stesc - buf->buf), stesc); 1377 arg_complete = 0; 1378 break; 1379 } 1380 if (maxl == 0 && *cp == term) { 1381 cp++; 1382 break; 1383 } 1384 if (*cp++ != '\\' || stesc[1] != 'w') { 1385 naml++; 1386 continue; 1387 } 1388 switch (mandoc_escape(&cp, NULL, NULL)) { 1389 case ESCAPE_SPECIAL: 1390 case ESCAPE_UNICODE: 1391 case ESCAPE_NUMBERED: 1392 case ESCAPE_OVERSTRIKE: 1393 naml++; 1394 break; 1395 default: 1396 break; 1397 } 1398 } 1399 1400 /* 1401 * Retrieve the replacement string; if it is 1402 * undefined, resume searching for escapes. 1403 */ 1404 1405 switch (stesc[1]) { 1406 case '*': 1407 if (arg_complete) 1408 res = roff_getstrn(r, stnam, naml); 1409 break; 1410 case 'B': 1411 npos = 0; 1412 ubuf[0] = arg_complete && 1413 roff_evalnum(r, ln, stnam, &npos, 1414 NULL, ROFFNUM_SCALE) && 1415 stnam + npos + 1 == cp ? '1' : '0'; 1416 ubuf[1] = '\0'; 1417 break; 1418 case 'n': 1419 if (arg_complete) 1420 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1421 roff_getregn(r, stnam, naml)); 1422 else 1423 ubuf[0] = '\0'; 1424 break; 1425 case 'w': 1426 /* use even incomplete args */ 1427 (void)snprintf(ubuf, sizeof(ubuf), "%d", 1428 24 * (int)naml); 1429 break; 1430 } 1431 1432 if (res == NULL) { 1433 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1434 r->parse, ln, (int)(stesc - buf->buf), 1435 "%.*s", (int)naml, stnam); 1436 res = ""; 1437 } else if (buf->sz + strlen(res) > SHRT_MAX) { 1438 mandoc_msg(MANDOCERR_ROFFLOOP, r->parse, 1439 ln, (int)(stesc - buf->buf), NULL); 1440 return ROFF_IGN; 1441 } 1442 1443 /* Replace the escape sequence by the string. */ 1444 1445 *stesc = '\0'; 1446 buf->sz = mandoc_asprintf(&nbuf, "%s%s%s", 1447 buf->buf, res, cp) + 1; 1448 1449 /* Prepare for the next replacement. */ 1450 1451 start = nbuf + pos; 1452 stesc = nbuf + (stesc - buf->buf) + strlen(res); 1453 free(buf->buf); 1454 buf->buf = nbuf; 1455 } 1456 return ROFF_CONT; 1457 } 1458 1459 /* 1460 * Process text streams. 1461 */ 1462 static enum rofferr 1463 roff_parsetext(struct buf *buf, int pos, int *offs) 1464 { 1465 size_t sz; 1466 const char *start; 1467 char *p; 1468 int isz; 1469 enum mandoc_esc esc; 1470 1471 /* Spring the input line trap. */ 1472 1473 if (roffit_lines == 1) { 1474 isz = mandoc_asprintf(&p, "%s\n.%s", buf->buf, roffit_macro); 1475 free(buf->buf); 1476 buf->buf = p; 1477 buf->sz = isz + 1; 1478 *offs = 0; 1479 free(roffit_macro); 1480 roffit_lines = 0; 1481 return ROFF_REPARSE; 1482 } else if (roffit_lines > 1) 1483 --roffit_lines; 1484 1485 /* Convert all breakable hyphens into ASCII_HYPH. */ 1486 1487 start = p = buf->buf + pos; 1488 1489 while (*p != '\0') { 1490 sz = strcspn(p, "-\\"); 1491 p += sz; 1492 1493 if (*p == '\0') 1494 break; 1495 1496 if (*p == '\\') { 1497 /* Skip over escapes. */ 1498 p++; 1499 esc = mandoc_escape((const char **)&p, NULL, NULL); 1500 if (esc == ESCAPE_ERROR) 1501 break; 1502 while (*p == '-') 1503 p++; 1504 continue; 1505 } else if (p == start) { 1506 p++; 1507 continue; 1508 } 1509 1510 if (isalpha((unsigned char)p[-1]) && 1511 isalpha((unsigned char)p[1])) 1512 *p = ASCII_HYPH; 1513 p++; 1514 } 1515 return ROFF_CONT; 1516 } 1517 1518 enum rofferr 1519 roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs) 1520 { 1521 enum rofft t; 1522 enum rofferr e; 1523 int pos; /* parse point */ 1524 int spos; /* saved parse point for messages */ 1525 int ppos; /* original offset in buf->buf */ 1526 int ctl; /* macro line (boolean) */ 1527 1528 ppos = pos = *offs; 1529 1530 /* Handle in-line equation delimiters. */ 1531 1532 if (r->tbl == NULL && 1533 r->last_eqn != NULL && r->last_eqn->delim && 1534 (r->eqn == NULL || r->eqn_inline)) { 1535 e = roff_eqndelim(r, buf, pos); 1536 if (e == ROFF_REPARSE) 1537 return e; 1538 assert(e == ROFF_CONT); 1539 } 1540 1541 /* Expand some escape sequences. */ 1542 1543 e = roff_res(r, buf, ln, pos); 1544 if (e == ROFF_IGN) 1545 return e; 1546 assert(e == ROFF_CONT); 1547 1548 ctl = roff_getcontrol(r, buf->buf, &pos); 1549 1550 /* 1551 * First, if a scope is open and we're not a macro, pass the 1552 * text through the macro's filter. 1553 * Equations process all content themselves. 1554 * Tables process almost all content themselves, but we want 1555 * to warn about macros before passing it there. 1556 */ 1557 1558 if (r->last != NULL && ! ctl) { 1559 t = r->last->tok; 1560 assert(roffs[t].text); 1561 e = (*roffs[t].text)(r, t, buf, ln, pos, pos, offs); 1562 assert(e == ROFF_IGN || e == ROFF_CONT); 1563 if (e != ROFF_CONT) 1564 return e; 1565 } 1566 if (r->eqn != NULL) 1567 return eqn_read(&r->eqn, ln, buf->buf, ppos, offs); 1568 if (r->tbl != NULL && ( ! ctl || buf->buf[pos] == '\0')) 1569 return tbl_read(r->tbl, ln, buf->buf, ppos); 1570 if ( ! ctl) 1571 return roff_parsetext(buf, pos, offs); 1572 1573 /* Skip empty request lines. */ 1574 1575 if (buf->buf[pos] == '"') { 1576 mandoc_msg(MANDOCERR_COMMENT_BAD, r->parse, 1577 ln, pos, NULL); 1578 return ROFF_IGN; 1579 } else if (buf->buf[pos] == '\0') 1580 return ROFF_IGN; 1581 1582 /* 1583 * If a scope is open, go to the child handler for that macro, 1584 * as it may want to preprocess before doing anything with it. 1585 * Don't do so if an equation is open. 1586 */ 1587 1588 if (r->last) { 1589 t = r->last->tok; 1590 assert(roffs[t].sub); 1591 return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs); 1592 } 1593 1594 /* No scope is open. This is a new request or macro. */ 1595 1596 spos = pos; 1597 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1598 1599 /* Tables ignore most macros. */ 1600 1601 if (r->tbl != NULL && (t == ROFF_MAX || t == ROFF_TS)) { 1602 mandoc_msg(MANDOCERR_TBLMACRO, r->parse, 1603 ln, pos, buf->buf + spos); 1604 if (t == ROFF_TS) 1605 return ROFF_IGN; 1606 while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ') 1607 pos++; 1608 while (buf->buf[pos] != '\0' && buf->buf[pos] == ' ') 1609 pos++; 1610 return tbl_read(r->tbl, ln, buf->buf, pos); 1611 } 1612 1613 /* 1614 * This is neither a roff request nor a user-defined macro. 1615 * Let the standard macro set parsers handle it. 1616 */ 1617 1618 if (t == ROFF_MAX) 1619 return ROFF_CONT; 1620 1621 /* Execute a roff request or a user defined macro. */ 1622 1623 assert(roffs[t].proc); 1624 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 1625 } 1626 1627 void 1628 roff_endparse(struct roff *r) 1629 { 1630 1631 if (r->last) 1632 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1633 r->last->line, r->last->col, 1634 roffs[r->last->tok].name); 1635 1636 if (r->eqn) { 1637 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1638 r->eqn->eqn.ln, r->eqn->eqn.pos, "EQ"); 1639 eqn_end(&r->eqn); 1640 } 1641 1642 if (r->tbl) { 1643 mandoc_msg(MANDOCERR_BLK_NOEND, r->parse, 1644 r->tbl->line, r->tbl->pos, "TS"); 1645 tbl_end(&r->tbl); 1646 } 1647 } 1648 1649 /* 1650 * Parse a roff node's type from the input buffer. This must be in the 1651 * form of ".foo xxx" in the usual way. 1652 */ 1653 static enum rofft 1654 roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos) 1655 { 1656 char *cp; 1657 const char *mac; 1658 size_t maclen; 1659 enum rofft t; 1660 1661 cp = buf + *pos; 1662 1663 if ('\0' == *cp || '"' == *cp || '\t' == *cp || ' ' == *cp) 1664 return ROFF_MAX; 1665 1666 mac = cp; 1667 maclen = roff_getname(r, &cp, ln, ppos); 1668 1669 t = (r->current_string = roff_getstrn(r, mac, maclen)) 1670 ? ROFF_USERDEF : roffhash_find(mac, maclen); 1671 1672 if (ROFF_MAX != t) 1673 *pos = cp - buf; 1674 1675 return t; 1676 } 1677 1678 /* --- handling of request blocks ----------------------------------------- */ 1679 1680 static enum rofferr 1681 roff_cblock(ROFF_ARGS) 1682 { 1683 1684 /* 1685 * A block-close `..' should only be invoked as a child of an 1686 * ignore macro, otherwise raise a warning and just ignore it. 1687 */ 1688 1689 if (r->last == NULL) { 1690 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1691 ln, ppos, ".."); 1692 return ROFF_IGN; 1693 } 1694 1695 switch (r->last->tok) { 1696 case ROFF_am: 1697 /* ROFF_am1 is remapped to ROFF_am in roff_block(). */ 1698 case ROFF_ami: 1699 case ROFF_de: 1700 /* ROFF_de1 is remapped to ROFF_de in roff_block(). */ 1701 case ROFF_dei: 1702 case ROFF_ig: 1703 break; 1704 default: 1705 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1706 ln, ppos, ".."); 1707 return ROFF_IGN; 1708 } 1709 1710 if (buf->buf[pos] != '\0') 1711 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 1712 ".. %s", buf->buf + pos); 1713 1714 roffnode_pop(r); 1715 roffnode_cleanscope(r); 1716 return ROFF_IGN; 1717 1718 } 1719 1720 static void 1721 roffnode_cleanscope(struct roff *r) 1722 { 1723 1724 while (r->last) { 1725 if (--r->last->endspan != 0) 1726 break; 1727 roffnode_pop(r); 1728 } 1729 } 1730 1731 static void 1732 roff_ccond(struct roff *r, int ln, int ppos) 1733 { 1734 1735 if (NULL == r->last) { 1736 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1737 ln, ppos, "\\}"); 1738 return; 1739 } 1740 1741 switch (r->last->tok) { 1742 case ROFF_el: 1743 case ROFF_ie: 1744 case ROFF_if: 1745 break; 1746 default: 1747 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1748 ln, ppos, "\\}"); 1749 return; 1750 } 1751 1752 if (r->last->endspan > -1) { 1753 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 1754 ln, ppos, "\\}"); 1755 return; 1756 } 1757 1758 roffnode_pop(r); 1759 roffnode_cleanscope(r); 1760 return; 1761 } 1762 1763 static enum rofferr 1764 roff_block(ROFF_ARGS) 1765 { 1766 const char *name; 1767 char *iname, *cp; 1768 size_t namesz; 1769 1770 /* Ignore groff compatibility mode for now. */ 1771 1772 if (tok == ROFF_de1) 1773 tok = ROFF_de; 1774 else if (tok == ROFF_dei1) 1775 tok = ROFF_dei; 1776 else if (tok == ROFF_am1) 1777 tok = ROFF_am; 1778 else if (tok == ROFF_ami1) 1779 tok = ROFF_ami; 1780 1781 /* Parse the macro name argument. */ 1782 1783 cp = buf->buf + pos; 1784 if (tok == ROFF_ig) { 1785 iname = NULL; 1786 namesz = 0; 1787 } else { 1788 iname = cp; 1789 namesz = roff_getname(r, &cp, ln, ppos); 1790 iname[namesz] = '\0'; 1791 } 1792 1793 /* Resolve the macro name argument if it is indirect. */ 1794 1795 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1796 if ((name = roff_getstrn(r, iname, namesz)) == NULL) { 1797 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1798 r->parse, ln, (int)(iname - buf->buf), 1799 "%.*s", (int)namesz, iname); 1800 namesz = 0; 1801 } else 1802 namesz = strlen(name); 1803 } else 1804 name = iname; 1805 1806 if (namesz == 0 && tok != ROFF_ig) { 1807 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, 1808 ln, ppos, roffs[tok].name); 1809 return ROFF_IGN; 1810 } 1811 1812 roffnode_push(r, tok, name, ln, ppos); 1813 1814 /* 1815 * At the beginning of a `de' macro, clear the existing string 1816 * with the same name, if there is one. New content will be 1817 * appended from roff_block_text() in multiline mode. 1818 */ 1819 1820 if (tok == ROFF_de || tok == ROFF_dei) 1821 roff_setstrn(&r->strtab, name, namesz, "", 0, 0); 1822 1823 if (*cp == '\0') 1824 return ROFF_IGN; 1825 1826 /* Get the custom end marker. */ 1827 1828 iname = cp; 1829 namesz = roff_getname(r, &cp, ln, ppos); 1830 1831 /* Resolve the end marker if it is indirect. */ 1832 1833 if (namesz && (tok == ROFF_dei || tok == ROFF_ami)) { 1834 if ((name = roff_getstrn(r, iname, namesz)) == NULL) { 1835 mandoc_vmsg(MANDOCERR_STR_UNDEF, 1836 r->parse, ln, (int)(iname - buf->buf), 1837 "%.*s", (int)namesz, iname); 1838 namesz = 0; 1839 } else 1840 namesz = strlen(name); 1841 } else 1842 name = iname; 1843 1844 if (namesz) 1845 r->last->end = mandoc_strndup(name, namesz); 1846 1847 if (*cp != '\0') 1848 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 1849 ln, pos, ".%s ... %s", roffs[tok].name, cp); 1850 1851 return ROFF_IGN; 1852 } 1853 1854 static enum rofferr 1855 roff_block_sub(ROFF_ARGS) 1856 { 1857 enum rofft t; 1858 int i, j; 1859 1860 /* 1861 * First check whether a custom macro exists at this level. If 1862 * it does, then check against it. This is some of groff's 1863 * stranger behaviours. If we encountered a custom end-scope 1864 * tag and that tag also happens to be a "real" macro, then we 1865 * need to try interpreting it again as a real macro. If it's 1866 * not, then return ignore. Else continue. 1867 */ 1868 1869 if (r->last->end) { 1870 for (i = pos, j = 0; r->last->end[j]; j++, i++) 1871 if (buf->buf[i] != r->last->end[j]) 1872 break; 1873 1874 if (r->last->end[j] == '\0' && 1875 (buf->buf[i] == '\0' || 1876 buf->buf[i] == ' ' || 1877 buf->buf[i] == '\t')) { 1878 roffnode_pop(r); 1879 roffnode_cleanscope(r); 1880 1881 while (buf->buf[i] == ' ' || buf->buf[i] == '\t') 1882 i++; 1883 1884 pos = i; 1885 if (roff_parse(r, buf->buf, &pos, ln, ppos) != 1886 ROFF_MAX) 1887 return ROFF_RERUN; 1888 return ROFF_IGN; 1889 } 1890 } 1891 1892 /* 1893 * If we have no custom end-query or lookup failed, then try 1894 * pulling it out of the hashtable. 1895 */ 1896 1897 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1898 1899 if (t != ROFF_cblock) { 1900 if (tok != ROFF_ig) 1901 roff_setstr(r, r->last->name, buf->buf + ppos, 2); 1902 return ROFF_IGN; 1903 } 1904 1905 assert(roffs[t].proc); 1906 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 1907 } 1908 1909 static enum rofferr 1910 roff_block_text(ROFF_ARGS) 1911 { 1912 1913 if (tok != ROFF_ig) 1914 roff_setstr(r, r->last->name, buf->buf + pos, 2); 1915 1916 return ROFF_IGN; 1917 } 1918 1919 static enum rofferr 1920 roff_cond_sub(ROFF_ARGS) 1921 { 1922 enum rofft t; 1923 char *ep; 1924 int rr; 1925 1926 rr = r->last->rule; 1927 roffnode_cleanscope(r); 1928 t = roff_parse(r, buf->buf, &pos, ln, ppos); 1929 1930 /* 1931 * Fully handle known macros when they are structurally 1932 * required or when the conditional evaluated to true. 1933 */ 1934 1935 if ((t != ROFF_MAX) && 1936 (rr || roffs[t].flags & ROFFMAC_STRUCT)) { 1937 assert(roffs[t].proc); 1938 return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs); 1939 } 1940 1941 /* 1942 * If `\}' occurs on a macro line without a preceding macro, 1943 * drop the line completely. 1944 */ 1945 1946 ep = buf->buf + pos; 1947 if (ep[0] == '\\' && ep[1] == '}') 1948 rr = 0; 1949 1950 /* Always check for the closing delimiter `\}'. */ 1951 1952 while ((ep = strchr(ep, '\\')) != NULL) { 1953 if (*(++ep) == '}') { 1954 *ep = '&'; 1955 roff_ccond(r, ln, ep - buf->buf - 1); 1956 } 1957 if (*ep != '\0') 1958 ++ep; 1959 } 1960 return rr ? ROFF_CONT : ROFF_IGN; 1961 } 1962 1963 static enum rofferr 1964 roff_cond_text(ROFF_ARGS) 1965 { 1966 char *ep; 1967 int rr; 1968 1969 rr = r->last->rule; 1970 roffnode_cleanscope(r); 1971 1972 ep = buf->buf + pos; 1973 while ((ep = strchr(ep, '\\')) != NULL) { 1974 if (*(++ep) == '}') { 1975 *ep = '&'; 1976 roff_ccond(r, ln, ep - buf->buf - 1); 1977 } 1978 if (*ep != '\0') 1979 ++ep; 1980 } 1981 return rr ? ROFF_CONT : ROFF_IGN; 1982 } 1983 1984 /* --- handling of numeric and conditional expressions -------------------- */ 1985 1986 /* 1987 * Parse a single signed integer number. Stop at the first non-digit. 1988 * If there is at least one digit, return success and advance the 1989 * parse point, else return failure and let the parse point unchanged. 1990 * Ignore overflows, treat them just like the C language. 1991 */ 1992 static int 1993 roff_getnum(const char *v, int *pos, int *res, int flags) 1994 { 1995 int myres, scaled, n, p; 1996 1997 if (NULL == res) 1998 res = &myres; 1999 2000 p = *pos; 2001 n = v[p] == '-'; 2002 if (n || v[p] == '+') 2003 p++; 2004 2005 if (flags & ROFFNUM_WHITE) 2006 while (isspace((unsigned char)v[p])) 2007 p++; 2008 2009 for (*res = 0; isdigit((unsigned char)v[p]); p++) 2010 *res = 10 * *res + v[p] - '0'; 2011 if (p == *pos + n) 2012 return 0; 2013 2014 if (n) 2015 *res = -*res; 2016 2017 /* Each number may be followed by one optional scaling unit. */ 2018 2019 switch (v[p]) { 2020 case 'f': 2021 scaled = *res * 65536; 2022 break; 2023 case 'i': 2024 scaled = *res * 240; 2025 break; 2026 case 'c': 2027 scaled = *res * 240 / 2.54; 2028 break; 2029 case 'v': 2030 case 'P': 2031 scaled = *res * 40; 2032 break; 2033 case 'm': 2034 case 'n': 2035 scaled = *res * 24; 2036 break; 2037 case 'p': 2038 scaled = *res * 10 / 3; 2039 break; 2040 case 'u': 2041 scaled = *res; 2042 break; 2043 case 'M': 2044 scaled = *res * 6 / 25; 2045 break; 2046 default: 2047 scaled = *res; 2048 p--; 2049 break; 2050 } 2051 if (flags & ROFFNUM_SCALE) 2052 *res = scaled; 2053 2054 *pos = p + 1; 2055 return 1; 2056 } 2057 2058 /* 2059 * Evaluate a string comparison condition. 2060 * The first character is the delimiter. 2061 * Succeed if the string up to its second occurrence 2062 * matches the string up to its third occurence. 2063 * Advance the cursor after the third occurrence 2064 * or lacking that, to the end of the line. 2065 */ 2066 static int 2067 roff_evalstrcond(const char *v, int *pos) 2068 { 2069 const char *s1, *s2, *s3; 2070 int match; 2071 2072 match = 0; 2073 s1 = v + *pos; /* initial delimiter */ 2074 s2 = s1 + 1; /* for scanning the first string */ 2075 s3 = strchr(s2, *s1); /* for scanning the second string */ 2076 2077 if (NULL == s3) /* found no middle delimiter */ 2078 goto out; 2079 2080 while ('\0' != *++s3) { 2081 if (*s2 != *s3) { /* mismatch */ 2082 s3 = strchr(s3, *s1); 2083 break; 2084 } 2085 if (*s3 == *s1) { /* found the final delimiter */ 2086 match = 1; 2087 break; 2088 } 2089 s2++; 2090 } 2091 2092 out: 2093 if (NULL == s3) 2094 s3 = strchr(s2, '\0'); 2095 else if (*s3 != '\0') 2096 s3++; 2097 *pos = s3 - v; 2098 return match; 2099 } 2100 2101 /* 2102 * Evaluate an optionally negated single character, numerical, 2103 * or string condition. 2104 */ 2105 static int 2106 roff_evalcond(struct roff *r, int ln, char *v, int *pos) 2107 { 2108 char *cp, *name; 2109 size_t sz; 2110 int number, savepos, wanttrue; 2111 2112 if ('!' == v[*pos]) { 2113 wanttrue = 0; 2114 (*pos)++; 2115 } else 2116 wanttrue = 1; 2117 2118 switch (v[*pos]) { 2119 case '\0': 2120 return 0; 2121 case 'n': 2122 case 'o': 2123 (*pos)++; 2124 return wanttrue; 2125 case 'c': 2126 case 'd': 2127 case 'e': 2128 case 't': 2129 case 'v': 2130 (*pos)++; 2131 return !wanttrue; 2132 case 'r': 2133 cp = name = v + ++*pos; 2134 sz = roff_getname(r, &cp, ln, *pos); 2135 *pos = cp - v; 2136 return (sz && roff_hasregn(r, name, sz)) == wanttrue; 2137 default: 2138 break; 2139 } 2140 2141 savepos = *pos; 2142 if (roff_evalnum(r, ln, v, pos, &number, ROFFNUM_SCALE)) 2143 return (number > 0) == wanttrue; 2144 else if (*pos == savepos) 2145 return roff_evalstrcond(v, pos) == wanttrue; 2146 else 2147 return 0; 2148 } 2149 2150 static enum rofferr 2151 roff_line_ignore(ROFF_ARGS) 2152 { 2153 2154 return ROFF_IGN; 2155 } 2156 2157 static enum rofferr 2158 roff_insec(ROFF_ARGS) 2159 { 2160 2161 mandoc_msg(MANDOCERR_REQ_INSEC, r->parse, 2162 ln, ppos, roffs[tok].name); 2163 return ROFF_IGN; 2164 } 2165 2166 static enum rofferr 2167 roff_unsupp(ROFF_ARGS) 2168 { 2169 2170 mandoc_msg(MANDOCERR_REQ_UNSUPP, r->parse, 2171 ln, ppos, roffs[tok].name); 2172 return ROFF_IGN; 2173 } 2174 2175 static enum rofferr 2176 roff_cond(ROFF_ARGS) 2177 { 2178 2179 roffnode_push(r, tok, NULL, ln, ppos); 2180 2181 /* 2182 * An `.el' has no conditional body: it will consume the value 2183 * of the current rstack entry set in prior `ie' calls or 2184 * defaults to DENY. 2185 * 2186 * If we're not an `el', however, then evaluate the conditional. 2187 */ 2188 2189 r->last->rule = tok == ROFF_el ? 2190 (r->rstackpos < 0 ? 0 : r->rstack[r->rstackpos--]) : 2191 roff_evalcond(r, ln, buf->buf, &pos); 2192 2193 /* 2194 * An if-else will put the NEGATION of the current evaluated 2195 * conditional into the stack of rules. 2196 */ 2197 2198 if (tok == ROFF_ie) { 2199 if (r->rstackpos + 1 == r->rstacksz) { 2200 r->rstacksz += 16; 2201 r->rstack = mandoc_reallocarray(r->rstack, 2202 r->rstacksz, sizeof(int)); 2203 } 2204 r->rstack[++r->rstackpos] = !r->last->rule; 2205 } 2206 2207 /* If the parent has false as its rule, then so do we. */ 2208 2209 if (r->last->parent && !r->last->parent->rule) 2210 r->last->rule = 0; 2211 2212 /* 2213 * Determine scope. 2214 * If there is nothing on the line after the conditional, 2215 * not even whitespace, use next-line scope. 2216 */ 2217 2218 if (buf->buf[pos] == '\0') { 2219 r->last->endspan = 2; 2220 goto out; 2221 } 2222 2223 while (buf->buf[pos] == ' ') 2224 pos++; 2225 2226 /* An opening brace requests multiline scope. */ 2227 2228 if (buf->buf[pos] == '\\' && buf->buf[pos + 1] == '{') { 2229 r->last->endspan = -1; 2230 pos += 2; 2231 while (buf->buf[pos] == ' ') 2232 pos++; 2233 goto out; 2234 } 2235 2236 /* 2237 * Anything else following the conditional causes 2238 * single-line scope. Warn if the scope contains 2239 * nothing but trailing whitespace. 2240 */ 2241 2242 if (buf->buf[pos] == '\0') 2243 mandoc_msg(MANDOCERR_COND_EMPTY, r->parse, 2244 ln, ppos, roffs[tok].name); 2245 2246 r->last->endspan = 1; 2247 2248 out: 2249 *offs = pos; 2250 return ROFF_RERUN; 2251 } 2252 2253 static enum rofferr 2254 roff_ds(ROFF_ARGS) 2255 { 2256 char *string; 2257 const char *name; 2258 size_t namesz; 2259 2260 /* Ignore groff compatibility mode for now. */ 2261 2262 if (tok == ROFF_ds1) 2263 tok = ROFF_ds; 2264 else if (tok == ROFF_as1) 2265 tok = ROFF_as; 2266 2267 /* 2268 * The first word is the name of the string. 2269 * If it is empty or terminated by an escape sequence, 2270 * abort the `ds' request without defining anything. 2271 */ 2272 2273 name = string = buf->buf + pos; 2274 if (*name == '\0') 2275 return ROFF_IGN; 2276 2277 namesz = roff_getname(r, &string, ln, pos); 2278 if (name[namesz] == '\\') 2279 return ROFF_IGN; 2280 2281 /* Read past the initial double-quote, if any. */ 2282 if (*string == '"') 2283 string++; 2284 2285 /* The rest is the value. */ 2286 roff_setstrn(&r->strtab, name, namesz, string, strlen(string), 2287 ROFF_as == tok); 2288 return ROFF_IGN; 2289 } 2290 2291 /* 2292 * Parse a single operator, one or two characters long. 2293 * If the operator is recognized, return success and advance the 2294 * parse point, else return failure and let the parse point unchanged. 2295 */ 2296 static int 2297 roff_getop(const char *v, int *pos, char *res) 2298 { 2299 2300 *res = v[*pos]; 2301 2302 switch (*res) { 2303 case '+': 2304 case '-': 2305 case '*': 2306 case '/': 2307 case '%': 2308 case '&': 2309 case ':': 2310 break; 2311 case '<': 2312 switch (v[*pos + 1]) { 2313 case '=': 2314 *res = 'l'; 2315 (*pos)++; 2316 break; 2317 case '>': 2318 *res = '!'; 2319 (*pos)++; 2320 break; 2321 case '?': 2322 *res = 'i'; 2323 (*pos)++; 2324 break; 2325 default: 2326 break; 2327 } 2328 break; 2329 case '>': 2330 switch (v[*pos + 1]) { 2331 case '=': 2332 *res = 'g'; 2333 (*pos)++; 2334 break; 2335 case '?': 2336 *res = 'a'; 2337 (*pos)++; 2338 break; 2339 default: 2340 break; 2341 } 2342 break; 2343 case '=': 2344 if ('=' == v[*pos + 1]) 2345 (*pos)++; 2346 break; 2347 default: 2348 return 0; 2349 } 2350 (*pos)++; 2351 2352 return *res; 2353 } 2354 2355 /* 2356 * Evaluate either a parenthesized numeric expression 2357 * or a single signed integer number. 2358 */ 2359 static int 2360 roff_evalpar(struct roff *r, int ln, 2361 const char *v, int *pos, int *res, int flags) 2362 { 2363 2364 if ('(' != v[*pos]) 2365 return roff_getnum(v, pos, res, flags); 2366 2367 (*pos)++; 2368 if ( ! roff_evalnum(r, ln, v, pos, res, flags | ROFFNUM_WHITE)) 2369 return 0; 2370 2371 /* 2372 * Omission of the closing parenthesis 2373 * is an error in validation mode, 2374 * but ignored in evaluation mode. 2375 */ 2376 2377 if (')' == v[*pos]) 2378 (*pos)++; 2379 else if (NULL == res) 2380 return 0; 2381 2382 return 1; 2383 } 2384 2385 /* 2386 * Evaluate a complete numeric expression. 2387 * Proceed left to right, there is no concept of precedence. 2388 */ 2389 static int 2390 roff_evalnum(struct roff *r, int ln, const char *v, 2391 int *pos, int *res, int flags) 2392 { 2393 int mypos, operand2; 2394 char operator; 2395 2396 if (NULL == pos) { 2397 mypos = 0; 2398 pos = &mypos; 2399 } 2400 2401 if (flags & ROFFNUM_WHITE) 2402 while (isspace((unsigned char)v[*pos])) 2403 (*pos)++; 2404 2405 if ( ! roff_evalpar(r, ln, v, pos, res, flags)) 2406 return 0; 2407 2408 while (1) { 2409 if (flags & ROFFNUM_WHITE) 2410 while (isspace((unsigned char)v[*pos])) 2411 (*pos)++; 2412 2413 if ( ! roff_getop(v, pos, &operator)) 2414 break; 2415 2416 if (flags & ROFFNUM_WHITE) 2417 while (isspace((unsigned char)v[*pos])) 2418 (*pos)++; 2419 2420 if ( ! roff_evalpar(r, ln, v, pos, &operand2, flags)) 2421 return 0; 2422 2423 if (flags & ROFFNUM_WHITE) 2424 while (isspace((unsigned char)v[*pos])) 2425 (*pos)++; 2426 2427 if (NULL == res) 2428 continue; 2429 2430 switch (operator) { 2431 case '+': 2432 *res += operand2; 2433 break; 2434 case '-': 2435 *res -= operand2; 2436 break; 2437 case '*': 2438 *res *= operand2; 2439 break; 2440 case '/': 2441 if (operand2 == 0) { 2442 mandoc_msg(MANDOCERR_DIVZERO, 2443 r->parse, ln, *pos, v); 2444 *res = 0; 2445 break; 2446 } 2447 *res /= operand2; 2448 break; 2449 case '%': 2450 if (operand2 == 0) { 2451 mandoc_msg(MANDOCERR_DIVZERO, 2452 r->parse, ln, *pos, v); 2453 *res = 0; 2454 break; 2455 } 2456 *res %= operand2; 2457 break; 2458 case '<': 2459 *res = *res < operand2; 2460 break; 2461 case '>': 2462 *res = *res > operand2; 2463 break; 2464 case 'l': 2465 *res = *res <= operand2; 2466 break; 2467 case 'g': 2468 *res = *res >= operand2; 2469 break; 2470 case '=': 2471 *res = *res == operand2; 2472 break; 2473 case '!': 2474 *res = *res != operand2; 2475 break; 2476 case '&': 2477 *res = *res && operand2; 2478 break; 2479 case ':': 2480 *res = *res || operand2; 2481 break; 2482 case 'i': 2483 if (operand2 < *res) 2484 *res = operand2; 2485 break; 2486 case 'a': 2487 if (operand2 > *res) 2488 *res = operand2; 2489 break; 2490 default: 2491 abort(); 2492 } 2493 } 2494 return 1; 2495 } 2496 2497 /* --- register management ------------------------------------------------ */ 2498 2499 void 2500 roff_setreg(struct roff *r, const char *name, int val, char sign) 2501 { 2502 struct roffreg *reg; 2503 2504 /* Search for an existing register with the same name. */ 2505 reg = r->regtab; 2506 2507 while (reg && strcmp(name, reg->key.p)) 2508 reg = reg->next; 2509 2510 if (NULL == reg) { 2511 /* Create a new register. */ 2512 reg = mandoc_malloc(sizeof(struct roffreg)); 2513 reg->key.p = mandoc_strdup(name); 2514 reg->key.sz = strlen(name); 2515 reg->val = 0; 2516 reg->next = r->regtab; 2517 r->regtab = reg; 2518 } 2519 2520 if ('+' == sign) 2521 reg->val += val; 2522 else if ('-' == sign) 2523 reg->val -= val; 2524 else 2525 reg->val = val; 2526 } 2527 2528 /* 2529 * Handle some predefined read-only number registers. 2530 * For now, return -1 if the requested register is not predefined; 2531 * in case a predefined read-only register having the value -1 2532 * were to turn up, another special value would have to be chosen. 2533 */ 2534 static int 2535 roff_getregro(const struct roff *r, const char *name) 2536 { 2537 2538 switch (*name) { 2539 case '$': /* Number of arguments of the last macro evaluated. */ 2540 return r->argc; 2541 case 'A': /* ASCII approximation mode is always off. */ 2542 return 0; 2543 case 'g': /* Groff compatibility mode is always on. */ 2544 return 1; 2545 case 'H': /* Fixed horizontal resolution. */ 2546 return 24; 2547 case 'j': /* Always adjust left margin only. */ 2548 return 0; 2549 case 'T': /* Some output device is always defined. */ 2550 return 1; 2551 case 'V': /* Fixed vertical resolution. */ 2552 return 40; 2553 default: 2554 return -1; 2555 } 2556 } 2557 2558 int 2559 roff_getreg(const struct roff *r, const char *name) 2560 { 2561 struct roffreg *reg; 2562 int val; 2563 2564 if ('.' == name[0] && '\0' != name[1] && '\0' == name[2]) { 2565 val = roff_getregro(r, name + 1); 2566 if (-1 != val) 2567 return val; 2568 } 2569 2570 for (reg = r->regtab; reg; reg = reg->next) 2571 if (0 == strcmp(name, reg->key.p)) 2572 return reg->val; 2573 2574 return 0; 2575 } 2576 2577 static int 2578 roff_getregn(const struct roff *r, const char *name, size_t len) 2579 { 2580 struct roffreg *reg; 2581 int val; 2582 2583 if ('.' == name[0] && 2 == len) { 2584 val = roff_getregro(r, name + 1); 2585 if (-1 != val) 2586 return val; 2587 } 2588 2589 for (reg = r->regtab; reg; reg = reg->next) 2590 if (len == reg->key.sz && 2591 0 == strncmp(name, reg->key.p, len)) 2592 return reg->val; 2593 2594 return 0; 2595 } 2596 2597 static int 2598 roff_hasregn(const struct roff *r, const char *name, size_t len) 2599 { 2600 struct roffreg *reg; 2601 int val; 2602 2603 if ('.' == name[0] && 2 == len) { 2604 val = roff_getregro(r, name + 1); 2605 if (-1 != val) 2606 return 1; 2607 } 2608 2609 for (reg = r->regtab; reg; reg = reg->next) 2610 if (len == reg->key.sz && 2611 0 == strncmp(name, reg->key.p, len)) 2612 return 1; 2613 2614 return 0; 2615 } 2616 2617 static void 2618 roff_freereg(struct roffreg *reg) 2619 { 2620 struct roffreg *old_reg; 2621 2622 while (NULL != reg) { 2623 free(reg->key.p); 2624 old_reg = reg; 2625 reg = reg->next; 2626 free(old_reg); 2627 } 2628 } 2629 2630 static enum rofferr 2631 roff_nr(ROFF_ARGS) 2632 { 2633 char *key, *val; 2634 size_t keysz; 2635 int iv; 2636 char sign; 2637 2638 key = val = buf->buf + pos; 2639 if (*key == '\0') 2640 return ROFF_IGN; 2641 2642 keysz = roff_getname(r, &val, ln, pos); 2643 if (key[keysz] == '\\') 2644 return ROFF_IGN; 2645 key[keysz] = '\0'; 2646 2647 sign = *val; 2648 if (sign == '+' || sign == '-') 2649 val++; 2650 2651 if (roff_evalnum(r, ln, val, NULL, &iv, ROFFNUM_SCALE)) 2652 roff_setreg(r, key, iv, sign); 2653 2654 return ROFF_IGN; 2655 } 2656 2657 static enum rofferr 2658 roff_rr(ROFF_ARGS) 2659 { 2660 struct roffreg *reg, **prev; 2661 char *name, *cp; 2662 size_t namesz; 2663 2664 name = cp = buf->buf + pos; 2665 if (*name == '\0') 2666 return ROFF_IGN; 2667 namesz = roff_getname(r, &cp, ln, pos); 2668 name[namesz] = '\0'; 2669 2670 prev = &r->regtab; 2671 while (1) { 2672 reg = *prev; 2673 if (reg == NULL || !strcmp(name, reg->key.p)) 2674 break; 2675 prev = ®->next; 2676 } 2677 if (reg != NULL) { 2678 *prev = reg->next; 2679 free(reg->key.p); 2680 free(reg); 2681 } 2682 return ROFF_IGN; 2683 } 2684 2685 /* --- handler functions for roff requests -------------------------------- */ 2686 2687 static enum rofferr 2688 roff_rm(ROFF_ARGS) 2689 { 2690 const char *name; 2691 char *cp; 2692 size_t namesz; 2693 2694 cp = buf->buf + pos; 2695 while (*cp != '\0') { 2696 name = cp; 2697 namesz = roff_getname(r, &cp, ln, (int)(cp - buf->buf)); 2698 roff_setstrn(&r->strtab, name, namesz, NULL, 0, 0); 2699 if (name[namesz] == '\\') 2700 break; 2701 } 2702 return ROFF_IGN; 2703 } 2704 2705 static enum rofferr 2706 roff_it(ROFF_ARGS) 2707 { 2708 int iv; 2709 2710 /* Parse the number of lines. */ 2711 2712 if ( ! roff_evalnum(r, ln, buf->buf, &pos, &iv, 0)) { 2713 mandoc_msg(MANDOCERR_IT_NONUM, r->parse, 2714 ln, ppos, buf->buf + 1); 2715 return ROFF_IGN; 2716 } 2717 2718 while (isspace((unsigned char)buf->buf[pos])) 2719 pos++; 2720 2721 /* 2722 * Arm the input line trap. 2723 * Special-casing "an-trap" is an ugly workaround to cope 2724 * with DocBook stupidly fiddling with man(7) internals. 2725 */ 2726 2727 roffit_lines = iv; 2728 roffit_macro = mandoc_strdup(iv != 1 || 2729 strcmp(buf->buf + pos, "an-trap") ? 2730 buf->buf + pos : "br"); 2731 return ROFF_IGN; 2732 } 2733 2734 static enum rofferr 2735 roff_Dd(ROFF_ARGS) 2736 { 2737 const char *const *cp; 2738 2739 if ((r->options & (MPARSE_MDOC | MPARSE_QUICK)) == 0) 2740 for (cp = __mdoc_reserved; *cp; cp++) 2741 roff_setstr(r, *cp, NULL, 0); 2742 2743 if (r->format == 0) 2744 r->format = MPARSE_MDOC; 2745 2746 return ROFF_CONT; 2747 } 2748 2749 static enum rofferr 2750 roff_TH(ROFF_ARGS) 2751 { 2752 const char *const *cp; 2753 2754 if ((r->options & MPARSE_QUICK) == 0) 2755 for (cp = __man_reserved; *cp; cp++) 2756 roff_setstr(r, *cp, NULL, 0); 2757 2758 if (r->format == 0) 2759 r->format = MPARSE_MAN; 2760 2761 return ROFF_CONT; 2762 } 2763 2764 static enum rofferr 2765 roff_TE(ROFF_ARGS) 2766 { 2767 2768 if (NULL == r->tbl) 2769 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2770 ln, ppos, "TE"); 2771 else if ( ! tbl_end(&r->tbl)) { 2772 free(buf->buf); 2773 buf->buf = mandoc_strdup(".sp"); 2774 buf->sz = 4; 2775 return ROFF_REPARSE; 2776 } 2777 return ROFF_IGN; 2778 } 2779 2780 static enum rofferr 2781 roff_T_(ROFF_ARGS) 2782 { 2783 2784 if (NULL == r->tbl) 2785 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, 2786 ln, ppos, "T&"); 2787 else 2788 tbl_restart(ppos, ln, r->tbl); 2789 2790 return ROFF_IGN; 2791 } 2792 2793 /* 2794 * Handle in-line equation delimiters. 2795 */ 2796 static enum rofferr 2797 roff_eqndelim(struct roff *r, struct buf *buf, int pos) 2798 { 2799 char *cp1, *cp2; 2800 const char *bef_pr, *bef_nl, *mac, *aft_nl, *aft_pr; 2801 2802 /* 2803 * Outside equations, look for an opening delimiter. 2804 * If we are inside an equation, we already know it is 2805 * in-line, or this function wouldn't have been called; 2806 * so look for a closing delimiter. 2807 */ 2808 2809 cp1 = buf->buf + pos; 2810 cp2 = strchr(cp1, r->eqn == NULL ? 2811 r->last_eqn->odelim : r->last_eqn->cdelim); 2812 if (cp2 == NULL) 2813 return ROFF_CONT; 2814 2815 *cp2++ = '\0'; 2816 bef_pr = bef_nl = aft_nl = aft_pr = ""; 2817 2818 /* Handle preceding text, protecting whitespace. */ 2819 2820 if (*buf->buf != '\0') { 2821 if (r->eqn == NULL) 2822 bef_pr = "\\&"; 2823 bef_nl = "\n"; 2824 } 2825 2826 /* 2827 * Prepare replacing the delimiter with an equation macro 2828 * and drop leading white space from the equation. 2829 */ 2830 2831 if (r->eqn == NULL) { 2832 while (*cp2 == ' ') 2833 cp2++; 2834 mac = ".EQ"; 2835 } else 2836 mac = ".EN"; 2837 2838 /* Handle following text, protecting whitespace. */ 2839 2840 if (*cp2 != '\0') { 2841 aft_nl = "\n"; 2842 if (r->eqn != NULL) 2843 aft_pr = "\\&"; 2844 } 2845 2846 /* Do the actual replacement. */ 2847 2848 buf->sz = mandoc_asprintf(&cp1, "%s%s%s%s%s%s%s", buf->buf, 2849 bef_pr, bef_nl, mac, aft_nl, aft_pr, cp2) + 1; 2850 free(buf->buf); 2851 buf->buf = cp1; 2852 2853 /* Toggle the in-line state of the eqn subsystem. */ 2854 2855 r->eqn_inline = r->eqn == NULL; 2856 return ROFF_REPARSE; 2857 } 2858 2859 static enum rofferr 2860 roff_EQ(ROFF_ARGS) 2861 { 2862 struct eqn_node *e; 2863 2864 assert(r->eqn == NULL); 2865 e = eqn_alloc(ppos, ln, r->parse); 2866 2867 if (r->last_eqn) { 2868 r->last_eqn->next = e; 2869 e->delim = r->last_eqn->delim; 2870 e->odelim = r->last_eqn->odelim; 2871 e->cdelim = r->last_eqn->cdelim; 2872 } else 2873 r->first_eqn = r->last_eqn = e; 2874 2875 r->eqn = r->last_eqn = e; 2876 2877 if (buf->buf[pos] != '\0') 2878 mandoc_vmsg(MANDOCERR_ARG_SKIP, r->parse, ln, pos, 2879 ".EQ %s", buf->buf + pos); 2880 2881 return ROFF_IGN; 2882 } 2883 2884 static enum rofferr 2885 roff_EN(ROFF_ARGS) 2886 { 2887 2888 mandoc_msg(MANDOCERR_BLK_NOTOPEN, r->parse, ln, ppos, "EN"); 2889 return ROFF_IGN; 2890 } 2891 2892 static enum rofferr 2893 roff_TS(ROFF_ARGS) 2894 { 2895 struct tbl_node *tbl; 2896 2897 if (r->tbl) { 2898 mandoc_msg(MANDOCERR_BLK_BROKEN, r->parse, 2899 ln, ppos, "TS breaks TS"); 2900 tbl_end(&r->tbl); 2901 } 2902 2903 tbl = tbl_alloc(ppos, ln, r->parse); 2904 2905 if (r->last_tbl) 2906 r->last_tbl->next = tbl; 2907 else 2908 r->first_tbl = r->last_tbl = tbl; 2909 2910 r->tbl = r->last_tbl = tbl; 2911 return ROFF_IGN; 2912 } 2913 2914 static enum rofferr 2915 roff_brp(ROFF_ARGS) 2916 { 2917 2918 buf->buf[pos - 1] = '\0'; 2919 return ROFF_CONT; 2920 } 2921 2922 static enum rofferr 2923 roff_cc(ROFF_ARGS) 2924 { 2925 const char *p; 2926 2927 p = buf->buf + pos; 2928 2929 if (*p == '\0' || (r->control = *p++) == '.') 2930 r->control = 0; 2931 2932 if (*p != '\0') 2933 mandoc_vmsg(MANDOCERR_ARG_EXCESS, r->parse, 2934 ln, p - buf->buf, "cc ... %s", p); 2935 2936 return ROFF_IGN; 2937 } 2938 2939 static enum rofferr 2940 roff_tr(ROFF_ARGS) 2941 { 2942 const char *p, *first, *second; 2943 size_t fsz, ssz; 2944 enum mandoc_esc esc; 2945 2946 p = buf->buf + pos; 2947 2948 if (*p == '\0') { 2949 mandoc_msg(MANDOCERR_REQ_EMPTY, r->parse, ln, ppos, "tr"); 2950 return ROFF_IGN; 2951 } 2952 2953 while (*p != '\0') { 2954 fsz = ssz = 1; 2955 2956 first = p++; 2957 if (*first == '\\') { 2958 esc = mandoc_escape(&p, NULL, NULL); 2959 if (esc == ESCAPE_ERROR) { 2960 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 2961 ln, (int)(p - buf->buf), first); 2962 return ROFF_IGN; 2963 } 2964 fsz = (size_t)(p - first); 2965 } 2966 2967 second = p++; 2968 if (*second == '\\') { 2969 esc = mandoc_escape(&p, NULL, NULL); 2970 if (esc == ESCAPE_ERROR) { 2971 mandoc_msg(MANDOCERR_ESC_BAD, r->parse, 2972 ln, (int)(p - buf->buf), second); 2973 return ROFF_IGN; 2974 } 2975 ssz = (size_t)(p - second); 2976 } else if (*second == '\0') { 2977 mandoc_vmsg(MANDOCERR_TR_ODD, r->parse, 2978 ln, first - buf->buf, "tr %s", first); 2979 second = " "; 2980 p--; 2981 } 2982 2983 if (fsz > 1) { 2984 roff_setstrn(&r->xmbtab, first, fsz, 2985 second, ssz, 0); 2986 continue; 2987 } 2988 2989 if (r->xtab == NULL) 2990 r->xtab = mandoc_calloc(128, 2991 sizeof(struct roffstr)); 2992 2993 free(r->xtab[(int)*first].p); 2994 r->xtab[(int)*first].p = mandoc_strndup(second, ssz); 2995 r->xtab[(int)*first].sz = ssz; 2996 } 2997 2998 return ROFF_IGN; 2999 } 3000 3001 static enum rofferr 3002 roff_so(ROFF_ARGS) 3003 { 3004 char *name, *cp; 3005 3006 name = buf->buf + pos; 3007 mandoc_vmsg(MANDOCERR_SO, r->parse, ln, ppos, "so %s", name); 3008 3009 /* 3010 * Handle `so'. Be EXTREMELY careful, as we shouldn't be 3011 * opening anything that's not in our cwd or anything beneath 3012 * it. Thus, explicitly disallow traversing up the file-system 3013 * or using absolute paths. 3014 */ 3015 3016 if (*name == '/' || strstr(name, "../") || strstr(name, "/..")) { 3017 mandoc_vmsg(MANDOCERR_SO_PATH, r->parse, ln, ppos, 3018 ".so %s", name); 3019 buf->sz = mandoc_asprintf(&cp, 3020 ".sp\nSee the file %s.\n.sp", name) + 1; 3021 free(buf->buf); 3022 buf->buf = cp; 3023 *offs = 0; 3024 return ROFF_REPARSE; 3025 } 3026 3027 *offs = pos; 3028 return ROFF_SO; 3029 } 3030 3031 /* --- user defined strings and macros ------------------------------------ */ 3032 3033 static enum rofferr 3034 roff_userdef(ROFF_ARGS) 3035 { 3036 const char *arg[9], *ap; 3037 char *cp, *n1, *n2; 3038 int i, ib, ie; 3039 size_t asz, rsz; 3040 3041 /* 3042 * Collect pointers to macro argument strings 3043 * and NUL-terminate them. 3044 */ 3045 3046 r->argc = 0; 3047 cp = buf->buf + pos; 3048 for (i = 0; i < 9; i++) { 3049 if (*cp == '\0') 3050 arg[i] = ""; 3051 else { 3052 arg[i] = mandoc_getarg(r->parse, &cp, ln, &pos); 3053 r->argc = i + 1; 3054 } 3055 } 3056 3057 /* 3058 * Expand macro arguments. 3059 */ 3060 3061 buf->sz = strlen(r->current_string) + 1; 3062 n1 = cp = mandoc_malloc(buf->sz); 3063 memcpy(n1, r->current_string, buf->sz); 3064 while (*cp != '\0') { 3065 3066 /* Scan ahead for the next argument invocation. */ 3067 3068 if (*cp++ != '\\') 3069 continue; 3070 if (*cp++ != '$') 3071 continue; 3072 if (*cp == '*') { /* \\$* inserts all arguments */ 3073 ib = 0; 3074 ie = r->argc - 1; 3075 } else { /* \\$1 .. \\$9 insert one argument */ 3076 ib = ie = *cp - '1'; 3077 if (ib < 0 || ib > 8) 3078 continue; 3079 } 3080 cp -= 2; 3081 3082 /* 3083 * Determine the size of the expanded argument, 3084 * taking escaping of quotes into account. 3085 */ 3086 3087 asz = ie > ib ? ie - ib : 0; /* for blanks */ 3088 for (i = ib; i <= ie; i++) { 3089 for (ap = arg[i]; *ap != '\0'; ap++) { 3090 asz++; 3091 if (*ap == '"') 3092 asz += 3; 3093 } 3094 } 3095 if (asz != 3) { 3096 3097 /* 3098 * Determine the size of the rest of the 3099 * unexpanded macro, including the NUL. 3100 */ 3101 3102 rsz = buf->sz - (cp - n1) - 3; 3103 3104 /* 3105 * When shrinking, move before 3106 * releasing the storage. 3107 */ 3108 3109 if (asz < 3) 3110 memmove(cp + asz, cp + 3, rsz); 3111 3112 /* 3113 * Resize the storage for the macro 3114 * and readjust the parse pointer. 3115 */ 3116 3117 buf->sz += asz - 3; 3118 n2 = mandoc_realloc(n1, buf->sz); 3119 cp = n2 + (cp - n1); 3120 n1 = n2; 3121 3122 /* 3123 * When growing, make room 3124 * for the expanded argument. 3125 */ 3126 3127 if (asz > 3) 3128 memmove(cp + asz, cp + 3, rsz); 3129 } 3130 3131 /* Copy the expanded argument, escaping quotes. */ 3132 3133 n2 = cp; 3134 for (i = ib; i <= ie; i++) { 3135 for (ap = arg[i]; *ap != '\0'; ap++) { 3136 if (*ap == '"') { 3137 memcpy(n2, "\\(dq", 4); 3138 n2 += 4; 3139 } else 3140 *n2++ = *ap; 3141 } 3142 if (i < ie) 3143 *n2++ = ' '; 3144 } 3145 } 3146 3147 /* 3148 * Replace the macro invocation 3149 * by the expanded macro. 3150 */ 3151 3152 free(buf->buf); 3153 buf->buf = n1; 3154 *offs = 0; 3155 3156 return buf->sz > 1 && buf->buf[buf->sz - 2] == '\n' ? 3157 ROFF_REPARSE : ROFF_APPEND; 3158 } 3159 3160 static size_t 3161 roff_getname(struct roff *r, char **cpp, int ln, int pos) 3162 { 3163 char *name, *cp; 3164 size_t namesz; 3165 3166 name = *cpp; 3167 if ('\0' == *name) 3168 return 0; 3169 3170 /* Read until end of name and terminate it with NUL. */ 3171 for (cp = name; 1; cp++) { 3172 if ('\0' == *cp || ' ' == *cp) { 3173 namesz = cp - name; 3174 break; 3175 } 3176 if ('\\' != *cp) 3177 continue; 3178 namesz = cp - name; 3179 if ('{' == cp[1] || '}' == cp[1]) 3180 break; 3181 cp++; 3182 if ('\\' == *cp) 3183 continue; 3184 mandoc_vmsg(MANDOCERR_NAMESC, r->parse, ln, pos, 3185 "%.*s", (int)(cp - name + 1), name); 3186 mandoc_escape((const char **)&cp, NULL, NULL); 3187 break; 3188 } 3189 3190 /* Read past spaces. */ 3191 while (' ' == *cp) 3192 cp++; 3193 3194 *cpp = cp; 3195 return namesz; 3196 } 3197 3198 /* 3199 * Store *string into the user-defined string called *name. 3200 * To clear an existing entry, call with (*r, *name, NULL, 0). 3201 * append == 0: replace mode 3202 * append == 1: single-line append mode 3203 * append == 2: multiline append mode, append '\n' after each call 3204 */ 3205 static void 3206 roff_setstr(struct roff *r, const char *name, const char *string, 3207 int append) 3208 { 3209 3210 roff_setstrn(&r->strtab, name, strlen(name), string, 3211 string ? strlen(string) : 0, append); 3212 } 3213 3214 static void 3215 roff_setstrn(struct roffkv **r, const char *name, size_t namesz, 3216 const char *string, size_t stringsz, int append) 3217 { 3218 struct roffkv *n; 3219 char *c; 3220 int i; 3221 size_t oldch, newch; 3222 3223 /* Search for an existing string with the same name. */ 3224 n = *r; 3225 3226 while (n && (namesz != n->key.sz || 3227 strncmp(n->key.p, name, namesz))) 3228 n = n->next; 3229 3230 if (NULL == n) { 3231 /* Create a new string table entry. */ 3232 n = mandoc_malloc(sizeof(struct roffkv)); 3233 n->key.p = mandoc_strndup(name, namesz); 3234 n->key.sz = namesz; 3235 n->val.p = NULL; 3236 n->val.sz = 0; 3237 n->next = *r; 3238 *r = n; 3239 } else if (0 == append) { 3240 free(n->val.p); 3241 n->val.p = NULL; 3242 n->val.sz = 0; 3243 } 3244 3245 if (NULL == string) 3246 return; 3247 3248 /* 3249 * One additional byte for the '\n' in multiline mode, 3250 * and one for the terminating '\0'. 3251 */ 3252 newch = stringsz + (1 < append ? 2u : 1u); 3253 3254 if (NULL == n->val.p) { 3255 n->val.p = mandoc_malloc(newch); 3256 *n->val.p = '\0'; 3257 oldch = 0; 3258 } else { 3259 oldch = n->val.sz; 3260 n->val.p = mandoc_realloc(n->val.p, oldch + newch); 3261 } 3262 3263 /* Skip existing content in the destination buffer. */ 3264 c = n->val.p + (int)oldch; 3265 3266 /* Append new content to the destination buffer. */ 3267 i = 0; 3268 while (i < (int)stringsz) { 3269 /* 3270 * Rudimentary roff copy mode: 3271 * Handle escaped backslashes. 3272 */ 3273 if ('\\' == string[i] && '\\' == string[i + 1]) 3274 i++; 3275 *c++ = string[i++]; 3276 } 3277 3278 /* Append terminating bytes. */ 3279 if (1 < append) 3280 *c++ = '\n'; 3281 3282 *c = '\0'; 3283 n->val.sz = (int)(c - n->val.p); 3284 } 3285 3286 static const char * 3287 roff_getstrn(const struct roff *r, const char *name, size_t len) 3288 { 3289 const struct roffkv *n; 3290 int i; 3291 3292 for (n = r->strtab; n; n = n->next) 3293 if (0 == strncmp(name, n->key.p, len) && 3294 '\0' == n->key.p[(int)len]) 3295 return n->val.p; 3296 3297 for (i = 0; i < PREDEFS_MAX; i++) 3298 if (0 == strncmp(name, predefs[i].name, len) && 3299 '\0' == predefs[i].name[(int)len]) 3300 return predefs[i].str; 3301 3302 return NULL; 3303 } 3304 3305 static void 3306 roff_freestr(struct roffkv *r) 3307 { 3308 struct roffkv *n, *nn; 3309 3310 for (n = r; n; n = nn) { 3311 free(n->key.p); 3312 free(n->val.p); 3313 nn = n->next; 3314 free(n); 3315 } 3316 } 3317 3318 /* --- accessors and utility functions ------------------------------------ */ 3319 3320 const struct tbl_span * 3321 roff_span(const struct roff *r) 3322 { 3323 3324 return r->tbl ? tbl_span(r->tbl) : NULL; 3325 } 3326 3327 const struct eqn * 3328 roff_eqn(const struct roff *r) 3329 { 3330 3331 return r->last_eqn ? &r->last_eqn->eqn : NULL; 3332 } 3333 3334 /* 3335 * Duplicate an input string, making the appropriate character 3336 * conversations (as stipulated by `tr') along the way. 3337 * Returns a heap-allocated string with all the replacements made. 3338 */ 3339 char * 3340 roff_strdup(const struct roff *r, const char *p) 3341 { 3342 const struct roffkv *cp; 3343 char *res; 3344 const char *pp; 3345 size_t ssz, sz; 3346 enum mandoc_esc esc; 3347 3348 if (NULL == r->xmbtab && NULL == r->xtab) 3349 return mandoc_strdup(p); 3350 else if ('\0' == *p) 3351 return mandoc_strdup(""); 3352 3353 /* 3354 * Step through each character looking for term matches 3355 * (remember that a `tr' can be invoked with an escape, which is 3356 * a glyph but the escape is multi-character). 3357 * We only do this if the character hash has been initialised 3358 * and the string is >0 length. 3359 */ 3360 3361 res = NULL; 3362 ssz = 0; 3363 3364 while ('\0' != *p) { 3365 if ('\\' != *p && r->xtab && r->xtab[(int)*p].p) { 3366 sz = r->xtab[(int)*p].sz; 3367 res = mandoc_realloc(res, ssz + sz + 1); 3368 memcpy(res + ssz, r->xtab[(int)*p].p, sz); 3369 ssz += sz; 3370 p++; 3371 continue; 3372 } else if ('\\' != *p) { 3373 res = mandoc_realloc(res, ssz + 2); 3374 res[ssz++] = *p++; 3375 continue; 3376 } 3377 3378 /* Search for term matches. */ 3379 for (cp = r->xmbtab; cp; cp = cp->next) 3380 if (0 == strncmp(p, cp->key.p, cp->key.sz)) 3381 break; 3382 3383 if (NULL != cp) { 3384 /* 3385 * A match has been found. 3386 * Append the match to the array and move 3387 * forward by its keysize. 3388 */ 3389 res = mandoc_realloc(res, 3390 ssz + cp->val.sz + 1); 3391 memcpy(res + ssz, cp->val.p, cp->val.sz); 3392 ssz += cp->val.sz; 3393 p += (int)cp->key.sz; 3394 continue; 3395 } 3396 3397 /* 3398 * Handle escapes carefully: we need to copy 3399 * over just the escape itself, or else we might 3400 * do replacements within the escape itself. 3401 * Make sure to pass along the bogus string. 3402 */ 3403 pp = p++; 3404 esc = mandoc_escape(&p, NULL, NULL); 3405 if (ESCAPE_ERROR == esc) { 3406 sz = strlen(pp); 3407 res = mandoc_realloc(res, ssz + sz + 1); 3408 memcpy(res + ssz, pp, sz); 3409 break; 3410 } 3411 /* 3412 * We bail out on bad escapes. 3413 * No need to warn: we already did so when 3414 * roff_res() was called. 3415 */ 3416 sz = (int)(p - pp); 3417 res = mandoc_realloc(res, ssz + sz + 1); 3418 memcpy(res + ssz, pp, sz); 3419 ssz += sz; 3420 } 3421 3422 res[(int)ssz] = '\0'; 3423 return res; 3424 } 3425 3426 int 3427 roff_getformat(const struct roff *r) 3428 { 3429 3430 return r->format; 3431 } 3432 3433 /* 3434 * Find out whether a line is a macro line or not. 3435 * If it is, adjust the current position and return one; if it isn't, 3436 * return zero and don't change the current position. 3437 * If the control character has been set with `.cc', then let that grain 3438 * precedence. 3439 * This is slighly contrary to groff, where using the non-breaking 3440 * control character when `cc' has been invoked will cause the 3441 * non-breaking macro contents to be printed verbatim. 3442 */ 3443 int 3444 roff_getcontrol(const struct roff *r, const char *cp, int *ppos) 3445 { 3446 int pos; 3447 3448 pos = *ppos; 3449 3450 if (0 != r->control && cp[pos] == r->control) 3451 pos++; 3452 else if (0 != r->control) 3453 return 0; 3454 else if ('\\' == cp[pos] && '.' == cp[pos + 1]) 3455 pos += 2; 3456 else if ('.' == cp[pos] || '\'' == cp[pos]) 3457 pos++; 3458 else 3459 return 0; 3460 3461 while (' ' == cp[pos] || '\t' == cp[pos]) 3462 pos++; 3463 3464 *ppos = pos; 3465 return 1; 3466 } 3467