1 /* $OpenBSD: eqn.c,v 1.47 2020/01/08 12:09:14 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "eqn.h" 32 #include "libmandoc.h" 33 #include "eqn_parse.h" 34 35 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 36 #define STRNEQ(p1, sz1, p2, sz2) \ 37 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 38 39 enum eqn_tok { 40 EQN_TOK_DYAD = 0, 41 EQN_TOK_VEC, 42 EQN_TOK_UNDER, 43 EQN_TOK_BAR, 44 EQN_TOK_TILDE, 45 EQN_TOK_HAT, 46 EQN_TOK_DOT, 47 EQN_TOK_DOTDOT, 48 EQN_TOK_FWD, 49 EQN_TOK_BACK, 50 EQN_TOK_DOWN, 51 EQN_TOK_UP, 52 EQN_TOK_FAT, 53 EQN_TOK_ROMAN, 54 EQN_TOK_ITALIC, 55 EQN_TOK_BOLD, 56 EQN_TOK_SIZE, 57 EQN_TOK_SUB, 58 EQN_TOK_SUP, 59 EQN_TOK_SQRT, 60 EQN_TOK_OVER, 61 EQN_TOK_FROM, 62 EQN_TOK_TO, 63 EQN_TOK_BRACE_OPEN, 64 EQN_TOK_BRACE_CLOSE, 65 EQN_TOK_GSIZE, 66 EQN_TOK_GFONT, 67 EQN_TOK_MARK, 68 EQN_TOK_LINEUP, 69 EQN_TOK_LEFT, 70 EQN_TOK_RIGHT, 71 EQN_TOK_PILE, 72 EQN_TOK_LPILE, 73 EQN_TOK_RPILE, 74 EQN_TOK_CPILE, 75 EQN_TOK_MATRIX, 76 EQN_TOK_CCOL, 77 EQN_TOK_LCOL, 78 EQN_TOK_RCOL, 79 EQN_TOK_DELIM, 80 EQN_TOK_DEFINE, 81 EQN_TOK_TDEFINE, 82 EQN_TOK_NDEFINE, 83 EQN_TOK_UNDEF, 84 EQN_TOK_ABOVE, 85 EQN_TOK__MAX, 86 EQN_TOK_FUNC, 87 EQN_TOK_QUOTED, 88 EQN_TOK_SYM, 89 EQN_TOK_EOF 90 }; 91 92 static const char *eqn_toks[EQN_TOK__MAX] = { 93 "dyad", /* EQN_TOK_DYAD */ 94 "vec", /* EQN_TOK_VEC */ 95 "under", /* EQN_TOK_UNDER */ 96 "bar", /* EQN_TOK_BAR */ 97 "tilde", /* EQN_TOK_TILDE */ 98 "hat", /* EQN_TOK_HAT */ 99 "dot", /* EQN_TOK_DOT */ 100 "dotdot", /* EQN_TOK_DOTDOT */ 101 "fwd", /* EQN_TOK_FWD * */ 102 "back", /* EQN_TOK_BACK */ 103 "down", /* EQN_TOK_DOWN */ 104 "up", /* EQN_TOK_UP */ 105 "fat", /* EQN_TOK_FAT */ 106 "roman", /* EQN_TOK_ROMAN */ 107 "italic", /* EQN_TOK_ITALIC */ 108 "bold", /* EQN_TOK_BOLD */ 109 "size", /* EQN_TOK_SIZE */ 110 "sub", /* EQN_TOK_SUB */ 111 "sup", /* EQN_TOK_SUP */ 112 "sqrt", /* EQN_TOK_SQRT */ 113 "over", /* EQN_TOK_OVER */ 114 "from", /* EQN_TOK_FROM */ 115 "to", /* EQN_TOK_TO */ 116 "{", /* EQN_TOK_BRACE_OPEN */ 117 "}", /* EQN_TOK_BRACE_CLOSE */ 118 "gsize", /* EQN_TOK_GSIZE */ 119 "gfont", /* EQN_TOK_GFONT */ 120 "mark", /* EQN_TOK_MARK */ 121 "lineup", /* EQN_TOK_LINEUP */ 122 "left", /* EQN_TOK_LEFT */ 123 "right", /* EQN_TOK_RIGHT */ 124 "pile", /* EQN_TOK_PILE */ 125 "lpile", /* EQN_TOK_LPILE */ 126 "rpile", /* EQN_TOK_RPILE */ 127 "cpile", /* EQN_TOK_CPILE */ 128 "matrix", /* EQN_TOK_MATRIX */ 129 "ccol", /* EQN_TOK_CCOL */ 130 "lcol", /* EQN_TOK_LCOL */ 131 "rcol", /* EQN_TOK_RCOL */ 132 "delim", /* EQN_TOK_DELIM */ 133 "define", /* EQN_TOK_DEFINE */ 134 "tdefine", /* EQN_TOK_TDEFINE */ 135 "ndefine", /* EQN_TOK_NDEFINE */ 136 "undef", /* EQN_TOK_UNDEF */ 137 "above", /* EQN_TOK_ABOVE */ 138 }; 139 140 static const char *const eqn_func[] = { 141 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 142 "cos", "cosh", "coth", "csc", "det", "exp", "for", 143 "if", "lim", "ln", "log", "max", "min", 144 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 145 }; 146 147 enum eqn_symt { 148 EQNSYM_alpha = 0, 149 EQNSYM_beta, 150 EQNSYM_chi, 151 EQNSYM_delta, 152 EQNSYM_epsilon, 153 EQNSYM_eta, 154 EQNSYM_gamma, 155 EQNSYM_iota, 156 EQNSYM_kappa, 157 EQNSYM_lambda, 158 EQNSYM_mu, 159 EQNSYM_nu, 160 EQNSYM_omega, 161 EQNSYM_omicron, 162 EQNSYM_phi, 163 EQNSYM_pi, 164 EQNSYM_ps, 165 EQNSYM_rho, 166 EQNSYM_sigma, 167 EQNSYM_tau, 168 EQNSYM_theta, 169 EQNSYM_upsilon, 170 EQNSYM_xi, 171 EQNSYM_zeta, 172 EQNSYM_DELTA, 173 EQNSYM_GAMMA, 174 EQNSYM_LAMBDA, 175 EQNSYM_OMEGA, 176 EQNSYM_PHI, 177 EQNSYM_PI, 178 EQNSYM_PSI, 179 EQNSYM_SIGMA, 180 EQNSYM_THETA, 181 EQNSYM_UPSILON, 182 EQNSYM_XI, 183 EQNSYM_inter, 184 EQNSYM_union, 185 EQNSYM_prod, 186 EQNSYM_int, 187 EQNSYM_sum, 188 EQNSYM_grad, 189 EQNSYM_del, 190 EQNSYM_times, 191 EQNSYM_cdot, 192 EQNSYM_nothing, 193 EQNSYM_approx, 194 EQNSYM_prime, 195 EQNSYM_half, 196 EQNSYM_partial, 197 EQNSYM_inf, 198 EQNSYM_muchgreat, 199 EQNSYM_muchless, 200 EQNSYM_larrow, 201 EQNSYM_rarrow, 202 EQNSYM_pm, 203 EQNSYM_nequal, 204 EQNSYM_equiv, 205 EQNSYM_lessequal, 206 EQNSYM_moreequal, 207 EQNSYM_minus, 208 EQNSYM__MAX 209 }; 210 211 struct eqnsym { 212 const char *str; 213 const char *sym; 214 }; 215 216 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 217 { "alpha", "*a" }, /* EQNSYM_alpha */ 218 { "beta", "*b" }, /* EQNSYM_beta */ 219 { "chi", "*x" }, /* EQNSYM_chi */ 220 { "delta", "*d" }, /* EQNSYM_delta */ 221 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 222 { "eta", "*y" }, /* EQNSYM_eta */ 223 { "gamma", "*g" }, /* EQNSYM_gamma */ 224 { "iota", "*i" }, /* EQNSYM_iota */ 225 { "kappa", "*k" }, /* EQNSYM_kappa */ 226 { "lambda", "*l" }, /* EQNSYM_lambda */ 227 { "mu", "*m" }, /* EQNSYM_mu */ 228 { "nu", "*n" }, /* EQNSYM_nu */ 229 { "omega", "*w" }, /* EQNSYM_omega */ 230 { "omicron", "*o" }, /* EQNSYM_omicron */ 231 { "phi", "*f" }, /* EQNSYM_phi */ 232 { "pi", "*p" }, /* EQNSYM_pi */ 233 { "psi", "*q" }, /* EQNSYM_psi */ 234 { "rho", "*r" }, /* EQNSYM_rho */ 235 { "sigma", "*s" }, /* EQNSYM_sigma */ 236 { "tau", "*t" }, /* EQNSYM_tau */ 237 { "theta", "*h" }, /* EQNSYM_theta */ 238 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 239 { "xi", "*c" }, /* EQNSYM_xi */ 240 { "zeta", "*z" }, /* EQNSYM_zeta */ 241 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 242 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 243 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 244 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 245 { "PHI", "*F" }, /* EQNSYM_PHI */ 246 { "PI", "*P" }, /* EQNSYM_PI */ 247 { "PSI", "*Q" }, /* EQNSYM_PSI */ 248 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 249 { "THETA", "*H" }, /* EQNSYM_THETA */ 250 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 251 { "XI", "*C" }, /* EQNSYM_XI */ 252 { "inter", "ca" }, /* EQNSYM_inter */ 253 { "union", "cu" }, /* EQNSYM_union */ 254 { "prod", "product" }, /* EQNSYM_prod */ 255 { "int", "integral" }, /* EQNSYM_int */ 256 { "sum", "sum" }, /* EQNSYM_sum */ 257 { "grad", "gr" }, /* EQNSYM_grad */ 258 { "del", "gr" }, /* EQNSYM_del */ 259 { "times", "mu" }, /* EQNSYM_times */ 260 { "cdot", "pc" }, /* EQNSYM_cdot */ 261 { "nothing", "&" }, /* EQNSYM_nothing */ 262 { "approx", "~~" }, /* EQNSYM_approx */ 263 { "prime", "fm" }, /* EQNSYM_prime */ 264 { "half", "12" }, /* EQNSYM_half */ 265 { "partial", "pd" }, /* EQNSYM_partial */ 266 { "inf", "if" }, /* EQNSYM_inf */ 267 { ">>", ">>" }, /* EQNSYM_muchgreat */ 268 { "<<", "<<" }, /* EQNSYM_muchless */ 269 { "<-", "<-" }, /* EQNSYM_larrow */ 270 { "->", "->" }, /* EQNSYM_rarrow */ 271 { "+-", "+-" }, /* EQNSYM_pm */ 272 { "!=", "!=" }, /* EQNSYM_nequal */ 273 { "==", "==" }, /* EQNSYM_equiv */ 274 { "<=", "<=" }, /* EQNSYM_lessequal */ 275 { ">=", ">=" }, /* EQNSYM_moreequal */ 276 { "-", "mi" }, /* EQNSYM_minus */ 277 }; 278 279 enum parse_mode { 280 MODE_QUOTED, 281 MODE_NOSUB, 282 MODE_SUB, 283 MODE_TOK 284 }; 285 286 struct eqn_def { 287 char *key; 288 size_t keysz; 289 char *val; 290 size_t valsz; 291 }; 292 293 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 294 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 295 struct eqn_box *); 296 static void eqn_def(struct eqn_node *); 297 static struct eqn_def *eqn_def_find(struct eqn_node *); 298 static void eqn_delim(struct eqn_node *); 299 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 300 static void eqn_undef(struct eqn_node *); 301 302 303 struct eqn_node * 304 eqn_alloc(void) 305 { 306 struct eqn_node *ep; 307 308 ep = mandoc_calloc(1, sizeof(*ep)); 309 ep->gsize = EQN_DEFSIZE; 310 return ep; 311 } 312 313 void 314 eqn_reset(struct eqn_node *ep) 315 { 316 free(ep->data); 317 ep->data = ep->start = ep->end = NULL; 318 ep->sz = ep->toksz = 0; 319 } 320 321 void 322 eqn_read(struct eqn_node *ep, const char *p) 323 { 324 char *cp; 325 326 if (ep->data == NULL) { 327 ep->sz = strlen(p); 328 ep->data = mandoc_strdup(p); 329 } else { 330 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 331 free(ep->data); 332 ep->data = cp; 333 } 334 ep->sz += 1; 335 } 336 337 /* 338 * Find the key "key" of the give size within our eqn-defined values. 339 */ 340 static struct eqn_def * 341 eqn_def_find(struct eqn_node *ep) 342 { 343 int i; 344 345 for (i = 0; i < (int)ep->defsz; i++) 346 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 347 ep->defs[i].keysz, ep->start, ep->toksz)) 348 return &ep->defs[i]; 349 350 return NULL; 351 } 352 353 /* 354 * Parse a token from the input text. The modes are: 355 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 356 * before its next occurence. Do not interpret the token in any 357 * way and return EQN_TOK_QUOTED. All other modes behave like 358 * MODE_QUOTED when *ep->start is '"'. 359 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 360 * otherwise, it ends before the next whitespace or brace. 361 * Do not interpret the token and return EQN_TOK__MAX. 362 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 363 * alias created with define. If it is an alias, replace it with 364 * its string value and reparse. 365 * MODE_TOK: Like MODE_SUB, but also check the token against the list 366 * of tokens, and if there is a match, return that token. Otherwise, 367 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 368 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 369 * a token match, *ep->start is set to an allocated string that the 370 * caller is expected to free. 371 * All modes skip whitespace following the end of the token. 372 */ 373 static enum eqn_tok 374 eqn_next(struct eqn_node *ep, enum parse_mode mode) 375 { 376 static int last_len, lim; 377 378 struct eqn_def *def; 379 size_t start; 380 int diff, i, quoted; 381 enum eqn_tok tok; 382 383 /* 384 * Reset the recursion counter after advancing 385 * beyond the end of the previous substitution. 386 */ 387 if (ep->end - ep->data >= last_len) 388 lim = 0; 389 390 ep->start = ep->end; 391 quoted = mode == MODE_QUOTED; 392 for (;;) { 393 switch (*ep->start) { 394 case '\0': 395 ep->toksz = 0; 396 return EQN_TOK_EOF; 397 case '"': 398 quoted = 1; 399 break; 400 case ' ': 401 case '\t': 402 case '~': 403 case '^': 404 if (quoted) 405 break; 406 ep->start++; 407 continue; 408 default: 409 break; 410 } 411 if (quoted) { 412 ep->end = strchr(ep->start + 1, *ep->start); 413 ep->start++; /* Skip opening quote. */ 414 if (ep->end == NULL) { 415 mandoc_msg(MANDOCERR_ARG_QUOTE, 416 ep->node->line, ep->node->pos, NULL); 417 ep->end = strchr(ep->start, '\0'); 418 } 419 } else { 420 ep->end = ep->start + 1; 421 if (*ep->start != '{' && *ep->start != '}') 422 ep->end += strcspn(ep->end, " ^~\"{}\t"); 423 } 424 ep->toksz = ep->end - ep->start; 425 if (quoted && *ep->end != '\0') 426 ep->end++; /* Skip closing quote. */ 427 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 428 ep->end++; 429 if (quoted) /* Cannot return, may have to strndup. */ 430 break; 431 if (mode == MODE_NOSUB) 432 return EQN_TOK__MAX; 433 if ((def = eqn_def_find(ep)) == NULL) 434 break; 435 if (++lim > EQN_NEST_MAX) { 436 mandoc_msg(MANDOCERR_ROFFLOOP, 437 ep->node->line, ep->node->pos, NULL); 438 return EQN_TOK_EOF; 439 } 440 441 /* Replace a defined name with its string value. */ 442 if ((diff = def->valsz - ep->toksz) > 0) { 443 start = ep->start - ep->data; 444 ep->sz += diff; 445 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 446 ep->start = ep->data + start; 447 } 448 if (diff) 449 memmove(ep->start + def->valsz, ep->start + ep->toksz, 450 strlen(ep->start + ep->toksz) + 1); 451 memcpy(ep->start, def->val, def->valsz); 452 last_len = ep->start - ep->data + def->valsz; 453 } 454 if (mode != MODE_TOK) 455 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 456 if (quoted) { 457 ep->start = mandoc_strndup(ep->start, ep->toksz); 458 return EQN_TOK_QUOTED; 459 } 460 for (tok = 0; tok < EQN_TOK__MAX; tok++) 461 if (STRNEQ(ep->start, ep->toksz, 462 eqn_toks[tok], strlen(eqn_toks[tok]))) 463 return tok; 464 465 for (i = 0; i < EQNSYM__MAX; i++) { 466 if (STRNEQ(ep->start, ep->toksz, 467 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 468 mandoc_asprintf(&ep->start, 469 "\\[%s]", eqnsyms[i].sym); 470 return EQN_TOK_SYM; 471 } 472 } 473 ep->start = mandoc_strndup(ep->start, ep->toksz); 474 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 475 if (STRNEQ(ep->start, ep->toksz, 476 eqn_func[i], strlen(eqn_func[i]))) 477 return EQN_TOK_FUNC; 478 return EQN_TOK__MAX; 479 } 480 481 void 482 eqn_box_free(struct eqn_box *bp) 483 { 484 if (bp == NULL) 485 return; 486 487 if (bp->first) 488 eqn_box_free(bp->first); 489 if (bp->next) 490 eqn_box_free(bp->next); 491 492 free(bp->text); 493 free(bp->left); 494 free(bp->right); 495 free(bp->top); 496 free(bp->bottom); 497 free(bp); 498 } 499 500 struct eqn_box * 501 eqn_box_new(void) 502 { 503 struct eqn_box *bp; 504 505 bp = mandoc_calloc(1, sizeof(*bp)); 506 bp->expectargs = UINT_MAX; 507 return bp; 508 } 509 510 /* 511 * Allocate a box as the last child of the parent node. 512 */ 513 static struct eqn_box * 514 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 515 { 516 struct eqn_box *bp; 517 518 bp = eqn_box_new(); 519 bp->parent = parent; 520 bp->parent->args++; 521 bp->font = bp->parent->font; 522 bp->size = ep->gsize; 523 524 if (NULL != parent->first) { 525 parent->last->next = bp; 526 bp->prev = parent->last; 527 } else 528 parent->first = bp; 529 530 parent->last = bp; 531 return bp; 532 } 533 534 /* 535 * Reparent the current last node (of the current parent) under a new 536 * EQN_SUBEXPR as the first element. 537 * Then return the new parent. 538 * The new EQN_SUBEXPR will have a two-child limit. 539 */ 540 static struct eqn_box * 541 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 542 { 543 struct eqn_box *b, *newb; 544 545 assert(NULL != parent->last); 546 b = parent->last; 547 if (parent->last == parent->first) 548 parent->first = NULL; 549 parent->args--; 550 parent->last = b->prev; 551 b->prev = NULL; 552 newb = eqn_box_alloc(ep, parent); 553 newb->type = EQN_SUBEXPR; 554 newb->expectargs = 2; 555 newb->args = 1; 556 newb->first = newb->last = b; 557 newb->first->next = NULL; 558 b->parent = newb; 559 return newb; 560 } 561 562 /* 563 * Parse the "delim" control statement. 564 */ 565 static void 566 eqn_delim(struct eqn_node *ep) 567 { 568 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 569 mandoc_msg(MANDOCERR_REQ_EMPTY, 570 ep->node->line, ep->node->pos, "delim"); 571 if (ep->end[0] != '\0') 572 ep->end++; 573 } else if (strncmp(ep->end, "off", 3) == 0) { 574 ep->delim = 0; 575 ep->end += 3; 576 } else if (strncmp(ep->end, "on", 2) == 0) { 577 if (ep->odelim && ep->cdelim) 578 ep->delim = 1; 579 ep->end += 2; 580 } else { 581 ep->odelim = *ep->end++; 582 ep->cdelim = *ep->end++; 583 ep->delim = 1; 584 } 585 } 586 587 /* 588 * Undefine a previously-defined string. 589 */ 590 static void 591 eqn_undef(struct eqn_node *ep) 592 { 593 struct eqn_def *def; 594 595 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 596 mandoc_msg(MANDOCERR_REQ_EMPTY, 597 ep->node->line, ep->node->pos, "undef"); 598 return; 599 } 600 if ((def = eqn_def_find(ep)) == NULL) 601 return; 602 free(def->key); 603 free(def->val); 604 def->key = def->val = NULL; 605 def->keysz = def->valsz = 0; 606 } 607 608 static void 609 eqn_def(struct eqn_node *ep) 610 { 611 struct eqn_def *def; 612 int i; 613 614 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 615 mandoc_msg(MANDOCERR_REQ_EMPTY, 616 ep->node->line, ep->node->pos, "define"); 617 return; 618 } 619 620 /* 621 * Search for a key that already exists. 622 * Create a new key if none is found. 623 */ 624 if ((def = eqn_def_find(ep)) == NULL) { 625 /* Find holes in string array. */ 626 for (i = 0; i < (int)ep->defsz; i++) 627 if (0 == ep->defs[i].keysz) 628 break; 629 630 if (i == (int)ep->defsz) { 631 ep->defsz++; 632 ep->defs = mandoc_reallocarray(ep->defs, 633 ep->defsz, sizeof(struct eqn_def)); 634 ep->defs[i].key = ep->defs[i].val = NULL; 635 } 636 637 def = ep->defs + i; 638 free(def->key); 639 def->key = mandoc_strndup(ep->start, ep->toksz); 640 def->keysz = ep->toksz; 641 } 642 643 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 644 mandoc_msg(MANDOCERR_REQ_EMPTY, 645 ep->node->line, ep->node->pos, "define %s", def->key); 646 free(def->key); 647 free(def->val); 648 def->key = def->val = NULL; 649 def->keysz = def->valsz = 0; 650 return; 651 } 652 free(def->val); 653 def->val = mandoc_strndup(ep->start, ep->toksz); 654 def->valsz = ep->toksz; 655 } 656 657 void 658 eqn_parse(struct eqn_node *ep) 659 { 660 struct eqn_box *cur, *nbox, *parent, *split; 661 const char *cp, *cpn; 662 char *p; 663 enum eqn_tok tok; 664 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 665 int size; 666 667 parent = ep->node->eqn; 668 assert(parent != NULL); 669 670 /* 671 * Empty equation. 672 * Do not add it to the high-level syntax tree. 673 */ 674 675 if (ep->data == NULL) 676 return; 677 678 ep->start = ep->end = ep->data; 679 680 next_tok: 681 tok = eqn_next(ep, MODE_TOK); 682 switch (tok) { 683 case EQN_TOK_UNDEF: 684 eqn_undef(ep); 685 break; 686 case EQN_TOK_NDEFINE: 687 case EQN_TOK_DEFINE: 688 eqn_def(ep); 689 break; 690 case EQN_TOK_TDEFINE: 691 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 692 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 693 mandoc_msg(MANDOCERR_REQ_EMPTY, 694 ep->node->line, ep->node->pos, "tdefine"); 695 break; 696 case EQN_TOK_DELIM: 697 eqn_delim(ep); 698 break; 699 case EQN_TOK_GFONT: 700 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 701 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 702 ep->node->pos, "%s", eqn_toks[tok]); 703 break; 704 case EQN_TOK_MARK: 705 case EQN_TOK_LINEUP: 706 /* Ignore these. */ 707 break; 708 case EQN_TOK_DYAD: 709 case EQN_TOK_VEC: 710 case EQN_TOK_UNDER: 711 case EQN_TOK_BAR: 712 case EQN_TOK_TILDE: 713 case EQN_TOK_HAT: 714 case EQN_TOK_DOT: 715 case EQN_TOK_DOTDOT: 716 if (parent->last == NULL) { 717 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 718 ep->node->pos, "%s", eqn_toks[tok]); 719 cur = eqn_box_alloc(ep, parent); 720 cur->type = EQN_TEXT; 721 cur->text = mandoc_strdup(""); 722 } 723 parent = eqn_box_makebinary(ep, parent); 724 parent->type = EQN_LIST; 725 parent->expectargs = 1; 726 parent->font = EQNFONT_ROMAN; 727 switch (tok) { 728 case EQN_TOK_DOTDOT: 729 parent->top = mandoc_strdup("\\[ad]"); 730 break; 731 case EQN_TOK_VEC: 732 parent->top = mandoc_strdup("\\[->]"); 733 break; 734 case EQN_TOK_DYAD: 735 parent->top = mandoc_strdup("\\[<>]"); 736 break; 737 case EQN_TOK_TILDE: 738 parent->top = mandoc_strdup("\\[a~]"); 739 break; 740 case EQN_TOK_UNDER: 741 parent->bottom = mandoc_strdup("\\[ul]"); 742 break; 743 case EQN_TOK_BAR: 744 parent->top = mandoc_strdup("\\[rn]"); 745 break; 746 case EQN_TOK_DOT: 747 parent->top = mandoc_strdup("\\[a.]"); 748 break; 749 case EQN_TOK_HAT: 750 parent->top = mandoc_strdup("\\[ha]"); 751 break; 752 default: 753 abort(); 754 } 755 parent = parent->parent; 756 break; 757 case EQN_TOK_FWD: 758 case EQN_TOK_BACK: 759 case EQN_TOK_DOWN: 760 case EQN_TOK_UP: 761 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 762 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 763 ep->node->pos, "%s", eqn_toks[tok]); 764 break; 765 case EQN_TOK_FAT: 766 case EQN_TOK_ROMAN: 767 case EQN_TOK_ITALIC: 768 case EQN_TOK_BOLD: 769 while (parent->args == parent->expectargs) 770 parent = parent->parent; 771 /* 772 * These values apply to the next word or sequence of 773 * words; thus, we mark that we'll have a child with 774 * exactly one of those. 775 */ 776 parent = eqn_box_alloc(ep, parent); 777 parent->type = EQN_LIST; 778 parent->expectargs = 1; 779 switch (tok) { 780 case EQN_TOK_FAT: 781 parent->font = EQNFONT_FAT; 782 break; 783 case EQN_TOK_ROMAN: 784 parent->font = EQNFONT_ROMAN; 785 break; 786 case EQN_TOK_ITALIC: 787 parent->font = EQNFONT_ITALIC; 788 break; 789 case EQN_TOK_BOLD: 790 parent->font = EQNFONT_BOLD; 791 break; 792 default: 793 abort(); 794 } 795 break; 796 case EQN_TOK_SIZE: 797 case EQN_TOK_GSIZE: 798 /* Accept two values: integral size and a single. */ 799 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 800 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 801 ep->node->pos, "%s", eqn_toks[tok]); 802 break; 803 } 804 size = mandoc_strntoi(ep->start, ep->toksz, 10); 805 if (-1 == size) { 806 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, 807 ep->node->pos, "%s", eqn_toks[tok]); 808 break; 809 } 810 if (EQN_TOK_GSIZE == tok) { 811 ep->gsize = size; 812 break; 813 } 814 while (parent->args == parent->expectargs) 815 parent = parent->parent; 816 parent = eqn_box_alloc(ep, parent); 817 parent->type = EQN_LIST; 818 parent->expectargs = 1; 819 parent->size = size; 820 break; 821 case EQN_TOK_FROM: 822 case EQN_TOK_TO: 823 case EQN_TOK_SUB: 824 case EQN_TOK_SUP: 825 /* 826 * We have a left-right-associative expression. 827 * Repivot under a positional node, open a child scope 828 * and keep on reading. 829 */ 830 if (parent->last == NULL) { 831 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 832 ep->node->pos, "%s", eqn_toks[tok]); 833 cur = eqn_box_alloc(ep, parent); 834 cur->type = EQN_TEXT; 835 cur->text = mandoc_strdup(""); 836 } 837 while (parent->expectargs == 1 && parent->args == 1) 838 parent = parent->parent; 839 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 840 for (cur = parent; cur != NULL; cur = cur->parent) 841 if (cur->pos == EQNPOS_SUB || 842 cur->pos == EQNPOS_SUP || 843 cur->pos == EQNPOS_SUBSUP || 844 cur->pos == EQNPOS_SQRT || 845 cur->pos == EQNPOS_OVER) 846 break; 847 if (cur != NULL) 848 parent = cur->parent; 849 } 850 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 851 parent->expectargs = 3; 852 parent->pos = EQNPOS_SUBSUP; 853 break; 854 } 855 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 856 parent->expectargs = 3; 857 parent->pos = EQNPOS_FROMTO; 858 break; 859 } 860 parent = eqn_box_makebinary(ep, parent); 861 switch (tok) { 862 case EQN_TOK_FROM: 863 parent->pos = EQNPOS_FROM; 864 break; 865 case EQN_TOK_TO: 866 parent->pos = EQNPOS_TO; 867 break; 868 case EQN_TOK_SUP: 869 parent->pos = EQNPOS_SUP; 870 break; 871 case EQN_TOK_SUB: 872 parent->pos = EQNPOS_SUB; 873 break; 874 default: 875 abort(); 876 } 877 break; 878 case EQN_TOK_SQRT: 879 while (parent->args == parent->expectargs) 880 parent = parent->parent; 881 /* 882 * Accept a left-right-associative set of arguments just 883 * like sub and sup and friends but without rebalancing 884 * under a pivot. 885 */ 886 parent = eqn_box_alloc(ep, parent); 887 parent->type = EQN_SUBEXPR; 888 parent->pos = EQNPOS_SQRT; 889 parent->expectargs = 1; 890 break; 891 case EQN_TOK_OVER: 892 /* 893 * We have a right-left-associative fraction. 894 * Close out anything that's currently open, then 895 * rebalance and continue reading. 896 */ 897 if (parent->last == NULL) { 898 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 899 ep->node->pos, "%s", eqn_toks[tok]); 900 cur = eqn_box_alloc(ep, parent); 901 cur->type = EQN_TEXT; 902 cur->text = mandoc_strdup(""); 903 } 904 while (parent->args == parent->expectargs) 905 parent = parent->parent; 906 while (EQN_SUBEXPR == parent->type) 907 parent = parent->parent; 908 parent = eqn_box_makebinary(ep, parent); 909 parent->pos = EQNPOS_OVER; 910 break; 911 case EQN_TOK_RIGHT: 912 case EQN_TOK_BRACE_CLOSE: 913 /* 914 * Close out the existing brace. 915 * FIXME: this is a shitty sentinel: we should really 916 * have a native EQN_BRACE type or whatnot. 917 */ 918 for (cur = parent; cur != NULL; cur = cur->parent) 919 if (cur->type == EQN_LIST && 920 cur->expectargs > 1 && 921 (tok == EQN_TOK_BRACE_CLOSE || 922 cur->left != NULL)) 923 break; 924 if (cur == NULL) { 925 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, 926 ep->node->pos, "%s", eqn_toks[tok]); 927 break; 928 } 929 parent = cur; 930 if (EQN_TOK_RIGHT == tok) { 931 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 932 mandoc_msg(MANDOCERR_REQ_EMPTY, 933 ep->node->line, ep->node->pos, 934 "%s", eqn_toks[tok]); 935 break; 936 } 937 /* Handling depends on right/left. */ 938 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 939 parent->right = mandoc_strdup("\\[rc]"); 940 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 941 parent->right = mandoc_strdup("\\[rf]"); 942 else 943 parent->right = 944 mandoc_strndup(ep->start, ep->toksz); 945 } 946 parent = parent->parent; 947 if (tok == EQN_TOK_BRACE_CLOSE && 948 (parent->type == EQN_PILE || 949 parent->type == EQN_MATRIX)) 950 parent = parent->parent; 951 /* Close out any "singleton" lists. */ 952 while (parent->type == EQN_LIST && 953 parent->expectargs == 1 && 954 parent->args == 1) 955 parent = parent->parent; 956 break; 957 case EQN_TOK_BRACE_OPEN: 958 case EQN_TOK_LEFT: 959 /* 960 * If we already have something in the stack and we're 961 * in an expression, then rewind til we're not any more 962 * (just like with the text node). 963 */ 964 while (parent->args == parent->expectargs) 965 parent = parent->parent; 966 if (EQN_TOK_LEFT == tok && 967 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 968 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 969 ep->node->pos, "%s", eqn_toks[tok]); 970 break; 971 } 972 parent = eqn_box_alloc(ep, parent); 973 parent->type = EQN_LIST; 974 if (EQN_TOK_LEFT == tok) { 975 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 976 parent->left = mandoc_strdup("\\[lc]"); 977 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 978 parent->left = mandoc_strdup("\\[lf]"); 979 else 980 parent->left = 981 mandoc_strndup(ep->start, ep->toksz); 982 } 983 break; 984 case EQN_TOK_PILE: 985 case EQN_TOK_LPILE: 986 case EQN_TOK_RPILE: 987 case EQN_TOK_CPILE: 988 case EQN_TOK_CCOL: 989 case EQN_TOK_LCOL: 990 case EQN_TOK_RCOL: 991 while (parent->args == parent->expectargs) 992 parent = parent->parent; 993 parent = eqn_box_alloc(ep, parent); 994 parent->type = EQN_PILE; 995 parent->expectargs = 1; 996 break; 997 case EQN_TOK_ABOVE: 998 for (cur = parent; cur != NULL; cur = cur->parent) 999 if (cur->type == EQN_PILE) 1000 break; 1001 if (cur == NULL) { 1002 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, 1003 ep->node->pos, "%s", eqn_toks[tok]); 1004 break; 1005 } 1006 parent = eqn_box_alloc(ep, cur); 1007 parent->type = EQN_LIST; 1008 break; 1009 case EQN_TOK_MATRIX: 1010 while (parent->args == parent->expectargs) 1011 parent = parent->parent; 1012 parent = eqn_box_alloc(ep, parent); 1013 parent->type = EQN_MATRIX; 1014 parent->expectargs = 1; 1015 break; 1016 case EQN_TOK_EOF: 1017 return; 1018 case EQN_TOK__MAX: 1019 case EQN_TOK_FUNC: 1020 case EQN_TOK_QUOTED: 1021 case EQN_TOK_SYM: 1022 p = ep->start; 1023 assert(p != NULL); 1024 /* 1025 * If we already have something in the stack and we're 1026 * in an expression, then rewind til we're not any more. 1027 */ 1028 while (parent->args == parent->expectargs) 1029 parent = parent->parent; 1030 cur = eqn_box_alloc(ep, parent); 1031 cur->type = EQN_TEXT; 1032 cur->text = p; 1033 switch (tok) { 1034 case EQN_TOK_FUNC: 1035 cur->font = EQNFONT_ROMAN; 1036 break; 1037 case EQN_TOK_QUOTED: 1038 if (cur->font == EQNFONT_NONE) 1039 cur->font = EQNFONT_ITALIC; 1040 break; 1041 case EQN_TOK_SYM: 1042 break; 1043 default: 1044 if (cur->font != EQNFONT_NONE || *p == '\0') 1045 break; 1046 cpn = p - 1; 1047 ccln = CCL_LET; 1048 split = NULL; 1049 for (;;) { 1050 /* Advance to next character. */ 1051 cp = cpn++; 1052 ccl = ccln; 1053 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1054 isdigit((unsigned char)*cpn) || 1055 (*cpn == '.' && (ccl == CCL_DIG || 1056 isdigit((unsigned char)cpn[1]))) ? 1057 CCL_DIG : CCL_PUN; 1058 /* No boundary before first character. */ 1059 if (cp < p) 1060 continue; 1061 cur->font = ccl == CCL_LET ? 1062 EQNFONT_ITALIC : EQNFONT_ROMAN; 1063 if (*cp == '\\') 1064 mandoc_escape(&cpn, NULL, NULL); 1065 /* No boundary after last character. */ 1066 if (*cpn == '\0') 1067 break; 1068 if (ccln == ccl && *cp != ',' && *cpn != ',') 1069 continue; 1070 /* Boundary found, split the text. */ 1071 if (parent->args == parent->expectargs) { 1072 /* Remove the text from the tree. */ 1073 if (cur->prev == NULL) 1074 parent->first = cur->next; 1075 else 1076 cur->prev->next = NULL; 1077 parent->last = cur->prev; 1078 parent->args--; 1079 /* Set up a list instead. */ 1080 split = eqn_box_alloc(ep, parent); 1081 split->type = EQN_LIST; 1082 /* Insert the word into the list. */ 1083 split->first = split->last = cur; 1084 cur->parent = split; 1085 cur->prev = NULL; 1086 parent = split; 1087 } 1088 /* Append a new text box. */ 1089 nbox = eqn_box_alloc(ep, parent); 1090 nbox->type = EQN_TEXT; 1091 nbox->text = mandoc_strdup(cpn); 1092 /* Truncate the old box. */ 1093 p = mandoc_strndup(cur->text, 1094 cpn - cur->text); 1095 free(cur->text); 1096 cur->text = p; 1097 /* Setup to process the new box. */ 1098 cur = nbox; 1099 p = nbox->text; 1100 cpn = p - 1; 1101 ccln = CCL_LET; 1102 } 1103 if (split != NULL) 1104 parent = split->parent; 1105 break; 1106 } 1107 break; 1108 default: 1109 abort(); 1110 } 1111 goto next_tok; 1112 } 1113 1114 void 1115 eqn_free(struct eqn_node *p) 1116 { 1117 int i; 1118 1119 if (p == NULL) 1120 return; 1121 1122 for (i = 0; i < (int)p->defsz; i++) { 1123 free(p->defs[i].key); 1124 free(p->defs[i].val); 1125 } 1126 1127 free(p->data); 1128 free(p->defs); 1129 free(p); 1130 } 1131