1 /* $Id: eqn.c,v 1.83 2018/12/14 06:33:14 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "eqn.h" 34 #include "libmandoc.h" 35 #include "eqn_parse.h" 36 37 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 38 #define STRNEQ(p1, sz1, p2, sz2) \ 39 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 40 41 enum eqn_tok { 42 EQN_TOK_DYAD = 0, 43 EQN_TOK_VEC, 44 EQN_TOK_UNDER, 45 EQN_TOK_BAR, 46 EQN_TOK_TILDE, 47 EQN_TOK_HAT, 48 EQN_TOK_DOT, 49 EQN_TOK_DOTDOT, 50 EQN_TOK_FWD, 51 EQN_TOK_BACK, 52 EQN_TOK_DOWN, 53 EQN_TOK_UP, 54 EQN_TOK_FAT, 55 EQN_TOK_ROMAN, 56 EQN_TOK_ITALIC, 57 EQN_TOK_BOLD, 58 EQN_TOK_SIZE, 59 EQN_TOK_SUB, 60 EQN_TOK_SUP, 61 EQN_TOK_SQRT, 62 EQN_TOK_OVER, 63 EQN_TOK_FROM, 64 EQN_TOK_TO, 65 EQN_TOK_BRACE_OPEN, 66 EQN_TOK_BRACE_CLOSE, 67 EQN_TOK_GSIZE, 68 EQN_TOK_GFONT, 69 EQN_TOK_MARK, 70 EQN_TOK_LINEUP, 71 EQN_TOK_LEFT, 72 EQN_TOK_RIGHT, 73 EQN_TOK_PILE, 74 EQN_TOK_LPILE, 75 EQN_TOK_RPILE, 76 EQN_TOK_CPILE, 77 EQN_TOK_MATRIX, 78 EQN_TOK_CCOL, 79 EQN_TOK_LCOL, 80 EQN_TOK_RCOL, 81 EQN_TOK_DELIM, 82 EQN_TOK_DEFINE, 83 EQN_TOK_TDEFINE, 84 EQN_TOK_NDEFINE, 85 EQN_TOK_UNDEF, 86 EQN_TOK_ABOVE, 87 EQN_TOK__MAX, 88 EQN_TOK_FUNC, 89 EQN_TOK_QUOTED, 90 EQN_TOK_SYM, 91 EQN_TOK_EOF 92 }; 93 94 static const char *eqn_toks[EQN_TOK__MAX] = { 95 "dyad", /* EQN_TOK_DYAD */ 96 "vec", /* EQN_TOK_VEC */ 97 "under", /* EQN_TOK_UNDER */ 98 "bar", /* EQN_TOK_BAR */ 99 "tilde", /* EQN_TOK_TILDE */ 100 "hat", /* EQN_TOK_HAT */ 101 "dot", /* EQN_TOK_DOT */ 102 "dotdot", /* EQN_TOK_DOTDOT */ 103 "fwd", /* EQN_TOK_FWD * */ 104 "back", /* EQN_TOK_BACK */ 105 "down", /* EQN_TOK_DOWN */ 106 "up", /* EQN_TOK_UP */ 107 "fat", /* EQN_TOK_FAT */ 108 "roman", /* EQN_TOK_ROMAN */ 109 "italic", /* EQN_TOK_ITALIC */ 110 "bold", /* EQN_TOK_BOLD */ 111 "size", /* EQN_TOK_SIZE */ 112 "sub", /* EQN_TOK_SUB */ 113 "sup", /* EQN_TOK_SUP */ 114 "sqrt", /* EQN_TOK_SQRT */ 115 "over", /* EQN_TOK_OVER */ 116 "from", /* EQN_TOK_FROM */ 117 "to", /* EQN_TOK_TO */ 118 "{", /* EQN_TOK_BRACE_OPEN */ 119 "}", /* EQN_TOK_BRACE_CLOSE */ 120 "gsize", /* EQN_TOK_GSIZE */ 121 "gfont", /* EQN_TOK_GFONT */ 122 "mark", /* EQN_TOK_MARK */ 123 "lineup", /* EQN_TOK_LINEUP */ 124 "left", /* EQN_TOK_LEFT */ 125 "right", /* EQN_TOK_RIGHT */ 126 "pile", /* EQN_TOK_PILE */ 127 "lpile", /* EQN_TOK_LPILE */ 128 "rpile", /* EQN_TOK_RPILE */ 129 "cpile", /* EQN_TOK_CPILE */ 130 "matrix", /* EQN_TOK_MATRIX */ 131 "ccol", /* EQN_TOK_CCOL */ 132 "lcol", /* EQN_TOK_LCOL */ 133 "rcol", /* EQN_TOK_RCOL */ 134 "delim", /* EQN_TOK_DELIM */ 135 "define", /* EQN_TOK_DEFINE */ 136 "tdefine", /* EQN_TOK_TDEFINE */ 137 "ndefine", /* EQN_TOK_NDEFINE */ 138 "undef", /* EQN_TOK_UNDEF */ 139 "above", /* EQN_TOK_ABOVE */ 140 }; 141 142 static const char *const eqn_func[] = { 143 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 144 "cos", "cosh", "coth", "csc", "det", "exp", "for", 145 "if", "lim", "ln", "log", "max", "min", 146 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 147 }; 148 149 enum eqn_symt { 150 EQNSYM_alpha = 0, 151 EQNSYM_beta, 152 EQNSYM_chi, 153 EQNSYM_delta, 154 EQNSYM_epsilon, 155 EQNSYM_eta, 156 EQNSYM_gamma, 157 EQNSYM_iota, 158 EQNSYM_kappa, 159 EQNSYM_lambda, 160 EQNSYM_mu, 161 EQNSYM_nu, 162 EQNSYM_omega, 163 EQNSYM_omicron, 164 EQNSYM_phi, 165 EQNSYM_pi, 166 EQNSYM_ps, 167 EQNSYM_rho, 168 EQNSYM_sigma, 169 EQNSYM_tau, 170 EQNSYM_theta, 171 EQNSYM_upsilon, 172 EQNSYM_xi, 173 EQNSYM_zeta, 174 EQNSYM_DELTA, 175 EQNSYM_GAMMA, 176 EQNSYM_LAMBDA, 177 EQNSYM_OMEGA, 178 EQNSYM_PHI, 179 EQNSYM_PI, 180 EQNSYM_PSI, 181 EQNSYM_SIGMA, 182 EQNSYM_THETA, 183 EQNSYM_UPSILON, 184 EQNSYM_XI, 185 EQNSYM_inter, 186 EQNSYM_union, 187 EQNSYM_prod, 188 EQNSYM_int, 189 EQNSYM_sum, 190 EQNSYM_grad, 191 EQNSYM_del, 192 EQNSYM_times, 193 EQNSYM_cdot, 194 EQNSYM_nothing, 195 EQNSYM_approx, 196 EQNSYM_prime, 197 EQNSYM_half, 198 EQNSYM_partial, 199 EQNSYM_inf, 200 EQNSYM_muchgreat, 201 EQNSYM_muchless, 202 EQNSYM_larrow, 203 EQNSYM_rarrow, 204 EQNSYM_pm, 205 EQNSYM_nequal, 206 EQNSYM_equiv, 207 EQNSYM_lessequal, 208 EQNSYM_moreequal, 209 EQNSYM_minus, 210 EQNSYM__MAX 211 }; 212 213 struct eqnsym { 214 const char *str; 215 const char *sym; 216 }; 217 218 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 219 { "alpha", "*a" }, /* EQNSYM_alpha */ 220 { "beta", "*b" }, /* EQNSYM_beta */ 221 { "chi", "*x" }, /* EQNSYM_chi */ 222 { "delta", "*d" }, /* EQNSYM_delta */ 223 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 224 { "eta", "*y" }, /* EQNSYM_eta */ 225 { "gamma", "*g" }, /* EQNSYM_gamma */ 226 { "iota", "*i" }, /* EQNSYM_iota */ 227 { "kappa", "*k" }, /* EQNSYM_kappa */ 228 { "lambda", "*l" }, /* EQNSYM_lambda */ 229 { "mu", "*m" }, /* EQNSYM_mu */ 230 { "nu", "*n" }, /* EQNSYM_nu */ 231 { "omega", "*w" }, /* EQNSYM_omega */ 232 { "omicron", "*o" }, /* EQNSYM_omicron */ 233 { "phi", "*f" }, /* EQNSYM_phi */ 234 { "pi", "*p" }, /* EQNSYM_pi */ 235 { "psi", "*q" }, /* EQNSYM_psi */ 236 { "rho", "*r" }, /* EQNSYM_rho */ 237 { "sigma", "*s" }, /* EQNSYM_sigma */ 238 { "tau", "*t" }, /* EQNSYM_tau */ 239 { "theta", "*h" }, /* EQNSYM_theta */ 240 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 241 { "xi", "*c" }, /* EQNSYM_xi */ 242 { "zeta", "*z" }, /* EQNSYM_zeta */ 243 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 244 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 245 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 246 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 247 { "PHI", "*F" }, /* EQNSYM_PHI */ 248 { "PI", "*P" }, /* EQNSYM_PI */ 249 { "PSI", "*Q" }, /* EQNSYM_PSI */ 250 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 251 { "THETA", "*H" }, /* EQNSYM_THETA */ 252 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 253 { "XI", "*C" }, /* EQNSYM_XI */ 254 { "inter", "ca" }, /* EQNSYM_inter */ 255 { "union", "cu" }, /* EQNSYM_union */ 256 { "prod", "product" }, /* EQNSYM_prod */ 257 { "int", "integral" }, /* EQNSYM_int */ 258 { "sum", "sum" }, /* EQNSYM_sum */ 259 { "grad", "gr" }, /* EQNSYM_grad */ 260 { "del", "gr" }, /* EQNSYM_del */ 261 { "times", "mu" }, /* EQNSYM_times */ 262 { "cdot", "pc" }, /* EQNSYM_cdot */ 263 { "nothing", "&" }, /* EQNSYM_nothing */ 264 { "approx", "~~" }, /* EQNSYM_approx */ 265 { "prime", "fm" }, /* EQNSYM_prime */ 266 { "half", "12" }, /* EQNSYM_half */ 267 { "partial", "pd" }, /* EQNSYM_partial */ 268 { "inf", "if" }, /* EQNSYM_inf */ 269 { ">>", ">>" }, /* EQNSYM_muchgreat */ 270 { "<<", "<<" }, /* EQNSYM_muchless */ 271 { "<-", "<-" }, /* EQNSYM_larrow */ 272 { "->", "->" }, /* EQNSYM_rarrow */ 273 { "+-", "+-" }, /* EQNSYM_pm */ 274 { "!=", "!=" }, /* EQNSYM_nequal */ 275 { "==", "==" }, /* EQNSYM_equiv */ 276 { "<=", "<=" }, /* EQNSYM_lessequal */ 277 { ">=", ">=" }, /* EQNSYM_moreequal */ 278 { "-", "mi" }, /* EQNSYM_minus */ 279 }; 280 281 enum parse_mode { 282 MODE_QUOTED, 283 MODE_NOSUB, 284 MODE_SUB, 285 MODE_TOK 286 }; 287 288 struct eqn_def { 289 char *key; 290 size_t keysz; 291 char *val; 292 size_t valsz; 293 }; 294 295 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 296 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 297 struct eqn_box *); 298 static void eqn_def(struct eqn_node *); 299 static struct eqn_def *eqn_def_find(struct eqn_node *); 300 static void eqn_delim(struct eqn_node *); 301 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 302 static void eqn_undef(struct eqn_node *); 303 304 305 struct eqn_node * 306 eqn_alloc(void) 307 { 308 struct eqn_node *ep; 309 310 ep = mandoc_calloc(1, sizeof(*ep)); 311 ep->gsize = EQN_DEFSIZE; 312 return ep; 313 } 314 315 void 316 eqn_reset(struct eqn_node *ep) 317 { 318 free(ep->data); 319 ep->data = ep->start = ep->end = NULL; 320 ep->sz = ep->toksz = 0; 321 } 322 323 void 324 eqn_read(struct eqn_node *ep, const char *p) 325 { 326 char *cp; 327 328 if (ep->data == NULL) { 329 ep->sz = strlen(p); 330 ep->data = mandoc_strdup(p); 331 } else { 332 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 333 free(ep->data); 334 ep->data = cp; 335 } 336 ep->sz += 1; 337 } 338 339 /* 340 * Find the key "key" of the give size within our eqn-defined values. 341 */ 342 static struct eqn_def * 343 eqn_def_find(struct eqn_node *ep) 344 { 345 int i; 346 347 for (i = 0; i < (int)ep->defsz; i++) 348 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 349 ep->defs[i].keysz, ep->start, ep->toksz)) 350 return &ep->defs[i]; 351 352 return NULL; 353 } 354 355 /* 356 * Parse a token from the input text. The modes are: 357 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 358 * before its next occurence. Do not interpret the token in any 359 * way and return EQN_TOK_QUOTED. All other modes behave like 360 * MODE_QUOTED when *ep->start is '"'. 361 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 362 * otherwise, it ends before the next whitespace or brace. 363 * Do not interpret the token and return EQN_TOK__MAX. 364 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 365 * alias created with define. If it is an alias, replace it with 366 * its string value and reparse. 367 * MODE_TOK: Like MODE_SUB, but also check the token against the list 368 * of tokens, and if there is a match, return that token. Otherwise, 369 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 370 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 371 * a token match, *ep->start is set to an allocated string that the 372 * caller is expected to free. 373 * All modes skip whitespace following the end of the token. 374 */ 375 static enum eqn_tok 376 eqn_next(struct eqn_node *ep, enum parse_mode mode) 377 { 378 static int last_len, lim; 379 380 struct eqn_def *def; 381 size_t start; 382 int diff, i, quoted; 383 enum eqn_tok tok; 384 385 /* 386 * Reset the recursion counter after advancing 387 * beyond the end of the previous substitution. 388 */ 389 if (ep->end - ep->data >= last_len) 390 lim = 0; 391 392 ep->start = ep->end; 393 quoted = mode == MODE_QUOTED; 394 for (;;) { 395 switch (*ep->start) { 396 case '\0': 397 ep->toksz = 0; 398 return EQN_TOK_EOF; 399 case '"': 400 quoted = 1; 401 break; 402 default: 403 break; 404 } 405 if (quoted) { 406 ep->end = strchr(ep->start + 1, *ep->start); 407 ep->start++; /* Skip opening quote. */ 408 if (ep->end == NULL) { 409 mandoc_msg(MANDOCERR_ARG_QUOTE, 410 ep->node->line, ep->node->pos, NULL); 411 ep->end = strchr(ep->start, '\0'); 412 } 413 } else { 414 ep->end = ep->start + 1; 415 if (*ep->start != '{' && *ep->start != '}') 416 ep->end += strcspn(ep->end, " ^~\"{}\t"); 417 } 418 ep->toksz = ep->end - ep->start; 419 if (quoted && *ep->end != '\0') 420 ep->end++; /* Skip closing quote. */ 421 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 422 ep->end++; 423 if (quoted) /* Cannot return, may have to strndup. */ 424 break; 425 if (mode == MODE_NOSUB) 426 return EQN_TOK__MAX; 427 if ((def = eqn_def_find(ep)) == NULL) 428 break; 429 if (++lim > EQN_NEST_MAX) { 430 mandoc_msg(MANDOCERR_ROFFLOOP, 431 ep->node->line, ep->node->pos, NULL); 432 return EQN_TOK_EOF; 433 } 434 435 /* Replace a defined name with its string value. */ 436 if ((diff = def->valsz - ep->toksz) > 0) { 437 start = ep->start - ep->data; 438 ep->sz += diff; 439 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 440 ep->start = ep->data + start; 441 } 442 if (diff) 443 memmove(ep->start + def->valsz, ep->start + ep->toksz, 444 strlen(ep->start + ep->toksz) + 1); 445 memcpy(ep->start, def->val, def->valsz); 446 last_len = ep->start - ep->data + def->valsz; 447 } 448 if (mode != MODE_TOK) 449 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 450 if (quoted) { 451 ep->start = mandoc_strndup(ep->start, ep->toksz); 452 return EQN_TOK_QUOTED; 453 } 454 for (tok = 0; tok < EQN_TOK__MAX; tok++) 455 if (STRNEQ(ep->start, ep->toksz, 456 eqn_toks[tok], strlen(eqn_toks[tok]))) 457 return tok; 458 459 for (i = 0; i < EQNSYM__MAX; i++) { 460 if (STRNEQ(ep->start, ep->toksz, 461 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 462 mandoc_asprintf(&ep->start, 463 "\\[%s]", eqnsyms[i].sym); 464 return EQN_TOK_SYM; 465 } 466 } 467 ep->start = mandoc_strndup(ep->start, ep->toksz); 468 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 469 if (STRNEQ(ep->start, ep->toksz, 470 eqn_func[i], strlen(eqn_func[i]))) 471 return EQN_TOK_FUNC; 472 return EQN_TOK__MAX; 473 } 474 475 void 476 eqn_box_free(struct eqn_box *bp) 477 { 478 if (bp == NULL) 479 return; 480 481 if (bp->first) 482 eqn_box_free(bp->first); 483 if (bp->next) 484 eqn_box_free(bp->next); 485 486 free(bp->text); 487 free(bp->left); 488 free(bp->right); 489 free(bp->top); 490 free(bp->bottom); 491 free(bp); 492 } 493 494 struct eqn_box * 495 eqn_box_new(void) 496 { 497 struct eqn_box *bp; 498 499 bp = mandoc_calloc(1, sizeof(*bp)); 500 bp->expectargs = UINT_MAX; 501 return bp; 502 } 503 504 /* 505 * Allocate a box as the last child of the parent node. 506 */ 507 static struct eqn_box * 508 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 509 { 510 struct eqn_box *bp; 511 512 bp = eqn_box_new(); 513 bp->parent = parent; 514 bp->parent->args++; 515 bp->font = bp->parent->font; 516 bp->size = ep->gsize; 517 518 if (NULL != parent->first) { 519 parent->last->next = bp; 520 bp->prev = parent->last; 521 } else 522 parent->first = bp; 523 524 parent->last = bp; 525 return bp; 526 } 527 528 /* 529 * Reparent the current last node (of the current parent) under a new 530 * EQN_SUBEXPR as the first element. 531 * Then return the new parent. 532 * The new EQN_SUBEXPR will have a two-child limit. 533 */ 534 static struct eqn_box * 535 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 536 { 537 struct eqn_box *b, *newb; 538 539 assert(NULL != parent->last); 540 b = parent->last; 541 if (parent->last == parent->first) 542 parent->first = NULL; 543 parent->args--; 544 parent->last = b->prev; 545 b->prev = NULL; 546 newb = eqn_box_alloc(ep, parent); 547 newb->type = EQN_SUBEXPR; 548 newb->expectargs = 2; 549 newb->args = 1; 550 newb->first = newb->last = b; 551 newb->first->next = NULL; 552 b->parent = newb; 553 return newb; 554 } 555 556 /* 557 * Parse the "delim" control statement. 558 */ 559 static void 560 eqn_delim(struct eqn_node *ep) 561 { 562 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 563 mandoc_msg(MANDOCERR_REQ_EMPTY, 564 ep->node->line, ep->node->pos, "delim"); 565 if (ep->end[0] != '\0') 566 ep->end++; 567 } else if (strncmp(ep->end, "off", 3) == 0) { 568 ep->delim = 0; 569 ep->end += 3; 570 } else if (strncmp(ep->end, "on", 2) == 0) { 571 if (ep->odelim && ep->cdelim) 572 ep->delim = 1; 573 ep->end += 2; 574 } else { 575 ep->odelim = *ep->end++; 576 ep->cdelim = *ep->end++; 577 ep->delim = 1; 578 } 579 } 580 581 /* 582 * Undefine a previously-defined string. 583 */ 584 static void 585 eqn_undef(struct eqn_node *ep) 586 { 587 struct eqn_def *def; 588 589 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 590 mandoc_msg(MANDOCERR_REQ_EMPTY, 591 ep->node->line, ep->node->pos, "undef"); 592 return; 593 } 594 if ((def = eqn_def_find(ep)) == NULL) 595 return; 596 free(def->key); 597 free(def->val); 598 def->key = def->val = NULL; 599 def->keysz = def->valsz = 0; 600 } 601 602 static void 603 eqn_def(struct eqn_node *ep) 604 { 605 struct eqn_def *def; 606 int i; 607 608 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 609 mandoc_msg(MANDOCERR_REQ_EMPTY, 610 ep->node->line, ep->node->pos, "define"); 611 return; 612 } 613 614 /* 615 * Search for a key that already exists. 616 * Create a new key if none is found. 617 */ 618 if ((def = eqn_def_find(ep)) == NULL) { 619 /* Find holes in string array. */ 620 for (i = 0; i < (int)ep->defsz; i++) 621 if (0 == ep->defs[i].keysz) 622 break; 623 624 if (i == (int)ep->defsz) { 625 ep->defsz++; 626 ep->defs = mandoc_reallocarray(ep->defs, 627 ep->defsz, sizeof(struct eqn_def)); 628 ep->defs[i].key = ep->defs[i].val = NULL; 629 } 630 631 def = ep->defs + i; 632 free(def->key); 633 def->key = mandoc_strndup(ep->start, ep->toksz); 634 def->keysz = ep->toksz; 635 } 636 637 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 638 mandoc_msg(MANDOCERR_REQ_EMPTY, 639 ep->node->line, ep->node->pos, "define %s", def->key); 640 free(def->key); 641 free(def->val); 642 def->key = def->val = NULL; 643 def->keysz = def->valsz = 0; 644 return; 645 } 646 free(def->val); 647 def->val = mandoc_strndup(ep->start, ep->toksz); 648 def->valsz = ep->toksz; 649 } 650 651 void 652 eqn_parse(struct eqn_node *ep) 653 { 654 struct eqn_box *cur, *nbox, *parent, *split; 655 const char *cp, *cpn; 656 char *p; 657 enum eqn_tok tok; 658 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 659 int size; 660 661 parent = ep->node->eqn; 662 assert(parent != NULL); 663 664 /* 665 * Empty equation. 666 * Do not add it to the high-level syntax tree. 667 */ 668 669 if (ep->data == NULL) 670 return; 671 672 ep->start = ep->end = ep->data + strspn(ep->data, " ^~"); 673 674 next_tok: 675 tok = eqn_next(ep, MODE_TOK); 676 switch (tok) { 677 case EQN_TOK_UNDEF: 678 eqn_undef(ep); 679 break; 680 case EQN_TOK_NDEFINE: 681 case EQN_TOK_DEFINE: 682 eqn_def(ep); 683 break; 684 case EQN_TOK_TDEFINE: 685 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 686 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 687 mandoc_msg(MANDOCERR_REQ_EMPTY, 688 ep->node->line, ep->node->pos, "tdefine"); 689 break; 690 case EQN_TOK_DELIM: 691 eqn_delim(ep); 692 break; 693 case EQN_TOK_GFONT: 694 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 695 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 696 ep->node->pos, "%s", eqn_toks[tok]); 697 break; 698 case EQN_TOK_MARK: 699 case EQN_TOK_LINEUP: 700 /* Ignore these. */ 701 break; 702 case EQN_TOK_DYAD: 703 case EQN_TOK_VEC: 704 case EQN_TOK_UNDER: 705 case EQN_TOK_BAR: 706 case EQN_TOK_TILDE: 707 case EQN_TOK_HAT: 708 case EQN_TOK_DOT: 709 case EQN_TOK_DOTDOT: 710 if (parent->last == NULL) { 711 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 712 ep->node->pos, "%s", eqn_toks[tok]); 713 cur = eqn_box_alloc(ep, parent); 714 cur->type = EQN_TEXT; 715 cur->text = mandoc_strdup(""); 716 } 717 parent = eqn_box_makebinary(ep, parent); 718 parent->type = EQN_LIST; 719 parent->expectargs = 1; 720 parent->font = EQNFONT_ROMAN; 721 switch (tok) { 722 case EQN_TOK_DOTDOT: 723 parent->top = mandoc_strdup("\\[ad]"); 724 break; 725 case EQN_TOK_VEC: 726 parent->top = mandoc_strdup("\\[->]"); 727 break; 728 case EQN_TOK_DYAD: 729 parent->top = mandoc_strdup("\\[<>]"); 730 break; 731 case EQN_TOK_TILDE: 732 parent->top = mandoc_strdup("\\[a~]"); 733 break; 734 case EQN_TOK_UNDER: 735 parent->bottom = mandoc_strdup("\\[ul]"); 736 break; 737 case EQN_TOK_BAR: 738 parent->top = mandoc_strdup("\\[rn]"); 739 break; 740 case EQN_TOK_DOT: 741 parent->top = mandoc_strdup("\\[a.]"); 742 break; 743 case EQN_TOK_HAT: 744 parent->top = mandoc_strdup("\\[ha]"); 745 break; 746 default: 747 abort(); 748 } 749 parent = parent->parent; 750 break; 751 case EQN_TOK_FWD: 752 case EQN_TOK_BACK: 753 case EQN_TOK_DOWN: 754 case EQN_TOK_UP: 755 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 756 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 757 ep->node->pos, "%s", eqn_toks[tok]); 758 break; 759 case EQN_TOK_FAT: 760 case EQN_TOK_ROMAN: 761 case EQN_TOK_ITALIC: 762 case EQN_TOK_BOLD: 763 while (parent->args == parent->expectargs) 764 parent = parent->parent; 765 /* 766 * These values apply to the next word or sequence of 767 * words; thus, we mark that we'll have a child with 768 * exactly one of those. 769 */ 770 parent = eqn_box_alloc(ep, parent); 771 parent->type = EQN_LIST; 772 parent->expectargs = 1; 773 switch (tok) { 774 case EQN_TOK_FAT: 775 parent->font = EQNFONT_FAT; 776 break; 777 case EQN_TOK_ROMAN: 778 parent->font = EQNFONT_ROMAN; 779 break; 780 case EQN_TOK_ITALIC: 781 parent->font = EQNFONT_ITALIC; 782 break; 783 case EQN_TOK_BOLD: 784 parent->font = EQNFONT_BOLD; 785 break; 786 default: 787 abort(); 788 } 789 break; 790 case EQN_TOK_SIZE: 791 case EQN_TOK_GSIZE: 792 /* Accept two values: integral size and a single. */ 793 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 794 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 795 ep->node->pos, "%s", eqn_toks[tok]); 796 break; 797 } 798 size = mandoc_strntoi(ep->start, ep->toksz, 10); 799 if (-1 == size) { 800 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, 801 ep->node->pos, "%s", eqn_toks[tok]); 802 break; 803 } 804 if (EQN_TOK_GSIZE == tok) { 805 ep->gsize = size; 806 break; 807 } 808 while (parent->args == parent->expectargs) 809 parent = parent->parent; 810 parent = eqn_box_alloc(ep, parent); 811 parent->type = EQN_LIST; 812 parent->expectargs = 1; 813 parent->size = size; 814 break; 815 case EQN_TOK_FROM: 816 case EQN_TOK_TO: 817 case EQN_TOK_SUB: 818 case EQN_TOK_SUP: 819 /* 820 * We have a left-right-associative expression. 821 * Repivot under a positional node, open a child scope 822 * and keep on reading. 823 */ 824 if (parent->last == NULL) { 825 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 826 ep->node->pos, "%s", eqn_toks[tok]); 827 cur = eqn_box_alloc(ep, parent); 828 cur->type = EQN_TEXT; 829 cur->text = mandoc_strdup(""); 830 } 831 while (parent->expectargs == 1 && parent->args == 1) 832 parent = parent->parent; 833 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 834 for (cur = parent; cur != NULL; cur = cur->parent) 835 if (cur->pos == EQNPOS_SUB || 836 cur->pos == EQNPOS_SUP || 837 cur->pos == EQNPOS_SUBSUP || 838 cur->pos == EQNPOS_SQRT || 839 cur->pos == EQNPOS_OVER) 840 break; 841 if (cur != NULL) 842 parent = cur->parent; 843 } 844 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 845 parent->expectargs = 3; 846 parent->pos = EQNPOS_SUBSUP; 847 break; 848 } 849 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 850 parent->expectargs = 3; 851 parent->pos = EQNPOS_FROMTO; 852 break; 853 } 854 parent = eqn_box_makebinary(ep, parent); 855 switch (tok) { 856 case EQN_TOK_FROM: 857 parent->pos = EQNPOS_FROM; 858 break; 859 case EQN_TOK_TO: 860 parent->pos = EQNPOS_TO; 861 break; 862 case EQN_TOK_SUP: 863 parent->pos = EQNPOS_SUP; 864 break; 865 case EQN_TOK_SUB: 866 parent->pos = EQNPOS_SUB; 867 break; 868 default: 869 abort(); 870 } 871 break; 872 case EQN_TOK_SQRT: 873 while (parent->args == parent->expectargs) 874 parent = parent->parent; 875 /* 876 * Accept a left-right-associative set of arguments just 877 * like sub and sup and friends but without rebalancing 878 * under a pivot. 879 */ 880 parent = eqn_box_alloc(ep, parent); 881 parent->type = EQN_SUBEXPR; 882 parent->pos = EQNPOS_SQRT; 883 parent->expectargs = 1; 884 break; 885 case EQN_TOK_OVER: 886 /* 887 * We have a right-left-associative fraction. 888 * Close out anything that's currently open, then 889 * rebalance and continue reading. 890 */ 891 if (parent->last == NULL) { 892 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 893 ep->node->pos, "%s", eqn_toks[tok]); 894 cur = eqn_box_alloc(ep, parent); 895 cur->type = EQN_TEXT; 896 cur->text = mandoc_strdup(""); 897 } 898 while (parent->args == parent->expectargs) 899 parent = parent->parent; 900 while (EQN_SUBEXPR == parent->type) 901 parent = parent->parent; 902 parent = eqn_box_makebinary(ep, parent); 903 parent->pos = EQNPOS_OVER; 904 break; 905 case EQN_TOK_RIGHT: 906 case EQN_TOK_BRACE_CLOSE: 907 /* 908 * Close out the existing brace. 909 * FIXME: this is a shitty sentinel: we should really 910 * have a native EQN_BRACE type or whatnot. 911 */ 912 for (cur = parent; cur != NULL; cur = cur->parent) 913 if (cur->type == EQN_LIST && 914 cur->expectargs > 1 && 915 (tok == EQN_TOK_BRACE_CLOSE || 916 cur->left != NULL)) 917 break; 918 if (cur == NULL) { 919 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, 920 ep->node->pos, "%s", eqn_toks[tok]); 921 break; 922 } 923 parent = cur; 924 if (EQN_TOK_RIGHT == tok) { 925 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 926 mandoc_msg(MANDOCERR_REQ_EMPTY, 927 ep->node->line, ep->node->pos, 928 "%s", eqn_toks[tok]); 929 break; 930 } 931 /* Handling depends on right/left. */ 932 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 933 parent->right = mandoc_strdup("\\[rc]"); 934 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 935 parent->right = mandoc_strdup("\\[rf]"); 936 else 937 parent->right = 938 mandoc_strndup(ep->start, ep->toksz); 939 } 940 parent = parent->parent; 941 if (tok == EQN_TOK_BRACE_CLOSE && 942 (parent->type == EQN_PILE || 943 parent->type == EQN_MATRIX)) 944 parent = parent->parent; 945 /* Close out any "singleton" lists. */ 946 while (parent->type == EQN_LIST && 947 parent->expectargs == 1 && 948 parent->args == 1) 949 parent = parent->parent; 950 break; 951 case EQN_TOK_BRACE_OPEN: 952 case EQN_TOK_LEFT: 953 /* 954 * If we already have something in the stack and we're 955 * in an expression, then rewind til we're not any more 956 * (just like with the text node). 957 */ 958 while (parent->args == parent->expectargs) 959 parent = parent->parent; 960 if (EQN_TOK_LEFT == tok && 961 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 962 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 963 ep->node->pos, "%s", eqn_toks[tok]); 964 break; 965 } 966 parent = eqn_box_alloc(ep, parent); 967 parent->type = EQN_LIST; 968 if (EQN_TOK_LEFT == tok) { 969 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 970 parent->left = mandoc_strdup("\\[lc]"); 971 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 972 parent->left = mandoc_strdup("\\[lf]"); 973 else 974 parent->left = 975 mandoc_strndup(ep->start, ep->toksz); 976 } 977 break; 978 case EQN_TOK_PILE: 979 case EQN_TOK_LPILE: 980 case EQN_TOK_RPILE: 981 case EQN_TOK_CPILE: 982 case EQN_TOK_CCOL: 983 case EQN_TOK_LCOL: 984 case EQN_TOK_RCOL: 985 while (parent->args == parent->expectargs) 986 parent = parent->parent; 987 parent = eqn_box_alloc(ep, parent); 988 parent->type = EQN_PILE; 989 parent->expectargs = 1; 990 break; 991 case EQN_TOK_ABOVE: 992 for (cur = parent; cur != NULL; cur = cur->parent) 993 if (cur->type == EQN_PILE) 994 break; 995 if (cur == NULL) { 996 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, 997 ep->node->pos, "%s", eqn_toks[tok]); 998 break; 999 } 1000 parent = eqn_box_alloc(ep, cur); 1001 parent->type = EQN_LIST; 1002 break; 1003 case EQN_TOK_MATRIX: 1004 while (parent->args == parent->expectargs) 1005 parent = parent->parent; 1006 parent = eqn_box_alloc(ep, parent); 1007 parent->type = EQN_MATRIX; 1008 parent->expectargs = 1; 1009 break; 1010 case EQN_TOK_EOF: 1011 return; 1012 case EQN_TOK__MAX: 1013 case EQN_TOK_FUNC: 1014 case EQN_TOK_QUOTED: 1015 case EQN_TOK_SYM: 1016 p = ep->start; 1017 assert(p != NULL); 1018 /* 1019 * If we already have something in the stack and we're 1020 * in an expression, then rewind til we're not any more. 1021 */ 1022 while (parent->args == parent->expectargs) 1023 parent = parent->parent; 1024 cur = eqn_box_alloc(ep, parent); 1025 cur->type = EQN_TEXT; 1026 cur->text = p; 1027 switch (tok) { 1028 case EQN_TOK_FUNC: 1029 cur->font = EQNFONT_ROMAN; 1030 break; 1031 case EQN_TOK_QUOTED: 1032 if (cur->font == EQNFONT_NONE) 1033 cur->font = EQNFONT_ITALIC; 1034 break; 1035 case EQN_TOK_SYM: 1036 break; 1037 default: 1038 if (cur->font != EQNFONT_NONE || *p == '\0') 1039 break; 1040 cpn = p - 1; 1041 ccln = CCL_LET; 1042 split = NULL; 1043 for (;;) { 1044 /* Advance to next character. */ 1045 cp = cpn++; 1046 ccl = ccln; 1047 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1048 isdigit((unsigned char)*cpn) || 1049 (*cpn == '.' && (ccl == CCL_DIG || 1050 isdigit((unsigned char)cpn[1]))) ? 1051 CCL_DIG : CCL_PUN; 1052 /* No boundary before first character. */ 1053 if (cp < p) 1054 continue; 1055 cur->font = ccl == CCL_LET ? 1056 EQNFONT_ITALIC : EQNFONT_ROMAN; 1057 if (*cp == '\\') 1058 mandoc_escape(&cpn, NULL, NULL); 1059 /* No boundary after last character. */ 1060 if (*cpn == '\0') 1061 break; 1062 if (ccln == ccl && *cp != ',' && *cpn != ',') 1063 continue; 1064 /* Boundary found, split the text. */ 1065 if (parent->args == parent->expectargs) { 1066 /* Remove the text from the tree. */ 1067 if (cur->prev == NULL) 1068 parent->first = cur->next; 1069 else 1070 cur->prev->next = NULL; 1071 parent->last = cur->prev; 1072 parent->args--; 1073 /* Set up a list instead. */ 1074 split = eqn_box_alloc(ep, parent); 1075 split->type = EQN_LIST; 1076 /* Insert the word into the list. */ 1077 split->first = split->last = cur; 1078 cur->parent = split; 1079 cur->prev = NULL; 1080 parent = split; 1081 } 1082 /* Append a new text box. */ 1083 nbox = eqn_box_alloc(ep, parent); 1084 nbox->type = EQN_TEXT; 1085 nbox->text = mandoc_strdup(cpn); 1086 /* Truncate the old box. */ 1087 p = mandoc_strndup(cur->text, 1088 cpn - cur->text); 1089 free(cur->text); 1090 cur->text = p; 1091 /* Setup to process the new box. */ 1092 cur = nbox; 1093 p = nbox->text; 1094 cpn = p - 1; 1095 ccln = CCL_LET; 1096 } 1097 if (split != NULL) 1098 parent = split->parent; 1099 break; 1100 } 1101 break; 1102 default: 1103 abort(); 1104 } 1105 goto next_tok; 1106 } 1107 1108 void 1109 eqn_free(struct eqn_node *p) 1110 { 1111 int i; 1112 1113 if (p == NULL) 1114 return; 1115 1116 for (i = 0; i < (int)p->defsz; i++) { 1117 free(p->defs[i].key); 1118 free(p->defs[i].val); 1119 } 1120 1121 free(p->data); 1122 free(p->defs); 1123 free(p); 1124 } 1125