1 /* $OpenBSD: eqn.c,v 1.49 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3 * Copyright (c) 2014, 2015, 2017, 2018, 2020, 2022
4 * Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include <sys/types.h>
20
21 #include <assert.h>
22 #include <ctype.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28
29 #include "mandoc_aux.h"
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "eqn.h"
33 #include "libmandoc.h"
34 #include "eqn_parse.h"
35
36 #define EQN_NEST_MAX 128 /* maximum nesting of defines */
37 #define STRNEQ(p1, sz1, p2, sz2) \
38 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
39
40 enum eqn_tok {
41 EQN_TOK_DYAD = 0,
42 EQN_TOK_VEC,
43 EQN_TOK_UNDER,
44 EQN_TOK_BAR,
45 EQN_TOK_TILDE,
46 EQN_TOK_HAT,
47 EQN_TOK_DOT,
48 EQN_TOK_DOTDOT,
49 EQN_TOK_FWD,
50 EQN_TOK_BACK,
51 EQN_TOK_DOWN,
52 EQN_TOK_UP,
53 EQN_TOK_FAT,
54 EQN_TOK_ROMAN,
55 EQN_TOK_ITALIC,
56 EQN_TOK_BOLD,
57 EQN_TOK_SIZE,
58 EQN_TOK_SUB,
59 EQN_TOK_SUP,
60 EQN_TOK_SQRT,
61 EQN_TOK_OVER,
62 EQN_TOK_FROM,
63 EQN_TOK_TO,
64 EQN_TOK_BRACE_OPEN,
65 EQN_TOK_BRACE_CLOSE,
66 EQN_TOK_GSIZE,
67 EQN_TOK_GFONT,
68 EQN_TOK_MARK,
69 EQN_TOK_LINEUP,
70 EQN_TOK_LEFT,
71 EQN_TOK_RIGHT,
72 EQN_TOK_PILE,
73 EQN_TOK_LPILE,
74 EQN_TOK_RPILE,
75 EQN_TOK_CPILE,
76 EQN_TOK_MATRIX,
77 EQN_TOK_CCOL,
78 EQN_TOK_LCOL,
79 EQN_TOK_RCOL,
80 EQN_TOK_DELIM,
81 EQN_TOK_DEFINE,
82 EQN_TOK_TDEFINE,
83 EQN_TOK_NDEFINE,
84 EQN_TOK_UNDEF,
85 EQN_TOK_ABOVE,
86 EQN_TOK__MAX,
87 EQN_TOK_FUNC,
88 EQN_TOK_QUOTED,
89 EQN_TOK_SYM,
90 EQN_TOK_EOF
91 };
92
93 static const char *eqn_toks[EQN_TOK__MAX] = {
94 "dyad", /* EQN_TOK_DYAD */
95 "vec", /* EQN_TOK_VEC */
96 "under", /* EQN_TOK_UNDER */
97 "bar", /* EQN_TOK_BAR */
98 "tilde", /* EQN_TOK_TILDE */
99 "hat", /* EQN_TOK_HAT */
100 "dot", /* EQN_TOK_DOT */
101 "dotdot", /* EQN_TOK_DOTDOT */
102 "fwd", /* EQN_TOK_FWD * */
103 "back", /* EQN_TOK_BACK */
104 "down", /* EQN_TOK_DOWN */
105 "up", /* EQN_TOK_UP */
106 "fat", /* EQN_TOK_FAT */
107 "roman", /* EQN_TOK_ROMAN */
108 "italic", /* EQN_TOK_ITALIC */
109 "bold", /* EQN_TOK_BOLD */
110 "size", /* EQN_TOK_SIZE */
111 "sub", /* EQN_TOK_SUB */
112 "sup", /* EQN_TOK_SUP */
113 "sqrt", /* EQN_TOK_SQRT */
114 "over", /* EQN_TOK_OVER */
115 "from", /* EQN_TOK_FROM */
116 "to", /* EQN_TOK_TO */
117 "{", /* EQN_TOK_BRACE_OPEN */
118 "}", /* EQN_TOK_BRACE_CLOSE */
119 "gsize", /* EQN_TOK_GSIZE */
120 "gfont", /* EQN_TOK_GFONT */
121 "mark", /* EQN_TOK_MARK */
122 "lineup", /* EQN_TOK_LINEUP */
123 "left", /* EQN_TOK_LEFT */
124 "right", /* EQN_TOK_RIGHT */
125 "pile", /* EQN_TOK_PILE */
126 "lpile", /* EQN_TOK_LPILE */
127 "rpile", /* EQN_TOK_RPILE */
128 "cpile", /* EQN_TOK_CPILE */
129 "matrix", /* EQN_TOK_MATRIX */
130 "ccol", /* EQN_TOK_CCOL */
131 "lcol", /* EQN_TOK_LCOL */
132 "rcol", /* EQN_TOK_RCOL */
133 "delim", /* EQN_TOK_DELIM */
134 "define", /* EQN_TOK_DEFINE */
135 "tdefine", /* EQN_TOK_TDEFINE */
136 "ndefine", /* EQN_TOK_NDEFINE */
137 "undef", /* EQN_TOK_UNDEF */
138 "above", /* EQN_TOK_ABOVE */
139 };
140
141 static const char *const eqn_func[] = {
142 "acos", "acsc", "and", "arc", "asec", "asin", "atan",
143 "cos", "cosh", "coth", "csc", "det", "exp", "for",
144 "if", "lim", "ln", "log", "max", "min",
145 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re",
146 };
147
148 enum eqn_symt {
149 EQNSYM_alpha = 0,
150 EQNSYM_beta,
151 EQNSYM_chi,
152 EQNSYM_delta,
153 EQNSYM_epsilon,
154 EQNSYM_eta,
155 EQNSYM_gamma,
156 EQNSYM_iota,
157 EQNSYM_kappa,
158 EQNSYM_lambda,
159 EQNSYM_mu,
160 EQNSYM_nu,
161 EQNSYM_omega,
162 EQNSYM_omicron,
163 EQNSYM_phi,
164 EQNSYM_pi,
165 EQNSYM_ps,
166 EQNSYM_rho,
167 EQNSYM_sigma,
168 EQNSYM_tau,
169 EQNSYM_theta,
170 EQNSYM_upsilon,
171 EQNSYM_xi,
172 EQNSYM_zeta,
173 EQNSYM_DELTA,
174 EQNSYM_GAMMA,
175 EQNSYM_LAMBDA,
176 EQNSYM_OMEGA,
177 EQNSYM_PHI,
178 EQNSYM_PI,
179 EQNSYM_PSI,
180 EQNSYM_SIGMA,
181 EQNSYM_THETA,
182 EQNSYM_UPSILON,
183 EQNSYM_XI,
184 EQNSYM_inter,
185 EQNSYM_union,
186 EQNSYM_prod,
187 EQNSYM_int,
188 EQNSYM_sum,
189 EQNSYM_grad,
190 EQNSYM_del,
191 EQNSYM_times,
192 EQNSYM_cdot,
193 EQNSYM_nothing,
194 EQNSYM_approx,
195 EQNSYM_prime,
196 EQNSYM_half,
197 EQNSYM_partial,
198 EQNSYM_inf,
199 EQNSYM_muchgreat,
200 EQNSYM_muchless,
201 EQNSYM_larrow,
202 EQNSYM_rarrow,
203 EQNSYM_pm,
204 EQNSYM_nequal,
205 EQNSYM_equiv,
206 EQNSYM_lessequal,
207 EQNSYM_moreequal,
208 EQNSYM_minus,
209 EQNSYM__MAX
210 };
211
212 struct eqnsym {
213 const char *str;
214 const char *sym;
215 };
216
217 static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
218 { "alpha", "*a" }, /* EQNSYM_alpha */
219 { "beta", "*b" }, /* EQNSYM_beta */
220 { "chi", "*x" }, /* EQNSYM_chi */
221 { "delta", "*d" }, /* EQNSYM_delta */
222 { "epsilon", "*e" }, /* EQNSYM_epsilon */
223 { "eta", "*y" }, /* EQNSYM_eta */
224 { "gamma", "*g" }, /* EQNSYM_gamma */
225 { "iota", "*i" }, /* EQNSYM_iota */
226 { "kappa", "*k" }, /* EQNSYM_kappa */
227 { "lambda", "*l" }, /* EQNSYM_lambda */
228 { "mu", "*m" }, /* EQNSYM_mu */
229 { "nu", "*n" }, /* EQNSYM_nu */
230 { "omega", "*w" }, /* EQNSYM_omega */
231 { "omicron", "*o" }, /* EQNSYM_omicron */
232 { "phi", "*f" }, /* EQNSYM_phi */
233 { "pi", "*p" }, /* EQNSYM_pi */
234 { "psi", "*q" }, /* EQNSYM_psi */
235 { "rho", "*r" }, /* EQNSYM_rho */
236 { "sigma", "*s" }, /* EQNSYM_sigma */
237 { "tau", "*t" }, /* EQNSYM_tau */
238 { "theta", "*h" }, /* EQNSYM_theta */
239 { "upsilon", "*u" }, /* EQNSYM_upsilon */
240 { "xi", "*c" }, /* EQNSYM_xi */
241 { "zeta", "*z" }, /* EQNSYM_zeta */
242 { "DELTA", "*D" }, /* EQNSYM_DELTA */
243 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */
244 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
245 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */
246 { "PHI", "*F" }, /* EQNSYM_PHI */
247 { "PI", "*P" }, /* EQNSYM_PI */
248 { "PSI", "*Q" }, /* EQNSYM_PSI */
249 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */
250 { "THETA", "*H" }, /* EQNSYM_THETA */
251 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */
252 { "XI", "*C" }, /* EQNSYM_XI */
253 { "inter", "ca" }, /* EQNSYM_inter */
254 { "union", "cu" }, /* EQNSYM_union */
255 { "prod", "product" }, /* EQNSYM_prod */
256 { "int", "integral" }, /* EQNSYM_int */
257 { "sum", "sum" }, /* EQNSYM_sum */
258 { "grad", "gr" }, /* EQNSYM_grad */
259 { "del", "gr" }, /* EQNSYM_del */
260 { "times", "mu" }, /* EQNSYM_times */
261 { "cdot", "pc" }, /* EQNSYM_cdot */
262 { "nothing", "&" }, /* EQNSYM_nothing */
263 { "approx", "~~" }, /* EQNSYM_approx */
264 { "prime", "fm" }, /* EQNSYM_prime */
265 { "half", "12" }, /* EQNSYM_half */
266 { "partial", "pd" }, /* EQNSYM_partial */
267 { "inf", "if" }, /* EQNSYM_inf */
268 { ">>", ">>" }, /* EQNSYM_muchgreat */
269 { "<<", "<<" }, /* EQNSYM_muchless */
270 { "<-", "<-" }, /* EQNSYM_larrow */
271 { "->", "->" }, /* EQNSYM_rarrow */
272 { "+-", "+-" }, /* EQNSYM_pm */
273 { "!=", "!=" }, /* EQNSYM_nequal */
274 { "==", "==" }, /* EQNSYM_equiv */
275 { "<=", "<=" }, /* EQNSYM_lessequal */
276 { ">=", ">=" }, /* EQNSYM_moreequal */
277 { "-", "mi" }, /* EQNSYM_minus */
278 };
279
280 enum parse_mode {
281 MODE_QUOTED,
282 MODE_NOSUB,
283 MODE_SUB,
284 MODE_TOK
285 };
286
287 struct eqn_def {
288 char *key;
289 size_t keysz;
290 char *val;
291 size_t valsz;
292 };
293
294 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *);
295 static struct eqn_box *eqn_box_makebinary(struct eqn_node *,
296 struct eqn_box *);
297 static void eqn_def(struct eqn_node *);
298 static struct eqn_def *eqn_def_find(struct eqn_node *);
299 static void eqn_delim(struct eqn_node *);
300 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode);
301 static void eqn_undef(struct eqn_node *);
302
303
304 struct eqn_node *
eqn_alloc(void)305 eqn_alloc(void)
306 {
307 struct eqn_node *ep;
308
309 ep = mandoc_calloc(1, sizeof(*ep));
310 ep->gsize = EQN_DEFSIZE;
311 return ep;
312 }
313
314 void
eqn_reset(struct eqn_node * ep)315 eqn_reset(struct eqn_node *ep)
316 {
317 free(ep->data);
318 ep->data = ep->start = ep->end = NULL;
319 ep->sz = ep->toksz = 0;
320 }
321
322 void
eqn_read(struct eqn_node * ep,const char * p)323 eqn_read(struct eqn_node *ep, const char *p)
324 {
325 char *cp;
326
327 if (ep->data == NULL) {
328 ep->sz = strlen(p);
329 ep->data = mandoc_strdup(p);
330 } else {
331 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
332 free(ep->data);
333 ep->data = cp;
334 }
335 ep->sz += 1;
336 }
337
338 /*
339 * Find the key "key" of the give size within our eqn-defined values.
340 */
341 static struct eqn_def *
eqn_def_find(struct eqn_node * ep)342 eqn_def_find(struct eqn_node *ep)
343 {
344 int i;
345
346 for (i = 0; i < (int)ep->defsz; i++)
347 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
348 ep->defs[i].keysz, ep->start, ep->toksz))
349 return &ep->defs[i];
350
351 return NULL;
352 }
353
354 /*
355 * Parse a token from the input text. The modes are:
356 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
357 * before its next occurrence. Do not interpret the token in any
358 * way and return EQN_TOK_QUOTED. All other modes behave like
359 * MODE_QUOTED when *ep->start is '"'.
360 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
361 * otherwise, it ends before the next whitespace or brace.
362 * Do not interpret the token and return EQN_TOK__MAX.
363 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
364 * alias created with define. If it is an alias, replace it with
365 * its string value and reparse.
366 * MODE_TOK: Like MODE_SUB, but also check the token against the list
367 * of tokens, and if there is a match, return that token. Otherwise,
368 * if the token matches a symbol, return EQN_TOK_SYM; if it matches
369 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for
370 * a token match, *ep->start is set to an allocated string that the
371 * caller is expected to free.
372 * All modes skip whitespace following the end of the token.
373 */
374 static enum eqn_tok
eqn_next(struct eqn_node * ep,enum parse_mode mode)375 eqn_next(struct eqn_node *ep, enum parse_mode mode)
376 {
377 struct eqn_def *def;
378 size_t start;
379 int diff, i, newlen, quoted;
380 enum eqn_tok tok;
381
382 /*
383 * Reset the recursion counter after advancing
384 * beyond the end of the rightmost substitution.
385 */
386 if (ep->end - ep->data >= ep->sublen)
387 ep->subcnt = 0;
388
389 ep->start = ep->end;
390 quoted = mode == MODE_QUOTED;
391 for (;;) {
392 switch (*ep->start) {
393 case '\0':
394 ep->toksz = 0;
395 return EQN_TOK_EOF;
396 case '"':
397 quoted = 1;
398 break;
399 case ' ':
400 case '\t':
401 case '~':
402 case '^':
403 if (quoted)
404 break;
405 ep->start++;
406 continue;
407 default:
408 break;
409 }
410 if (quoted) {
411 ep->end = strchr(ep->start + 1, *ep->start);
412 ep->start++; /* Skip opening quote. */
413 if (ep->end == NULL) {
414 mandoc_msg(MANDOCERR_ARG_QUOTE,
415 ep->node->line, ep->node->pos, NULL);
416 ep->end = strchr(ep->start, '\0');
417 }
418 } else {
419 ep->end = ep->start + 1;
420 if (*ep->start != '{' && *ep->start != '}')
421 ep->end += strcspn(ep->end, " ^~\"{}\t");
422 }
423 ep->toksz = ep->end - ep->start;
424 if (quoted && *ep->end != '\0')
425 ep->end++; /* Skip closing quote. */
426 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
427 ep->end++;
428 if (quoted) /* Cannot return, may have to strndup. */
429 break;
430 if (mode == MODE_NOSUB)
431 return EQN_TOK__MAX;
432 if ((def = eqn_def_find(ep)) == NULL)
433 break;
434 if (++ep->subcnt > EQN_NEST_MAX) {
435 mandoc_msg(MANDOCERR_ROFFLOOP,
436 ep->node->line, ep->node->pos, NULL);
437 break;
438 }
439
440 /* Replace a defined name with its string value. */
441 if ((diff = def->valsz - ep->toksz) > 0) {
442 start = ep->start - ep->data;
443 ep->sz += diff;
444 ep->data = mandoc_realloc(ep->data, ep->sz + 1);
445 ep->start = ep->data + start;
446 ep->sublen += diff;
447 }
448 if (diff)
449 memmove(ep->start + def->valsz, ep->start + ep->toksz,
450 strlen(ep->start + ep->toksz) + 1);
451 memcpy(ep->start, def->val, def->valsz);
452 newlen = ep->start - ep->data + def->valsz;
453 if (ep->sublen < newlen)
454 ep->sublen = newlen;
455 }
456 if (mode != MODE_TOK)
457 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
458 if (quoted) {
459 ep->start = mandoc_strndup(ep->start, ep->toksz);
460 return EQN_TOK_QUOTED;
461 }
462 for (tok = 0; tok < EQN_TOK__MAX; tok++)
463 if (STRNEQ(ep->start, ep->toksz,
464 eqn_toks[tok], strlen(eqn_toks[tok])))
465 return tok;
466
467 for (i = 0; i < EQNSYM__MAX; i++) {
468 if (STRNEQ(ep->start, ep->toksz,
469 eqnsyms[i].str, strlen(eqnsyms[i].str))) {
470 mandoc_asprintf(&ep->start,
471 "\\[%s]", eqnsyms[i].sym);
472 return EQN_TOK_SYM;
473 }
474 }
475 ep->start = mandoc_strndup(ep->start, ep->toksz);
476 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
477 if (STRNEQ(ep->start, ep->toksz,
478 eqn_func[i], strlen(eqn_func[i])))
479 return EQN_TOK_FUNC;
480 return EQN_TOK__MAX;
481 }
482
483 void
eqn_box_free(struct eqn_box * bp)484 eqn_box_free(struct eqn_box *bp)
485 {
486 if (bp == NULL)
487 return;
488
489 if (bp->first)
490 eqn_box_free(bp->first);
491 if (bp->next)
492 eqn_box_free(bp->next);
493
494 free(bp->text);
495 free(bp->left);
496 free(bp->right);
497 free(bp->top);
498 free(bp->bottom);
499 free(bp);
500 }
501
502 struct eqn_box *
eqn_box_new(void)503 eqn_box_new(void)
504 {
505 struct eqn_box *bp;
506
507 bp = mandoc_calloc(1, sizeof(*bp));
508 bp->expectargs = UINT_MAX;
509 return bp;
510 }
511
512 /*
513 * Allocate a box as the last child of the parent node.
514 */
515 static struct eqn_box *
eqn_box_alloc(struct eqn_node * ep,struct eqn_box * parent)516 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
517 {
518 struct eqn_box *bp;
519
520 bp = eqn_box_new();
521 bp->parent = parent;
522 bp->parent->args++;
523 bp->font = bp->parent->font;
524 bp->size = ep->gsize;
525
526 if (NULL != parent->first) {
527 parent->last->next = bp;
528 bp->prev = parent->last;
529 } else
530 parent->first = bp;
531
532 parent->last = bp;
533 return bp;
534 }
535
536 /*
537 * Reparent the current last node (of the current parent) under a new
538 * EQN_SUBEXPR as the first element.
539 * Then return the new parent.
540 * The new EQN_SUBEXPR will have a two-child limit.
541 */
542 static struct eqn_box *
eqn_box_makebinary(struct eqn_node * ep,struct eqn_box * parent)543 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
544 {
545 struct eqn_box *b, *newb;
546
547 assert(NULL != parent->last);
548 b = parent->last;
549 if (parent->last == parent->first)
550 parent->first = NULL;
551 parent->args--;
552 parent->last = b->prev;
553 b->prev = NULL;
554 newb = eqn_box_alloc(ep, parent);
555 newb->type = EQN_SUBEXPR;
556 newb->expectargs = 2;
557 newb->args = 1;
558 newb->first = newb->last = b;
559 newb->first->next = NULL;
560 b->parent = newb;
561 return newb;
562 }
563
564 /*
565 * Parse the "delim" control statement.
566 */
567 static void
eqn_delim(struct eqn_node * ep)568 eqn_delim(struct eqn_node *ep)
569 {
570 if (ep->end[0] == '\0' || ep->end[1] == '\0') {
571 mandoc_msg(MANDOCERR_REQ_EMPTY,
572 ep->node->line, ep->node->pos, "delim");
573 if (ep->end[0] != '\0')
574 ep->end++;
575 } else if (strncmp(ep->end, "off", 3) == 0) {
576 ep->delim = 0;
577 ep->end += 3;
578 } else if (strncmp(ep->end, "on", 2) == 0) {
579 if (ep->odelim && ep->cdelim)
580 ep->delim = 1;
581 ep->end += 2;
582 } else {
583 ep->odelim = *ep->end++;
584 ep->cdelim = *ep->end++;
585 ep->delim = 1;
586 }
587 }
588
589 /*
590 * Undefine a previously-defined string.
591 */
592 static void
eqn_undef(struct eqn_node * ep)593 eqn_undef(struct eqn_node *ep)
594 {
595 struct eqn_def *def;
596
597 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
598 mandoc_msg(MANDOCERR_REQ_EMPTY,
599 ep->node->line, ep->node->pos, "undef");
600 return;
601 }
602 if ((def = eqn_def_find(ep)) == NULL)
603 return;
604 free(def->key);
605 free(def->val);
606 def->key = def->val = NULL;
607 def->keysz = def->valsz = 0;
608 }
609
610 static void
eqn_def(struct eqn_node * ep)611 eqn_def(struct eqn_node *ep)
612 {
613 struct eqn_def *def;
614 int i;
615
616 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
617 mandoc_msg(MANDOCERR_REQ_EMPTY,
618 ep->node->line, ep->node->pos, "define");
619 return;
620 }
621
622 /*
623 * Search for a key that already exists.
624 * Create a new key if none is found.
625 */
626 if ((def = eqn_def_find(ep)) == NULL) {
627 /* Find holes in string array. */
628 for (i = 0; i < (int)ep->defsz; i++)
629 if (0 == ep->defs[i].keysz)
630 break;
631
632 if (i == (int)ep->defsz) {
633 ep->defsz++;
634 ep->defs = mandoc_reallocarray(ep->defs,
635 ep->defsz, sizeof(struct eqn_def));
636 ep->defs[i].key = ep->defs[i].val = NULL;
637 }
638
639 def = ep->defs + i;
640 free(def->key);
641 def->key = mandoc_strndup(ep->start, ep->toksz);
642 def->keysz = ep->toksz;
643 }
644
645 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
646 mandoc_msg(MANDOCERR_REQ_EMPTY,
647 ep->node->line, ep->node->pos, "define %s", def->key);
648 free(def->key);
649 free(def->val);
650 def->key = def->val = NULL;
651 def->keysz = def->valsz = 0;
652 return;
653 }
654 free(def->val);
655 def->val = mandoc_strndup(ep->start, ep->toksz);
656 def->valsz = ep->toksz;
657 }
658
659 void
eqn_parse(struct eqn_node * ep)660 eqn_parse(struct eqn_node *ep)
661 {
662 struct eqn_box *cur, *nbox, *parent, *split;
663 const char *cp, *cpn;
664 char *p;
665 enum eqn_tok tok;
666 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
667 int size;
668
669 parent = ep->node->eqn;
670 assert(parent != NULL);
671
672 /*
673 * Empty equation.
674 * Do not add it to the high-level syntax tree.
675 */
676
677 if (ep->data == NULL)
678 return;
679
680 ep->start = ep->end = ep->data;
681 ep->sublen = 0;
682 ep->subcnt = 0;
683
684 next_tok:
685 tok = eqn_next(ep, MODE_TOK);
686 switch (tok) {
687 case EQN_TOK_UNDEF:
688 eqn_undef(ep);
689 break;
690 case EQN_TOK_NDEFINE:
691 case EQN_TOK_DEFINE:
692 eqn_def(ep);
693 break;
694 case EQN_TOK_TDEFINE:
695 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
696 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
697 mandoc_msg(MANDOCERR_REQ_EMPTY,
698 ep->node->line, ep->node->pos, "tdefine");
699 break;
700 case EQN_TOK_DELIM:
701 eqn_delim(ep);
702 break;
703 case EQN_TOK_GFONT:
704 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
705 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
706 ep->node->pos, "%s", eqn_toks[tok]);
707 break;
708 case EQN_TOK_MARK:
709 case EQN_TOK_LINEUP:
710 /* Ignore these. */
711 break;
712 case EQN_TOK_DYAD:
713 case EQN_TOK_VEC:
714 case EQN_TOK_UNDER:
715 case EQN_TOK_BAR:
716 case EQN_TOK_TILDE:
717 case EQN_TOK_HAT:
718 case EQN_TOK_DOT:
719 case EQN_TOK_DOTDOT:
720 if (parent->last == NULL) {
721 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
722 ep->node->pos, "%s", eqn_toks[tok]);
723 cur = eqn_box_alloc(ep, parent);
724 cur->type = EQN_TEXT;
725 cur->text = mandoc_strdup("");
726 }
727 parent = eqn_box_makebinary(ep, parent);
728 parent->type = EQN_LIST;
729 parent->expectargs = 1;
730 parent->font = EQNFONT_ROMAN;
731 switch (tok) {
732 case EQN_TOK_DOTDOT:
733 parent->top = mandoc_strdup("\\[ad]");
734 break;
735 case EQN_TOK_VEC:
736 parent->top = mandoc_strdup("\\[->]");
737 break;
738 case EQN_TOK_DYAD:
739 parent->top = mandoc_strdup("\\[<>]");
740 break;
741 case EQN_TOK_TILDE:
742 parent->top = mandoc_strdup("\\[a~]");
743 break;
744 case EQN_TOK_UNDER:
745 parent->bottom = mandoc_strdup("\\[ul]");
746 break;
747 case EQN_TOK_BAR:
748 parent->top = mandoc_strdup("\\[rn]");
749 break;
750 case EQN_TOK_DOT:
751 parent->top = mandoc_strdup("\\[a.]");
752 break;
753 case EQN_TOK_HAT:
754 parent->top = mandoc_strdup("\\[ha]");
755 break;
756 default:
757 abort();
758 }
759 parent = parent->parent;
760 break;
761 case EQN_TOK_FWD:
762 case EQN_TOK_BACK:
763 case EQN_TOK_DOWN:
764 case EQN_TOK_UP:
765 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
766 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
767 ep->node->pos, "%s", eqn_toks[tok]);
768 break;
769 case EQN_TOK_FAT:
770 case EQN_TOK_ROMAN:
771 case EQN_TOK_ITALIC:
772 case EQN_TOK_BOLD:
773 while (parent->args == parent->expectargs)
774 parent = parent->parent;
775 /*
776 * These values apply to the next word or sequence of
777 * words; thus, we mark that we'll have a child with
778 * exactly one of those.
779 */
780 parent = eqn_box_alloc(ep, parent);
781 parent->type = EQN_LIST;
782 parent->expectargs = 1;
783 switch (tok) {
784 case EQN_TOK_FAT:
785 parent->font = EQNFONT_FAT;
786 break;
787 case EQN_TOK_ROMAN:
788 parent->font = EQNFONT_ROMAN;
789 break;
790 case EQN_TOK_ITALIC:
791 parent->font = EQNFONT_ITALIC;
792 break;
793 case EQN_TOK_BOLD:
794 parent->font = EQNFONT_BOLD;
795 break;
796 default:
797 abort();
798 }
799 break;
800 case EQN_TOK_SIZE:
801 case EQN_TOK_GSIZE:
802 /* Accept two values: integral size and a single. */
803 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
804 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
805 ep->node->pos, "%s", eqn_toks[tok]);
806 break;
807 }
808 size = mandoc_strntoi(ep->start, ep->toksz, 10);
809 if (-1 == size) {
810 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
811 ep->node->pos, "%s", eqn_toks[tok]);
812 break;
813 }
814 if (EQN_TOK_GSIZE == tok) {
815 ep->gsize = size;
816 break;
817 }
818 while (parent->args == parent->expectargs)
819 parent = parent->parent;
820 parent = eqn_box_alloc(ep, parent);
821 parent->type = EQN_LIST;
822 parent->expectargs = 1;
823 parent->size = size;
824 break;
825 case EQN_TOK_FROM:
826 case EQN_TOK_TO:
827 case EQN_TOK_SUB:
828 case EQN_TOK_SUP:
829 /*
830 * We have a left-right-associative expression.
831 * Repivot under a positional node, open a child scope
832 * and keep on reading.
833 */
834 if (parent->last == NULL) {
835 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
836 ep->node->pos, "%s", eqn_toks[tok]);
837 cur = eqn_box_alloc(ep, parent);
838 cur->type = EQN_TEXT;
839 cur->text = mandoc_strdup("");
840 }
841 while (parent->expectargs == 1 && parent->args == 1)
842 parent = parent->parent;
843 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) {
844 for (cur = parent; cur != NULL; cur = cur->parent)
845 if (cur->pos == EQNPOS_SUB ||
846 cur->pos == EQNPOS_SUP ||
847 cur->pos == EQNPOS_SUBSUP ||
848 cur->pos == EQNPOS_SQRT ||
849 cur->pos == EQNPOS_OVER)
850 break;
851 if (cur != NULL)
852 parent = cur->parent;
853 }
854 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
855 parent->expectargs = 3;
856 parent->pos = EQNPOS_SUBSUP;
857 break;
858 }
859 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
860 parent->expectargs = 3;
861 parent->pos = EQNPOS_FROMTO;
862 break;
863 }
864 parent = eqn_box_makebinary(ep, parent);
865 switch (tok) {
866 case EQN_TOK_FROM:
867 parent->pos = EQNPOS_FROM;
868 break;
869 case EQN_TOK_TO:
870 parent->pos = EQNPOS_TO;
871 break;
872 case EQN_TOK_SUP:
873 parent->pos = EQNPOS_SUP;
874 break;
875 case EQN_TOK_SUB:
876 parent->pos = EQNPOS_SUB;
877 break;
878 default:
879 abort();
880 }
881 break;
882 case EQN_TOK_SQRT:
883 while (parent->args == parent->expectargs)
884 parent = parent->parent;
885 /*
886 * Accept a left-right-associative set of arguments just
887 * like sub and sup and friends but without rebalancing
888 * under a pivot.
889 */
890 parent = eqn_box_alloc(ep, parent);
891 parent->type = EQN_SUBEXPR;
892 parent->pos = EQNPOS_SQRT;
893 parent->expectargs = 1;
894 break;
895 case EQN_TOK_OVER:
896 /*
897 * We have a right-left-associative fraction.
898 * Close out anything that's currently open, then
899 * rebalance and continue reading.
900 */
901 if (parent->last == NULL) {
902 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
903 ep->node->pos, "%s", eqn_toks[tok]);
904 cur = eqn_box_alloc(ep, parent);
905 cur->type = EQN_TEXT;
906 cur->text = mandoc_strdup("");
907 }
908 while (parent->args == parent->expectargs)
909 parent = parent->parent;
910 while (EQN_SUBEXPR == parent->type)
911 parent = parent->parent;
912 parent = eqn_box_makebinary(ep, parent);
913 parent->pos = EQNPOS_OVER;
914 break;
915 case EQN_TOK_RIGHT:
916 case EQN_TOK_BRACE_CLOSE:
917 /*
918 * Close out the existing brace.
919 * FIXME: this is a shitty sentinel: we should really
920 * have a native EQN_BRACE type or whatnot.
921 */
922 for (cur = parent; cur != NULL; cur = cur->parent)
923 if (cur->type == EQN_LIST &&
924 cur->expectargs > 1 &&
925 (tok == EQN_TOK_BRACE_CLOSE ||
926 cur->left != NULL))
927 break;
928 if (cur == NULL) {
929 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
930 ep->node->pos, "%s", eqn_toks[tok]);
931 break;
932 }
933 parent = cur;
934 if (EQN_TOK_RIGHT == tok) {
935 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
936 mandoc_msg(MANDOCERR_REQ_EMPTY,
937 ep->node->line, ep->node->pos,
938 "%s", eqn_toks[tok]);
939 break;
940 }
941 /* Handling depends on right/left. */
942 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
943 parent->right = mandoc_strdup("\\[rc]");
944 else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
945 parent->right = mandoc_strdup("\\[rf]");
946 else
947 parent->right =
948 mandoc_strndup(ep->start, ep->toksz);
949 }
950 parent = parent->parent;
951 if (tok == EQN_TOK_BRACE_CLOSE &&
952 (parent->type == EQN_PILE ||
953 parent->type == EQN_MATRIX))
954 parent = parent->parent;
955 /* Close out any "singleton" lists. */
956 while (parent->type == EQN_LIST &&
957 parent->expectargs == 1 &&
958 parent->args == 1)
959 parent = parent->parent;
960 break;
961 case EQN_TOK_BRACE_OPEN:
962 case EQN_TOK_LEFT:
963 /*
964 * If we already have something in the stack and we're
965 * in an expression, then rewind til we're not any more
966 * (just like with the text node).
967 */
968 while (parent->args == parent->expectargs)
969 parent = parent->parent;
970 if (EQN_TOK_LEFT == tok &&
971 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
972 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
973 ep->node->pos, "%s", eqn_toks[tok]);
974 break;
975 }
976 parent = eqn_box_alloc(ep, parent);
977 parent->type = EQN_LIST;
978 if (EQN_TOK_LEFT == tok) {
979 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
980 parent->left = mandoc_strdup("\\[lc]");
981 else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
982 parent->left = mandoc_strdup("\\[lf]");
983 else
984 parent->left =
985 mandoc_strndup(ep->start, ep->toksz);
986 }
987 break;
988 case EQN_TOK_PILE:
989 case EQN_TOK_LPILE:
990 case EQN_TOK_RPILE:
991 case EQN_TOK_CPILE:
992 case EQN_TOK_CCOL:
993 case EQN_TOK_LCOL:
994 case EQN_TOK_RCOL:
995 while (parent->args == parent->expectargs)
996 parent = parent->parent;
997 parent = eqn_box_alloc(ep, parent);
998 parent->type = EQN_PILE;
999 parent->expectargs = 1;
1000 break;
1001 case EQN_TOK_ABOVE:
1002 for (cur = parent; cur != NULL; cur = cur->parent)
1003 if (cur->type == EQN_PILE)
1004 break;
1005 if (cur == NULL) {
1006 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1007 ep->node->pos, "%s", eqn_toks[tok]);
1008 break;
1009 }
1010 parent = eqn_box_alloc(ep, cur);
1011 parent->type = EQN_LIST;
1012 break;
1013 case EQN_TOK_MATRIX:
1014 while (parent->args == parent->expectargs)
1015 parent = parent->parent;
1016 parent = eqn_box_alloc(ep, parent);
1017 parent->type = EQN_MATRIX;
1018 parent->expectargs = 1;
1019 break;
1020 case EQN_TOK_EOF:
1021 return;
1022 case EQN_TOK__MAX:
1023 case EQN_TOK_FUNC:
1024 case EQN_TOK_QUOTED:
1025 case EQN_TOK_SYM:
1026 p = ep->start;
1027 assert(p != NULL);
1028 /*
1029 * If we already have something in the stack and we're
1030 * in an expression, then rewind til we're not any more.
1031 */
1032 while (parent->args == parent->expectargs)
1033 parent = parent->parent;
1034 cur = eqn_box_alloc(ep, parent);
1035 cur->type = EQN_TEXT;
1036 cur->text = p;
1037 switch (tok) {
1038 case EQN_TOK_FUNC:
1039 cur->font = EQNFONT_ROMAN;
1040 break;
1041 case EQN_TOK_QUOTED:
1042 if (cur->font == EQNFONT_NONE)
1043 cur->font = EQNFONT_ITALIC;
1044 break;
1045 case EQN_TOK_SYM:
1046 break;
1047 default:
1048 if (cur->font != EQNFONT_NONE || *p == '\0')
1049 break;
1050 cpn = p - 1;
1051 ccln = CCL_LET;
1052 split = NULL;
1053 for (;;) {
1054 /* Advance to next character. */
1055 cp = cpn++;
1056 ccl = ccln;
1057 ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1058 isdigit((unsigned char)*cpn) ||
1059 (*cpn == '.' && (ccl == CCL_DIG ||
1060 isdigit((unsigned char)cpn[1]))) ?
1061 CCL_DIG : CCL_PUN;
1062 /* No boundary before first character. */
1063 if (cp < p)
1064 continue;
1065 cur->font = ccl == CCL_LET ?
1066 EQNFONT_ITALIC : EQNFONT_ROMAN;
1067 if (*cp == '\\')
1068 mandoc_escape(&cpn, NULL, NULL);
1069 /* No boundary after last character. */
1070 if (*cpn == '\0')
1071 break;
1072 if (ccln == ccl && *cp != ',' && *cpn != ',')
1073 continue;
1074 /* Boundary found, split the text. */
1075 if (parent->args == parent->expectargs) {
1076 /* Remove the text from the tree. */
1077 if (cur->prev == NULL)
1078 parent->first = cur->next;
1079 else
1080 cur->prev->next = NULL;
1081 parent->last = cur->prev;
1082 parent->args--;
1083 /* Set up a list instead. */
1084 split = eqn_box_alloc(ep, parent);
1085 split->type = EQN_LIST;
1086 /* Insert the word into the list. */
1087 split->first = split->last = cur;
1088 cur->parent = split;
1089 cur->prev = NULL;
1090 parent = split;
1091 }
1092 /* Append a new text box. */
1093 nbox = eqn_box_alloc(ep, parent);
1094 nbox->type = EQN_TEXT;
1095 nbox->text = mandoc_strdup(cpn);
1096 /* Truncate the old box. */
1097 p = mandoc_strndup(cur->text,
1098 cpn - cur->text);
1099 free(cur->text);
1100 cur->text = p;
1101 /* Setup to process the new box. */
1102 cur = nbox;
1103 p = nbox->text;
1104 cpn = p - 1;
1105 ccln = CCL_LET;
1106 }
1107 if (split != NULL)
1108 parent = split->parent;
1109 break;
1110 }
1111 break;
1112 default:
1113 abort();
1114 }
1115 goto next_tok;
1116 }
1117
1118 void
eqn_free(struct eqn_node * p)1119 eqn_free(struct eqn_node *p)
1120 {
1121 int i;
1122
1123 if (p == NULL)
1124 return;
1125
1126 for (i = 0; i < (int)p->defsz; i++) {
1127 free(p->defs[i].key);
1128 free(p->defs[i].val);
1129 }
1130
1131 free(p->data);
1132 free(p->defs);
1133 free(p);
1134 }
1135