xref: /openbsd/usr.bin/mandoc/eqn.c (revision d9a51c35)
1 /* $OpenBSD: eqn.c,v 1.49 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3  * Copyright (c) 2014, 2015, 2017, 2018, 2020, 2022
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28 
29 #include "mandoc_aux.h"
30 #include "mandoc.h"
31 #include "roff.h"
32 #include "eqn.h"
33 #include "libmandoc.h"
34 #include "eqn_parse.h"
35 
36 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
37 #define	STRNEQ(p1, sz1, p2, sz2) \
38 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
39 
40 enum	eqn_tok {
41 	EQN_TOK_DYAD = 0,
42 	EQN_TOK_VEC,
43 	EQN_TOK_UNDER,
44 	EQN_TOK_BAR,
45 	EQN_TOK_TILDE,
46 	EQN_TOK_HAT,
47 	EQN_TOK_DOT,
48 	EQN_TOK_DOTDOT,
49 	EQN_TOK_FWD,
50 	EQN_TOK_BACK,
51 	EQN_TOK_DOWN,
52 	EQN_TOK_UP,
53 	EQN_TOK_FAT,
54 	EQN_TOK_ROMAN,
55 	EQN_TOK_ITALIC,
56 	EQN_TOK_BOLD,
57 	EQN_TOK_SIZE,
58 	EQN_TOK_SUB,
59 	EQN_TOK_SUP,
60 	EQN_TOK_SQRT,
61 	EQN_TOK_OVER,
62 	EQN_TOK_FROM,
63 	EQN_TOK_TO,
64 	EQN_TOK_BRACE_OPEN,
65 	EQN_TOK_BRACE_CLOSE,
66 	EQN_TOK_GSIZE,
67 	EQN_TOK_GFONT,
68 	EQN_TOK_MARK,
69 	EQN_TOK_LINEUP,
70 	EQN_TOK_LEFT,
71 	EQN_TOK_RIGHT,
72 	EQN_TOK_PILE,
73 	EQN_TOK_LPILE,
74 	EQN_TOK_RPILE,
75 	EQN_TOK_CPILE,
76 	EQN_TOK_MATRIX,
77 	EQN_TOK_CCOL,
78 	EQN_TOK_LCOL,
79 	EQN_TOK_RCOL,
80 	EQN_TOK_DELIM,
81 	EQN_TOK_DEFINE,
82 	EQN_TOK_TDEFINE,
83 	EQN_TOK_NDEFINE,
84 	EQN_TOK_UNDEF,
85 	EQN_TOK_ABOVE,
86 	EQN_TOK__MAX,
87 	EQN_TOK_FUNC,
88 	EQN_TOK_QUOTED,
89 	EQN_TOK_SYM,
90 	EQN_TOK_EOF
91 };
92 
93 static	const char *eqn_toks[EQN_TOK__MAX] = {
94 	"dyad", /* EQN_TOK_DYAD */
95 	"vec", /* EQN_TOK_VEC */
96 	"under", /* EQN_TOK_UNDER */
97 	"bar", /* EQN_TOK_BAR */
98 	"tilde", /* EQN_TOK_TILDE */
99 	"hat", /* EQN_TOK_HAT */
100 	"dot", /* EQN_TOK_DOT */
101 	"dotdot", /* EQN_TOK_DOTDOT */
102 	"fwd", /* EQN_TOK_FWD * */
103 	"back", /* EQN_TOK_BACK */
104 	"down", /* EQN_TOK_DOWN */
105 	"up", /* EQN_TOK_UP */
106 	"fat", /* EQN_TOK_FAT */
107 	"roman", /* EQN_TOK_ROMAN */
108 	"italic", /* EQN_TOK_ITALIC */
109 	"bold", /* EQN_TOK_BOLD */
110 	"size", /* EQN_TOK_SIZE */
111 	"sub", /* EQN_TOK_SUB */
112 	"sup", /* EQN_TOK_SUP */
113 	"sqrt", /* EQN_TOK_SQRT */
114 	"over", /* EQN_TOK_OVER */
115 	"from", /* EQN_TOK_FROM */
116 	"to", /* EQN_TOK_TO */
117 	"{", /* EQN_TOK_BRACE_OPEN */
118 	"}", /* EQN_TOK_BRACE_CLOSE */
119 	"gsize", /* EQN_TOK_GSIZE */
120 	"gfont", /* EQN_TOK_GFONT */
121 	"mark", /* EQN_TOK_MARK */
122 	"lineup", /* EQN_TOK_LINEUP */
123 	"left", /* EQN_TOK_LEFT */
124 	"right", /* EQN_TOK_RIGHT */
125 	"pile", /* EQN_TOK_PILE */
126 	"lpile", /* EQN_TOK_LPILE */
127 	"rpile", /* EQN_TOK_RPILE */
128 	"cpile", /* EQN_TOK_CPILE */
129 	"matrix", /* EQN_TOK_MATRIX */
130 	"ccol", /* EQN_TOK_CCOL */
131 	"lcol", /* EQN_TOK_LCOL */
132 	"rcol", /* EQN_TOK_RCOL */
133 	"delim", /* EQN_TOK_DELIM */
134 	"define", /* EQN_TOK_DEFINE */
135 	"tdefine", /* EQN_TOK_TDEFINE */
136 	"ndefine", /* EQN_TOK_NDEFINE */
137 	"undef", /* EQN_TOK_UNDEF */
138 	"above", /* EQN_TOK_ABOVE */
139 };
140 
141 static	const char *const eqn_func[] = {
142 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
143 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
144 	"if",	"lim",	"ln",	"log",	"max",	"min",
145 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
146 };
147 
148 enum	eqn_symt {
149 	EQNSYM_alpha = 0,
150 	EQNSYM_beta,
151 	EQNSYM_chi,
152 	EQNSYM_delta,
153 	EQNSYM_epsilon,
154 	EQNSYM_eta,
155 	EQNSYM_gamma,
156 	EQNSYM_iota,
157 	EQNSYM_kappa,
158 	EQNSYM_lambda,
159 	EQNSYM_mu,
160 	EQNSYM_nu,
161 	EQNSYM_omega,
162 	EQNSYM_omicron,
163 	EQNSYM_phi,
164 	EQNSYM_pi,
165 	EQNSYM_ps,
166 	EQNSYM_rho,
167 	EQNSYM_sigma,
168 	EQNSYM_tau,
169 	EQNSYM_theta,
170 	EQNSYM_upsilon,
171 	EQNSYM_xi,
172 	EQNSYM_zeta,
173 	EQNSYM_DELTA,
174 	EQNSYM_GAMMA,
175 	EQNSYM_LAMBDA,
176 	EQNSYM_OMEGA,
177 	EQNSYM_PHI,
178 	EQNSYM_PI,
179 	EQNSYM_PSI,
180 	EQNSYM_SIGMA,
181 	EQNSYM_THETA,
182 	EQNSYM_UPSILON,
183 	EQNSYM_XI,
184 	EQNSYM_inter,
185 	EQNSYM_union,
186 	EQNSYM_prod,
187 	EQNSYM_int,
188 	EQNSYM_sum,
189 	EQNSYM_grad,
190 	EQNSYM_del,
191 	EQNSYM_times,
192 	EQNSYM_cdot,
193 	EQNSYM_nothing,
194 	EQNSYM_approx,
195 	EQNSYM_prime,
196 	EQNSYM_half,
197 	EQNSYM_partial,
198 	EQNSYM_inf,
199 	EQNSYM_muchgreat,
200 	EQNSYM_muchless,
201 	EQNSYM_larrow,
202 	EQNSYM_rarrow,
203 	EQNSYM_pm,
204 	EQNSYM_nequal,
205 	EQNSYM_equiv,
206 	EQNSYM_lessequal,
207 	EQNSYM_moreequal,
208 	EQNSYM_minus,
209 	EQNSYM__MAX
210 };
211 
212 struct	eqnsym {
213 	const char	*str;
214 	const char	*sym;
215 };
216 
217 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
218 	{ "alpha", "*a" }, /* EQNSYM_alpha */
219 	{ "beta", "*b" }, /* EQNSYM_beta */
220 	{ "chi", "*x" }, /* EQNSYM_chi */
221 	{ "delta", "*d" }, /* EQNSYM_delta */
222 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
223 	{ "eta", "*y" }, /* EQNSYM_eta */
224 	{ "gamma", "*g" }, /* EQNSYM_gamma */
225 	{ "iota", "*i" }, /* EQNSYM_iota */
226 	{ "kappa", "*k" }, /* EQNSYM_kappa */
227 	{ "lambda", "*l" }, /* EQNSYM_lambda */
228 	{ "mu", "*m" }, /* EQNSYM_mu */
229 	{ "nu", "*n" }, /* EQNSYM_nu */
230 	{ "omega", "*w" }, /* EQNSYM_omega */
231 	{ "omicron", "*o" }, /* EQNSYM_omicron */
232 	{ "phi", "*f" }, /* EQNSYM_phi */
233 	{ "pi", "*p" }, /* EQNSYM_pi */
234 	{ "psi", "*q" }, /* EQNSYM_psi */
235 	{ "rho", "*r" }, /* EQNSYM_rho */
236 	{ "sigma", "*s" }, /* EQNSYM_sigma */
237 	{ "tau", "*t" }, /* EQNSYM_tau */
238 	{ "theta", "*h" }, /* EQNSYM_theta */
239 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
240 	{ "xi", "*c" }, /* EQNSYM_xi */
241 	{ "zeta", "*z" }, /* EQNSYM_zeta */
242 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
243 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
244 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
245 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
246 	{ "PHI", "*F" }, /* EQNSYM_PHI */
247 	{ "PI", "*P" }, /* EQNSYM_PI */
248 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
249 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
250 	{ "THETA", "*H" }, /* EQNSYM_THETA */
251 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
252 	{ "XI", "*C" }, /* EQNSYM_XI */
253 	{ "inter", "ca" }, /* EQNSYM_inter */
254 	{ "union", "cu" }, /* EQNSYM_union */
255 	{ "prod", "product" }, /* EQNSYM_prod */
256 	{ "int", "integral" }, /* EQNSYM_int */
257 	{ "sum", "sum" }, /* EQNSYM_sum */
258 	{ "grad", "gr" }, /* EQNSYM_grad */
259 	{ "del", "gr" }, /* EQNSYM_del */
260 	{ "times", "mu" }, /* EQNSYM_times */
261 	{ "cdot", "pc" }, /* EQNSYM_cdot */
262 	{ "nothing", "&" }, /* EQNSYM_nothing */
263 	{ "approx", "~~" }, /* EQNSYM_approx */
264 	{ "prime", "fm" }, /* EQNSYM_prime */
265 	{ "half", "12" }, /* EQNSYM_half */
266 	{ "partial", "pd" }, /* EQNSYM_partial */
267 	{ "inf", "if" }, /* EQNSYM_inf */
268 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
269 	{ "<<", "<<" }, /* EQNSYM_muchless */
270 	{ "<-", "<-" }, /* EQNSYM_larrow */
271 	{ "->", "->" }, /* EQNSYM_rarrow */
272 	{ "+-", "+-" }, /* EQNSYM_pm */
273 	{ "!=", "!=" }, /* EQNSYM_nequal */
274 	{ "==", "==" }, /* EQNSYM_equiv */
275 	{ "<=", "<=" }, /* EQNSYM_lessequal */
276 	{ ">=", ">=" }, /* EQNSYM_moreequal */
277 	{ "-", "mi" }, /* EQNSYM_minus */
278 };
279 
280 enum	parse_mode {
281 	MODE_QUOTED,
282 	MODE_NOSUB,
283 	MODE_SUB,
284 	MODE_TOK
285 };
286 
287 struct	eqn_def {
288 	char		 *key;
289 	size_t		  keysz;
290 	char		 *val;
291 	size_t		  valsz;
292 };
293 
294 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
295 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
296 				struct eqn_box *);
297 static	void		 eqn_def(struct eqn_node *);
298 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
299 static	void		 eqn_delim(struct eqn_node *);
300 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
301 static	void		 eqn_undef(struct eqn_node *);
302 
303 
304 struct eqn_node *
eqn_alloc(void)305 eqn_alloc(void)
306 {
307 	struct eqn_node *ep;
308 
309 	ep = mandoc_calloc(1, sizeof(*ep));
310 	ep->gsize = EQN_DEFSIZE;
311 	return ep;
312 }
313 
314 void
eqn_reset(struct eqn_node * ep)315 eqn_reset(struct eqn_node *ep)
316 {
317 	free(ep->data);
318 	ep->data = ep->start = ep->end = NULL;
319 	ep->sz = ep->toksz = 0;
320 }
321 
322 void
eqn_read(struct eqn_node * ep,const char * p)323 eqn_read(struct eqn_node *ep, const char *p)
324 {
325 	char		*cp;
326 
327 	if (ep->data == NULL) {
328 		ep->sz = strlen(p);
329 		ep->data = mandoc_strdup(p);
330 	} else {
331 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
332 		free(ep->data);
333 		ep->data = cp;
334 	}
335 	ep->sz += 1;
336 }
337 
338 /*
339  * Find the key "key" of the give size within our eqn-defined values.
340  */
341 static struct eqn_def *
eqn_def_find(struct eqn_node * ep)342 eqn_def_find(struct eqn_node *ep)
343 {
344 	int		 i;
345 
346 	for (i = 0; i < (int)ep->defsz; i++)
347 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
348 		    ep->defs[i].keysz, ep->start, ep->toksz))
349 			return &ep->defs[i];
350 
351 	return NULL;
352 }
353 
354 /*
355  * Parse a token from the input text.  The modes are:
356  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
357  *   before its next occurrence.  Do not interpret the token in any
358  *   way and return EQN_TOK_QUOTED.  All other modes behave like
359  *   MODE_QUOTED when *ep->start is '"'.
360  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
361  *   otherwise, it ends before the next whitespace or brace.
362  *   Do not interpret the token and return EQN_TOK__MAX.
363  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
364  *   alias created with define.  If it is an alias, replace it with
365  *   its string value and reparse.
366  * MODE_TOK: Like MODE_SUB, but also check the token against the list
367  *   of tokens, and if there is a match, return that token.  Otherwise,
368  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
369  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
370  *   a token match, *ep->start is set to an allocated string that the
371  *   caller is expected to free.
372  * All modes skip whitespace following the end of the token.
373  */
374 static enum eqn_tok
eqn_next(struct eqn_node * ep,enum parse_mode mode)375 eqn_next(struct eqn_node *ep, enum parse_mode mode)
376 {
377 	struct eqn_def	*def;
378 	size_t		 start;
379 	int		 diff, i, newlen, quoted;
380 	enum eqn_tok	 tok;
381 
382 	/*
383 	 * Reset the recursion counter after advancing
384 	 * beyond the end of the rightmost substitution.
385 	 */
386 	if (ep->end - ep->data >= ep->sublen)
387 		ep->subcnt = 0;
388 
389 	ep->start = ep->end;
390 	quoted = mode == MODE_QUOTED;
391 	for (;;) {
392 		switch (*ep->start) {
393 		case '\0':
394 			ep->toksz = 0;
395 			return EQN_TOK_EOF;
396 		case '"':
397 			quoted = 1;
398 			break;
399 		case ' ':
400 		case '\t':
401 		case '~':
402 		case '^':
403 			if (quoted)
404 				break;
405 			ep->start++;
406 			continue;
407 		default:
408 			break;
409 		}
410 		if (quoted) {
411 			ep->end = strchr(ep->start + 1, *ep->start);
412 			ep->start++;  /* Skip opening quote. */
413 			if (ep->end == NULL) {
414 				mandoc_msg(MANDOCERR_ARG_QUOTE,
415 				    ep->node->line, ep->node->pos, NULL);
416 				ep->end = strchr(ep->start, '\0');
417 			}
418 		} else {
419 			ep->end = ep->start + 1;
420 			if (*ep->start != '{' && *ep->start != '}')
421 				ep->end += strcspn(ep->end, " ^~\"{}\t");
422 		}
423 		ep->toksz = ep->end - ep->start;
424 		if (quoted && *ep->end != '\0')
425 			ep->end++;  /* Skip closing quote. */
426 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
427 			ep->end++;
428 		if (quoted)  /* Cannot return, may have to strndup. */
429 			break;
430 		if (mode == MODE_NOSUB)
431 			return EQN_TOK__MAX;
432 		if ((def = eqn_def_find(ep)) == NULL)
433 			break;
434 		if (++ep->subcnt > EQN_NEST_MAX) {
435 			mandoc_msg(MANDOCERR_ROFFLOOP,
436 			    ep->node->line, ep->node->pos, NULL);
437 			break;
438 		}
439 
440 		/* Replace a defined name with its string value. */
441 		if ((diff = def->valsz - ep->toksz) > 0) {
442 			start = ep->start - ep->data;
443 			ep->sz += diff;
444 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
445 			ep->start = ep->data + start;
446 			ep->sublen += diff;
447 		}
448 		if (diff)
449 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
450 			    strlen(ep->start + ep->toksz) + 1);
451 		memcpy(ep->start, def->val, def->valsz);
452 		newlen = ep->start - ep->data + def->valsz;
453 		if (ep->sublen < newlen)
454 			ep->sublen = newlen;
455 	}
456 	if (mode != MODE_TOK)
457 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
458 	if (quoted) {
459 		ep->start = mandoc_strndup(ep->start, ep->toksz);
460 		return EQN_TOK_QUOTED;
461 	}
462 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
463 		if (STRNEQ(ep->start, ep->toksz,
464 		    eqn_toks[tok], strlen(eqn_toks[tok])))
465 			return tok;
466 
467 	for (i = 0; i < EQNSYM__MAX; i++) {
468 		if (STRNEQ(ep->start, ep->toksz,
469 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
470 			mandoc_asprintf(&ep->start,
471 			    "\\[%s]", eqnsyms[i].sym);
472 			return EQN_TOK_SYM;
473 		}
474 	}
475 	ep->start = mandoc_strndup(ep->start, ep->toksz);
476 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
477 		if (STRNEQ(ep->start, ep->toksz,
478 		    eqn_func[i], strlen(eqn_func[i])))
479 			return EQN_TOK_FUNC;
480 	return EQN_TOK__MAX;
481 }
482 
483 void
eqn_box_free(struct eqn_box * bp)484 eqn_box_free(struct eqn_box *bp)
485 {
486 	if (bp == NULL)
487 		return;
488 
489 	if (bp->first)
490 		eqn_box_free(bp->first);
491 	if (bp->next)
492 		eqn_box_free(bp->next);
493 
494 	free(bp->text);
495 	free(bp->left);
496 	free(bp->right);
497 	free(bp->top);
498 	free(bp->bottom);
499 	free(bp);
500 }
501 
502 struct eqn_box *
eqn_box_new(void)503 eqn_box_new(void)
504 {
505 	struct eqn_box	*bp;
506 
507 	bp = mandoc_calloc(1, sizeof(*bp));
508 	bp->expectargs = UINT_MAX;
509 	return bp;
510 }
511 
512 /*
513  * Allocate a box as the last child of the parent node.
514  */
515 static struct eqn_box *
eqn_box_alloc(struct eqn_node * ep,struct eqn_box * parent)516 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
517 {
518 	struct eqn_box	*bp;
519 
520 	bp = eqn_box_new();
521 	bp->parent = parent;
522 	bp->parent->args++;
523 	bp->font = bp->parent->font;
524 	bp->size = ep->gsize;
525 
526 	if (NULL != parent->first) {
527 		parent->last->next = bp;
528 		bp->prev = parent->last;
529 	} else
530 		parent->first = bp;
531 
532 	parent->last = bp;
533 	return bp;
534 }
535 
536 /*
537  * Reparent the current last node (of the current parent) under a new
538  * EQN_SUBEXPR as the first element.
539  * Then return the new parent.
540  * The new EQN_SUBEXPR will have a two-child limit.
541  */
542 static struct eqn_box *
eqn_box_makebinary(struct eqn_node * ep,struct eqn_box * parent)543 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
544 {
545 	struct eqn_box	*b, *newb;
546 
547 	assert(NULL != parent->last);
548 	b = parent->last;
549 	if (parent->last == parent->first)
550 		parent->first = NULL;
551 	parent->args--;
552 	parent->last = b->prev;
553 	b->prev = NULL;
554 	newb = eqn_box_alloc(ep, parent);
555 	newb->type = EQN_SUBEXPR;
556 	newb->expectargs = 2;
557 	newb->args = 1;
558 	newb->first = newb->last = b;
559 	newb->first->next = NULL;
560 	b->parent = newb;
561 	return newb;
562 }
563 
564 /*
565  * Parse the "delim" control statement.
566  */
567 static void
eqn_delim(struct eqn_node * ep)568 eqn_delim(struct eqn_node *ep)
569 {
570 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
571 		mandoc_msg(MANDOCERR_REQ_EMPTY,
572 		    ep->node->line, ep->node->pos, "delim");
573 		if (ep->end[0] != '\0')
574 			ep->end++;
575 	} else if (strncmp(ep->end, "off", 3) == 0) {
576 		ep->delim = 0;
577 		ep->end += 3;
578 	} else if (strncmp(ep->end, "on", 2) == 0) {
579 		if (ep->odelim && ep->cdelim)
580 			ep->delim = 1;
581 		ep->end += 2;
582 	} else {
583 		ep->odelim = *ep->end++;
584 		ep->cdelim = *ep->end++;
585 		ep->delim = 1;
586 	}
587 }
588 
589 /*
590  * Undefine a previously-defined string.
591  */
592 static void
eqn_undef(struct eqn_node * ep)593 eqn_undef(struct eqn_node *ep)
594 {
595 	struct eqn_def	*def;
596 
597 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
598 		mandoc_msg(MANDOCERR_REQ_EMPTY,
599 		    ep->node->line, ep->node->pos, "undef");
600 		return;
601 	}
602 	if ((def = eqn_def_find(ep)) == NULL)
603 		return;
604 	free(def->key);
605 	free(def->val);
606 	def->key = def->val = NULL;
607 	def->keysz = def->valsz = 0;
608 }
609 
610 static void
eqn_def(struct eqn_node * ep)611 eqn_def(struct eqn_node *ep)
612 {
613 	struct eqn_def	*def;
614 	int		 i;
615 
616 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
617 		mandoc_msg(MANDOCERR_REQ_EMPTY,
618 		    ep->node->line, ep->node->pos, "define");
619 		return;
620 	}
621 
622 	/*
623 	 * Search for a key that already exists.
624 	 * Create a new key if none is found.
625 	 */
626 	if ((def = eqn_def_find(ep)) == NULL) {
627 		/* Find holes in string array. */
628 		for (i = 0; i < (int)ep->defsz; i++)
629 			if (0 == ep->defs[i].keysz)
630 				break;
631 
632 		if (i == (int)ep->defsz) {
633 			ep->defsz++;
634 			ep->defs = mandoc_reallocarray(ep->defs,
635 			    ep->defsz, sizeof(struct eqn_def));
636 			ep->defs[i].key = ep->defs[i].val = NULL;
637 		}
638 
639 		def = ep->defs + i;
640 		free(def->key);
641 		def->key = mandoc_strndup(ep->start, ep->toksz);
642 		def->keysz = ep->toksz;
643 	}
644 
645 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
646 		mandoc_msg(MANDOCERR_REQ_EMPTY,
647 		    ep->node->line, ep->node->pos, "define %s", def->key);
648 		free(def->key);
649 		free(def->val);
650 		def->key = def->val = NULL;
651 		def->keysz = def->valsz = 0;
652 		return;
653 	}
654 	free(def->val);
655 	def->val = mandoc_strndup(ep->start, ep->toksz);
656 	def->valsz = ep->toksz;
657 }
658 
659 void
eqn_parse(struct eqn_node * ep)660 eqn_parse(struct eqn_node *ep)
661 {
662 	struct eqn_box	*cur, *nbox, *parent, *split;
663 	const char	*cp, *cpn;
664 	char		*p;
665 	enum eqn_tok	 tok;
666 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
667 	int		 size;
668 
669 	parent = ep->node->eqn;
670 	assert(parent != NULL);
671 
672 	/*
673 	 * Empty equation.
674 	 * Do not add it to the high-level syntax tree.
675 	 */
676 
677 	if (ep->data == NULL)
678 		return;
679 
680 	ep->start = ep->end = ep->data;
681 	ep->sublen = 0;
682 	ep->subcnt = 0;
683 
684 next_tok:
685 	tok = eqn_next(ep, MODE_TOK);
686 	switch (tok) {
687 	case EQN_TOK_UNDEF:
688 		eqn_undef(ep);
689 		break;
690 	case EQN_TOK_NDEFINE:
691 	case EQN_TOK_DEFINE:
692 		eqn_def(ep);
693 		break;
694 	case EQN_TOK_TDEFINE:
695 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
696 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
697 			mandoc_msg(MANDOCERR_REQ_EMPTY,
698 			    ep->node->line, ep->node->pos, "tdefine");
699 		break;
700 	case EQN_TOK_DELIM:
701 		eqn_delim(ep);
702 		break;
703 	case EQN_TOK_GFONT:
704 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
705 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
706 			    ep->node->pos, "%s", eqn_toks[tok]);
707 		break;
708 	case EQN_TOK_MARK:
709 	case EQN_TOK_LINEUP:
710 		/* Ignore these. */
711 		break;
712 	case EQN_TOK_DYAD:
713 	case EQN_TOK_VEC:
714 	case EQN_TOK_UNDER:
715 	case EQN_TOK_BAR:
716 	case EQN_TOK_TILDE:
717 	case EQN_TOK_HAT:
718 	case EQN_TOK_DOT:
719 	case EQN_TOK_DOTDOT:
720 		if (parent->last == NULL) {
721 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
722 			    ep->node->pos, "%s", eqn_toks[tok]);
723 			cur = eqn_box_alloc(ep, parent);
724 			cur->type = EQN_TEXT;
725 			cur->text = mandoc_strdup("");
726 		}
727 		parent = eqn_box_makebinary(ep, parent);
728 		parent->type = EQN_LIST;
729 		parent->expectargs = 1;
730 		parent->font = EQNFONT_ROMAN;
731 		switch (tok) {
732 		case EQN_TOK_DOTDOT:
733 			parent->top = mandoc_strdup("\\[ad]");
734 			break;
735 		case EQN_TOK_VEC:
736 			parent->top = mandoc_strdup("\\[->]");
737 			break;
738 		case EQN_TOK_DYAD:
739 			parent->top = mandoc_strdup("\\[<>]");
740 			break;
741 		case EQN_TOK_TILDE:
742 			parent->top = mandoc_strdup("\\[a~]");
743 			break;
744 		case EQN_TOK_UNDER:
745 			parent->bottom = mandoc_strdup("\\[ul]");
746 			break;
747 		case EQN_TOK_BAR:
748 			parent->top = mandoc_strdup("\\[rn]");
749 			break;
750 		case EQN_TOK_DOT:
751 			parent->top = mandoc_strdup("\\[a.]");
752 			break;
753 		case EQN_TOK_HAT:
754 			parent->top = mandoc_strdup("\\[ha]");
755 			break;
756 		default:
757 			abort();
758 		}
759 		parent = parent->parent;
760 		break;
761 	case EQN_TOK_FWD:
762 	case EQN_TOK_BACK:
763 	case EQN_TOK_DOWN:
764 	case EQN_TOK_UP:
765 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
766 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
767 			    ep->node->pos, "%s", eqn_toks[tok]);
768 		break;
769 	case EQN_TOK_FAT:
770 	case EQN_TOK_ROMAN:
771 	case EQN_TOK_ITALIC:
772 	case EQN_TOK_BOLD:
773 		while (parent->args == parent->expectargs)
774 			parent = parent->parent;
775 		/*
776 		 * These values apply to the next word or sequence of
777 		 * words; thus, we mark that we'll have a child with
778 		 * exactly one of those.
779 		 */
780 		parent = eqn_box_alloc(ep, parent);
781 		parent->type = EQN_LIST;
782 		parent->expectargs = 1;
783 		switch (tok) {
784 		case EQN_TOK_FAT:
785 			parent->font = EQNFONT_FAT;
786 			break;
787 		case EQN_TOK_ROMAN:
788 			parent->font = EQNFONT_ROMAN;
789 			break;
790 		case EQN_TOK_ITALIC:
791 			parent->font = EQNFONT_ITALIC;
792 			break;
793 		case EQN_TOK_BOLD:
794 			parent->font = EQNFONT_BOLD;
795 			break;
796 		default:
797 			abort();
798 		}
799 		break;
800 	case EQN_TOK_SIZE:
801 	case EQN_TOK_GSIZE:
802 		/* Accept two values: integral size and a single. */
803 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
804 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
805 			    ep->node->pos, "%s", eqn_toks[tok]);
806 			break;
807 		}
808 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
809 		if (-1 == size) {
810 			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
811 			    ep->node->pos, "%s", eqn_toks[tok]);
812 			break;
813 		}
814 		if (EQN_TOK_GSIZE == tok) {
815 			ep->gsize = size;
816 			break;
817 		}
818 		while (parent->args == parent->expectargs)
819 			parent = parent->parent;
820 		parent = eqn_box_alloc(ep, parent);
821 		parent->type = EQN_LIST;
822 		parent->expectargs = 1;
823 		parent->size = size;
824 		break;
825 	case EQN_TOK_FROM:
826 	case EQN_TOK_TO:
827 	case EQN_TOK_SUB:
828 	case EQN_TOK_SUP:
829 		/*
830 		 * We have a left-right-associative expression.
831 		 * Repivot under a positional node, open a child scope
832 		 * and keep on reading.
833 		 */
834 		if (parent->last == NULL) {
835 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
836 			    ep->node->pos, "%s", eqn_toks[tok]);
837 			cur = eqn_box_alloc(ep, parent);
838 			cur->type = EQN_TEXT;
839 			cur->text = mandoc_strdup("");
840 		}
841 		while (parent->expectargs == 1 && parent->args == 1)
842 			parent = parent->parent;
843 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
844 			for (cur = parent; cur != NULL; cur = cur->parent)
845 				if (cur->pos == EQNPOS_SUB ||
846 				    cur->pos == EQNPOS_SUP ||
847 				    cur->pos == EQNPOS_SUBSUP ||
848 				    cur->pos == EQNPOS_SQRT ||
849 				    cur->pos == EQNPOS_OVER)
850 					break;
851 			if (cur != NULL)
852 				parent = cur->parent;
853 		}
854 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
855 			parent->expectargs = 3;
856 			parent->pos = EQNPOS_SUBSUP;
857 			break;
858 		}
859 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
860 			parent->expectargs = 3;
861 			parent->pos = EQNPOS_FROMTO;
862 			break;
863 		}
864 		parent = eqn_box_makebinary(ep, parent);
865 		switch (tok) {
866 		case EQN_TOK_FROM:
867 			parent->pos = EQNPOS_FROM;
868 			break;
869 		case EQN_TOK_TO:
870 			parent->pos = EQNPOS_TO;
871 			break;
872 		case EQN_TOK_SUP:
873 			parent->pos = EQNPOS_SUP;
874 			break;
875 		case EQN_TOK_SUB:
876 			parent->pos = EQNPOS_SUB;
877 			break;
878 		default:
879 			abort();
880 		}
881 		break;
882 	case EQN_TOK_SQRT:
883 		while (parent->args == parent->expectargs)
884 			parent = parent->parent;
885 		/*
886 		 * Accept a left-right-associative set of arguments just
887 		 * like sub and sup and friends but without rebalancing
888 		 * under a pivot.
889 		 */
890 		parent = eqn_box_alloc(ep, parent);
891 		parent->type = EQN_SUBEXPR;
892 		parent->pos = EQNPOS_SQRT;
893 		parent->expectargs = 1;
894 		break;
895 	case EQN_TOK_OVER:
896 		/*
897 		 * We have a right-left-associative fraction.
898 		 * Close out anything that's currently open, then
899 		 * rebalance and continue reading.
900 		 */
901 		if (parent->last == NULL) {
902 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
903 			    ep->node->pos, "%s", eqn_toks[tok]);
904 			cur = eqn_box_alloc(ep, parent);
905 			cur->type = EQN_TEXT;
906 			cur->text = mandoc_strdup("");
907 		}
908 		while (parent->args == parent->expectargs)
909 			parent = parent->parent;
910 		while (EQN_SUBEXPR == parent->type)
911 			parent = parent->parent;
912 		parent = eqn_box_makebinary(ep, parent);
913 		parent->pos = EQNPOS_OVER;
914 		break;
915 	case EQN_TOK_RIGHT:
916 	case EQN_TOK_BRACE_CLOSE:
917 		/*
918 		 * Close out the existing brace.
919 		 * FIXME: this is a shitty sentinel: we should really
920 		 * have a native EQN_BRACE type or whatnot.
921 		 */
922 		for (cur = parent; cur != NULL; cur = cur->parent)
923 			if (cur->type == EQN_LIST &&
924 			    cur->expectargs > 1 &&
925 			    (tok == EQN_TOK_BRACE_CLOSE ||
926 			     cur->left != NULL))
927 				break;
928 		if (cur == NULL) {
929 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
930 			    ep->node->pos, "%s", eqn_toks[tok]);
931 			break;
932 		}
933 		parent = cur;
934 		if (EQN_TOK_RIGHT == tok) {
935 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
936 				mandoc_msg(MANDOCERR_REQ_EMPTY,
937 				    ep->node->line, ep->node->pos,
938 				    "%s", eqn_toks[tok]);
939 				break;
940 			}
941 			/* Handling depends on right/left. */
942 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
943 				parent->right = mandoc_strdup("\\[rc]");
944 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
945 				parent->right = mandoc_strdup("\\[rf]");
946 			else
947 				parent->right =
948 				    mandoc_strndup(ep->start, ep->toksz);
949 		}
950 		parent = parent->parent;
951 		if (tok == EQN_TOK_BRACE_CLOSE &&
952 		    (parent->type == EQN_PILE ||
953 		     parent->type == EQN_MATRIX))
954 			parent = parent->parent;
955 		/* Close out any "singleton" lists. */
956 		while (parent->type == EQN_LIST &&
957 		    parent->expectargs == 1 &&
958 		    parent->args == 1)
959 			parent = parent->parent;
960 		break;
961 	case EQN_TOK_BRACE_OPEN:
962 	case EQN_TOK_LEFT:
963 		/*
964 		 * If we already have something in the stack and we're
965 		 * in an expression, then rewind til we're not any more
966 		 * (just like with the text node).
967 		 */
968 		while (parent->args == parent->expectargs)
969 			parent = parent->parent;
970 		if (EQN_TOK_LEFT == tok &&
971 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
972 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
973 			    ep->node->pos, "%s", eqn_toks[tok]);
974 			break;
975 		}
976 		parent = eqn_box_alloc(ep, parent);
977 		parent->type = EQN_LIST;
978 		if (EQN_TOK_LEFT == tok) {
979 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
980 				parent->left = mandoc_strdup("\\[lc]");
981 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
982 				parent->left = mandoc_strdup("\\[lf]");
983 			else
984 				parent->left =
985 				    mandoc_strndup(ep->start, ep->toksz);
986 		}
987 		break;
988 	case EQN_TOK_PILE:
989 	case EQN_TOK_LPILE:
990 	case EQN_TOK_RPILE:
991 	case EQN_TOK_CPILE:
992 	case EQN_TOK_CCOL:
993 	case EQN_TOK_LCOL:
994 	case EQN_TOK_RCOL:
995 		while (parent->args == parent->expectargs)
996 			parent = parent->parent;
997 		parent = eqn_box_alloc(ep, parent);
998 		parent->type = EQN_PILE;
999 		parent->expectargs = 1;
1000 		break;
1001 	case EQN_TOK_ABOVE:
1002 		for (cur = parent; cur != NULL; cur = cur->parent)
1003 			if (cur->type == EQN_PILE)
1004 				break;
1005 		if (cur == NULL) {
1006 			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1007 			    ep->node->pos, "%s", eqn_toks[tok]);
1008 			break;
1009 		}
1010 		parent = eqn_box_alloc(ep, cur);
1011 		parent->type = EQN_LIST;
1012 		break;
1013 	case EQN_TOK_MATRIX:
1014 		while (parent->args == parent->expectargs)
1015 			parent = parent->parent;
1016 		parent = eqn_box_alloc(ep, parent);
1017 		parent->type = EQN_MATRIX;
1018 		parent->expectargs = 1;
1019 		break;
1020 	case EQN_TOK_EOF:
1021 		return;
1022 	case EQN_TOK__MAX:
1023 	case EQN_TOK_FUNC:
1024 	case EQN_TOK_QUOTED:
1025 	case EQN_TOK_SYM:
1026 		p = ep->start;
1027 		assert(p != NULL);
1028 		/*
1029 		 * If we already have something in the stack and we're
1030 		 * in an expression, then rewind til we're not any more.
1031 		 */
1032 		while (parent->args == parent->expectargs)
1033 			parent = parent->parent;
1034 		cur = eqn_box_alloc(ep, parent);
1035 		cur->type = EQN_TEXT;
1036 		cur->text = p;
1037 		switch (tok) {
1038 		case EQN_TOK_FUNC:
1039 			cur->font = EQNFONT_ROMAN;
1040 			break;
1041 		case EQN_TOK_QUOTED:
1042 			if (cur->font == EQNFONT_NONE)
1043 				cur->font = EQNFONT_ITALIC;
1044 			break;
1045 		case EQN_TOK_SYM:
1046 			break;
1047 		default:
1048 			if (cur->font != EQNFONT_NONE || *p == '\0')
1049 				break;
1050 			cpn = p - 1;
1051 			ccln = CCL_LET;
1052 			split = NULL;
1053 			for (;;) {
1054 				/* Advance to next character. */
1055 				cp = cpn++;
1056 				ccl = ccln;
1057 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1058 				    isdigit((unsigned char)*cpn) ||
1059 				    (*cpn == '.' && (ccl == CCL_DIG ||
1060 				     isdigit((unsigned char)cpn[1]))) ?
1061 				    CCL_DIG : CCL_PUN;
1062 				/* No boundary before first character. */
1063 				if (cp < p)
1064 					continue;
1065 				cur->font = ccl == CCL_LET ?
1066 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1067 				if (*cp == '\\')
1068 					mandoc_escape(&cpn, NULL, NULL);
1069 				/* No boundary after last character. */
1070 				if (*cpn == '\0')
1071 					break;
1072 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1073 					continue;
1074 				/* Boundary found, split the text. */
1075 				if (parent->args == parent->expectargs) {
1076 					/* Remove the text from the tree. */
1077 					if (cur->prev == NULL)
1078 						parent->first = cur->next;
1079 					else
1080 						cur->prev->next = NULL;
1081 					parent->last = cur->prev;
1082 					parent->args--;
1083 					/* Set up a list instead. */
1084 					split = eqn_box_alloc(ep, parent);
1085 					split->type = EQN_LIST;
1086 					/* Insert the word into the list. */
1087 					split->first = split->last = cur;
1088 					cur->parent = split;
1089 					cur->prev = NULL;
1090 					parent = split;
1091 				}
1092 				/* Append a new text box. */
1093 				nbox = eqn_box_alloc(ep, parent);
1094 				nbox->type = EQN_TEXT;
1095 				nbox->text = mandoc_strdup(cpn);
1096 				/* Truncate the old box. */
1097 				p = mandoc_strndup(cur->text,
1098 				    cpn - cur->text);
1099 				free(cur->text);
1100 				cur->text = p;
1101 				/* Setup to process the new box. */
1102 				cur = nbox;
1103 				p = nbox->text;
1104 				cpn = p - 1;
1105 				ccln = CCL_LET;
1106 			}
1107 			if (split != NULL)
1108 				parent = split->parent;
1109 			break;
1110 		}
1111 		break;
1112 	default:
1113 		abort();
1114 	}
1115 	goto next_tok;
1116 }
1117 
1118 void
eqn_free(struct eqn_node * p)1119 eqn_free(struct eqn_node *p)
1120 {
1121 	int		 i;
1122 
1123 	if (p == NULL)
1124 		return;
1125 
1126 	for (i = 0; i < (int)p->defsz; i++) {
1127 		free(p->defs[i].key);
1128 		free(p->defs[i].val);
1129 	}
1130 
1131 	free(p->data);
1132 	free(p->defs);
1133 	free(p);
1134 }
1135