xref: /openbsd/usr.bin/mandoc/eqn.c (revision 771fbea0)
1 /*	$OpenBSD: eqn.c,v 1.47 2020/01/08 12:09:14 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "eqn.h"
32 #include "libmandoc.h"
33 #include "eqn_parse.h"
34 
35 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
36 #define	STRNEQ(p1, sz1, p2, sz2) \
37 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
38 
39 enum	eqn_tok {
40 	EQN_TOK_DYAD = 0,
41 	EQN_TOK_VEC,
42 	EQN_TOK_UNDER,
43 	EQN_TOK_BAR,
44 	EQN_TOK_TILDE,
45 	EQN_TOK_HAT,
46 	EQN_TOK_DOT,
47 	EQN_TOK_DOTDOT,
48 	EQN_TOK_FWD,
49 	EQN_TOK_BACK,
50 	EQN_TOK_DOWN,
51 	EQN_TOK_UP,
52 	EQN_TOK_FAT,
53 	EQN_TOK_ROMAN,
54 	EQN_TOK_ITALIC,
55 	EQN_TOK_BOLD,
56 	EQN_TOK_SIZE,
57 	EQN_TOK_SUB,
58 	EQN_TOK_SUP,
59 	EQN_TOK_SQRT,
60 	EQN_TOK_OVER,
61 	EQN_TOK_FROM,
62 	EQN_TOK_TO,
63 	EQN_TOK_BRACE_OPEN,
64 	EQN_TOK_BRACE_CLOSE,
65 	EQN_TOK_GSIZE,
66 	EQN_TOK_GFONT,
67 	EQN_TOK_MARK,
68 	EQN_TOK_LINEUP,
69 	EQN_TOK_LEFT,
70 	EQN_TOK_RIGHT,
71 	EQN_TOK_PILE,
72 	EQN_TOK_LPILE,
73 	EQN_TOK_RPILE,
74 	EQN_TOK_CPILE,
75 	EQN_TOK_MATRIX,
76 	EQN_TOK_CCOL,
77 	EQN_TOK_LCOL,
78 	EQN_TOK_RCOL,
79 	EQN_TOK_DELIM,
80 	EQN_TOK_DEFINE,
81 	EQN_TOK_TDEFINE,
82 	EQN_TOK_NDEFINE,
83 	EQN_TOK_UNDEF,
84 	EQN_TOK_ABOVE,
85 	EQN_TOK__MAX,
86 	EQN_TOK_FUNC,
87 	EQN_TOK_QUOTED,
88 	EQN_TOK_SYM,
89 	EQN_TOK_EOF
90 };
91 
92 static	const char *eqn_toks[EQN_TOK__MAX] = {
93 	"dyad", /* EQN_TOK_DYAD */
94 	"vec", /* EQN_TOK_VEC */
95 	"under", /* EQN_TOK_UNDER */
96 	"bar", /* EQN_TOK_BAR */
97 	"tilde", /* EQN_TOK_TILDE */
98 	"hat", /* EQN_TOK_HAT */
99 	"dot", /* EQN_TOK_DOT */
100 	"dotdot", /* EQN_TOK_DOTDOT */
101 	"fwd", /* EQN_TOK_FWD * */
102 	"back", /* EQN_TOK_BACK */
103 	"down", /* EQN_TOK_DOWN */
104 	"up", /* EQN_TOK_UP */
105 	"fat", /* EQN_TOK_FAT */
106 	"roman", /* EQN_TOK_ROMAN */
107 	"italic", /* EQN_TOK_ITALIC */
108 	"bold", /* EQN_TOK_BOLD */
109 	"size", /* EQN_TOK_SIZE */
110 	"sub", /* EQN_TOK_SUB */
111 	"sup", /* EQN_TOK_SUP */
112 	"sqrt", /* EQN_TOK_SQRT */
113 	"over", /* EQN_TOK_OVER */
114 	"from", /* EQN_TOK_FROM */
115 	"to", /* EQN_TOK_TO */
116 	"{", /* EQN_TOK_BRACE_OPEN */
117 	"}", /* EQN_TOK_BRACE_CLOSE */
118 	"gsize", /* EQN_TOK_GSIZE */
119 	"gfont", /* EQN_TOK_GFONT */
120 	"mark", /* EQN_TOK_MARK */
121 	"lineup", /* EQN_TOK_LINEUP */
122 	"left", /* EQN_TOK_LEFT */
123 	"right", /* EQN_TOK_RIGHT */
124 	"pile", /* EQN_TOK_PILE */
125 	"lpile", /* EQN_TOK_LPILE */
126 	"rpile", /* EQN_TOK_RPILE */
127 	"cpile", /* EQN_TOK_CPILE */
128 	"matrix", /* EQN_TOK_MATRIX */
129 	"ccol", /* EQN_TOK_CCOL */
130 	"lcol", /* EQN_TOK_LCOL */
131 	"rcol", /* EQN_TOK_RCOL */
132 	"delim", /* EQN_TOK_DELIM */
133 	"define", /* EQN_TOK_DEFINE */
134 	"tdefine", /* EQN_TOK_TDEFINE */
135 	"ndefine", /* EQN_TOK_NDEFINE */
136 	"undef", /* EQN_TOK_UNDEF */
137 	"above", /* EQN_TOK_ABOVE */
138 };
139 
140 static	const char *const eqn_func[] = {
141 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
142 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
143 	"if",	"lim",	"ln",	"log",	"max",	"min",
144 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
145 };
146 
147 enum	eqn_symt {
148 	EQNSYM_alpha = 0,
149 	EQNSYM_beta,
150 	EQNSYM_chi,
151 	EQNSYM_delta,
152 	EQNSYM_epsilon,
153 	EQNSYM_eta,
154 	EQNSYM_gamma,
155 	EQNSYM_iota,
156 	EQNSYM_kappa,
157 	EQNSYM_lambda,
158 	EQNSYM_mu,
159 	EQNSYM_nu,
160 	EQNSYM_omega,
161 	EQNSYM_omicron,
162 	EQNSYM_phi,
163 	EQNSYM_pi,
164 	EQNSYM_ps,
165 	EQNSYM_rho,
166 	EQNSYM_sigma,
167 	EQNSYM_tau,
168 	EQNSYM_theta,
169 	EQNSYM_upsilon,
170 	EQNSYM_xi,
171 	EQNSYM_zeta,
172 	EQNSYM_DELTA,
173 	EQNSYM_GAMMA,
174 	EQNSYM_LAMBDA,
175 	EQNSYM_OMEGA,
176 	EQNSYM_PHI,
177 	EQNSYM_PI,
178 	EQNSYM_PSI,
179 	EQNSYM_SIGMA,
180 	EQNSYM_THETA,
181 	EQNSYM_UPSILON,
182 	EQNSYM_XI,
183 	EQNSYM_inter,
184 	EQNSYM_union,
185 	EQNSYM_prod,
186 	EQNSYM_int,
187 	EQNSYM_sum,
188 	EQNSYM_grad,
189 	EQNSYM_del,
190 	EQNSYM_times,
191 	EQNSYM_cdot,
192 	EQNSYM_nothing,
193 	EQNSYM_approx,
194 	EQNSYM_prime,
195 	EQNSYM_half,
196 	EQNSYM_partial,
197 	EQNSYM_inf,
198 	EQNSYM_muchgreat,
199 	EQNSYM_muchless,
200 	EQNSYM_larrow,
201 	EQNSYM_rarrow,
202 	EQNSYM_pm,
203 	EQNSYM_nequal,
204 	EQNSYM_equiv,
205 	EQNSYM_lessequal,
206 	EQNSYM_moreequal,
207 	EQNSYM_minus,
208 	EQNSYM__MAX
209 };
210 
211 struct	eqnsym {
212 	const char	*str;
213 	const char	*sym;
214 };
215 
216 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
217 	{ "alpha", "*a" }, /* EQNSYM_alpha */
218 	{ "beta", "*b" }, /* EQNSYM_beta */
219 	{ "chi", "*x" }, /* EQNSYM_chi */
220 	{ "delta", "*d" }, /* EQNSYM_delta */
221 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
222 	{ "eta", "*y" }, /* EQNSYM_eta */
223 	{ "gamma", "*g" }, /* EQNSYM_gamma */
224 	{ "iota", "*i" }, /* EQNSYM_iota */
225 	{ "kappa", "*k" }, /* EQNSYM_kappa */
226 	{ "lambda", "*l" }, /* EQNSYM_lambda */
227 	{ "mu", "*m" }, /* EQNSYM_mu */
228 	{ "nu", "*n" }, /* EQNSYM_nu */
229 	{ "omega", "*w" }, /* EQNSYM_omega */
230 	{ "omicron", "*o" }, /* EQNSYM_omicron */
231 	{ "phi", "*f" }, /* EQNSYM_phi */
232 	{ "pi", "*p" }, /* EQNSYM_pi */
233 	{ "psi", "*q" }, /* EQNSYM_psi */
234 	{ "rho", "*r" }, /* EQNSYM_rho */
235 	{ "sigma", "*s" }, /* EQNSYM_sigma */
236 	{ "tau", "*t" }, /* EQNSYM_tau */
237 	{ "theta", "*h" }, /* EQNSYM_theta */
238 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
239 	{ "xi", "*c" }, /* EQNSYM_xi */
240 	{ "zeta", "*z" }, /* EQNSYM_zeta */
241 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
242 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
243 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
244 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
245 	{ "PHI", "*F" }, /* EQNSYM_PHI */
246 	{ "PI", "*P" }, /* EQNSYM_PI */
247 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
248 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
249 	{ "THETA", "*H" }, /* EQNSYM_THETA */
250 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
251 	{ "XI", "*C" }, /* EQNSYM_XI */
252 	{ "inter", "ca" }, /* EQNSYM_inter */
253 	{ "union", "cu" }, /* EQNSYM_union */
254 	{ "prod", "product" }, /* EQNSYM_prod */
255 	{ "int", "integral" }, /* EQNSYM_int */
256 	{ "sum", "sum" }, /* EQNSYM_sum */
257 	{ "grad", "gr" }, /* EQNSYM_grad */
258 	{ "del", "gr" }, /* EQNSYM_del */
259 	{ "times", "mu" }, /* EQNSYM_times */
260 	{ "cdot", "pc" }, /* EQNSYM_cdot */
261 	{ "nothing", "&" }, /* EQNSYM_nothing */
262 	{ "approx", "~~" }, /* EQNSYM_approx */
263 	{ "prime", "fm" }, /* EQNSYM_prime */
264 	{ "half", "12" }, /* EQNSYM_half */
265 	{ "partial", "pd" }, /* EQNSYM_partial */
266 	{ "inf", "if" }, /* EQNSYM_inf */
267 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
268 	{ "<<", "<<" }, /* EQNSYM_muchless */
269 	{ "<-", "<-" }, /* EQNSYM_larrow */
270 	{ "->", "->" }, /* EQNSYM_rarrow */
271 	{ "+-", "+-" }, /* EQNSYM_pm */
272 	{ "!=", "!=" }, /* EQNSYM_nequal */
273 	{ "==", "==" }, /* EQNSYM_equiv */
274 	{ "<=", "<=" }, /* EQNSYM_lessequal */
275 	{ ">=", ">=" }, /* EQNSYM_moreequal */
276 	{ "-", "mi" }, /* EQNSYM_minus */
277 };
278 
279 enum	parse_mode {
280 	MODE_QUOTED,
281 	MODE_NOSUB,
282 	MODE_SUB,
283 	MODE_TOK
284 };
285 
286 struct	eqn_def {
287 	char		 *key;
288 	size_t		  keysz;
289 	char		 *val;
290 	size_t		  valsz;
291 };
292 
293 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
294 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
295 				struct eqn_box *);
296 static	void		 eqn_def(struct eqn_node *);
297 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
298 static	void		 eqn_delim(struct eqn_node *);
299 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
300 static	void		 eqn_undef(struct eqn_node *);
301 
302 
303 struct eqn_node *
304 eqn_alloc(void)
305 {
306 	struct eqn_node *ep;
307 
308 	ep = mandoc_calloc(1, sizeof(*ep));
309 	ep->gsize = EQN_DEFSIZE;
310 	return ep;
311 }
312 
313 void
314 eqn_reset(struct eqn_node *ep)
315 {
316 	free(ep->data);
317 	ep->data = ep->start = ep->end = NULL;
318 	ep->sz = ep->toksz = 0;
319 }
320 
321 void
322 eqn_read(struct eqn_node *ep, const char *p)
323 {
324 	char		*cp;
325 
326 	if (ep->data == NULL) {
327 		ep->sz = strlen(p);
328 		ep->data = mandoc_strdup(p);
329 	} else {
330 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
331 		free(ep->data);
332 		ep->data = cp;
333 	}
334 	ep->sz += 1;
335 }
336 
337 /*
338  * Find the key "key" of the give size within our eqn-defined values.
339  */
340 static struct eqn_def *
341 eqn_def_find(struct eqn_node *ep)
342 {
343 	int		 i;
344 
345 	for (i = 0; i < (int)ep->defsz; i++)
346 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
347 		    ep->defs[i].keysz, ep->start, ep->toksz))
348 			return &ep->defs[i];
349 
350 	return NULL;
351 }
352 
353 /*
354  * Parse a token from the input text.  The modes are:
355  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
356  *   before its next occurence.  Do not interpret the token in any
357  *   way and return EQN_TOK_QUOTED.  All other modes behave like
358  *   MODE_QUOTED when *ep->start is '"'.
359  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
360  *   otherwise, it ends before the next whitespace or brace.
361  *   Do not interpret the token and return EQN_TOK__MAX.
362  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
363  *   alias created with define.  If it is an alias, replace it with
364  *   its string value and reparse.
365  * MODE_TOK: Like MODE_SUB, but also check the token against the list
366  *   of tokens, and if there is a match, return that token.  Otherwise,
367  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
368  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
369  *   a token match, *ep->start is set to an allocated string that the
370  *   caller is expected to free.
371  * All modes skip whitespace following the end of the token.
372  */
373 static enum eqn_tok
374 eqn_next(struct eqn_node *ep, enum parse_mode mode)
375 {
376 	static int	 last_len, lim;
377 
378 	struct eqn_def	*def;
379 	size_t		 start;
380 	int		 diff, i, quoted;
381 	enum eqn_tok	 tok;
382 
383 	/*
384 	 * Reset the recursion counter after advancing
385 	 * beyond the end of the previous substitution.
386 	 */
387 	if (ep->end - ep->data >= last_len)
388 		lim = 0;
389 
390 	ep->start = ep->end;
391 	quoted = mode == MODE_QUOTED;
392 	for (;;) {
393 		switch (*ep->start) {
394 		case '\0':
395 			ep->toksz = 0;
396 			return EQN_TOK_EOF;
397 		case '"':
398 			quoted = 1;
399 			break;
400 		case ' ':
401 		case '\t':
402 		case '~':
403 		case '^':
404 			if (quoted)
405 				break;
406 			ep->start++;
407 			continue;
408 		default:
409 			break;
410 		}
411 		if (quoted) {
412 			ep->end = strchr(ep->start + 1, *ep->start);
413 			ep->start++;  /* Skip opening quote. */
414 			if (ep->end == NULL) {
415 				mandoc_msg(MANDOCERR_ARG_QUOTE,
416 				    ep->node->line, ep->node->pos, NULL);
417 				ep->end = strchr(ep->start, '\0');
418 			}
419 		} else {
420 			ep->end = ep->start + 1;
421 			if (*ep->start != '{' && *ep->start != '}')
422 				ep->end += strcspn(ep->end, " ^~\"{}\t");
423 		}
424 		ep->toksz = ep->end - ep->start;
425 		if (quoted && *ep->end != '\0')
426 			ep->end++;  /* Skip closing quote. */
427 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
428 			ep->end++;
429 		if (quoted)  /* Cannot return, may have to strndup. */
430 			break;
431 		if (mode == MODE_NOSUB)
432 			return EQN_TOK__MAX;
433 		if ((def = eqn_def_find(ep)) == NULL)
434 			break;
435 		if (++lim > EQN_NEST_MAX) {
436 			mandoc_msg(MANDOCERR_ROFFLOOP,
437 			    ep->node->line, ep->node->pos, NULL);
438 			return EQN_TOK_EOF;
439 		}
440 
441 		/* Replace a defined name with its string value. */
442 		if ((diff = def->valsz - ep->toksz) > 0) {
443 			start = ep->start - ep->data;
444 			ep->sz += diff;
445 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
446 			ep->start = ep->data + start;
447 		}
448 		if (diff)
449 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
450 			    strlen(ep->start + ep->toksz) + 1);
451 		memcpy(ep->start, def->val, def->valsz);
452 		last_len = ep->start - ep->data + def->valsz;
453 	}
454 	if (mode != MODE_TOK)
455 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
456 	if (quoted) {
457 		ep->start = mandoc_strndup(ep->start, ep->toksz);
458 		return EQN_TOK_QUOTED;
459 	}
460 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
461 		if (STRNEQ(ep->start, ep->toksz,
462 		    eqn_toks[tok], strlen(eqn_toks[tok])))
463 			return tok;
464 
465 	for (i = 0; i < EQNSYM__MAX; i++) {
466 		if (STRNEQ(ep->start, ep->toksz,
467 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
468 			mandoc_asprintf(&ep->start,
469 			    "\\[%s]", eqnsyms[i].sym);
470 			return EQN_TOK_SYM;
471 		}
472 	}
473 	ep->start = mandoc_strndup(ep->start, ep->toksz);
474 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
475 		if (STRNEQ(ep->start, ep->toksz,
476 		    eqn_func[i], strlen(eqn_func[i])))
477 			return EQN_TOK_FUNC;
478 	return EQN_TOK__MAX;
479 }
480 
481 void
482 eqn_box_free(struct eqn_box *bp)
483 {
484 	if (bp == NULL)
485 		return;
486 
487 	if (bp->first)
488 		eqn_box_free(bp->first);
489 	if (bp->next)
490 		eqn_box_free(bp->next);
491 
492 	free(bp->text);
493 	free(bp->left);
494 	free(bp->right);
495 	free(bp->top);
496 	free(bp->bottom);
497 	free(bp);
498 }
499 
500 struct eqn_box *
501 eqn_box_new(void)
502 {
503 	struct eqn_box	*bp;
504 
505 	bp = mandoc_calloc(1, sizeof(*bp));
506 	bp->expectargs = UINT_MAX;
507 	return bp;
508 }
509 
510 /*
511  * Allocate a box as the last child of the parent node.
512  */
513 static struct eqn_box *
514 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
515 {
516 	struct eqn_box	*bp;
517 
518 	bp = eqn_box_new();
519 	bp->parent = parent;
520 	bp->parent->args++;
521 	bp->font = bp->parent->font;
522 	bp->size = ep->gsize;
523 
524 	if (NULL != parent->first) {
525 		parent->last->next = bp;
526 		bp->prev = parent->last;
527 	} else
528 		parent->first = bp;
529 
530 	parent->last = bp;
531 	return bp;
532 }
533 
534 /*
535  * Reparent the current last node (of the current parent) under a new
536  * EQN_SUBEXPR as the first element.
537  * Then return the new parent.
538  * The new EQN_SUBEXPR will have a two-child limit.
539  */
540 static struct eqn_box *
541 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
542 {
543 	struct eqn_box	*b, *newb;
544 
545 	assert(NULL != parent->last);
546 	b = parent->last;
547 	if (parent->last == parent->first)
548 		parent->first = NULL;
549 	parent->args--;
550 	parent->last = b->prev;
551 	b->prev = NULL;
552 	newb = eqn_box_alloc(ep, parent);
553 	newb->type = EQN_SUBEXPR;
554 	newb->expectargs = 2;
555 	newb->args = 1;
556 	newb->first = newb->last = b;
557 	newb->first->next = NULL;
558 	b->parent = newb;
559 	return newb;
560 }
561 
562 /*
563  * Parse the "delim" control statement.
564  */
565 static void
566 eqn_delim(struct eqn_node *ep)
567 {
568 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
569 		mandoc_msg(MANDOCERR_REQ_EMPTY,
570 		    ep->node->line, ep->node->pos, "delim");
571 		if (ep->end[0] != '\0')
572 			ep->end++;
573 	} else if (strncmp(ep->end, "off", 3) == 0) {
574 		ep->delim = 0;
575 		ep->end += 3;
576 	} else if (strncmp(ep->end, "on", 2) == 0) {
577 		if (ep->odelim && ep->cdelim)
578 			ep->delim = 1;
579 		ep->end += 2;
580 	} else {
581 		ep->odelim = *ep->end++;
582 		ep->cdelim = *ep->end++;
583 		ep->delim = 1;
584 	}
585 }
586 
587 /*
588  * Undefine a previously-defined string.
589  */
590 static void
591 eqn_undef(struct eqn_node *ep)
592 {
593 	struct eqn_def	*def;
594 
595 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
596 		mandoc_msg(MANDOCERR_REQ_EMPTY,
597 		    ep->node->line, ep->node->pos, "undef");
598 		return;
599 	}
600 	if ((def = eqn_def_find(ep)) == NULL)
601 		return;
602 	free(def->key);
603 	free(def->val);
604 	def->key = def->val = NULL;
605 	def->keysz = def->valsz = 0;
606 }
607 
608 static void
609 eqn_def(struct eqn_node *ep)
610 {
611 	struct eqn_def	*def;
612 	int		 i;
613 
614 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
615 		mandoc_msg(MANDOCERR_REQ_EMPTY,
616 		    ep->node->line, ep->node->pos, "define");
617 		return;
618 	}
619 
620 	/*
621 	 * Search for a key that already exists.
622 	 * Create a new key if none is found.
623 	 */
624 	if ((def = eqn_def_find(ep)) == NULL) {
625 		/* Find holes in string array. */
626 		for (i = 0; i < (int)ep->defsz; i++)
627 			if (0 == ep->defs[i].keysz)
628 				break;
629 
630 		if (i == (int)ep->defsz) {
631 			ep->defsz++;
632 			ep->defs = mandoc_reallocarray(ep->defs,
633 			    ep->defsz, sizeof(struct eqn_def));
634 			ep->defs[i].key = ep->defs[i].val = NULL;
635 		}
636 
637 		def = ep->defs + i;
638 		free(def->key);
639 		def->key = mandoc_strndup(ep->start, ep->toksz);
640 		def->keysz = ep->toksz;
641 	}
642 
643 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
644 		mandoc_msg(MANDOCERR_REQ_EMPTY,
645 		    ep->node->line, ep->node->pos, "define %s", def->key);
646 		free(def->key);
647 		free(def->val);
648 		def->key = def->val = NULL;
649 		def->keysz = def->valsz = 0;
650 		return;
651 	}
652 	free(def->val);
653 	def->val = mandoc_strndup(ep->start, ep->toksz);
654 	def->valsz = ep->toksz;
655 }
656 
657 void
658 eqn_parse(struct eqn_node *ep)
659 {
660 	struct eqn_box	*cur, *nbox, *parent, *split;
661 	const char	*cp, *cpn;
662 	char		*p;
663 	enum eqn_tok	 tok;
664 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
665 	int		 size;
666 
667 	parent = ep->node->eqn;
668 	assert(parent != NULL);
669 
670 	/*
671 	 * Empty equation.
672 	 * Do not add it to the high-level syntax tree.
673 	 */
674 
675 	if (ep->data == NULL)
676 		return;
677 
678 	ep->start = ep->end = ep->data;
679 
680 next_tok:
681 	tok = eqn_next(ep, MODE_TOK);
682 	switch (tok) {
683 	case EQN_TOK_UNDEF:
684 		eqn_undef(ep);
685 		break;
686 	case EQN_TOK_NDEFINE:
687 	case EQN_TOK_DEFINE:
688 		eqn_def(ep);
689 		break;
690 	case EQN_TOK_TDEFINE:
691 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
692 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
693 			mandoc_msg(MANDOCERR_REQ_EMPTY,
694 			    ep->node->line, ep->node->pos, "tdefine");
695 		break;
696 	case EQN_TOK_DELIM:
697 		eqn_delim(ep);
698 		break;
699 	case EQN_TOK_GFONT:
700 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
701 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
702 			    ep->node->pos, "%s", eqn_toks[tok]);
703 		break;
704 	case EQN_TOK_MARK:
705 	case EQN_TOK_LINEUP:
706 		/* Ignore these. */
707 		break;
708 	case EQN_TOK_DYAD:
709 	case EQN_TOK_VEC:
710 	case EQN_TOK_UNDER:
711 	case EQN_TOK_BAR:
712 	case EQN_TOK_TILDE:
713 	case EQN_TOK_HAT:
714 	case EQN_TOK_DOT:
715 	case EQN_TOK_DOTDOT:
716 		if (parent->last == NULL) {
717 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
718 			    ep->node->pos, "%s", eqn_toks[tok]);
719 			cur = eqn_box_alloc(ep, parent);
720 			cur->type = EQN_TEXT;
721 			cur->text = mandoc_strdup("");
722 		}
723 		parent = eqn_box_makebinary(ep, parent);
724 		parent->type = EQN_LIST;
725 		parent->expectargs = 1;
726 		parent->font = EQNFONT_ROMAN;
727 		switch (tok) {
728 		case EQN_TOK_DOTDOT:
729 			parent->top = mandoc_strdup("\\[ad]");
730 			break;
731 		case EQN_TOK_VEC:
732 			parent->top = mandoc_strdup("\\[->]");
733 			break;
734 		case EQN_TOK_DYAD:
735 			parent->top = mandoc_strdup("\\[<>]");
736 			break;
737 		case EQN_TOK_TILDE:
738 			parent->top = mandoc_strdup("\\[a~]");
739 			break;
740 		case EQN_TOK_UNDER:
741 			parent->bottom = mandoc_strdup("\\[ul]");
742 			break;
743 		case EQN_TOK_BAR:
744 			parent->top = mandoc_strdup("\\[rn]");
745 			break;
746 		case EQN_TOK_DOT:
747 			parent->top = mandoc_strdup("\\[a.]");
748 			break;
749 		case EQN_TOK_HAT:
750 			parent->top = mandoc_strdup("\\[ha]");
751 			break;
752 		default:
753 			abort();
754 		}
755 		parent = parent->parent;
756 		break;
757 	case EQN_TOK_FWD:
758 	case EQN_TOK_BACK:
759 	case EQN_TOK_DOWN:
760 	case EQN_TOK_UP:
761 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
762 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
763 			    ep->node->pos, "%s", eqn_toks[tok]);
764 		break;
765 	case EQN_TOK_FAT:
766 	case EQN_TOK_ROMAN:
767 	case EQN_TOK_ITALIC:
768 	case EQN_TOK_BOLD:
769 		while (parent->args == parent->expectargs)
770 			parent = parent->parent;
771 		/*
772 		 * These values apply to the next word or sequence of
773 		 * words; thus, we mark that we'll have a child with
774 		 * exactly one of those.
775 		 */
776 		parent = eqn_box_alloc(ep, parent);
777 		parent->type = EQN_LIST;
778 		parent->expectargs = 1;
779 		switch (tok) {
780 		case EQN_TOK_FAT:
781 			parent->font = EQNFONT_FAT;
782 			break;
783 		case EQN_TOK_ROMAN:
784 			parent->font = EQNFONT_ROMAN;
785 			break;
786 		case EQN_TOK_ITALIC:
787 			parent->font = EQNFONT_ITALIC;
788 			break;
789 		case EQN_TOK_BOLD:
790 			parent->font = EQNFONT_BOLD;
791 			break;
792 		default:
793 			abort();
794 		}
795 		break;
796 	case EQN_TOK_SIZE:
797 	case EQN_TOK_GSIZE:
798 		/* Accept two values: integral size and a single. */
799 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
800 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
801 			    ep->node->pos, "%s", eqn_toks[tok]);
802 			break;
803 		}
804 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
805 		if (-1 == size) {
806 			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
807 			    ep->node->pos, "%s", eqn_toks[tok]);
808 			break;
809 		}
810 		if (EQN_TOK_GSIZE == tok) {
811 			ep->gsize = size;
812 			break;
813 		}
814 		while (parent->args == parent->expectargs)
815 			parent = parent->parent;
816 		parent = eqn_box_alloc(ep, parent);
817 		parent->type = EQN_LIST;
818 		parent->expectargs = 1;
819 		parent->size = size;
820 		break;
821 	case EQN_TOK_FROM:
822 	case EQN_TOK_TO:
823 	case EQN_TOK_SUB:
824 	case EQN_TOK_SUP:
825 		/*
826 		 * We have a left-right-associative expression.
827 		 * Repivot under a positional node, open a child scope
828 		 * and keep on reading.
829 		 */
830 		if (parent->last == NULL) {
831 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
832 			    ep->node->pos, "%s", eqn_toks[tok]);
833 			cur = eqn_box_alloc(ep, parent);
834 			cur->type = EQN_TEXT;
835 			cur->text = mandoc_strdup("");
836 		}
837 		while (parent->expectargs == 1 && parent->args == 1)
838 			parent = parent->parent;
839 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
840 			for (cur = parent; cur != NULL; cur = cur->parent)
841 				if (cur->pos == EQNPOS_SUB ||
842 				    cur->pos == EQNPOS_SUP ||
843 				    cur->pos == EQNPOS_SUBSUP ||
844 				    cur->pos == EQNPOS_SQRT ||
845 				    cur->pos == EQNPOS_OVER)
846 					break;
847 			if (cur != NULL)
848 				parent = cur->parent;
849 		}
850 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
851 			parent->expectargs = 3;
852 			parent->pos = EQNPOS_SUBSUP;
853 			break;
854 		}
855 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
856 			parent->expectargs = 3;
857 			parent->pos = EQNPOS_FROMTO;
858 			break;
859 		}
860 		parent = eqn_box_makebinary(ep, parent);
861 		switch (tok) {
862 		case EQN_TOK_FROM:
863 			parent->pos = EQNPOS_FROM;
864 			break;
865 		case EQN_TOK_TO:
866 			parent->pos = EQNPOS_TO;
867 			break;
868 		case EQN_TOK_SUP:
869 			parent->pos = EQNPOS_SUP;
870 			break;
871 		case EQN_TOK_SUB:
872 			parent->pos = EQNPOS_SUB;
873 			break;
874 		default:
875 			abort();
876 		}
877 		break;
878 	case EQN_TOK_SQRT:
879 		while (parent->args == parent->expectargs)
880 			parent = parent->parent;
881 		/*
882 		 * Accept a left-right-associative set of arguments just
883 		 * like sub and sup and friends but without rebalancing
884 		 * under a pivot.
885 		 */
886 		parent = eqn_box_alloc(ep, parent);
887 		parent->type = EQN_SUBEXPR;
888 		parent->pos = EQNPOS_SQRT;
889 		parent->expectargs = 1;
890 		break;
891 	case EQN_TOK_OVER:
892 		/*
893 		 * We have a right-left-associative fraction.
894 		 * Close out anything that's currently open, then
895 		 * rebalance and continue reading.
896 		 */
897 		if (parent->last == NULL) {
898 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
899 			    ep->node->pos, "%s", eqn_toks[tok]);
900 			cur = eqn_box_alloc(ep, parent);
901 			cur->type = EQN_TEXT;
902 			cur->text = mandoc_strdup("");
903 		}
904 		while (parent->args == parent->expectargs)
905 			parent = parent->parent;
906 		while (EQN_SUBEXPR == parent->type)
907 			parent = parent->parent;
908 		parent = eqn_box_makebinary(ep, parent);
909 		parent->pos = EQNPOS_OVER;
910 		break;
911 	case EQN_TOK_RIGHT:
912 	case EQN_TOK_BRACE_CLOSE:
913 		/*
914 		 * Close out the existing brace.
915 		 * FIXME: this is a shitty sentinel: we should really
916 		 * have a native EQN_BRACE type or whatnot.
917 		 */
918 		for (cur = parent; cur != NULL; cur = cur->parent)
919 			if (cur->type == EQN_LIST &&
920 			    cur->expectargs > 1 &&
921 			    (tok == EQN_TOK_BRACE_CLOSE ||
922 			     cur->left != NULL))
923 				break;
924 		if (cur == NULL) {
925 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
926 			    ep->node->pos, "%s", eqn_toks[tok]);
927 			break;
928 		}
929 		parent = cur;
930 		if (EQN_TOK_RIGHT == tok) {
931 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
932 				mandoc_msg(MANDOCERR_REQ_EMPTY,
933 				    ep->node->line, ep->node->pos,
934 				    "%s", eqn_toks[tok]);
935 				break;
936 			}
937 			/* Handling depends on right/left. */
938 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
939 				parent->right = mandoc_strdup("\\[rc]");
940 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
941 				parent->right = mandoc_strdup("\\[rf]");
942 			else
943 				parent->right =
944 				    mandoc_strndup(ep->start, ep->toksz);
945 		}
946 		parent = parent->parent;
947 		if (tok == EQN_TOK_BRACE_CLOSE &&
948 		    (parent->type == EQN_PILE ||
949 		     parent->type == EQN_MATRIX))
950 			parent = parent->parent;
951 		/* Close out any "singleton" lists. */
952 		while (parent->type == EQN_LIST &&
953 		    parent->expectargs == 1 &&
954 		    parent->args == 1)
955 			parent = parent->parent;
956 		break;
957 	case EQN_TOK_BRACE_OPEN:
958 	case EQN_TOK_LEFT:
959 		/*
960 		 * If we already have something in the stack and we're
961 		 * in an expression, then rewind til we're not any more
962 		 * (just like with the text node).
963 		 */
964 		while (parent->args == parent->expectargs)
965 			parent = parent->parent;
966 		if (EQN_TOK_LEFT == tok &&
967 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
968 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
969 			    ep->node->pos, "%s", eqn_toks[tok]);
970 			break;
971 		}
972 		parent = eqn_box_alloc(ep, parent);
973 		parent->type = EQN_LIST;
974 		if (EQN_TOK_LEFT == tok) {
975 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
976 				parent->left = mandoc_strdup("\\[lc]");
977 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
978 				parent->left = mandoc_strdup("\\[lf]");
979 			else
980 				parent->left =
981 				    mandoc_strndup(ep->start, ep->toksz);
982 		}
983 		break;
984 	case EQN_TOK_PILE:
985 	case EQN_TOK_LPILE:
986 	case EQN_TOK_RPILE:
987 	case EQN_TOK_CPILE:
988 	case EQN_TOK_CCOL:
989 	case EQN_TOK_LCOL:
990 	case EQN_TOK_RCOL:
991 		while (parent->args == parent->expectargs)
992 			parent = parent->parent;
993 		parent = eqn_box_alloc(ep, parent);
994 		parent->type = EQN_PILE;
995 		parent->expectargs = 1;
996 		break;
997 	case EQN_TOK_ABOVE:
998 		for (cur = parent; cur != NULL; cur = cur->parent)
999 			if (cur->type == EQN_PILE)
1000 				break;
1001 		if (cur == NULL) {
1002 			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1003 			    ep->node->pos, "%s", eqn_toks[tok]);
1004 			break;
1005 		}
1006 		parent = eqn_box_alloc(ep, cur);
1007 		parent->type = EQN_LIST;
1008 		break;
1009 	case EQN_TOK_MATRIX:
1010 		while (parent->args == parent->expectargs)
1011 			parent = parent->parent;
1012 		parent = eqn_box_alloc(ep, parent);
1013 		parent->type = EQN_MATRIX;
1014 		parent->expectargs = 1;
1015 		break;
1016 	case EQN_TOK_EOF:
1017 		return;
1018 	case EQN_TOK__MAX:
1019 	case EQN_TOK_FUNC:
1020 	case EQN_TOK_QUOTED:
1021 	case EQN_TOK_SYM:
1022 		p = ep->start;
1023 		assert(p != NULL);
1024 		/*
1025 		 * If we already have something in the stack and we're
1026 		 * in an expression, then rewind til we're not any more.
1027 		 */
1028 		while (parent->args == parent->expectargs)
1029 			parent = parent->parent;
1030 		cur = eqn_box_alloc(ep, parent);
1031 		cur->type = EQN_TEXT;
1032 		cur->text = p;
1033 		switch (tok) {
1034 		case EQN_TOK_FUNC:
1035 			cur->font = EQNFONT_ROMAN;
1036 			break;
1037 		case EQN_TOK_QUOTED:
1038 			if (cur->font == EQNFONT_NONE)
1039 				cur->font = EQNFONT_ITALIC;
1040 			break;
1041 		case EQN_TOK_SYM:
1042 			break;
1043 		default:
1044 			if (cur->font != EQNFONT_NONE || *p == '\0')
1045 				break;
1046 			cpn = p - 1;
1047 			ccln = CCL_LET;
1048 			split = NULL;
1049 			for (;;) {
1050 				/* Advance to next character. */
1051 				cp = cpn++;
1052 				ccl = ccln;
1053 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1054 				    isdigit((unsigned char)*cpn) ||
1055 				    (*cpn == '.' && (ccl == CCL_DIG ||
1056 				     isdigit((unsigned char)cpn[1]))) ?
1057 				    CCL_DIG : CCL_PUN;
1058 				/* No boundary before first character. */
1059 				if (cp < p)
1060 					continue;
1061 				cur->font = ccl == CCL_LET ?
1062 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1063 				if (*cp == '\\')
1064 					mandoc_escape(&cpn, NULL, NULL);
1065 				/* No boundary after last character. */
1066 				if (*cpn == '\0')
1067 					break;
1068 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1069 					continue;
1070 				/* Boundary found, split the text. */
1071 				if (parent->args == parent->expectargs) {
1072 					/* Remove the text from the tree. */
1073 					if (cur->prev == NULL)
1074 						parent->first = cur->next;
1075 					else
1076 						cur->prev->next = NULL;
1077 					parent->last = cur->prev;
1078 					parent->args--;
1079 					/* Set up a list instead. */
1080 					split = eqn_box_alloc(ep, parent);
1081 					split->type = EQN_LIST;
1082 					/* Insert the word into the list. */
1083 					split->first = split->last = cur;
1084 					cur->parent = split;
1085 					cur->prev = NULL;
1086 					parent = split;
1087 				}
1088 				/* Append a new text box. */
1089 				nbox = eqn_box_alloc(ep, parent);
1090 				nbox->type = EQN_TEXT;
1091 				nbox->text = mandoc_strdup(cpn);
1092 				/* Truncate the old box. */
1093 				p = mandoc_strndup(cur->text,
1094 				    cpn - cur->text);
1095 				free(cur->text);
1096 				cur->text = p;
1097 				/* Setup to process the new box. */
1098 				cur = nbox;
1099 				p = nbox->text;
1100 				cpn = p - 1;
1101 				ccln = CCL_LET;
1102 			}
1103 			if (split != NULL)
1104 				parent = split->parent;
1105 			break;
1106 		}
1107 		break;
1108 	default:
1109 		abort();
1110 	}
1111 	goto next_tok;
1112 }
1113 
1114 void
1115 eqn_free(struct eqn_node *p)
1116 {
1117 	int		 i;
1118 
1119 	if (p == NULL)
1120 		return;
1121 
1122 	for (i = 0; i < (int)p->defsz; i++) {
1123 		free(p->defs[i].key);
1124 		free(p->defs[i].val);
1125 	}
1126 
1127 	free(p->data);
1128 	free(p->defs);
1129 	free(p);
1130 }
1131