xref: /openbsd/usr.bin/mandoc/eqn.c (revision a6445c1d)
1 /*	$OpenBSD: eqn.c,v 1.19 2014/10/25 15:05:21 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <limits.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <time.h>
26 
27 #include "mandoc.h"
28 #include "mandoc_aux.h"
29 #include "libmandoc.h"
30 #include "libroff.h"
31 
32 #define	EQN_MSG(t, x) \
33 	mandoc_msg((t), (x)->parse, (x)->eqn.ln, (x)->eqn.pos, NULL)
34 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
35 #define	STRNEQ(p1, sz1, p2, sz2) \
36 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
37 
38 enum	eqn_tok {
39 	EQN_TOK_DYAD = 0,
40 	EQN_TOK_VEC,
41 	EQN_TOK_UNDER,
42 	EQN_TOK_BAR,
43 	EQN_TOK_TILDE,
44 	EQN_TOK_HAT,
45 	EQN_TOK_DOT,
46 	EQN_TOK_DOTDOT,
47 	EQN_TOK_FWD,
48 	EQN_TOK_BACK,
49 	EQN_TOK_DOWN,
50 	EQN_TOK_UP,
51 	EQN_TOK_FAT,
52 	EQN_TOK_ROMAN,
53 	EQN_TOK_ITALIC,
54 	EQN_TOK_BOLD,
55 	EQN_TOK_SIZE,
56 	EQN_TOK_SUB,
57 	EQN_TOK_SUP,
58 	EQN_TOK_SQRT,
59 	EQN_TOK_OVER,
60 	EQN_TOK_FROM,
61 	EQN_TOK_TO,
62 	EQN_TOK_BRACE_OPEN,
63 	EQN_TOK_BRACE_CLOSE,
64 	EQN_TOK_GSIZE,
65 	EQN_TOK_GFONT,
66 	EQN_TOK_MARK,
67 	EQN_TOK_LINEUP,
68 	EQN_TOK_LEFT,
69 	EQN_TOK_RIGHT,
70 	EQN_TOK_PILE,
71 	EQN_TOK_LPILE,
72 	EQN_TOK_RPILE,
73 	EQN_TOK_CPILE,
74 	EQN_TOK_MATRIX,
75 	EQN_TOK_CCOL,
76 	EQN_TOK_LCOL,
77 	EQN_TOK_RCOL,
78 	EQN_TOK_DELIM,
79 	EQN_TOK_DEFINE,
80 	EQN_TOK_TDEFINE,
81 	EQN_TOK_NDEFINE,
82 	EQN_TOK_UNDEF,
83 	EQN_TOK_EOF,
84 	EQN_TOK_ABOVE,
85 	EQN_TOK__MAX
86 };
87 
88 static	const char *eqn_toks[EQN_TOK__MAX] = {
89 	"dyad", /* EQN_TOK_DYAD */
90 	"vec", /* EQN_TOK_VEC */
91 	"under", /* EQN_TOK_UNDER */
92 	"bar", /* EQN_TOK_BAR */
93 	"tilde", /* EQN_TOK_TILDE */
94 	"hat", /* EQN_TOK_HAT */
95 	"dot", /* EQN_TOK_DOT */
96 	"dotdot", /* EQN_TOK_DOTDOT */
97 	"fwd", /* EQN_TOK_FWD * */
98 	"back", /* EQN_TOK_BACK */
99 	"down", /* EQN_TOK_DOWN */
100 	"up", /* EQN_TOK_UP */
101 	"fat", /* EQN_TOK_FAT */
102 	"roman", /* EQN_TOK_ROMAN */
103 	"italic", /* EQN_TOK_ITALIC */
104 	"bold", /* EQN_TOK_BOLD */
105 	"size", /* EQN_TOK_SIZE */
106 	"sub", /* EQN_TOK_SUB */
107 	"sup", /* EQN_TOK_SUP */
108 	"sqrt", /* EQN_TOK_SQRT */
109 	"over", /* EQN_TOK_OVER */
110 	"from", /* EQN_TOK_FROM */
111 	"to", /* EQN_TOK_TO */
112 	"{", /* EQN_TOK_BRACE_OPEN */
113 	"}", /* EQN_TOK_BRACE_CLOSE */
114 	"gsize", /* EQN_TOK_GSIZE */
115 	"gfont", /* EQN_TOK_GFONT */
116 	"mark", /* EQN_TOK_MARK */
117 	"lineup", /* EQN_TOK_LINEUP */
118 	"left", /* EQN_TOK_LEFT */
119 	"right", /* EQN_TOK_RIGHT */
120 	"pile", /* EQN_TOK_PILE */
121 	"lpile", /* EQN_TOK_LPILE */
122 	"rpile", /* EQN_TOK_RPILE */
123 	"cpile", /* EQN_TOK_CPILE */
124 	"matrix", /* EQN_TOK_MATRIX */
125 	"ccol", /* EQN_TOK_CCOL */
126 	"lcol", /* EQN_TOK_LCOL */
127 	"rcol", /* EQN_TOK_RCOL */
128 	"delim", /* EQN_TOK_DELIM */
129 	"define", /* EQN_TOK_DEFINE */
130 	"tdefine", /* EQN_TOK_TDEFINE */
131 	"ndefine", /* EQN_TOK_NDEFINE */
132 	"undef", /* EQN_TOK_UNDEF */
133 	NULL, /* EQN_TOK_EOF */
134 	"above", /* EQN_TOK_ABOVE */
135 };
136 
137 enum	eqn_symt {
138 	EQNSYM_alpha,
139 	EQNSYM_beta,
140 	EQNSYM_chi,
141 	EQNSYM_delta,
142 	EQNSYM_epsilon,
143 	EQNSYM_eta,
144 	EQNSYM_gamma,
145 	EQNSYM_iota,
146 	EQNSYM_kappa,
147 	EQNSYM_lambda,
148 	EQNSYM_mu,
149 	EQNSYM_nu,
150 	EQNSYM_omega,
151 	EQNSYM_omicron,
152 	EQNSYM_phi,
153 	EQNSYM_pi,
154 	EQNSYM_ps,
155 	EQNSYM_rho,
156 	EQNSYM_sigma,
157 	EQNSYM_tau,
158 	EQNSYM_theta,
159 	EQNSYM_upsilon,
160 	EQNSYM_xi,
161 	EQNSYM_zeta,
162 	EQNSYM_DELTA,
163 	EQNSYM_GAMMA,
164 	EQNSYM_LAMBDA,
165 	EQNSYM_OMEGA,
166 	EQNSYM_PHI,
167 	EQNSYM_PI,
168 	EQNSYM_PSI,
169 	EQNSYM_SIGMA,
170 	EQNSYM_THETA,
171 	EQNSYM_UPSILON,
172 	EQNSYM_XI,
173 	EQNSYM_inter,
174 	EQNSYM_union,
175 	EQNSYM_prod,
176 	EQNSYM_int,
177 	EQNSYM_sum,
178 	EQNSYM_grad,
179 	EQNSYM_del,
180 	EQNSYM_times,
181 	EQNSYM_cdot,
182 	EQNSYM_nothing,
183 	EQNSYM_approx,
184 	EQNSYM_prime,
185 	EQNSYM_half,
186 	EQNSYM_partial,
187 	EQNSYM_inf,
188 	EQNSYM_muchgreat,
189 	EQNSYM_muchless,
190 	EQNSYM_larrow,
191 	EQNSYM_rarrow,
192 	EQNSYM_pm,
193 	EQNSYM_nequal,
194 	EQNSYM_equiv,
195 	EQNSYM_lessequal,
196 	EQNSYM_moreequal,
197 	EQNSYM__MAX
198 };
199 
200 struct	eqnsym {
201 	const char	*str;
202 	const char	*sym;
203 };
204 
205 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
206 	{ "alpha", "*a" }, /* EQNSYM_alpha */
207 	{ "beta", "*b" }, /* EQNSYM_beta */
208 	{ "chi", "*x" }, /* EQNSYM_chi */
209 	{ "delta", "*d" }, /* EQNSYM_delta */
210 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
211 	{ "eta", "*y" }, /* EQNSYM_eta */
212 	{ "gamma", "*g" }, /* EQNSYM_gamma */
213 	{ "iota", "*i" }, /* EQNSYM_iota */
214 	{ "kappa", "*k" }, /* EQNSYM_kappa */
215 	{ "lambda", "*l" }, /* EQNSYM_lambda */
216 	{ "mu", "*m" }, /* EQNSYM_mu */
217 	{ "nu", "*n" }, /* EQNSYM_nu */
218 	{ "omega", "*w" }, /* EQNSYM_omega */
219 	{ "omicron", "*o" }, /* EQNSYM_omicron */
220 	{ "phi", "*f" }, /* EQNSYM_phi */
221 	{ "pi", "*p" }, /* EQNSYM_pi */
222 	{ "psi", "*q" }, /* EQNSYM_psi */
223 	{ "rho", "*r" }, /* EQNSYM_rho */
224 	{ "sigma", "*s" }, /* EQNSYM_sigma */
225 	{ "tau", "*t" }, /* EQNSYM_tau */
226 	{ "theta", "*h" }, /* EQNSYM_theta */
227 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
228 	{ "xi", "*c" }, /* EQNSYM_xi */
229 	{ "zeta", "*z" }, /* EQNSYM_zeta */
230 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
231 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
232 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
233 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
234 	{ "PHI", "*F" }, /* EQNSYM_PHI */
235 	{ "PI", "*P" }, /* EQNSYM_PI */
236 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
237 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
238 	{ "THETA", "*H" }, /* EQNSYM_THETA */
239 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
240 	{ "XI", "*C" }, /* EQNSYM_XI */
241 	{ "inter", "ca" }, /* EQNSYM_inter */
242 	{ "union", "cu" }, /* EQNSYM_union */
243 	{ "prod", "product" }, /* EQNSYM_prod */
244 	{ "int", "integral" }, /* EQNSYM_int */
245 	{ "sum", "sum" }, /* EQNSYM_sum */
246 	{ "grad", "gr" }, /* EQNSYM_grad */
247 	{ "del", "gr" }, /* EQNSYM_del */
248 	{ "times", "mu" }, /* EQNSYM_times */
249 	{ "cdot", "pc" }, /* EQNSYM_cdot */
250 	{ "nothing", "&" }, /* EQNSYM_nothing */
251 	{ "approx", "~~" }, /* EQNSYM_approx */
252 	{ "prime", "aq" }, /* EQNSYM_prime */
253 	{ "half", "12" }, /* EQNSYM_half */
254 	{ "partial", "pd" }, /* EQNSYM_partial */
255 	{ "inf", "if" }, /* EQNSYM_inf */
256 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
257 	{ "<<", "<<" }, /* EQNSYM_muchless */
258 	{ "<-", "<-" }, /* EQNSYM_larrow */
259 	{ "->", "->" }, /* EQNSYM_rarrow */
260 	{ "+-", "+-" }, /* EQNSYM_pm */
261 	{ "!=", "!=" }, /* EQNSYM_nequal */
262 	{ "==", "==" }, /* EQNSYM_equiv */
263 	{ "<=", "<=" }, /* EQNSYM_lessequal */
264 	{ ">=", ">=" }, /* EQNSYM_moreequal */
265 };
266 
267 enum rofferr
268 eqn_read(struct eqn_node **epp, int ln,
269 		const char *p, int pos, int *offs)
270 {
271 	size_t		 sz;
272 	struct eqn_node	*ep;
273 	enum rofferr	 er;
274 
275 	ep = *epp;
276 
277 	/*
278 	 * If we're the terminating mark, unset our equation status and
279 	 * validate the full equation.
280 	 */
281 
282 	if (0 == strncmp(p, ".EN", 3)) {
283 		er = eqn_end(epp);
284 		p += 3;
285 		while (' ' == *p || '\t' == *p)
286 			p++;
287 		if ('\0' == *p)
288 			return(er);
289 		mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
290 		    ln, pos, "EN %s", p);
291 		return(er);
292 	}
293 
294 	/*
295 	 * Build up the full string, replacing all newlines with regular
296 	 * whitespace.
297 	 */
298 
299 	sz = strlen(p + pos) + 1;
300 	ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
301 
302 	/* First invocation: nil terminate the string. */
303 
304 	if (0 == ep->sz)
305 		*ep->data = '\0';
306 
307 	ep->sz += sz;
308 	strlcat(ep->data, p + pos, ep->sz + 1);
309 	strlcat(ep->data, " ", ep->sz + 1);
310 	return(ROFF_IGN);
311 }
312 
313 struct eqn_node *
314 eqn_alloc(int pos, int line, struct mparse *parse)
315 {
316 	struct eqn_node	*p;
317 
318 	p = mandoc_calloc(1, sizeof(struct eqn_node));
319 
320 	p->parse = parse;
321 	p->eqn.ln = line;
322 	p->eqn.pos = pos;
323 	p->gsize = EQN_DEFSIZE;
324 
325 	return(p);
326 }
327 
328 /*
329  * Find the key "key" of the give size within our eqn-defined values.
330  */
331 static struct eqn_def *
332 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
333 {
334 	int		 i;
335 
336 	for (i = 0; i < (int)ep->defsz; i++)
337 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
338 		    ep->defs[i].keysz, key, sz))
339 			return(&ep->defs[i]);
340 
341 	return(NULL);
342 }
343 
344 /*
345  * Get the next token from the input stream using the given quote
346  * character.
347  * Optionally make any replacements.
348  */
349 static const char *
350 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
351 {
352 	char		*start, *next;
353 	int		 q, diff, lim;
354 	size_t		 ssz, dummy;
355 	struct eqn_def	*def;
356 
357 	if (NULL == sz)
358 		sz = &dummy;
359 
360 	lim = 0;
361 	ep->rew = ep->cur;
362 again:
363 	/* Prevent self-definitions. */
364 
365 	if (lim >= EQN_NEST_MAX) {
366 		EQN_MSG(MANDOCERR_ROFFLOOP, ep);
367 		return(NULL);
368 	}
369 
370 	ep->cur = ep->rew;
371 	start = &ep->data[(int)ep->cur];
372 	q = 0;
373 
374 	if ('\0' == *start)
375 		return(NULL);
376 
377 	if (quote == *start) {
378 		ep->cur++;
379 		q = 1;
380 	}
381 
382 	start = &ep->data[(int)ep->cur];
383 
384 	if ( ! q) {
385 		if ('{' == *start || '}' == *start)
386 			ssz = 1;
387 		else
388 			ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
389 		next = start + (int)ssz;
390 		if ('\0' == *next)
391 			next = NULL;
392 	} else
393 		next = strchr(start, quote);
394 
395 	if (NULL != next) {
396 		*sz = (size_t)(next - start);
397 		ep->cur += *sz;
398 		if (q)
399 			ep->cur++;
400 		while (' ' == ep->data[(int)ep->cur] ||
401 		    '\t' == ep->data[(int)ep->cur] ||
402 		    '^' == ep->data[(int)ep->cur] ||
403 		    '~' == ep->data[(int)ep->cur])
404 			ep->cur++;
405 	} else {
406 		if (q)
407 			EQN_MSG(MANDOCERR_ARG_QUOTE, ep);
408 		next = strchr(start, '\0');
409 		*sz = (size_t)(next - start);
410 		ep->cur += *sz;
411 	}
412 
413 	/* Quotes aren't expanded for values. */
414 
415 	if (q || ! repl)
416 		return(start);
417 
418 	if (NULL != (def = eqn_def_find(ep, start, *sz))) {
419 		diff = def->valsz - *sz;
420 
421 		if (def->valsz > *sz) {
422 			ep->sz += diff;
423 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
424 			ep->data[ep->sz] = '\0';
425 			start = &ep->data[(int)ep->rew];
426 		}
427 
428 		diff = def->valsz - *sz;
429 		memmove(start + *sz + diff, start + *sz,
430 		    (strlen(start) - *sz) + 1);
431 		memcpy(start, def->val, def->valsz);
432 		goto again;
433 	}
434 
435 	return(start);
436 }
437 
438 /*
439  * Get the next delimited token using the default current quote
440  * character.
441  */
442 static const char *
443 eqn_nexttok(struct eqn_node *ep, size_t *sz)
444 {
445 
446 	return(eqn_next(ep, '"', sz, 1));
447 }
448 
449 /*
450  * Get next token without replacement.
451  */
452 static const char *
453 eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
454 {
455 
456 	return(eqn_next(ep, '"', sz, 0));
457 }
458 
459 /*
460  * Parse a token from the stream of text.
461  * A token consists of one of the recognised eqn(7) strings.
462  * Strings are separated by delimiting marks.
463  * This returns EQN_TOK_EOF when there are no more tokens.
464  * If the token is an unrecognised string literal, then it returns
465  * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
466  * string.
467  * This must be later freed with free(3).
468  */
469 static enum eqn_tok
470 eqn_tok_parse(struct eqn_node *ep, char **p)
471 {
472 	const char	*start;
473 	size_t		 i, sz;
474 	int		 quoted;
475 
476 	if (NULL != p)
477 		*p = NULL;
478 
479 	quoted = ep->data[ep->cur] == '"';
480 
481 	if (NULL == (start = eqn_nexttok(ep, &sz)))
482 		return(EQN_TOK_EOF);
483 
484 	if (quoted) {
485 		if (p != NULL)
486 			*p = mandoc_strndup(start, sz);
487 		return(EQN_TOK__MAX);
488 	}
489 
490 	for (i = 0; i < EQN_TOK__MAX; i++) {
491 		if (NULL == eqn_toks[i])
492 			continue;
493 		if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
494 			break;
495 	}
496 
497 	if (i == EQN_TOK__MAX && NULL != p)
498 		*p = mandoc_strndup(start, sz);
499 
500 	return(i);
501 }
502 
503 static void
504 eqn_box_free(struct eqn_box *bp)
505 {
506 
507 	if (bp->first)
508 		eqn_box_free(bp->first);
509 	if (bp->next)
510 		eqn_box_free(bp->next);
511 
512 	free(bp->text);
513 	free(bp->left);
514 	free(bp->right);
515 	free(bp->top);
516 	free(bp->bottom);
517 	free(bp);
518 }
519 
520 /*
521  * Allocate a box as the last child of the parent node.
522  */
523 static struct eqn_box *
524 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
525 {
526 	struct eqn_box	*bp;
527 
528 	bp = mandoc_calloc(1, sizeof(struct eqn_box));
529 	bp->parent = parent;
530 	bp->parent->args++;
531 	bp->expectargs = UINT_MAX;
532 	bp->size = ep->gsize;
533 
534 	if (NULL != parent->first) {
535 		parent->last->next = bp;
536 		bp->prev = parent->last;
537 	} else
538 		parent->first = bp;
539 
540 	parent->last = bp;
541 	return(bp);
542 }
543 
544 /*
545  * Reparent the current last node (of the current parent) under a new
546  * EQN_SUBEXPR as the first element.
547  * Then return the new parent.
548  * The new EQN_SUBEXPR will have a two-child limit.
549  */
550 static struct eqn_box *
551 eqn_box_makebinary(struct eqn_node *ep,
552 	enum eqn_post pos, struct eqn_box *parent)
553 {
554 	struct eqn_box	*b, *newb;
555 
556 	assert(NULL != parent->last);
557 	b = parent->last;
558 	if (parent->last == parent->first)
559 		parent->first = NULL;
560 	parent->args--;
561 	parent->last = b->prev;
562 	b->prev = NULL;
563 	newb = eqn_box_alloc(ep, parent);
564 	newb->pos = pos;
565 	newb->type = EQN_SUBEXPR;
566 	newb->expectargs = 2;
567 	newb->args = 1;
568 	newb->first = newb->last = b;
569 	newb->first->next = NULL;
570 	b->parent = newb;
571 	return(newb);
572 }
573 
574 /*
575  * Parse the "delim" control statement.
576  */
577 static void
578 eqn_delim(struct eqn_node *ep)
579 {
580 	const char	*start;
581 	size_t		 sz;
582 
583 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
584 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
585 		    ep->eqn.ln, ep->eqn.pos, "delim");
586 	else if (strncmp(start, "off", 3) == 0)
587 		ep->delim = 0;
588 	else if (strncmp(start, "on", 2) == 0) {
589 		if (ep->odelim && ep->cdelim)
590 			ep->delim = 1;
591 	} else if (start[1] != '\0') {
592 		ep->odelim = start[0];
593 		ep->cdelim = start[1];
594 		ep->delim = 1;
595 	}
596 }
597 
598 /*
599  * Undefine a previously-defined string.
600  */
601 static int
602 eqn_undef(struct eqn_node *ep)
603 {
604 	const char	*start;
605 	struct eqn_def	*def;
606 	size_t		 sz;
607 
608 	if (NULL == (start = eqn_nextrawtok(ep, &sz))) {
609 		EQN_MSG(MANDOCERR_EQNEOF, ep);
610 		return(0);
611 	} else if (NULL != (def = eqn_def_find(ep, start, sz)))
612 		def->keysz = 0;
613 
614 	return(1);
615 }
616 
617 static int
618 eqn_def(struct eqn_node *ep)
619 {
620 	const char	*start;
621 	size_t		 sz;
622 	struct eqn_def	*def;
623 	int		 i;
624 
625 	if (NULL == (start = eqn_nextrawtok(ep, &sz))) {
626 		EQN_MSG(MANDOCERR_EQNEOF, ep);
627 		return(0);
628 	}
629 
630 	/*
631 	 * Search for a key that already exists.
632 	 * Create a new key if none is found.
633 	 */
634 	if (NULL == (def = eqn_def_find(ep, start, sz))) {
635 		/* Find holes in string array. */
636 		for (i = 0; i < (int)ep->defsz; i++)
637 			if (0 == ep->defs[i].keysz)
638 				break;
639 
640 		if (i == (int)ep->defsz) {
641 			ep->defsz++;
642 			ep->defs = mandoc_reallocarray(ep->defs,
643 			    ep->defsz, sizeof(struct eqn_def));
644 			ep->defs[i].key = ep->defs[i].val = NULL;
645 		}
646 
647 		ep->defs[i].keysz = sz;
648 		ep->defs[i].key = mandoc_realloc(
649 		    ep->defs[i].key, sz + 1);
650 
651 		memcpy(ep->defs[i].key, start, sz);
652 		ep->defs[i].key[(int)sz] = '\0';
653 		def = &ep->defs[i];
654 	}
655 
656 	start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
657 
658 	if (NULL == start) {
659 		EQN_MSG(MANDOCERR_EQNEOF, ep);
660 		return(-1);
661 	}
662 
663 	def->valsz = sz;
664 	def->val = mandoc_realloc(def->val, sz + 1);
665 	memcpy(def->val, start, sz);
666 	def->val[(int)sz] = '\0';
667 	return(1);
668 }
669 
670 /*
671  * Recursively parse an eqn(7) expression.
672  */
673 static int
674 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
675 {
676 	char		*p;
677 	enum eqn_tok	 tok, subtok;
678 	enum eqn_post	 pos;
679 	struct eqn_box	*cur;
680 	int		 rc, size;
681 	size_t		 i, sz;
682 	char		 sym[64];
683 	const char	*start;
684 
685 	assert(parent != NULL);
686 	if (ep->data == NULL)
687 		return(-1);
688 
689 next_tok:
690 	tok = eqn_tok_parse(ep, &p);
691 
692 this_tok:
693 	switch (tok) {
694 	case (EQN_TOK_UNDEF):
695 		if ((rc = eqn_undef(ep)) <= 0)
696 			return(rc);
697 		break;
698 	case (EQN_TOK_NDEFINE):
699 	case (EQN_TOK_DEFINE):
700 		if ((rc = eqn_def(ep)) <= 0)
701 			return(rc);
702 		break;
703 	case (EQN_TOK_TDEFINE):
704 		if (NULL == eqn_nextrawtok(ep, NULL))
705 			EQN_MSG(MANDOCERR_EQNEOF, ep);
706 		else if (NULL == eqn_next(ep,
707 				ep->data[(int)ep->cur], NULL, 0))
708 			EQN_MSG(MANDOCERR_EQNEOF, ep);
709 		break;
710 	case (EQN_TOK_DELIM):
711 		eqn_delim(ep);
712 		break;
713 	case (EQN_TOK_GFONT):
714 		if (eqn_nextrawtok(ep, NULL) == NULL)
715 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
716 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
717 		break;
718 	case (EQN_TOK_MARK):
719 	case (EQN_TOK_LINEUP):
720 		/* Ignore these. */
721 		break;
722 	case (EQN_TOK_DYAD):
723 	case (EQN_TOK_VEC):
724 	case (EQN_TOK_UNDER):
725 	case (EQN_TOK_BAR):
726 	case (EQN_TOK_TILDE):
727 	case (EQN_TOK_HAT):
728 	case (EQN_TOK_DOT):
729 	case (EQN_TOK_DOTDOT):
730 		if (parent->last == NULL) {
731 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
732 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
733 			cur = eqn_box_alloc(ep, parent);
734 			cur->type = EQN_TEXT;
735 			cur->text = mandoc_strdup("");
736 		}
737 		parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
738 		parent->type = EQN_LISTONE;
739 		parent->expectargs = 1;
740 		switch (tok) {
741 		case (EQN_TOK_DOTDOT):
742 			strlcpy(sym, "\\[ad]", sizeof(sym));
743 			break;
744 		case (EQN_TOK_VEC):
745 			strlcpy(sym, "\\[->]", sizeof(sym));
746 			break;
747 		case (EQN_TOK_DYAD):
748 			strlcpy(sym, "\\[<>]", sizeof(sym));
749 			break;
750 		case (EQN_TOK_TILDE):
751 			strlcpy(sym, "\\[a~]", sizeof(sym));
752 			break;
753 		case (EQN_TOK_UNDER):
754 			strlcpy(sym, "\\[ul]", sizeof(sym));
755 			break;
756 		case (EQN_TOK_BAR):
757 			strlcpy(sym, "\\[rl]", sizeof(sym));
758 			break;
759 		case (EQN_TOK_DOT):
760 			strlcpy(sym, "\\[a.]", sizeof(sym));
761 			break;
762 		case (EQN_TOK_HAT):
763 			strlcpy(sym, "\\[ha]", sizeof(sym));
764 			break;
765 		default:
766 			abort();
767 		}
768 
769 		switch (tok) {
770 		case (EQN_TOK_DOTDOT):
771 		case (EQN_TOK_VEC):
772 		case (EQN_TOK_DYAD):
773 		case (EQN_TOK_TILDE):
774 		case (EQN_TOK_BAR):
775 		case (EQN_TOK_DOT):
776 		case (EQN_TOK_HAT):
777 			parent->top = mandoc_strdup(sym);
778 			break;
779 		case (EQN_TOK_UNDER):
780 			parent->bottom = mandoc_strdup(sym);
781 			break;
782 		default:
783 			abort();
784 		}
785 		parent = parent->parent;
786 		break;
787 	case (EQN_TOK_FWD):
788 	case (EQN_TOK_BACK):
789 	case (EQN_TOK_DOWN):
790 	case (EQN_TOK_UP):
791 		subtok = eqn_tok_parse(ep, NULL);
792 		if (subtok != EQN_TOK__MAX) {
793 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
794 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
795 			tok = subtok;
796 			goto this_tok;
797 		}
798 		break;
799 	case (EQN_TOK_FAT):
800 	case (EQN_TOK_ROMAN):
801 	case (EQN_TOK_ITALIC):
802 	case (EQN_TOK_BOLD):
803 		while (parent->args == parent->expectargs)
804 			parent = parent->parent;
805 		/*
806 		 * These values apply to the next word or sequence of
807 		 * words; thus, we mark that we'll have a child with
808 		 * exactly one of those.
809 		 */
810 		parent = eqn_box_alloc(ep, parent);
811 		parent->type = EQN_LISTONE;
812 		parent->expectargs = 1;
813 		switch (tok) {
814 		case (EQN_TOK_FAT):
815 			parent->font = EQNFONT_FAT;
816 			break;
817 		case (EQN_TOK_ROMAN):
818 			parent->font = EQNFONT_ROMAN;
819 			break;
820 		case (EQN_TOK_ITALIC):
821 			parent->font = EQNFONT_ITALIC;
822 			break;
823 		case (EQN_TOK_BOLD):
824 			parent->font = EQNFONT_BOLD;
825 			break;
826 		default:
827 			abort();
828 		}
829 		break;
830 	case (EQN_TOK_SIZE):
831 	case (EQN_TOK_GSIZE):
832 		/* Accept two values: integral size and a single. */
833 		if (NULL == (start = eqn_nexttok(ep, &sz))) {
834 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
835 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
836 			break;
837 		}
838 		size = mandoc_strntoi(start, sz, 10);
839 		if (-1 == size) {
840 			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
841 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
842 			break;
843 		}
844 		if (EQN_TOK_GSIZE == tok) {
845 			ep->gsize = size;
846 			break;
847 		}
848 		parent = eqn_box_alloc(ep, parent);
849 		parent->type = EQN_LISTONE;
850 		parent->expectargs = 1;
851 		parent->size = size;
852 		break;
853 	case (EQN_TOK_FROM):
854 	case (EQN_TOK_TO):
855 	case (EQN_TOK_SUB):
856 	case (EQN_TOK_SUP):
857 		/*
858 		 * We have a left-right-associative expression.
859 		 * Repivot under a positional node, open a child scope
860 		 * and keep on reading.
861 		 */
862 		if (parent->last == NULL) {
863 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
864 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
865 			cur = eqn_box_alloc(ep, parent);
866 			cur->type = EQN_TEXT;
867 			cur->text = mandoc_strdup("");
868 		}
869 		/* Handle the "subsup" and "fromto" positions. */
870 		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
871 			parent->expectargs = 3;
872 			parent->pos = EQNPOS_SUBSUP;
873 			break;
874 		}
875 		if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
876 			parent->expectargs = 3;
877 			parent->pos = EQNPOS_FROMTO;
878 			break;
879 		}
880 		switch (tok) {
881 		case (EQN_TOK_FROM):
882 			pos = EQNPOS_FROM;
883 			break;
884 		case (EQN_TOK_TO):
885 			pos = EQNPOS_TO;
886 			break;
887 		case (EQN_TOK_SUP):
888 			pos = EQNPOS_SUP;
889 			break;
890 		case (EQN_TOK_SUB):
891 			pos = EQNPOS_SUB;
892 			break;
893 		default:
894 			abort();
895 		}
896 		parent = eqn_box_makebinary(ep, pos, parent);
897 		break;
898 	case (EQN_TOK_SQRT):
899 		while (parent->args == parent->expectargs)
900 			parent = parent->parent;
901 		/*
902 		 * Accept a left-right-associative set of arguments just
903 		 * like sub and sup and friends but without rebalancing
904 		 * under a pivot.
905 		 */
906 		parent = eqn_box_alloc(ep, parent);
907 		parent->type = EQN_SUBEXPR;
908 		parent->pos = EQNPOS_SQRT;
909 		parent->expectargs = 1;
910 		break;
911 	case (EQN_TOK_OVER):
912 		/*
913 		 * We have a right-left-associative fraction.
914 		 * Close out anything that's currently open, then
915 		 * rebalance and continue reading.
916 		 */
917 		if (parent->last == NULL) {
918 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
919 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
920 			cur = eqn_box_alloc(ep, parent);
921 			cur->type = EQN_TEXT;
922 			cur->text = mandoc_strdup("");
923 		}
924 		while (EQN_SUBEXPR == parent->type)
925 			parent = parent->parent;
926 		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
927 		break;
928 	case (EQN_TOK_RIGHT):
929 	case (EQN_TOK_BRACE_CLOSE):
930 		/*
931 		 * Close out the existing brace.
932 		 * FIXME: this is a shitty sentinel: we should really
933 		 * have a native EQN_BRACE type or whatnot.
934 		 */
935 		for (cur = parent; cur != NULL; cur = cur->parent)
936 			if (cur->type == EQN_LIST &&
937 			    (tok == EQN_TOK_BRACE_CLOSE ||
938 			     cur->left != NULL))
939 				break;
940 		if (cur == NULL) {
941 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
942 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
943 			break;
944 		}
945 		parent = cur;
946 		if (EQN_TOK_RIGHT == tok) {
947 			if (NULL == (start = eqn_nexttok(ep, &sz))) {
948 				mandoc_msg(MANDOCERR_REQ_EMPTY,
949 				    ep->parse, ep->eqn.ln,
950 				    ep->eqn.pos, eqn_toks[tok]);
951 				break;
952 			}
953 			/* Handling depends on right/left. */
954 			if (STRNEQ(start, sz, "ceiling", 7)) {
955 				strlcpy(sym, "\\[rc]", sizeof(sym));
956 				parent->right = mandoc_strdup(sym);
957 			} else if (STRNEQ(start, sz, "floor", 5)) {
958 				strlcpy(sym, "\\[rf]", sizeof(sym));
959 				parent->right = mandoc_strdup(sym);
960 			} else
961 				parent->right = mandoc_strndup(start, sz);
962 		}
963 		parent = parent->parent;
964 		if (EQN_TOK_BRACE_CLOSE == tok && parent &&
965 		    (parent->type == EQN_PILE ||
966 		     parent->type == EQN_MATRIX))
967 			parent = parent->parent;
968 		/* Close out any "singleton" lists. */
969 		while (parent->type == EQN_LISTONE &&
970 		    parent->args == parent->expectargs)
971 			parent = parent->parent;
972 		break;
973 	case (EQN_TOK_BRACE_OPEN):
974 	case (EQN_TOK_LEFT):
975 		/*
976 		 * If we already have something in the stack and we're
977 		 * in an expression, then rewind til we're not any more
978 		 * (just like with the text node).
979 		 */
980 		while (parent->args == parent->expectargs)
981 			parent = parent->parent;
982 		if (EQN_TOK_LEFT == tok &&
983 		    (start = eqn_nexttok(ep, &sz)) == NULL) {
984 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
985 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
986 			break;
987 		}
988 		parent = eqn_box_alloc(ep, parent);
989 		parent->type = EQN_LIST;
990 		if (EQN_TOK_LEFT == tok) {
991 			if (STRNEQ(start, sz, "ceiling", 7)) {
992 				strlcpy(sym, "\\[lc]", sizeof(sym));
993 				parent->left = mandoc_strdup(sym);
994 			} else if (STRNEQ(start, sz, "floor", 5)) {
995 				strlcpy(sym, "\\[lf]", sizeof(sym));
996 				parent->left = mandoc_strdup(sym);
997 			} else
998 				parent->left = mandoc_strndup(start, sz);
999 		}
1000 		break;
1001 	case (EQN_TOK_PILE):
1002 	case (EQN_TOK_LPILE):
1003 	case (EQN_TOK_RPILE):
1004 	case (EQN_TOK_CPILE):
1005 	case (EQN_TOK_CCOL):
1006 	case (EQN_TOK_LCOL):
1007 	case (EQN_TOK_RCOL):
1008 		while (parent->args == parent->expectargs)
1009 			parent = parent->parent;
1010 		parent = eqn_box_alloc(ep, parent);
1011 		parent->type = EQN_PILE;
1012 		parent->expectargs = 1;
1013 		break;
1014 	case (EQN_TOK_ABOVE):
1015 		for (cur = parent; cur != NULL; cur = cur->parent)
1016 			if (cur->type == EQN_PILE)
1017 				break;
1018 		if (cur == NULL) {
1019 			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1020 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1021 			break;
1022 		}
1023 		parent = eqn_box_alloc(ep, cur);
1024 		parent->type = EQN_LIST;
1025 		break;
1026 	case (EQN_TOK_MATRIX):
1027 		while (parent->args == parent->expectargs)
1028 			parent = parent->parent;
1029 		parent = eqn_box_alloc(ep, parent);
1030 		parent->type = EQN_MATRIX;
1031 		parent->expectargs = 1;
1032 		break;
1033 	case (EQN_TOK_EOF):
1034 		/*
1035 		 * End of file!
1036 		 * TODO: make sure we're not in an open subexpression.
1037 		 */
1038 		return(0);
1039 	default:
1040 		assert(tok == EQN_TOK__MAX);
1041 		assert(NULL != p);
1042 		/*
1043 		 * If we already have something in the stack and we're
1044 		 * in an expression, then rewind til we're not any more.
1045 		 */
1046 		while (parent->args == parent->expectargs)
1047 			parent = parent->parent;
1048 		cur = eqn_box_alloc(ep, parent);
1049 		cur->type = EQN_TEXT;
1050 		for (i = 0; i < EQNSYM__MAX; i++)
1051 			if (0 == strcmp(eqnsyms[i].str, p)) {
1052 				(void)snprintf(sym, sizeof(sym),
1053 					"\\[%s]", eqnsyms[i].sym);
1054 				cur->text = mandoc_strdup(sym);
1055 				free(p);
1056 				break;
1057 			}
1058 
1059 		if (i == EQNSYM__MAX)
1060 			cur->text = p;
1061 		/*
1062 		 * Post-process list status.
1063 		 */
1064 		while (parent->type == EQN_LISTONE &&
1065 		    parent->args == parent->expectargs)
1066 			parent = parent->parent;
1067 		break;
1068 	}
1069 	goto next_tok;
1070 }
1071 
1072 enum rofferr
1073 eqn_end(struct eqn_node **epp)
1074 {
1075 	struct eqn_node	*ep;
1076 
1077 	ep = *epp;
1078 	*epp = NULL;
1079 
1080 	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1081 	ep->eqn.root->expectargs = UINT_MAX;
1082 	return(0 == eqn_parse(ep, ep->eqn.root) ? ROFF_EQN : ROFF_IGN);
1083 }
1084 
1085 void
1086 eqn_free(struct eqn_node *p)
1087 {
1088 	int		 i;
1089 
1090 	eqn_box_free(p->eqn.root);
1091 
1092 	for (i = 0; i < (int)p->defsz; i++) {
1093 		free(p->defs[i].key);
1094 		free(p->defs[i].val);
1095 	}
1096 
1097 	free(p->data);
1098 	free(p->defs);
1099 	free(p);
1100 }
1101