xref: /dragonfly/contrib/mdocml/mdoc.c (revision 89a89091)
1 /*	$Id: mdoc.c,v 1.188 2011/03/28 23:52:13 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <time.h>
30 
31 #include "mdoc.h"
32 #include "mandoc.h"
33 #include "libmdoc.h"
34 #include "libmandoc.h"
35 
36 const	char *const __mdoc_macronames[MDOC_MAX] = {
37 	"Ap",		"Dd",		"Dt",		"Os",
38 	"Sh",		"Ss",		"Pp",		"D1",
39 	"Dl",		"Bd",		"Ed",		"Bl",
40 	"El",		"It",		"Ad",		"An",
41 	"Ar",		"Cd",		"Cm",		"Dv",
42 	"Er",		"Ev",		"Ex",		"Fa",
43 	"Fd",		"Fl",		"Fn",		"Ft",
44 	"Ic",		"In",		"Li",		"Nd",
45 	"Nm",		"Op",		"Ot",		"Pa",
46 	"Rv",		"St",		"Va",		"Vt",
47 	/* LINTED */
48 	"Xr",		"%A",		"%B",		"%D",
49 	/* LINTED */
50 	"%I",		"%J",		"%N",		"%O",
51 	/* LINTED */
52 	"%P",		"%R",		"%T",		"%V",
53 	"Ac",		"Ao",		"Aq",		"At",
54 	"Bc",		"Bf",		"Bo",		"Bq",
55 	"Bsx",		"Bx",		"Db",		"Dc",
56 	"Do",		"Dq",		"Ec",		"Ef",
57 	"Em",		"Eo",		"Fx",		"Ms",
58 	"No",		"Ns",		"Nx",		"Ox",
59 	"Pc",		"Pf",		"Po",		"Pq",
60 	"Qc",		"Ql",		"Qo",		"Qq",
61 	"Re",		"Rs",		"Sc",		"So",
62 	"Sq",		"Sm",		"Sx",		"Sy",
63 	"Tn",		"Ux",		"Xc",		"Xo",
64 	"Fo",		"Fc",		"Oo",		"Oc",
65 	"Bk",		"Ek",		"Bt",		"Hf",
66 	"Fr",		"Ud",		"Lb",		"Lp",
67 	"Lk",		"Mt",		"Brq",		"Bro",
68 	/* LINTED */
69 	"Brc",		"%C",		"Es",		"En",
70 	/* LINTED */
71 	"Dx",		"%Q",		"br",		"sp",
72 	/* LINTED */
73 	"%U",		"Ta"
74 	};
75 
76 const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
77 	"split",		"nosplit",		"ragged",
78 	"unfilled",		"literal",		"file",
79 	"offset",		"bullet",		"dash",
80 	"hyphen",		"item",			"enum",
81 	"tag",			"diag",			"hang",
82 	"ohang",		"inset",		"column",
83 	"width",		"compact",		"std",
84 	"filled",		"words",		"emphasis",
85 	"symbolic",		"nested",		"centered"
86 	};
87 
88 const	char * const *mdoc_macronames = __mdoc_macronames;
89 const	char * const *mdoc_argnames = __mdoc_argnames;
90 
/* File-local helpers: node life cycle, tree linkage, per-line parsers. */
static void mdoc_node_free(struct mdoc_node *p);
static void mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n);
static void mdoc_free1(struct mdoc *mdoc);
static void mdoc_alloc1(struct mdoc *mdoc);
static struct mdoc_node *node_alloc(struct mdoc *m, int line, int pos,
		enum mdoct tok, enum mdoc_type type);
static int node_append(struct mdoc *mdoc, struct mdoc_node *p);
static int mdoc_ptext(struct mdoc *m, int line, char *buf, int offs);
static int mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs);
102 
103 const struct mdoc_node *
104 mdoc_node(const struct mdoc *m)
105 {
106 
107 	assert( ! (MDOC_HALT & m->flags));
108 	return(m->first);
109 }
110 
111 
112 const struct mdoc_meta *
113 mdoc_meta(const struct mdoc *m)
114 {
115 
116 	assert( ! (MDOC_HALT & m->flags));
117 	return(&m->meta);
118 }
119 
120 
121 /*
122  * Frees volatile resources (parse tree, meta-data, fields).
123  */
124 static void
125 mdoc_free1(struct mdoc *mdoc)
126 {
127 
128 	if (mdoc->first)
129 		mdoc_node_delete(mdoc, mdoc->first);
130 	if (mdoc->meta.title)
131 		free(mdoc->meta.title);
132 	if (mdoc->meta.os)
133 		free(mdoc->meta.os);
134 	if (mdoc->meta.name)
135 		free(mdoc->meta.name);
136 	if (mdoc->meta.arch)
137 		free(mdoc->meta.arch);
138 	if (mdoc->meta.vol)
139 		free(mdoc->meta.vol);
140 	if (mdoc->meta.msec)
141 		free(mdoc->meta.msec);
142 	if (mdoc->meta.date)
143 		free(mdoc->meta.date);
144 }
145 
146 
147 /*
148  * Allocate all volatile resources (parse tree, meta-data, fields).
149  */
150 static void
151 mdoc_alloc1(struct mdoc *mdoc)
152 {
153 
154 	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
155 	mdoc->flags = 0;
156 	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
157 	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
158 	mdoc->first = mdoc->last;
159 	mdoc->last->type = MDOC_ROOT;
160 	mdoc->next = MDOC_NEXT_CHILD;
161 }
162 
163 
/*
 * Discard the current parse (mdoc_free1()) and re-create the initial
 * per-parse state (mdoc_alloc1()), so that the same parser object can
 * be used for another document.  Fields not touched by those two
 * helpers survive the reset.
 */
void
mdoc_reset(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);	/* drop the tree and meta strings */
	mdoc_alloc1(mdoc);	/* new root node, cleared meta and flags */
}
177 
178 
/*
 * Destroy a parser: release the per-parse resources and then the
 * parser structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{
	mdoc_free1(mdoc);	/* tree and meta strings first */
	free(mdoc);		/* then the container */
}
190 
191 
192 /*
193  * Allocate volatile and non-volatile parse resources.
194  */
195 struct mdoc *
196 mdoc_alloc(struct regset *regs, struct mparse *parse)
197 {
198 	struct mdoc	*p;
199 
200 	p = mandoc_calloc(1, sizeof(struct mdoc));
201 
202 	p->parse = parse;
203 	p->regs = regs;
204 
205 	mdoc_hash_init();
206 	mdoc_alloc1(p);
207 	return(p);
208 }
209 
210 
211 /*
212  * Climb back up the parse tree, validating open scopes.  Mostly calls
213  * through to macro_end() in macro.c.
214  */
215 int
216 mdoc_endparse(struct mdoc *m)
217 {
218 
219 	assert( ! (MDOC_HALT & m->flags));
220 	if (mdoc_macroend(m))
221 		return(1);
222 	m->flags |= MDOC_HALT;
223 	return(0);
224 }
225 
226 int
227 mdoc_addeqn(struct mdoc *m, const struct eqn *ep)
228 {
229 	struct mdoc_node *n;
230 
231 	assert( ! (MDOC_HALT & m->flags));
232 
233 	/* No text before an initial macro. */
234 
235 	if (SEC_NONE == m->lastnamed) {
236 		mdoc_pmsg(m, ep->line, ep->pos, MANDOCERR_NOTEXT);
237 		return(1);
238 	}
239 
240 	n = node_alloc(m, ep->line, ep->pos, MDOC_MAX, MDOC_EQN);
241 	n->eqn = ep;
242 
243 	if ( ! node_append(m, n))
244 		return(0);
245 
246 	m->next = MDOC_NEXT_SIBLING;
247 	return(1);
248 }
249 
250 int
251 mdoc_addspan(struct mdoc *m, const struct tbl_span *sp)
252 {
253 	struct mdoc_node *n;
254 
255 	assert( ! (MDOC_HALT & m->flags));
256 
257 	/* No text before an initial macro. */
258 
259 	if (SEC_NONE == m->lastnamed) {
260 		mdoc_pmsg(m, sp->line, 0, MANDOCERR_NOTEXT);
261 		return(1);
262 	}
263 
264 	n = node_alloc(m, sp->line, 0, MDOC_MAX, MDOC_TBL);
265 	n->span = sp;
266 
267 	if ( ! node_append(m, n))
268 		return(0);
269 
270 	m->next = MDOC_NEXT_SIBLING;
271 	return(1);
272 }
273 
274 
275 /*
276  * Main parse routine.  Parses a single line -- really just hands off to
277  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
278  */
279 int
280 mdoc_parseln(struct mdoc *m, int ln, char *buf, int offs)
281 {
282 
283 	assert( ! (MDOC_HALT & m->flags));
284 
285 	m->flags |= MDOC_NEWLINE;
286 
287 	/*
288 	 * Let the roff nS register switch SYNOPSIS mode early,
289 	 * such that the parser knows at all times
290 	 * whether this mode is on or off.
291 	 * Note that this mode is also switched by the Sh macro.
292 	 */
293 	if (m->regs->regs[(int)REG_nS].set) {
294 		if (m->regs->regs[(int)REG_nS].v.u)
295 			m->flags |= MDOC_SYNOPSIS;
296 		else
297 			m->flags &= ~MDOC_SYNOPSIS;
298 	}
299 
300 	return(mandoc_getcontrol(buf, &offs) ?
301 			mdoc_pmacro(m, ln, buf, offs) :
302 			mdoc_ptext(m, ln, buf, offs));
303 }
304 
305 int
306 mdoc_macro(MACRO_PROT_ARGS)
307 {
308 	assert(tok < MDOC_MAX);
309 
310 	/* If we're in the body, deny prologue calls. */
311 
312 	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
313 			MDOC_PBODY & m->flags) {
314 		mdoc_pmsg(m, line, ppos, MANDOCERR_BADBODY);
315 		return(1);
316 	}
317 
318 	/* If we're in the prologue, deny "body" macros.  */
319 
320 	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
321 			! (MDOC_PBODY & m->flags)) {
322 		mdoc_pmsg(m, line, ppos, MANDOCERR_BADPROLOG);
323 		if (NULL == m->meta.msec)
324 			m->meta.msec = mandoc_strdup("1");
325 		if (NULL == m->meta.title)
326 			m->meta.title = mandoc_strdup("UNKNOWN");
327 		if (NULL == m->meta.vol)
328 			m->meta.vol = mandoc_strdup("LOCAL");
329 		if (NULL == m->meta.os)
330 			m->meta.os = mandoc_strdup("LOCAL");
331 		if (NULL == m->meta.date)
332 			m->meta.date = mandoc_normdate
333 				(m->parse, NULL, line, ppos);
334 		m->flags |= MDOC_PBODY;
335 	}
336 
337 	return((*mdoc_macros[tok].fp)(m, tok, line, ppos, pos, buf));
338 }
339 
340 
341 static int
342 node_append(struct mdoc *mdoc, struct mdoc_node *p)
343 {
344 
345 	assert(mdoc->last);
346 	assert(mdoc->first);
347 	assert(MDOC_ROOT != p->type);
348 
349 	switch (mdoc->next) {
350 	case (MDOC_NEXT_SIBLING):
351 		mdoc->last->next = p;
352 		p->prev = mdoc->last;
353 		p->parent = mdoc->last->parent;
354 		break;
355 	case (MDOC_NEXT_CHILD):
356 		mdoc->last->child = p;
357 		p->parent = mdoc->last;
358 		break;
359 	default:
360 		abort();
361 		/* NOTREACHED */
362 	}
363 
364 	p->parent->nchild++;
365 
366 	/*
367 	 * Copy over the normalised-data pointer of our parent.  Not
368 	 * everybody has one, but copying a null pointer is fine.
369 	 */
370 
371 	switch (p->type) {
372 	case (MDOC_BODY):
373 		/* FALLTHROUGH */
374 	case (MDOC_TAIL):
375 		/* FALLTHROUGH */
376 	case (MDOC_HEAD):
377 		p->norm = p->parent->norm;
378 		break;
379 	default:
380 		break;
381 	}
382 
383 	if ( ! mdoc_valid_pre(mdoc, p))
384 		return(0);
385 
386 	switch (p->type) {
387 	case (MDOC_HEAD):
388 		assert(MDOC_BLOCK == p->parent->type);
389 		p->parent->head = p;
390 		break;
391 	case (MDOC_TAIL):
392 		assert(MDOC_BLOCK == p->parent->type);
393 		p->parent->tail = p;
394 		break;
395 	case (MDOC_BODY):
396 		if (p->end)
397 			break;
398 		assert(MDOC_BLOCK == p->parent->type);
399 		p->parent->body = p;
400 		break;
401 	default:
402 		break;
403 	}
404 
405 	mdoc->last = p;
406 
407 	switch (p->type) {
408 	case (MDOC_TBL):
409 		/* FALLTHROUGH */
410 	case (MDOC_TEXT):
411 		if ( ! mdoc_valid_post(mdoc))
412 			return(0);
413 		break;
414 	default:
415 		break;
416 	}
417 
418 	return(1);
419 }
420 
421 
422 static struct mdoc_node *
423 node_alloc(struct mdoc *m, int line, int pos,
424 		enum mdoct tok, enum mdoc_type type)
425 {
426 	struct mdoc_node *p;
427 
428 	p = mandoc_calloc(1, sizeof(struct mdoc_node));
429 	p->sec = m->lastsec;
430 	p->line = line;
431 	p->pos = pos;
432 	p->tok = tok;
433 	p->type = type;
434 
435 	/* Flag analysis. */
436 
437 	if (MDOC_SYNOPSIS & m->flags)
438 		p->flags |= MDOC_SYNPRETTY;
439 	else
440 		p->flags &= ~MDOC_SYNPRETTY;
441 	if (MDOC_NEWLINE & m->flags)
442 		p->flags |= MDOC_LINE;
443 	m->flags &= ~MDOC_NEWLINE;
444 
445 	return(p);
446 }
447 
448 
449 int
450 mdoc_tail_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
451 {
452 	struct mdoc_node *p;
453 
454 	p = node_alloc(m, line, pos, tok, MDOC_TAIL);
455 	if ( ! node_append(m, p))
456 		return(0);
457 	m->next = MDOC_NEXT_CHILD;
458 	return(1);
459 }
460 
461 
462 int
463 mdoc_head_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
464 {
465 	struct mdoc_node *p;
466 
467 	assert(m->first);
468 	assert(m->last);
469 
470 	p = node_alloc(m, line, pos, tok, MDOC_HEAD);
471 	if ( ! node_append(m, p))
472 		return(0);
473 	m->next = MDOC_NEXT_CHILD;
474 	return(1);
475 }
476 
477 
478 int
479 mdoc_body_alloc(struct mdoc *m, int line, int pos, enum mdoct tok)
480 {
481 	struct mdoc_node *p;
482 
483 	p = node_alloc(m, line, pos, tok, MDOC_BODY);
484 	if ( ! node_append(m, p))
485 		return(0);
486 	m->next = MDOC_NEXT_CHILD;
487 	return(1);
488 }
489 
490 
491 int
492 mdoc_endbody_alloc(struct mdoc *m, int line, int pos, enum mdoct tok,
493 		struct mdoc_node *body, enum mdoc_endbody end)
494 {
495 	struct mdoc_node *p;
496 
497 	p = node_alloc(m, line, pos, tok, MDOC_BODY);
498 	p->pending = body;
499 	p->end = end;
500 	if ( ! node_append(m, p))
501 		return(0);
502 	m->next = MDOC_NEXT_SIBLING;
503 	return(1);
504 }
505 
506 
507 int
508 mdoc_block_alloc(struct mdoc *m, int line, int pos,
509 		enum mdoct tok, struct mdoc_arg *args)
510 {
511 	struct mdoc_node *p;
512 
513 	p = node_alloc(m, line, pos, tok, MDOC_BLOCK);
514 	p->args = args;
515 	if (p->args)
516 		(args->refcnt)++;
517 
518 	switch (tok) {
519 	case (MDOC_Bd):
520 		/* FALLTHROUGH */
521 	case (MDOC_Bf):
522 		/* FALLTHROUGH */
523 	case (MDOC_Bl):
524 		/* FALLTHROUGH */
525 	case (MDOC_Rs):
526 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
527 		break;
528 	default:
529 		break;
530 	}
531 
532 	if ( ! node_append(m, p))
533 		return(0);
534 	m->next = MDOC_NEXT_CHILD;
535 	return(1);
536 }
537 
538 
539 int
540 mdoc_elem_alloc(struct mdoc *m, int line, int pos,
541 		enum mdoct tok, struct mdoc_arg *args)
542 {
543 	struct mdoc_node *p;
544 
545 	p = node_alloc(m, line, pos, tok, MDOC_ELEM);
546 	p->args = args;
547 	if (p->args)
548 		(args->refcnt)++;
549 
550 	switch (tok) {
551 	case (MDOC_An):
552 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
553 		break;
554 	default:
555 		break;
556 	}
557 
558 	if ( ! node_append(m, p))
559 		return(0);
560 	m->next = MDOC_NEXT_CHILD;
561 	return(1);
562 }
563 
564 int
565 mdoc_word_alloc(struct mdoc *m, int line, int pos, const char *p)
566 {
567 	struct mdoc_node *n;
568 	size_t		  sv, len;
569 
570 	len = strlen(p);
571 
572 	n = node_alloc(m, line, pos, MDOC_MAX, MDOC_TEXT);
573 	n->string = mandoc_malloc(len + 1);
574 	sv = strlcpy(n->string, p, len + 1);
575 
576 	/* Prohibit truncation. */
577 	assert(sv < len + 1);
578 
579 	if ( ! node_append(m, n))
580 		return(0);
581 
582 	m->next = MDOC_NEXT_SIBLING;
583 	return(1);
584 }
585 
586 
587 static void
588 mdoc_node_free(struct mdoc_node *p)
589 {
590 
591 	if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
592 		free(p->norm);
593 	if (p->string)
594 		free(p->string);
595 	if (p->args)
596 		mdoc_argv_free(p->args);
597 	free(p);
598 }
599 
600 
601 static void
602 mdoc_node_unlink(struct mdoc *m, struct mdoc_node *n)
603 {
604 
605 	/* Adjust siblings. */
606 
607 	if (n->prev)
608 		n->prev->next = n->next;
609 	if (n->next)
610 		n->next->prev = n->prev;
611 
612 	/* Adjust parent. */
613 
614 	if (n->parent) {
615 		n->parent->nchild--;
616 		if (n->parent->child == n)
617 			n->parent->child = n->prev ? n->prev : n->next;
618 		if (n->parent->last == n)
619 			n->parent->last = n->prev ? n->prev : NULL;
620 	}
621 
622 	/* Adjust parse point, if applicable. */
623 
624 	if (m && m->last == n) {
625 		if (n->prev) {
626 			m->last = n->prev;
627 			m->next = MDOC_NEXT_SIBLING;
628 		} else {
629 			m->last = n->parent;
630 			m->next = MDOC_NEXT_CHILD;
631 		}
632 	}
633 
634 	if (m && m->first == n)
635 		m->first = NULL;
636 }
637 
638 
639 void
640 mdoc_node_delete(struct mdoc *m, struct mdoc_node *p)
641 {
642 
643 	while (p->child) {
644 		assert(p->nchild);
645 		mdoc_node_delete(m, p->child);
646 	}
647 	assert(0 == p->nchild);
648 
649 	mdoc_node_unlink(m, p);
650 	mdoc_node_free(p);
651 }
652 
653 
654 /*
655  * Parse free-form text, that is, a line that does not begin with the
656  * control character.
657  */
658 static int
659 mdoc_ptext(struct mdoc *m, int line, char *buf, int offs)
660 {
661 	char		 *c, *ws, *end;
662 	struct mdoc_node *n;
663 
664 	/* No text before an initial macro. */
665 
666 	if (SEC_NONE == m->lastnamed) {
667 		mdoc_pmsg(m, line, offs, MANDOCERR_NOTEXT);
668 		return(1);
669 	}
670 
671 	assert(m->last);
672 	n = m->last;
673 
674 	/*
675 	 * Divert directly to list processing if we're encountering a
676 	 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
677 	 * (a MDOC_BODY means it's already open, in which case we should
678 	 * process within its context in the normal way).
679 	 */
680 
681 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
682 			LIST_column == n->norm->Bl.type) {
683 		/* `Bl' is open without any children. */
684 		m->flags |= MDOC_FREECOL;
685 		return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf));
686 	}
687 
688 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
689 			NULL != n->parent &&
690 			MDOC_Bl == n->parent->tok &&
691 			LIST_column == n->parent->norm->Bl.type) {
692 		/* `Bl' has block-level `It' children. */
693 		m->flags |= MDOC_FREECOL;
694 		return(mdoc_macro(m, MDOC_It, line, offs, &offs, buf));
695 	}
696 
697 	/*
698 	 * Search for the beginning of unescaped trailing whitespace (ws)
699 	 * and for the first character not to be output (end).
700 	 */
701 
702 	/* FIXME: replace with strcspn(). */
703 	ws = NULL;
704 	for (c = end = buf + offs; *c; c++) {
705 		switch (*c) {
706 		case '-':
707 			if (mandoc_hyph(buf + offs, c))
708 				*c = ASCII_HYPH;
709 			ws = NULL;
710 			break;
711 		case ' ':
712 			if (NULL == ws)
713 				ws = c;
714 			continue;
715 		case '\t':
716 			/*
717 			 * Always warn about trailing tabs,
718 			 * even outside literal context,
719 			 * where they should be put on the next line.
720 			 */
721 			if (NULL == ws)
722 				ws = c;
723 			/*
724 			 * Strip trailing tabs in literal context only;
725 			 * outside, they affect the next line.
726 			 */
727 			if (MDOC_LITERAL & m->flags)
728 				continue;
729 			break;
730 		case '\\':
731 			/* Skip the escaped character, too, if any. */
732 			if (c[1])
733 				c++;
734 			/* FALLTHROUGH */
735 		default:
736 			ws = NULL;
737 			break;
738 		}
739 		end = c + 1;
740 	}
741 	*end = '\0';
742 
743 	if (ws)
744 		mdoc_pmsg(m, line, (int)(ws-buf), MANDOCERR_EOLNSPACE);
745 
746 	if ('\0' == buf[offs] && ! (MDOC_LITERAL & m->flags)) {
747 		mdoc_pmsg(m, line, (int)(c-buf), MANDOCERR_NOBLANKLN);
748 
749 		/*
750 		 * Insert a `sp' in the case of a blank line.  Technically,
751 		 * blank lines aren't allowed, but enough manuals assume this
752 		 * behaviour that we want to work around it.
753 		 */
754 		if ( ! mdoc_elem_alloc(m, line, offs, MDOC_sp, NULL))
755 			return(0);
756 
757 		m->next = MDOC_NEXT_SIBLING;
758 		return(1);
759 	}
760 
761 	if ( ! mdoc_word_alloc(m, line, offs, buf+offs))
762 		return(0);
763 
764 	if (MDOC_LITERAL & m->flags)
765 		return(1);
766 
767 	/*
768 	 * End-of-sentence check.  If the last character is an unescaped
769 	 * EOS character, then flag the node as being the end of a
770 	 * sentence.  The front-end will know how to interpret this.
771 	 */
772 
773 	assert(buf < end);
774 
775 	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0))
776 		m->last->flags |= MDOC_EOS;
777 
778 	return(1);
779 }
780 
781 
782 /*
783  * Parse a macro line, that is, a line beginning with the control
784  * character.
785  */
786 static int
787 mdoc_pmacro(struct mdoc *m, int ln, char *buf, int offs)
788 {
789 	enum mdoct	  tok;
790 	int		  i, sv;
791 	char		  mac[5];
792 	struct mdoc_node *n;
793 
794 	/* Empty post-control lines are ignored. */
795 
796 	if ('"' == buf[offs]) {
797 		mdoc_pmsg(m, ln, offs, MANDOCERR_BADCOMMENT);
798 		return(1);
799 	} else if ('\0' == buf[offs])
800 		return(1);
801 
802 	sv = offs;
803 
804 	/*
805 	 * Copy the first word into a nil-terminated buffer.
806 	 * Stop copying when a tab, space, or eoln is encountered.
807 	 */
808 
809 	i = 0;
810 	while (i < 4 && '\0' != buf[offs] &&
811 			' ' != buf[offs] && '\t' != buf[offs])
812 		mac[i++] = buf[offs++];
813 
814 	mac[i] = '\0';
815 
816 	tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
817 
818 	if (MDOC_MAX == tok) {
819 		mandoc_vmsg(MANDOCERR_MACRO, m->parse,
820 				ln, sv, "%s", buf + sv - 1);
821 		return(1);
822 	}
823 
824 	/* Disregard the first trailing tab, if applicable. */
825 
826 	if ('\t' == buf[offs])
827 		offs++;
828 
829 	/* Jump to the next non-whitespace word. */
830 
831 	while (buf[offs] && ' ' == buf[offs])
832 		offs++;
833 
834 	/*
835 	 * Trailing whitespace.  Note that tabs are allowed to be passed
836 	 * into the parser as "text", so we only warn about spaces here.
837 	 */
838 
839 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
840 		mdoc_pmsg(m, ln, offs - 1, MANDOCERR_EOLNSPACE);
841 
842 	/*
843 	 * If an initial macro or a list invocation, divert directly
844 	 * into macro processing.
845 	 */
846 
847 	if (NULL == m->last || MDOC_It == tok || MDOC_El == tok) {
848 		if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf))
849 			goto err;
850 		return(1);
851 	}
852 
853 	n = m->last;
854 	assert(m->last);
855 
856 	/*
857 	 * If the first macro of a `Bl -column', open an `It' block
858 	 * context around the parsed macro.
859 	 */
860 
861 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
862 			LIST_column == n->norm->Bl.type) {
863 		m->flags |= MDOC_FREECOL;
864 		if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf))
865 			goto err;
866 		return(1);
867 	}
868 
869 	/*
870 	 * If we're following a block-level `It' within a `Bl -column'
871 	 * context (perhaps opened in the above block or in ptext()),
872 	 * then open an `It' block context around the parsed macro.
873 	 */
874 
875 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
876 			NULL != n->parent &&
877 			MDOC_Bl == n->parent->tok &&
878 			LIST_column == n->parent->norm->Bl.type) {
879 		m->flags |= MDOC_FREECOL;
880 		if ( ! mdoc_macro(m, MDOC_It, ln, sv, &sv, buf))
881 			goto err;
882 		return(1);
883 	}
884 
885 	/* Normal processing of a macro. */
886 
887 	if ( ! mdoc_macro(m, tok, ln, sv, &offs, buf))
888 		goto err;
889 
890 	return(1);
891 
892 err:	/* Error out. */
893 
894 	m->flags |= MDOC_HALT;
895 	return(0);
896 }
897 
898 enum mdelim
899 mdoc_isdelim(const char *p)
900 {
901 
902 	if ('\0' == p[0])
903 		return(DELIM_NONE);
904 
905 	if ('\0' == p[1])
906 		switch (p[0]) {
907 		case('('):
908 			/* FALLTHROUGH */
909 		case('['):
910 			return(DELIM_OPEN);
911 		case('|'):
912 			return(DELIM_MIDDLE);
913 		case('.'):
914 			/* FALLTHROUGH */
915 		case(','):
916 			/* FALLTHROUGH */
917 		case(';'):
918 			/* FALLTHROUGH */
919 		case(':'):
920 			/* FALLTHROUGH */
921 		case('?'):
922 			/* FALLTHROUGH */
923 		case('!'):
924 			/* FALLTHROUGH */
925 		case(')'):
926 			/* FALLTHROUGH */
927 		case(']'):
928 			return(DELIM_CLOSE);
929 		default:
930 			return(DELIM_NONE);
931 		}
932 
933 	if ('\\' != p[0])
934 		return(DELIM_NONE);
935 
936 	if (0 == strcmp(p + 1, "."))
937 		return(DELIM_CLOSE);
938 	if (0 == strcmp(p + 1, "*(Ba"))
939 		return(DELIM_MIDDLE);
940 
941 	return(DELIM_NONE);
942 }
943