xref: /dragonfly/contrib/mdocml/man.c (revision 0fe46dc6)
1 /*	$Id: man.c,v 1.137 2014/08/01 21:24:17 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 
23 #include <sys/types.h>
24 
25 #include <assert.h>
26 #include <ctype.h>
27 #include <stdarg.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 #include "man.h"
33 #include "mandoc.h"
34 #include "mandoc_aux.h"
35 #include "libman.h"
36 #include "libmandoc.h"
37 
38 const	char *const __man_macronames[MAN_MAX] = {
39 	"br",		"TH",		"SH",		"SS",
40 	"TP",		"LP",		"PP",		"P",
41 	"IP",		"HP",		"SM",		"SB",
42 	"BI",		"IB",		"BR",		"RB",
43 	"R",		"B",		"I",		"IR",
44 	"RI",		"na",		"sp",		"nf",
45 	"fi",		"RE",		"RS",		"DT",
46 	"UC",		"PD",		"AT",		"in",
47 	"ft",		"OP",		"EX",		"EE",
48 	"UR",		"UE",		"ll"
49 	};
50 
51 const	char * const *man_macronames = __man_macronames;
52 
/* Forward declarations of the file-local helpers defined further down. */
static	struct man_node	*man_node_alloc(struct man *man, int line, int pos,
				enum man_type type, enum mant tok);
static	int		 man_node_append(struct man *man, struct man_node *p);
static	void		 man_node_free(struct man_node *p);
static	void		 man_node_unlink(struct man *man, struct man_node *n);
static	int		 man_ptext(struct man *man, int line, char *buf,
				int offs);
static	int		 man_pmacro(struct man *man, int ln, char *buf,
				int offs);
static	void		 man_free1(struct man *man);
static	void		 man_alloc1(struct man *man);
static	int		 man_descope(struct man *man, int line, int offs);
65 
66 
67 const struct man_node *
68 man_node(const struct man *man)
69 {
70 
71 	return(man->first);
72 }
73 
74 const struct man_meta *
75 man_meta(const struct man *man)
76 {
77 
78 	return(&man->meta);
79 }
80 
/*
 * Bring a parser back to its freshly-allocated state: release the
 * node tree and the meta strings, then install a new root node and
 * clear the flags.  The parser object itself is kept.
 */
void
man_reset(struct man *man)
{
	man_free1(man);		/* drop the old tree and meta strings */
	man_alloc1(man);	/* start over with an empty root */
}
88 
/*
 * Destroy a parser: first everything it owns (node tree and meta
 * strings, via man_free1()), then the parser object itself.
 */
void
man_free(struct man *man)
{
	man_free1(man);		/* owned resources first */
	free(man);		/* then the container */
}
96 
97 struct man *
98 man_alloc(struct roff *roff, struct mparse *parse, int quick)
99 {
100 	struct man	*p;
101 
102 	p = mandoc_calloc(1, sizeof(struct man));
103 
104 	man_hash_init();
105 	p->parse = parse;
106 	p->quick = quick;
107 	p->roff = roff;
108 
109 	man_alloc1(p);
110 	return(p);
111 }
112 
/*
 * Finish a parse.  All of the work is delegated to man_macroend(),
 * whose result is passed through unchanged.
 */
int
man_endparse(struct man *man)
{
	int	 rc;

	rc = man_macroend(man);
	return rc;
}
119 
120 int
121 man_parseln(struct man *man, int ln, char *buf, int offs)
122 {
123 
124 	man->flags |= MAN_NEWLINE;
125 
126 	return (roff_getcontrol(man->roff, buf, &offs) ?
127 	    man_pmacro(man, ln, buf, offs) :
128 	    man_ptext(man, ln, buf, offs));
129 }
130 
131 static void
132 man_free1(struct man *man)
133 {
134 
135 	if (man->first)
136 		man_node_delete(man, man->first);
137 	if (man->meta.title)
138 		free(man->meta.title);
139 	if (man->meta.source)
140 		free(man->meta.source);
141 	if (man->meta.date)
142 		free(man->meta.date);
143 	if (man->meta.vol)
144 		free(man->meta.vol);
145 	if (man->meta.msec)
146 		free(man->meta.msec);
147 }
148 
149 static void
150 man_alloc1(struct man *man)
151 {
152 
153 	memset(&man->meta, 0, sizeof(struct man_meta));
154 	man->flags = 0;
155 	man->last = mandoc_calloc(1, sizeof(struct man_node));
156 	man->first = man->last;
157 	man->last->type = MAN_ROOT;
158 	man->last->tok = MAN_MAX;
159 	man->next = MAN_NEXT_CHILD;
160 }
161 
162 
163 static int
164 man_node_append(struct man *man, struct man_node *p)
165 {
166 
167 	assert(man->last);
168 	assert(man->first);
169 	assert(MAN_ROOT != p->type);
170 
171 	switch (man->next) {
172 	case MAN_NEXT_SIBLING:
173 		man->last->next = p;
174 		p->prev = man->last;
175 		p->parent = man->last->parent;
176 		break;
177 	case MAN_NEXT_CHILD:
178 		man->last->child = p;
179 		p->parent = man->last;
180 		break;
181 	default:
182 		abort();
183 		/* NOTREACHED */
184 	}
185 
186 	assert(p->parent);
187 	p->parent->nchild++;
188 
189 	switch (p->type) {
190 	case MAN_BLOCK:
191 		if (p->tok == MAN_SH || p->tok == MAN_SS)
192 			man->flags &= ~MAN_LITERAL;
193 		break;
194 	case MAN_HEAD:
195 		assert(MAN_BLOCK == p->parent->type);
196 		p->parent->head = p;
197 		break;
198 	case MAN_TAIL:
199 		assert(MAN_BLOCK == p->parent->type);
200 		p->parent->tail = p;
201 		break;
202 	case MAN_BODY:
203 		assert(MAN_BLOCK == p->parent->type);
204 		p->parent->body = p;
205 		break;
206 	default:
207 		break;
208 	}
209 
210 	man->last = p;
211 
212 	switch (p->type) {
213 	case MAN_TBL:
214 		/* FALLTHROUGH */
215 	case MAN_TEXT:
216 		if ( ! man_valid_post(man))
217 			return(0);
218 		break;
219 	default:
220 		break;
221 	}
222 
223 	return(1);
224 }
225 
226 static struct man_node *
227 man_node_alloc(struct man *man, int line, int pos,
228 		enum man_type type, enum mant tok)
229 {
230 	struct man_node *p;
231 
232 	p = mandoc_calloc(1, sizeof(struct man_node));
233 	p->line = line;
234 	p->pos = pos;
235 	p->type = type;
236 	p->tok = tok;
237 
238 	if (MAN_NEWLINE & man->flags)
239 		p->flags |= MAN_LINE;
240 	man->flags &= ~MAN_NEWLINE;
241 	return(p);
242 }
243 
244 int
245 man_elem_alloc(struct man *man, int line, int pos, enum mant tok)
246 {
247 	struct man_node *p;
248 
249 	p = man_node_alloc(man, line, pos, MAN_ELEM, tok);
250 	if ( ! man_node_append(man, p))
251 		return(0);
252 	man->next = MAN_NEXT_CHILD;
253 	return(1);
254 }
255 
256 int
257 man_tail_alloc(struct man *man, int line, int pos, enum mant tok)
258 {
259 	struct man_node *p;
260 
261 	p = man_node_alloc(man, line, pos, MAN_TAIL, tok);
262 	if ( ! man_node_append(man, p))
263 		return(0);
264 	man->next = MAN_NEXT_CHILD;
265 	return(1);
266 }
267 
268 int
269 man_head_alloc(struct man *man, int line, int pos, enum mant tok)
270 {
271 	struct man_node *p;
272 
273 	p = man_node_alloc(man, line, pos, MAN_HEAD, tok);
274 	if ( ! man_node_append(man, p))
275 		return(0);
276 	man->next = MAN_NEXT_CHILD;
277 	return(1);
278 }
279 
280 int
281 man_body_alloc(struct man *man, int line, int pos, enum mant tok)
282 {
283 	struct man_node *p;
284 
285 	p = man_node_alloc(man, line, pos, MAN_BODY, tok);
286 	if ( ! man_node_append(man, p))
287 		return(0);
288 	man->next = MAN_NEXT_CHILD;
289 	return(1);
290 }
291 
292 int
293 man_block_alloc(struct man *man, int line, int pos, enum mant tok)
294 {
295 	struct man_node *p;
296 
297 	p = man_node_alloc(man, line, pos, MAN_BLOCK, tok);
298 	if ( ! man_node_append(man, p))
299 		return(0);
300 	man->next = MAN_NEXT_CHILD;
301 	return(1);
302 }
303 
304 int
305 man_word_alloc(struct man *man, int line, int pos, const char *word)
306 {
307 	struct man_node	*n;
308 
309 	n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX);
310 	n->string = roff_strdup(man->roff, word);
311 
312 	if ( ! man_node_append(man, n))
313 		return(0);
314 
315 	man->next = MAN_NEXT_SIBLING;
316 	return(1);
317 }
318 
319 /*
320  * Free all of the resources held by a node.  This does NOT unlink a
321  * node from its context; for that, see man_node_unlink().
322  */
323 static void
324 man_node_free(struct man_node *p)
325 {
326 
327 	if (p->string)
328 		free(p->string);
329 	free(p);
330 }
331 
332 void
333 man_node_delete(struct man *man, struct man_node *p)
334 {
335 
336 	while (p->child)
337 		man_node_delete(man, p->child);
338 
339 	man_node_unlink(man, p);
340 	man_node_free(p);
341 }
342 
343 int
344 man_addeqn(struct man *man, const struct eqn *ep)
345 {
346 	struct man_node	*n;
347 
348 	n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX);
349 	n->eqn = ep;
350 
351 	if ( ! man_node_append(man, n))
352 		return(0);
353 
354 	man->next = MAN_NEXT_SIBLING;
355 	return(man_descope(man, ep->ln, ep->pos));
356 }
357 
358 int
359 man_addspan(struct man *man, const struct tbl_span *sp)
360 {
361 	struct man_node	*n;
362 
363 	n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX);
364 	n->span = sp;
365 
366 	if ( ! man_node_append(man, n))
367 		return(0);
368 
369 	man->next = MAN_NEXT_SIBLING;
370 	return(man_descope(man, sp->line, 0));
371 }
372 
373 static int
374 man_descope(struct man *man, int line, int offs)
375 {
376 	/*
377 	 * Co-ordinate what happens with having a next-line scope open:
378 	 * first close out the element scope (if applicable), then close
379 	 * out the block scope (also if applicable).
380 	 */
381 
382 	if (MAN_ELINE & man->flags) {
383 		man->flags &= ~MAN_ELINE;
384 		if ( ! man_unscope(man, man->last->parent))
385 			return(0);
386 	}
387 
388 	if ( ! (MAN_BLINE & man->flags))
389 		return(1);
390 	man->flags &= ~MAN_BLINE;
391 
392 	if ( ! man_unscope(man, man->last->parent))
393 		return(0);
394 	return(man_body_alloc(man, line, offs, man->last->tok));
395 }
396 
397 static int
398 man_ptext(struct man *man, int line, char *buf, int offs)
399 {
400 	int		 i;
401 
402 	/* Literal free-form text whitespace is preserved. */
403 
404 	if (MAN_LITERAL & man->flags) {
405 		if ( ! man_word_alloc(man, line, offs, buf + offs))
406 			return(0);
407 		return(man_descope(man, line, offs));
408 	}
409 
410 	for (i = offs; ' ' == buf[i]; i++)
411 		/* Skip leading whitespace. */ ;
412 
413 	/*
414 	 * Blank lines are ignored right after headings
415 	 * but add a single vertical space elsewhere.
416 	 */
417 
418 	if ('\0' == buf[i]) {
419 		/* Allocate a blank entry. */
420 		if (MAN_SH != man->last->tok &&
421 		    MAN_SS != man->last->tok) {
422 			if ( ! man_elem_alloc(man, line, offs, MAN_sp))
423 				return(0);
424 			man->next = MAN_NEXT_SIBLING;
425 		}
426 		return(1);
427 	}
428 
429 	/*
430 	 * Warn if the last un-escaped character is whitespace. Then
431 	 * strip away the remaining spaces (tabs stay!).
432 	 */
433 
434 	i = (int)strlen(buf);
435 	assert(i);
436 
437 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
438 		if (i > 1 && '\\' != buf[i - 2])
439 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
440 			    line, i - 1, NULL);
441 
442 		for (--i; i && ' ' == buf[i]; i--)
443 			/* Spin back to non-space. */ ;
444 
445 		/* Jump ahead of escaped whitespace. */
446 		i += '\\' == buf[i] ? 2 : 1;
447 
448 		buf[i] = '\0';
449 	}
450 
451 	if ( ! man_word_alloc(man, line, offs, buf + offs))
452 		return(0);
453 
454 	/*
455 	 * End-of-sentence check.  If the last character is an unescaped
456 	 * EOS character, then flag the node as being the end of a
457 	 * sentence.  The front-end will know how to interpret this.
458 	 */
459 
460 	assert(i);
461 	if (mandoc_eos(buf, (size_t)i))
462 		man->last->flags |= MAN_EOS;
463 
464 	return(man_descope(man, line, offs));
465 }
466 
467 static int
468 man_pmacro(struct man *man, int ln, char *buf, int offs)
469 {
470 	char		 mac[5];
471 	struct man_node	*n;
472 	enum mant	 tok;
473 	int		 i, ppos;
474 	int		 bline;
475 
476 	if ('"' == buf[offs]) {
477 		mandoc_msg(MANDOCERR_COMMENT_BAD, man->parse,
478 		    ln, offs, NULL);
479 		return(1);
480 	} else if ('\0' == buf[offs])
481 		return(1);
482 
483 	ppos = offs;
484 
485 	/*
486 	 * Copy the first word into a nil-terminated buffer.
487 	 * Stop copying when a tab, space, or eoln is encountered.
488 	 */
489 
490 	i = 0;
491 	while (i < 4 && '\0' != buf[offs] && ' ' != buf[offs] &&
492 	    '\t' != buf[offs])
493 		mac[i++] = buf[offs++];
494 
495 	mac[i] = '\0';
496 
497 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX;
498 
499 	if (MAN_MAX == tok) {
500 		mandoc_msg(MANDOCERR_MACRO, man->parse,
501 		    ln, ppos, buf + ppos - 1);
502 		return(1);
503 	}
504 
505 	/* The macro is sane.  Jump to the next word. */
506 
507 	while (buf[offs] && ' ' == buf[offs])
508 		offs++;
509 
510 	/*
511 	 * Trailing whitespace.  Note that tabs are allowed to be passed
512 	 * into the parser as "text", so we only warn about spaces here.
513 	 */
514 
515 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
516 		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
517 		    ln, offs - 1, NULL);
518 
519 	/*
520 	 * Remove prior ELINE macro, as it's being clobbered by a new
521 	 * macro.  Note that NSCOPED macros do not close out ELINE
522 	 * macros---they don't print text---so we let those slip by.
523 	 */
524 
525 	if ( ! (MAN_NSCOPED & man_macros[tok].flags) &&
526 			man->flags & MAN_ELINE) {
527 		n = man->last;
528 		assert(MAN_TEXT != n->type);
529 
530 		/* Remove repeated NSCOPED macros causing ELINE. */
531 
532 		if (MAN_NSCOPED & man_macros[n->tok].flags)
533 			n = n->parent;
534 
535 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line,
536 		    n->pos, "%s breaks %s", man_macronames[tok],
537 		    man_macronames[n->tok]);
538 
539 		man_node_delete(man, n);
540 		man->flags &= ~MAN_ELINE;
541 	}
542 
543 	/*
544 	 * Remove prior BLINE macro that is being clobbered.
545 	 */
546 	if ((man->flags & MAN_BLINE) &&
547 	    (MAN_BSCOPE & man_macros[tok].flags)) {
548 		n = man->last;
549 
550 		/* Might be a text node like 8 in
551 		 * .TP 8
552 		 * .SH foo
553 		 */
554 		if (MAN_TEXT == n->type)
555 			n = n->parent;
556 
557 		/* Remove element that didn't end BLINE, if any. */
558 		if ( ! (MAN_BSCOPE & man_macros[n->tok].flags))
559 			n = n->parent;
560 
561 		assert(MAN_HEAD == n->type);
562 		n = n->parent;
563 		assert(MAN_BLOCK == n->type);
564 		assert(MAN_SCOPED & man_macros[n->tok].flags);
565 
566 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, n->line,
567 		    n->pos, "%s breaks %s", man_macronames[tok],
568 		    man_macronames[n->tok]);
569 
570 		man_node_delete(man, n);
571 		man->flags &= ~MAN_BLINE;
572 	}
573 
574 	/* Remember whether we are in next-line scope for a block head. */
575 
576 	bline = man->flags & MAN_BLINE;
577 
578 	/* Call to handler... */
579 
580 	assert(man_macros[tok].fp);
581 	if ( ! (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf))
582 		return(0);
583 
584 	/* In quick mode (for mandocdb), abort after the NAME section. */
585 
586 	if (man->quick && MAN_SH == tok) {
587 		n = man->last;
588 		if (MAN_BODY == n->type &&
589 		    strcmp(n->prev->child->string, "NAME"))
590 			return(2);
591 	}
592 
593 	/*
594 	 * If we are in a next-line scope for a block head,
595 	 * close it out now and switch to the body,
596 	 * unless the next-line scope is allowed to continue.
597 	 */
598 
599 	if ( ! bline || man->flags & MAN_ELINE ||
600 	    man_macros[tok].flags & MAN_NSCOPED)
601 		return(1);
602 
603 	assert(MAN_BLINE & man->flags);
604 	man->flags &= ~MAN_BLINE;
605 
606 	if ( ! man_unscope(man, man->last->parent))
607 		return(0);
608 	return(man_body_alloc(man, ln, ppos, man->last->tok));
609 }
610 
611 /*
612  * Unlink a node from its context.  If "man" is provided, the last parse
613  * point will also be adjusted accordingly.
614  */
615 static void
616 man_node_unlink(struct man *man, struct man_node *n)
617 {
618 
619 	/* Adjust siblings. */
620 
621 	if (n->prev)
622 		n->prev->next = n->next;
623 	if (n->next)
624 		n->next->prev = n->prev;
625 
626 	/* Adjust parent. */
627 
628 	if (n->parent) {
629 		n->parent->nchild--;
630 		if (n->parent->child == n)
631 			n->parent->child = n->prev ? n->prev : n->next;
632 	}
633 
634 	/* Adjust parse point, if applicable. */
635 
636 	if (man && man->last == n) {
637 		/*XXX: this can occur when bailing from validation. */
638 		/*assert(NULL == n->next);*/
639 		if (n->prev) {
640 			man->last = n->prev;
641 			man->next = MAN_NEXT_SIBLING;
642 		} else {
643 			man->last = n->parent;
644 			man->next = MAN_NEXT_CHILD;
645 		}
646 	}
647 
648 	if (man && man->first == n)
649 		man->first = NULL;
650 }
651 
652 const struct mparse *
653 man_mparse(const struct man *man)
654 {
655 
656 	assert(man && man->parse);
657 	return(man->parse);
658 }
659 
660 void
661 man_deroff(char **dest, const struct man_node *n)
662 {
663 	char	*cp;
664 	size_t	 sz;
665 
666 	if (MAN_TEXT != n->type) {
667 		for (n = n->child; n; n = n->next)
668 			man_deroff(dest, n);
669 		return;
670 	}
671 
672 	/* Skip leading whitespace and escape sequences. */
673 
674 	cp = n->string;
675 	while ('\0' != *cp) {
676 		if ('\\' == *cp) {
677 			cp++;
678 			mandoc_escape((const char **)&cp, NULL, NULL);
679 		} else if (isspace((unsigned char)*cp))
680 			cp++;
681 		else
682 			break;
683 	}
684 
685 	/* Skip trailing whitespace. */
686 
687 	for (sz = strlen(cp); sz; sz--)
688 		if (0 == isspace((unsigned char)cp[sz-1]))
689 			break;
690 
691 	/* Skip empty strings. */
692 
693 	if (0 == sz)
694 		return;
695 
696 	if (NULL == *dest) {
697 		*dest = mandoc_strndup(cp, sz);
698 		return;
699 	}
700 
701 	mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp);
702 	free(*dest);
703 	*dest = cp;
704 }
705