xref: /openbsd/usr.bin/mandoc/man.c (revision 8529ddd3)
1 /*	$OpenBSD: man.c,v 1.111 2015/04/23 16:17:04 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include <sys/types.h>
20 
21 #include <assert.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "man.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libman.h"
35 
36 const	char *const __man_macronames[MAN_MAX] = {
37 	"br",		"TH",		"SH",		"SS",
38 	"TP",		"LP",		"PP",		"P",
39 	"IP",		"HP",		"SM",		"SB",
40 	"BI",		"IB",		"BR",		"RB",
41 	"R",		"B",		"I",		"IR",
42 	"RI",		"sp",		"nf",
43 	"fi",		"RE",		"RS",		"DT",
44 	"UC",		"PD",		"AT",		"in",
45 	"ft",		"OP",		"EX",		"EE",
46 	"UR",		"UE",		"ll"
47 	};
48 
49 const	char * const *man_macronames = __man_macronames;
50 
/* Close next-line scopes that are still open after a line. */
static	void		 man_descope(struct roff_man *man, int line,
				int offs);
/* Handle one line of free-form text. */
static	int		 man_ptext(struct roff_man *man, int line,
				char *buf, int offs);
/* Handle one macro (control) line. */
static	int		 man_pmacro(struct roff_man *man, int ln,
				char *buf, int offs);
54 
55 
56 int
57 man_parseln(struct roff_man *man, int ln, char *buf, int offs)
58 {
59 
60 	if (man->last->type != ROFFT_EQN || ln > man->last->line)
61 		man->flags |= MAN_NEWLINE;
62 
63 	return (roff_getcontrol(man->roff, buf, &offs) ?
64 	    man_pmacro(man, ln, buf, offs) :
65 	    man_ptext(man, ln, buf, offs));
66 }
67 
68 static void
69 man_descope(struct roff_man *man, int line, int offs)
70 {
71 	/*
72 	 * Co-ordinate what happens with having a next-line scope open:
73 	 * first close out the element scope (if applicable), then close
74 	 * out the block scope (also if applicable).
75 	 */
76 
77 	if (man->flags & MAN_ELINE) {
78 		man->flags &= ~MAN_ELINE;
79 		man_unscope(man, man->last->parent);
80 	}
81 	if ( ! (man->flags & MAN_BLINE))
82 		return;
83 	man->flags &= ~MAN_BLINE;
84 	man_unscope(man, man->last->parent);
85 	roff_body_alloc(man, line, offs, man->last->tok);
86 }
87 
88 static int
89 man_ptext(struct roff_man *man, int line, char *buf, int offs)
90 {
91 	int		 i;
92 
93 	/* Literal free-form text whitespace is preserved. */
94 
95 	if (man->flags & MAN_LITERAL) {
96 		roff_word_alloc(man, line, offs, buf + offs);
97 		man_descope(man, line, offs);
98 		return(1);
99 	}
100 
101 	for (i = offs; buf[i] == ' '; i++)
102 		/* Skip leading whitespace. */ ;
103 
104 	/*
105 	 * Blank lines are ignored right after headings
106 	 * but add a single vertical space elsewhere.
107 	 */
108 
109 	if (buf[i] == '\0') {
110 		/* Allocate a blank entry. */
111 		if (man->last->tok != MAN_SH &&
112 		    man->last->tok != MAN_SS) {
113 			roff_elem_alloc(man, line, offs, MAN_sp);
114 			man->next = ROFF_NEXT_SIBLING;
115 		}
116 		return(1);
117 	}
118 
119 	/*
120 	 * Warn if the last un-escaped character is whitespace. Then
121 	 * strip away the remaining spaces (tabs stay!).
122 	 */
123 
124 	i = (int)strlen(buf);
125 	assert(i);
126 
127 	if (' ' == buf[i - 1] || '\t' == buf[i - 1]) {
128 		if (i > 1 && '\\' != buf[i - 2])
129 			mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
130 			    line, i - 1, NULL);
131 
132 		for (--i; i && ' ' == buf[i]; i--)
133 			/* Spin back to non-space. */ ;
134 
135 		/* Jump ahead of escaped whitespace. */
136 		i += '\\' == buf[i] ? 2 : 1;
137 
138 		buf[i] = '\0';
139 	}
140 	roff_word_alloc(man, line, offs, buf + offs);
141 
142 	/*
143 	 * End-of-sentence check.  If the last character is an unescaped
144 	 * EOS character, then flag the node as being the end of a
145 	 * sentence.  The front-end will know how to interpret this.
146 	 */
147 
148 	assert(i);
149 	if (mandoc_eos(buf, (size_t)i))
150 		man->last->flags |= MAN_EOS;
151 
152 	man_descope(man, line, offs);
153 	return(1);
154 }
155 
156 static int
157 man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
158 {
159 	struct roff_node *n;
160 	const char	*cp;
161 	int		 tok;
162 	int		 i, ppos;
163 	int		 bline;
164 	char		 mac[5];
165 
166 	ppos = offs;
167 
168 	/*
169 	 * Copy the first word into a nil-terminated buffer.
170 	 * Stop when a space, tab, escape, or eoln is encountered.
171 	 */
172 
173 	i = 0;
174 	while (i < 4 && strchr(" \t\\", buf[offs]) == NULL)
175 		mac[i++] = buf[offs++];
176 
177 	mac[i] = '\0';
178 
179 	tok = (i > 0 && i < 4) ? man_hash_find(mac) : TOKEN_NONE;
180 
181 	if (tok == TOKEN_NONE) {
182 		mandoc_msg(MANDOCERR_MACRO, man->parse,
183 		    ln, ppos, buf + ppos - 1);
184 		return(1);
185 	}
186 
187 	/* Skip a leading escape sequence or tab. */
188 
189 	switch (buf[offs]) {
190 	case '\\':
191 		cp = buf + offs + 1;
192 		mandoc_escape(&cp, NULL, NULL);
193 		offs = cp - buf;
194 		break;
195 	case '\t':
196 		offs++;
197 		break;
198 	default:
199 		break;
200 	}
201 
202 	/* Jump to the next non-whitespace word. */
203 
204 	while (buf[offs] && buf[offs] == ' ')
205 		offs++;
206 
207 	/*
208 	 * Trailing whitespace.  Note that tabs are allowed to be passed
209 	 * into the parser as "text", so we only warn about spaces here.
210 	 */
211 
212 	if (buf[offs] == '\0' && buf[offs - 1] == ' ')
213 		mandoc_msg(MANDOCERR_SPACE_EOL, man->parse,
214 		    ln, offs - 1, NULL);
215 
216 	/*
217 	 * Some macros break next-line scopes; otherwise, remember
218 	 * whether we are in next-line scope for a block head.
219 	 */
220 
221 	man_breakscope(man, tok);
222 	bline = man->flags & MAN_BLINE;
223 
224 	/* Call to handler... */
225 
226 	assert(man_macros[tok].fp);
227 	(*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf);
228 
229 	/* In quick mode (for mandocdb), abort after the NAME section. */
230 
231 	if (man->quick && tok == MAN_SH) {
232 		n = man->last;
233 		if (n->type == ROFFT_BODY &&
234 		    strcmp(n->prev->child->string, "NAME"))
235 			return(2);
236 	}
237 
238 	/*
239 	 * If we are in a next-line scope for a block head,
240 	 * close it out now and switch to the body,
241 	 * unless the next-line scope is allowed to continue.
242 	 */
243 
244 	if ( ! bline || man->flags & MAN_ELINE ||
245 	    man_macros[tok].flags & MAN_NSCOPED)
246 		return(1);
247 
248 	assert(man->flags & MAN_BLINE);
249 	man->flags &= ~MAN_BLINE;
250 
251 	man_unscope(man, man->last->parent);
252 	roff_body_alloc(man, ln, ppos, man->last->tok);
253 	return(1);
254 }
255 
256 void
257 man_breakscope(struct roff_man *man, int tok)
258 {
259 	struct roff_node *n;
260 
261 	/*
262 	 * An element next line scope is open,
263 	 * and the new macro is not allowed inside elements.
264 	 * Delete the element that is being broken.
265 	 */
266 
267 	if (man->flags & MAN_ELINE && (tok == TOKEN_NONE ||
268 	    ! (man_macros[tok].flags & MAN_NSCOPED))) {
269 		n = man->last;
270 		assert(n->type != ROFFT_TEXT);
271 		if (man_macros[n->tok].flags & MAN_NSCOPED)
272 			n = n->parent;
273 
274 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
275 		    n->line, n->pos, "%s breaks %s",
276 		    tok == TOKEN_NONE ? "TS" : man_macronames[tok],
277 		    man_macronames[n->tok]);
278 
279 		roff_node_delete(man, n);
280 		man->flags &= ~MAN_ELINE;
281 	}
282 
283 	/*
284 	 * A block header next line scope is open,
285 	 * and the new macro is not allowed inside block headers.
286 	 * Delete the block that is being broken.
287 	 */
288 
289 	if (man->flags & MAN_BLINE && (tok == TOKEN_NONE ||
290 	    man_macros[tok].flags & MAN_BSCOPE)) {
291 		n = man->last;
292 		if (n->type == ROFFT_TEXT)
293 			n = n->parent;
294 		if ( ! (man_macros[n->tok].flags & MAN_BSCOPE))
295 			n = n->parent;
296 
297 		assert(n->type == ROFFT_HEAD);
298 		n = n->parent;
299 		assert(n->type == ROFFT_BLOCK);
300 		assert(man_macros[n->tok].flags & MAN_SCOPED);
301 
302 		mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse,
303 		    n->line, n->pos, "%s breaks %s",
304 		    tok == TOKEN_NONE ? "TS" : man_macronames[tok],
305 		    man_macronames[n->tok]);
306 
307 		roff_node_delete(man, n);
308 		man->flags &= ~MAN_BLINE;
309 	}
310 }
311 
312 const struct mparse *
313 man_mparse(const struct roff_man *man)
314 {
315 
316 	assert(man && man->parse);
317 	return(man->parse);
318 }
319