xref: /openbsd/usr.bin/mandoc/mdoc.c (revision 6f40fd34)
1 /*	$OpenBSD: mdoc.c,v 1.156 2017/06/17 13:05:47 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <stdarg.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mandoc_aux.h"
29 #include "mandoc.h"
30 #include "roff.h"
31 #include "mdoc.h"
32 #include "libmandoc.h"
33 #include "roff_int.h"
34 #include "libmdoc.h"
35 
36 const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
37 	"split",		"nosplit",		"ragged",
38 	"unfilled",		"literal",		"file",
39 	"offset",		"bullet",		"dash",
40 	"hyphen",		"item",			"enum",
41 	"tag",			"diag",			"hang",
42 	"ohang",		"inset",		"column",
43 	"width",		"compact",		"std",
44 	"filled",		"words",		"emphasis",
45 	"symbolic",		"nested",		"centered"
46 };
47 const	char * const *mdoc_argnames = __mdoc_argnames;
48 
/* Line parsers local to this file: text lines and macro lines. */
static int	mdoc_ptext(struct roff_man *mdoc, int line,
			char *buf, int offs);
static int	mdoc_pmacro(struct roff_man *mdoc, int ln,
			char *buf, int offs);
51 
52 
53 /*
54  * Main parse routine.  Parses a single line -- really just hands off to
55  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
56  */
57 int
58 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
59 {
60 
61 	if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
62 		mdoc->flags |= MDOC_NEWLINE;
63 
64 	/*
65 	 * Let the roff nS register switch SYNOPSIS mode early,
66 	 * such that the parser knows at all times
67 	 * whether this mode is on or off.
68 	 * Note that this mode is also switched by the Sh macro.
69 	 */
70 	if (roff_getreg(mdoc->roff, "nS"))
71 		mdoc->flags |= MDOC_SYNOPSIS;
72 	else
73 		mdoc->flags &= ~MDOC_SYNOPSIS;
74 
75 	return roff_getcontrol(mdoc->roff, buf, &offs) ?
76 	    mdoc_pmacro(mdoc, ln, buf, offs) :
77 	    mdoc_ptext(mdoc, ln, buf, offs);
78 }
79 
80 void
81 mdoc_macro(MACRO_PROT_ARGS)
82 {
83 	assert(tok >= MDOC_Dd && tok < MDOC_MAX);
84 	(*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf);
85 }
86 
87 void
88 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
89 {
90 	struct roff_node *p;
91 
92 	p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
93 	roff_node_append(mdoc, p);
94 	mdoc->next = ROFF_NEXT_CHILD;
95 }
96 
97 struct roff_node *
98 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
99     enum roff_tok tok, struct roff_node *body)
100 {
101 	struct roff_node *p;
102 
103 	body->flags |= NODE_ENDED;
104 	body->parent->flags |= NODE_ENDED;
105 	p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
106 	p->body = body;
107 	p->norm = body->norm;
108 	p->end = ENDBODY_SPACE;
109 	roff_node_append(mdoc, p);
110 	mdoc->next = ROFF_NEXT_SIBLING;
111 	return p;
112 }
113 
114 struct roff_node *
115 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
116     enum roff_tok tok, struct mdoc_arg *args)
117 {
118 	struct roff_node *p;
119 
120 	p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
121 	p->args = args;
122 	if (p->args)
123 		(args->refcnt)++;
124 
125 	switch (tok) {
126 	case MDOC_Bd:
127 	case MDOC_Bf:
128 	case MDOC_Bl:
129 	case MDOC_En:
130 	case MDOC_Rs:
131 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
132 		break;
133 	default:
134 		break;
135 	}
136 	roff_node_append(mdoc, p);
137 	mdoc->next = ROFF_NEXT_CHILD;
138 	return p;
139 }
140 
141 void
142 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
143      enum roff_tok tok, struct mdoc_arg *args)
144 {
145 	struct roff_node *p;
146 
147 	p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
148 	p->args = args;
149 	if (p->args)
150 		(args->refcnt)++;
151 
152 	switch (tok) {
153 	case MDOC_An:
154 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
155 		break;
156 	default:
157 		break;
158 	}
159 	roff_node_append(mdoc, p);
160 	mdoc->next = ROFF_NEXT_CHILD;
161 }
162 
163 void
164 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p)
165 {
166 
167 	roff_node_unlink(mdoc, p);
168 	p->prev = p->next = NULL;
169 	roff_node_append(mdoc, p);
170 }
171 
172 /*
173  * Parse free-form text, that is, a line that does not begin with the
174  * control character.
175  */
176 static int
177 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
178 {
179 	struct roff_node *n;
180 	const char	 *cp, *sp;
181 	char		 *c, *ws, *end;
182 
183 	n = mdoc->last;
184 
185 	/*
186 	 * If a column list contains plain text, assume an implicit item
187 	 * macro.  This can happen one or more times at the beginning
188 	 * of such a list, intermixed with non-It mdoc macros and with
189 	 * nodes generated on the roff level, for example by tbl.
190 	 */
191 
192 	if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
193 	     n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
194 	    (n->parent != NULL && n->parent->tok == MDOC_Bl &&
195 	     n->parent->norm->Bl.type == LIST_column)) {
196 		mdoc->flags |= MDOC_FREECOL;
197 		mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf);
198 		return 1;
199 	}
200 
201 	/*
202 	 * Search for the beginning of unescaped trailing whitespace (ws)
203 	 * and for the first character not to be output (end).
204 	 */
205 
206 	/* FIXME: replace with strcspn(). */
207 	ws = NULL;
208 	for (c = end = buf + offs; *c; c++) {
209 		switch (*c) {
210 		case ' ':
211 			if (NULL == ws)
212 				ws = c;
213 			continue;
214 		case '\t':
215 			/*
216 			 * Always warn about trailing tabs,
217 			 * even outside literal context,
218 			 * where they should be put on the next line.
219 			 */
220 			if (NULL == ws)
221 				ws = c;
222 			/*
223 			 * Strip trailing tabs in literal context only;
224 			 * outside, they affect the next line.
225 			 */
226 			if (MDOC_LITERAL & mdoc->flags)
227 				continue;
228 			break;
229 		case '\\':
230 			/* Skip the escaped character, too, if any. */
231 			if (c[1])
232 				c++;
233 			/* FALLTHROUGH */
234 		default:
235 			ws = NULL;
236 			break;
237 		}
238 		end = c + 1;
239 	}
240 	*end = '\0';
241 
242 	if (ws)
243 		mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
244 		    line, (int)(ws-buf), NULL);
245 
246 	/*
247 	 * Blank lines are allowed in no-fill mode
248 	 * and cancel preceding \c,
249 	 * but add a single vertical space elsewhere.
250 	 */
251 
252 	if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) {
253 		switch (mdoc->last->type) {
254 		case ROFFT_TEXT:
255 			sp = mdoc->last->string;
256 			cp = end = strchr(sp, '\0') - 2;
257 			if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
258 				break;
259 			while (cp > sp && cp[-1] == '\\')
260 				cp--;
261 			if ((end - cp) % 2)
262 				break;
263 			*end = '\0';
264 			return 1;
265 		default:
266 			break;
267 		}
268 		mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse,
269 		    line, (int)(c - buf), NULL);
270 		roff_elem_alloc(mdoc, line, offs, ROFF_sp);
271 		mdoc->last->flags |= NODE_VALID | NODE_ENDED;
272 		mdoc->next = ROFF_NEXT_SIBLING;
273 		return 1;
274 	}
275 
276 	roff_word_alloc(mdoc, line, offs, buf+offs);
277 
278 	if (mdoc->flags & MDOC_LITERAL)
279 		return 1;
280 
281 	/*
282 	 * End-of-sentence check.  If the last character is an unescaped
283 	 * EOS character, then flag the node as being the end of a
284 	 * sentence.  The front-end will know how to interpret this.
285 	 */
286 
287 	assert(buf < end);
288 
289 	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
290 		mdoc->last->flags |= NODE_EOS;
291 
292 	for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
293 		if (c - buf < offs + 2)
294 			continue;
295 		if (end - c < 3)
296 			break;
297 		if (c[1] != ' ' ||
298 		    isalpha((unsigned char)c[-2]) == 0 ||
299 		    isalpha((unsigned char)c[-1]) == 0 ||
300 		    (c[-2] == 'n' && c[-1] == 'c') ||
301 		    (c[-2] == 'v' && c[-1] == 's'))
302 			continue;
303 		c += 2;
304 		if (*c == ' ')
305 			c++;
306 		if (*c == ' ')
307 			c++;
308 		if (isupper((unsigned char)(*c)))
309 			mandoc_msg(MANDOCERR_EOS, mdoc->parse,
310 			    line, (int)(c - buf), NULL);
311 	}
312 
313 	return 1;
314 }
315 
316 /*
317  * Parse a macro line, that is, a line beginning with the control
318  * character.
319  */
320 static int
321 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
322 {
323 	struct roff_node *n;
324 	const char	 *cp;
325 	size_t		  sz;
326 	enum roff_tok	  tok;
327 	int		  sv;
328 
329 	/* Determine the line macro. */
330 
331 	sv = offs;
332 	tok = TOKEN_NONE;
333 	for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
334 		offs++;
335 	if (sz == 2 || sz == 3)
336 		tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
337 	if (tok == TOKEN_NONE) {
338 		mandoc_msg(MANDOCERR_MACRO, mdoc->parse,
339 		    ln, sv, buf + sv - 1);
340 		return 1;
341 	}
342 
343 	/* Skip a leading escape sequence or tab. */
344 
345 	switch (buf[offs]) {
346 	case '\\':
347 		cp = buf + offs + 1;
348 		mandoc_escape(&cp, NULL, NULL);
349 		offs = cp - buf;
350 		break;
351 	case '\t':
352 		offs++;
353 		break;
354 	default:
355 		break;
356 	}
357 
358 	/* Jump to the next non-whitespace word. */
359 
360 	while (buf[offs] == ' ')
361 		offs++;
362 
363 	/*
364 	 * Trailing whitespace.  Note that tabs are allowed to be passed
365 	 * into the parser as "text", so we only warn about spaces here.
366 	 */
367 
368 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
369 		mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse,
370 		    ln, offs - 1, NULL);
371 
372 	/*
373 	 * If an initial macro or a list invocation, divert directly
374 	 * into macro processing.
375 	 */
376 
377 	n = mdoc->last;
378 	if (n == NULL || tok == MDOC_It || tok == MDOC_El) {
379 		mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
380 		return 1;
381 	}
382 
383 	/*
384 	 * If a column list contains a non-It macro, assume an implicit
385 	 * item macro.  This can happen one or more times at the
386 	 * beginning of such a list, intermixed with text lines and
387 	 * with nodes generated on the roff level, for example by tbl.
388 	 */
389 
390 	if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
391 	     n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
392 	    (n->parent != NULL && n->parent->tok == MDOC_Bl &&
393 	     n->parent->norm->Bl.type == LIST_column)) {
394 		mdoc->flags |= MDOC_FREECOL;
395 		mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf);
396 		return 1;
397 	}
398 
399 	/* Normal processing of a macro. */
400 
401 	mdoc_macro(mdoc, tok, ln, sv, &offs, buf);
402 
403 	/* In quick mode (for mandocdb), abort after the NAME section. */
404 
405 	if (mdoc->quick && MDOC_Sh == tok &&
406 	    SEC_NAME != mdoc->last->sec)
407 		return 2;
408 
409 	return 1;
410 }
411 
412 enum mdelim
413 mdoc_isdelim(const char *p)
414 {
415 
416 	if ('\0' == p[0])
417 		return DELIM_NONE;
418 
419 	if ('\0' == p[1])
420 		switch (p[0]) {
421 		case '(':
422 		case '[':
423 			return DELIM_OPEN;
424 		case '|':
425 			return DELIM_MIDDLE;
426 		case '.':
427 		case ',':
428 		case ';':
429 		case ':':
430 		case '?':
431 		case '!':
432 		case ')':
433 		case ']':
434 			return DELIM_CLOSE;
435 		default:
436 			return DELIM_NONE;
437 		}
438 
439 	if ('\\' != p[0])
440 		return DELIM_NONE;
441 
442 	if (0 == strcmp(p + 1, "."))
443 		return DELIM_CLOSE;
444 	if (0 == strcmp(p + 1, "fR|\\fP"))
445 		return DELIM_MIDDLE;
446 
447 	return DELIM_NONE;
448 }
449 
450 void
451 mdoc_validate(struct roff_man *mdoc)
452 {
453 
454 	mdoc->last = mdoc->first;
455 	mdoc_node_validate(mdoc);
456 	mdoc_state_reset(mdoc);
457 }
458