xref: /dragonfly/contrib/mdocml/man_validate.c (revision bb8c85ff)
1 /*	$Id: man_validate.c,v 1.105 2014/08/06 15:09:05 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <limits.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <time.h>
32 
33 #include "man.h"
34 #include "mandoc.h"
35 #include "mandoc_aux.h"
36 #include "libman.h"
37 #include "libmandoc.h"
38 
/* Parameter list shared by all of the validation callbacks below. */
#define	CHKARGS	  struct man *man, struct man_node *n

/* Type of a validation callback as stored in man_valids[]. */
typedef	int	(*v_check)(CHKARGS);

/* Generic checks: argument counts, node structure, root and text. */
static	int	  check_eq0(CHKARGS);
static	int	  check_eq2(CHKARGS);
static	int	  check_le1(CHKARGS);
static	int	  check_le5(CHKARGS);
static	int	  check_par(CHKARGS);
static	int	  check_part(CHKARGS);
static	int	  check_root(CHKARGS);
static	int	  check_text(CHKARGS);

/* Handlers dedicated to individual macros. */
static	int	  post_AT(CHKARGS);
static	int	  post_IP(CHKARGS);
static	int	  post_TH(CHKARGS);
static	int	  post_UC(CHKARGS);
static	int	  post_UR(CHKARGS);
static	int	  post_fi(CHKARGS);
static	int	  post_ft(CHKARGS);
static	int	  post_nf(CHKARGS);
static	int	  post_vs(CHKARGS);
61 
62 static	v_check man_valids[MAN_MAX] = {
63 	post_vs,    /* br */
64 	post_TH,    /* TH */
65 	NULL,       /* SH */
66 	NULL,       /* SS */
67 	NULL,       /* TP */
68 	check_par,  /* LP */
69 	check_par,  /* PP */
70 	check_par,  /* P */
71 	post_IP,    /* IP */
72 	NULL,       /* HP */
73 	NULL,       /* SM */
74 	NULL,       /* SB */
75 	NULL,       /* BI */
76 	NULL,       /* IB */
77 	NULL,       /* BR */
78 	NULL,       /* RB */
79 	NULL,       /* R */
80 	NULL,       /* B */
81 	NULL,       /* I */
82 	NULL,       /* IR */
83 	NULL,       /* RI */
84 	check_eq0,  /* na */
85 	post_vs,    /* sp */
86 	post_nf,    /* nf */
87 	post_fi,    /* fi */
88 	NULL,       /* RE */
89 	check_part, /* RS */
90 	NULL,       /* DT */
91 	post_UC,    /* UC */
92 	check_le1,  /* PD */
93 	post_AT,    /* AT */
94 	NULL,       /* in */
95 	post_ft,    /* ft */
96 	check_eq2,  /* OP */
97 	post_nf,    /* EX */
98 	post_fi,    /* EE */
99 	post_UR,    /* UR */
100 	NULL,       /* UE */
101 	NULL,       /* ll */
102 };
103 
104 
105 int
106 man_valid_post(struct man *man)
107 {
108 	struct man_node	*n;
109 	v_check		*cp;
110 
111 	n = man->last;
112 	if (n->flags & MAN_VALID)
113 		return(1);
114 	n->flags |= MAN_VALID;
115 
116 	switch (n->type) {
117 	case MAN_TEXT:
118 		return(check_text(man, n));
119 	case MAN_ROOT:
120 		return(check_root(man, n));
121 	case MAN_EQN:
122 		/* FALLTHROUGH */
123 	case MAN_TBL:
124 		return(1);
125 	default:
126 		cp = man_valids + n->tok;
127 		return(*cp ? (*cp)(man, n) : 1);
128 	}
129 }
130 
131 static int
132 check_root(CHKARGS)
133 {
134 
135 	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
136 
137 	if (NULL == man->first->child)
138 		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
139 		    n->line, n->pos, NULL);
140 	else
141 		man->meta.hasbody = 1;
142 
143 	if (NULL == man->meta.title) {
144 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
145 		    n->line, n->pos, NULL);
146 
147 		/*
148 		 * If a title hasn't been set, do so now (by
149 		 * implication, date and section also aren't set).
150 		 */
151 
152 		man->meta.title = mandoc_strdup("");
153 		man->meta.msec = mandoc_strdup("");
154 		man->meta.date = man->quick ? mandoc_strdup("") :
155 		    mandoc_normdate(man->parse, NULL, n->line, n->pos);
156 	}
157 
158 	return(1);
159 }
160 
161 static int
162 check_text(CHKARGS)
163 {
164 	char		*cp, *p;
165 
166 	if (MAN_LITERAL & man->flags)
167 		return(1);
168 
169 	cp = n->string;
170 	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
171 		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
172 		    n->line, n->pos + (p - cp), NULL);
173 	return(1);
174 }
175 
176 #define	INEQ_DEFINE(x, ineq, name) \
177 static int \
178 check_##name(CHKARGS) \
179 { \
180 	if (n->nchild ineq (x)) \
181 		return(1); \
182 	mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \
183 	    "line arguments %s %d (have %d)", \
184 	    #ineq, (x), n->nchild); \
185 	return(1); \
186 }
187 
188 INEQ_DEFINE(0, ==, eq0)
189 INEQ_DEFINE(2, ==, eq2)
190 INEQ_DEFINE(1, <=, le1)
191 INEQ_DEFINE(5, <=, le5)
192 
193 static int
194 post_UR(CHKARGS)
195 {
196 
197 	if (MAN_HEAD == n->type && 1 != n->nchild)
198 		mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
199 		    n->pos, "line arguments eq 1 (have %d)", n->nchild);
200 
201 	return(check_part(man, n));
202 }
203 
204 static int
205 post_ft(CHKARGS)
206 {
207 	char	*cp;
208 	int	 ok;
209 
210 	if (0 == n->nchild)
211 		return(1);
212 
213 	ok = 0;
214 	cp = n->child->string;
215 	switch (*cp) {
216 	case '1':
217 		/* FALLTHROUGH */
218 	case '2':
219 		/* FALLTHROUGH */
220 	case '3':
221 		/* FALLTHROUGH */
222 	case '4':
223 		/* FALLTHROUGH */
224 	case 'I':
225 		/* FALLTHROUGH */
226 	case 'P':
227 		/* FALLTHROUGH */
228 	case 'R':
229 		if ('\0' == cp[1])
230 			ok = 1;
231 		break;
232 	case 'B':
233 		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
234 			ok = 1;
235 		break;
236 	case 'C':
237 		if ('W' == cp[1] && '\0' == cp[2])
238 			ok = 1;
239 		break;
240 	default:
241 		break;
242 	}
243 
244 	if (0 == ok) {
245 		mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
246 		    n->line, n->pos, "ft %s", cp);
247 		*cp = '\0';
248 	}
249 
250 	if (1 < n->nchild)
251 		mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line,
252 		    n->pos, "want one child (have %d)", n->nchild);
253 
254 	return(1);
255 }
256 
257 static int
258 check_part(CHKARGS)
259 {
260 
261 	if (MAN_BODY == n->type && 0 == n->nchild)
262 		mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line,
263 		    n->pos, "want children (have none)");
264 
265 	return(1);
266 }
267 
268 static int
269 check_par(CHKARGS)
270 {
271 
272 	switch (n->type) {
273 	case MAN_BLOCK:
274 		if (0 == n->body->nchild)
275 			man_node_delete(man, n);
276 		break;
277 	case MAN_BODY:
278 		if (0 == n->nchild)
279 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
280 			    man->parse, n->line, n->pos,
281 			    "%s empty", man_macronames[n->tok]);
282 		break;
283 	case MAN_HEAD:
284 		if (n->nchild)
285 			mandoc_vmsg(MANDOCERR_ARG_SKIP,
286 			    man->parse, n->line, n->pos,
287 			    "%s %s%s", man_macronames[n->tok],
288 			    n->child->string,
289 			    n->nchild > 1 ? " ..." : "");
290 		break;
291 	default:
292 		break;
293 	}
294 
295 	return(1);
296 }
297 
298 static int
299 post_IP(CHKARGS)
300 {
301 
302 	switch (n->type) {
303 	case MAN_BLOCK:
304 		if (0 == n->head->nchild && 0 == n->body->nchild)
305 			man_node_delete(man, n);
306 		break;
307 	case MAN_BODY:
308 		if (0 == n->parent->head->nchild && 0 == n->nchild)
309 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
310 			    man->parse, n->line, n->pos,
311 			    "%s empty", man_macronames[n->tok]);
312 		break;
313 	default:
314 		break;
315 	}
316 	return(1);
317 }
318 
319 static int
320 post_TH(CHKARGS)
321 {
322 	struct man_node	*nb;
323 	const char	*p;
324 
325 	check_le5(man, n);
326 
327 	free(man->meta.title);
328 	free(man->meta.vol);
329 	free(man->meta.source);
330 	free(man->meta.msec);
331 	free(man->meta.date);
332 
333 	man->meta.title = man->meta.vol = man->meta.date =
334 	    man->meta.msec = man->meta.source = NULL;
335 
336 	nb = n;
337 
338 	/* ->TITLE<- MSEC DATE SOURCE VOL */
339 
340 	n = n->child;
341 	if (n && n->string) {
342 		for (p = n->string; '\0' != *p; p++) {
343 			/* Only warn about this once... */
344 			if (isalpha((unsigned char)*p) &&
345 			    ! isupper((unsigned char)*p)) {
346 				mandoc_vmsg(MANDOCERR_TITLE_CASE,
347 				    man->parse, n->line,
348 				    n->pos + (p - n->string),
349 				    "TH %s", n->string);
350 				break;
351 			}
352 		}
353 		man->meta.title = mandoc_strdup(n->string);
354 	} else {
355 		man->meta.title = mandoc_strdup("");
356 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
357 		    nb->line, nb->pos, "TH");
358 	}
359 
360 	/* TITLE ->MSEC<- DATE SOURCE VOL */
361 
362 	if (n)
363 		n = n->next;
364 	if (n && n->string)
365 		man->meta.msec = mandoc_strdup(n->string);
366 	else {
367 		man->meta.msec = mandoc_strdup("");
368 		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
369 		    nb->line, nb->pos, "TH %s", man->meta.title);
370 	}
371 
372 	/* TITLE MSEC ->DATE<- SOURCE VOL */
373 
374 	if (n)
375 		n = n->next;
376 	if (n && n->string && '\0' != n->string[0]) {
377 		man->meta.date = man->quick ?
378 		    mandoc_strdup(n->string) :
379 		    mandoc_normdate(man->parse, n->string,
380 			n->line, n->pos);
381 	} else {
382 		man->meta.date = mandoc_strdup("");
383 		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
384 		    n ? n->line : nb->line,
385 		    n ? n->pos : nb->pos, "TH");
386 	}
387 
388 	/* TITLE MSEC DATE ->SOURCE<- VOL */
389 
390 	if (n && (n = n->next))
391 		man->meta.source = mandoc_strdup(n->string);
392 
393 	/* TITLE MSEC DATE SOURCE ->VOL<- */
394 	/* If missing, use the default VOL name for MSEC. */
395 
396 	if (n && (n = n->next))
397 		man->meta.vol = mandoc_strdup(n->string);
398 	else if ('\0' != man->meta.msec[0] &&
399 	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
400 		man->meta.vol = mandoc_strdup(p);
401 
402 	/*
403 	 * Remove the `TH' node after we've processed it for our
404 	 * meta-data.
405 	 */
406 	man_node_delete(man, man->last);
407 	return(1);
408 }
409 
410 static int
411 post_nf(CHKARGS)
412 {
413 
414 	check_eq0(man, n);
415 
416 	if (MAN_LITERAL & man->flags)
417 		mandoc_msg(MANDOCERR_NF_SKIP, man->parse,
418 		    n->line, n->pos, "nf");
419 
420 	man->flags |= MAN_LITERAL;
421 	return(1);
422 }
423 
424 static int
425 post_fi(CHKARGS)
426 {
427 
428 	check_eq0(man, n);
429 
430 	if ( ! (MAN_LITERAL & man->flags))
431 		mandoc_msg(MANDOCERR_FI_SKIP, man->parse,
432 		    n->line, n->pos, "fi");
433 
434 	man->flags &= ~MAN_LITERAL;
435 	return(1);
436 }
437 
438 static int
439 post_UC(CHKARGS)
440 {
441 	static const char * const bsd_versions[] = {
442 	    "3rd Berkeley Distribution",
443 	    "4th Berkeley Distribution",
444 	    "4.2 Berkeley Distribution",
445 	    "4.3 Berkeley Distribution",
446 	    "4.4 Berkeley Distribution",
447 	};
448 
449 	const char	*p, *s;
450 
451 	n = n->child;
452 
453 	if (NULL == n || MAN_TEXT != n->type)
454 		p = bsd_versions[0];
455 	else {
456 		s = n->string;
457 		if (0 == strcmp(s, "3"))
458 			p = bsd_versions[0];
459 		else if (0 == strcmp(s, "4"))
460 			p = bsd_versions[1];
461 		else if (0 == strcmp(s, "5"))
462 			p = bsd_versions[2];
463 		else if (0 == strcmp(s, "6"))
464 			p = bsd_versions[3];
465 		else if (0 == strcmp(s, "7"))
466 			p = bsd_versions[4];
467 		else
468 			p = bsd_versions[0];
469 	}
470 
471 	free(man->meta.source);
472 	man->meta.source = mandoc_strdup(p);
473 	return(1);
474 }
475 
476 static int
477 post_AT(CHKARGS)
478 {
479 	static const char * const unix_versions[] = {
480 	    "7th Edition",
481 	    "System III",
482 	    "System V",
483 	    "System V Release 2",
484 	};
485 
486 	const char	*p, *s;
487 	struct man_node	*nn;
488 
489 	n = n->child;
490 
491 	if (NULL == n || MAN_TEXT != n->type)
492 		p = unix_versions[0];
493 	else {
494 		s = n->string;
495 		if (0 == strcmp(s, "3"))
496 			p = unix_versions[0];
497 		else if (0 == strcmp(s, "4"))
498 			p = unix_versions[1];
499 		else if (0 == strcmp(s, "5")) {
500 			nn = n->next;
501 			if (nn && MAN_TEXT == nn->type && nn->string[0])
502 				p = unix_versions[3];
503 			else
504 				p = unix_versions[2];
505 		} else
506 			p = unix_versions[0];
507 	}
508 
509 	free(man->meta.source);
510 	man->meta.source = mandoc_strdup(p);
511 	return(1);
512 }
513 
514 static int
515 post_vs(CHKARGS)
516 {
517 
518 	if (n->tok == MAN_br)
519 		check_eq0(man, n);
520 	else
521 		check_le1(man, n);
522 
523 	if (NULL != n->prev)
524 		return(1);
525 
526 	switch (n->parent->tok) {
527 	case MAN_SH:
528 		/* FALLTHROUGH */
529 	case MAN_SS:
530 		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
531 		    "%s after %s", man_macronames[n->tok],
532 		    man_macronames[n->parent->tok]);
533 		/* FALLTHROUGH */
534 	case MAN_MAX:
535 		/*
536 		 * Don't warn about this because it occurs in pod2man
537 		 * and would cause considerable (unfixable) warnage.
538 		 */
539 		man_node_delete(man, n);
540 		break;
541 	default:
542 		break;
543 	}
544 
545 	return(1);
546 }
547