1 /* $Id: man_validate.c,v 1.105 2014/08/06 15:09:05 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <errno.h> 27 #include <limits.h> 28 #include <stdarg.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <time.h> 32 33 #include "man.h" 34 #include "mandoc.h" 35 #include "mandoc_aux.h" 36 #include "libman.h" 37 #include "libmandoc.h" 38 39 #define CHKARGS struct man *man, struct man_node *n 40 41 typedef int (*v_check)(CHKARGS); 42 43 static int check_eq0(CHKARGS); 44 static int check_eq2(CHKARGS); 45 static int check_le1(CHKARGS); 46 static int check_le5(CHKARGS); 47 static int check_par(CHKARGS); 48 static int check_part(CHKARGS); 49 static int check_root(CHKARGS); 50 static int check_text(CHKARGS); 51 52 static int post_AT(CHKARGS); 53 static int post_IP(CHKARGS); 54 static int post_vs(CHKARGS); 55 static int post_fi(CHKARGS); 56 static int post_ft(CHKARGS); 57 static int post_nf(CHKARGS); 58 static int post_TH(CHKARGS); 59 static int post_UC(CHKARGS); 60 static int post_UR(CHKARGS); 61 62 static v_check man_valids[MAN_MAX] = { 63 post_vs, /* br */ 64 post_TH, /* TH */ 65 NULL, /* SH */ 66 NULL, /* SS */ 67 NULL, /* TP */ 68 check_par, /* LP */ 69 check_par, /* PP */ 70 check_par, /* P */ 71 post_IP, /* IP */ 72 NULL, /* HP */ 73 NULL, /* SM */ 74 NULL, /* SB */ 75 NULL, /* BI */ 76 NULL, /* IB */ 77 NULL, /* BR */ 78 NULL, /* RB */ 79 NULL, /* R */ 80 NULL, /* B */ 81 NULL, /* I */ 82 NULL, /* IR */ 83 NULL, /* RI */ 84 check_eq0, /* na */ 85 post_vs, /* sp */ 86 post_nf, /* nf */ 87 post_fi, /* fi */ 88 NULL, /* RE */ 89 check_part, /* RS */ 90 NULL, /* DT */ 91 post_UC, /* UC */ 92 check_le1, /* PD */ 93 post_AT, /* AT */ 94 NULL, /* in */ 95 post_ft, /* ft */ 96 check_eq2, /* OP */ 97 post_nf, /* EX */ 98 post_fi, /* EE */ 99 post_UR, /* UR */ 100 NULL, /* UE */ 101 NULL, /* ll */ 102 }; 103 104 105 int 106 man_valid_post(struct man *man) 107 { 108 struct man_node *n; 109 v_check *cp; 110 111 n = man->last; 112 if (n->flags & MAN_VALID) 113 return(1); 114 n->flags |= MAN_VALID; 115 116 switch (n->type) { 117 case MAN_TEXT: 118 return(check_text(man, n)); 119 case MAN_ROOT: 120 return(check_root(man, n)); 121 case MAN_EQN: 122 /* FALLTHROUGH */ 123 case MAN_TBL: 124 return(1); 125 default: 126 cp = man_valids + n->tok; 127 return(*cp ? (*cp)(man, n) : 1); 128 } 129 } 130 131 static int 132 check_root(CHKARGS) 133 { 134 135 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 136 137 if (NULL == man->first->child) 138 mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse, 139 n->line, n->pos, NULL); 140 else 141 man->meta.hasbody = 1; 142 143 if (NULL == man->meta.title) { 144 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 145 n->line, n->pos, NULL); 146 147 /* 148 * If a title hasn't been set, do so now (by 149 * implication, date and section also aren't set). 150 */ 151 152 man->meta.title = mandoc_strdup(""); 153 man->meta.msec = mandoc_strdup(""); 154 man->meta.date = man->quick ? mandoc_strdup("") : 155 mandoc_normdate(man->parse, NULL, n->line, n->pos); 156 } 157 158 return(1); 159 } 160 161 static int 162 check_text(CHKARGS) 163 { 164 char *cp, *p; 165 166 if (MAN_LITERAL & man->flags) 167 return(1); 168 169 cp = n->string; 170 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 171 mandoc_msg(MANDOCERR_FI_TAB, man->parse, 172 n->line, n->pos + (p - cp), NULL); 173 return(1); 174 } 175 176 #define INEQ_DEFINE(x, ineq, name) \ 177 static int \ 178 check_##name(CHKARGS) \ 179 { \ 180 if (n->nchild ineq (x)) \ 181 return(1); \ 182 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \ 183 "line arguments %s %d (have %d)", \ 184 #ineq, (x), n->nchild); \ 185 return(1); \ 186 } 187 188 INEQ_DEFINE(0, ==, eq0) 189 INEQ_DEFINE(2, ==, eq2) 190 INEQ_DEFINE(1, <=, le1) 191 INEQ_DEFINE(5, <=, le5) 192 193 static int 194 post_UR(CHKARGS) 195 { 196 197 if (MAN_HEAD == n->type && 1 != n->nchild) 198 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, 199 n->pos, "line arguments eq 1 (have %d)", n->nchild); 200 201 return(check_part(man, n)); 202 } 203 204 static int 205 post_ft(CHKARGS) 206 { 207 char *cp; 208 int ok; 209 210 if (0 == n->nchild) 211 return(1); 212 213 ok = 0; 214 cp = n->child->string; 215 switch (*cp) { 216 case '1': 217 /* FALLTHROUGH */ 218 case '2': 219 /* FALLTHROUGH */ 220 case '3': 221 /* FALLTHROUGH */ 222 case '4': 223 /* FALLTHROUGH */ 224 case 'I': 225 /* FALLTHROUGH */ 226 case 'P': 227 /* FALLTHROUGH */ 228 case 'R': 229 if ('\0' == cp[1]) 230 ok = 1; 231 break; 232 case 'B': 233 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 234 ok = 1; 235 break; 236 case 'C': 237 if ('W' == cp[1] && '\0' == cp[2]) 238 ok = 1; 239 break; 240 default: 241 break; 242 } 243 244 if (0 == ok) { 245 mandoc_vmsg(MANDOCERR_FT_BAD, man->parse, 246 n->line, n->pos, "ft %s", cp); 247 *cp = '\0'; 248 } 249 250 if (1 < n->nchild) 251 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, 252 n->pos, "want one child (have %d)", n->nchild); 253 254 return(1); 255 } 256 257 static int 258 check_part(CHKARGS) 259 { 260 261 if (MAN_BODY == n->type && 0 == n->nchild) 262 mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line, 263 n->pos, "want children (have none)"); 264 265 return(1); 266 } 267 268 static int 269 check_par(CHKARGS) 270 { 271 272 switch (n->type) { 273 case MAN_BLOCK: 274 if (0 == n->body->nchild) 275 man_node_delete(man, n); 276 break; 277 case MAN_BODY: 278 if (0 == n->nchild) 279 mandoc_vmsg(MANDOCERR_PAR_SKIP, 280 man->parse, n->line, n->pos, 281 "%s empty", man_macronames[n->tok]); 282 break; 283 case MAN_HEAD: 284 if (n->nchild) 285 mandoc_vmsg(MANDOCERR_ARG_SKIP, 286 man->parse, n->line, n->pos, 287 "%s %s%s", man_macronames[n->tok], 288 n->child->string, 289 n->nchild > 1 ? " ..." : ""); 290 break; 291 default: 292 break; 293 } 294 295 return(1); 296 } 297 298 static int 299 post_IP(CHKARGS) 300 { 301 302 switch (n->type) { 303 case MAN_BLOCK: 304 if (0 == n->head->nchild && 0 == n->body->nchild) 305 man_node_delete(man, n); 306 break; 307 case MAN_BODY: 308 if (0 == n->parent->head->nchild && 0 == n->nchild) 309 mandoc_vmsg(MANDOCERR_PAR_SKIP, 310 man->parse, n->line, n->pos, 311 "%s empty", man_macronames[n->tok]); 312 break; 313 default: 314 break; 315 } 316 return(1); 317 } 318 319 static int 320 post_TH(CHKARGS) 321 { 322 struct man_node *nb; 323 const char *p; 324 325 check_le5(man, n); 326 327 free(man->meta.title); 328 free(man->meta.vol); 329 free(man->meta.source); 330 free(man->meta.msec); 331 free(man->meta.date); 332 333 man->meta.title = man->meta.vol = man->meta.date = 334 man->meta.msec = man->meta.source = NULL; 335 336 nb = n; 337 338 /* ->TITLE<- MSEC DATE SOURCE VOL */ 339 340 n = n->child; 341 if (n && n->string) { 342 for (p = n->string; '\0' != *p; p++) { 343 /* Only warn about this once... */ 344 if (isalpha((unsigned char)*p) && 345 ! isupper((unsigned char)*p)) { 346 mandoc_vmsg(MANDOCERR_TITLE_CASE, 347 man->parse, n->line, 348 n->pos + (p - n->string), 349 "TH %s", n->string); 350 break; 351 } 352 } 353 man->meta.title = mandoc_strdup(n->string); 354 } else { 355 man->meta.title = mandoc_strdup(""); 356 mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse, 357 nb->line, nb->pos, "TH"); 358 } 359 360 /* TITLE ->MSEC<- DATE SOURCE VOL */ 361 362 if (n) 363 n = n->next; 364 if (n && n->string) 365 man->meta.msec = mandoc_strdup(n->string); 366 else { 367 man->meta.msec = mandoc_strdup(""); 368 mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse, 369 nb->line, nb->pos, "TH %s", man->meta.title); 370 } 371 372 /* TITLE MSEC ->DATE<- SOURCE VOL */ 373 374 if (n) 375 n = n->next; 376 if (n && n->string && '\0' != n->string[0]) { 377 man->meta.date = man->quick ? 378 mandoc_strdup(n->string) : 379 mandoc_normdate(man->parse, n->string, 380 n->line, n->pos); 381 } else { 382 man->meta.date = mandoc_strdup(""); 383 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, 384 n ? n->line : nb->line, 385 n ? n->pos : nb->pos, "TH"); 386 } 387 388 /* TITLE MSEC DATE ->SOURCE<- VOL */ 389 390 if (n && (n = n->next)) 391 man->meta.source = mandoc_strdup(n->string); 392 393 /* TITLE MSEC DATE SOURCE ->VOL<- */ 394 /* If missing, use the default VOL name for MSEC. */ 395 396 if (n && (n = n->next)) 397 man->meta.vol = mandoc_strdup(n->string); 398 else if ('\0' != man->meta.msec[0] && 399 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 400 man->meta.vol = mandoc_strdup(p); 401 402 /* 403 * Remove the `TH' node after we've processed it for our 404 * meta-data. 405 */ 406 man_node_delete(man, man->last); 407 return(1); 408 } 409 410 static int 411 post_nf(CHKARGS) 412 { 413 414 check_eq0(man, n); 415 416 if (MAN_LITERAL & man->flags) 417 mandoc_msg(MANDOCERR_NF_SKIP, man->parse, 418 n->line, n->pos, "nf"); 419 420 man->flags |= MAN_LITERAL; 421 return(1); 422 } 423 424 static int 425 post_fi(CHKARGS) 426 { 427 428 check_eq0(man, n); 429 430 if ( ! (MAN_LITERAL & man->flags)) 431 mandoc_msg(MANDOCERR_FI_SKIP, man->parse, 432 n->line, n->pos, "fi"); 433 434 man->flags &= ~MAN_LITERAL; 435 return(1); 436 } 437 438 static int 439 post_UC(CHKARGS) 440 { 441 static const char * const bsd_versions[] = { 442 "3rd Berkeley Distribution", 443 "4th Berkeley Distribution", 444 "4.2 Berkeley Distribution", 445 "4.3 Berkeley Distribution", 446 "4.4 Berkeley Distribution", 447 }; 448 449 const char *p, *s; 450 451 n = n->child; 452 453 if (NULL == n || MAN_TEXT != n->type) 454 p = bsd_versions[0]; 455 else { 456 s = n->string; 457 if (0 == strcmp(s, "3")) 458 p = bsd_versions[0]; 459 else if (0 == strcmp(s, "4")) 460 p = bsd_versions[1]; 461 else if (0 == strcmp(s, "5")) 462 p = bsd_versions[2]; 463 else if (0 == strcmp(s, "6")) 464 p = bsd_versions[3]; 465 else if (0 == strcmp(s, "7")) 466 p = bsd_versions[4]; 467 else 468 p = bsd_versions[0]; 469 } 470 471 free(man->meta.source); 472 man->meta.source = mandoc_strdup(p); 473 return(1); 474 } 475 476 static int 477 post_AT(CHKARGS) 478 { 479 static const char * const unix_versions[] = { 480 "7th Edition", 481 "System III", 482 "System V", 483 "System V Release 2", 484 }; 485 486 const char *p, *s; 487 struct man_node *nn; 488 489 n = n->child; 490 491 if (NULL == n || MAN_TEXT != n->type) 492 p = unix_versions[0]; 493 else { 494 s = n->string; 495 if (0 == strcmp(s, "3")) 496 p = unix_versions[0]; 497 else if (0 == strcmp(s, "4")) 498 p = unix_versions[1]; 499 else if (0 == strcmp(s, "5")) { 500 nn = n->next; 501 if (nn && MAN_TEXT == nn->type && nn->string[0]) 502 p = unix_versions[3]; 503 else 504 p = unix_versions[2]; 505 } else 506 p = unix_versions[0]; 507 } 508 509 free(man->meta.source); 510 man->meta.source = mandoc_strdup(p); 511 return(1); 512 } 513 514 static int 515 post_vs(CHKARGS) 516 { 517 518 if (n->tok == MAN_br) 519 check_eq0(man, n); 520 else 521 check_le1(man, n); 522 523 if (NULL != n->prev) 524 return(1); 525 526 switch (n->parent->tok) { 527 case MAN_SH: 528 /* FALLTHROUGH */ 529 case MAN_SS: 530 mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos, 531 "%s after %s", man_macronames[n->tok], 532 man_macronames[n->parent->tok]); 533 /* FALLTHROUGH */ 534 case MAN_MAX: 535 /* 536 * Don't warn about this because it occurs in pod2man 537 * and would cause considerable (unfixable) warnage. 538 */ 539 man_node_delete(man, n); 540 break; 541 default: 542 break; 543 } 544 545 return(1); 546 } 547