1 /* $Id: man_validate.c,v 1.146 2018/12/31 10:04:39 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <errno.h> 25 #include <limits.h> 26 #include <stdarg.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <time.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "man.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "libman.h" 39 40 #define CHKARGS struct roff_man *man, struct roff_node *n 41 42 typedef void (*v_check)(CHKARGS); 43 44 static void check_abort(CHKARGS); 45 static void check_par(CHKARGS); 46 static void check_part(CHKARGS); 47 static void check_root(CHKARGS); 48 static void check_text(CHKARGS); 49 50 static void post_AT(CHKARGS); 51 static void post_EE(CHKARGS); 52 static void post_EX(CHKARGS); 53 static void post_IP(CHKARGS); 54 static void post_OP(CHKARGS); 55 static void post_SH(CHKARGS); 56 static void post_TH(CHKARGS); 57 static void post_UC(CHKARGS); 58 static void post_UR(CHKARGS); 59 static void post_in(CHKARGS); 60 61 static const v_check man_valids[MAN_MAX - MAN_TH] = { 62 post_TH, /* TH */ 63 post_SH, /* SH */ 64 post_SH, /* SS */ 65 NULL, /* TP */ 66 NULL, /* TQ */ 67 check_abort,/* LP */ 68 check_par, /* PP */ 69 check_abort,/* P */ 70 post_IP, /* IP */ 71 NULL, /* HP */ 72 NULL, /* SM */ 73 NULL, /* SB */ 74 NULL, /* BI */ 75 NULL, /* IB */ 76 NULL, /* BR */ 77 NULL, /* RB */ 78 NULL, /* R */ 79 NULL, /* B */ 80 NULL, /* I */ 81 NULL, /* IR */ 82 NULL, /* RI */ 83 NULL, /* RE */ 84 check_part, /* RS */ 85 NULL, /* DT */ 86 post_UC, /* UC */ 87 NULL, /* PD */ 88 post_AT, /* AT */ 89 post_in, /* in */ 90 NULL, /* SY */ 91 NULL, /* YS */ 92 post_OP, /* OP */ 93 post_EX, /* EX */ 94 post_EE, /* EE */ 95 post_UR, /* UR */ 96 NULL, /* UE */ 97 post_UR, /* MT */ 98 NULL, /* ME */ 99 }; 100 101 102 /* Validate the subtree rooted at man->last. */ 103 void 104 man_validate(struct roff_man *man) 105 { 106 struct roff_node *n; 107 const v_check *cp; 108 109 /* 110 * Translate obsolete macros such that later code 111 * does not need to look for them. 112 */ 113 114 n = man->last; 115 switch (n->tok) { 116 case MAN_LP: 117 case MAN_P: 118 n->tok = MAN_PP; 119 break; 120 default: 121 break; 122 } 123 124 /* 125 * Iterate over all children, recursing into each one 126 * in turn, depth-first. 127 */ 128 129 man->last = man->last->child; 130 while (man->last != NULL) { 131 man_validate(man); 132 if (man->last == n) 133 man->last = man->last->child; 134 else 135 man->last = man->last->next; 136 } 137 138 /* Finally validate the macro itself. */ 139 140 man->last = n; 141 man->next = ROFF_NEXT_SIBLING; 142 switch (n->type) { 143 case ROFFT_TEXT: 144 check_text(man, n); 145 break; 146 case ROFFT_ROOT: 147 check_root(man, n); 148 break; 149 case ROFFT_COMMENT: 150 case ROFFT_EQN: 151 case ROFFT_TBL: 152 break; 153 default: 154 if (n->tok < ROFF_MAX) { 155 roff_validate(man); 156 break; 157 } 158 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 159 cp = man_valids + (n->tok - MAN_TH); 160 if (*cp) 161 (*cp)(man, n); 162 if (man->last == n) 163 n->flags |= NODE_VALID; 164 break; 165 } 166 } 167 168 static void 169 check_root(CHKARGS) 170 { 171 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 172 173 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 174 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 175 else 176 man->meta.hasbody = 1; 177 178 if (NULL == man->meta.title) { 179 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 180 181 /* 182 * If a title hasn't been set, do so now (by 183 * implication, date and section also aren't set). 184 */ 185 186 man->meta.title = mandoc_strdup(""); 187 man->meta.msec = mandoc_strdup(""); 188 man->meta.date = man->quick ? mandoc_strdup("") : 189 mandoc_normdate(man, NULL, n->line, n->pos); 190 } 191 192 if (man->meta.os_e && 193 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 194 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 195 man->meta.os_e == MANDOC_OS_OPENBSD ? 196 "(OpenBSD)" : "(NetBSD)"); 197 } 198 199 static void 200 check_abort(CHKARGS) 201 { 202 abort(); 203 } 204 205 static void 206 check_text(CHKARGS) 207 { 208 char *cp, *p; 209 210 if (n->flags & NODE_NOFILL) 211 return; 212 213 cp = n->string; 214 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 215 mandoc_msg(MANDOCERR_FI_TAB, 216 n->line, n->pos + (int)(p - cp), NULL); 217 } 218 219 static void 220 post_EE(CHKARGS) 221 { 222 if ((n->flags & NODE_NOFILL) == 0) 223 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 224 } 225 226 static void 227 post_EX(CHKARGS) 228 { 229 if (n->flags & NODE_NOFILL) 230 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 231 } 232 233 static void 234 post_OP(CHKARGS) 235 { 236 237 if (n->child == NULL) 238 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 239 else if (n->child->next != NULL && n->child->next->next != NULL) { 240 n = n->child->next->next; 241 mandoc_msg(MANDOCERR_ARG_EXCESS, 242 n->line, n->pos, "OP ... %s", n->string); 243 } 244 } 245 246 static void 247 post_SH(CHKARGS) 248 { 249 struct roff_node *nc; 250 251 if (n->type != ROFFT_BODY || (nc = n->child) == NULL) 252 return; 253 254 if (nc->tok == MAN_PP && nc->body->child != NULL) { 255 while (nc->body->last != NULL) { 256 man->next = ROFF_NEXT_CHILD; 257 roff_node_relink(man, nc->body->last); 258 man->last = n; 259 } 260 } 261 262 if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { 263 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 264 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 265 roff_node_delete(man, nc); 266 } 267 268 /* 269 * Trailing PP is empty, so it is deleted by check_par(). 270 * Trailing sp is significant. 271 */ 272 273 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 274 mandoc_msg(MANDOCERR_PAR_SKIP, 275 nc->line, nc->pos, "%s at the end of %s", 276 roff_name[nc->tok], roff_name[n->tok]); 277 roff_node_delete(man, nc); 278 } 279 } 280 281 static void 282 post_UR(CHKARGS) 283 { 284 if (n->type == ROFFT_HEAD && n->child == NULL) 285 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 286 "%s", roff_name[n->tok]); 287 check_part(man, n); 288 } 289 290 static void 291 check_part(CHKARGS) 292 { 293 294 if (n->type == ROFFT_BODY && n->child == NULL) 295 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 296 "%s", roff_name[n->tok]); 297 } 298 299 static void 300 check_par(CHKARGS) 301 { 302 303 switch (n->type) { 304 case ROFFT_BLOCK: 305 if (n->body->child == NULL) 306 roff_node_delete(man, n); 307 break; 308 case ROFFT_BODY: 309 if (n->child != NULL && 310 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 311 mandoc_msg(MANDOCERR_PAR_SKIP, 312 n->child->line, n->child->pos, 313 "%s after %s", roff_name[n->child->tok], 314 roff_name[n->tok]); 315 roff_node_delete(man, n->child); 316 } 317 if (n->child == NULL) 318 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 319 "%s empty", roff_name[n->tok]); 320 break; 321 case ROFFT_HEAD: 322 if (n->child != NULL) 323 mandoc_msg(MANDOCERR_ARG_SKIP, 324 n->line, n->pos, "%s %s%s", 325 roff_name[n->tok], n->child->string, 326 n->child->next != NULL ? " ..." : ""); 327 break; 328 default: 329 break; 330 } 331 } 332 333 static void 334 post_IP(CHKARGS) 335 { 336 337 switch (n->type) { 338 case ROFFT_BLOCK: 339 if (n->head->child == NULL && n->body->child == NULL) 340 roff_node_delete(man, n); 341 break; 342 case ROFFT_BODY: 343 if (n->parent->head->child == NULL && n->child == NULL) 344 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 345 "%s empty", roff_name[n->tok]); 346 break; 347 default: 348 break; 349 } 350 } 351 352 static void 353 post_TH(CHKARGS) 354 { 355 struct roff_node *nb; 356 const char *p; 357 358 free(man->meta.title); 359 free(man->meta.vol); 360 free(man->meta.os); 361 free(man->meta.msec); 362 free(man->meta.date); 363 364 man->meta.title = man->meta.vol = man->meta.date = 365 man->meta.msec = man->meta.os = NULL; 366 367 nb = n; 368 369 /* ->TITLE<- MSEC DATE OS VOL */ 370 371 n = n->child; 372 if (n && n->string) { 373 for (p = n->string; '\0' != *p; p++) { 374 /* Only warn about this once... */ 375 if (isalpha((unsigned char)*p) && 376 ! isupper((unsigned char)*p)) { 377 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 378 n->pos + (int)(p - n->string), 379 "TH %s", n->string); 380 break; 381 } 382 } 383 man->meta.title = mandoc_strdup(n->string); 384 } else { 385 man->meta.title = mandoc_strdup(""); 386 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 387 } 388 389 /* TITLE ->MSEC<- DATE OS VOL */ 390 391 if (n) 392 n = n->next; 393 if (n && n->string) 394 man->meta.msec = mandoc_strdup(n->string); 395 else { 396 man->meta.msec = mandoc_strdup(""); 397 mandoc_msg(MANDOCERR_MSEC_MISSING, 398 nb->line, nb->pos, "TH %s", man->meta.title); 399 } 400 401 /* TITLE MSEC ->DATE<- OS VOL */ 402 403 if (n) 404 n = n->next; 405 if (n && n->string && '\0' != n->string[0]) { 406 man->meta.date = man->quick ? 407 mandoc_strdup(n->string) : 408 mandoc_normdate(man, n->string, n->line, n->pos); 409 } else { 410 man->meta.date = mandoc_strdup(""); 411 mandoc_msg(MANDOCERR_DATE_MISSING, 412 n ? n->line : nb->line, 413 n ? n->pos : nb->pos, "TH"); 414 } 415 416 /* TITLE MSEC DATE ->OS<- VOL */ 417 418 if (n && (n = n->next)) 419 man->meta.os = mandoc_strdup(n->string); 420 else if (man->os_s != NULL) 421 man->meta.os = mandoc_strdup(man->os_s); 422 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 423 if (strstr(man->meta.os, "OpenBSD") != NULL) 424 man->meta.os_e = MANDOC_OS_OPENBSD; 425 else if (strstr(man->meta.os, "NetBSD") != NULL) 426 man->meta.os_e = MANDOC_OS_NETBSD; 427 } 428 429 /* TITLE MSEC DATE OS ->VOL<- */ 430 /* If missing, use the default VOL name for MSEC. */ 431 432 if (n && (n = n->next)) 433 man->meta.vol = mandoc_strdup(n->string); 434 else if ('\0' != man->meta.msec[0] && 435 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 436 man->meta.vol = mandoc_strdup(p); 437 438 if (n != NULL && (n = n->next) != NULL) 439 mandoc_msg(MANDOCERR_ARG_EXCESS, 440 n->line, n->pos, "TH ... %s", n->string); 441 442 /* 443 * Remove the `TH' node after we've processed it for our 444 * meta-data. 445 */ 446 roff_node_delete(man, man->last); 447 } 448 449 static void 450 post_UC(CHKARGS) 451 { 452 static const char * const bsd_versions[] = { 453 "3rd Berkeley Distribution", 454 "4th Berkeley Distribution", 455 "4.2 Berkeley Distribution", 456 "4.3 Berkeley Distribution", 457 "4.4 Berkeley Distribution", 458 }; 459 460 const char *p, *s; 461 462 n = n->child; 463 464 if (n == NULL || n->type != ROFFT_TEXT) 465 p = bsd_versions[0]; 466 else { 467 s = n->string; 468 if (0 == strcmp(s, "3")) 469 p = bsd_versions[0]; 470 else if (0 == strcmp(s, "4")) 471 p = bsd_versions[1]; 472 else if (0 == strcmp(s, "5")) 473 p = bsd_versions[2]; 474 else if (0 == strcmp(s, "6")) 475 p = bsd_versions[3]; 476 else if (0 == strcmp(s, "7")) 477 p = bsd_versions[4]; 478 else 479 p = bsd_versions[0]; 480 } 481 482 free(man->meta.os); 483 man->meta.os = mandoc_strdup(p); 484 } 485 486 static void 487 post_AT(CHKARGS) 488 { 489 static const char * const unix_versions[] = { 490 "7th Edition", 491 "System III", 492 "System V", 493 "System V Release 2", 494 }; 495 496 struct roff_node *nn; 497 const char *p, *s; 498 499 n = n->child; 500 501 if (n == NULL || n->type != ROFFT_TEXT) 502 p = unix_versions[0]; 503 else { 504 s = n->string; 505 if (0 == strcmp(s, "3")) 506 p = unix_versions[0]; 507 else if (0 == strcmp(s, "4")) 508 p = unix_versions[1]; 509 else if (0 == strcmp(s, "5")) { 510 nn = n->next; 511 if (nn != NULL && 512 nn->type == ROFFT_TEXT && 513 nn->string[0] != '\0') 514 p = unix_versions[3]; 515 else 516 p = unix_versions[2]; 517 } else 518 p = unix_versions[0]; 519 } 520 521 free(man->meta.os); 522 man->meta.os = mandoc_strdup(p); 523 } 524 525 static void 526 post_in(CHKARGS) 527 { 528 char *s; 529 530 if (n->parent->tok != MAN_TP || 531 n->parent->type != ROFFT_HEAD || 532 n->child == NULL || 533 *n->child->string == '+' || 534 *n->child->string == '-') 535 return; 536 mandoc_asprintf(&s, "+%s", n->child->string); 537 free(n->child->string); 538 n->child->string = s; 539 } 540