/* $OpenBSD: cgi.c,v 1.120 2022/12/26 19:16:02 jmc Exp $ */
/*
 * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Implementation of the man.cgi(8) program.
 */
#include <sys/types.h>
#include <sys/time.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "man.h"
#include "mandoc_parse.h"
#include "main.h"
#include "manconf.h"
#include "mansearch.h"
#include "cgi.h"

/*
 * A query as passed to the search function.
 * The string members are heap-allocated or NULL; main() frees them.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};

/*
 * State of one CGI request: the parsed query plus the list of
 * manpaths that parse_manpath_conf() reads from manpath.conf.
 * The first manpath, p[0], serves as the default in main().
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};

/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute (FOCUS_QUERY) or not (FOCUS_NONE).
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};

/* Prototypes of the file-local functions, in alphabetical order. */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	int		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	int		 resp_copy(const char *, const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);

static const char *scriptname = SCRIPT_NAME; 101 102 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 103 static const char *const sec_numbers[] = { 104 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 105 }; 106 static const char *const sec_names[] = { 107 "All Sections", 108 "1 - General Commands", 109 "2 - System Calls", 110 "3 - Library Functions", 111 "3p - Perl Library", 112 "4 - Device Drivers", 113 "5 - File Formats", 114 "6 - Games", 115 "7 - Miscellaneous Information", 116 "8 - System Manager\'s Manual", 117 "9 - Kernel Developer\'s Manual" 118 }; 119 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 120 121 static const char *const arch_names[] = { 122 "amd64", "alpha", "armv7", "arm64", 123 "hppa", "i386", "landisk", "loongson", 124 "luna88k", "macppc", "mips64", "octeon", 125 "powerpc64", "riscv64", "sparc64", 126 127 "amiga", "arc", "armish", "arm32", 128 "atari", "aviion", "beagle", "cats", 129 "hppa64", "hp300", 130 "ia64", "mac68k", "mvme68k", "mvme88k", 131 "mvmeppc", "palm", "pc532", "pegasos", 132 "pmax", "powerpc", "sgi", "socppc", 133 "solbourne", "sparc", 134 "sun3", "vax", "wgrisc", "x68k", 135 "zaurus" 136 }; 137 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 138 139 /* 140 * Print a character, escaping HTML along the way. 141 * This will pass non-ASCII straight to output: be warned! 142 */ 143 static void 144 html_putchar(char c) 145 { 146 147 switch (c) { 148 case '"': 149 printf("""); 150 break; 151 case '&': 152 printf("&"); 153 break; 154 case '>': 155 printf(">"); 156 break; 157 case '<': 158 printf("<"); 159 break; 160 default: 161 putchar((unsigned char)c); 162 break; 163 } 164 } 165 166 /* 167 * Call through to html_putchar(). 168 * Accepts NULL strings. 
169 */ 170 static void 171 html_print(const char *p) 172 { 173 174 if (NULL == p) 175 return; 176 while ('\0' != *p) 177 html_putchar(*p++); 178 } 179 180 /* 181 * Transfer the responsibility for the allocated string *val 182 * to the query structure. 183 */ 184 static void 185 set_query_attr(char **attr, char **val) 186 { 187 188 free(*attr); 189 if (**val == '\0') { 190 *attr = NULL; 191 free(*val); 192 } else 193 *attr = *val; 194 *val = NULL; 195 } 196 197 /* 198 * Parse the QUERY_STRING for key-value pairs 199 * and store the values into the query structure. 200 */ 201 static void 202 parse_query_string(struct req *req, const char *qs) 203 { 204 char *key, *val; 205 size_t keysz, valsz; 206 207 req->isquery = 1; 208 req->q.manpath = NULL; 209 req->q.arch = NULL; 210 req->q.sec = NULL; 211 req->q.query = NULL; 212 req->q.equal = 1; 213 214 key = val = NULL; 215 while (*qs != '\0') { 216 217 /* Parse one key. */ 218 219 keysz = strcspn(qs, "=;&"); 220 key = mandoc_strndup(qs, keysz); 221 qs += keysz; 222 if (*qs != '=') 223 goto next; 224 225 /* Parse one value. */ 226 227 valsz = strcspn(++qs, ";&"); 228 val = mandoc_strndup(qs, valsz); 229 qs += valsz; 230 231 /* Decode and catch encoding errors. */ 232 233 if ( ! (http_decode(key) && http_decode(val))) 234 goto next; 235 236 /* Handle key-value pairs. */ 237 238 if ( ! strcmp(key, "query")) 239 set_query_attr(&req->q.query, &val); 240 241 else if ( ! strcmp(key, "apropos")) 242 req->q.equal = !strcmp(val, "0"); 243 244 else if ( ! strcmp(key, "manpath")) { 245 #ifdef COMPAT_OLDURI 246 if ( ! strncmp(val, "OpenBSD ", 8)) { 247 val[7] = '-'; 248 if ('C' == val[8]) 249 val[8] = 'c'; 250 } 251 #endif 252 set_query_attr(&req->q.manpath, &val); 253 } 254 255 else if ( ! (strcmp(key, "sec") 256 #ifdef COMPAT_OLDURI 257 && strcmp(key, "sektion") 258 #endif 259 )) { 260 if ( ! strcmp(val, "0")) 261 *val = '\0'; 262 set_query_attr(&req->q.sec, &val); 263 } 264 265 else if ( ! strcmp(key, "arch")) { 266 if ( ! 
strcmp(val, "default")) 267 *val = '\0'; 268 set_query_attr(&req->q.arch, &val); 269 } 270 271 /* 272 * The key must be freed in any case. 273 * The val may have been handed over to the query 274 * structure, in which case it is now NULL. 275 */ 276 next: 277 free(key); 278 key = NULL; 279 free(val); 280 val = NULL; 281 282 if (*qs != '\0') 283 qs++; 284 } 285 } 286 287 /* 288 * HTTP-decode a string. The standard explanation is that this turns 289 * "%4e+foo" into "n foo" in the regular way. This is done in-place 290 * over the allocated string. 291 */ 292 static int 293 http_decode(char *p) 294 { 295 char hex[3]; 296 char *q; 297 int c; 298 299 hex[2] = '\0'; 300 301 q = p; 302 for ( ; '\0' != *p; p++, q++) { 303 if ('%' == *p) { 304 if ('\0' == (hex[0] = *(p + 1))) 305 return 0; 306 if ('\0' == (hex[1] = *(p + 2))) 307 return 0; 308 if (1 != sscanf(hex, "%x", &c)) 309 return 0; 310 if ('\0' == c) 311 return 0; 312 313 *q = (char)c; 314 p += 2; 315 } else 316 *q = '+' == *p ? ' ' : *p; 317 } 318 319 *q = '\0'; 320 return 1; 321 } 322 323 static void 324 http_encode(const char *p) 325 { 326 for (; *p != '\0'; p++) { 327 if (isalnum((unsigned char)*p) == 0 && 328 strchr("-._~", *p) == NULL) 329 printf("%%%2.2X", (unsigned char)*p); 330 else 331 putchar(*p); 332 } 333 } 334 335 static void 336 resp_begin_http(int code, const char *msg) 337 { 338 339 if (200 != code) 340 printf("Status: %d %s\r\n", code, msg); 341 342 printf("Content-Type: text/html; charset=utf-8\r\n" 343 "Cache-Control: no-cache\r\n" 344 "Content-Security-Policy: default-src 'none'; " 345 "style-src 'self' 'unsafe-inline'\r\n" 346 "Pragma: no-cache\r\n" 347 "\r\n"); 348 349 fflush(stdout); 350 } 351 352 static int 353 resp_copy(const char *element, const char *filename) 354 { 355 char buf[4096]; 356 ssize_t sz; 357 int fd; 358 359 if ((fd = open(filename, O_RDONLY)) == -1) 360 return 0; 361 362 if (element != NULL) 363 printf("<%s>\n", element); 364 fflush(stdout); 365 while ((sz = read(fd, buf, 
sizeof(buf))) > 0) 366 write(STDOUT_FILENO, buf, sz); 367 close(fd); 368 return 1; 369 } 370 371 static int 372 resp_begin_html(int code, const char *msg, const char *file) 373 { 374 const char *name, *sec, *cp; 375 int namesz, secsz; 376 377 resp_begin_http(code, msg); 378 379 printf("<!DOCTYPE html>\n" 380 "<html>\n" 381 "<head>\n" 382 " <meta charset=\"UTF-8\"/>\n" 383 " <meta name=\"viewport\"" 384 " content=\"width=device-width, initial-scale=1.0\">\n" 385 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 386 " type=\"text/css\" media=\"all\">\n" 387 " <title>", 388 CSS_DIR); 389 if (file != NULL) { 390 cp = strrchr(file, '/'); 391 name = cp == NULL ? file : cp + 1; 392 cp = strrchr(name, '.'); 393 namesz = cp == NULL ? strlen(name) : cp - name; 394 sec = NULL; 395 if (cp != NULL && cp[1] != '0') { 396 sec = cp + 1; 397 secsz = strlen(sec); 398 } else if (name - file > 1) { 399 for (cp = name - 2; cp >= file; cp--) { 400 if (*cp < '1' || *cp > '9') 401 continue; 402 sec = cp; 403 secsz = name - cp - 1; 404 break; 405 } 406 } 407 printf("%.*s", namesz, name); 408 if (sec != NULL) 409 printf("(%.*s)", secsz, sec); 410 fputs(" - ", stdout); 411 } 412 printf("%s</title>\n" 413 "</head>\n" 414 "<body>\n", 415 CUSTOMIZE_TITLE); 416 417 return resp_copy("header", MAN_DIR "/header.html"); 418 } 419 420 static void 421 resp_end_html(void) 422 { 423 if (resp_copy("footer", MAN_DIR "/footer.html")) 424 puts("</footer>"); 425 426 puts("</body>\n" 427 "</html>"); 428 } 429 430 static void 431 resp_searchform(const struct req *req, enum focus focus) 432 { 433 int i; 434 435 printf("<form role=\"search\" action=\"/%s\" method=\"get\" " 436 "autocomplete=\"off\" autocapitalize=\"none\">\n" 437 " <fieldset>\n" 438 " <legend>Manual Page Search Parameters</legend>\n", 439 scriptname); 440 441 /* Write query input box. 
*/ 442 443 printf(" <label>Search query:\n" 444 " <input type=\"search\" name=\"query\" value=\""); 445 if (req->q.query != NULL) 446 html_print(req->q.query); 447 printf("\" size=\"40\""); 448 if (focus == FOCUS_QUERY) 449 printf(" autofocus"); 450 puts(">\n </label>"); 451 452 /* Write submission buttons. */ 453 454 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 455 "man</button>\n" 456 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 457 "apropos</button>\n" 458 " <br/>\n"); 459 460 /* Write section selector. */ 461 462 puts(" <select name=\"sec\" aria-label=\"Manual section\">"); 463 for (i = 0; i < sec_MAX; i++) { 464 printf(" <option value=\"%s\"", sec_numbers[i]); 465 if (NULL != req->q.sec && 466 0 == strcmp(sec_numbers[i], req->q.sec)) 467 printf(" selected=\"selected\""); 468 printf(">%s</option>\n", sec_names[i]); 469 } 470 puts(" </select>"); 471 472 /* Write architecture selector. */ 473 474 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n" 475 " <option value=\"default\""); 476 if (NULL == req->q.arch) 477 printf(" selected=\"selected\""); 478 puts(">All Architectures</option>"); 479 for (i = 0; i < arch_MAX; i++) { 480 printf(" <option"); 481 if (NULL != req->q.arch && 482 0 == strcmp(arch_names[i], req->q.arch)) 483 printf(" selected=\"selected\""); 484 printf(">%s</option>\n", arch_names[i]); 485 } 486 puts(" </select>"); 487 488 /* Write manpath selector. */ 489 490 if (req->psz > 1) { 491 puts(" <select name=\"manpath\"" 492 " aria-label=\"Manual path\">"); 493 for (i = 0; i < (int)req->psz; i++) { 494 printf(" <option"); 495 if (strcmp(req->q.manpath, req->p[i]) == 0) 496 printf(" selected=\"selected\""); 497 printf(">"); 498 html_print(req->p[i]); 499 puts("</option>"); 500 } 501 puts(" </select>"); 502 } 503 504 puts(" </fieldset>\n" 505 "</form>"); 506 } 507 508 static int 509 validate_urifrag(const char *frag) 510 { 511 512 while ('\0' != *frag) { 513 if ( ! 
(isalnum((unsigned char)*frag) || 514 '-' == *frag || '.' == *frag || 515 '/' == *frag || '_' == *frag)) 516 return 0; 517 frag++; 518 } 519 return 1; 520 } 521 522 static int 523 validate_manpath(const struct req *req, const char* manpath) 524 { 525 size_t i; 526 527 for (i = 0; i < req->psz; i++) 528 if ( ! strcmp(manpath, req->p[i])) 529 return 1; 530 531 return 0; 532 } 533 534 static int 535 validate_arch(const char *arch) 536 { 537 int i; 538 539 for (i = 0; i < arch_MAX; i++) 540 if (strcmp(arch, arch_names[i]) == 0) 541 return 1; 542 543 return 0; 544 } 545 546 static int 547 validate_filename(const char *file) 548 { 549 550 if ('.' == file[0] && '/' == file[1]) 551 file += 2; 552 553 return ! (strstr(file, "../") || strstr(file, "/..") || 554 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 555 } 556 557 static void 558 pg_index(const struct req *req) 559 { 560 if (resp_begin_html(200, NULL, NULL) == 0) 561 puts("<header>"); 562 resp_searchform(req, FOCUS_QUERY); 563 printf("</header>\n" 564 "<main>\n" 565 "<p role=\"doc-notice\" aria-label=\"Usage\">\n" 566 "This web interface is documented in the\n" 567 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\"" 568 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n" 569 "manual, and the\n" 570 "<a class=\"Xr\" href=\"/%s%sapropos.1\"" 571 " aria-label=\"apropos, section 1\">apropos(1)</a>\n" 572 "manual explains the query syntax.\n" 573 "</p>\n" 574 "</main>\n", 575 scriptname, *scriptname == '\0' ? "" : "/", 576 scriptname, *scriptname == '\0' ? 
"" : "/"); 577 resp_end_html(); 578 } 579 580 static void 581 pg_noresult(const struct req *req, int code, const char *http_msg, 582 const char *user_msg) 583 { 584 if (resp_begin_html(code, http_msg, NULL) == 0) 585 puts("<header>"); 586 resp_searchform(req, FOCUS_QUERY); 587 puts("</header>"); 588 puts("<main>"); 589 puts("<p role=\"doc-notice\" aria-label=\"No result\">"); 590 puts(user_msg); 591 puts("</p>"); 592 puts("</main>"); 593 resp_end_html(); 594 } 595 596 static void 597 pg_error_badrequest(const char *msg) 598 { 599 if (resp_begin_html(400, "Bad Request", NULL)) 600 puts("</header>"); 601 puts("<main>\n" 602 "<h1>Bad Request</h1>\n" 603 "<p role=\"doc-notice\" aria-label=\"Bad Request\">"); 604 puts(msg); 605 printf("Try again from the\n" 606 "<a href=\"/%s\">main page</a>.\n" 607 "</p>\n" 608 "</main>\n", scriptname); 609 resp_end_html(); 610 } 611 612 static void 613 pg_error_internal(void) 614 { 615 if (resp_begin_html(500, "Internal Server Error", NULL)) 616 puts("</header>"); 617 puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>"); 618 resp_end_html(); 619 } 620 621 static void 622 pg_redirect(const struct req *req, const char *name) 623 { 624 printf("Status: 303 See Other\r\n" 625 "Location: /"); 626 if (*scriptname != '\0') 627 printf("%s/", scriptname); 628 if (strcmp(req->q.manpath, req->p[0])) 629 printf("%s/", req->q.manpath); 630 if (req->q.arch != NULL) 631 printf("%s/", req->q.arch); 632 http_encode(name); 633 if (req->q.sec != NULL) { 634 putchar('.'); 635 http_encode(req->q.sec); 636 } 637 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 638 } 639 640 static void 641 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 642 { 643 char *arch, *archend; 644 const char *sec; 645 size_t i, iuse; 646 int archprio, archpriouse; 647 int prio, priouse; 648 int have_header; 649 650 for (i = 0; i < sz; i++) { 651 if (validate_filename(r[i].file)) 652 continue; 653 warnx("invalid filename %s in %s 
database", 654 r[i].file, req->q.manpath); 655 pg_error_internal(); 656 return; 657 } 658 659 if (req->isquery && sz == 1) { 660 /* 661 * If we have just one result, then jump there now 662 * without any delay. 663 */ 664 printf("Status: 303 See Other\r\n" 665 "Location: /"); 666 if (*scriptname != '\0') 667 printf("%s/", scriptname); 668 if (strcmp(req->q.manpath, req->p[0])) 669 printf("%s/", req->q.manpath); 670 printf("%s\r\n" 671 "Content-Type: text/html; charset=utf-8\r\n\r\n", 672 r[0].file); 673 return; 674 } 675 676 /* 677 * In man(1) mode, show one of the pages 678 * even if more than one is found. 679 */ 680 681 iuse = 0; 682 if (req->q.equal || sz == 1) { 683 priouse = 20; 684 archpriouse = 3; 685 for (i = 0; i < sz; i++) { 686 sec = r[i].file; 687 sec += strcspn(sec, "123456789"); 688 if (sec[0] == '\0') 689 continue; 690 prio = sec_prios[sec[0] - '1']; 691 if (sec[1] != '/') 692 prio += 10; 693 if (req->q.arch == NULL) { 694 archprio = 695 ((arch = strchr(sec + 1, '/')) 696 == NULL) ? 3 : 697 ((archend = strchr(arch + 1, '/')) 698 == NULL) ? 0 : 699 strncmp(arch, "amd64/", 700 archend - arch) ? 2 : 1; 701 if (archprio < archpriouse) { 702 archpriouse = archprio; 703 priouse = prio; 704 iuse = i; 705 continue; 706 } 707 if (archprio > archpriouse) 708 continue; 709 } 710 if (prio >= priouse) 711 continue; 712 priouse = prio; 713 iuse = i; 714 } 715 have_header = resp_begin_html(200, NULL, r[iuse].file); 716 } else 717 have_header = resp_begin_html(200, NULL, NULL); 718 719 if (have_header == 0) 720 puts("<header>"); 721 resp_searchform(req, 722 req->q.equal || sz == 1 ? 
FOCUS_NONE : FOCUS_QUERY); 723 puts("</header>"); 724 725 if (sz > 1) { 726 puts("<nav>"); 727 puts("<table class=\"results\">"); 728 for (i = 0; i < sz; i++) { 729 printf(" <tr>\n" 730 " <td>" 731 "<a class=\"Xr\" href=\"/"); 732 if (*scriptname != '\0') 733 printf("%s/", scriptname); 734 if (strcmp(req->q.manpath, req->p[0])) 735 printf("%s/", req->q.manpath); 736 printf("%s\">", r[i].file); 737 html_print(r[i].names); 738 printf("</a></td>\n" 739 " <td><span class=\"Nd\">"); 740 html_print(r[i].output); 741 puts("</span></td>\n" 742 " </tr>"); 743 } 744 puts("</table>"); 745 puts("</nav>"); 746 } 747 748 if (req->q.equal || sz == 1) { 749 puts("<hr>"); 750 resp_show(req, r[iuse].file); 751 } 752 753 resp_end_html(); 754 } 755 756 static void 757 resp_catman(const struct req *req, const char *file) 758 { 759 FILE *f; 760 char *p; 761 size_t sz; 762 ssize_t len; 763 int i; 764 int italic, bold; 765 766 if ((f = fopen(file, "r")) == NULL) { 767 puts("<p role=\"doc-notice\">\n" 768 " You specified an invalid manual file.\n" 769 "</p>"); 770 return; 771 } 772 773 puts("<div class=\"catman\">\n" 774 "<pre>"); 775 776 p = NULL; 777 sz = 0; 778 779 while ((len = getline(&p, &sz, f)) != -1) { 780 bold = italic = 0; 781 for (i = 0; i < len - 1; i++) { 782 /* 783 * This means that the catpage is out of state. 784 * Ignore it and keep going (although the 785 * catpage is bogus). 786 */ 787 788 if ('\b' == p[i] || '\n' == p[i]) 789 continue; 790 791 /* 792 * Print a regular character. 793 * Close out any bold/italic scopes. 794 * If we're in back-space mode, make sure we'll 795 * have something to enter when we backspace. 796 */ 797 798 if ('\b' != p[i + 1]) { 799 if (italic) 800 printf("</i>"); 801 if (bold) 802 printf("</b>"); 803 italic = bold = 0; 804 html_putchar(p[i]); 805 continue; 806 } else if (i + 2 >= len) 807 continue; 808 809 /* Italic mode. */ 810 811 if ('_' == p[i]) { 812 if (bold) 813 printf("</b>"); 814 if ( ! 
italic) 815 printf("<i>"); 816 bold = 0; 817 italic = 1; 818 i += 2; 819 html_putchar(p[i]); 820 continue; 821 } 822 823 /* 824 * Handle funny behaviour troff-isms. 825 * These grok'd from the original man2html.c. 826 */ 827 828 if (('+' == p[i] && 'o' == p[i + 2]) || 829 ('o' == p[i] && '+' == p[i + 2]) || 830 ('|' == p[i] && '=' == p[i + 2]) || 831 ('=' == p[i] && '|' == p[i + 2]) || 832 ('*' == p[i] && '=' == p[i + 2]) || 833 ('=' == p[i] && '*' == p[i + 2]) || 834 ('*' == p[i] && '|' == p[i + 2]) || 835 ('|' == p[i] && '*' == p[i + 2])) { 836 if (italic) 837 printf("</i>"); 838 if (bold) 839 printf("</b>"); 840 italic = bold = 0; 841 putchar('*'); 842 i += 2; 843 continue; 844 } else if (('|' == p[i] && '-' == p[i + 2]) || 845 ('-' == p[i] && '|' == p[i + 1]) || 846 ('+' == p[i] && '-' == p[i + 1]) || 847 ('-' == p[i] && '+' == p[i + 1]) || 848 ('+' == p[i] && '|' == p[i + 1]) || 849 ('|' == p[i] && '+' == p[i + 1])) { 850 if (italic) 851 printf("</i>"); 852 if (bold) 853 printf("</b>"); 854 italic = bold = 0; 855 putchar('+'); 856 i += 2; 857 continue; 858 } 859 860 /* Bold mode. */ 861 862 if (italic) 863 printf("</i>"); 864 if ( ! bold) 865 printf("<b>"); 866 bold = 1; 867 italic = 0; 868 i += 2; 869 html_putchar(p[i]); 870 } 871 872 /* 873 * Clean up the last character. 874 * We can get to a newline; don't print that. 
875 */ 876 877 if (italic) 878 printf("</i>"); 879 if (bold) 880 printf("</b>"); 881 882 if (i == len - 1 && p[i] != '\n') 883 html_putchar(p[i]); 884 885 putchar('\n'); 886 } 887 free(p); 888 889 puts("</pre>\n" 890 "</div>"); 891 892 fclose(f); 893 } 894 895 static void 896 resp_format(const struct req *req, const char *file) 897 { 898 struct manoutput conf; 899 struct mparse *mp; 900 struct roff_meta *meta; 901 void *vp; 902 int fd; 903 int usepath; 904 905 if (-1 == (fd = open(file, O_RDONLY))) { 906 puts("<p role=\"doc-notice\">\n" 907 " You specified an invalid manual file.\n" 908 "</p>"); 909 return; 910 } 911 912 mchars_alloc(); 913 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 914 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 915 mparse_readfd(mp, fd, file); 916 close(fd); 917 meta = mparse_result(mp); 918 919 memset(&conf, 0, sizeof(conf)); 920 conf.fragment = 1; 921 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 922 usepath = strcmp(req->q.manpath, req->p[0]); 923 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 924 scriptname, *scriptname == '\0' ? "" : "/", 925 usepath ? req->q.manpath : "", usepath ? "/" : ""); 926 927 vp = html_alloc(&conf); 928 if (meta->macroset == MACROSET_MDOC) 929 html_mdoc(vp, meta); 930 else 931 html_man(vp, meta); 932 933 html_free(vp); 934 mparse_free(mp); 935 mchars_free(); 936 free(conf.man); 937 free(conf.style); 938 } 939 940 static void 941 resp_show(const struct req *req, const char *file) 942 { 943 944 if ('.' == file[0] && '/' == file[1]) 945 file += 2; 946 947 if ('c' == *file) 948 resp_catman(req, file); 949 else 950 resp_format(req, file); 951 } 952 953 static void 954 pg_show(struct req *req, const char *fullpath) 955 { 956 char *manpath; 957 const char *file; 958 959 if ((file = strchr(fullpath, '/')) == NULL) { 960 pg_error_badrequest( 961 "You did not specify a page to show."); 962 return; 963 } 964 manpath = mandoc_strndup(fullpath, file - fullpath); 965 file++; 966 967 if ( ! 
validate_manpath(req, manpath)) { 968 pg_error_badrequest( 969 "You specified an invalid manpath."); 970 free(manpath); 971 return; 972 } 973 974 /* 975 * Begin by chdir()ing into the manpath. 976 * This way we can pick up the database files, which are 977 * relative to the manpath root. 978 */ 979 980 if (chdir(manpath) == -1) { 981 warn("chdir %s", manpath); 982 pg_error_internal(); 983 free(manpath); 984 return; 985 } 986 free(manpath); 987 988 if ( ! validate_filename(file)) { 989 pg_error_badrequest( 990 "You specified an invalid manual file."); 991 return; 992 } 993 994 if (resp_begin_html(200, NULL, file) == 0) 995 puts("<header>"); 996 resp_searchform(req, FOCUS_NONE); 997 puts("</header>"); 998 resp_show(req, file); 999 resp_end_html(); 1000 } 1001 1002 static void 1003 pg_search(const struct req *req) 1004 { 1005 struct mansearch search; 1006 struct manpaths paths; 1007 struct manpage *res; 1008 char **argv; 1009 char *query, *rp, *wp; 1010 size_t ressz; 1011 int argc; 1012 1013 /* 1014 * Begin by chdir()ing into the root of the manpath. 1015 * This way we can pick up the database files, which are 1016 * relative to the manpath root. 1017 */ 1018 1019 if (chdir(req->q.manpath) == -1) { 1020 warn("chdir %s", req->q.manpath); 1021 pg_error_internal(); 1022 return; 1023 } 1024 1025 search.arch = req->q.arch; 1026 search.sec = req->q.sec; 1027 search.outkey = "Nd"; 1028 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 1029 search.firstmatch = 1; 1030 1031 paths.sz = 1; 1032 paths.paths = mandoc_malloc(sizeof(char *)); 1033 paths.paths[0] = mandoc_strdup("."); 1034 1035 /* 1036 * Break apart at spaces with backslash-escaping. 
1037 */ 1038 1039 argc = 0; 1040 argv = NULL; 1041 rp = query = mandoc_strdup(req->q.query); 1042 for (;;) { 1043 while (isspace((unsigned char)*rp)) 1044 rp++; 1045 if (*rp == '\0') 1046 break; 1047 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1048 argv[argc++] = wp = rp; 1049 for (;;) { 1050 if (isspace((unsigned char)*rp)) { 1051 *wp = '\0'; 1052 rp++; 1053 break; 1054 } 1055 if (rp[0] == '\\' && rp[1] != '\0') 1056 rp++; 1057 if (wp != rp) 1058 *wp = *rp; 1059 if (*rp == '\0') 1060 break; 1061 wp++; 1062 rp++; 1063 } 1064 } 1065 1066 res = NULL; 1067 ressz = 0; 1068 if (req->isquery && req->q.equal && argc == 1) 1069 pg_redirect(req, argv[0]); 1070 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1071 pg_noresult(req, 400, "Bad Request", 1072 "You entered an invalid query."); 1073 else if (ressz == 0) 1074 pg_noresult(req, 404, "Not Found", "No results found."); 1075 else 1076 pg_searchres(req, res, ressz); 1077 1078 free(query); 1079 mansearch_free(res, ressz); 1080 free(paths.paths[0]); 1081 free(paths.paths); 1082 } 1083 1084 int 1085 main(void) 1086 { 1087 struct req req; 1088 struct itimerval itimer; 1089 const char *path; 1090 const char *querystring; 1091 int i; 1092 1093 /* 1094 * The "rpath" pledge could be revoked after mparse_readfd() 1095 * if the file descriptor to "/footer.html" would be opened 1096 * up front, but it's probably not worth the complication 1097 * of the code it would cause: it would require scattering 1098 * pledge() calls in multiple low-level resp_*() functions. 1099 */ 1100 1101 if (pledge("stdio rpath", NULL) == -1) { 1102 warn("pledge"); 1103 pg_error_internal(); 1104 return EXIT_FAILURE; 1105 } 1106 1107 /* Poor man's ReDoS mitigation. 
*/ 1108 1109 itimer.it_value.tv_sec = 2; 1110 itimer.it_value.tv_usec = 0; 1111 itimer.it_interval.tv_sec = 2; 1112 itimer.it_interval.tv_usec = 0; 1113 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1114 warn("setitimer"); 1115 pg_error_internal(); 1116 return EXIT_FAILURE; 1117 } 1118 1119 /* 1120 * First we change directory into the MAN_DIR so that 1121 * subsequent scanning for manpath directories is rooted 1122 * relative to the same position. 1123 */ 1124 1125 if (chdir(MAN_DIR) == -1) { 1126 warn("MAN_DIR: %s", MAN_DIR); 1127 pg_error_internal(); 1128 return EXIT_FAILURE; 1129 } 1130 1131 memset(&req, 0, sizeof(struct req)); 1132 req.q.equal = 1; 1133 parse_manpath_conf(&req); 1134 1135 /* Parse the path info and the query string. */ 1136 1137 if ((path = getenv("PATH_INFO")) == NULL) 1138 path = ""; 1139 else if (*path == '/') 1140 path++; 1141 1142 if (*path != '\0') { 1143 parse_path_info(&req, path); 1144 if (req.q.manpath == NULL || req.q.sec == NULL || 1145 *req.q.query == '\0' || access(path, F_OK) == -1) 1146 path = ""; 1147 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1148 parse_query_string(&req, querystring); 1149 1150 /* Validate parsed data and add defaults. */ 1151 1152 if (req.q.manpath == NULL) 1153 req.q.manpath = mandoc_strdup(req.p[0]); 1154 else if ( ! validate_manpath(&req, req.q.manpath)) { 1155 pg_error_badrequest( 1156 "You specified an invalid manpath."); 1157 return EXIT_FAILURE; 1158 } 1159 1160 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1161 pg_error_badrequest( 1162 "You specified an invalid architecture."); 1163 return EXIT_FAILURE; 1164 } 1165 1166 /* Dispatch to the three different pages. 
*/ 1167 1168 if ('\0' != *path) 1169 pg_show(&req, path); 1170 else if (NULL != req.q.query) 1171 pg_search(&req); 1172 else 1173 pg_index(&req); 1174 1175 free(req.q.manpath); 1176 free(req.q.arch); 1177 free(req.q.sec); 1178 free(req.q.query); 1179 for (i = 0; i < (int)req.psz; i++) 1180 free(req.p[i]); 1181 free(req.p); 1182 return EXIT_SUCCESS; 1183 } 1184 1185 /* 1186 * Translate PATH_INFO to a query. 1187 */ 1188 static void 1189 parse_path_info(struct req *req, const char *path) 1190 { 1191 const char *name, *sec, *end; 1192 1193 req->isquery = 0; 1194 req->q.equal = 1; 1195 req->q.manpath = NULL; 1196 req->q.arch = NULL; 1197 1198 /* Mandatory manual page name. */ 1199 if ((name = strrchr(path, '/')) == NULL) 1200 name = path; 1201 else 1202 name++; 1203 1204 /* Optional trailing section. */ 1205 sec = strrchr(name, '.'); 1206 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1207 req->q.query = mandoc_strndup(name, sec - name - 1); 1208 req->q.sec = mandoc_strdup(sec); 1209 } else { 1210 req->q.query = mandoc_strdup(name); 1211 req->q.sec = NULL; 1212 } 1213 1214 /* Handle the case of name[.section] only. */ 1215 if (name == path) 1216 return; 1217 1218 /* Optional manpath. */ 1219 end = strchr(path, '/'); 1220 req->q.manpath = mandoc_strndup(path, end - path); 1221 if (validate_manpath(req, req->q.manpath)) { 1222 path = end + 1; 1223 if (name == path) 1224 return; 1225 } else { 1226 free(req->q.manpath); 1227 req->q.manpath = NULL; 1228 } 1229 1230 /* Optional section. */ 1231 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1232 path += 3; 1233 end = strchr(path, '/'); 1234 free(req->q.sec); 1235 req->q.sec = mandoc_strndup(path, end - path); 1236 path = end + 1; 1237 if (name == path) 1238 return; 1239 } 1240 1241 /* Optional architecture. 
*/ 1242 end = strchr(path, '/'); 1243 if (end + 1 != name) { 1244 pg_error_badrequest( 1245 "You specified too many directory components."); 1246 exit(EXIT_FAILURE); 1247 } 1248 req->q.arch = mandoc_strndup(path, end - path); 1249 if (validate_arch(req->q.arch) == 0) { 1250 pg_error_badrequest( 1251 "You specified an invalid directory component."); 1252 exit(EXIT_FAILURE); 1253 } 1254 } 1255 1256 /* 1257 * Scan for indexable paths. 1258 */ 1259 static void 1260 parse_manpath_conf(struct req *req) 1261 { 1262 FILE *fp; 1263 char *dp; 1264 size_t dpsz; 1265 ssize_t len; 1266 1267 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1268 warn("%s/manpath.conf", MAN_DIR); 1269 pg_error_internal(); 1270 exit(EXIT_FAILURE); 1271 } 1272 1273 dp = NULL; 1274 dpsz = 0; 1275 1276 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1277 if (dp[len - 1] == '\n') 1278 dp[--len] = '\0'; 1279 req->p = mandoc_realloc(req->p, 1280 (req->psz + 1) * sizeof(char *)); 1281 if ( ! validate_urifrag(dp)) { 1282 warnx("%s/manpath.conf contains " 1283 "unsafe path \"%s\"", MAN_DIR, dp); 1284 pg_error_internal(); 1285 exit(EXIT_FAILURE); 1286 } 1287 if (strchr(dp, '/') != NULL) { 1288 warnx("%s/manpath.conf contains " 1289 "path with slash \"%s\"", MAN_DIR, dp); 1290 pg_error_internal(); 1291 exit(EXIT_FAILURE); 1292 } 1293 req->p[req->psz++] = dp; 1294 dp = NULL; 1295 dpsz = 0; 1296 } 1297 free(dp); 1298 1299 if (req->p == NULL) { 1300 warnx("%s/manpath.conf is empty", MAN_DIR); 1301 pg_error_internal(); 1302 exit(EXIT_FAILURE); 1303 } 1304 } 1305