1 /* $OpenBSD: cgi.c,v 1.107 2019/11/10 22:18:01 bentley Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "mandoc_parse.h" 38 #include "main.h" 39 #include "manconf.h" 40 #include "mansearch.h" 41 #include "cgi.h" 42 43 /* 44 * A query as passed to the search function. 45 */ 46 struct query { 47 char *manpath; /* desired manual directory */ 48 char *arch; /* architecture */ 49 char *sec; /* manual section */ 50 char *query; /* unparsed query expression */ 51 int equal; /* match whole names, not substrings */ 52 }; 53 54 struct req { 55 struct query q; 56 char **p; /* array of available manpaths */ 57 size_t psz; /* number of available manpaths */ 58 int isquery; /* QUERY_STRING used, not PATH_INFO */ 59 }; 60 61 enum focus { 62 FOCUS_NONE = 0, 63 FOCUS_QUERY 64 }; 65 66 static void html_print(const char *); 67 static void html_putchar(char); 68 static int http_decode(char *); 69 static void http_encode(const char *p); 70 static void parse_manpath_conf(struct req *); 71 static void parse_path_info(struct req *req, const char *path); 72 static void parse_query_string(struct req *, const char *); 73 static void pg_error_badrequest(const char *); 74 static void pg_error_internal(void); 75 static void pg_index(const struct req *); 76 static void pg_noresult(const struct req *, int, const char *, 77 const char *); 78 static void pg_redirect(const struct req *, const char *); 79 static void pg_search(const struct req *); 80 static void pg_searchres(const struct req *, 81 struct manpage *, size_t); 82 static void pg_show(struct req *, const char *); 83 static void resp_begin_html(int, const char *, const char *); 84 static void resp_begin_http(int, const char *); 85 static void resp_catman(const struct req *, const char *); 86 static void resp_copy(const char *); 87 static void resp_end_html(void); 88 static void resp_format(const struct req *, const char *); 89 static void resp_searchform(const struct req *, enum focus); 90 static void resp_show(const struct req *, const char *); 91 static void set_query_attr(char **, char **); 92 static int validate_arch(const char *); 93 static int validate_filename(const char *); 94 static int validate_manpath(const struct req *, const char *); 95 static int validate_urifrag(const char *); 96 97 static const char *scriptname = SCRIPT_NAME; 98 99 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 100 static const char *const sec_numbers[] = { 101 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 102 }; 103 static const char *const sec_names[] = { 104 "All Sections", 105 "1 - General Commands", 106 "2 - System Calls", 107 "3 - Library Functions", 108 "3p - Perl Library", 109 "4 - Device Drivers", 110 "5 - File Formats", 111 "6 - Games", 112 "7 - Miscellaneous Information", 113 "8 - System Manager\'s Manual", 114 "9 - Kernel Developer\'s Manual" 115 }; 116 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 117 118 static const char *const arch_names[] = { 119 "amd64", "alpha", "armv7", "arm64", 120 "hppa", "i386", "landisk", 121 "loongson", "luna88k", "macppc", "mips64", 122 "octeon", "sgi", "socppc", "sparc64", 123 "amiga", "arc", "armish", "arm32", 124 "atari", "aviion", "beagle", "cats", 125 "hppa64", "hp300", 126 "ia64", "mac68k", "mvme68k", "mvme88k", 127 "mvmeppc", "palm", "pc532", "pegasos", 128 "pmax", "powerpc", "solbourne", "sparc", 129 "sun3", "vax", "wgrisc", "x68k", 130 "zaurus" 131 }; 132 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 133 134 /* 135 * Print a character, escaping HTML along the way. 136 * This will pass non-ASCII straight to output: be warned! 137 */ 138 static void 139 html_putchar(char c) 140 { 141 142 switch (c) { 143 case '"': 144 printf("""); 145 break; 146 case '&': 147 printf("&"); 148 break; 149 case '>': 150 printf(">"); 151 break; 152 case '<': 153 printf("<"); 154 break; 155 default: 156 putchar((unsigned char)c); 157 break; 158 } 159 } 160 161 /* 162 * Call through to html_putchar(). 163 * Accepts NULL strings. 164 */ 165 static void 166 html_print(const char *p) 167 { 168 169 if (NULL == p) 170 return; 171 while ('\0' != *p) 172 html_putchar(*p++); 173 } 174 175 /* 176 * Transfer the responsibility for the allocated string *val 177 * to the query structure. 178 */ 179 static void 180 set_query_attr(char **attr, char **val) 181 { 182 183 free(*attr); 184 if (**val == '\0') { 185 *attr = NULL; 186 free(*val); 187 } else 188 *attr = *val; 189 *val = NULL; 190 } 191 192 /* 193 * Parse the QUERY_STRING for key-value pairs 194 * and store the values into the query structure. 195 */ 196 static void 197 parse_query_string(struct req *req, const char *qs) 198 { 199 char *key, *val; 200 size_t keysz, valsz; 201 202 req->isquery = 1; 203 req->q.manpath = NULL; 204 req->q.arch = NULL; 205 req->q.sec = NULL; 206 req->q.query = NULL; 207 req->q.equal = 1; 208 209 key = val = NULL; 210 while (*qs != '\0') { 211 212 /* Parse one key. */ 213 214 keysz = strcspn(qs, "=;&"); 215 key = mandoc_strndup(qs, keysz); 216 qs += keysz; 217 if (*qs != '=') 218 goto next; 219 220 /* Parse one value. */ 221 222 valsz = strcspn(++qs, ";&"); 223 val = mandoc_strndup(qs, valsz); 224 qs += valsz; 225 226 /* Decode and catch encoding errors. */ 227 228 if ( ! (http_decode(key) && http_decode(val))) 229 goto next; 230 231 /* Handle key-value pairs. */ 232 233 if ( ! strcmp(key, "query")) 234 set_query_attr(&req->q.query, &val); 235 236 else if ( ! strcmp(key, "apropos")) 237 req->q.equal = !strcmp(val, "0"); 238 239 else if ( ! strcmp(key, "manpath")) { 240 #ifdef COMPAT_OLDURI 241 if ( ! strncmp(val, "OpenBSD ", 8)) { 242 val[7] = '-'; 243 if ('C' == val[8]) 244 val[8] = 'c'; 245 } 246 #endif 247 set_query_attr(&req->q.manpath, &val); 248 } 249 250 else if ( ! (strcmp(key, "sec") 251 #ifdef COMPAT_OLDURI 252 && strcmp(key, "sektion") 253 #endif 254 )) { 255 if ( ! strcmp(val, "0")) 256 *val = '\0'; 257 set_query_attr(&req->q.sec, &val); 258 } 259 260 else if ( ! strcmp(key, "arch")) { 261 if ( ! strcmp(val, "default")) 262 *val = '\0'; 263 set_query_attr(&req->q.arch, &val); 264 } 265 266 /* 267 * The key must be freed in any case. 268 * The val may have been handed over to the query 269 * structure, in which case it is now NULL. 270 */ 271 next: 272 free(key); 273 key = NULL; 274 free(val); 275 val = NULL; 276 277 if (*qs != '\0') 278 qs++; 279 } 280 } 281 282 /* 283 * HTTP-decode a string. The standard explanation is that this turns 284 * "%4e+foo" into "n foo" in the regular way. This is done in-place 285 * over the allocated string. 286 */ 287 static int 288 http_decode(char *p) 289 { 290 char hex[3]; 291 char *q; 292 int c; 293 294 hex[2] = '\0'; 295 296 q = p; 297 for ( ; '\0' != *p; p++, q++) { 298 if ('%' == *p) { 299 if ('\0' == (hex[0] = *(p + 1))) 300 return 0; 301 if ('\0' == (hex[1] = *(p + 2))) 302 return 0; 303 if (1 != sscanf(hex, "%x", &c)) 304 return 0; 305 if ('\0' == c) 306 return 0; 307 308 *q = (char)c; 309 p += 2; 310 } else 311 *q = '+' == *p ? ' ' : *p; 312 } 313 314 *q = '\0'; 315 return 1; 316 } 317 318 static void 319 http_encode(const char *p) 320 { 321 for (; *p != '\0'; p++) { 322 if (isalnum((unsigned char)*p) == 0 && 323 strchr("-._~", *p) == NULL) 324 printf("%%%2.2X", (unsigned char)*p); 325 else 326 putchar(*p); 327 } 328 } 329 330 static void 331 resp_begin_http(int code, const char *msg) 332 { 333 334 if (200 != code) 335 printf("Status: %d %s\r\n", code, msg); 336 337 printf("Content-Type: text/html; charset=utf-8\r\n" 338 "Cache-Control: no-cache\r\n" 339 "Content-Security-Policy: default-src 'none'; " 340 "style-src 'self' 'unsafe-inline'\r\n" 341 "Pragma: no-cache\r\n" 342 "\r\n"); 343 344 fflush(stdout); 345 } 346 347 static void 348 resp_copy(const char *filename) 349 { 350 char buf[4096]; 351 ssize_t sz; 352 int fd; 353 354 if ((fd = open(filename, O_RDONLY)) != -1) { 355 fflush(stdout); 356 while ((sz = read(fd, buf, sizeof(buf))) > 0) 357 write(STDOUT_FILENO, buf, sz); 358 close(fd); 359 } 360 } 361 362 static void 363 resp_begin_html(int code, const char *msg, const char *file) 364 { 365 char *cp; 366 367 resp_begin_http(code, msg); 368 369 printf("<!DOCTYPE html>\n" 370 "<html>\n" 371 "<head>\n" 372 " <meta charset=\"UTF-8\"/>\n" 373 " <meta name=\"viewport\"" 374 " content=\"width=device-width, initial-scale=1.0\">\n" 375 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 376 " type=\"text/css\" media=\"all\">\n" 377 " <title>", 378 CSS_DIR); 379 if (file != NULL) { 380 if ((cp = strrchr(file, '/')) != NULL) 381 file = cp + 1; 382 if ((cp = strrchr(file, '.')) != NULL) { 383 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 384 } else 385 printf("%s - ", file); 386 } 387 printf("%s</title>\n" 388 "</head>\n" 389 "<body>\n", 390 CUSTOMIZE_TITLE); 391 392 resp_copy(MAN_DIR "/header.html"); 393 } 394 395 static void 396 resp_end_html(void) 397 { 398 399 resp_copy(MAN_DIR "/footer.html"); 400 401 puts("</body>\n" 402 "</html>"); 403 } 404 405 static void 406 resp_searchform(const struct req *req, enum focus focus) 407 { 408 int i; 409 410 printf("<form action=\"/%s\" method=\"get\">\n" 411 " <fieldset>\n" 412 " <legend>Manual Page Search Parameters</legend>\n", 413 scriptname); 414 415 /* Write query input box. */ 416 417 printf(" <input type=\"search\" name=\"query\" value=\""); 418 if (req->q.query != NULL) 419 html_print(req->q.query); 420 printf( "\" size=\"40\""); 421 if (focus == FOCUS_QUERY) 422 printf(" autofocus"); 423 puts(">"); 424 425 /* Write submission buttons. */ 426 427 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 428 "man</button>\n" 429 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 430 "apropos</button>\n" 431 " <br/>\n"); 432 433 /* Write section selector. */ 434 435 puts(" <select name=\"sec\">"); 436 for (i = 0; i < sec_MAX; i++) { 437 printf(" <option value=\"%s\"", sec_numbers[i]); 438 if (NULL != req->q.sec && 439 0 == strcmp(sec_numbers[i], req->q.sec)) 440 printf(" selected=\"selected\""); 441 printf(">%s</option>\n", sec_names[i]); 442 } 443 puts(" </select>"); 444 445 /* Write architecture selector. */ 446 447 printf( " <select name=\"arch\">\n" 448 " <option value=\"default\""); 449 if (NULL == req->q.arch) 450 printf(" selected=\"selected\""); 451 puts(">All Architectures</option>"); 452 for (i = 0; i < arch_MAX; i++) { 453 printf(" <option"); 454 if (NULL != req->q.arch && 455 0 == strcmp(arch_names[i], req->q.arch)) 456 printf(" selected=\"selected\""); 457 printf(">%s</option>\n", arch_names[i]); 458 } 459 puts(" </select>"); 460 461 /* Write manpath selector. */ 462 463 if (req->psz > 1) { 464 puts(" <select name=\"manpath\">"); 465 for (i = 0; i < (int)req->psz; i++) { 466 printf(" <option"); 467 if (strcmp(req->q.manpath, req->p[i]) == 0) 468 printf(" selected=\"selected\""); 469 printf(">"); 470 html_print(req->p[i]); 471 puts("</option>"); 472 } 473 puts(" </select>"); 474 } 475 476 puts(" </fieldset>\n" 477 "</form>"); 478 } 479 480 static int 481 validate_urifrag(const char *frag) 482 { 483 484 while ('\0' != *frag) { 485 if ( ! (isalnum((unsigned char)*frag) || 486 '-' == *frag || '.' == *frag || 487 '/' == *frag || '_' == *frag)) 488 return 0; 489 frag++; 490 } 491 return 1; 492 } 493 494 static int 495 validate_manpath(const struct req *req, const char* manpath) 496 { 497 size_t i; 498 499 for (i = 0; i < req->psz; i++) 500 if ( ! strcmp(manpath, req->p[i])) 501 return 1; 502 503 return 0; 504 } 505 506 static int 507 validate_arch(const char *arch) 508 { 509 int i; 510 511 for (i = 0; i < arch_MAX; i++) 512 if (strcmp(arch, arch_names[i]) == 0) 513 return 1; 514 515 return 0; 516 } 517 518 static int 519 validate_filename(const char *file) 520 { 521 522 if ('.' == file[0] && '/' == file[1]) 523 file += 2; 524 525 return ! (strstr(file, "../") || strstr(file, "/..") || 526 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 527 } 528 529 static void 530 pg_index(const struct req *req) 531 { 532 533 resp_begin_html(200, NULL, NULL); 534 resp_searchform(req, FOCUS_QUERY); 535 printf("<p>\n" 536 "This web interface is documented in the\n" 537 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 538 "manual, and the\n" 539 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 540 "manual explains the query syntax.\n" 541 "</p>\n", 542 scriptname, *scriptname == '\0' ? "" : "/", 543 scriptname, *scriptname == '\0' ? "" : "/"); 544 resp_end_html(); 545 } 546 547 static void 548 pg_noresult(const struct req *req, int code, const char *http_msg, 549 const char *user_msg) 550 { 551 resp_begin_html(code, http_msg, NULL); 552 resp_searchform(req, FOCUS_QUERY); 553 puts("<p>"); 554 puts(user_msg); 555 puts("</p>"); 556 resp_end_html(); 557 } 558 559 static void 560 pg_error_badrequest(const char *msg) 561 { 562 563 resp_begin_html(400, "Bad Request", NULL); 564 puts("<h1>Bad Request</h1>\n" 565 "<p>\n"); 566 puts(msg); 567 printf("Try again from the\n" 568 "<a href=\"/%s\">main page</a>.\n" 569 "</p>", scriptname); 570 resp_end_html(); 571 } 572 573 static void 574 pg_error_internal(void) 575 { 576 resp_begin_html(500, "Internal Server Error", NULL); 577 puts("<p>Internal Server Error</p>"); 578 resp_end_html(); 579 } 580 581 static void 582 pg_redirect(const struct req *req, const char *name) 583 { 584 printf("Status: 303 See Other\r\n" 585 "Location: /"); 586 if (*scriptname != '\0') 587 printf("%s/", scriptname); 588 if (strcmp(req->q.manpath, req->p[0])) 589 printf("%s/", req->q.manpath); 590 if (req->q.arch != NULL) 591 printf("%s/", req->q.arch); 592 http_encode(name); 593 if (req->q.sec != NULL) { 594 putchar('.'); 595 http_encode(req->q.sec); 596 } 597 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 598 } 599 600 static void 601 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 602 { 603 char *arch, *archend; 604 const char *sec; 605 size_t i, iuse; 606 int archprio, archpriouse; 607 int prio, priouse; 608 609 for (i = 0; i < sz; i++) { 610 if (validate_filename(r[i].file)) 611 continue; 612 warnx("invalid filename %s in %s database", 613 r[i].file, req->q.manpath); 614 pg_error_internal(); 615 return; 616 } 617 618 if (req->isquery && sz == 1) { 619 /* 620 * If we have just one result, then jump there now 621 * without any delay. 622 */ 623 printf("Status: 303 See Other\r\n" 624 "Location: /"); 625 if (*scriptname != '\0') 626 printf("%s/", scriptname); 627 if (strcmp(req->q.manpath, req->p[0])) 628 printf("%s/", req->q.manpath); 629 printf("%s\r\n" 630 "Content-Type: text/html; charset=utf-8\r\n\r\n", 631 r[0].file); 632 return; 633 } 634 635 /* 636 * In man(1) mode, show one of the pages 637 * even if more than one is found. 638 */ 639 640 iuse = 0; 641 if (req->q.equal || sz == 1) { 642 priouse = 20; 643 archpriouse = 3; 644 for (i = 0; i < sz; i++) { 645 sec = r[i].file; 646 sec += strcspn(sec, "123456789"); 647 if (sec[0] == '\0') 648 continue; 649 prio = sec_prios[sec[0] - '1']; 650 if (sec[1] != '/') 651 prio += 10; 652 if (req->q.arch == NULL) { 653 archprio = 654 ((arch = strchr(sec + 1, '/')) 655 == NULL) ? 3 : 656 ((archend = strchr(arch + 1, '/')) 657 == NULL) ? 0 : 658 strncmp(arch, "amd64/", 659 archend - arch) ? 2 : 1; 660 if (archprio < archpriouse) { 661 archpriouse = archprio; 662 priouse = prio; 663 iuse = i; 664 continue; 665 } 666 if (archprio > archpriouse) 667 continue; 668 } 669 if (prio >= priouse) 670 continue; 671 priouse = prio; 672 iuse = i; 673 } 674 resp_begin_html(200, NULL, r[iuse].file); 675 } else 676 resp_begin_html(200, NULL, NULL); 677 678 resp_searchform(req, 679 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 680 681 if (sz > 1) { 682 puts("<table class=\"results\">"); 683 for (i = 0; i < sz; i++) { 684 printf(" <tr>\n" 685 " <td>" 686 "<a class=\"Xr\" href=\"/"); 687 if (*scriptname != '\0') 688 printf("%s/", scriptname); 689 if (strcmp(req->q.manpath, req->p[0])) 690 printf("%s/", req->q.manpath); 691 printf("%s\">", r[i].file); 692 html_print(r[i].names); 693 printf("</a></td>\n" 694 " <td><span class=\"Nd\">"); 695 html_print(r[i].output); 696 puts("</span></td>\n" 697 " </tr>"); 698 } 699 puts("</table>"); 700 } 701 702 if (req->q.equal || sz == 1) { 703 puts("<hr>"); 704 resp_show(req, r[iuse].file); 705 } 706 707 resp_end_html(); 708 } 709 710 static void 711 resp_catman(const struct req *req, const char *file) 712 { 713 FILE *f; 714 char *p; 715 size_t sz; 716 ssize_t len; 717 int i; 718 int italic, bold; 719 720 if ((f = fopen(file, "r")) == NULL) { 721 puts("<p>You specified an invalid manual file.</p>"); 722 return; 723 } 724 725 puts("<div class=\"catman\">\n" 726 "<pre>"); 727 728 p = NULL; 729 sz = 0; 730 731 while ((len = getline(&p, &sz, f)) != -1) { 732 bold = italic = 0; 733 for (i = 0; i < len - 1; i++) { 734 /* 735 * This means that the catpage is out of state. 736 * Ignore it and keep going (although the 737 * catpage is bogus). 738 */ 739 740 if ('\b' == p[i] || '\n' == p[i]) 741 continue; 742 743 /* 744 * Print a regular character. 745 * Close out any bold/italic scopes. 746 * If we're in back-space mode, make sure we'll 747 * have something to enter when we backspace. 748 */ 749 750 if ('\b' != p[i + 1]) { 751 if (italic) 752 printf("</i>"); 753 if (bold) 754 printf("</b>"); 755 italic = bold = 0; 756 html_putchar(p[i]); 757 continue; 758 } else if (i + 2 >= len) 759 continue; 760 761 /* Italic mode. */ 762 763 if ('_' == p[i]) { 764 if (bold) 765 printf("</b>"); 766 if ( ! italic) 767 printf("<i>"); 768 bold = 0; 769 italic = 1; 770 i += 2; 771 html_putchar(p[i]); 772 continue; 773 } 774 775 /* 776 * Handle funny behaviour troff-isms. 777 * These grok'd from the original man2html.c. 778 */ 779 780 if (('+' == p[i] && 'o' == p[i + 2]) || 781 ('o' == p[i] && '+' == p[i + 2]) || 782 ('|' == p[i] && '=' == p[i + 2]) || 783 ('=' == p[i] && '|' == p[i + 2]) || 784 ('*' == p[i] && '=' == p[i + 2]) || 785 ('=' == p[i] && '*' == p[i + 2]) || 786 ('*' == p[i] && '|' == p[i + 2]) || 787 ('|' == p[i] && '*' == p[i + 2])) { 788 if (italic) 789 printf("</i>"); 790 if (bold) 791 printf("</b>"); 792 italic = bold = 0; 793 putchar('*'); 794 i += 2; 795 continue; 796 } else if (('|' == p[i] && '-' == p[i + 2]) || 797 ('-' == p[i] && '|' == p[i + 1]) || 798 ('+' == p[i] && '-' == p[i + 1]) || 799 ('-' == p[i] && '+' == p[i + 1]) || 800 ('+' == p[i] && '|' == p[i + 1]) || 801 ('|' == p[i] && '+' == p[i + 1])) { 802 if (italic) 803 printf("</i>"); 804 if (bold) 805 printf("</b>"); 806 italic = bold = 0; 807 putchar('+'); 808 i += 2; 809 continue; 810 } 811 812 /* Bold mode. */ 813 814 if (italic) 815 printf("</i>"); 816 if ( ! bold) 817 printf("<b>"); 818 bold = 1; 819 italic = 0; 820 i += 2; 821 html_putchar(p[i]); 822 } 823 824 /* 825 * Clean up the last character. 826 * We can get to a newline; don't print that. 827 */ 828 829 if (italic) 830 printf("</i>"); 831 if (bold) 832 printf("</b>"); 833 834 if (i == len - 1 && p[i] != '\n') 835 html_putchar(p[i]); 836 837 putchar('\n'); 838 } 839 free(p); 840 841 puts("</pre>\n" 842 "</div>"); 843 844 fclose(f); 845 } 846 847 static void 848 resp_format(const struct req *req, const char *file) 849 { 850 struct manoutput conf; 851 struct mparse *mp; 852 struct roff_meta *meta; 853 void *vp; 854 int fd; 855 int usepath; 856 857 if (-1 == (fd = open(file, O_RDONLY, 0))) { 858 puts("<p>You specified an invalid manual file.</p>"); 859 return; 860 } 861 862 mchars_alloc(); 863 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 864 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 865 mparse_readfd(mp, fd, file); 866 close(fd); 867 meta = mparse_result(mp); 868 869 memset(&conf, 0, sizeof(conf)); 870 conf.fragment = 1; 871 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 872 usepath = strcmp(req->q.manpath, req->p[0]); 873 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 874 scriptname, *scriptname == '\0' ? "" : "/", 875 usepath ? req->q.manpath : "", usepath ? "/" : ""); 876 877 vp = html_alloc(&conf); 878 if (meta->macroset == MACROSET_MDOC) 879 html_mdoc(vp, meta); 880 else 881 html_man(vp, meta); 882 883 html_free(vp); 884 mparse_free(mp); 885 mchars_free(); 886 free(conf.man); 887 free(conf.style); 888 } 889 890 static void 891 resp_show(const struct req *req, const char *file) 892 { 893 894 if ('.' == file[0] && '/' == file[1]) 895 file += 2; 896 897 if ('c' == *file) 898 resp_catman(req, file); 899 else 900 resp_format(req, file); 901 } 902 903 static void 904 pg_show(struct req *req, const char *fullpath) 905 { 906 char *manpath; 907 const char *file; 908 909 if ((file = strchr(fullpath, '/')) == NULL) { 910 pg_error_badrequest( 911 "You did not specify a page to show."); 912 return; 913 } 914 manpath = mandoc_strndup(fullpath, file - fullpath); 915 file++; 916 917 if ( ! validate_manpath(req, manpath)) { 918 pg_error_badrequest( 919 "You specified an invalid manpath."); 920 free(manpath); 921 return; 922 } 923 924 /* 925 * Begin by chdir()ing into the manpath. 926 * This way we can pick up the database files, which are 927 * relative to the manpath root. 928 */ 929 930 if (chdir(manpath) == -1) { 931 warn("chdir %s", manpath); 932 pg_error_internal(); 933 free(manpath); 934 return; 935 } 936 free(manpath); 937 938 if ( ! validate_filename(file)) { 939 pg_error_badrequest( 940 "You specified an invalid manual file."); 941 return; 942 } 943 944 resp_begin_html(200, NULL, file); 945 resp_searchform(req, FOCUS_NONE); 946 resp_show(req, file); 947 resp_end_html(); 948 } 949 950 static void 951 pg_search(const struct req *req) 952 { 953 struct mansearch search; 954 struct manpaths paths; 955 struct manpage *res; 956 char **argv; 957 char *query, *rp, *wp; 958 size_t ressz; 959 int argc; 960 961 /* 962 * Begin by chdir()ing into the root of the manpath. 963 * This way we can pick up the database files, which are 964 * relative to the manpath root. 965 */ 966 967 if (chdir(req->q.manpath) == -1) { 968 warn("chdir %s", req->q.manpath); 969 pg_error_internal(); 970 return; 971 } 972 973 search.arch = req->q.arch; 974 search.sec = req->q.sec; 975 search.outkey = "Nd"; 976 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 977 search.firstmatch = 1; 978 979 paths.sz = 1; 980 paths.paths = mandoc_malloc(sizeof(char *)); 981 paths.paths[0] = mandoc_strdup("."); 982 983 /* 984 * Break apart at spaces with backslash-escaping. 985 */ 986 987 argc = 0; 988 argv = NULL; 989 rp = query = mandoc_strdup(req->q.query); 990 for (;;) { 991 while (isspace((unsigned char)*rp)) 992 rp++; 993 if (*rp == '\0') 994 break; 995 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 996 argv[argc++] = wp = rp; 997 for (;;) { 998 if (isspace((unsigned char)*rp)) { 999 *wp = '\0'; 1000 rp++; 1001 break; 1002 } 1003 if (rp[0] == '\\' && rp[1] != '\0') 1004 rp++; 1005 if (wp != rp) 1006 *wp = *rp; 1007 if (*rp == '\0') 1008 break; 1009 wp++; 1010 rp++; 1011 } 1012 } 1013 1014 res = NULL; 1015 ressz = 0; 1016 if (req->isquery && req->q.equal && argc == 1) 1017 pg_redirect(req, argv[0]); 1018 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1019 pg_noresult(req, 400, "Bad Request", 1020 "You entered an invalid query."); 1021 else if (ressz == 0) 1022 pg_noresult(req, 404, "Not Found", "No results found."); 1023 else 1024 pg_searchres(req, res, ressz); 1025 1026 free(query); 1027 mansearch_free(res, ressz); 1028 free(paths.paths[0]); 1029 free(paths.paths); 1030 } 1031 1032 int 1033 main(void) 1034 { 1035 struct req req; 1036 struct itimerval itimer; 1037 const char *path; 1038 const char *querystring; 1039 int i; 1040 1041 /* 1042 * The "rpath" pledge could be revoked after mparse_readfd() 1043 * if the file desciptor to "/footer.html" would be opened 1044 * up front, but it's probably not worth the complication 1045 * of the code it would cause: it would require scattering 1046 * pledge() calls in multiple low-level resp_*() functions. 1047 */ 1048 1049 if (pledge("stdio rpath", NULL) == -1) { 1050 warn("pledge"); 1051 pg_error_internal(); 1052 return EXIT_FAILURE; 1053 } 1054 1055 /* Poor man's ReDoS mitigation. */ 1056 1057 itimer.it_value.tv_sec = 2; 1058 itimer.it_value.tv_usec = 0; 1059 itimer.it_interval.tv_sec = 2; 1060 itimer.it_interval.tv_usec = 0; 1061 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1062 warn("setitimer"); 1063 pg_error_internal(); 1064 return EXIT_FAILURE; 1065 } 1066 1067 /* 1068 * First we change directory into the MAN_DIR so that 1069 * subsequent scanning for manpath directories is rooted 1070 * relative to the same position. 1071 */ 1072 1073 if (chdir(MAN_DIR) == -1) { 1074 warn("MAN_DIR: %s", MAN_DIR); 1075 pg_error_internal(); 1076 return EXIT_FAILURE; 1077 } 1078 1079 memset(&req, 0, sizeof(struct req)); 1080 req.q.equal = 1; 1081 parse_manpath_conf(&req); 1082 1083 /* Parse the path info and the query string. */ 1084 1085 if ((path = getenv("PATH_INFO")) == NULL) 1086 path = ""; 1087 else if (*path == '/') 1088 path++; 1089 1090 if (*path != '\0') { 1091 parse_path_info(&req, path); 1092 if (req.q.manpath == NULL || req.q.sec == NULL || 1093 *req.q.query == '\0' || access(path, F_OK) == -1) 1094 path = ""; 1095 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1096 parse_query_string(&req, querystring); 1097 1098 /* Validate parsed data and add defaults. */ 1099 1100 if (req.q.manpath == NULL) 1101 req.q.manpath = mandoc_strdup(req.p[0]); 1102 else if ( ! validate_manpath(&req, req.q.manpath)) { 1103 pg_error_badrequest( 1104 "You specified an invalid manpath."); 1105 return EXIT_FAILURE; 1106 } 1107 1108 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1109 pg_error_badrequest( 1110 "You specified an invalid architecture."); 1111 return EXIT_FAILURE; 1112 } 1113 1114 /* Dispatch to the three different pages. */ 1115 1116 if ('\0' != *path) 1117 pg_show(&req, path); 1118 else if (NULL != req.q.query) 1119 pg_search(&req); 1120 else 1121 pg_index(&req); 1122 1123 free(req.q.manpath); 1124 free(req.q.arch); 1125 free(req.q.sec); 1126 free(req.q.query); 1127 for (i = 0; i < (int)req.psz; i++) 1128 free(req.p[i]); 1129 free(req.p); 1130 return EXIT_SUCCESS; 1131 } 1132 1133 /* 1134 * Translate PATH_INFO to a query. 1135 */ 1136 static void 1137 parse_path_info(struct req *req, const char *path) 1138 { 1139 const char *name, *sec, *end; 1140 1141 req->isquery = 0; 1142 req->q.equal = 1; 1143 req->q.manpath = NULL; 1144 req->q.arch = NULL; 1145 1146 /* Mandatory manual page name. */ 1147 if ((name = strrchr(path, '/')) == NULL) 1148 name = path; 1149 else 1150 name++; 1151 1152 /* Optional trailing section. */ 1153 sec = strrchr(name, '.'); 1154 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1155 req->q.query = mandoc_strndup(name, sec - name - 1); 1156 req->q.sec = mandoc_strdup(sec); 1157 } else { 1158 req->q.query = mandoc_strdup(name); 1159 req->q.sec = NULL; 1160 } 1161 1162 /* Handle the case of name[.section] only. */ 1163 if (name == path) 1164 return; 1165 1166 /* Optional manpath. */ 1167 end = strchr(path, '/'); 1168 req->q.manpath = mandoc_strndup(path, end - path); 1169 if (validate_manpath(req, req->q.manpath)) { 1170 path = end + 1; 1171 if (name == path) 1172 return; 1173 } else { 1174 free(req->q.manpath); 1175 req->q.manpath = NULL; 1176 } 1177 1178 /* Optional section. */ 1179 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1180 path += 3; 1181 end = strchr(path, '/'); 1182 free(req->q.sec); 1183 req->q.sec = mandoc_strndup(path, end - path); 1184 path = end + 1; 1185 if (name == path) 1186 return; 1187 } 1188 1189 /* Optional architecture. */ 1190 end = strchr(path, '/'); 1191 if (end + 1 != name) { 1192 pg_error_badrequest( 1193 "You specified too many directory components."); 1194 exit(EXIT_FAILURE); 1195 } 1196 req->q.arch = mandoc_strndup(path, end - path); 1197 if (validate_arch(req->q.arch) == 0) { 1198 pg_error_badrequest( 1199 "You specified an invalid directory component."); 1200 exit(EXIT_FAILURE); 1201 } 1202 } 1203 1204 /* 1205 * Scan for indexable paths. 1206 */ 1207 static void 1208 parse_manpath_conf(struct req *req) 1209 { 1210 FILE *fp; 1211 char *dp; 1212 size_t dpsz; 1213 ssize_t len; 1214 1215 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1216 warn("%s/manpath.conf", MAN_DIR); 1217 pg_error_internal(); 1218 exit(EXIT_FAILURE); 1219 } 1220 1221 dp = NULL; 1222 dpsz = 0; 1223 1224 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1225 if (dp[len - 1] == '\n') 1226 dp[--len] = '\0'; 1227 req->p = mandoc_realloc(req->p, 1228 (req->psz + 1) * sizeof(char *)); 1229 if ( ! validate_urifrag(dp)) { 1230 warnx("%s/manpath.conf contains " 1231 "unsafe path \"%s\"", MAN_DIR, dp); 1232 pg_error_internal(); 1233 exit(EXIT_FAILURE); 1234 } 1235 if (strchr(dp, '/') != NULL) { 1236 warnx("%s/manpath.conf contains " 1237 "path with slash \"%s\"", MAN_DIR, dp); 1238 pg_error_internal(); 1239 exit(EXIT_FAILURE); 1240 } 1241 req->p[req->psz++] = dp; 1242 dp = NULL; 1243 dpsz = 0; 1244 } 1245 free(dp); 1246 1247 if (req->p == NULL) { 1248 warnx("%s/manpath.conf is empty", MAN_DIR); 1249 pg_error_internal(); 1250 exit(EXIT_FAILURE); 1251 } 1252 } 1253