1 /* $OpenBSD: cgi.c,v 1.77 2016/08/18 00:44:37 jsg Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 #include <sys/time.h> 20 21 #include <ctype.h> 22 #include <err.h> 23 #include <errno.h> 24 #include <fcntl.h> 25 #include <limits.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <unistd.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "mdoc.h" 36 #include "man.h" 37 #include "main.h" 38 #include "manconf.h" 39 #include "mansearch.h" 40 #include "cgi.h" 41 42 /* 43 * A query as passed to the search function. 44 */ 45 struct query { 46 char *manpath; /* desired manual directory */ 47 char *arch; /* architecture */ 48 char *sec; /* manual section */ 49 char *query; /* unparsed query expression */ 50 int equal; /* match whole names, not substrings */ 51 }; 52 53 struct req { 54 struct query q; 55 char **p; /* array of available manpaths */ 56 size_t psz; /* number of available manpaths */ 57 int isquery; /* QUERY_STRING used, not PATH_INFO */ 58 }; 59 60 enum focus { 61 FOCUS_NONE = 0, 62 FOCUS_QUERY 63 }; 64 65 static void html_print(const char *); 66 static void html_putchar(char); 67 static int http_decode(char *); 68 static void parse_manpath_conf(struct req *); 69 static void parse_path_info(struct req *req, const char *path); 70 static void parse_query_string(struct req *, const char *); 71 static void pg_error_badrequest(const char *); 72 static void pg_error_internal(void); 73 static void pg_index(const struct req *); 74 static void pg_noresult(const struct req *, const char *); 75 static void pg_search(const struct req *); 76 static void pg_searchres(const struct req *, 77 struct manpage *, size_t); 78 static void pg_show(struct req *, const char *); 79 static void resp_begin_html(int, const char *); 80 static void resp_begin_http(int, const char *); 81 static void resp_catman(const struct req *, const char *); 82 static void resp_copy(const char *); 83 static void resp_end_html(void); 84 static void resp_format(const struct req *, const char *); 85 static void resp_searchform(const struct req *, enum focus); 86 static void resp_show(const struct req *, const char *); 87 static void set_query_attr(char **, char **); 88 static int validate_filename(const char *); 89 static int validate_manpath(const struct req *, const char *); 90 static int validate_urifrag(const char *); 91 92 static const char *scriptname = SCRIPT_NAME; 93 94 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 95 static const char *const sec_numbers[] = { 96 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 97 }; 98 static const char *const sec_names[] = { 99 "All Sections", 100 "1 - General Commands", 101 "2 - System Calls", 102 "3 - Library Functions", 103 "3p - Perl Library", 104 "4 - Device Drivers", 105 "5 - File Formats", 106 "6 - Games", 107 "7 - Miscellaneous Information", 108 "8 - System Manager\'s Manual", 109 "9 - Kernel Developer\'s Manual" 110 }; 111 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 112 113 static const char *const arch_names[] = { 114 "amd64", "alpha", "armv7", 115 "hppa", "i386", "landisk", 116 "loongson", "luna88k", "macppc", "mips64", 117 "octeon", "sgi", "socppc", "sparc", 118 "sparc64", "zaurus", 119 "amiga", "arc", "armish", "arm32", 120 "atari", "aviion", "beagle", "cats", 121 "hppa64", "hp300", 122 "ia64", "mac68k", "mvme68k", "mvme88k", 123 "mvmeppc", "palm", "pc532", "pegasos", 124 "pmax", "powerpc", "solbourne", "sun3", 125 "vax", "wgrisc", "x68k" 126 }; 127 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 128 129 /* 130 * Print a character, escaping HTML along the way. 131 * This will pass non-ASCII straight to output: be warned! 132 */ 133 static void 134 html_putchar(char c) 135 { 136 137 switch (c) { 138 case ('"'): 139 printf(""e;"); 140 break; 141 case ('&'): 142 printf("&"); 143 break; 144 case ('>'): 145 printf(">"); 146 break; 147 case ('<'): 148 printf("<"); 149 break; 150 default: 151 putchar((unsigned char)c); 152 break; 153 } 154 } 155 156 /* 157 * Call through to html_putchar(). 158 * Accepts NULL strings. 159 */ 160 static void 161 html_print(const char *p) 162 { 163 164 if (NULL == p) 165 return; 166 while ('\0' != *p) 167 html_putchar(*p++); 168 } 169 170 /* 171 * Transfer the responsibility for the allocated string *val 172 * to the query structure. 173 */ 174 static void 175 set_query_attr(char **attr, char **val) 176 { 177 178 free(*attr); 179 if (**val == '\0') { 180 *attr = NULL; 181 free(*val); 182 } else 183 *attr = *val; 184 *val = NULL; 185 } 186 187 /* 188 * Parse the QUERY_STRING for key-value pairs 189 * and store the values into the query structure. 190 */ 191 static void 192 parse_query_string(struct req *req, const char *qs) 193 { 194 char *key, *val; 195 size_t keysz, valsz; 196 197 req->isquery = 1; 198 req->q.manpath = NULL; 199 req->q.arch = NULL; 200 req->q.sec = NULL; 201 req->q.query = NULL; 202 req->q.equal = 1; 203 204 key = val = NULL; 205 while (*qs != '\0') { 206 207 /* Parse one key. */ 208 209 keysz = strcspn(qs, "=;&"); 210 key = mandoc_strndup(qs, keysz); 211 qs += keysz; 212 if (*qs != '=') 213 goto next; 214 215 /* Parse one value. */ 216 217 valsz = strcspn(++qs, ";&"); 218 val = mandoc_strndup(qs, valsz); 219 qs += valsz; 220 221 /* Decode and catch encoding errors. */ 222 223 if ( ! (http_decode(key) && http_decode(val))) 224 goto next; 225 226 /* Handle key-value pairs. */ 227 228 if ( ! strcmp(key, "query")) 229 set_query_attr(&req->q.query, &val); 230 231 else if ( ! strcmp(key, "apropos")) 232 req->q.equal = !strcmp(val, "0"); 233 234 else if ( ! strcmp(key, "manpath")) { 235 #ifdef COMPAT_OLDURI 236 if ( ! strncmp(val, "OpenBSD ", 8)) { 237 val[7] = '-'; 238 if ('C' == val[8]) 239 val[8] = 'c'; 240 } 241 #endif 242 set_query_attr(&req->q.manpath, &val); 243 } 244 245 else if ( ! (strcmp(key, "sec") 246 #ifdef COMPAT_OLDURI 247 && strcmp(key, "sektion") 248 #endif 249 )) { 250 if ( ! strcmp(val, "0")) 251 *val = '\0'; 252 set_query_attr(&req->q.sec, &val); 253 } 254 255 else if ( ! strcmp(key, "arch")) { 256 if ( ! strcmp(val, "default")) 257 *val = '\0'; 258 set_query_attr(&req->q.arch, &val); 259 } 260 261 /* 262 * The key must be freed in any case. 263 * The val may have been handed over to the query 264 * structure, in which case it is now NULL. 265 */ 266 next: 267 free(key); 268 key = NULL; 269 free(val); 270 val = NULL; 271 272 if (*qs != '\0') 273 qs++; 274 } 275 } 276 277 /* 278 * HTTP-decode a string. The standard explanation is that this turns 279 * "%4e+foo" into "n foo" in the regular way. This is done in-place 280 * over the allocated string. 281 */ 282 static int 283 http_decode(char *p) 284 { 285 char hex[3]; 286 char *q; 287 int c; 288 289 hex[2] = '\0'; 290 291 q = p; 292 for ( ; '\0' != *p; p++, q++) { 293 if ('%' == *p) { 294 if ('\0' == (hex[0] = *(p + 1))) 295 return 0; 296 if ('\0' == (hex[1] = *(p + 2))) 297 return 0; 298 if (1 != sscanf(hex, "%x", &c)) 299 return 0; 300 if ('\0' == c) 301 return 0; 302 303 *q = (char)c; 304 p += 2; 305 } else 306 *q = '+' == *p ? ' ' : *p; 307 } 308 309 *q = '\0'; 310 return 1; 311 } 312 313 static void 314 resp_begin_http(int code, const char *msg) 315 { 316 317 if (200 != code) 318 printf("Status: %d %s\r\n", code, msg); 319 320 printf("Content-Type: text/html; charset=utf-8\r\n" 321 "Cache-Control: no-cache\r\n" 322 "Pragma: no-cache\r\n" 323 "\r\n"); 324 325 fflush(stdout); 326 } 327 328 static void 329 resp_copy(const char *filename) 330 { 331 char buf[4096]; 332 ssize_t sz; 333 int fd; 334 335 if ((fd = open(filename, O_RDONLY)) != -1) { 336 fflush(stdout); 337 while ((sz = read(fd, buf, sizeof(buf))) > 0) 338 write(STDOUT_FILENO, buf, sz); 339 close(fd); 340 } 341 } 342 343 static void 344 resp_begin_html(int code, const char *msg) 345 { 346 347 resp_begin_http(code, msg); 348 349 printf("<!DOCTYPE html>\n" 350 "<html>\n" 351 "<head>\n" 352 "<meta charset=\"UTF-8\"/>\n" 353 "<link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 354 " type=\"text/css\" media=\"all\">\n" 355 "<title>%s</title>\n" 356 "</head>\n" 357 "<body>\n" 358 "<!-- Begin page content. //-->\n", 359 CSS_DIR, CUSTOMIZE_TITLE); 360 361 resp_copy(MAN_DIR "/header.html"); 362 } 363 364 static void 365 resp_end_html(void) 366 { 367 368 resp_copy(MAN_DIR "/footer.html"); 369 370 puts("</body>\n" 371 "</html>"); 372 } 373 374 static void 375 resp_searchform(const struct req *req, enum focus focus) 376 { 377 int i; 378 379 puts("<!-- Begin search form. //-->"); 380 printf("<div id=\"mancgi\">\n" 381 "<form action=\"/%s\" method=\"get\">\n" 382 "<fieldset>\n" 383 "<legend>Manual Page Search Parameters</legend>\n", 384 scriptname); 385 386 /* Write query input box. */ 387 388 printf("<input type=\"text\" name=\"query\" value=\""); 389 if (req->q.query != NULL) 390 html_print(req->q.query); 391 printf( "\" size=\"40\""); 392 if (focus == FOCUS_QUERY) 393 printf(" autofocus"); 394 puts(">"); 395 396 /* Write submission buttons. */ 397 398 printf( "<button type=\"submit\" name=\"apropos\" value=\"0\">" 399 "man</button>\n" 400 "<button type=\"submit\" name=\"apropos\" value=\"1\">" 401 "apropos</button>\n<br/>\n"); 402 403 /* Write section selector. */ 404 405 puts("<select name=\"sec\">"); 406 for (i = 0; i < sec_MAX; i++) { 407 printf("<option value=\"%s\"", sec_numbers[i]); 408 if (NULL != req->q.sec && 409 0 == strcmp(sec_numbers[i], req->q.sec)) 410 printf(" selected=\"selected\""); 411 printf(">%s</option>\n", sec_names[i]); 412 } 413 puts("</select>"); 414 415 /* Write architecture selector. */ 416 417 printf( "<select name=\"arch\">\n" 418 "<option value=\"default\""); 419 if (NULL == req->q.arch) 420 printf(" selected=\"selected\""); 421 puts(">All Architectures</option>"); 422 for (i = 0; i < arch_MAX; i++) { 423 printf("<option value=\"%s\"", arch_names[i]); 424 if (NULL != req->q.arch && 425 0 == strcmp(arch_names[i], req->q.arch)) 426 printf(" selected=\"selected\""); 427 printf(">%s</option>\n", arch_names[i]); 428 } 429 puts("</select>"); 430 431 /* Write manpath selector. */ 432 433 if (req->psz > 1) { 434 puts("<select name=\"manpath\">"); 435 for (i = 0; i < (int)req->psz; i++) { 436 printf("<option "); 437 if (strcmp(req->q.manpath, req->p[i]) == 0) 438 printf("selected=\"selected\" "); 439 printf("value=\""); 440 html_print(req->p[i]); 441 printf("\">"); 442 html_print(req->p[i]); 443 puts("</option>"); 444 } 445 puts("</select>"); 446 } 447 448 puts("</fieldset>\n" 449 "</form>\n" 450 "</div>"); 451 puts("<!-- End search form. //-->"); 452 } 453 454 static int 455 validate_urifrag(const char *frag) 456 { 457 458 while ('\0' != *frag) { 459 if ( ! (isalnum((unsigned char)*frag) || 460 '-' == *frag || '.' == *frag || 461 '/' == *frag || '_' == *frag)) 462 return 0; 463 frag++; 464 } 465 return 1; 466 } 467 468 static int 469 validate_manpath(const struct req *req, const char* manpath) 470 { 471 size_t i; 472 473 for (i = 0; i < req->psz; i++) 474 if ( ! strcmp(manpath, req->p[i])) 475 return 1; 476 477 return 0; 478 } 479 480 static int 481 validate_filename(const char *file) 482 { 483 484 if ('.' == file[0] && '/' == file[1]) 485 file += 2; 486 487 return ! (strstr(file, "../") || strstr(file, "/..") || 488 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 489 } 490 491 static void 492 pg_index(const struct req *req) 493 { 494 495 resp_begin_html(200, NULL); 496 resp_searchform(req, FOCUS_QUERY); 497 printf("<p>\n" 498 "This web interface is documented in the\n" 499 "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 500 "manual, and the\n" 501 "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n" 502 "manual explains the query syntax.\n" 503 "</p>\n", 504 scriptname, *scriptname == '\0' ? "" : "/", 505 scriptname, *scriptname == '\0' ? "" : "/"); 506 resp_end_html(); 507 } 508 509 static void 510 pg_noresult(const struct req *req, const char *msg) 511 { 512 resp_begin_html(200, NULL); 513 resp_searchform(req, FOCUS_QUERY); 514 puts("<p>"); 515 puts(msg); 516 puts("</p>"); 517 resp_end_html(); 518 } 519 520 static void 521 pg_error_badrequest(const char *msg) 522 { 523 524 resp_begin_html(400, "Bad Request"); 525 puts("<h1>Bad Request</h1>\n" 526 "<p>\n"); 527 puts(msg); 528 printf("Try again from the\n" 529 "<a href=\"/%s\">main page</a>.\n" 530 "</p>", scriptname); 531 resp_end_html(); 532 } 533 534 static void 535 pg_error_internal(void) 536 { 537 resp_begin_html(500, "Internal Server Error"); 538 puts("<p>Internal Server Error</p>"); 539 resp_end_html(); 540 } 541 542 static void 543 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 544 { 545 char *arch, *archend; 546 const char *sec; 547 size_t i, iuse; 548 int archprio, archpriouse; 549 int prio, priouse; 550 551 for (i = 0; i < sz; i++) { 552 if (validate_filename(r[i].file)) 553 continue; 554 warnx("invalid filename %s in %s database", 555 r[i].file, req->q.manpath); 556 pg_error_internal(); 557 return; 558 } 559 560 if (req->isquery && sz == 1) { 561 /* 562 * If we have just one result, then jump there now 563 * without any delay. 564 */ 565 printf("Status: 303 See Other\r\n"); 566 printf("Location: http://%s/%s%s%s/%s", 567 HTTP_HOST, scriptname, 568 *scriptname == '\0' ? "" : "/", 569 req->q.manpath, r[0].file); 570 printf("\r\n" 571 "Content-Type: text/html; charset=utf-8\r\n" 572 "\r\n"); 573 return; 574 } 575 576 resp_begin_html(200, NULL); 577 resp_searchform(req, 578 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 579 580 if (sz > 1) { 581 puts("<div class=\"results\">"); 582 puts("<table>"); 583 584 for (i = 0; i < sz; i++) { 585 printf("<tr>\n" 586 "<td class=\"title\">\n" 587 "<a href=\"/%s%s%s/%s", 588 scriptname, *scriptname == '\0' ? "" : "/", 589 req->q.manpath, r[i].file); 590 printf("\">"); 591 html_print(r[i].names); 592 printf("</a>\n" 593 "</td>\n" 594 "<td class=\"desc\">"); 595 html_print(r[i].output); 596 puts("</td>\n" 597 "</tr>"); 598 } 599 600 puts("</table>\n" 601 "</div>"); 602 } 603 604 /* 605 * In man(1) mode, show one of the pages 606 * even if more than one is found. 607 */ 608 609 if (req->q.equal || sz == 1) { 610 puts("<hr>"); 611 iuse = 0; 612 priouse = 20; 613 archpriouse = 3; 614 for (i = 0; i < sz; i++) { 615 sec = r[i].file; 616 sec += strcspn(sec, "123456789"); 617 if (sec[0] == '\0') 618 continue; 619 prio = sec_prios[sec[0] - '1']; 620 if (sec[1] != '/') 621 prio += 10; 622 if (req->q.arch == NULL) { 623 archprio = 624 ((arch = strchr(sec + 1, '/')) 625 == NULL) ? 3 : 626 ((archend = strchr(arch + 1, '/')) 627 == NULL) ? 0 : 628 strncmp(arch, "amd64/", 629 archend - arch) ? 2 : 1; 630 if (archprio < archpriouse) { 631 archpriouse = archprio; 632 priouse = prio; 633 iuse = i; 634 continue; 635 } 636 if (archprio > archpriouse) 637 continue; 638 } 639 if (prio >= priouse) 640 continue; 641 priouse = prio; 642 iuse = i; 643 } 644 resp_show(req, r[iuse].file); 645 } 646 647 resp_end_html(); 648 } 649 650 static void 651 resp_catman(const struct req *req, const char *file) 652 { 653 FILE *f; 654 char *p; 655 size_t sz; 656 ssize_t len; 657 int i; 658 int italic, bold; 659 660 if ((f = fopen(file, "r")) == NULL) { 661 puts("<p>You specified an invalid manual file.</p>"); 662 return; 663 } 664 665 puts("<div class=\"catman\">\n" 666 "<pre>"); 667 668 p = NULL; 669 sz = 0; 670 671 while ((len = getline(&p, &sz, f)) != -1) { 672 bold = italic = 0; 673 for (i = 0; i < len - 1; i++) { 674 /* 675 * This means that the catpage is out of state. 676 * Ignore it and keep going (although the 677 * catpage is bogus). 678 */ 679 680 if ('\b' == p[i] || '\n' == p[i]) 681 continue; 682 683 /* 684 * Print a regular character. 685 * Close out any bold/italic scopes. 686 * If we're in back-space mode, make sure we'll 687 * have something to enter when we backspace. 688 */ 689 690 if ('\b' != p[i + 1]) { 691 if (italic) 692 printf("</i>"); 693 if (bold) 694 printf("</b>"); 695 italic = bold = 0; 696 html_putchar(p[i]); 697 continue; 698 } else if (i + 2 >= len) 699 continue; 700 701 /* Italic mode. */ 702 703 if ('_' == p[i]) { 704 if (bold) 705 printf("</b>"); 706 if ( ! italic) 707 printf("<i>"); 708 bold = 0; 709 italic = 1; 710 i += 2; 711 html_putchar(p[i]); 712 continue; 713 } 714 715 /* 716 * Handle funny behaviour troff-isms. 717 * These grok'd from the original man2html.c. 718 */ 719 720 if (('+' == p[i] && 'o' == p[i + 2]) || 721 ('o' == p[i] && '+' == p[i + 2]) || 722 ('|' == p[i] && '=' == p[i + 2]) || 723 ('=' == p[i] && '|' == p[i + 2]) || 724 ('*' == p[i] && '=' == p[i + 2]) || 725 ('=' == p[i] && '*' == p[i + 2]) || 726 ('*' == p[i] && '|' == p[i + 2]) || 727 ('|' == p[i] && '*' == p[i + 2])) { 728 if (italic) 729 printf("</i>"); 730 if (bold) 731 printf("</b>"); 732 italic = bold = 0; 733 putchar('*'); 734 i += 2; 735 continue; 736 } else if (('|' == p[i] && '-' == p[i + 2]) || 737 ('-' == p[i] && '|' == p[i + 1]) || 738 ('+' == p[i] && '-' == p[i + 1]) || 739 ('-' == p[i] && '+' == p[i + 1]) || 740 ('+' == p[i] && '|' == p[i + 1]) || 741 ('|' == p[i] && '+' == p[i + 1])) { 742 if (italic) 743 printf("</i>"); 744 if (bold) 745 printf("</b>"); 746 italic = bold = 0; 747 putchar('+'); 748 i += 2; 749 continue; 750 } 751 752 /* Bold mode. */ 753 754 if (italic) 755 printf("</i>"); 756 if ( ! bold) 757 printf("<b>"); 758 bold = 1; 759 italic = 0; 760 i += 2; 761 html_putchar(p[i]); 762 } 763 764 /* 765 * Clean up the last character. 766 * We can get to a newline; don't print that. 767 */ 768 769 if (italic) 770 printf("</i>"); 771 if (bold) 772 printf("</b>"); 773 774 if (i == len - 1 && p[i] != '\n') 775 html_putchar(p[i]); 776 777 putchar('\n'); 778 } 779 free(p); 780 781 puts("</pre>\n" 782 "</div>"); 783 784 fclose(f); 785 } 786 787 static void 788 resp_format(const struct req *req, const char *file) 789 { 790 struct manoutput conf; 791 struct mparse *mp; 792 struct roff_man *man; 793 void *vp; 794 int fd; 795 int usepath; 796 797 if (-1 == (fd = open(file, O_RDONLY, 0))) { 798 puts("<p>You specified an invalid manual file.</p>"); 799 return; 800 } 801 802 mchars_alloc(); 803 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 804 MANDOCLEVEL_BADARG, NULL, req->q.manpath); 805 mparse_readfd(mp, fd, file); 806 close(fd); 807 808 memset(&conf, 0, sizeof(conf)); 809 conf.fragment = 1; 810 usepath = strcmp(req->q.manpath, req->p[0]); 811 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S", 812 usepath ? req->q.manpath : "", usepath ? "/" : ""); 813 814 mparse_result(mp, &man, NULL); 815 if (man == NULL) { 816 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 817 pg_error_internal(); 818 mparse_free(mp); 819 mchars_free(); 820 return; 821 } 822 823 vp = html_alloc(&conf); 824 825 if (man->macroset == MACROSET_MDOC) { 826 mdoc_validate(man); 827 html_mdoc(vp, man); 828 } else { 829 man_validate(man); 830 html_man(vp, man); 831 } 832 833 html_free(vp); 834 mparse_free(mp); 835 mchars_free(); 836 free(conf.man); 837 } 838 839 static void 840 resp_show(const struct req *req, const char *file) 841 { 842 843 if ('.' == file[0] && '/' == file[1]) 844 file += 2; 845 846 if ('c' == *file) 847 resp_catman(req, file); 848 else 849 resp_format(req, file); 850 } 851 852 static void 853 pg_show(struct req *req, const char *fullpath) 854 { 855 char *manpath; 856 const char *file; 857 858 if ((file = strchr(fullpath, '/')) == NULL) { 859 pg_error_badrequest( 860 "You did not specify a page to show."); 861 return; 862 } 863 manpath = mandoc_strndup(fullpath, file - fullpath); 864 file++; 865 866 if ( ! validate_manpath(req, manpath)) { 867 pg_error_badrequest( 868 "You specified an invalid manpath."); 869 free(manpath); 870 return; 871 } 872 873 /* 874 * Begin by chdir()ing into the manpath. 875 * This way we can pick up the database files, which are 876 * relative to the manpath root. 877 */ 878 879 if (chdir(manpath) == -1) { 880 warn("chdir %s", manpath); 881 pg_error_internal(); 882 free(manpath); 883 return; 884 } 885 free(manpath); 886 887 if ( ! validate_filename(file)) { 888 pg_error_badrequest( 889 "You specified an invalid manual file."); 890 return; 891 } 892 893 resp_begin_html(200, NULL); 894 resp_searchform(req, FOCUS_NONE); 895 resp_show(req, file); 896 resp_end_html(); 897 } 898 899 static void 900 pg_search(const struct req *req) 901 { 902 struct mansearch search; 903 struct manpaths paths; 904 struct manpage *res; 905 char **argv; 906 char *query, *rp, *wp; 907 size_t ressz; 908 int argc; 909 910 /* 911 * Begin by chdir()ing into the root of the manpath. 912 * This way we can pick up the database files, which are 913 * relative to the manpath root. 914 */ 915 916 if (chdir(req->q.manpath) == -1) { 917 warn("chdir %s", req->q.manpath); 918 pg_error_internal(); 919 return; 920 } 921 922 search.arch = req->q.arch; 923 search.sec = req->q.sec; 924 search.outkey = "Nd"; 925 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 926 search.firstmatch = 1; 927 928 paths.sz = 1; 929 paths.paths = mandoc_malloc(sizeof(char *)); 930 paths.paths[0] = mandoc_strdup("."); 931 932 /* 933 * Break apart at spaces with backslash-escaping. 934 */ 935 936 argc = 0; 937 argv = NULL; 938 rp = query = mandoc_strdup(req->q.query); 939 for (;;) { 940 while (isspace((unsigned char)*rp)) 941 rp++; 942 if (*rp == '\0') 943 break; 944 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 945 argv[argc++] = wp = rp; 946 for (;;) { 947 if (isspace((unsigned char)*rp)) { 948 *wp = '\0'; 949 rp++; 950 break; 951 } 952 if (rp[0] == '\\' && rp[1] != '\0') 953 rp++; 954 if (wp != rp) 955 *wp = *rp; 956 if (*rp == '\0') 957 break; 958 wp++; 959 rp++; 960 } 961 } 962 963 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz)) 964 pg_noresult(req, "You entered an invalid query."); 965 else if (0 == ressz) 966 pg_noresult(req, "No results found."); 967 else 968 pg_searchres(req, res, ressz); 969 970 free(query); 971 mansearch_free(res, ressz); 972 free(paths.paths[0]); 973 free(paths.paths); 974 } 975 976 int 977 main(void) 978 { 979 struct req req; 980 struct itimerval itimer; 981 const char *path; 982 const char *querystring; 983 int i; 984 985 /* Poor man's ReDoS mitigation. */ 986 987 itimer.it_value.tv_sec = 2; 988 itimer.it_value.tv_usec = 0; 989 itimer.it_interval.tv_sec = 2; 990 itimer.it_interval.tv_usec = 0; 991 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 992 warn("setitimer"); 993 pg_error_internal(); 994 return EXIT_FAILURE; 995 } 996 997 /* 998 * First we change directory into the MAN_DIR so that 999 * subsequent scanning for manpath directories is rooted 1000 * relative to the same position. 1001 */ 1002 1003 if (chdir(MAN_DIR) == -1) { 1004 warn("MAN_DIR: %s", MAN_DIR); 1005 pg_error_internal(); 1006 return EXIT_FAILURE; 1007 } 1008 1009 memset(&req, 0, sizeof(struct req)); 1010 req.q.equal = 1; 1011 parse_manpath_conf(&req); 1012 1013 /* Parse the path info and the query string. */ 1014 1015 if ((path = getenv("PATH_INFO")) == NULL) 1016 path = ""; 1017 else if (*path == '/') 1018 path++; 1019 1020 if (*path != '\0') { 1021 parse_path_info(&req, path); 1022 if (req.q.manpath == NULL || access(path, F_OK) == -1) 1023 path = ""; 1024 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1025 parse_query_string(&req, querystring); 1026 1027 /* Validate parsed data and add defaults. */ 1028 1029 if (req.q.manpath == NULL) 1030 req.q.manpath = mandoc_strdup(req.p[0]); 1031 else if ( ! validate_manpath(&req, req.q.manpath)) { 1032 pg_error_badrequest( 1033 "You specified an invalid manpath."); 1034 return EXIT_FAILURE; 1035 } 1036 1037 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1038 pg_error_badrequest( 1039 "You specified an invalid architecture."); 1040 return EXIT_FAILURE; 1041 } 1042 1043 /* Dispatch to the three different pages. */ 1044 1045 if ('\0' != *path) 1046 pg_show(&req, path); 1047 else if (NULL != req.q.query) 1048 pg_search(&req); 1049 else 1050 pg_index(&req); 1051 1052 free(req.q.manpath); 1053 free(req.q.arch); 1054 free(req.q.sec); 1055 free(req.q.query); 1056 for (i = 0; i < (int)req.psz; i++) 1057 free(req.p[i]); 1058 free(req.p); 1059 return EXIT_SUCCESS; 1060 } 1061 1062 /* 1063 * If PATH_INFO is not a file name, translate it to a query. 1064 */ 1065 static void 1066 parse_path_info(struct req *req, const char *path) 1067 { 1068 char *dir[4]; 1069 int i; 1070 1071 req->isquery = 0; 1072 req->q.equal = 1; 1073 req->q.manpath = mandoc_strdup(path); 1074 req->q.arch = NULL; 1075 1076 /* Mandatory manual page name. */ 1077 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1078 req->q.query = req->q.manpath; 1079 req->q.manpath = NULL; 1080 } else 1081 *req->q.query++ = '\0'; 1082 1083 /* Optional trailing section. */ 1084 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1085 if(isdigit((unsigned char)req->q.sec[1])) { 1086 *req->q.sec++ = '\0'; 1087 req->q.sec = mandoc_strdup(req->q.sec); 1088 } else 1089 req->q.sec = NULL; 1090 } 1091 1092 /* Handle the case of name[.section] only. */ 1093 if (req->q.manpath == NULL) 1094 return; 1095 req->q.query = mandoc_strdup(req->q.query); 1096 1097 /* Split directory components. */ 1098 dir[i = 0] = req->q.manpath; 1099 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1100 if (++i == 3) { 1101 pg_error_badrequest( 1102 "You specified too many directory components."); 1103 exit(EXIT_FAILURE); 1104 } 1105 *dir[i]++ = '\0'; 1106 } 1107 1108 /* Optional manpath. */ 1109 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1110 req->q.manpath = NULL; 1111 else if (dir[1] == NULL) 1112 return; 1113 1114 /* Optional section. */ 1115 if (strncmp(dir[i], "man", 3) == 0) { 1116 free(req->q.sec); 1117 req->q.sec = mandoc_strdup(dir[i++] + 3); 1118 } 1119 if (dir[i] == NULL) { 1120 if (req->q.manpath == NULL) 1121 free(dir[0]); 1122 return; 1123 } 1124 if (dir[i + 1] != NULL) { 1125 pg_error_badrequest( 1126 "You specified an invalid directory component."); 1127 exit(EXIT_FAILURE); 1128 } 1129 1130 /* Optional architecture. */ 1131 if (i) { 1132 req->q.arch = mandoc_strdup(dir[i]); 1133 if (req->q.manpath == NULL) 1134 free(dir[0]); 1135 } else 1136 req->q.arch = dir[0]; 1137 } 1138 1139 /* 1140 * Scan for indexable paths. 1141 */ 1142 static void 1143 parse_manpath_conf(struct req *req) 1144 { 1145 FILE *fp; 1146 char *dp; 1147 size_t dpsz; 1148 ssize_t len; 1149 1150 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1151 warn("%s/manpath.conf", MAN_DIR); 1152 pg_error_internal(); 1153 exit(EXIT_FAILURE); 1154 } 1155 1156 dp = NULL; 1157 dpsz = 0; 1158 1159 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1160 if (dp[len - 1] == '\n') 1161 dp[--len] = '\0'; 1162 req->p = mandoc_realloc(req->p, 1163 (req->psz + 1) * sizeof(char *)); 1164 if ( ! validate_urifrag(dp)) { 1165 warnx("%s/manpath.conf contains " 1166 "unsafe path \"%s\"", MAN_DIR, dp); 1167 pg_error_internal(); 1168 exit(EXIT_FAILURE); 1169 } 1170 if (strchr(dp, '/') != NULL) { 1171 warnx("%s/manpath.conf contains " 1172 "path with slash \"%s\"", MAN_DIR, dp); 1173 pg_error_internal(); 1174 exit(EXIT_FAILURE); 1175 } 1176 req->p[req->psz++] = dp; 1177 dp = NULL; 1178 dpsz = 0; 1179 } 1180 free(dp); 1181 1182 if (req->p == NULL) { 1183 warnx("%s/manpath.conf is empty", MAN_DIR); 1184 pg_error_internal(); 1185 exit(EXIT_FAILURE); 1186 } 1187 } 1188