1 /* $OpenBSD: cgi.c,v 1.120 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3 * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Implementation of the man.cgi(8) program.
20 */
21 #include <sys/types.h>
22 #include <sys/time.h>
23
24 #include <ctype.h>
25 #include <err.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34
35 #include "mandoc_aux.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "mdoc.h"
39 #include "man.h"
40 #include "mandoc_parse.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * The search parameters of one request,
 * in the form passed to the search function.
 */
struct query {
	char	*manpath;	/* desired manual directory */
	char	*arch;		/* machine architecture */
	char	*sec;		/* manual section */
	char	*query;		/* query expression, not yet parsed */
	int	 equal;		/* match whole names rather than substrings */
};
56
57 struct req {
58 struct query q;
59 char **p; /* array of available manpaths */
60 size_t psz; /* number of available manpaths */
61 int isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* no autofocus */
	FOCUS_QUERY = 1		/* autofocus on the query box */
};
68
/* Forward declarations of the file-local functions. */
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_encode(const char *p);
static	void	 parse_manpath_conf(struct req *req);
static	void	 parse_path_info(struct req *req, const char *path);
static	void	 parse_query_string(struct req *req, const char *qs);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, int code,
			const char *http_msg, const char *user_msg);
static	void	 pg_redirect(const struct req *req, const char *name);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	int	 resp_begin_html(int code, const char *msg,
			const char *file);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_catman(const struct req *req, const char *file);
static	int	 resp_copy(const char *element, const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_format(const struct req *req, const char *file);
static	void	 resp_searchform(const struct req *req, enum focus focus);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_arch(const char *arch);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
			const char *manpath);
static	int	 validate_urifrag(const char *frag);
99
100 static const char *scriptname = SCRIPT_NAME;
101
/*
 * Preference rank of the manual sections 1 to 9, indexed by the
 * section digit minus one; pg_searchres() prefers smaller values.
 */
static const int	 sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Values and labels of the section selector in the search form.
 * Both arrays are indexed in parallel.
 */
static const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};
static const int	 sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
120
/*
 * Architecture names accepted by validate_arch()
 * and offered by the architecture selector of the search form.
 */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",	"loongson",
	"luna88k",	"macppc",	"mips64",	"octeon",
	"powerpc64",	"riscv64",	"sparc64",

	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"sgi",		"socppc",
	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};
static const int	 arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
138
/*
 * Print one character to standard output, replacing the four
 * characters that are special in HTML (double quote, ampersand,
 * greater-than, less-than) with the matching character references.
 * Any other byte, including non-ASCII, is passed straight to the
 * output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
165
/*
 * Print a string with html_putchar(), one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
179
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In either case,
 * *val is NULL on return because the caller no longer owns it.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*str;

	free(*attr);
	str = *val;
	*val = NULL;
	if (*str == '\0') {
		free(str);
		str = NULL;
	}
	*attr = str;
}
196
197 /*
198 * Parse the QUERY_STRING for key-value pairs
199 * and store the values into the query structure.
200 */
201 static void
parse_query_string(struct req * req,const char * qs)202 parse_query_string(struct req *req, const char *qs)
203 {
204 char *key, *val;
205 size_t keysz, valsz;
206
207 req->isquery = 1;
208 req->q.manpath = NULL;
209 req->q.arch = NULL;
210 req->q.sec = NULL;
211 req->q.query = NULL;
212 req->q.equal = 1;
213
214 key = val = NULL;
215 while (*qs != '\0') {
216
217 /* Parse one key. */
218
219 keysz = strcspn(qs, "=;&");
220 key = mandoc_strndup(qs, keysz);
221 qs += keysz;
222 if (*qs != '=')
223 goto next;
224
225 /* Parse one value. */
226
227 valsz = strcspn(++qs, ";&");
228 val = mandoc_strndup(qs, valsz);
229 qs += valsz;
230
231 /* Decode and catch encoding errors. */
232
233 if ( ! (http_decode(key) && http_decode(val)))
234 goto next;
235
236 /* Handle key-value pairs. */
237
238 if ( ! strcmp(key, "query"))
239 set_query_attr(&req->q.query, &val);
240
241 else if ( ! strcmp(key, "apropos"))
242 req->q.equal = !strcmp(val, "0");
243
244 else if ( ! strcmp(key, "manpath")) {
245 #ifdef COMPAT_OLDURI
246 if ( ! strncmp(val, "OpenBSD ", 8)) {
247 val[7] = '-';
248 if ('C' == val[8])
249 val[8] = 'c';
250 }
251 #endif
252 set_query_attr(&req->q.manpath, &val);
253 }
254
255 else if ( ! (strcmp(key, "sec")
256 #ifdef COMPAT_OLDURI
257 && strcmp(key, "sektion")
258 #endif
259 )) {
260 if ( ! strcmp(val, "0"))
261 *val = '\0';
262 set_query_attr(&req->q.sec, &val);
263 }
264
265 else if ( ! strcmp(key, "arch")) {
266 if ( ! strcmp(val, "default"))
267 *val = '\0';
268 set_query_attr(&req->q.arch, &val);
269 }
270
271 /*
272 * The key must be freed in any case.
273 * The val may have been handed over to the query
274 * structure, in which case it is now NULL.
275 */
276 next:
277 free(key);
278 key = NULL;
279 free(val);
280 val = NULL;
281
282 if (*qs != '\0')
283 qs++;
284 }
285 }
286
/*
 * HTTP-decode a string in place: "+" becomes a space and "%XX",
 * with XX being exactly two hexadecimal digits, becomes the byte
 * with that value.  For example, "%4e+foo" turns into "N foo".
 *
 * Returns 1 on success.  Returns 0 if a percent sign is not followed
 * by two hexadecimal digits or if the escape encodes a NUL byte;
 * in that case, the content of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	long	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two hexadecimal digits.  The short-circuit
		 * evaluation never looks beyond the terminating NUL.
		 * Checking the digits explicitly rejects input that a
		 * number parser would tolerate, like signs, leading
		 * blanks, or a single digit followed by garbage.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;

		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
322
/*
 * Print a string to standard output in percent-encoded form.
 * Alphanumeric characters and the four characters "-._~" are
 * printed as they are; every other byte is printed as "%XX"
 * with two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
334
/*
 * Print the HTTP response headers followed by the empty line
 * ending them, then flush standard output.
 * A "Status:" line built from code and msg is only printed
 * when the code differs from 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);
	fputs(headers, stdout);
	fflush(stdout);
}
351
/*
 * Copy the content of a file verbatim to standard output.
 * If element is not NULL, an opening tag for that HTML element
 * is printed first; closing it is left to the caller.
 *
 * Returns 0 without printing anything if the file cannot be opened.
 * Returns 1 if the file was opened, even if copying stopped early
 * because of a read or write error, such that the caller knows
 * that the opening tag was printed.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, off, wsz;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* The file content bypasses stdio, so flush what is pending. */

	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {

		/* write(2) may accept fewer bytes than requested. */

		for (off = 0; off < sz; off += wsz) {
			wsz = write(STDOUT_FILENO, buf + off, sz - off);
			if (wsz == -1) {
				if (errno != EINTR)
					goto out;
				wsz = 0;
			}
		}
	}
out:
	close(fd);
	return 1;
}
370
371 static int
resp_begin_html(int code,const char * msg,const char * file)372 resp_begin_html(int code, const char *msg, const char *file)
373 {
374 const char *name, *sec, *cp;
375 int namesz, secsz;
376
377 resp_begin_http(code, msg);
378
379 printf("<!DOCTYPE html>\n"
380 "<html>\n"
381 "<head>\n"
382 " <meta charset=\"UTF-8\"/>\n"
383 " <meta name=\"viewport\""
384 " content=\"width=device-width, initial-scale=1.0\">\n"
385 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
386 " type=\"text/css\" media=\"all\">\n"
387 " <title>",
388 CSS_DIR);
389 if (file != NULL) {
390 cp = strrchr(file, '/');
391 name = cp == NULL ? file : cp + 1;
392 cp = strrchr(name, '.');
393 namesz = cp == NULL ? strlen(name) : cp - name;
394 sec = NULL;
395 if (cp != NULL && cp[1] != '0') {
396 sec = cp + 1;
397 secsz = strlen(sec);
398 } else if (name - file > 1) {
399 for (cp = name - 2; cp >= file; cp--) {
400 if (*cp < '1' || *cp > '9')
401 continue;
402 sec = cp;
403 secsz = name - cp - 1;
404 break;
405 }
406 }
407 printf("%.*s", namesz, name);
408 if (sec != NULL)
409 printf("(%.*s)", secsz, sec);
410 fputs(" - ", stdout);
411 }
412 printf("%s</title>\n"
413 "</head>\n"
414 "<body>\n",
415 CUSTOMIZE_TITLE);
416
417 return resp_copy("header", MAN_DIR "/header.html");
418 }
419
420 static void
resp_end_html(void)421 resp_end_html(void)
422 {
423 if (resp_copy("footer", MAN_DIR "/footer.html"))
424 puts("</footer>");
425
426 puts("</body>\n"
427 "</html>");
428 }
429
430 static void
resp_searchform(const struct req * req,enum focus focus)431 resp_searchform(const struct req *req, enum focus focus)
432 {
433 int i;
434
435 printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
436 "autocomplete=\"off\" autocapitalize=\"none\">\n"
437 " <fieldset>\n"
438 " <legend>Manual Page Search Parameters</legend>\n",
439 scriptname);
440
441 /* Write query input box. */
442
443 printf(" <label>Search query:\n"
444 " <input type=\"search\" name=\"query\" value=\"");
445 if (req->q.query != NULL)
446 html_print(req->q.query);
447 printf("\" size=\"40\"");
448 if (focus == FOCUS_QUERY)
449 printf(" autofocus");
450 puts(">\n </label>");
451
452 /* Write submission buttons. */
453
454 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
455 "man</button>\n"
456 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
457 "apropos</button>\n"
458 " <br/>\n");
459
460 /* Write section selector. */
461
462 puts(" <select name=\"sec\" aria-label=\"Manual section\">");
463 for (i = 0; i < sec_MAX; i++) {
464 printf(" <option value=\"%s\"", sec_numbers[i]);
465 if (NULL != req->q.sec &&
466 0 == strcmp(sec_numbers[i], req->q.sec))
467 printf(" selected=\"selected\"");
468 printf(">%s</option>\n", sec_names[i]);
469 }
470 puts(" </select>");
471
472 /* Write architecture selector. */
473
474 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n"
475 " <option value=\"default\"");
476 if (NULL == req->q.arch)
477 printf(" selected=\"selected\"");
478 puts(">All Architectures</option>");
479 for (i = 0; i < arch_MAX; i++) {
480 printf(" <option");
481 if (NULL != req->q.arch &&
482 0 == strcmp(arch_names[i], req->q.arch))
483 printf(" selected=\"selected\"");
484 printf(">%s</option>\n", arch_names[i]);
485 }
486 puts(" </select>");
487
488 /* Write manpath selector. */
489
490 if (req->psz > 1) {
491 puts(" <select name=\"manpath\""
492 " aria-label=\"Manual path\">");
493 for (i = 0; i < (int)req->psz; i++) {
494 printf(" <option");
495 if (strcmp(req->q.manpath, req->p[i]) == 0)
496 printf(" selected=\"selected\"");
497 printf(">");
498 html_print(req->p[i]);
499 puts("</option>");
500 }
501 puts(" </select>");
502 }
503
504 puts(" </fieldset>\n"
505 "</form>");
506 }
507
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
521
522 static int
validate_manpath(const struct req * req,const char * manpath)523 validate_manpath(const struct req *req, const char* manpath)
524 {
525 size_t i;
526
527 for (i = 0; i < req->psz; i++)
528 if ( ! strcmp(manpath, req->p[i]))
529 return 1;
530
531 return 0;
532 }
533
534 static int
validate_arch(const char * arch)535 validate_arch(const char *arch)
536 {
537 int i;
538
539 for (i = 0; i < arch_MAX; i++)
540 if (strcmp(arch, arch_names[i]) == 0)
541 return 1;
542
543 return 0;
544 }
545
/*
 * Check that a file name, after an optional leading "./", starts
 * with "man" or "cat" and contains neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strncmp(file, "man", 3) != 0 && strncmp(file, "cat", 3) != 0)
		return 0;
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	return 1;
}
556
557 static void
pg_index(const struct req * req)558 pg_index(const struct req *req)
559 {
560 if (resp_begin_html(200, NULL, NULL) == 0)
561 puts("<header>");
562 resp_searchform(req, FOCUS_QUERY);
563 printf("</header>\n"
564 "<main>\n"
565 "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
566 "This web interface is documented in the\n"
567 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
568 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
569 "manual, and the\n"
570 "<a class=\"Xr\" href=\"/%s%sapropos.1\""
571 " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
572 "manual explains the query syntax.\n"
573 "</p>\n"
574 "</main>\n",
575 scriptname, *scriptname == '\0' ? "" : "/",
576 scriptname, *scriptname == '\0' ? "" : "/");
577 resp_end_html();
578 }
579
580 static void
pg_noresult(const struct req * req,int code,const char * http_msg,const char * user_msg)581 pg_noresult(const struct req *req, int code, const char *http_msg,
582 const char *user_msg)
583 {
584 if (resp_begin_html(code, http_msg, NULL) == 0)
585 puts("<header>");
586 resp_searchform(req, FOCUS_QUERY);
587 puts("</header>");
588 puts("<main>");
589 puts("<p role=\"doc-notice\" aria-label=\"No result\">");
590 puts(user_msg);
591 puts("</p>");
592 puts("</main>");
593 resp_end_html();
594 }
595
596 static void
pg_error_badrequest(const char * msg)597 pg_error_badrequest(const char *msg)
598 {
599 if (resp_begin_html(400, "Bad Request", NULL))
600 puts("</header>");
601 puts("<main>\n"
602 "<h1>Bad Request</h1>\n"
603 "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
604 puts(msg);
605 printf("Try again from the\n"
606 "<a href=\"/%s\">main page</a>.\n"
607 "</p>\n"
608 "</main>\n", scriptname);
609 resp_end_html();
610 }
611
/*
 * Print a "500 Internal Server Error" page without any details.
 */
static void
pg_error_internal(void)
{
	int	 have_header;

	/* Close the header if the header file opened one. */

	have_header = resp_begin_html(500, "Internal Server Error", NULL);
	if (have_header != 0)
		puts("</header>");
	puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
	resp_end_html();
}
620
621 static void
pg_redirect(const struct req * req,const char * name)622 pg_redirect(const struct req *req, const char *name)
623 {
624 printf("Status: 303 See Other\r\n"
625 "Location: /");
626 if (*scriptname != '\0')
627 printf("%s/", scriptname);
628 if (strcmp(req->q.manpath, req->p[0]))
629 printf("%s/", req->q.manpath);
630 if (req->q.arch != NULL)
631 printf("%s/", req->q.arch);
632 http_encode(name);
633 if (req->q.sec != NULL) {
634 putchar('.');
635 http_encode(req->q.sec);
636 }
637 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
638 }
639
640 static void
pg_searchres(const struct req * req,struct manpage * r,size_t sz)641 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
642 {
643 char *arch, *archend;
644 const char *sec;
645 size_t i, iuse;
646 int archprio, archpriouse;
647 int prio, priouse;
648 int have_header;
649
650 for (i = 0; i < sz; i++) {
651 if (validate_filename(r[i].file))
652 continue;
653 warnx("invalid filename %s in %s database",
654 r[i].file, req->q.manpath);
655 pg_error_internal();
656 return;
657 }
658
659 if (req->isquery && sz == 1) {
660 /*
661 * If we have just one result, then jump there now
662 * without any delay.
663 */
664 printf("Status: 303 See Other\r\n"
665 "Location: /");
666 if (*scriptname != '\0')
667 printf("%s/", scriptname);
668 if (strcmp(req->q.manpath, req->p[0]))
669 printf("%s/", req->q.manpath);
670 printf("%s\r\n"
671 "Content-Type: text/html; charset=utf-8\r\n\r\n",
672 r[0].file);
673 return;
674 }
675
676 /*
677 * In man(1) mode, show one of the pages
678 * even if more than one is found.
679 */
680
681 iuse = 0;
682 if (req->q.equal || sz == 1) {
683 priouse = 20;
684 archpriouse = 3;
685 for (i = 0; i < sz; i++) {
686 sec = r[i].file;
687 sec += strcspn(sec, "123456789");
688 if (sec[0] == '\0')
689 continue;
690 prio = sec_prios[sec[0] - '1'];
691 if (sec[1] != '/')
692 prio += 10;
693 if (req->q.arch == NULL) {
694 archprio =
695 ((arch = strchr(sec + 1, '/'))
696 == NULL) ? 3 :
697 ((archend = strchr(arch + 1, '/'))
698 == NULL) ? 0 :
699 strncmp(arch, "amd64/",
700 archend - arch) ? 2 : 1;
701 if (archprio < archpriouse) {
702 archpriouse = archprio;
703 priouse = prio;
704 iuse = i;
705 continue;
706 }
707 if (archprio > archpriouse)
708 continue;
709 }
710 if (prio >= priouse)
711 continue;
712 priouse = prio;
713 iuse = i;
714 }
715 have_header = resp_begin_html(200, NULL, r[iuse].file);
716 } else
717 have_header = resp_begin_html(200, NULL, NULL);
718
719 if (have_header == 0)
720 puts("<header>");
721 resp_searchform(req,
722 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
723 puts("</header>");
724
725 if (sz > 1) {
726 puts("<nav>");
727 puts("<table class=\"results\">");
728 for (i = 0; i < sz; i++) {
729 printf(" <tr>\n"
730 " <td>"
731 "<a class=\"Xr\" href=\"/");
732 if (*scriptname != '\0')
733 printf("%s/", scriptname);
734 if (strcmp(req->q.manpath, req->p[0]))
735 printf("%s/", req->q.manpath);
736 printf("%s\">", r[i].file);
737 html_print(r[i].names);
738 printf("</a></td>\n"
739 " <td><span class=\"Nd\">");
740 html_print(r[i].output);
741 puts("</span></td>\n"
742 " </tr>");
743 }
744 puts("</table>");
745 puts("</nav>");
746 }
747
748 if (req->q.equal || sz == 1) {
749 puts("<hr>");
750 resp_show(req, r[iuse].file);
751 }
752
753 resp_end_html();
754 }
755
/*
 * Print a preformatted manual page (cat page) as HTML.
 * The file is read line by line and wrapped in <pre>.
 * Backspace overstrike sequences are translated as follows:
 * "_\bX" becomes italic X, a few specific pairs of overstruck
 * characters become a plain '*' or '+', and any other "X\bY"
 * becomes bold Y.  All characters are HTML-escaped.
 * The req argument is not used.
 * If the file cannot be opened, a notice is printed instead.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		    " You specified an invalid manual file.\n"
		    "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * The overstruck character is at p[i + 2];
			 * looking at p[i + 1] would only ever find
			 * the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
894
895 static void
resp_format(const struct req * req,const char * file)896 resp_format(const struct req *req, const char *file)
897 {
898 struct manoutput conf;
899 struct mparse *mp;
900 struct roff_meta *meta;
901 void *vp;
902 int fd;
903 int usepath;
904
905 if (-1 == (fd = open(file, O_RDONLY))) {
906 puts("<p role=\"doc-notice\">\n"
907 " You specified an invalid manual file.\n"
908 "</p>");
909 return;
910 }
911
912 mchars_alloc();
913 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
914 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
915 mparse_readfd(mp, fd, file);
916 close(fd);
917 meta = mparse_result(mp);
918
919 memset(&conf, 0, sizeof(conf));
920 conf.fragment = 1;
921 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
922 usepath = strcmp(req->q.manpath, req->p[0]);
923 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
924 scriptname, *scriptname == '\0' ? "" : "/",
925 usepath ? req->q.manpath : "", usepath ? "/" : "");
926
927 vp = html_alloc(&conf);
928 if (meta->macroset == MACROSET_MDOC)
929 html_mdoc(vp, meta);
930 else
931 html_man(vp, meta);
932
933 html_free(vp);
934 mparse_free(mp);
935 mchars_free();
936 free(conf.man);
937 free(conf.style);
938 }
939
/*
 * Show one manual page file.  After skipping an optional leading
 * "./", names starting with 'c' are passed to resp_catman()
 * and all others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (*file != 'c') {
		resp_format(req, file);
		return;
	}
	resp_catman(req, file);
}
952
953 static void
pg_show(struct req * req,const char * fullpath)954 pg_show(struct req *req, const char *fullpath)
955 {
956 char *manpath;
957 const char *file;
958
959 if ((file = strchr(fullpath, '/')) == NULL) {
960 pg_error_badrequest(
961 "You did not specify a page to show.");
962 return;
963 }
964 manpath = mandoc_strndup(fullpath, file - fullpath);
965 file++;
966
967 if ( ! validate_manpath(req, manpath)) {
968 pg_error_badrequest(
969 "You specified an invalid manpath.");
970 free(manpath);
971 return;
972 }
973
974 /*
975 * Begin by chdir()ing into the manpath.
976 * This way we can pick up the database files, which are
977 * relative to the manpath root.
978 */
979
980 if (chdir(manpath) == -1) {
981 warn("chdir %s", manpath);
982 pg_error_internal();
983 free(manpath);
984 return;
985 }
986 free(manpath);
987
988 if ( ! validate_filename(file)) {
989 pg_error_badrequest(
990 "You specified an invalid manual file.");
991 return;
992 }
993
994 if (resp_begin_html(200, NULL, file) == 0)
995 puts("<header>");
996 resp_searchform(req, FOCUS_NONE);
997 puts("</header>");
998 resp_show(req, file);
999 resp_end_html();
1000 }
1001
1002 static void
pg_search(const struct req * req)1003 pg_search(const struct req *req)
1004 {
1005 struct mansearch search;
1006 struct manpaths paths;
1007 struct manpage *res;
1008 char **argv;
1009 char *query, *rp, *wp;
1010 size_t ressz;
1011 int argc;
1012
1013 /*
1014 * Begin by chdir()ing into the root of the manpath.
1015 * This way we can pick up the database files, which are
1016 * relative to the manpath root.
1017 */
1018
1019 if (chdir(req->q.manpath) == -1) {
1020 warn("chdir %s", req->q.manpath);
1021 pg_error_internal();
1022 return;
1023 }
1024
1025 search.arch = req->q.arch;
1026 search.sec = req->q.sec;
1027 search.outkey = "Nd";
1028 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1029 search.firstmatch = 1;
1030
1031 paths.sz = 1;
1032 paths.paths = mandoc_malloc(sizeof(char *));
1033 paths.paths[0] = mandoc_strdup(".");
1034
1035 /*
1036 * Break apart at spaces with backslash-escaping.
1037 */
1038
1039 argc = 0;
1040 argv = NULL;
1041 rp = query = mandoc_strdup(req->q.query);
1042 for (;;) {
1043 while (isspace((unsigned char)*rp))
1044 rp++;
1045 if (*rp == '\0')
1046 break;
1047 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1048 argv[argc++] = wp = rp;
1049 for (;;) {
1050 if (isspace((unsigned char)*rp)) {
1051 *wp = '\0';
1052 rp++;
1053 break;
1054 }
1055 if (rp[0] == '\\' && rp[1] != '\0')
1056 rp++;
1057 if (wp != rp)
1058 *wp = *rp;
1059 if (*rp == '\0')
1060 break;
1061 wp++;
1062 rp++;
1063 }
1064 }
1065
1066 res = NULL;
1067 ressz = 0;
1068 if (req->isquery && req->q.equal && argc == 1)
1069 pg_redirect(req, argv[0]);
1070 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1071 pg_noresult(req, 400, "Bad Request",
1072 "You entered an invalid query.");
1073 else if (ressz == 0)
1074 pg_noresult(req, 404, "Not Found", "No results found.");
1075 else
1076 pg_searchres(req, res, ressz);
1077
1078 free(query);
1079 mansearch_free(res, ressz);
1080 free(paths.paths[0]);
1081 free(paths.paths);
1082 }
1083
1084 int
main(void)1085 main(void)
1086 {
1087 struct req req;
1088 struct itimerval itimer;
1089 const char *path;
1090 const char *querystring;
1091 int i;
1092
1093 /*
1094 * The "rpath" pledge could be revoked after mparse_readfd()
1095 * if the file descriptor to "/footer.html" would be opened
1096 * up front, but it's probably not worth the complication
1097 * of the code it would cause: it would require scattering
1098 * pledge() calls in multiple low-level resp_*() functions.
1099 */
1100
1101 if (pledge("stdio rpath", NULL) == -1) {
1102 warn("pledge");
1103 pg_error_internal();
1104 return EXIT_FAILURE;
1105 }
1106
1107 /* Poor man's ReDoS mitigation. */
1108
1109 itimer.it_value.tv_sec = 2;
1110 itimer.it_value.tv_usec = 0;
1111 itimer.it_interval.tv_sec = 2;
1112 itimer.it_interval.tv_usec = 0;
1113 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1114 warn("setitimer");
1115 pg_error_internal();
1116 return EXIT_FAILURE;
1117 }
1118
1119 /*
1120 * First we change directory into the MAN_DIR so that
1121 * subsequent scanning for manpath directories is rooted
1122 * relative to the same position.
1123 */
1124
1125 if (chdir(MAN_DIR) == -1) {
1126 warn("MAN_DIR: %s", MAN_DIR);
1127 pg_error_internal();
1128 return EXIT_FAILURE;
1129 }
1130
1131 memset(&req, 0, sizeof(struct req));
1132 req.q.equal = 1;
1133 parse_manpath_conf(&req);
1134
1135 /* Parse the path info and the query string. */
1136
1137 if ((path = getenv("PATH_INFO")) == NULL)
1138 path = "";
1139 else if (*path == '/')
1140 path++;
1141
1142 if (*path != '\0') {
1143 parse_path_info(&req, path);
1144 if (req.q.manpath == NULL || req.q.sec == NULL ||
1145 *req.q.query == '\0' || access(path, F_OK) == -1)
1146 path = "";
1147 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1148 parse_query_string(&req, querystring);
1149
1150 /* Validate parsed data and add defaults. */
1151
1152 if (req.q.manpath == NULL)
1153 req.q.manpath = mandoc_strdup(req.p[0]);
1154 else if ( ! validate_manpath(&req, req.q.manpath)) {
1155 pg_error_badrequest(
1156 "You specified an invalid manpath.");
1157 return EXIT_FAILURE;
1158 }
1159
1160 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1161 pg_error_badrequest(
1162 "You specified an invalid architecture.");
1163 return EXIT_FAILURE;
1164 }
1165
1166 /* Dispatch to the three different pages. */
1167
1168 if ('\0' != *path)
1169 pg_show(&req, path);
1170 else if (NULL != req.q.query)
1171 pg_search(&req);
1172 else
1173 pg_index(&req);
1174
1175 free(req.q.manpath);
1176 free(req.q.arch);
1177 free(req.q.sec);
1178 free(req.q.query);
1179 for (i = 0; i < (int)req.psz; i++)
1180 free(req.p[i]);
1181 free(req.p);
1182 return EXIT_SUCCESS;
1183 }
1184
1185 /*
1186 * Translate PATH_INFO to a query.
1187 */
1188 static void
parse_path_info(struct req * req,const char * path)1189 parse_path_info(struct req *req, const char *path)
1190 {
1191 const char *name, *sec, *end;
1192
1193 req->isquery = 0;
1194 req->q.equal = 1;
1195 req->q.manpath = NULL;
1196 req->q.arch = NULL;
1197
1198 /* Mandatory manual page name. */
1199 if ((name = strrchr(path, '/')) == NULL)
1200 name = path;
1201 else
1202 name++;
1203
1204 /* Optional trailing section. */
1205 sec = strrchr(name, '.');
1206 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1207 req->q.query = mandoc_strndup(name, sec - name - 1);
1208 req->q.sec = mandoc_strdup(sec);
1209 } else {
1210 req->q.query = mandoc_strdup(name);
1211 req->q.sec = NULL;
1212 }
1213
1214 /* Handle the case of name[.section] only. */
1215 if (name == path)
1216 return;
1217
1218 /* Optional manpath. */
1219 end = strchr(path, '/');
1220 req->q.manpath = mandoc_strndup(path, end - path);
1221 if (validate_manpath(req, req->q.manpath)) {
1222 path = end + 1;
1223 if (name == path)
1224 return;
1225 } else {
1226 free(req->q.manpath);
1227 req->q.manpath = NULL;
1228 }
1229
1230 /* Optional section. */
1231 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1232 path += 3;
1233 end = strchr(path, '/');
1234 free(req->q.sec);
1235 req->q.sec = mandoc_strndup(path, end - path);
1236 path = end + 1;
1237 if (name == path)
1238 return;
1239 }
1240
1241 /* Optional architecture. */
1242 end = strchr(path, '/');
1243 if (end + 1 != name) {
1244 pg_error_badrequest(
1245 "You specified too many directory components.");
1246 exit(EXIT_FAILURE);
1247 }
1248 req->q.arch = mandoc_strndup(path, end - path);
1249 if (validate_arch(req->q.arch) == 0) {
1250 pg_error_badrequest(
1251 "You specified an invalid directory component.");
1252 exit(EXIT_FAILURE);
1253 }
1254 }
1255
1256 /*
1257 * Scan for indexable paths.
1258 */
1259 static void
parse_manpath_conf(struct req * req)1260 parse_manpath_conf(struct req *req)
1261 {
1262 FILE *fp;
1263 char *dp;
1264 size_t dpsz;
1265 ssize_t len;
1266
1267 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1268 warn("%s/manpath.conf", MAN_DIR);
1269 pg_error_internal();
1270 exit(EXIT_FAILURE);
1271 }
1272
1273 dp = NULL;
1274 dpsz = 0;
1275
1276 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1277 if (dp[len - 1] == '\n')
1278 dp[--len] = '\0';
1279 req->p = mandoc_realloc(req->p,
1280 (req->psz + 1) * sizeof(char *));
1281 if ( ! validate_urifrag(dp)) {
1282 warnx("%s/manpath.conf contains "
1283 "unsafe path \"%s\"", MAN_DIR, dp);
1284 pg_error_internal();
1285 exit(EXIT_FAILURE);
1286 }
1287 if (strchr(dp, '/') != NULL) {
1288 warnx("%s/manpath.conf contains "
1289 "path with slash \"%s\"", MAN_DIR, dp);
1290 pg_error_internal();
1291 exit(EXIT_FAILURE);
1292 }
1293 req->p[req->psz++] = dp;
1294 dp = NULL;
1295 dpsz = 0;
1296 }
1297 free(dp);
1298
1299 if (req->p == NULL) {
1300 warnx("%s/manpath.conf is empty", MAN_DIR);
1301 pg_error_internal();
1302 exit(EXIT_FAILURE);
1303 }
1304 }
1305