xref: /openbsd/usr.bin/mandoc/cgi.c (revision d415bd75)
1 /* $OpenBSD: cgi.c,v 1.120 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3  * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  * Implementation of the man.cgi(8) program.
20  */
21 #include <sys/types.h>
22 #include <sys/time.h>
23 
24 #include <ctype.h>
25 #include <err.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <limits.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
34 
35 #include "mandoc_aux.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "mdoc.h"
39 #include "man.h"
40 #include "mandoc_parse.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45 
/*
 * A query as passed to the search function.
 * The string members are heap-allocated (or NULL when unset);
 * main() frees them before the program exits.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
56 
/*
 * The state of one request: the parsed query plus the list of
 * manpaths that parse_manpath_conf() read from manpath.conf.
 * The first entry, p[0], serves as the default manpath.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
63 
/*
 * Tells resp_searchform() whether to give the query input box
 * the "autofocus" attribute (FOCUS_QUERY) or not (FOCUS_NONE).
 */
enum	focus {
	FOCUS_NONE = 0,
	FOCUS_QUERY
};
68 
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	int		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	int		 resp_copy(const char *, const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
99 
/*
 * Name of this script as it appears in generated URIs.
 * SCRIPT_NAME is not defined in this file; presumably it comes
 * from cgi.h - TODO confirm.
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Priorities of the sections 1 to 9, indexed by section digit
 * minus one.  pg_searchres() prefers the page with the smallest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Values and labels of the section selector in the search form.
 * The two arrays are parallel: resp_searchform() indexes both
 * with the same counter, bounded by sec_MAX.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);

/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",       "arm64",
    "hppa",        "i386",        "landisk",     "loongson",
    "luna88k",     "macppc",      "mips64",      "octeon",
    "powerpc64",   "riscv64",     "sparc64",

    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "sgi",         "socppc",
    "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
138 
/*
 * Write one character to standard output, substituting the HTML
 * entity for each of the four characters '"', '&', '>', and '<'.
 * Every other byte, including non-ASCII, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
165 
/*
 * Print a whole string with HTML escaping by feeding it to
 * html_putchar() one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
179 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL on return, so the caller
 * must not free the string again.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
196 
197 /*
198  * Parse the QUERY_STRING for key-value pairs
199  * and store the values into the query structure.
200  */
201 static void
202 parse_query_string(struct req *req, const char *qs)
203 {
204 	char		*key, *val;
205 	size_t		 keysz, valsz;
206 
207 	req->isquery	= 1;
208 	req->q.manpath	= NULL;
209 	req->q.arch	= NULL;
210 	req->q.sec	= NULL;
211 	req->q.query	= NULL;
212 	req->q.equal	= 1;
213 
214 	key = val = NULL;
215 	while (*qs != '\0') {
216 
217 		/* Parse one key. */
218 
219 		keysz = strcspn(qs, "=;&");
220 		key = mandoc_strndup(qs, keysz);
221 		qs += keysz;
222 		if (*qs != '=')
223 			goto next;
224 
225 		/* Parse one value. */
226 
227 		valsz = strcspn(++qs, ";&");
228 		val = mandoc_strndup(qs, valsz);
229 		qs += valsz;
230 
231 		/* Decode and catch encoding errors. */
232 
233 		if ( ! (http_decode(key) && http_decode(val)))
234 			goto next;
235 
236 		/* Handle key-value pairs. */
237 
238 		if ( ! strcmp(key, "query"))
239 			set_query_attr(&req->q.query, &val);
240 
241 		else if ( ! strcmp(key, "apropos"))
242 			req->q.equal = !strcmp(val, "0");
243 
244 		else if ( ! strcmp(key, "manpath")) {
245 #ifdef COMPAT_OLDURI
246 			if ( ! strncmp(val, "OpenBSD ", 8)) {
247 				val[7] = '-';
248 				if ('C' == val[8])
249 					val[8] = 'c';
250 			}
251 #endif
252 			set_query_attr(&req->q.manpath, &val);
253 		}
254 
255 		else if ( ! (strcmp(key, "sec")
256 #ifdef COMPAT_OLDURI
257 		    && strcmp(key, "sektion")
258 #endif
259 		    )) {
260 			if ( ! strcmp(val, "0"))
261 				*val = '\0';
262 			set_query_attr(&req->q.sec, &val);
263 		}
264 
265 		else if ( ! strcmp(key, "arch")) {
266 			if ( ! strcmp(val, "default"))
267 				*val = '\0';
268 			set_query_attr(&req->q.arch, &val);
269 		}
270 
271 		/*
272 		 * The key must be freed in any case.
273 		 * The val may have been handed over to the query
274 		 * structure, in which case it is now NULL.
275 		 */
276 next:
277 		free(key);
278 		key = NULL;
279 		free(val);
280 		val = NULL;
281 
282 		if (*qs != '\0')
283 			qs++;
284 	}
285 }
286 
/*
 * HTTP-decode a string in place over the allocated buffer:
 * "%4e+foo" becomes "N foo".  A '+' decodes to a blank and
 * "%XX" decodes to the byte with the hexadecimal value XX.
 *
 * Returns 1 on success.  Returns 0 on an encoding error, which is
 * a '%' not followed by exactly two hexadecimal digits, or an
 * encoded NUL byte; in that case, the buffer content is unspecified.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	long		 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two genuine hexadecimal digits rather than
		 * anything a numeric scanner would accept, such as a
		 * sign or leading white space.  Because isxdigit() is
		 * false for NUL, this also rejects an escape truncated
		 * by the end of the string, and the short-circuit
		 * evaluation keeps p[2] from being read in that case.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];

		/* An encoded NUL would silently truncate the string. */

		if ((c = strtol(hex, NULL, 16)) == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
322 
/*
 * Print a string to standard output with percent-encoding.
 * Alphanumeric characters and the four characters "-._~" are
 * printed as they are; every other byte is printed as "%XX"
 * with two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
334 
/*
 * Print the HTTP response headers and the blank line that ends them,
 * then flush standard output.  A "Status:" line with the given
 * code and message is only printed if the code is not 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	static const char	 headers[] =
	    "Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n";

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs(headers, stdout);
	fflush(stdout);
}
351 
/*
 * Copy the content of a file verbatim to standard output.
 * If element is not NULL, an opening tag of that name is printed
 * first; printing the closing tag is left to the caller.
 *
 * Returns 0 without printing anything if the file cannot be opened,
 * or 1 otherwise.  Copying stops early if writing fails, but the
 * return value remains 1 because the opening tag is already out.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, off, nw;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* Flush stdio before bypassing it with write(2) below. */

	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may accept fewer bytes than requested,
		 * so continue until the whole buffer is out.
		 */
		off = 0;
		while (off < sz) {
			nw = write(STDOUT_FILENO, buf + off, sz - off);
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;
			goto done;
		}
	}
done:
	close(fd);
	return 1;
}
370 
371 static int
372 resp_begin_html(int code, const char *msg, const char *file)
373 {
374 	const char	*name, *sec, *cp;
375 	int		 namesz, secsz;
376 
377 	resp_begin_http(code, msg);
378 
379 	printf("<!DOCTYPE html>\n"
380 	       "<html>\n"
381 	       "<head>\n"
382 	       "  <meta charset=\"UTF-8\"/>\n"
383 	       "  <meta name=\"viewport\""
384 		      " content=\"width=device-width, initial-scale=1.0\">\n"
385 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
386 	       " type=\"text/css\" media=\"all\">\n"
387 	       "  <title>",
388 	       CSS_DIR);
389 	if (file != NULL) {
390 		cp = strrchr(file, '/');
391 		name = cp == NULL ? file : cp + 1;
392 		cp = strrchr(name, '.');
393 		namesz = cp == NULL ? strlen(name) : cp - name;
394 		sec = NULL;
395 		if (cp != NULL && cp[1] != '0') {
396 			sec = cp + 1;
397 			secsz = strlen(sec);
398 		} else if (name - file > 1) {
399 			for (cp = name - 2; cp >= file; cp--) {
400 				if (*cp < '1' || *cp > '9')
401 					continue;
402 				sec = cp;
403 				secsz = name - cp - 1;
404 				break;
405 			}
406 		}
407 		printf("%.*s", namesz, name);
408 		if (sec != NULL)
409 			printf("(%.*s)", secsz, sec);
410 		fputs(" - ", stdout);
411 	}
412 	printf("%s</title>\n"
413 	       "</head>\n"
414 	       "<body>\n",
415 	       CUSTOMIZE_TITLE);
416 
417 	return resp_copy("header", MAN_DIR "/header.html");
418 }
419 
420 static void
421 resp_end_html(void)
422 {
423 	if (resp_copy("footer", MAN_DIR "/footer.html"))
424 		puts("</footer>");
425 
426 	puts("</body>\n"
427 	     "</html>");
428 }
429 
430 static void
431 resp_searchform(const struct req *req, enum focus focus)
432 {
433 	int		 i;
434 
435 	printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
436 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
437 	       "  <fieldset>\n"
438 	       "    <legend>Manual Page Search Parameters</legend>\n",
439 	       scriptname);
440 
441 	/* Write query input box. */
442 
443 	printf("    <label>Search query:\n"
444 	       "      <input type=\"search\" name=\"query\" value=\"");
445 	if (req->q.query != NULL)
446 		html_print(req->q.query);
447 	printf("\" size=\"40\"");
448 	if (focus == FOCUS_QUERY)
449 		printf(" autofocus");
450 	puts(">\n    </label>");
451 
452 	/* Write submission buttons. */
453 
454 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
455 		"man</button>\n"
456 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
457 		"apropos</button>\n"
458 		"    <br/>\n");
459 
460 	/* Write section selector. */
461 
462 	puts("    <select name=\"sec\" aria-label=\"Manual section\">");
463 	for (i = 0; i < sec_MAX; i++) {
464 		printf("      <option value=\"%s\"", sec_numbers[i]);
465 		if (NULL != req->q.sec &&
466 		    0 == strcmp(sec_numbers[i], req->q.sec))
467 			printf(" selected=\"selected\"");
468 		printf(">%s</option>\n", sec_names[i]);
469 	}
470 	puts("    </select>");
471 
472 	/* Write architecture selector. */
473 
474 	printf(	"    <select name=\"arch\" aria-label=\"CPU architecture\">\n"
475 		"      <option value=\"default\"");
476 	if (NULL == req->q.arch)
477 		printf(" selected=\"selected\"");
478 	puts(">All Architectures</option>");
479 	for (i = 0; i < arch_MAX; i++) {
480 		printf("      <option");
481 		if (NULL != req->q.arch &&
482 		    0 == strcmp(arch_names[i], req->q.arch))
483 			printf(" selected=\"selected\"");
484 		printf(">%s</option>\n", arch_names[i]);
485 	}
486 	puts("    </select>");
487 
488 	/* Write manpath selector. */
489 
490 	if (req->psz > 1) {
491 		puts("    <select name=\"manpath\""
492 		     " aria-label=\"Manual path\">");
493 		for (i = 0; i < (int)req->psz; i++) {
494 			printf("      <option");
495 			if (strcmp(req->q.manpath, req->p[i]) == 0)
496 				printf(" selected=\"selected\"");
497 			printf(">");
498 			html_print(req->p[i]);
499 			puts("</option>");
500 		}
501 		puts("    </select>");
502 	}
503 
504 	puts("  </fieldset>\n"
505 	     "</form>");
506 }
507 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
521 
522 static int
523 validate_manpath(const struct req *req, const char* manpath)
524 {
525 	size_t	 i;
526 
527 	for (i = 0; i < req->psz; i++)
528 		if ( ! strcmp(manpath, req->p[i]))
529 			return 1;
530 
531 	return 0;
532 }
533 
534 static int
535 validate_arch(const char *arch)
536 {
537 	int	 i;
538 
539 	for (i = 0; i < arch_MAX; i++)
540 		if (strcmp(arch, arch_names[i]) == 0)
541 			return 1;
542 
543 	return 0;
544 }
545 
/*
 * Check a file name relative to the manpath root.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
556 
557 static void
558 pg_index(const struct req *req)
559 {
560 	if (resp_begin_html(200, NULL, NULL) == 0)
561 		puts("<header>");
562 	resp_searchform(req, FOCUS_QUERY);
563 	printf("</header>\n"
564 	       "<main>\n"
565 	       "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
566 	       "This web interface is documented in the\n"
567 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
568 	       " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
569 	       "manual, and the\n"
570 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\""
571 	       " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
572 	       "manual explains the query syntax.\n"
573 	       "</p>\n"
574 	       "</main>\n",
575 	       scriptname, *scriptname == '\0' ? "" : "/",
576 	       scriptname, *scriptname == '\0' ? "" : "/");
577 	resp_end_html();
578 }
579 
580 static void
581 pg_noresult(const struct req *req, int code, const char *http_msg,
582     const char *user_msg)
583 {
584 	if (resp_begin_html(code, http_msg, NULL) == 0)
585 		puts("<header>");
586 	resp_searchform(req, FOCUS_QUERY);
587 	puts("</header>");
588 	puts("<main>");
589 	puts("<p role=\"doc-notice\" aria-label=\"No result\">");
590 	puts(user_msg);
591 	puts("</p>");
592 	puts("</main>");
593 	resp_end_html();
594 }
595 
596 static void
597 pg_error_badrequest(const char *msg)
598 {
599 	if (resp_begin_html(400, "Bad Request", NULL))
600 		puts("</header>");
601 	puts("<main>\n"
602 	     "<h1>Bad Request</h1>\n"
603 	     "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
604 	puts(msg);
605 	printf("Try again from the\n"
606 	       "<a href=\"/%s\">main page</a>.\n"
607 	       "</p>\n"
608 	       "</main>\n", scriptname);
609 	resp_end_html();
610 }
611 
/*
 * Print a minimal "500 Internal Server Error" page.
 * Callers log the details to standard error themselves.
 */
static void
pg_error_internal(void)
{
	int	 have_header;

	/* Close the <header> element if header.html opened one. */

	have_header = resp_begin_html(500, "Internal Server Error", NULL);
	if (have_header)
		puts("</header>");

	puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
	resp_end_html();
}
620 
621 static void
622 pg_redirect(const struct req *req, const char *name)
623 {
624 	printf("Status: 303 See Other\r\n"
625 	    "Location: /");
626 	if (*scriptname != '\0')
627 		printf("%s/", scriptname);
628 	if (strcmp(req->q.manpath, req->p[0]))
629 		printf("%s/", req->q.manpath);
630 	if (req->q.arch != NULL)
631 		printf("%s/", req->q.arch);
632 	http_encode(name);
633 	if (req->q.sec != NULL) {
634 		putchar('.');
635 		http_encode(req->q.sec);
636 	}
637 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
638 }
639 
640 static void
641 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
642 {
643 	char		*arch, *archend;
644 	const char	*sec;
645 	size_t		 i, iuse;
646 	int		 archprio, archpriouse;
647 	int		 prio, priouse;
648 	int		 have_header;
649 
650 	for (i = 0; i < sz; i++) {
651 		if (validate_filename(r[i].file))
652 			continue;
653 		warnx("invalid filename %s in %s database",
654 		    r[i].file, req->q.manpath);
655 		pg_error_internal();
656 		return;
657 	}
658 
659 	if (req->isquery && sz == 1) {
660 		/*
661 		 * If we have just one result, then jump there now
662 		 * without any delay.
663 		 */
664 		printf("Status: 303 See Other\r\n"
665 		    "Location: /");
666 		if (*scriptname != '\0')
667 			printf("%s/", scriptname);
668 		if (strcmp(req->q.manpath, req->p[0]))
669 			printf("%s/", req->q.manpath);
670 		printf("%s\r\n"
671 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
672 		    r[0].file);
673 		return;
674 	}
675 
676 	/*
677 	 * In man(1) mode, show one of the pages
678 	 * even if more than one is found.
679 	 */
680 
681 	iuse = 0;
682 	if (req->q.equal || sz == 1) {
683 		priouse = 20;
684 		archpriouse = 3;
685 		for (i = 0; i < sz; i++) {
686 			sec = r[i].file;
687 			sec += strcspn(sec, "123456789");
688 			if (sec[0] == '\0')
689 				continue;
690 			prio = sec_prios[sec[0] - '1'];
691 			if (sec[1] != '/')
692 				prio += 10;
693 			if (req->q.arch == NULL) {
694 				archprio =
695 				    ((arch = strchr(sec + 1, '/'))
696 					== NULL) ? 3 :
697 				    ((archend = strchr(arch + 1, '/'))
698 					== NULL) ? 0 :
699 				    strncmp(arch, "amd64/",
700 					archend - arch) ? 2 : 1;
701 				if (archprio < archpriouse) {
702 					archpriouse = archprio;
703 					priouse = prio;
704 					iuse = i;
705 					continue;
706 				}
707 				if (archprio > archpriouse)
708 					continue;
709 			}
710 			if (prio >= priouse)
711 				continue;
712 			priouse = prio;
713 			iuse = i;
714 		}
715 		have_header = resp_begin_html(200, NULL, r[iuse].file);
716 	} else
717 		have_header = resp_begin_html(200, NULL, NULL);
718 
719 	if (have_header == 0)
720 		puts("<header>");
721 	resp_searchform(req,
722 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
723 	puts("</header>");
724 
725 	if (sz > 1) {
726 		puts("<nav>");
727 		puts("<table class=\"results\">");
728 		for (i = 0; i < sz; i++) {
729 			printf("  <tr>\n"
730 			       "    <td>"
731 			       "<a class=\"Xr\" href=\"/");
732 			if (*scriptname != '\0')
733 				printf("%s/", scriptname);
734 			if (strcmp(req->q.manpath, req->p[0]))
735 				printf("%s/", req->q.manpath);
736 			printf("%s\">", r[i].file);
737 			html_print(r[i].names);
738 			printf("</a></td>\n"
739 			       "    <td><span class=\"Nd\">");
740 			html_print(r[i].output);
741 			puts("</span></td>\n"
742 			     "  </tr>");
743 		}
744 		puts("</table>");
745 		puts("</nav>");
746 	}
747 
748 	if (req->q.equal || sz == 1) {
749 		puts("<hr>");
750 		resp_show(req, r[iuse].file);
751 	}
752 
753 	resp_end_html();
754 }
755 
/*
 * Print a preformatted manual page (cat page) as HTML inside
 * <div class="catman"><pre>.
 *
 * Backspace overstrike sequences are translated: an underscore
 * followed by a backspace and a character prints that character
 * in italics, some pairs of overstruck symbols print '*' or '+',
 * and any other character followed by a backspace and a character
 * prints the latter in bold.  All text is HTML-escaped.
 * If the file cannot be opened, a notice is printed instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		     "  You specified an invalid manual file.\n"
		     "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * A backspace or newline at this position
			 * means that the cat page is malformed.
			 * Ignore the character and keep going.
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				/*
				 * The partner character is at i + 2;
				 * comparing p[i + 1] can never match
				 * because it is the backspace.
				 */
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
894 
895 static void
896 resp_format(const struct req *req, const char *file)
897 {
898 	struct manoutput conf;
899 	struct mparse	*mp;
900 	struct roff_meta *meta;
901 	void		*vp;
902 	int		 fd;
903 	int		 usepath;
904 
905 	if (-1 == (fd = open(file, O_RDONLY))) {
906 		puts("<p role=\"doc-notice\">\n"
907 		     "  You specified an invalid manual file.\n"
908 		     "</p>");
909 		return;
910 	}
911 
912 	mchars_alloc();
913 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
914 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
915 	mparse_readfd(mp, fd, file);
916 	close(fd);
917 	meta = mparse_result(mp);
918 
919 	memset(&conf, 0, sizeof(conf));
920 	conf.fragment = 1;
921 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
922 	usepath = strcmp(req->q.manpath, req->p[0]);
923 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
924 	    scriptname, *scriptname == '\0' ? "" : "/",
925 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
926 
927 	vp = html_alloc(&conf);
928 	if (meta->macroset == MACROSET_MDOC)
929 		html_mdoc(vp, meta);
930 	else
931 		html_man(vp, meta);
932 
933 	html_free(vp);
934 	mparse_free(mp);
935 	mchars_free();
936 	free(conf.man);
937 	free(conf.style);
938 }
939 
/*
 * Print the content of one manual page file.
 * After skipping an optional leading "./", a file name starting
 * with 'c' is treated as a preformatted page and handed to
 * resp_catman(); all others are handed to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (file[0] == 'c') {
		resp_catman(req, file);
		return;
	}
	resp_format(req, file);
}
952 
953 static void
954 pg_show(struct req *req, const char *fullpath)
955 {
956 	char		*manpath;
957 	const char	*file;
958 
959 	if ((file = strchr(fullpath, '/')) == NULL) {
960 		pg_error_badrequest(
961 		    "You did not specify a page to show.");
962 		return;
963 	}
964 	manpath = mandoc_strndup(fullpath, file - fullpath);
965 	file++;
966 
967 	if ( ! validate_manpath(req, manpath)) {
968 		pg_error_badrequest(
969 		    "You specified an invalid manpath.");
970 		free(manpath);
971 		return;
972 	}
973 
974 	/*
975 	 * Begin by chdir()ing into the manpath.
976 	 * This way we can pick up the database files, which are
977 	 * relative to the manpath root.
978 	 */
979 
980 	if (chdir(manpath) == -1) {
981 		warn("chdir %s", manpath);
982 		pg_error_internal();
983 		free(manpath);
984 		return;
985 	}
986 	free(manpath);
987 
988 	if ( ! validate_filename(file)) {
989 		pg_error_badrequest(
990 		    "You specified an invalid manual file.");
991 		return;
992 	}
993 
994 	if (resp_begin_html(200, NULL, file) == 0)
995 		puts("<header>");
996 	resp_searchform(req, FOCUS_NONE);
997 	puts("</header>");
998 	resp_show(req, file);
999 	resp_end_html();
1000 }
1001 
1002 static void
1003 pg_search(const struct req *req)
1004 {
1005 	struct mansearch	  search;
1006 	struct manpaths		  paths;
1007 	struct manpage		 *res;
1008 	char			**argv;
1009 	char			 *query, *rp, *wp;
1010 	size_t			  ressz;
1011 	int			  argc;
1012 
1013 	/*
1014 	 * Begin by chdir()ing into the root of the manpath.
1015 	 * This way we can pick up the database files, which are
1016 	 * relative to the manpath root.
1017 	 */
1018 
1019 	if (chdir(req->q.manpath) == -1) {
1020 		warn("chdir %s", req->q.manpath);
1021 		pg_error_internal();
1022 		return;
1023 	}
1024 
1025 	search.arch = req->q.arch;
1026 	search.sec = req->q.sec;
1027 	search.outkey = "Nd";
1028 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1029 	search.firstmatch = 1;
1030 
1031 	paths.sz = 1;
1032 	paths.paths = mandoc_malloc(sizeof(char *));
1033 	paths.paths[0] = mandoc_strdup(".");
1034 
1035 	/*
1036 	 * Break apart at spaces with backslash-escaping.
1037 	 */
1038 
1039 	argc = 0;
1040 	argv = NULL;
1041 	rp = query = mandoc_strdup(req->q.query);
1042 	for (;;) {
1043 		while (isspace((unsigned char)*rp))
1044 			rp++;
1045 		if (*rp == '\0')
1046 			break;
1047 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1048 		argv[argc++] = wp = rp;
1049 		for (;;) {
1050 			if (isspace((unsigned char)*rp)) {
1051 				*wp = '\0';
1052 				rp++;
1053 				break;
1054 			}
1055 			if (rp[0] == '\\' && rp[1] != '\0')
1056 				rp++;
1057 			if (wp != rp)
1058 				*wp = *rp;
1059 			if (*rp == '\0')
1060 				break;
1061 			wp++;
1062 			rp++;
1063 		}
1064 	}
1065 
1066 	res = NULL;
1067 	ressz = 0;
1068 	if (req->isquery && req->q.equal && argc == 1)
1069 		pg_redirect(req, argv[0]);
1070 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1071 		pg_noresult(req, 400, "Bad Request",
1072 		    "You entered an invalid query.");
1073 	else if (ressz == 0)
1074 		pg_noresult(req, 404, "Not Found", "No results found.");
1075 	else
1076 		pg_searchres(req, res, ressz);
1077 
1078 	free(query);
1079 	mansearch_free(res, ressz);
1080 	free(paths.paths[0]);
1081 	free(paths.paths);
1082 }
1083 
1084 int
1085 main(void)
1086 {
1087 	struct req	 req;
1088 	struct itimerval itimer;
1089 	const char	*path;
1090 	const char	*querystring;
1091 	int		 i;
1092 
1093 	/*
1094 	 * The "rpath" pledge could be revoked after mparse_readfd()
1095 	 * if the file descriptor to "/footer.html" would be opened
1096 	 * up front, but it's probably not worth the complication
1097 	 * of the code it would cause: it would require scattering
1098 	 * pledge() calls in multiple low-level resp_*() functions.
1099 	 */
1100 
1101 	if (pledge("stdio rpath", NULL) == -1) {
1102 		warn("pledge");
1103 		pg_error_internal();
1104 		return EXIT_FAILURE;
1105 	}
1106 
1107 	/* Poor man's ReDoS mitigation. */
1108 
1109 	itimer.it_value.tv_sec = 2;
1110 	itimer.it_value.tv_usec = 0;
1111 	itimer.it_interval.tv_sec = 2;
1112 	itimer.it_interval.tv_usec = 0;
1113 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1114 		warn("setitimer");
1115 		pg_error_internal();
1116 		return EXIT_FAILURE;
1117 	}
1118 
1119 	/*
1120 	 * First we change directory into the MAN_DIR so that
1121 	 * subsequent scanning for manpath directories is rooted
1122 	 * relative to the same position.
1123 	 */
1124 
1125 	if (chdir(MAN_DIR) == -1) {
1126 		warn("MAN_DIR: %s", MAN_DIR);
1127 		pg_error_internal();
1128 		return EXIT_FAILURE;
1129 	}
1130 
1131 	memset(&req, 0, sizeof(struct req));
1132 	req.q.equal = 1;
1133 	parse_manpath_conf(&req);
1134 
1135 	/* Parse the path info and the query string. */
1136 
1137 	if ((path = getenv("PATH_INFO")) == NULL)
1138 		path = "";
1139 	else if (*path == '/')
1140 		path++;
1141 
1142 	if (*path != '\0') {
1143 		parse_path_info(&req, path);
1144 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1145 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1146 			path = "";
1147 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1148 		parse_query_string(&req, querystring);
1149 
1150 	/* Validate parsed data and add defaults. */
1151 
1152 	if (req.q.manpath == NULL)
1153 		req.q.manpath = mandoc_strdup(req.p[0]);
1154 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1155 		pg_error_badrequest(
1156 		    "You specified an invalid manpath.");
1157 		return EXIT_FAILURE;
1158 	}
1159 
1160 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1161 		pg_error_badrequest(
1162 		    "You specified an invalid architecture.");
1163 		return EXIT_FAILURE;
1164 	}
1165 
1166 	/* Dispatch to the three different pages. */
1167 
1168 	if ('\0' != *path)
1169 		pg_show(&req, path);
1170 	else if (NULL != req.q.query)
1171 		pg_search(&req);
1172 	else
1173 		pg_index(&req);
1174 
1175 	free(req.q.manpath);
1176 	free(req.q.arch);
1177 	free(req.q.sec);
1178 	free(req.q.query);
1179 	for (i = 0; i < (int)req.psz; i++)
1180 		free(req.p[i]);
1181 	free(req.p);
1182 	return EXIT_SUCCESS;
1183 }
1184 
1185 /*
1186  * Translate PATH_INFO to a query.
1187  */
1188 static void
1189 parse_path_info(struct req *req, const char *path)
1190 {
1191 	const char	*name, *sec, *end;
1192 
1193 	req->isquery = 0;
1194 	req->q.equal = 1;
1195 	req->q.manpath = NULL;
1196 	req->q.arch = NULL;
1197 
1198 	/* Mandatory manual page name. */
1199 	if ((name = strrchr(path, '/')) == NULL)
1200 		name = path;
1201 	else
1202 		name++;
1203 
1204 	/* Optional trailing section. */
1205 	sec = strrchr(name, '.');
1206 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1207 		req->q.query = mandoc_strndup(name, sec - name - 1);
1208 		req->q.sec = mandoc_strdup(sec);
1209 	} else {
1210 		req->q.query = mandoc_strdup(name);
1211 		req->q.sec = NULL;
1212 	}
1213 
1214 	/* Handle the case of name[.section] only. */
1215 	if (name == path)
1216 		return;
1217 
1218 	/* Optional manpath. */
1219 	end = strchr(path, '/');
1220 	req->q.manpath = mandoc_strndup(path, end - path);
1221 	if (validate_manpath(req, req->q.manpath)) {
1222 		path = end + 1;
1223 		if (name == path)
1224 			return;
1225 	} else {
1226 		free(req->q.manpath);
1227 		req->q.manpath = NULL;
1228 	}
1229 
1230 	/* Optional section. */
1231 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1232 		path += 3;
1233 		end = strchr(path, '/');
1234 		free(req->q.sec);
1235 		req->q.sec = mandoc_strndup(path, end - path);
1236 		path = end + 1;
1237 		if (name == path)
1238 			return;
1239 	}
1240 
1241 	/* Optional architecture. */
1242 	end = strchr(path, '/');
1243 	if (end + 1 != name) {
1244 		pg_error_badrequest(
1245 		    "You specified too many directory components.");
1246 		exit(EXIT_FAILURE);
1247 	}
1248 	req->q.arch = mandoc_strndup(path, end - path);
1249 	if (validate_arch(req->q.arch) == 0) {
1250 		pg_error_badrequest(
1251 		    "You specified an invalid directory component.");
1252 		exit(EXIT_FAILURE);
1253 	}
1254 }
1255 
1256 /*
1257  * Scan for indexable paths.
1258  */
1259 static void
1260 parse_manpath_conf(struct req *req)
1261 {
1262 	FILE	*fp;
1263 	char	*dp;
1264 	size_t	 dpsz;
1265 	ssize_t	 len;
1266 
1267 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1268 		warn("%s/manpath.conf", MAN_DIR);
1269 		pg_error_internal();
1270 		exit(EXIT_FAILURE);
1271 	}
1272 
1273 	dp = NULL;
1274 	dpsz = 0;
1275 
1276 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1277 		if (dp[len - 1] == '\n')
1278 			dp[--len] = '\0';
1279 		req->p = mandoc_realloc(req->p,
1280 		    (req->psz + 1) * sizeof(char *));
1281 		if ( ! validate_urifrag(dp)) {
1282 			warnx("%s/manpath.conf contains "
1283 			    "unsafe path \"%s\"", MAN_DIR, dp);
1284 			pg_error_internal();
1285 			exit(EXIT_FAILURE);
1286 		}
1287 		if (strchr(dp, '/') != NULL) {
1288 			warnx("%s/manpath.conf contains "
1289 			    "path with slash \"%s\"", MAN_DIR, dp);
1290 			pg_error_internal();
1291 			exit(EXIT_FAILURE);
1292 		}
1293 		req->p[req->psz++] = dp;
1294 		dp = NULL;
1295 		dpsz = 0;
1296 	}
1297 	free(dp);
1298 
1299 	if (req->p == NULL) {
1300 		warnx("%s/manpath.conf is empty", MAN_DIR);
1301 		pg_error_internal();
1302 		exit(EXIT_FAILURE);
1303 	}
1304 }
1305