xref: /openbsd/usr.bin/mandoc/cgi.c (revision 274d7c50)
1 /*	$OpenBSD: cgi.c,v 1.107 2019/11/10 22:18:01 bentley Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014-2019 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "mandoc_parse.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42 
/*
 * Search parameters extracted from a request and handed on
 * to the search function.
 */
struct	query {
	char		*manpath;	/* manual tree to search in */
	char		*arch;		/* machine architecture filter */
	char		*sec;		/* manual section filter */
	char		*query;		/* search expression, still unparsed */
	int		 equal;		/* nonzero: match complete names only */
};
53 
54 struct	req {
55 	struct query	  q;
56 	char		**p; /* array of available manpaths */
57 	size_t		  psz; /* number of available manpaths */
58 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
59 };
60 
/* Whether the query box of the search form asks for input focus. */
enum	focus {
	FOCUS_NONE = 0,		/* do not autofocus anything */
	FOCUS_QUERY		/* autofocus the query input box */
};
65 
66 static	void		 html_print(const char *);
67 static	void		 html_putchar(char);
68 static	int		 http_decode(char *);
69 static	void		 http_encode(const char *p);
70 static	void		 parse_manpath_conf(struct req *);
71 static	void		 parse_path_info(struct req *req, const char *path);
72 static	void		 parse_query_string(struct req *, const char *);
73 static	void		 pg_error_badrequest(const char *);
74 static	void		 pg_error_internal(void);
75 static	void		 pg_index(const struct req *);
76 static	void		 pg_noresult(const struct req *, int, const char *,
77 				const char *);
78 static	void		 pg_redirect(const struct req *, const char *);
79 static	void		 pg_search(const struct req *);
80 static	void		 pg_searchres(const struct req *,
81 				struct manpage *, size_t);
82 static	void		 pg_show(struct req *, const char *);
83 static	void		 resp_begin_html(int, const char *, const char *);
84 static	void		 resp_begin_http(int, const char *);
85 static	void		 resp_catman(const struct req *, const char *);
86 static	void		 resp_copy(const char *);
87 static	void		 resp_end_html(void);
88 static	void		 resp_format(const struct req *, const char *);
89 static	void		 resp_searchform(const struct req *, enum focus);
90 static	void		 resp_show(const struct req *, const char *);
91 static	void		 set_query_attr(char **, char **);
92 static	int		 validate_arch(const char *);
93 static	int		 validate_filename(const char *);
94 static	int		 validate_manpath(const struct req *, const char *);
95 static	int		 validate_urifrag(const char *);
96 
97 static	const char	 *scriptname = SCRIPT_NAME;
98 
/*
 * Preference of the section digits '1' to '9', indexed by digit - '1'.
 * When several pages match in man(1) mode, the lowest value wins.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values submitted by the section selector of the search form. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown in the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries offered by the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
117 
/*
 * Architecture names accepted in requests and offered by the
 * architecture selector of the search form, in display order.
 */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64",
	"hppa", "i386", "landisk",
	"loongson", "luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc", "sparc64",
	"amiga", "arc", "armish", "arm32",
	"atari", "aviion", "beagle", "cats",
	"hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos",
	"pmax", "powerpc", "solbourne", "sparc",
	"sun3", "vax", "wgrisc", "x68k",
	"zaurus"
};

/* Number of known architecture names. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133 
/*
 * Write one character to standard output, replacing the characters
 * that are special in HTML (double quote, ampersand, angle brackets)
 * with the corresponding character entities.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
160 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
174 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of the attribute is freed.  An empty string
 * is freed as well and leaves the attribute NULL.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}
	*attr = newval;
}
191 
192 /*
193  * Parse the QUERY_STRING for key-value pairs
194  * and store the values into the query structure.
195  */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 	char		*key, *val;
200 	size_t		 keysz, valsz;
201 
202 	req->isquery	= 1;
203 	req->q.manpath	= NULL;
204 	req->q.arch	= NULL;
205 	req->q.sec	= NULL;
206 	req->q.query	= NULL;
207 	req->q.equal	= 1;
208 
209 	key = val = NULL;
210 	while (*qs != '\0') {
211 
212 		/* Parse one key. */
213 
214 		keysz = strcspn(qs, "=;&");
215 		key = mandoc_strndup(qs, keysz);
216 		qs += keysz;
217 		if (*qs != '=')
218 			goto next;
219 
220 		/* Parse one value. */
221 
222 		valsz = strcspn(++qs, ";&");
223 		val = mandoc_strndup(qs, valsz);
224 		qs += valsz;
225 
226 		/* Decode and catch encoding errors. */
227 
228 		if ( ! (http_decode(key) && http_decode(val)))
229 			goto next;
230 
231 		/* Handle key-value pairs. */
232 
233 		if ( ! strcmp(key, "query"))
234 			set_query_attr(&req->q.query, &val);
235 
236 		else if ( ! strcmp(key, "apropos"))
237 			req->q.equal = !strcmp(val, "0");
238 
239 		else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 			if ( ! strncmp(val, "OpenBSD ", 8)) {
242 				val[7] = '-';
243 				if ('C' == val[8])
244 					val[8] = 'c';
245 			}
246 #endif
247 			set_query_attr(&req->q.manpath, &val);
248 		}
249 
250 		else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 		    && strcmp(key, "sektion")
253 #endif
254 		    )) {
255 			if ( ! strcmp(val, "0"))
256 				*val = '\0';
257 			set_query_attr(&req->q.sec, &val);
258 		}
259 
260 		else if ( ! strcmp(key, "arch")) {
261 			if ( ! strcmp(val, "default"))
262 				*val = '\0';
263 			set_query_attr(&req->q.arch, &val);
264 		}
265 
266 		/*
267 		 * The key must be freed in any case.
268 		 * The val may have been handed over to the query
269 		 * structure, in which case it is now NULL.
270 		 */
271 next:
272 		free(key);
273 		key = NULL;
274 		free(val);
275 		val = NULL;
276 
277 		if (*qs != '\0')
278 			qs++;
279 	}
280 }
281 
/*
 * HTTP-decode a string in place:  "%4e+foo" becomes "N foo".
 * A plus sign decodes to a blank, and a percent sign has to be
 * followed by exactly two hexadecimal digits.
 * Returns 1 on success.  Returns 0 if an escape sequence is
 * truncated, contains a character that is not a hexadecimal digit,
 * or encodes a NUL byte; in that case the content of the string
 * is unspecified and must not be used.
 */
static int
http_decode(char *p)
{
	char		*q;
	int		 c, digit, i;

	for (q = p; *p != '\0'; p++, q++) {
		if (*p == '+') {
			*q = ' ';
			continue;
		}
		if (*p != '%') {
			*q = *p;
			continue;
		}

		/*
		 * Convert the two digits by hand.  The terminating NUL
		 * is not a hexadecimal digit, so a truncated escape
		 * fails before the code can read past the string end.
		 */

		c = 0;
		for (i = 1; i <= 2; i++) {
			if ( ! isxdigit((unsigned char)p[i]))
				return 0;
			if (isdigit((unsigned char)p[i]))
				digit = p[i] - '0';
			else
				digit = tolower((unsigned char)p[i]) -
				    'a' + 10;
			c = c * 16 + digit;
		}

		/* An embedded NUL would silently truncate the value. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
317 
/*
 * Write a string to standard output with percent-encoding.
 * Alphanumeric characters and the four characters "-._~" are
 * written literally, every other byte as %XX in upper case hex.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
329 
/*
 * Write the HTTP response header block to standard output and
 * flush it.  A Status: line is only written for codes other
 * than 200, using msg as the reason phrase.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
346 
/*
 * Copy the content of a file verbatim to standard output.
 * Buffered stdio output is flushed first to preserve ordering,
 * because the copy itself uses write(2) on the file descriptor.
 * A file that cannot be opened is silently skipped.  Copying stops
 * at the first read error or at the first unrecoverable write
 * error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nread, nwritten, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nread = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may accept less than requested, so keep
		 * going until the whole buffer has been delivered.
		 */

		off = 0;
		while (off < nread) {
			nwritten = write(STDOUT_FILENO, buf + off,
			    (size_t)(nread - off));
			if (nwritten > 0)
				off += nwritten;
			else if (nwritten == 0 || errno != EINTR)
				break;
		}

		/* Give up when the output cannot take the data. */

		if (off < nread)
			break;
	}
	close(fd);
}
361 
362 static void
363 resp_begin_html(int code, const char *msg, const char *file)
364 {
365 	char	*cp;
366 
367 	resp_begin_http(code, msg);
368 
369 	printf("<!DOCTYPE html>\n"
370 	       "<html>\n"
371 	       "<head>\n"
372 	       "  <meta charset=\"UTF-8\"/>\n"
373 	       "  <meta name=\"viewport\""
374 		      " content=\"width=device-width, initial-scale=1.0\">\n"
375 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
376 	       " type=\"text/css\" media=\"all\">\n"
377 	       "  <title>",
378 	       CSS_DIR);
379 	if (file != NULL) {
380 		if ((cp = strrchr(file, '/')) != NULL)
381 			file = cp + 1;
382 		if ((cp = strrchr(file, '.')) != NULL) {
383 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
384 		} else
385 			printf("%s - ", file);
386 	}
387 	printf("%s</title>\n"
388 	       "</head>\n"
389 	       "<body>\n",
390 	       CUSTOMIZE_TITLE);
391 
392 	resp_copy(MAN_DIR "/header.html");
393 }
394 
395 static void
396 resp_end_html(void)
397 {
398 
399 	resp_copy(MAN_DIR "/footer.html");
400 
401 	puts("</body>\n"
402 	     "</html>");
403 }
404 
405 static void
406 resp_searchform(const struct req *req, enum focus focus)
407 {
408 	int		 i;
409 
410 	printf("<form action=\"/%s\" method=\"get\">\n"
411 	       "  <fieldset>\n"
412 	       "    <legend>Manual Page Search Parameters</legend>\n",
413 	       scriptname);
414 
415 	/* Write query input box. */
416 
417 	printf("    <input type=\"search\" name=\"query\" value=\"");
418 	if (req->q.query != NULL)
419 		html_print(req->q.query);
420 	printf( "\" size=\"40\"");
421 	if (focus == FOCUS_QUERY)
422 		printf(" autofocus");
423 	puts(">");
424 
425 	/* Write submission buttons. */
426 
427 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
428 		"man</button>\n"
429 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
430 		"apropos</button>\n"
431 		"    <br/>\n");
432 
433 	/* Write section selector. */
434 
435 	puts("    <select name=\"sec\">");
436 	for (i = 0; i < sec_MAX; i++) {
437 		printf("      <option value=\"%s\"", sec_numbers[i]);
438 		if (NULL != req->q.sec &&
439 		    0 == strcmp(sec_numbers[i], req->q.sec))
440 			printf(" selected=\"selected\"");
441 		printf(">%s</option>\n", sec_names[i]);
442 	}
443 	puts("    </select>");
444 
445 	/* Write architecture selector. */
446 
447 	printf(	"    <select name=\"arch\">\n"
448 		"      <option value=\"default\"");
449 	if (NULL == req->q.arch)
450 		printf(" selected=\"selected\"");
451 	puts(">All Architectures</option>");
452 	for (i = 0; i < arch_MAX; i++) {
453 		printf("      <option");
454 		if (NULL != req->q.arch &&
455 		    0 == strcmp(arch_names[i], req->q.arch))
456 			printf(" selected=\"selected\"");
457 		printf(">%s</option>\n", arch_names[i]);
458 	}
459 	puts("    </select>");
460 
461 	/* Write manpath selector. */
462 
463 	if (req->psz > 1) {
464 		puts("    <select name=\"manpath\">");
465 		for (i = 0; i < (int)req->psz; i++) {
466 			printf("      <option");
467 			if (strcmp(req->q.manpath, req->p[i]) == 0)
468 				printf(" selected=\"selected\"");
469 			printf(">");
470 			html_print(req->p[i]);
471 			puts("</option>");
472 		}
473 		puts("    </select>");
474 	}
475 
476 	puts("  </fieldset>\n"
477 	     "</form>");
478 }
479 
/*
 * Check that a string consists of alphanumeric characters and
 * the characters '-', '.', '/', and '_' only.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++)
		if ( ! isalnum((unsigned char)*cp) &&
		    strchr("-./_", *cp) == NULL)
			return 0;
	return 1;
}
493 
494 static int
495 validate_manpath(const struct req *req, const char* manpath)
496 {
497 	size_t	 i;
498 
499 	for (i = 0; i < req->psz; i++)
500 		if ( ! strcmp(manpath, req->p[i]))
501 			return 1;
502 
503 	return 0;
504 }
505 
506 static int
507 validate_arch(const char *arch)
508 {
509 	int	 i;
510 
511 	for (i = 0; i < arch_MAX; i++)
512 		if (strcmp(arch, arch_names[i]) == 0)
513 			return 1;
514 
515 	return 0;
516 }
517 
/*
 * Check a file name relative to a manpath root.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could climb out of the tree. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
528 
529 static void
530 pg_index(const struct req *req)
531 {
532 
533 	resp_begin_html(200, NULL, NULL);
534 	resp_searchform(req, FOCUS_QUERY);
535 	printf("<p>\n"
536 	       "This web interface is documented in the\n"
537 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
538 	       "manual, and the\n"
539 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
540 	       "manual explains the query syntax.\n"
541 	       "</p>\n",
542 	       scriptname, *scriptname == '\0' ? "" : "/",
543 	       scriptname, *scriptname == '\0' ? "" : "/");
544 	resp_end_html();
545 }
546 
547 static void
548 pg_noresult(const struct req *req, int code, const char *http_msg,
549     const char *user_msg)
550 {
551 	resp_begin_html(code, http_msg, NULL);
552 	resp_searchform(req, FOCUS_QUERY);
553 	puts("<p>");
554 	puts(user_msg);
555 	puts("</p>");
556 	resp_end_html();
557 }
558 
559 static void
560 pg_error_badrequest(const char *msg)
561 {
562 
563 	resp_begin_html(400, "Bad Request", NULL);
564 	puts("<h1>Bad Request</h1>\n"
565 	     "<p>\n");
566 	puts(msg);
567 	printf("Try again from the\n"
568 	       "<a href=\"/%s\">main page</a>.\n"
569 	       "</p>", scriptname);
570 	resp_end_html();
571 }
572 
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
580 
581 static void
582 pg_redirect(const struct req *req, const char *name)
583 {
584 	printf("Status: 303 See Other\r\n"
585 	    "Location: /");
586 	if (*scriptname != '\0')
587 		printf("%s/", scriptname);
588 	if (strcmp(req->q.manpath, req->p[0]))
589 		printf("%s/", req->q.manpath);
590 	if (req->q.arch != NULL)
591 		printf("%s/", req->q.arch);
592 	http_encode(name);
593 	if (req->q.sec != NULL) {
594 		putchar('.');
595 		http_encode(req->q.sec);
596 	}
597 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
598 }
599 
600 static void
601 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
602 {
603 	char		*arch, *archend;
604 	const char	*sec;
605 	size_t		 i, iuse;
606 	int		 archprio, archpriouse;
607 	int		 prio, priouse;
608 
609 	for (i = 0; i < sz; i++) {
610 		if (validate_filename(r[i].file))
611 			continue;
612 		warnx("invalid filename %s in %s database",
613 		    r[i].file, req->q.manpath);
614 		pg_error_internal();
615 		return;
616 	}
617 
618 	if (req->isquery && sz == 1) {
619 		/*
620 		 * If we have just one result, then jump there now
621 		 * without any delay.
622 		 */
623 		printf("Status: 303 See Other\r\n"
624 		    "Location: /");
625 		if (*scriptname != '\0')
626 			printf("%s/", scriptname);
627 		if (strcmp(req->q.manpath, req->p[0]))
628 			printf("%s/", req->q.manpath);
629 		printf("%s\r\n"
630 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
631 		    r[0].file);
632 		return;
633 	}
634 
635 	/*
636 	 * In man(1) mode, show one of the pages
637 	 * even if more than one is found.
638 	 */
639 
640 	iuse = 0;
641 	if (req->q.equal || sz == 1) {
642 		priouse = 20;
643 		archpriouse = 3;
644 		for (i = 0; i < sz; i++) {
645 			sec = r[i].file;
646 			sec += strcspn(sec, "123456789");
647 			if (sec[0] == '\0')
648 				continue;
649 			prio = sec_prios[sec[0] - '1'];
650 			if (sec[1] != '/')
651 				prio += 10;
652 			if (req->q.arch == NULL) {
653 				archprio =
654 				    ((arch = strchr(sec + 1, '/'))
655 					== NULL) ? 3 :
656 				    ((archend = strchr(arch + 1, '/'))
657 					== NULL) ? 0 :
658 				    strncmp(arch, "amd64/",
659 					archend - arch) ? 2 : 1;
660 				if (archprio < archpriouse) {
661 					archpriouse = archprio;
662 					priouse = prio;
663 					iuse = i;
664 					continue;
665 				}
666 				if (archprio > archpriouse)
667 					continue;
668 			}
669 			if (prio >= priouse)
670 				continue;
671 			priouse = prio;
672 			iuse = i;
673 		}
674 		resp_begin_html(200, NULL, r[iuse].file);
675 	} else
676 		resp_begin_html(200, NULL, NULL);
677 
678 	resp_searchform(req,
679 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
680 
681 	if (sz > 1) {
682 		puts("<table class=\"results\">");
683 		for (i = 0; i < sz; i++) {
684 			printf("  <tr>\n"
685 			       "    <td>"
686 			       "<a class=\"Xr\" href=\"/");
687 			if (*scriptname != '\0')
688 				printf("%s/", scriptname);
689 			if (strcmp(req->q.manpath, req->p[0]))
690 				printf("%s/", req->q.manpath);
691 			printf("%s\">", r[i].file);
692 			html_print(r[i].names);
693 			printf("</a></td>\n"
694 			       "    <td><span class=\"Nd\">");
695 			html_print(r[i].output);
696 			puts("</span></td>\n"
697 			     "  </tr>");
698 		}
699 		puts("</table>");
700 	}
701 
702 	if (req->q.equal || sz == 1) {
703 		puts("<hr>");
704 		resp_show(req, r[iuse].file);
705 	}
706 
707 	resp_end_html();
708 }
709 
/*
 * Write a preformatted manual page (catpage) as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated:  underscore, backspace, character becomes italic,
 * a few known character pairs become '*' or '+', and any other
 * overstrike becomes bold.  All text is HTML-escaped.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair are compared, so
			 * the second one is always taken from p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
846 
847 static void
848 resp_format(const struct req *req, const char *file)
849 {
850 	struct manoutput conf;
851 	struct mparse	*mp;
852 	struct roff_meta *meta;
853 	void		*vp;
854 	int		 fd;
855 	int		 usepath;
856 
857 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
858 		puts("<p>You specified an invalid manual file.</p>");
859 		return;
860 	}
861 
862 	mchars_alloc();
863 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
864 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
865 	mparse_readfd(mp, fd, file);
866 	close(fd);
867 	meta = mparse_result(mp);
868 
869 	memset(&conf, 0, sizeof(conf));
870 	conf.fragment = 1;
871 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
872 	usepath = strcmp(req->q.manpath, req->p[0]);
873 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
874 	    scriptname, *scriptname == '\0' ? "" : "/",
875 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
876 
877 	vp = html_alloc(&conf);
878 	if (meta->macroset == MACROSET_MDOC)
879 		html_mdoc(vp, meta);
880 	else
881 		html_man(vp, meta);
882 
883 	html_free(vp);
884 	mparse_free(mp);
885 	mchars_free();
886 	free(conf.man);
887 	free(conf.style);
888 }
889 
/*
 * Write one manual page as HTML.  After skipping an optional
 * leading "./", names starting with 'c' are handled as preformatted
 * pages and all other names as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name == 'c')
		resp_catman(req, name);
	else
		resp_format(req, name);
}
902 
903 static void
904 pg_show(struct req *req, const char *fullpath)
905 {
906 	char		*manpath;
907 	const char	*file;
908 
909 	if ((file = strchr(fullpath, '/')) == NULL) {
910 		pg_error_badrequest(
911 		    "You did not specify a page to show.");
912 		return;
913 	}
914 	manpath = mandoc_strndup(fullpath, file - fullpath);
915 	file++;
916 
917 	if ( ! validate_manpath(req, manpath)) {
918 		pg_error_badrequest(
919 		    "You specified an invalid manpath.");
920 		free(manpath);
921 		return;
922 	}
923 
924 	/*
925 	 * Begin by chdir()ing into the manpath.
926 	 * This way we can pick up the database files, which are
927 	 * relative to the manpath root.
928 	 */
929 
930 	if (chdir(manpath) == -1) {
931 		warn("chdir %s", manpath);
932 		pg_error_internal();
933 		free(manpath);
934 		return;
935 	}
936 	free(manpath);
937 
938 	if ( ! validate_filename(file)) {
939 		pg_error_badrequest(
940 		    "You specified an invalid manual file.");
941 		return;
942 	}
943 
944 	resp_begin_html(200, NULL, file);
945 	resp_searchform(req, FOCUS_NONE);
946 	resp_show(req, file);
947 	resp_end_html();
948 }
949 
950 static void
951 pg_search(const struct req *req)
952 {
953 	struct mansearch	  search;
954 	struct manpaths		  paths;
955 	struct manpage		 *res;
956 	char			**argv;
957 	char			 *query, *rp, *wp;
958 	size_t			  ressz;
959 	int			  argc;
960 
961 	/*
962 	 * Begin by chdir()ing into the root of the manpath.
963 	 * This way we can pick up the database files, which are
964 	 * relative to the manpath root.
965 	 */
966 
967 	if (chdir(req->q.manpath) == -1) {
968 		warn("chdir %s", req->q.manpath);
969 		pg_error_internal();
970 		return;
971 	}
972 
973 	search.arch = req->q.arch;
974 	search.sec = req->q.sec;
975 	search.outkey = "Nd";
976 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
977 	search.firstmatch = 1;
978 
979 	paths.sz = 1;
980 	paths.paths = mandoc_malloc(sizeof(char *));
981 	paths.paths[0] = mandoc_strdup(".");
982 
983 	/*
984 	 * Break apart at spaces with backslash-escaping.
985 	 */
986 
987 	argc = 0;
988 	argv = NULL;
989 	rp = query = mandoc_strdup(req->q.query);
990 	for (;;) {
991 		while (isspace((unsigned char)*rp))
992 			rp++;
993 		if (*rp == '\0')
994 			break;
995 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
996 		argv[argc++] = wp = rp;
997 		for (;;) {
998 			if (isspace((unsigned char)*rp)) {
999 				*wp = '\0';
1000 				rp++;
1001 				break;
1002 			}
1003 			if (rp[0] == '\\' && rp[1] != '\0')
1004 				rp++;
1005 			if (wp != rp)
1006 				*wp = *rp;
1007 			if (*rp == '\0')
1008 				break;
1009 			wp++;
1010 			rp++;
1011 		}
1012 	}
1013 
1014 	res = NULL;
1015 	ressz = 0;
1016 	if (req->isquery && req->q.equal && argc == 1)
1017 		pg_redirect(req, argv[0]);
1018 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1019 		pg_noresult(req, 400, "Bad Request",
1020 		    "You entered an invalid query.");
1021 	else if (ressz == 0)
1022 		pg_noresult(req, 404, "Not Found", "No results found.");
1023 	else
1024 		pg_searchres(req, res, ressz);
1025 
1026 	free(query);
1027 	mansearch_free(res, ressz);
1028 	free(paths.paths[0]);
1029 	free(paths.paths);
1030 }
1031 
1032 int
1033 main(void)
1034 {
1035 	struct req	 req;
1036 	struct itimerval itimer;
1037 	const char	*path;
1038 	const char	*querystring;
1039 	int		 i;
1040 
1041 	/*
1042 	 * The "rpath" pledge could be revoked after mparse_readfd()
1043 	 * if the file desciptor to "/footer.html" would be opened
1044 	 * up front, but it's probably not worth the complication
1045 	 * of the code it would cause: it would require scattering
1046 	 * pledge() calls in multiple low-level resp_*() functions.
1047 	 */
1048 
1049 	if (pledge("stdio rpath", NULL) == -1) {
1050 		warn("pledge");
1051 		pg_error_internal();
1052 		return EXIT_FAILURE;
1053 	}
1054 
1055 	/* Poor man's ReDoS mitigation. */
1056 
1057 	itimer.it_value.tv_sec = 2;
1058 	itimer.it_value.tv_usec = 0;
1059 	itimer.it_interval.tv_sec = 2;
1060 	itimer.it_interval.tv_usec = 0;
1061 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1062 		warn("setitimer");
1063 		pg_error_internal();
1064 		return EXIT_FAILURE;
1065 	}
1066 
1067 	/*
1068 	 * First we change directory into the MAN_DIR so that
1069 	 * subsequent scanning for manpath directories is rooted
1070 	 * relative to the same position.
1071 	 */
1072 
1073 	if (chdir(MAN_DIR) == -1) {
1074 		warn("MAN_DIR: %s", MAN_DIR);
1075 		pg_error_internal();
1076 		return EXIT_FAILURE;
1077 	}
1078 
1079 	memset(&req, 0, sizeof(struct req));
1080 	req.q.equal = 1;
1081 	parse_manpath_conf(&req);
1082 
1083 	/* Parse the path info and the query string. */
1084 
1085 	if ((path = getenv("PATH_INFO")) == NULL)
1086 		path = "";
1087 	else if (*path == '/')
1088 		path++;
1089 
1090 	if (*path != '\0') {
1091 		parse_path_info(&req, path);
1092 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1093 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1094 			path = "";
1095 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1096 		parse_query_string(&req, querystring);
1097 
1098 	/* Validate parsed data and add defaults. */
1099 
1100 	if (req.q.manpath == NULL)
1101 		req.q.manpath = mandoc_strdup(req.p[0]);
1102 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1103 		pg_error_badrequest(
1104 		    "You specified an invalid manpath.");
1105 		return EXIT_FAILURE;
1106 	}
1107 
1108 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1109 		pg_error_badrequest(
1110 		    "You specified an invalid architecture.");
1111 		return EXIT_FAILURE;
1112 	}
1113 
1114 	/* Dispatch to the three different pages. */
1115 
1116 	if ('\0' != *path)
1117 		pg_show(&req, path);
1118 	else if (NULL != req.q.query)
1119 		pg_search(&req);
1120 	else
1121 		pg_index(&req);
1122 
1123 	free(req.q.manpath);
1124 	free(req.q.arch);
1125 	free(req.q.sec);
1126 	free(req.q.query);
1127 	for (i = 0; i < (int)req.psz; i++)
1128 		free(req.p[i]);
1129 	free(req.p);
1130 	return EXIT_SUCCESS;
1131 }
1132 
1133 /*
1134  * Translate PATH_INFO to a query.
1135  */
1136 static void
1137 parse_path_info(struct req *req, const char *path)
1138 {
1139 	const char	*name, *sec, *end;
1140 
1141 	req->isquery = 0;
1142 	req->q.equal = 1;
1143 	req->q.manpath = NULL;
1144 	req->q.arch = NULL;
1145 
1146 	/* Mandatory manual page name. */
1147 	if ((name = strrchr(path, '/')) == NULL)
1148 		name = path;
1149 	else
1150 		name++;
1151 
1152 	/* Optional trailing section. */
1153 	sec = strrchr(name, '.');
1154 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1155 		req->q.query = mandoc_strndup(name, sec - name - 1);
1156 		req->q.sec = mandoc_strdup(sec);
1157 	} else {
1158 		req->q.query = mandoc_strdup(name);
1159 		req->q.sec = NULL;
1160 	}
1161 
1162 	/* Handle the case of name[.section] only. */
1163 	if (name == path)
1164 		return;
1165 
1166 	/* Optional manpath. */
1167 	end = strchr(path, '/');
1168 	req->q.manpath = mandoc_strndup(path, end - path);
1169 	if (validate_manpath(req, req->q.manpath)) {
1170 		path = end + 1;
1171 		if (name == path)
1172 			return;
1173 	} else {
1174 		free(req->q.manpath);
1175 		req->q.manpath = NULL;
1176 	}
1177 
1178 	/* Optional section. */
1179 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1180 		path += 3;
1181 		end = strchr(path, '/');
1182 		free(req->q.sec);
1183 		req->q.sec = mandoc_strndup(path, end - path);
1184 		path = end + 1;
1185 		if (name == path)
1186 			return;
1187 	}
1188 
1189 	/* Optional architecture. */
1190 	end = strchr(path, '/');
1191 	if (end + 1 != name) {
1192 		pg_error_badrequest(
1193 		    "You specified too many directory components.");
1194 		exit(EXIT_FAILURE);
1195 	}
1196 	req->q.arch = mandoc_strndup(path, end - path);
1197 	if (validate_arch(req->q.arch) == 0) {
1198 		pg_error_badrequest(
1199 		    "You specified an invalid directory component.");
1200 		exit(EXIT_FAILURE);
1201 	}
1202 }
1203 
1204 /*
1205  * Scan for indexable paths.
1206  */
1207 static void
1208 parse_manpath_conf(struct req *req)
1209 {
1210 	FILE	*fp;
1211 	char	*dp;
1212 	size_t	 dpsz;
1213 	ssize_t	 len;
1214 
1215 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1216 		warn("%s/manpath.conf", MAN_DIR);
1217 		pg_error_internal();
1218 		exit(EXIT_FAILURE);
1219 	}
1220 
1221 	dp = NULL;
1222 	dpsz = 0;
1223 
1224 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1225 		if (dp[len - 1] == '\n')
1226 			dp[--len] = '\0';
1227 		req->p = mandoc_realloc(req->p,
1228 		    (req->psz + 1) * sizeof(char *));
1229 		if ( ! validate_urifrag(dp)) {
1230 			warnx("%s/manpath.conf contains "
1231 			    "unsafe path \"%s\"", MAN_DIR, dp);
1232 			pg_error_internal();
1233 			exit(EXIT_FAILURE);
1234 		}
1235 		if (strchr(dp, '/') != NULL) {
1236 			warnx("%s/manpath.conf contains "
1237 			    "path with slash \"%s\"", MAN_DIR, dp);
1238 			pg_error_internal();
1239 			exit(EXIT_FAILURE);
1240 		}
1241 		req->p[req->psz++] = dp;
1242 		dp = NULL;
1243 		dpsz = 0;
1244 	}
1245 	free(dp);
1246 
1247 	if (req->p == NULL) {
1248 		warnx("%s/manpath.conf is empty", MAN_DIR);
1249 		pg_error_internal();
1250 		exit(EXIT_FAILURE);
1251 	}
1252 }
1253