xref: /openbsd/usr.bin/mandoc/cgi.c (revision 9b7c3dbb)
1 /*	$OpenBSD: cgi.c,v 1.77 2016/08/18 00:44:37 jsg Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 #include <sys/time.h>
20 
21 #include <ctype.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <limits.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <unistd.h>
31 
32 #include "mandoc_aux.h"
33 #include "mandoc.h"
34 #include "roff.h"
35 #include "mdoc.h"
36 #include "man.h"
37 #include "main.h"
38 #include "manconf.h"
39 #include "mansearch.h"
40 #include "cgi.h"
41 
/*
 * A query as passed to the search function.
 * The string members are NULL when the corresponding parameter
 * was not given; main() frees them before a normal exit.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
52 
/*
 * Per-request state: the parsed query plus the list of manpaths
 * that parse_manpath_conf() reads from manpath.conf.
 */
struct	req {
	struct query	  q; /* parameters parsed from the request */
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
59 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* do not emit the autofocus attribute */
	FOCUS_QUERY /* emit autofocus on the query input box */
};
64 
65 static	void		 html_print(const char *);
66 static	void		 html_putchar(char);
67 static	int		 http_decode(char *);
68 static	void		 parse_manpath_conf(struct req *);
69 static	void		 parse_path_info(struct req *req, const char *path);
70 static	void		 parse_query_string(struct req *, const char *);
71 static	void		 pg_error_badrequest(const char *);
72 static	void		 pg_error_internal(void);
73 static	void		 pg_index(const struct req *);
74 static	void		 pg_noresult(const struct req *, const char *);
75 static	void		 pg_search(const struct req *);
76 static	void		 pg_searchres(const struct req *,
77 				struct manpage *, size_t);
78 static	void		 pg_show(struct req *, const char *);
79 static	void		 resp_begin_html(int, const char *);
80 static	void		 resp_begin_http(int, const char *);
81 static	void		 resp_catman(const struct req *, const char *);
82 static	void		 resp_copy(const char *);
83 static	void		 resp_end_html(void);
84 static	void		 resp_format(const struct req *, const char *);
85 static	void		 resp_searchform(const struct req *, enum focus);
86 static	void		 resp_show(const struct req *, const char *);
87 static	void		 set_query_attr(char **, char **);
88 static	int		 validate_filename(const char *);
89 static	int		 validate_manpath(const struct req *, const char *);
90 static	int		 validate_urifrag(const char *);
91 
/* Script name inserted into generated URIs; set from SCRIPT_NAME. */
static	const char	 *scriptname = SCRIPT_NAME;
93 
/*
 * Display priority by section digit: sec_prios[d - '1'] belongs to
 * section d.  pg_searchres() prefers the page with the lowest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Option values of the section selector in the search form.
 * parse_query_string() treats "0" as "no section given".
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
/* Option labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]; also used to index sec_numbers[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
112 
/*
 * Architecture names offered by the architecture selector
 * of the search form, in the order they are listed there.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",
    "hppa",        "i386",        "landisk",
    "loongson",    "luna88k",     "macppc",      "mips64",
    "octeon",      "sgi",         "socppc",      "sparc",
    "sparc64",     "zaurus",
    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sun3",
    "vax",         "wgrisc",      "x68k"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
128 
/*
 * Print a character, escaping HTML along the way.
 * The characters '"', '&', '>' and '<' are written as named
 * character references so that they are safe both in element
 * content and inside double-quoted attribute values.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is "&quot;"; "&quote;" is not defined in HTML. */
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
155 
/*
 * Write a NUL-terminated string to standard output,
 * passing every byte through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
169 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Any string previously stored in *attr is freed.  An empty value
 * is freed as well and stored as NULL.  In both cases *val is set
 * to NULL, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
186 
187 /*
188  * Parse the QUERY_STRING for key-value pairs
189  * and store the values into the query structure.
190  */
191 static void
192 parse_query_string(struct req *req, const char *qs)
193 {
194 	char		*key, *val;
195 	size_t		 keysz, valsz;
196 
197 	req->isquery	= 1;
198 	req->q.manpath	= NULL;
199 	req->q.arch	= NULL;
200 	req->q.sec	= NULL;
201 	req->q.query	= NULL;
202 	req->q.equal	= 1;
203 
204 	key = val = NULL;
205 	while (*qs != '\0') {
206 
207 		/* Parse one key. */
208 
209 		keysz = strcspn(qs, "=;&");
210 		key = mandoc_strndup(qs, keysz);
211 		qs += keysz;
212 		if (*qs != '=')
213 			goto next;
214 
215 		/* Parse one value. */
216 
217 		valsz = strcspn(++qs, ";&");
218 		val = mandoc_strndup(qs, valsz);
219 		qs += valsz;
220 
221 		/* Decode and catch encoding errors. */
222 
223 		if ( ! (http_decode(key) && http_decode(val)))
224 			goto next;
225 
226 		/* Handle key-value pairs. */
227 
228 		if ( ! strcmp(key, "query"))
229 			set_query_attr(&req->q.query, &val);
230 
231 		else if ( ! strcmp(key, "apropos"))
232 			req->q.equal = !strcmp(val, "0");
233 
234 		else if ( ! strcmp(key, "manpath")) {
235 #ifdef COMPAT_OLDURI
236 			if ( ! strncmp(val, "OpenBSD ", 8)) {
237 				val[7] = '-';
238 				if ('C' == val[8])
239 					val[8] = 'c';
240 			}
241 #endif
242 			set_query_attr(&req->q.manpath, &val);
243 		}
244 
245 		else if ( ! (strcmp(key, "sec")
246 #ifdef COMPAT_OLDURI
247 		    && strcmp(key, "sektion")
248 #endif
249 		    )) {
250 			if ( ! strcmp(val, "0"))
251 				*val = '\0';
252 			set_query_attr(&req->q.sec, &val);
253 		}
254 
255 		else if ( ! strcmp(key, "arch")) {
256 			if ( ! strcmp(val, "default"))
257 				*val = '\0';
258 			set_query_attr(&req->q.arch, &val);
259 		}
260 
261 		/*
262 		 * The key must be freed in any case.
263 		 * The val may have been handed over to the query
264 		 * structure, in which case it is now NULL.
265 		 */
266 next:
267 		free(key);
268 		key = NULL;
269 		free(val);
270 		val = NULL;
271 
272 		if (*qs != '\0')
273 			qs++;
274 	}
275 }
276 
277 /*
278  * HTTP-decode a string.  The standard explanation is that this turns
279  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
280  * over the allocated string.
281  */
282 static int
283 http_decode(char *p)
284 {
285 	char             hex[3];
286 	char		*q;
287 	int              c;
288 
289 	hex[2] = '\0';
290 
291 	q = p;
292 	for ( ; '\0' != *p; p++, q++) {
293 		if ('%' == *p) {
294 			if ('\0' == (hex[0] = *(p + 1)))
295 				return 0;
296 			if ('\0' == (hex[1] = *(p + 2)))
297 				return 0;
298 			if (1 != sscanf(hex, "%x", &c))
299 				return 0;
300 			if ('\0' == c)
301 				return 0;
302 
303 			*q = (char)c;
304 			p += 2;
305 		} else
306 			*q = '+' == *p ? ' ' : *p;
307 	}
308 
309 	*q = '\0';
310 	return 1;
311 }
312 
/*
 * Write the CGI response header block and flush it.
 * A "Status:" line built from code and msg is written first
 * unless the code is 200; msg is not used for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
327 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped, which makes
 * the file optional.  Copying stops quietly on the first read or
 * write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 sz, off, nw;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The copy bypasses stdio, so flush whatever is still
	 * buffered there first to keep the output in order.
	 */
	fflush(stdout);

	while ((sz = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may be partial: loop until the chunk is out. */
		for (off = 0; off < sz; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(sz - off));
			if (nw <= 0)
				goto out;
		}
	}
out:
	close(fd);
}
342 
343 static void
344 resp_begin_html(int code, const char *msg)
345 {
346 
347 	resp_begin_http(code, msg);
348 
349 	printf("<!DOCTYPE html>\n"
350 	       "<html>\n"
351 	       "<head>\n"
352 	       "<meta charset=\"UTF-8\"/>\n"
353 	       "<link rel=\"stylesheet\" href=\"%s/mandoc.css\""
354 	       " type=\"text/css\" media=\"all\">\n"
355 	       "<title>%s</title>\n"
356 	       "</head>\n"
357 	       "<body>\n"
358 	       "<!-- Begin page content. //-->\n",
359 	       CSS_DIR, CUSTOMIZE_TITLE);
360 
361 	resp_copy(MAN_DIR "/header.html");
362 }
363 
364 static void
365 resp_end_html(void)
366 {
367 
368 	resp_copy(MAN_DIR "/footer.html");
369 
370 	puts("</body>\n"
371 	     "</html>");
372 }
373 
374 static void
375 resp_searchform(const struct req *req, enum focus focus)
376 {
377 	int		 i;
378 
379 	puts("<!-- Begin search form. //-->");
380 	printf("<div id=\"mancgi\">\n"
381 	       "<form action=\"/%s\" method=\"get\">\n"
382 	       "<fieldset>\n"
383 	       "<legend>Manual Page Search Parameters</legend>\n",
384 	       scriptname);
385 
386 	/* Write query input box. */
387 
388 	printf("<input type=\"text\" name=\"query\" value=\"");
389 	if (req->q.query != NULL)
390 		html_print(req->q.query);
391 	printf( "\" size=\"40\"");
392 	if (focus == FOCUS_QUERY)
393 		printf(" autofocus");
394 	puts(">");
395 
396 	/* Write submission buttons. */
397 
398 	printf(	"<button type=\"submit\" name=\"apropos\" value=\"0\">"
399 		"man</button>\n"
400 		"<button type=\"submit\" name=\"apropos\" value=\"1\">"
401 		"apropos</button>\n<br/>\n");
402 
403 	/* Write section selector. */
404 
405 	puts("<select name=\"sec\">");
406 	for (i = 0; i < sec_MAX; i++) {
407 		printf("<option value=\"%s\"", sec_numbers[i]);
408 		if (NULL != req->q.sec &&
409 		    0 == strcmp(sec_numbers[i], req->q.sec))
410 			printf(" selected=\"selected\"");
411 		printf(">%s</option>\n", sec_names[i]);
412 	}
413 	puts("</select>");
414 
415 	/* Write architecture selector. */
416 
417 	printf(	"<select name=\"arch\">\n"
418 		"<option value=\"default\"");
419 	if (NULL == req->q.arch)
420 		printf(" selected=\"selected\"");
421 	puts(">All Architectures</option>");
422 	for (i = 0; i < arch_MAX; i++) {
423 		printf("<option value=\"%s\"", arch_names[i]);
424 		if (NULL != req->q.arch &&
425 		    0 == strcmp(arch_names[i], req->q.arch))
426 			printf(" selected=\"selected\"");
427 		printf(">%s</option>\n", arch_names[i]);
428 	}
429 	puts("</select>");
430 
431 	/* Write manpath selector. */
432 
433 	if (req->psz > 1) {
434 		puts("<select name=\"manpath\">");
435 		for (i = 0; i < (int)req->psz; i++) {
436 			printf("<option ");
437 			if (strcmp(req->q.manpath, req->p[i]) == 0)
438 				printf("selected=\"selected\" ");
439 			printf("value=\"");
440 			html_print(req->p[i]);
441 			printf("\">");
442 			html_print(req->p[i]);
443 			puts("</option>");
444 		}
445 		puts("</select>");
446 	}
447 
448 	puts("</fieldset>\n"
449 	     "</form>\n"
450 	     "</div>");
451 	puts("<!-- End search form. //-->");
452 }
453 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
467 
468 static int
469 validate_manpath(const struct req *req, const char* manpath)
470 {
471 	size_t	 i;
472 
473 	for (i = 0; i < req->psz; i++)
474 		if ( ! strcmp(manpath, req->p[i]))
475 			return 1;
476 
477 	return 0;
478 }
479 
/*
 * Check a file name relative to a manpath.  After skipping an
 * optional leading "./", the name must start with "man" or "cat"
 * and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable and 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;
	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
490 
491 static void
492 pg_index(const struct req *req)
493 {
494 
495 	resp_begin_html(200, NULL);
496 	resp_searchform(req, FOCUS_QUERY);
497 	printf("<p>\n"
498 	       "This web interface is documented in the\n"
499 	       "<a href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500 	       "manual, and the\n"
501 	       "<a href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502 	       "manual explains the query syntax.\n"
503 	       "</p>\n",
504 	       scriptname, *scriptname == '\0' ? "" : "/",
505 	       scriptname, *scriptname == '\0' ? "" : "/");
506 	resp_end_html();
507 }
508 
509 static void
510 pg_noresult(const struct req *req, const char *msg)
511 {
512 	resp_begin_html(200, NULL);
513 	resp_searchform(req, FOCUS_QUERY);
514 	puts("<p>");
515 	puts(msg);
516 	puts("</p>");
517 	resp_end_html();
518 }
519 
520 static void
521 pg_error_badrequest(const char *msg)
522 {
523 
524 	resp_begin_html(400, "Bad Request");
525 	puts("<h1>Bad Request</h1>\n"
526 	     "<p>\n");
527 	puts(msg);
528 	printf("Try again from the\n"
529 	       "<a href=\"/%s\">main page</a>.\n"
530 	       "</p>", scriptname);
531 	resp_end_html();
532 }
533 
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541 
542 static void
543 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544 {
545 	char		*arch, *archend;
546 	const char	*sec;
547 	size_t		 i, iuse;
548 	int		 archprio, archpriouse;
549 	int		 prio, priouse;
550 
551 	for (i = 0; i < sz; i++) {
552 		if (validate_filename(r[i].file))
553 			continue;
554 		warnx("invalid filename %s in %s database",
555 		    r[i].file, req->q.manpath);
556 		pg_error_internal();
557 		return;
558 	}
559 
560 	if (req->isquery && sz == 1) {
561 		/*
562 		 * If we have just one result, then jump there now
563 		 * without any delay.
564 		 */
565 		printf("Status: 303 See Other\r\n");
566 		printf("Location: http://%s/%s%s%s/%s",
567 		    HTTP_HOST, scriptname,
568 		    *scriptname == '\0' ? "" : "/",
569 		    req->q.manpath, r[0].file);
570 		printf("\r\n"
571 		     "Content-Type: text/html; charset=utf-8\r\n"
572 		     "\r\n");
573 		return;
574 	}
575 
576 	resp_begin_html(200, NULL);
577 	resp_searchform(req,
578 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579 
580 	if (sz > 1) {
581 		puts("<div class=\"results\">");
582 		puts("<table>");
583 
584 		for (i = 0; i < sz; i++) {
585 			printf("<tr>\n"
586 			       "<td class=\"title\">\n"
587 			       "<a href=\"/%s%s%s/%s",
588 			    scriptname, *scriptname == '\0' ? "" : "/",
589 			    req->q.manpath, r[i].file);
590 			printf("\">");
591 			html_print(r[i].names);
592 			printf("</a>\n"
593 			       "</td>\n"
594 			       "<td class=\"desc\">");
595 			html_print(r[i].output);
596 			puts("</td>\n"
597 			     "</tr>");
598 		}
599 
600 		puts("</table>\n"
601 		     "</div>");
602 	}
603 
604 	/*
605 	 * In man(1) mode, show one of the pages
606 	 * even if more than one is found.
607 	 */
608 
609 	if (req->q.equal || sz == 1) {
610 		puts("<hr>");
611 		iuse = 0;
612 		priouse = 20;
613 		archpriouse = 3;
614 		for (i = 0; i < sz; i++) {
615 			sec = r[i].file;
616 			sec += strcspn(sec, "123456789");
617 			if (sec[0] == '\0')
618 				continue;
619 			prio = sec_prios[sec[0] - '1'];
620 			if (sec[1] != '/')
621 				prio += 10;
622 			if (req->q.arch == NULL) {
623 				archprio =
624 				    ((arch = strchr(sec + 1, '/'))
625 					== NULL) ? 3 :
626 				    ((archend = strchr(arch + 1, '/'))
627 					== NULL) ? 0 :
628 				    strncmp(arch, "amd64/",
629 					archend - arch) ? 2 : 1;
630 				if (archprio < archpriouse) {
631 					archpriouse = archprio;
632 					priouse = prio;
633 					iuse = i;
634 					continue;
635 				}
636 				if (archprio > archpriouse)
637 					continue;
638 			}
639 			if (prio >= priouse)
640 				continue;
641 			priouse = prio;
642 			iuse = i;
643 		}
644 		resp_show(req, r[iuse].file);
645 	}
646 
647 	resp_end_html();
648 }
649 
/*
 * Print a preformatted manual page (catpage) as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, a few overstruck
 * symbol pairs become '*' or '+', and any other "X\bY" becomes
 * bold Y.  All page text goes through html_putchar().
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair sit around the
			 * backspace, so the second one is p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
786 
787 static void
788 resp_format(const struct req *req, const char *file)
789 {
790 	struct manoutput conf;
791 	struct mparse	*mp;
792 	struct roff_man	*man;
793 	void		*vp;
794 	int		 fd;
795 	int		 usepath;
796 
797 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
798 		puts("<p>You specified an invalid manual file.</p>");
799 		return;
800 	}
801 
802 	mchars_alloc();
803 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
804 	    MANDOCLEVEL_BADARG, NULL, req->q.manpath);
805 	mparse_readfd(mp, fd, file);
806 	close(fd);
807 
808 	memset(&conf, 0, sizeof(conf));
809 	conf.fragment = 1;
810 	usepath = strcmp(req->q.manpath, req->p[0]);
811 	mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
812 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
813 
814 	mparse_result(mp, &man, NULL);
815 	if (man == NULL) {
816 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
817 		pg_error_internal();
818 		mparse_free(mp);
819 		mchars_free();
820 		return;
821 	}
822 
823 	vp = html_alloc(&conf);
824 
825 	if (man->macroset == MACROSET_MDOC) {
826 		mdoc_validate(man);
827 		html_mdoc(vp, man);
828 	} else {
829 		man_validate(man);
830 		html_man(vp, man);
831 	}
832 
833 	html_free(vp);
834 	mparse_free(mp);
835 	mchars_free();
836 	free(conf.man);
837 }
838 
/*
 * Print one manual page.  After skipping an optional leading
 * "./", a file name starting with 'c' is treated as a
 * preformatted page and anything else as manual source.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
851 
852 static void
853 pg_show(struct req *req, const char *fullpath)
854 {
855 	char		*manpath;
856 	const char	*file;
857 
858 	if ((file = strchr(fullpath, '/')) == NULL) {
859 		pg_error_badrequest(
860 		    "You did not specify a page to show.");
861 		return;
862 	}
863 	manpath = mandoc_strndup(fullpath, file - fullpath);
864 	file++;
865 
866 	if ( ! validate_manpath(req, manpath)) {
867 		pg_error_badrequest(
868 		    "You specified an invalid manpath.");
869 		free(manpath);
870 		return;
871 	}
872 
873 	/*
874 	 * Begin by chdir()ing into the manpath.
875 	 * This way we can pick up the database files, which are
876 	 * relative to the manpath root.
877 	 */
878 
879 	if (chdir(manpath) == -1) {
880 		warn("chdir %s", manpath);
881 		pg_error_internal();
882 		free(manpath);
883 		return;
884 	}
885 	free(manpath);
886 
887 	if ( ! validate_filename(file)) {
888 		pg_error_badrequest(
889 		    "You specified an invalid manual file.");
890 		return;
891 	}
892 
893 	resp_begin_html(200, NULL);
894 	resp_searchform(req, FOCUS_NONE);
895 	resp_show(req, file);
896 	resp_end_html();
897 }
898 
899 static void
900 pg_search(const struct req *req)
901 {
902 	struct mansearch	  search;
903 	struct manpaths		  paths;
904 	struct manpage		 *res;
905 	char			**argv;
906 	char			 *query, *rp, *wp;
907 	size_t			  ressz;
908 	int			  argc;
909 
910 	/*
911 	 * Begin by chdir()ing into the root of the manpath.
912 	 * This way we can pick up the database files, which are
913 	 * relative to the manpath root.
914 	 */
915 
916 	if (chdir(req->q.manpath) == -1) {
917 		warn("chdir %s", req->q.manpath);
918 		pg_error_internal();
919 		return;
920 	}
921 
922 	search.arch = req->q.arch;
923 	search.sec = req->q.sec;
924 	search.outkey = "Nd";
925 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
926 	search.firstmatch = 1;
927 
928 	paths.sz = 1;
929 	paths.paths = mandoc_malloc(sizeof(char *));
930 	paths.paths[0] = mandoc_strdup(".");
931 
932 	/*
933 	 * Break apart at spaces with backslash-escaping.
934 	 */
935 
936 	argc = 0;
937 	argv = NULL;
938 	rp = query = mandoc_strdup(req->q.query);
939 	for (;;) {
940 		while (isspace((unsigned char)*rp))
941 			rp++;
942 		if (*rp == '\0')
943 			break;
944 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
945 		argv[argc++] = wp = rp;
946 		for (;;) {
947 			if (isspace((unsigned char)*rp)) {
948 				*wp = '\0';
949 				rp++;
950 				break;
951 			}
952 			if (rp[0] == '\\' && rp[1] != '\0')
953 				rp++;
954 			if (wp != rp)
955 				*wp = *rp;
956 			if (*rp == '\0')
957 				break;
958 			wp++;
959 			rp++;
960 		}
961 	}
962 
963 	if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
964 		pg_noresult(req, "You entered an invalid query.");
965 	else if (0 == ressz)
966 		pg_noresult(req, "No results found.");
967 	else
968 		pg_searchres(req, res, ressz);
969 
970 	free(query);
971 	mansearch_free(res, ressz);
972 	free(paths.paths[0]);
973 	free(paths.paths);
974 }
975 
976 int
977 main(void)
978 {
979 	struct req	 req;
980 	struct itimerval itimer;
981 	const char	*path;
982 	const char	*querystring;
983 	int		 i;
984 
985 	/* Poor man's ReDoS mitigation. */
986 
987 	itimer.it_value.tv_sec = 2;
988 	itimer.it_value.tv_usec = 0;
989 	itimer.it_interval.tv_sec = 2;
990 	itimer.it_interval.tv_usec = 0;
991 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
992 		warn("setitimer");
993 		pg_error_internal();
994 		return EXIT_FAILURE;
995 	}
996 
997 	/*
998 	 * First we change directory into the MAN_DIR so that
999 	 * subsequent scanning for manpath directories is rooted
1000 	 * relative to the same position.
1001 	 */
1002 
1003 	if (chdir(MAN_DIR) == -1) {
1004 		warn("MAN_DIR: %s", MAN_DIR);
1005 		pg_error_internal();
1006 		return EXIT_FAILURE;
1007 	}
1008 
1009 	memset(&req, 0, sizeof(struct req));
1010 	req.q.equal = 1;
1011 	parse_manpath_conf(&req);
1012 
1013 	/* Parse the path info and the query string. */
1014 
1015 	if ((path = getenv("PATH_INFO")) == NULL)
1016 		path = "";
1017 	else if (*path == '/')
1018 		path++;
1019 
1020 	if (*path != '\0') {
1021 		parse_path_info(&req, path);
1022 		if (req.q.manpath == NULL || access(path, F_OK) == -1)
1023 			path = "";
1024 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1025 		parse_query_string(&req, querystring);
1026 
1027 	/* Validate parsed data and add defaults. */
1028 
1029 	if (req.q.manpath == NULL)
1030 		req.q.manpath = mandoc_strdup(req.p[0]);
1031 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1032 		pg_error_badrequest(
1033 		    "You specified an invalid manpath.");
1034 		return EXIT_FAILURE;
1035 	}
1036 
1037 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1038 		pg_error_badrequest(
1039 		    "You specified an invalid architecture.");
1040 		return EXIT_FAILURE;
1041 	}
1042 
1043 	/* Dispatch to the three different pages. */
1044 
1045 	if ('\0' != *path)
1046 		pg_show(&req, path);
1047 	else if (NULL != req.q.query)
1048 		pg_search(&req);
1049 	else
1050 		pg_index(&req);
1051 
1052 	free(req.q.manpath);
1053 	free(req.q.arch);
1054 	free(req.q.sec);
1055 	free(req.q.query);
1056 	for (i = 0; i < (int)req.psz; i++)
1057 		free(req.p[i]);
1058 	free(req.p);
1059 	return EXIT_SUCCESS;
1060 }
1061 
1062 /*
1063  * If PATH_INFO is not a file name, translate it to a query.
1064  */
1065 static void
1066 parse_path_info(struct req *req, const char *path)
1067 {
1068 	char	*dir[4];
1069 	int	 i;
1070 
1071 	req->isquery = 0;
1072 	req->q.equal = 1;
1073 	req->q.manpath = mandoc_strdup(path);
1074 	req->q.arch = NULL;
1075 
1076 	/* Mandatory manual page name. */
1077 	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1078 		req->q.query = req->q.manpath;
1079 		req->q.manpath = NULL;
1080 	} else
1081 		*req->q.query++ = '\0';
1082 
1083 	/* Optional trailing section. */
1084 	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1085 		if(isdigit((unsigned char)req->q.sec[1])) {
1086 			*req->q.sec++ = '\0';
1087 			req->q.sec = mandoc_strdup(req->q.sec);
1088 		} else
1089 			req->q.sec = NULL;
1090 	}
1091 
1092 	/* Handle the case of name[.section] only. */
1093 	if (req->q.manpath == NULL)
1094 		return;
1095 	req->q.query = mandoc_strdup(req->q.query);
1096 
1097 	/* Split directory components. */
1098 	dir[i = 0] = req->q.manpath;
1099 	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1100 		if (++i == 3) {
1101 			pg_error_badrequest(
1102 			    "You specified too many directory components.");
1103 			exit(EXIT_FAILURE);
1104 		}
1105 		*dir[i]++ = '\0';
1106 	}
1107 
1108 	/* Optional manpath. */
1109 	if ((i = validate_manpath(req, req->q.manpath)) == 0)
1110 		req->q.manpath = NULL;
1111 	else if (dir[1] == NULL)
1112 		return;
1113 
1114 	/* Optional section. */
1115 	if (strncmp(dir[i], "man", 3) == 0) {
1116 		free(req->q.sec);
1117 		req->q.sec = mandoc_strdup(dir[i++] + 3);
1118 	}
1119 	if (dir[i] == NULL) {
1120 		if (req->q.manpath == NULL)
1121 			free(dir[0]);
1122 		return;
1123 	}
1124 	if (dir[i + 1] != NULL) {
1125 		pg_error_badrequest(
1126 		    "You specified an invalid directory component.");
1127 		exit(EXIT_FAILURE);
1128 	}
1129 
1130 	/* Optional architecture. */
1131 	if (i) {
1132 		req->q.arch = mandoc_strdup(dir[i]);
1133 		if (req->q.manpath == NULL)
1134 			free(dir[0]);
1135 	} else
1136 		req->q.arch = dir[0];
1137 }
1138 
1139 /*
1140  * Scan for indexable paths.
1141  */
1142 static void
1143 parse_manpath_conf(struct req *req)
1144 {
1145 	FILE	*fp;
1146 	char	*dp;
1147 	size_t	 dpsz;
1148 	ssize_t	 len;
1149 
1150 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1151 		warn("%s/manpath.conf", MAN_DIR);
1152 		pg_error_internal();
1153 		exit(EXIT_FAILURE);
1154 	}
1155 
1156 	dp = NULL;
1157 	dpsz = 0;
1158 
1159 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1160 		if (dp[len - 1] == '\n')
1161 			dp[--len] = '\0';
1162 		req->p = mandoc_realloc(req->p,
1163 		    (req->psz + 1) * sizeof(char *));
1164 		if ( ! validate_urifrag(dp)) {
1165 			warnx("%s/manpath.conf contains "
1166 			    "unsafe path \"%s\"", MAN_DIR, dp);
1167 			pg_error_internal();
1168 			exit(EXIT_FAILURE);
1169 		}
1170 		if (strchr(dp, '/') != NULL) {
1171 			warnx("%s/manpath.conf contains "
1172 			    "path with slash \"%s\"", MAN_DIR, dp);
1173 			pg_error_internal();
1174 			exit(EXIT_FAILURE);
1175 		}
1176 		req->p[req->psz++] = dp;
1177 		dp = NULL;
1178 		dpsz = 0;
1179 	}
1180 	free(dp);
1181 
1182 	if (req->p == NULL) {
1183 		warnx("%s/manpath.conf is empty", MAN_DIR);
1184 		pg_error_internal();
1185 		exit(EXIT_FAILURE);
1186 	}
1187 }
1188