xref: /freebsd/contrib/mandoc/cgi.c (revision 6d38604f)
1 /* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */
2 /*
3  * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the man.cgi(8) program.
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 #include <sys/time.h>
24 
25 #include <ctype.h>
26 #if HAVE_ERR
27 #include <err.h>
28 #endif
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
38 #include "mandoc_aux.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "mdoc.h"
42 #include "man.h"
43 #include "mandoc_parse.h"
44 #include "main.h"
45 #include "manconf.h"
46 #include "mansearch.h"
47 #include "cgi.h"
48 
/*
 * A query as passed to the search function.
 * The string members are heap-allocated and owned by the structure;
 * a NULL pointer means the respective parameter was not specified.
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
59 
/*
 * State of one CGI request: the parsed query parameters
 * together with the list of manpaths read from manpath.conf.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
66 
/*
 * Selects whether resp_searchform() marks the query input box
 * with the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0, /* do not autofocus any form element */
	FOCUS_QUERY /* autofocus the query input box */
};
71 
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	void		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	void		 resp_copy(const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
102 
/*
 * URI path of this script as used in generated links, printed
 * after a leading slash.  It may be the empty string, in which
 * case no separating slash is added after it.
 * NOTE(review): SCRIPT_NAME is presumably defined in cgi.h -- confirm.
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Priorities of the sections 1 to 9, indexed by the section digit
 * minus one.  In pg_searchres(), the page with the lowest value
 * is chosen for display.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Parallel arrays for the section selector of the search form:
 * sec_numbers[i] is the value submitted for the section labelled
 * sec_names[i].  Entry 0 stands for "all sections".
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);

/*
 * Architecture names accepted by validate_arch()
 * and offered in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",       "arm64",
    "hppa",        "i386",        "landisk",     "loongson",
    "luna88k",     "macppc",      "mips64",      "octeon",
    "powerpc64",   "riscv64",     "sparc64",

    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "sgi",         "socppc",
    "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
141 
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML (double quote, ampersand,
 * and both angle brackets) by their character entities.
 * Everything else, including non-ASCII bytes, is written verbatim.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
168 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p != NULL)
		for (cp = p; *cp != '\0'; cp++)
			html_putchar(*cp);
}
182 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In either case, *val is NULL afterwards, so the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	free(*attr);
	*attr = *incoming == '\0' ? NULL : incoming;
	if (*attr == NULL)
		free(incoming);
	*val = NULL;
}
199 
/*
 * Parse the QUERY_STRING for key-value pairs
 * and store the values into the query structure.
 *
 * Pairs are separated by ';' or '&', and a key is separated from
 * its value by '='.  The keys "query", "apropos", "manpath", "sec"
 * (with COMPAT_OLDURI also "sektion"), and "arch" are recognized.
 * Unknown keys, keys without a value, and pairs that fail HTTP
 * decoding are skipped silently.
 *
 * All query fields are reset up front without freeing their
 * previous content, so the strings must not be allocated yet
 * when this function is called.
 */
static void
parse_query_string(struct req *req, const char *qs)
{
	char		*key, *val;
	size_t		 keysz, valsz;

	req->isquery	= 1;
	req->q.manpath	= NULL;
	req->q.arch	= NULL;
	req->q.sec	= NULL;
	req->q.query	= NULL;
	req->q.equal	= 1;

	key = val = NULL;
	while (*qs != '\0') {

		/* Parse one key. */

		keysz = strcspn(qs, "=;&");
		key = mandoc_strndup(qs, keysz);
		qs += keysz;
		if (*qs != '=')
			goto next;

		/* Parse one value. */

		valsz = strcspn(++qs, ";&");
		val = mandoc_strndup(qs, valsz);
		qs += valsz;

		/* Decode and catch encoding errors. */

		if ( ! (http_decode(key) && http_decode(val)))
			goto next;

		/* Handle key-value pairs. */

		if ( ! strcmp(key, "query"))
			set_query_attr(&req->q.query, &val);

		/* Only the value "0" selects whole-name matching. */
		else if ( ! strcmp(key, "apropos"))
			req->q.equal = !strcmp(val, "0");

		else if ( ! strcmp(key, "manpath")) {
#ifdef COMPAT_OLDURI
			/*
			 * Rewrite a leading "OpenBSD " to "OpenBSD-"
			 * and downcase a directly following 'C'.
			 */
			if ( ! strncmp(val, "OpenBSD ", 8)) {
				val[7] = '-';
				if ('C' == val[8])
					val[8] = 'c';
			}
#endif
			set_query_attr(&req->q.manpath, &val);
		}

		else if ( ! (strcmp(key, "sec")
#ifdef COMPAT_OLDURI
		    && strcmp(key, "sektion")
#endif
		    )) {
			/* Section "0" means all sections: store NULL. */
			if ( ! strcmp(val, "0"))
				*val = '\0';
			set_query_attr(&req->q.sec, &val);
		}

		else if ( ! strcmp(key, "arch")) {
			/* "default" means all architectures: store NULL. */
			if ( ! strcmp(val, "default"))
				*val = '\0';
			set_query_attr(&req->q.arch, &val);
		}

		/*
		 * The key must be freed in any case.
		 * The val may have been handed over to the query
		 * structure, in which case it is now NULL.
		 */
next:
		free(key);
		key = NULL;
		free(val);
		val = NULL;

		/* Step over the pair separator, if any. */
		if (*qs != '\0')
			qs++;
	}
}
289 
/*
 * HTTP-decode a string in place over the allocated string:
 * "%XX" becomes the byte with the hexadecimal value XX and
 * '+' becomes a space, so "%4e+foo" turns into "N foo".
 *
 * Return 1 on success.  Return 0 on an encoding error, that is,
 * if a '%' is not followed by exactly two hexadecimal digits or
 * if it encodes a NUL byte.  After a failure, the string is left
 * partially decoded and should be discarded by the caller.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	long		 c;

	hex[2] = '\0';

	for (q = p; '\0' != *p; p++, q++) {
		if ('%' != *p) {
			*q = '+' == *p ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hexadecimal digits rather than
		 * on whatever a lenient number parser would accept
		 * (signs, blanks, or a single digit followed by junk).
		 * The && stops at a terminating NUL, so p[2] is only
		 * inspected when p[1] is a valid digit.
		 */

		if ( ! (isxdigit((unsigned char)p[1]) &&
		    isxdigit((unsigned char)p[2])))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];
		c = strtol(hex, NULL, 16);

		/* A decoded NUL would silently truncate the string. */

		if (0 == c)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
325 
/*
 * Write a string to standard output with percent-encoding:
 * alphanumeric characters and the characters "-._~" are written
 * verbatim, every other byte as '%' followed by two uppercase
 * hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p) != '\0') {
		if (isalnum(uc) != 0 || strchr("-._~", *p) != NULL)
			putchar(*p);
		else
			printf("%%%2.2X", uc);
		p++;
	}
}
337 
/*
 * Write the HTTP response header block and flush it.
 * A "Status:" line with the given code and message is only
 * written if the code differs from 200; msg is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
354 
/*
 * Copy the content of a file verbatim to standard output.
 * Nothing is written if the file cannot be opened.
 *
 * The data bypasses stdio, so pending stdio output is flushed
 * first to preserve the output order.  Short writes are continued
 * until the whole chunk has been written; copying stops quietly
 * as soon as reading or writing fails.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));

			/* Give up instead of spinning on a dead output. */
			if (nw <= 0) {
				close(fd);
				return;
			}
		}
	}
	close(fd);
}
369 
/*
 * Start the response: write the HTTP headers, the HTML head,
 * the opening body tag, and the content of MAN_DIR "/header.html".
 *
 * The code and msg arguments are passed on to resp_begin_http().
 * If file is not NULL, the title starts with "name(sec) - ",
 * where name is the last path component of file without its
 * last suffix; the section part is omitted if none is found.
 */
static void
resp_begin_html(int code, const char *msg, const char *file)
{
	const char	*name, *sec, *cp;
	int		 namesz, secsz;

	resp_begin_http(code, msg);

	printf("<!DOCTYPE html>\n"
	       "<html>\n"
	       "<head>\n"
	       "  <meta charset=\"UTF-8\"/>\n"
	       "  <meta name=\"viewport\""
		      " content=\"width=device-width, initial-scale=1.0\">\n"
	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
	       " type=\"text/css\" media=\"all\">\n"
	       "  <title>",
	       CSS_DIR);
	if (file != NULL) {
		/* The name is the basename up to its last dot. */
		cp = strrchr(file, '/');
		name = cp == NULL ? file : cp + 1;
		cp = strrchr(name, '.');
		namesz = cp == NULL ? strlen(name) : cp - name;
		sec = NULL;
		if (cp != NULL && cp[1] != '0') {
			/* Use the file name suffix as the section. */
			sec = cp + 1;
			secsz = strlen(sec);
		} else if (name - file > 1) {
			/*
			 * No suffix, or a suffix starting with '0':
			 * scan the directory part backwards for the
			 * last digit from 1 to 9 and take the text
			 * from that digit up to the slash preceding
			 * the name as the section.
			 */
			for (cp = name - 2; cp >= file; cp--) {
				if (*cp < '1' || *cp > '9')
					continue;
				sec = cp;
				secsz = name - cp - 1;
				break;
			}
		}
		printf("%.*s", namesz, name);
		if (sec != NULL)
			printf("(%.*s)", secsz, sec);
		fputs(" - ", stdout);
	}
	printf("%s</title>\n"
	       "</head>\n"
	       "<body>\n",
	       CUSTOMIZE_TITLE);

	resp_copy(MAN_DIR "/header.html");
}
418 
419 static void
resp_end_html(void)420 resp_end_html(void)
421 {
422 
423 	resp_copy(MAN_DIR "/footer.html");
424 
425 	puts("</body>\n"
426 	     "</html>");
427 }
428 
/*
 * Write the search form, pre-filled from the query in req:
 * the query input box, the "man" and "apropos" submit buttons,
 * the section and architecture selectors, and, if more than one
 * manpath is available, the manpath selector.
 * With focus == FOCUS_QUERY, the query box gets "autofocus".
 * The manpath selector dereferences req->q.manpath, which
 * must not be NULL when more than one manpath exists.
 */
static void
resp_searchform(const struct req *req, enum focus focus)
{
	int		 i;

	printf("<form action=\"/%s\" method=\"get\" "
	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
	       "  <fieldset>\n"
	       "    <legend>Manual Page Search Parameters</legend>\n",
	       scriptname);

	/* Write query input box. */

	printf("    <input type=\"search\" name=\"query\" value=\"");
	if (req->q.query != NULL)
		html_print(req->q.query);
	printf( "\" size=\"40\"");
	if (focus == FOCUS_QUERY)
		printf(" autofocus");
	puts(">");

	/* Write submission buttons. */

	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
		"man</button>\n"
		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
		"apropos</button>\n"
		"    <br/>\n");

	/* Write section selector. */

	puts("    <select name=\"sec\">");
	for (i = 0; i < sec_MAX; i++) {
		printf("      <option value=\"%s\"", sec_numbers[i]);
		if (NULL != req->q.sec &&
		    0 == strcmp(sec_numbers[i], req->q.sec))
			printf(" selected=\"selected\"");
		printf(">%s</option>\n", sec_names[i]);
	}
	puts("    </select>");

	/* Write architecture selector. */

	printf(	"    <select name=\"arch\">\n"
		"      <option value=\"default\"");
	if (NULL == req->q.arch)
		printf(" selected=\"selected\"");
	puts(">All Architectures</option>");
	for (i = 0; i < arch_MAX; i++) {
		printf("      <option");
		if (NULL != req->q.arch &&
		    0 == strcmp(arch_names[i], req->q.arch))
			printf(" selected=\"selected\"");
		printf(">%s</option>\n", arch_names[i]);
	}
	puts("    </select>");

	/* Write manpath selector. */

	if (req->psz > 1) {
		puts("    <select name=\"manpath\">");
		for (i = 0; i < (int)req->psz; i++) {
			printf("      <option");
			if (strcmp(req->q.manpath, req->p[i]) == 0)
				printf(" selected=\"selected\"");
			printf(">");
			html_print(req->p[i]);
			puts("</option>");
		}
		puts("    </select>");
	}

	puts("  </fieldset>\n"
	     "</form>");
}
504 
/*
 * Check that a string is safe for use as a URI fragment.
 * Return 1 if it consists exclusively of alphanumeric characters
 * and the characters '-', '.', '/', and '_', or 0 otherwise.
 * The empty string is accepted.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
518 
519 static int
validate_manpath(const struct req * req,const char * manpath)520 validate_manpath(const struct req *req, const char* manpath)
521 {
522 	size_t	 i;
523 
524 	for (i = 0; i < req->psz; i++)
525 		if ( ! strcmp(manpath, req->p[i]))
526 			return 1;
527 
528 	return 0;
529 }
530 
531 static int
validate_arch(const char * arch)532 validate_arch(const char *arch)
533 {
534 	int	 i;
535 
536 	for (i = 0; i < arch_MAX; i++)
537 		if (strcmp(arch, arch_names[i]) == 0)
538 			return 1;
539 
540 	return 0;
541 }
542 
/*
 * Check a manual page file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Return 1 if the name is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Reject attempts to climb out of the manpath. */
	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
553 
554 static void
pg_index(const struct req * req)555 pg_index(const struct req *req)
556 {
557 
558 	resp_begin_html(200, NULL, NULL);
559 	resp_searchform(req, FOCUS_QUERY);
560 	printf("<p>\n"
561 	       "This web interface is documented in the\n"
562 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
563 	       "manual, and the\n"
564 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
565 	       "manual explains the query syntax.\n"
566 	       "</p>\n",
567 	       scriptname, *scriptname == '\0' ? "" : "/",
568 	       scriptname, *scriptname == '\0' ? "" : "/");
569 	resp_end_html();
570 }
571 
572 static void
pg_noresult(const struct req * req,int code,const char * http_msg,const char * user_msg)573 pg_noresult(const struct req *req, int code, const char *http_msg,
574     const char *user_msg)
575 {
576 	resp_begin_html(code, http_msg, NULL);
577 	resp_searchform(req, FOCUS_QUERY);
578 	puts("<p>");
579 	puts(user_msg);
580 	puts("</p>");
581 	resp_end_html();
582 }
583 
584 static void
pg_error_badrequest(const char * msg)585 pg_error_badrequest(const char *msg)
586 {
587 
588 	resp_begin_html(400, "Bad Request", NULL);
589 	puts("<h1>Bad Request</h1>\n"
590 	     "<p>\n");
591 	puts(msg);
592 	printf("Try again from the\n"
593 	       "<a href=\"/%s\">main page</a>.\n"
594 	       "</p>", scriptname);
595 	resp_end_html();
596 }
597 
/*
 * Write a "500 Internal Server Error" page.
 * No details are disclosed to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	printf("%s\n", "<p>Internal Server Error</p>");
	resp_end_html();
}
605 
606 static void
pg_redirect(const struct req * req,const char * name)607 pg_redirect(const struct req *req, const char *name)
608 {
609 	printf("Status: 303 See Other\r\n"
610 	    "Location: /");
611 	if (*scriptname != '\0')
612 		printf("%s/", scriptname);
613 	if (strcmp(req->q.manpath, req->p[0]))
614 		printf("%s/", req->q.manpath);
615 	if (req->q.arch != NULL)
616 		printf("%s/", req->q.arch);
617 	http_encode(name);
618 	if (req->q.sec != NULL) {
619 		putchar('.');
620 		http_encode(req->q.sec);
621 	}
622 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
623 }
624 
625 static void
pg_searchres(const struct req * req,struct manpage * r,size_t sz)626 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
627 {
628 	char		*arch, *archend;
629 	const char	*sec;
630 	size_t		 i, iuse;
631 	int		 archprio, archpriouse;
632 	int		 prio, priouse;
633 
634 	for (i = 0; i < sz; i++) {
635 		if (validate_filename(r[i].file))
636 			continue;
637 		warnx("invalid filename %s in %s database",
638 		    r[i].file, req->q.manpath);
639 		pg_error_internal();
640 		return;
641 	}
642 
643 	if (req->isquery && sz == 1) {
644 		/*
645 		 * If we have just one result, then jump there now
646 		 * without any delay.
647 		 */
648 		printf("Status: 303 See Other\r\n"
649 		    "Location: /");
650 		if (*scriptname != '\0')
651 			printf("%s/", scriptname);
652 		if (strcmp(req->q.manpath, req->p[0]))
653 			printf("%s/", req->q.manpath);
654 		printf("%s\r\n"
655 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
656 		    r[0].file);
657 		return;
658 	}
659 
660 	/*
661 	 * In man(1) mode, show one of the pages
662 	 * even if more than one is found.
663 	 */
664 
665 	iuse = 0;
666 	if (req->q.equal || sz == 1) {
667 		priouse = 20;
668 		archpriouse = 3;
669 		for (i = 0; i < sz; i++) {
670 			sec = r[i].file;
671 			sec += strcspn(sec, "123456789");
672 			if (sec[0] == '\0')
673 				continue;
674 			prio = sec_prios[sec[0] - '1'];
675 			if (sec[1] != '/')
676 				prio += 10;
677 			if (req->q.arch == NULL) {
678 				archprio =
679 				    ((arch = strchr(sec + 1, '/'))
680 					== NULL) ? 3 :
681 				    ((archend = strchr(arch + 1, '/'))
682 					== NULL) ? 0 :
683 				    strncmp(arch, "amd64/",
684 					archend - arch) ? 2 : 1;
685 				if (archprio < archpriouse) {
686 					archpriouse = archprio;
687 					priouse = prio;
688 					iuse = i;
689 					continue;
690 				}
691 				if (archprio > archpriouse)
692 					continue;
693 			}
694 			if (prio >= priouse)
695 				continue;
696 			priouse = prio;
697 			iuse = i;
698 		}
699 		resp_begin_html(200, NULL, r[iuse].file);
700 	} else
701 		resp_begin_html(200, NULL, NULL);
702 
703 	resp_searchform(req,
704 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
705 
706 	if (sz > 1) {
707 		puts("<table class=\"results\">");
708 		for (i = 0; i < sz; i++) {
709 			printf("  <tr>\n"
710 			       "    <td>"
711 			       "<a class=\"Xr\" href=\"/");
712 			if (*scriptname != '\0')
713 				printf("%s/", scriptname);
714 			if (strcmp(req->q.manpath, req->p[0]))
715 				printf("%s/", req->q.manpath);
716 			printf("%s\">", r[i].file);
717 			html_print(r[i].names);
718 			printf("</a></td>\n"
719 			       "    <td><span class=\"Nd\">");
720 			html_print(r[i].output);
721 			puts("</span></td>\n"
722 			     "  </tr>");
723 		}
724 		puts("</table>");
725 	}
726 
727 	if (req->q.equal || sz == 1) {
728 		puts("<hr>");
729 		resp_show(req, r[iuse].file);
730 	}
731 
732 	resp_end_html();
733 }
734 
/*
 * Write a preformatted manual page (cat page) as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" to italic X, the bullet and box drawing
 * combinations known from man2html to '*' and '+', and any other
 * "X\bY" to bold Y.  All other characters are written with HTML
 * escaping.  If the file cannot be opened, an error paragraph
 * is written instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace
			 * and p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
871 
/*
 * Parse a manual page source file with the mandoc parser
 * and write it as an HTML fragment.
 * If the file cannot be opened, an error paragraph is written
 * instead.  Links to other manual pages use the template
 * /[scriptname/][manpath/]%N.%S, where the manpath is left out
 * if it is the first one in req->p.
 */
static void
resp_format(const struct req *req, const char *file)
{
	struct manoutput conf;
	struct mparse	*mp;
	struct roff_meta *meta;
	void		*vp;
	int		 fd;
	int		 usepath;

	if (-1 == (fd = open(file, O_RDONLY, 0))) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	/*
	 * NOTE(review): the last argument of mparse_alloc() is
	 * presumably the default operating system name shown in
	 * the page footer -- confirm against mandoc_parse.h.
	 */
	mchars_alloc();
	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
	mparse_readfd(mp, fd, file);
	close(fd);
	meta = mparse_result(mp);

	/* Request an HTML fragment rather than a full document. */
	memset(&conf, 0, sizeof(conf));
	conf.fragment = 1;
	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
	usepath = strcmp(req->q.manpath, req->p[0]);
	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
	    scriptname, *scriptname == '\0' ? "" : "/",
	    usepath ? req->q.manpath : "", usepath ? "/" : "");

	/* Use the formatter matching the macro set of the page. */
	vp = html_alloc(&conf);
	if (meta->macroset == MACROSET_MDOC)
		html_mdoc(vp, meta);
	else
		html_man(vp, meta);

	html_free(vp);
	mparse_free(mp);
	mchars_free();
	free(conf.man);
	free(conf.style);
}
914 
/*
 * Write the content of one manual page file.
 * After skipping an optional leading "./", names starting
 * with 'c' are handled as preformatted pages by resp_catman()
 * and all others as manual page sources by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = (file[0] == '.' && file[1] == '/') ? file + 2 : file;

	if (*rel == 'c') {
		resp_catman(req, rel);
		return;
	}
	resp_format(req, rel);
}
927 
928 static void
pg_show(struct req * req,const char * fullpath)929 pg_show(struct req *req, const char *fullpath)
930 {
931 	char		*manpath;
932 	const char	*file;
933 
934 	if ((file = strchr(fullpath, '/')) == NULL) {
935 		pg_error_badrequest(
936 		    "You did not specify a page to show.");
937 		return;
938 	}
939 	manpath = mandoc_strndup(fullpath, file - fullpath);
940 	file++;
941 
942 	if ( ! validate_manpath(req, manpath)) {
943 		pg_error_badrequest(
944 		    "You specified an invalid manpath.");
945 		free(manpath);
946 		return;
947 	}
948 
949 	/*
950 	 * Begin by chdir()ing into the manpath.
951 	 * This way we can pick up the database files, which are
952 	 * relative to the manpath root.
953 	 */
954 
955 	if (chdir(manpath) == -1) {
956 		warn("chdir %s", manpath);
957 		pg_error_internal();
958 		free(manpath);
959 		return;
960 	}
961 	free(manpath);
962 
963 	if ( ! validate_filename(file)) {
964 		pg_error_badrequest(
965 		    "You specified an invalid manual file.");
966 		return;
967 	}
968 
969 	resp_begin_html(200, NULL, file);
970 	resp_searchform(req, FOCUS_NONE);
971 	resp_show(req, file);
972 	resp_end_html();
973 }
974 
975 static void
pg_search(const struct req * req)976 pg_search(const struct req *req)
977 {
978 	struct mansearch	  search;
979 	struct manpaths		  paths;
980 	struct manpage		 *res;
981 	char			**argv;
982 	char			 *query, *rp, *wp;
983 	size_t			  ressz;
984 	int			  argc;
985 
986 	/*
987 	 * Begin by chdir()ing into the root of the manpath.
988 	 * This way we can pick up the database files, which are
989 	 * relative to the manpath root.
990 	 */
991 
992 	if (chdir(req->q.manpath) == -1) {
993 		warn("chdir %s", req->q.manpath);
994 		pg_error_internal();
995 		return;
996 	}
997 
998 	search.arch = req->q.arch;
999 	search.sec = req->q.sec;
1000 	search.outkey = "Nd";
1001 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1002 	search.firstmatch = 1;
1003 
1004 	paths.sz = 1;
1005 	paths.paths = mandoc_malloc(sizeof(char *));
1006 	paths.paths[0] = mandoc_strdup(".");
1007 
1008 	/*
1009 	 * Break apart at spaces with backslash-escaping.
1010 	 */
1011 
1012 	argc = 0;
1013 	argv = NULL;
1014 	rp = query = mandoc_strdup(req->q.query);
1015 	for (;;) {
1016 		while (isspace((unsigned char)*rp))
1017 			rp++;
1018 		if (*rp == '\0')
1019 			break;
1020 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1021 		argv[argc++] = wp = rp;
1022 		for (;;) {
1023 			if (isspace((unsigned char)*rp)) {
1024 				*wp = '\0';
1025 				rp++;
1026 				break;
1027 			}
1028 			if (rp[0] == '\\' && rp[1] != '\0')
1029 				rp++;
1030 			if (wp != rp)
1031 				*wp = *rp;
1032 			if (*rp == '\0')
1033 				break;
1034 			wp++;
1035 			rp++;
1036 		}
1037 	}
1038 
1039 	res = NULL;
1040 	ressz = 0;
1041 	if (req->isquery && req->q.equal && argc == 1)
1042 		pg_redirect(req, argv[0]);
1043 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1044 		pg_noresult(req, 400, "Bad Request",
1045 		    "You entered an invalid query.");
1046 	else if (ressz == 0)
1047 		pg_noresult(req, 404, "Not Found", "No results found.");
1048 	else
1049 		pg_searchres(req, res, ressz);
1050 
1051 	free(query);
1052 	mansearch_free(res, ressz);
1053 	free(paths.paths[0]);
1054 	free(paths.paths);
1055 }
1056 
/*
 * Entry point of the CGI program.
 *
 * After restricting privileges where pledge(2) is available and
 * arming a CPU time timer, change into MAN_DIR, read manpath.conf,
 * and parse the request from PATH_INFO or, if that is empty, from
 * QUERY_STRING.  Then dispatch to one of three pages: a manual
 * page for a usable path, search results for a query, or the
 * index page otherwise.
 *
 * Return EXIT_SUCCESS after writing a page, or EXIT_FAILURE after
 * writing an error page for a setup or validation failure.
 */
int
main(void)
{
	struct req	 req;
	struct itimerval itimer;
	const char	*path;
	const char	*querystring;
	int		 i;

#if HAVE_PLEDGE
	/*
	 * The "rpath" pledge could be revoked after mparse_readfd()
	 * if the file descriptor to "/footer.html" would be opened
	 * up front, but it's probably not worth the complication
	 * of the code it would cause: it would require scattering
	 * pledge() calls in multiple low-level resp_*() functions.
	 */

	if (pledge("stdio rpath", NULL) == -1) {
		warn("pledge");
		pg_error_internal();
		return EXIT_FAILURE;
	}
#endif

	/* Poor man's ReDoS mitigation. */

	itimer.it_value.tv_sec = 2;
	itimer.it_value.tv_usec = 0;
	itimer.it_interval.tv_sec = 2;
	itimer.it_interval.tv_usec = 0;
	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
		warn("setitimer");
		pg_error_internal();
		return EXIT_FAILURE;
	}

	/*
	 * First we change directory into the MAN_DIR so that
	 * subsequent scanning for manpath directories is rooted
	 * relative to the same position.
	 */

	if (chdir(MAN_DIR) == -1) {
		warn("MAN_DIR: %s", MAN_DIR);
		pg_error_internal();
		return EXIT_FAILURE;
	}

	memset(&req, 0, sizeof(struct req));
	req.q.equal = 1;
	parse_manpath_conf(&req);

	/* Parse the path info and the query string. */

	if ((path = getenv("PATH_INFO")) == NULL)
		path = "";
	else if (*path == '/')
		path++;

	/*
	 * Only show the file directly if the path names a manpath,
	 * a section, a non-empty page name, and an existing file.
	 * Otherwise, clear the path to fall back to searching
	 * with the query derived from the path.
	 */

	if (*path != '\0') {
		parse_path_info(&req, path);
		if (req.q.manpath == NULL || req.q.sec == NULL ||
		    *req.q.query == '\0' || access(path, F_OK) == -1)
			path = "";
	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
		parse_query_string(&req, querystring);

	/* Validate parsed data and add defaults. */

	if (req.q.manpath == NULL)
		req.q.manpath = mandoc_strdup(req.p[0]);
	else if ( ! validate_manpath(&req, req.q.manpath)) {
		pg_error_badrequest(
		    "You specified an invalid manpath.");
		return EXIT_FAILURE;
	}

	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid architecture.");
		return EXIT_FAILURE;
	}

	/* Dispatch to the three different pages. */

	if ('\0' != *path)
		pg_show(&req, path);
	else if (NULL != req.q.query)
		pg_search(&req);
	else
		pg_index(&req);

	free(req.q.manpath);
	free(req.q.arch);
	free(req.q.sec);
	free(req.q.query);
	for (i = 0; i < (int)req.psz; i++)
		free(req.p[i]);
	free(req.p);
	return EXIT_SUCCESS;
}
1159 
/*
 * Translate PATH_INFO to a query.
 *
 * The path, without a leading slash, has the form
 * [manpath/][man<sec>/ or cat<sec>/][arch/]name[.sec] and is
 * split into the manpath, section, architecture, and query fields
 * of req->q.  A first component that is not an available manpath
 * is treated as the next optional component instead.  A section
 * directory takes precedence over a section suffix of the name.
 *
 * If directory components remain after the optional ones, or if
 * the architecture is invalid, a bad request page is written
 * and the program exits.
 */
static void
parse_path_info(struct req *req, const char *path)
{
	const char	*name, *sec, *end;

	req->isquery = 0;
	req->q.equal = 1;
	req->q.manpath = NULL;
	req->q.arch = NULL;

	/* Mandatory manual page name. */
	if ((name = strrchr(path, '/')) == NULL)
		name = path;
	else
		name++;

	/* Optional trailing section. */
	sec = strrchr(name, '.');
	if (sec != NULL && isdigit((unsigned char)*++sec)) {
		req->q.query = mandoc_strndup(name, sec - name - 1);
		req->q.sec = mandoc_strdup(sec);
	} else {
		req->q.query = mandoc_strdup(name);
		req->q.sec = NULL;
	}

	/*
	 * From here on, path advances over the directory components
	 * that were consumed; all components have been handled
	 * once it reaches name.
	 */

	/* Handle the case of name[.section] only. */
	if (name == path)
		return;

	/* Optional manpath. */
	end = strchr(path, '/');
	req->q.manpath = mandoc_strndup(path, end - path);
	if (validate_manpath(req, req->q.manpath)) {
		path = end + 1;
		if (name == path)
			return;
	} else {
		free(req->q.manpath);
		req->q.manpath = NULL;
	}

	/* Optional section. */
	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
		path += 3;
		end = strchr(path, '/');
		free(req->q.sec);
		req->q.sec = mandoc_strndup(path, end - path);
		path = end + 1;
		if (name == path)
			return;
	}

	/* Optional architecture. */
	end = strchr(path, '/');
	if (end + 1 != name) {
		pg_error_badrequest(
		    "You specified too many directory components.");
		exit(EXIT_FAILURE);
	}
	req->q.arch = mandoc_strndup(path, end - path);
	if (validate_arch(req->q.arch) == 0) {
		pg_error_badrequest(
		    "You specified an invalid directory component.");
		exit(EXIT_FAILURE);
	}
}
1230 
1231 /*
1232  * Scan for indexable paths.
1233  */
1234 static void
parse_manpath_conf(struct req * req)1235 parse_manpath_conf(struct req *req)
1236 {
1237 	FILE	*fp;
1238 	char	*dp;
1239 	size_t	 dpsz;
1240 	ssize_t	 len;
1241 
1242 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1243 		warn("%s/manpath.conf", MAN_DIR);
1244 		pg_error_internal();
1245 		exit(EXIT_FAILURE);
1246 	}
1247 
1248 	dp = NULL;
1249 	dpsz = 0;
1250 
1251 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1252 		if (dp[len - 1] == '\n')
1253 			dp[--len] = '\0';
1254 		req->p = mandoc_realloc(req->p,
1255 		    (req->psz + 1) * sizeof(char *));
1256 		if ( ! validate_urifrag(dp)) {
1257 			warnx("%s/manpath.conf contains "
1258 			    "unsafe path \"%s\"", MAN_DIR, dp);
1259 			pg_error_internal();
1260 			exit(EXIT_FAILURE);
1261 		}
1262 		if (strchr(dp, '/') != NULL) {
1263 			warnx("%s/manpath.conf contains "
1264 			    "path with slash \"%s\"", MAN_DIR, dp);
1265 			pg_error_internal();
1266 			exit(EXIT_FAILURE);
1267 		}
1268 		req->p[req->psz++] = dp;
1269 		dp = NULL;
1270 		dpsz = 0;
1271 	}
1272 	free(dp);
1273 
1274 	if (req->p == NULL) {
1275 		warnx("%s/manpath.conf is empty", MAN_DIR);
1276 		pg_error_internal();
1277 		exit(EXIT_FAILURE);
1278 	}
1279 }
1280