1 /* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "str.h"
5 #include "strfuncs.h"
6 #include "net.h"
7 #include "uri-util.h"
8 
9 #include "http-url.h"
10 #include "http-request.h"
11 
/*
 * HTTP URL parser
 */
15 
16 struct http_url_parser {
17 	struct uri_parser parser;
18 
19 	enum http_url_parse_flags flags;
20 
21 	struct http_url *url;
22 	struct http_url *base;
23 
24 	enum http_request_target_format req_format;
25 
26  	bool relative:1;
27 	bool request_target:1;
28 };
29 
30 static bool http_url_parse_authority_form(struct http_url_parser *url_parser);
31 
32 static bool
http_url_parse_scheme(struct http_url_parser * url_parser,const char ** scheme_r)33 http_url_parse_scheme(struct http_url_parser *url_parser, const char **scheme_r)
34 {
35 	struct uri_parser *parser = &url_parser->parser;
36 
37 	*scheme_r = NULL;
38 	if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) != 0)
39 		return TRUE;
40 
41 	if (uri_parse_scheme(parser, scheme_r) <= 0) {
42 		parser->cur = parser->begin;
43 		return FALSE;
44 	}
45 
46 	return TRUE;
47 }
48 
http_url_parse_unknown_scheme(struct http_url_parser * url_parser)49 static bool http_url_parse_unknown_scheme(struct http_url_parser *url_parser)
50 {
51 	struct uri_parser *parser = &url_parser->parser;
52 
53 	if (url_parser->request_target) {
54 		/* Valid as non-HTTP scheme, but also try to parse as authority
55 		 */
56 		parser->cur = parser->begin;
57 		if (!http_url_parse_authority_form(url_parser)) {
58 			/* indicate non-http-url */
59 			url_parser->url = NULL;
60 			url_parser->req_format =
61 				HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
62 		}
63 		return TRUE;
64 	}
65 	parser->error = "Not an HTTP URL";
66 	return FALSE;
67 }
68 
69 static bool
http_url_parse_userinfo(struct http_url_parser * url_parser,struct uri_authority * auth,const char ** user_r,const char ** password_r)70 http_url_parse_userinfo(struct http_url_parser *url_parser,
71 			struct uri_authority *auth,
72 			const char **user_r, const char **password_r)
73 {
74 	struct uri_parser *parser = &url_parser->parser;
75 	const char *p;
76 
77 	*user_r = *password_r = NULL;
78 
79 	if (auth->enc_userinfo == NULL)
80 		return TRUE;
81 
82 	if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) {
83 		/* RFC 7230, Section 2.7.1: http URI Scheme
84 
85 		   A sender MUST NOT generate the userinfo subcomponent (and its
86 		   "@" delimiter) when an "http" URI reference is generated
87 		   within a message as a request target or header field value.
88 		   Before making use of an "http" URI reference received from an
89 		   untrusted source, a recipient SHOULD parse for userinfo and
90 		   treat its presence as an error; it is likely being used to
91 		   obscure the authority for the sake of phishing attacks.
92 		 */
93 		parser->error = "HTTP URL does not allow `userinfo@' part";
94 		return FALSE;
95 	}
96 
97 	p = strchr(auth->enc_userinfo, ':');
98 	if (p == NULL) {
99 		if (!uri_data_decode(parser, auth->enc_userinfo, NULL, user_r))
100 			return FALSE;
101 	} else {
102 		if (!uri_data_decode(parser, auth->enc_userinfo, p, user_r))
103 			return FALSE;
104 		if (!uri_data_decode(parser, p + 1, NULL, password_r))
105 			return FALSE;
106 	}
107 	return TRUE;
108 }
109 
http_url_parse_authority(struct http_url_parser * url_parser)110 static bool http_url_parse_authority(struct http_url_parser *url_parser)
111 {
112 	struct uri_parser *parser = &url_parser->parser;
113 	struct http_url *url = url_parser->url;
114 	struct uri_authority auth;
115 	const char *user = NULL, *password = NULL;
116 	int ret;
117 
118 	if ((ret = uri_parse_host_authority(parser, &auth)) < 0)
119 		return FALSE;
120 	if (auth.host.name == NULL || *auth.host.name == '\0') {
121 		/* RFC 7230, Section 2.7.1: http URI Scheme
122 
123 		   A sender MUST NOT generate an "http" URI with an empty host
124 		   identifier.  A recipient that processes such a URI reference
125 		   MUST reject it as invalid.
126 		 */
127 		parser->error = "HTTP URL does not allow empty host identifier";
128 		return FALSE;
129 	}
130 	if (ret > 0) {
131 		if (!http_url_parse_userinfo(url_parser, &auth,
132 					     &user, &password))
133 			return FALSE;
134 	}
135 	if (url != NULL) {
136 		uri_host_copy(parser->pool, &url->host, &auth.host);
137 		url->port = auth.port;
138 		url->user = p_strdup(parser->pool, user);
139 		url->password = p_strdup(parser->pool, password);
140 	}
141 	return TRUE;
142 }
143 
http_url_parse_authority_form(struct http_url_parser * url_parser)144 static bool http_url_parse_authority_form(struct http_url_parser *url_parser)
145 {
146 	struct uri_parser *parser = &url_parser->parser;
147 
148 	if (!http_url_parse_authority(url_parser))
149 		return FALSE;
150 	if (parser->cur != parser->end)
151 		return FALSE;
152 	url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
153 	return TRUE;
154 }
155 
156 static int
http_url_parse_path(struct http_url_parser * url_parser)157 http_url_parse_path(struct http_url_parser *url_parser)
158 {
159 	struct uri_parser *parser = &url_parser->parser;
160 	struct http_url *url = url_parser->url, *base = url_parser->base;
161 	const char *const *path;
162 	int path_relative;
163 	string_t *fullpath = NULL;
164 	int ret;
165 
166 	/* path-abempty / path-absolute / path-noscheme / path-empty */
167 	if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
168 		return -1;
169 
170 	/* Resolve path */
171 	if (ret == 0) {
172 		if (url_parser->relative && url != NULL)
173 			url->path = p_strdup(parser->pool, base->path);
174 		return 0;
175 	}
176 
177 	if (url != NULL)
178 		fullpath = t_str_new(256);
179 
180 	if (url_parser->relative && path_relative > 0 && base->path != NULL) {
181 		const char *pbegin = base->path;
182 		const char *pend = base->path + strlen(base->path);
183 		const char *p = pend - 1;
184 
185 		i_assert(*pbegin == '/');
186 
187 		/* Discard trailing segments of base path based on how many
188 		   effective leading '..' segments were found in the relative
189 		   path.
190 		 */
191 		while (path_relative > 0 && p > pbegin) {
192 			while (p > pbegin && *p != '/') p--;
193 			if (p >= pbegin) {
194 				pend = p;
195 				path_relative--;
196 			}
197 			if (p > pbegin) p--;
198 		}
199 
200 		if (url != NULL && pend > pbegin)
201 			str_append_data(fullpath, pbegin, pend - pbegin);
202 	}
203 
204 	/* Append relative path */
205 	while (*path != NULL) {
206 		const char *part;
207 
208 		if (!uri_data_decode(parser, *path, NULL, &part))
209 			return -1;
210 
211 		if (url != NULL) {
212 			str_append_c(fullpath, '/');
213 			str_append(fullpath, part);
214 		}
215 		path++;
216 	}
217 
218 	if (url != NULL)
219 		url->path = p_strdup(parser->pool, str_c(fullpath));
220 	return 1;
221 }
222 
223 static bool
http_url_parse_query(struct http_url_parser * url_parser,bool have_path)224 http_url_parse_query(struct http_url_parser *url_parser, bool have_path)
225 {
226 	struct uri_parser *parser = &url_parser->parser;
227 	struct http_url *url = url_parser->url, *base = url_parser->base;
228 	const char *query;
229 	int ret;
230 
231 	if ((ret = uri_parse_query(parser, &query)) < 0)
232 		return FALSE;
233 	if (url == NULL)
234 		return TRUE;
235 
236 	if (ret > 0)
237 		url->enc_query = p_strdup(parser->pool, query);
238 	else if (url_parser->relative && !have_path)
239 		url->enc_query = p_strdup(parser->pool, base->enc_query);
240 	return TRUE;
241 }
242 
243 static bool
http_url_parse_fragment(struct http_url_parser * url_parser,bool have_path)244 http_url_parse_fragment(struct http_url_parser *url_parser, bool have_path)
245 {
246 	struct uri_parser *parser = &url_parser->parser;
247 	struct http_url *url = url_parser->url, *base = url_parser->base;
248 	const char *fragment;
249 	int ret;
250 
251 	if ((ret = uri_parse_fragment(parser, &fragment)) < 0)
252 		return FALSE;
253 	if (ret > 0 &&
254 	    (url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
255 		parser->error =
256 			"URL fragment not allowed for HTTP URL in this context";
257 		return FALSE;
258 	}
259 	if (url == NULL)
260 		return TRUE;
261 
262 	if (ret > 0)
263 		url->enc_fragment =  p_strdup(parser->pool, fragment);
264 	else if (url_parser->relative && !have_path)
265 		url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
266 	return TRUE;
267 }
268 
http_url_do_parse(struct http_url_parser * url_parser)269 static bool http_url_do_parse(struct http_url_parser *url_parser)
270 {
271 	struct uri_parser *parser = &url_parser->parser;
272 	struct http_url *url = url_parser->url, *base = url_parser->base;
273 	bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
274 		have_path = FALSE;
275 	const char *scheme;
276 	int ret;
277 
278 	/* RFC 7230, Appendix B:
279 
280 	   http-URI       = "http://" authority path-abempty [ "?" query ]
281 	                    [ "#" fragment ]
282 	   https-URI      = "https://" authority path-abempty [ "?" query ]
283 	                    [ "#" fragment ]
284 	   partial-URI    = relative-part [ "?" query ]
285 
286 	   request-target = origin-form / absolute-form / authority-form /
287 	                    asterisk-form
288 
289 	   origin-form    = absolute-path [ "?" query ]
290 	   absolute-form  = absolute-URI
291 	   authority-form = authority
292 	   asterisk-form  = "*"
293 	                  ; Not parsed here
294 
295 	   absolute-path  = 1*( "/" segment )
296 
297 	   RFC 3986, Appendix A: (implemented in uri-util.h)
298 
299 	   absolute-URI   = scheme ":" hier-part [ "?" query ]
300 
301 	   hier-part      = "//" authority path-abempty
302 	                  / path-absolute
303 	                  / path-rootless
304 	                  / path-empty
305 
306 	   relative-part  = "//" authority path-abempty
307 	                  / path-absolute
308 	                  / path-noscheme
309 	                  / path-empty
310 
311 	   authority     = [ userinfo "@" ] host [ ":" port ]
312 
313 	   path-abempty   = *( "/" segment )
314 	   path-absolute  = "/" [ segment-nz *( "/" segment ) ]
315 	   path-noscheme  = segment-nz-nc *( "/" segment )
316 	   path-rootless  = segment-nz *( "/" segment )
317 	   path-empty     = 0<pchar>
318 
319 	   segment        = *pchar
320 	   segment-nz     = 1*pchar
321 	   segment-nz-nc  = 1*( unreserved / pct-encoded / sub-delims / "@" )
322                     ; non-zero-length segment without any colon ":"
323 
324 	   query          = *( pchar / "/" / "?" )
325 	   fragment       = *( pchar / "/" / "?" )
326 	 */
327 
328 	/* "http:" / "https:" */
329 	if (http_url_parse_scheme(url_parser, &scheme)) {
330 		if (scheme == NULL) {
331 			/* Scheme externally parsed */
332 		} else if (strcasecmp(scheme, "https") == 0) {
333 			if (url != NULL)
334 				url->have_ssl = TRUE;
335 		} else if (strcasecmp(scheme, "http") != 0) {
336 			return http_url_parse_unknown_scheme(url_parser);
337 		}
338 
339 		relative = FALSE;
340 		have_scheme = TRUE;
341 	}
342 
343 	/* "//" authority   ; or
344 	 * ["//"] authority ; when parsing a request target
345 	 */
346 	if (parser->cur < parser->end && parser->cur[0] == '/') {
347 		if ((have_scheme || !url_parser->request_target) &&
348 		    (parser->cur + 1) < parser->end && parser->cur[1] == '/') {
349 			parser->cur += 2;
350 			relative = FALSE;
351 			have_authority = TRUE;
352 		} else {
353 			/* start of absolute-path */
354 		}
355 	} else if (url_parser->request_target && !have_scheme) {
356 		if (!http_url_parse_authority_form(url_parser)) {
357 			/* not non-HTTP scheme and invalid as authority-form */
358 			parser->error = "Request target is invalid";
359 			return FALSE;
360 		}
361 		return TRUE;
362 	}
363 
364 	if (have_scheme && !have_authority) {
365 		parser->error = "Absolute HTTP URL requires `//' after `http:'";
366  		return FALSE;
367 	}
368 
369 	if (have_authority) {
370 		if (!http_url_parse_authority(url_parser))
371 			return FALSE;
372 	}
373 
374 	/* Relative URLs are only valid when we have a base URL */
375 	if (relative) {
376 		if (base == NULL) {
377 			parser->error = "Relative HTTP URL not allowed";
378 			return FALSE;
379 		} else if (!have_authority && url != NULL) {
380 			uri_host_copy(parser->pool, &url->host, &base->host);
381 			url->port = base->port;
382 			url->have_ssl = base->have_ssl;
383 			url->user = p_strdup_empty(parser->pool, base->user);
384 			url->password = p_strdup_empty(parser->pool,
385 						       base->password);
386 		}
387 
388 		url_parser->relative = TRUE;
389 	}
390 
391 	/* path-abempty / path-absolute / path-noscheme / path-empty */
392 	ret = http_url_parse_path(url_parser);
393 	if (ret < 0)
394 		return FALSE;
395 	have_path = (ret > 0);
396 
397 	/* [ "?" query ] */
398 	if (!http_url_parse_query(url_parser, have_path))
399 		return FALSE;
400 
401 	/* [ "#" fragment ] */
402 	if (!http_url_parse_fragment(url_parser, have_path))
403 		return FALSE;
404 
405 	/* must be at end of URL now */
406 	i_assert(parser->cur == parser->end);
407 
408 	if (have_scheme)
409 		url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
410 	return TRUE;
411 }
412 
/* Public API */
414 
/* Parse an HTTP URL string into a newly allocated struct http_url.

   url     - input string
   base    - base URL for resolving relative references, or NULL to reject
             relative URLs; must be NULL when HTTP_URL_PARSE_SCHEME_EXTERNAL
             is set (asserted)
   flags   - parse flags
   pool    - pool from which the result is allocated
   url_r   - set to the parsed URL on success
   error_r - set to the parser's error message on failure

   Returns 0 on success and -1 on failure.
 */
int http_url_parse(const char *url, struct http_url *base,
		   enum http_url_parse_flags flags, pool_t pool,
		   struct http_url **url_r, const char **error_r)
{
	struct http_url_parser url_parser;

	/* base != NULL indicates whether relative URLs are allowed. However,
	   certain flags may also dictate whether relative URLs are
	   allowed/required. */
	i_assert((flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0 || base == NULL);

	i_zero(&url_parser);
	uri_parser_init(&url_parser.parser, pool, url);
	url_parser.parser.allow_pct_nul = (flags & HTTP_URL_ALLOW_PCT_NUL) != 0;

	url_parser.flags = flags;
	url_parser.base = base;
	url_parser.url = p_new(pool, struct http_url, 1);

	if (http_url_do_parse(&url_parser)) {
		*url_r = url_parser.url;
		return 0;
	}

	*error_r = url_parser.parser.error;
	return -1;
}
441 
/* Parse an HTTP request target together with the Host header value.

   The Host header (or, when it is NULL or empty, default_base) provides the
   base URL against which the request target is resolved. A request target of
   exactly "*" yields asterisk-form with only host and port filled in. Any
   other target is run through the URL parser in request-target mode with no
   parse flags set.

   On success target->url and target->format are set and 0 is returned;
   target->url can be NULL when the parser classified the target as a non-HTTP
   absolute URL. On failure *error_r is set and -1 is returned.
 */
int http_url_request_target_parse(const char *request_target,
				  const char *host_header,
				  const struct http_url *default_base,
				  pool_t pool,
				  struct http_request_target *target,
				  const char **error_r)
{
	struct http_url_parser url_parser;
	struct uri_authority auth;
	struct http_url base;

	/* Establish the base URL */
	i_zero(&base);
	if (host_header == NULL || *host_header == '\0') {
		if (default_base == NULL) {
			*error_r = "Empty Host header";
			return -1;
		}
		i_assert(default_base != NULL);
		base = *default_base;
	} else {
		struct uri_parser *parser;

		i_zero(&url_parser);
		parser = &url_parser.parser;
		uri_parser_init(parser, pool, host_header);

		if (uri_parse_host_authority(parser, &auth) <= 0) {
			*error_r = t_strdup_printf("Invalid Host header: %s",
						   parser->error);
			return -1;
		}

		/* The header must be consumed completely and must not carry a
		   userinfo part */
		if (parser->cur != parser->end || auth.enc_userinfo != NULL) {
			*error_r = "Invalid Host header: "
				   "Contains invalid character";
			return -1;
		}

		base.host = auth.host;
		base.port = auth.port;
	}

	/* asterisk-form */
	if (strcmp(request_target, "*") == 0) {
		struct http_url *url = p_new(pool, struct http_url, 1);

		uri_host_copy(pool, &url->host, &base.host);
		url->port = base.port;
		target->url = url;
		target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK;
		return 0;
	}

	/* Every other form goes through the URL parser; origin-form is the
	   default unless the parser determines otherwise */
	i_zero(&url_parser);
	uri_parser_init(&url_parser.parser, pool, request_target);

	url_parser.url = p_new(pool, struct http_url, 1);
	url_parser.base = &base;
	url_parser.flags = 0;
	url_parser.request_target = TRUE;
	url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN;

	if (!http_url_do_parse(&url_parser)) {
		*error_r = url_parser.parser.error;
		return -1;
	}

	target->url = url_parser.url;
	target->format = url_parser.req_format;
	return 0;
}
511 
/*
 * HTTP URL manipulation
 */
515 
http_url_init_authority_from(struct http_url * dest,const struct http_url * src)516 void http_url_init_authority_from(struct http_url *dest,
517 				  const struct http_url *src)
518 {
519 	i_zero(dest);
520 	dest->host = src->host;
521 	dest->port = src->port;
522 	dest->have_ssl = src->have_ssl;
523 }
524 
/* Zero *dest and then copy host, port and SSL flag from *src; the host is
   copied with uri_host_copy() using the given pool. */
void http_url_copy_authority(pool_t pool, struct http_url *dest,
			     const struct http_url *src)
{
	i_zero(dest);
	dest->have_ssl = src->have_ssl;
	dest->port = src->port;
	uri_host_copy(pool, &dest->host, &src->host);
}
533 
534 struct http_url *
http_url_clone_authority(pool_t pool,const struct http_url * src)535 http_url_clone_authority(pool_t pool, const struct http_url *src)
536 {
537 	struct http_url *new_url;
538 
539 	new_url = p_new(pool, struct http_url, 1);
540 	http_url_copy_authority(pool, new_url, src);
541 
542 	return new_url;
543 }
544 
/* Copy authority, path, query and fragment from *src into *dest, duplicating
   the strings into pool. User and password are not copied; *dest is zeroed
   by the authority copy, so they end up NULL. */
void http_url_copy(pool_t pool, struct http_url *dest,
		   const struct http_url *src)
{
	http_url_copy_authority(pool, dest, src);

	dest->enc_fragment = p_strdup(pool, src->enc_fragment);
	dest->enc_query = p_strdup(pool, src->enc_query);
	dest->path = p_strdup(pool, src->path);
}
553 
/* Same as http_url_copy(), but additionally duplicates the user and password
   fields into pool. */
void http_url_copy_with_userinfo(pool_t pool, struct http_url *dest,
				 const struct http_url *src)
{
	http_url_copy(pool, dest, src);

	dest->password = p_strdup(pool, src->password);
	dest->user = p_strdup(pool, src->user);
}
561 
/* Allocate a new URL from pool and fill it with http_url_copy(), i.e.
   without user and password. */
struct http_url *http_url_clone(pool_t pool, const struct http_url *src)
{
	struct http_url *copy = p_new(pool, struct http_url, 1);

	http_url_copy(pool, copy, src);
	return copy;
}
571 
572 struct http_url *
http_url_clone_with_userinfo(pool_t pool,const struct http_url * src)573 http_url_clone_with_userinfo(pool_t pool, const struct http_url *src)
574 {
575 	struct http_url *new_url;
576 
577 	new_url = p_new(pool, struct http_url, 1);
578 	http_url_copy_with_userinfo(pool, new_url, src);
579 
580 	return new_url;
581 }
582 
/*
 * HTTP URL construction
 */
586 
587 static void
http_url_add_scheme(string_t * urlstr,const struct http_url * url)588 http_url_add_scheme(string_t *urlstr, const struct http_url *url)
589 {
590 	/* scheme */
591 	if (!url->have_ssl)
592 		uri_append_scheme(urlstr, "http");
593 	else
594 		uri_append_scheme(urlstr, "https");
595 	str_append(urlstr, "//");
596 }
597 
598 static void
http_url_add_authority(string_t * urlstr,const struct http_url * url)599 http_url_add_authority(string_t *urlstr, const struct http_url *url)
600 {
601 	/* host */
602 	uri_append_host(urlstr, &url->host);
603 	/* port */
604 	uri_append_port(urlstr, url->port);
605 }
606 
607 static void
http_url_add_target(string_t * urlstr,const struct http_url * url)608 http_url_add_target(string_t *urlstr, const struct http_url *url)
609 {
610 	if (url->path == NULL || *url->path == '\0') {
611 		/* Older syntax of RFC 2616 requires this slash at all times for
612 		   an absolute URL. */
613 		str_append_c(urlstr, '/');
614 	} else {
615 		uri_append_path_data(urlstr, "", url->path);
616 	}
617 
618 	/* query (pre-encoded) */
619 	if (url->enc_query != NULL) {
620 		str_append_c(urlstr, '?');
621 		str_append(urlstr, url->enc_query);
622 	}
623 }
624 
http_url_create(const struct http_url * url)625 const char *http_url_create(const struct http_url *url)
626 {
627 	string_t *urlstr = t_str_new(512);
628 
629 	http_url_add_scheme(urlstr, url);
630 	http_url_add_authority(urlstr, url);
631 	http_url_add_target(urlstr, url);
632 
633 	/* fragment */
634 	if (url->enc_fragment != NULL) {
635 		str_append_c(urlstr, '#');
636 		str_append(urlstr, url->enc_fragment);
637 	}
638 
639 	return str_c(urlstr);
640 }
641 
http_url_create_host(const struct http_url * url)642 const char *http_url_create_host(const struct http_url *url)
643 {
644 	string_t *urlstr = t_str_new(512);
645 
646 	http_url_add_scheme(urlstr, url);
647 	http_url_add_authority(urlstr, url);
648 
649 	return str_c(urlstr);
650 }
651 
http_url_create_authority(const struct http_url * url)652 const char *http_url_create_authority(const struct http_url *url)
653 {
654 	string_t *urlstr = t_str_new(256);
655 
656 	http_url_add_authority(urlstr, url);
657 
658 	return str_c(urlstr);
659 }
660 
http_url_create_target(const struct http_url * url)661 const char *http_url_create_target(const struct http_url *url)
662 {
663 	string_t *urlstr = t_str_new(256);
664 
665 	http_url_add_target(urlstr, url);
666 
667 	return str_c(urlstr);
668 }
669 
/* Append data to out through uri_append_query_data(), passing "&;?=+" as its
   character-set argument.
   NOTE(review): presumably these are the extra characters to be escaped;
   '/' is not among them here, unlike in http_url_escape_param() - confirm
   against uri-util. */
void http_url_escape_path(string_t *out, const char *data)
{
	const char *charset = "&;?=+";

	uri_append_query_data(out, charset, data);
}
674 
/* Append data to out through uri_append_query_data(), passing "&;/?=+" as its
   character-set argument.
   NOTE(review): presumably these are the extra characters to be escaped;
   compared with http_url_escape_path() the set also contains '/' - confirm
   against uri-util. */
void http_url_escape_param(string_t *out, const char *data)
{
	const char *charset = "&;/?=+";

	uri_append_query_data(out, charset, data);
}
679