1 /* Copyright (c) 2013-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "str.h"
5 #include "strfuncs.h"
6 #include "net.h"
7 #include "uri-util.h"
8
9 #include "http-url.h"
10 #include "http-request.h"
11
12 /*
13 * HTTP URL parser
14 */
15
16 struct http_url_parser {
17 struct uri_parser parser;
18
19 enum http_url_parse_flags flags;
20
21 struct http_url *url;
22 struct http_url *base;
23
24 enum http_request_target_format req_format;
25
26 bool relative:1;
27 bool request_target:1;
28 };
29
/* Forward declaration: http_url_parse_unknown_scheme() calls this function
   before its definition appears in this file. */
static bool
http_url_parse_authority_form(struct http_url_parser *url_parser);
31
32 static bool
http_url_parse_scheme(struct http_url_parser * url_parser,const char ** scheme_r)33 http_url_parse_scheme(struct http_url_parser *url_parser, const char **scheme_r)
34 {
35 struct uri_parser *parser = &url_parser->parser;
36
37 *scheme_r = NULL;
38 if ((url_parser->flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) != 0)
39 return TRUE;
40
41 if (uri_parse_scheme(parser, scheme_r) <= 0) {
42 parser->cur = parser->begin;
43 return FALSE;
44 }
45
46 return TRUE;
47 }
48
http_url_parse_unknown_scheme(struct http_url_parser * url_parser)49 static bool http_url_parse_unknown_scheme(struct http_url_parser *url_parser)
50 {
51 struct uri_parser *parser = &url_parser->parser;
52
53 if (url_parser->request_target) {
54 /* Valid as non-HTTP scheme, but also try to parse as authority
55 */
56 parser->cur = parser->begin;
57 if (!http_url_parse_authority_form(url_parser)) {
58 /* indicate non-http-url */
59 url_parser->url = NULL;
60 url_parser->req_format =
61 HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
62 }
63 return TRUE;
64 }
65 parser->error = "Not an HTTP URL";
66 return FALSE;
67 }
68
69 static bool
http_url_parse_userinfo(struct http_url_parser * url_parser,struct uri_authority * auth,const char ** user_r,const char ** password_r)70 http_url_parse_userinfo(struct http_url_parser *url_parser,
71 struct uri_authority *auth,
72 const char **user_r, const char **password_r)
73 {
74 struct uri_parser *parser = &url_parser->parser;
75 const char *p;
76
77 *user_r = *password_r = NULL;
78
79 if (auth->enc_userinfo == NULL)
80 return TRUE;
81
82 if ((url_parser->flags & HTTP_URL_ALLOW_USERINFO_PART) == 0) {
83 /* RFC 7230, Section 2.7.1: http URI Scheme
84
85 A sender MUST NOT generate the userinfo subcomponent (and its
86 "@" delimiter) when an "http" URI reference is generated
87 within a message as a request target or header field value.
88 Before making use of an "http" URI reference received from an
89 untrusted source, a recipient SHOULD parse for userinfo and
90 treat its presence as an error; it is likely being used to
91 obscure the authority for the sake of phishing attacks.
92 */
93 parser->error = "HTTP URL does not allow `userinfo@' part";
94 return FALSE;
95 }
96
97 p = strchr(auth->enc_userinfo, ':');
98 if (p == NULL) {
99 if (!uri_data_decode(parser, auth->enc_userinfo, NULL, user_r))
100 return FALSE;
101 } else {
102 if (!uri_data_decode(parser, auth->enc_userinfo, p, user_r))
103 return FALSE;
104 if (!uri_data_decode(parser, p + 1, NULL, password_r))
105 return FALSE;
106 }
107 return TRUE;
108 }
109
http_url_parse_authority(struct http_url_parser * url_parser)110 static bool http_url_parse_authority(struct http_url_parser *url_parser)
111 {
112 struct uri_parser *parser = &url_parser->parser;
113 struct http_url *url = url_parser->url;
114 struct uri_authority auth;
115 const char *user = NULL, *password = NULL;
116 int ret;
117
118 if ((ret = uri_parse_host_authority(parser, &auth)) < 0)
119 return FALSE;
120 if (auth.host.name == NULL || *auth.host.name == '\0') {
121 /* RFC 7230, Section 2.7.1: http URI Scheme
122
123 A sender MUST NOT generate an "http" URI with an empty host
124 identifier. A recipient that processes such a URI reference
125 MUST reject it as invalid.
126 */
127 parser->error = "HTTP URL does not allow empty host identifier";
128 return FALSE;
129 }
130 if (ret > 0) {
131 if (!http_url_parse_userinfo(url_parser, &auth,
132 &user, &password))
133 return FALSE;
134 }
135 if (url != NULL) {
136 uri_host_copy(parser->pool, &url->host, &auth.host);
137 url->port = auth.port;
138 url->user = p_strdup(parser->pool, user);
139 url->password = p_strdup(parser->pool, password);
140 }
141 return TRUE;
142 }
143
http_url_parse_authority_form(struct http_url_parser * url_parser)144 static bool http_url_parse_authority_form(struct http_url_parser *url_parser)
145 {
146 struct uri_parser *parser = &url_parser->parser;
147
148 if (!http_url_parse_authority(url_parser))
149 return FALSE;
150 if (parser->cur != parser->end)
151 return FALSE;
152 url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_AUTHORITY;
153 return TRUE;
154 }
155
156 static int
http_url_parse_path(struct http_url_parser * url_parser)157 http_url_parse_path(struct http_url_parser *url_parser)
158 {
159 struct uri_parser *parser = &url_parser->parser;
160 struct http_url *url = url_parser->url, *base = url_parser->base;
161 const char *const *path;
162 int path_relative;
163 string_t *fullpath = NULL;
164 int ret;
165
166 /* path-abempty / path-absolute / path-noscheme / path-empty */
167 if ((ret = uri_parse_path(parser, &path_relative, &path)) < 0)
168 return -1;
169
170 /* Resolve path */
171 if (ret == 0) {
172 if (url_parser->relative && url != NULL)
173 url->path = p_strdup(parser->pool, base->path);
174 return 0;
175 }
176
177 if (url != NULL)
178 fullpath = t_str_new(256);
179
180 if (url_parser->relative && path_relative > 0 && base->path != NULL) {
181 const char *pbegin = base->path;
182 const char *pend = base->path + strlen(base->path);
183 const char *p = pend - 1;
184
185 i_assert(*pbegin == '/');
186
187 /* Discard trailing segments of base path based on how many
188 effective leading '..' segments were found in the relative
189 path.
190 */
191 while (path_relative > 0 && p > pbegin) {
192 while (p > pbegin && *p != '/') p--;
193 if (p >= pbegin) {
194 pend = p;
195 path_relative--;
196 }
197 if (p > pbegin) p--;
198 }
199
200 if (url != NULL && pend > pbegin)
201 str_append_data(fullpath, pbegin, pend - pbegin);
202 }
203
204 /* Append relative path */
205 while (*path != NULL) {
206 const char *part;
207
208 if (!uri_data_decode(parser, *path, NULL, &part))
209 return -1;
210
211 if (url != NULL) {
212 str_append_c(fullpath, '/');
213 str_append(fullpath, part);
214 }
215 path++;
216 }
217
218 if (url != NULL)
219 url->path = p_strdup(parser->pool, str_c(fullpath));
220 return 1;
221 }
222
223 static bool
http_url_parse_query(struct http_url_parser * url_parser,bool have_path)224 http_url_parse_query(struct http_url_parser *url_parser, bool have_path)
225 {
226 struct uri_parser *parser = &url_parser->parser;
227 struct http_url *url = url_parser->url, *base = url_parser->base;
228 const char *query;
229 int ret;
230
231 if ((ret = uri_parse_query(parser, &query)) < 0)
232 return FALSE;
233 if (url == NULL)
234 return TRUE;
235
236 if (ret > 0)
237 url->enc_query = p_strdup(parser->pool, query);
238 else if (url_parser->relative && !have_path)
239 url->enc_query = p_strdup(parser->pool, base->enc_query);
240 return TRUE;
241 }
242
243 static bool
http_url_parse_fragment(struct http_url_parser * url_parser,bool have_path)244 http_url_parse_fragment(struct http_url_parser *url_parser, bool have_path)
245 {
246 struct uri_parser *parser = &url_parser->parser;
247 struct http_url *url = url_parser->url, *base = url_parser->base;
248 const char *fragment;
249 int ret;
250
251 if ((ret = uri_parse_fragment(parser, &fragment)) < 0)
252 return FALSE;
253 if (ret > 0 &&
254 (url_parser->flags & HTTP_URL_ALLOW_FRAGMENT_PART) == 0) {
255 parser->error =
256 "URL fragment not allowed for HTTP URL in this context";
257 return FALSE;
258 }
259 if (url == NULL)
260 return TRUE;
261
262 if (ret > 0)
263 url->enc_fragment = p_strdup(parser->pool, fragment);
264 else if (url_parser->relative && !have_path)
265 url->enc_fragment = p_strdup(parser->pool, base->enc_fragment);
266 return TRUE;
267 }
268
http_url_do_parse(struct http_url_parser * url_parser)269 static bool http_url_do_parse(struct http_url_parser *url_parser)
270 {
271 struct uri_parser *parser = &url_parser->parser;
272 struct http_url *url = url_parser->url, *base = url_parser->base;
273 bool relative = TRUE, have_scheme = FALSE, have_authority = FALSE,
274 have_path = FALSE;
275 const char *scheme;
276 int ret;
277
278 /* RFC 7230, Appendix B:
279
280 http-URI = "http://" authority path-abempty [ "?" query ]
281 [ "#" fragment ]
282 https-URI = "https://" authority path-abempty [ "?" query ]
283 [ "#" fragment ]
284 partial-URI = relative-part [ "?" query ]
285
286 request-target = origin-form / absolute-form / authority-form /
287 asterisk-form
288
289 origin-form = absolute-path [ "?" query ]
290 absolute-form = absolute-URI
291 authority-form = authority
292 asterisk-form = "*"
293 ; Not parsed here
294
295 absolute-path = 1*( "/" segment )
296
297 RFC 3986, Appendix A: (implemented in uri-util.h)
298
299 absolute-URI = scheme ":" hier-part [ "?" query ]
300
301 hier-part = "//" authority path-abempty
302 / path-absolute
303 / path-rootless
304 / path-empty
305
306 relative-part = "//" authority path-abempty
307 / path-absolute
308 / path-noscheme
309 / path-empty
310
311 authority = [ userinfo "@" ] host [ ":" port ]
312
313 path-abempty = *( "/" segment )
314 path-absolute = "/" [ segment-nz *( "/" segment ) ]
315 path-noscheme = segment-nz-nc *( "/" segment )
316 path-rootless = segment-nz *( "/" segment )
317 path-empty = 0<pchar>
318
319 segment = *pchar
320 segment-nz = 1*pchar
321 segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
322 ; non-zero-length segment without any colon ":"
323
324 query = *( pchar / "/" / "?" )
325 fragment = *( pchar / "/" / "?" )
326 */
327
328 /* "http:" / "https:" */
329 if (http_url_parse_scheme(url_parser, &scheme)) {
330 if (scheme == NULL) {
331 /* Scheme externally parsed */
332 } else if (strcasecmp(scheme, "https") == 0) {
333 if (url != NULL)
334 url->have_ssl = TRUE;
335 } else if (strcasecmp(scheme, "http") != 0) {
336 return http_url_parse_unknown_scheme(url_parser);
337 }
338
339 relative = FALSE;
340 have_scheme = TRUE;
341 }
342
343 /* "//" authority ; or
344 * ["//"] authority ; when parsing a request target
345 */
346 if (parser->cur < parser->end && parser->cur[0] == '/') {
347 if ((have_scheme || !url_parser->request_target) &&
348 (parser->cur + 1) < parser->end && parser->cur[1] == '/') {
349 parser->cur += 2;
350 relative = FALSE;
351 have_authority = TRUE;
352 } else {
353 /* start of absolute-path */
354 }
355 } else if (url_parser->request_target && !have_scheme) {
356 if (!http_url_parse_authority_form(url_parser)) {
357 /* not non-HTTP scheme and invalid as authority-form */
358 parser->error = "Request target is invalid";
359 return FALSE;
360 }
361 return TRUE;
362 }
363
364 if (have_scheme && !have_authority) {
365 parser->error = "Absolute HTTP URL requires `//' after `http:'";
366 return FALSE;
367 }
368
369 if (have_authority) {
370 if (!http_url_parse_authority(url_parser))
371 return FALSE;
372 }
373
374 /* Relative URLs are only valid when we have a base URL */
375 if (relative) {
376 if (base == NULL) {
377 parser->error = "Relative HTTP URL not allowed";
378 return FALSE;
379 } else if (!have_authority && url != NULL) {
380 uri_host_copy(parser->pool, &url->host, &base->host);
381 url->port = base->port;
382 url->have_ssl = base->have_ssl;
383 url->user = p_strdup_empty(parser->pool, base->user);
384 url->password = p_strdup_empty(parser->pool,
385 base->password);
386 }
387
388 url_parser->relative = TRUE;
389 }
390
391 /* path-abempty / path-absolute / path-noscheme / path-empty */
392 ret = http_url_parse_path(url_parser);
393 if (ret < 0)
394 return FALSE;
395 have_path = (ret > 0);
396
397 /* [ "?" query ] */
398 if (!http_url_parse_query(url_parser, have_path))
399 return FALSE;
400
401 /* [ "#" fragment ] */
402 if (!http_url_parse_fragment(url_parser, have_path))
403 return FALSE;
404
405 /* must be at end of URL now */
406 i_assert(parser->cur == parser->end);
407
408 if (have_scheme)
409 url_parser->req_format = HTTP_REQUEST_TARGET_FORMAT_ABSOLUTE;
410 return TRUE;
411 }
412
413 /* Public API */
414
/* Parse an HTTP URL.

   url     - URL text to parse
   base    - base URL for resolving relative references; NULL means relative
             URLs are rejected. Must be NULL when
             HTTP_URL_PARSE_SCHEME_EXTERNAL is set.
   flags   - HTTP_URL_* parse flags
   pool    - pool from which the resulting URL is allocated
   url_r   - set to the parsed URL on success
   error_r - set to the parser's error message on failure

   Returns 0 on success and -1 on failure. */
int http_url_parse(const char *url, struct http_url *base,
		   enum http_url_parse_flags flags, pool_t pool,
		   struct http_url **url_r, const char **error_r)
{
	struct http_url_parser ctx;

	/* base != NULL indicates whether relative URLs are allowed. However,
	   certain flags may also dictate whether relative URLs are
	   allowed/required. */
	i_assert((flags & HTTP_URL_PARSE_SCHEME_EXTERNAL) == 0 || base == NULL);

	i_zero(&ctx);
	uri_parser_init(&ctx.parser, pool, url);
	ctx.parser.allow_pct_nul = (flags & HTTP_URL_ALLOW_PCT_NUL) != 0;

	ctx.flags = flags;
	ctx.base = base;
	ctx.url = p_new(pool, struct http_url, 1);

	if (http_url_do_parse(&ctx)) {
		*url_r = ctx.url;
		return 0;
	}

	*error_r = ctx.parser.error;
	return -1;
}
441
/* Parse the request target of an HTTP request.

   request_target - request target text from the request line
   host_header    - value of the Host header; when NULL or empty,
                    default_base is used as the base URL instead
   default_base   - fallback base URL; may be NULL, in which case a missing
                    Host header is an error
   pool           - pool from which the resulting URL is allocated
   target         - receives the parsed URL and the request target format
   error_r        - set to an error message on failure

   Returns 0 on success and -1 on failure. target->url can be NULL on success
   when the target is an absolute URI with a non-HTTP scheme. */
int http_url_request_target_parse(const char *request_target,
				  const char *host_header,
				  const struct http_url *default_base,
				  pool_t pool,
				  struct http_request_target *target,
				  const char **error_r)
{
	struct http_url_parser url_parser;
	struct uri_authority host_auth;
	struct http_url base;

	/* Determine the base URL: Host header first, default_base second */
	i_zero(&base);
	if (host_header != NULL && *host_header != '\0') {
		struct uri_parser *host_parser = &url_parser.parser;

		i_zero(&url_parser);
		uri_parser_init(host_parser, pool, host_header);

		if (uri_parse_host_authority(host_parser, &host_auth) <= 0) {
			*error_r = t_strdup_printf("Invalid Host header: %s",
						   host_parser->error);
			return -1;
		}

		/* Nothing may follow the authority and userinfo is not
		   accepted in a Host header */
		if (host_parser->cur != host_parser->end ||
		    host_auth.enc_userinfo != NULL) {
			*error_r = "Invalid Host header: Contains invalid character";
			return -1;
		}

		base.host = host_auth.host;
		base.port = host_auth.port;
	} else if (default_base != NULL) {
		base = *default_base;
	} else {
		*error_r = "Empty Host header";
		return -1;
	}

	/* asterisk-form: only the authority of the base applies */
	if (strcmp(request_target, "*") == 0) {
		struct http_url *asterisk_url =
			p_new(pool, struct http_url, 1);

		uri_host_copy(pool, &asterisk_url->host, &base.host);
		asterisk_url->port = base.port;
		target->url = asterisk_url;
		target->format = HTTP_REQUEST_TARGET_FORMAT_ASTERISK;
		return 0;
	}

	/* All other forms are handled by the URL parser; origin-form is
	   assumed until the parser detects something else */
	i_zero(&url_parser);
	uri_parser_init(&url_parser.parser, pool, request_target);

	url_parser.flags = 0;
	url_parser.base = &base;
	url_parser.request_target = TRUE;
	url_parser.req_format = HTTP_REQUEST_TARGET_FORMAT_ORIGIN;
	url_parser.url = p_new(pool, struct http_url, 1);

	if (!http_url_do_parse(&url_parser)) {
		*error_r = url_parser.parser.error;
		return -1;
	}

	target->url = url_parser.url;
	target->format = url_parser.req_format;
	return 0;
}
511
512 /*
513 * HTTP URL manipulation
514 */
515
http_url_init_authority_from(struct http_url * dest,const struct http_url * src)516 void http_url_init_authority_from(struct http_url *dest,
517 const struct http_url *src)
518 {
519 i_zero(dest);
520 dest->host = src->host;
521 dest->port = src->port;
522 dest->have_ssl = src->have_ssl;
523 }
524
/* Reset dest and copy the authority (host, port, SSL flag) of src into it.
   The host is copied with uri_host_copy() using the given pool. */
void http_url_copy_authority(pool_t pool, struct http_url *dest,
			     const struct http_url *src)
{
	i_zero(dest);

	dest->have_ssl = src->have_ssl;
	dest->port = src->port;
	uri_host_copy(pool, &dest->host, &src->host);
}
533
534 struct http_url *
http_url_clone_authority(pool_t pool,const struct http_url * src)535 http_url_clone_authority(pool_t pool, const struct http_url *src)
536 {
537 struct http_url *new_url;
538
539 new_url = p_new(pool, struct http_url, 1);
540 http_url_copy_authority(pool, new_url, src);
541
542 return new_url;
543 }
544
/* Copy src into dest: the authority plus path, query and fragment, with the
   strings duplicated into pool. User and password are not copied; they are
   left cleared by http_url_copy_authority(). */
void http_url_copy(pool_t pool, struct http_url *dest,
		   const struct http_url *src)
{
	/* Must come first: this zeroes dest */
	http_url_copy_authority(pool, dest, src);

	dest->enc_fragment = p_strdup(pool, src->enc_fragment);
	dest->enc_query = p_strdup(pool, src->enc_query);
	dest->path = p_strdup(pool, src->path);
}
553
/* Same as http_url_copy(), but additionally duplicates user and password
   into pool. */
void http_url_copy_with_userinfo(pool_t pool, struct http_url *dest,
				 const struct http_url *src)
{
	/* Must come first: the underlying authority copy zeroes dest */
	http_url_copy(pool, dest, src);

	dest->password = p_strdup(pool, src->password);
	dest->user = p_strdup(pool, src->user);
}
561
/* Allocate a new URL from pool and fill it with a copy of src as produced by
   http_url_copy() (i.e. without user and password). */
struct http_url *http_url_clone(pool_t pool, const struct http_url *src)
{
	struct http_url *copy = p_new(pool, struct http_url, 1);

	http_url_copy(pool, copy, src);
	return copy;
}
571
572 struct http_url *
http_url_clone_with_userinfo(pool_t pool,const struct http_url * src)573 http_url_clone_with_userinfo(pool_t pool, const struct http_url *src)
574 {
575 struct http_url *new_url;
576
577 new_url = p_new(pool, struct http_url, 1);
578 http_url_copy_with_userinfo(pool, new_url, src);
579
580 return new_url;
581 }
582
583 /*
584 * HTTP URL construction
585 */
586
587 static void
http_url_add_scheme(string_t * urlstr,const struct http_url * url)588 http_url_add_scheme(string_t *urlstr, const struct http_url *url)
589 {
590 /* scheme */
591 if (!url->have_ssl)
592 uri_append_scheme(urlstr, "http");
593 else
594 uri_append_scheme(urlstr, "https");
595 str_append(urlstr, "//");
596 }
597
598 static void
http_url_add_authority(string_t * urlstr,const struct http_url * url)599 http_url_add_authority(string_t *urlstr, const struct http_url *url)
600 {
601 /* host */
602 uri_append_host(urlstr, &url->host);
603 /* port */
604 uri_append_port(urlstr, url->port);
605 }
606
607 static void
http_url_add_target(string_t * urlstr,const struct http_url * url)608 http_url_add_target(string_t *urlstr, const struct http_url *url)
609 {
610 if (url->path == NULL || *url->path == '\0') {
611 /* Older syntax of RFC 2616 requires this slash at all times for
612 an absolute URL. */
613 str_append_c(urlstr, '/');
614 } else {
615 uri_append_path_data(urlstr, "", url->path);
616 }
617
618 /* query (pre-encoded) */
619 if (url->enc_query != NULL) {
620 str_append_c(urlstr, '?');
621 str_append(urlstr, url->enc_query);
622 }
623 }
624
http_url_create(const struct http_url * url)625 const char *http_url_create(const struct http_url *url)
626 {
627 string_t *urlstr = t_str_new(512);
628
629 http_url_add_scheme(urlstr, url);
630 http_url_add_authority(urlstr, url);
631 http_url_add_target(urlstr, url);
632
633 /* fragment */
634 if (url->enc_fragment != NULL) {
635 str_append_c(urlstr, '#');
636 str_append(urlstr, url->enc_fragment);
637 }
638
639 return str_c(urlstr);
640 }
641
http_url_create_host(const struct http_url * url)642 const char *http_url_create_host(const struct http_url *url)
643 {
644 string_t *urlstr = t_str_new(512);
645
646 http_url_add_scheme(urlstr, url);
647 http_url_add_authority(urlstr, url);
648
649 return str_c(urlstr);
650 }
651
http_url_create_authority(const struct http_url * url)652 const char *http_url_create_authority(const struct http_url *url)
653 {
654 string_t *urlstr = t_str_new(256);
655
656 http_url_add_authority(urlstr, url);
657
658 return str_c(urlstr);
659 }
660
http_url_create_target(const struct http_url * url)661 const char *http_url_create_target(const struct http_url *url)
662 {
663 string_t *urlstr = t_str_new(256);
664
665 http_url_add_target(urlstr, url);
666
667 return str_c(urlstr);
668 }
669
/* Append data to out through uri_append_query_data(), passing "&;?=+" as the
   extra character set (presumably the characters to escape in addition to
   the defaults; see uri_append_query_data()). Unlike
   http_url_escape_param(), '/' is not part of that set. */
void http_url_escape_path(string_t *out, const char *data)
{
	const char *extra_chars = "&;?=+";

	uri_append_query_data(out, extra_chars, data);
}
674
/* Append data to out through uri_append_query_data(), passing "&;/?=+" as
   the extra character set (presumably the characters to escape in addition
   to the defaults; see uri_append_query_data()). Unlike
   http_url_escape_path(), '/' is part of that set. */
void http_url_escape_param(string_t *out, const char *data)
{
	const char *extra_chars = "&;/?=+";

	uri_append_query_data(out, extra_chars, data);
}
679