1 /* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "array.h"
5 #include "str.h"
6 #include "net.h"
7 #include "uri-util.h"
8
9 #include <ctype.h>
10
11 /* [URI-GEN] RFC3986 Appendix A:
12
13 URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
14 absolute-URI = scheme ":" hier-part [ "?" query ]
15 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
16
17 URI-reference = URI / relative-ref
18 relative-ref = relative-part [ "?" query ] [ "#" fragment ]
19
20 relative-part = "//" authority path-abempty
21 / path-absolute
22 / path-noscheme
23 / path-empty
24 hier-part = "//" authority path-abempty
25 / path-absolute
26 / path-rootless
27 / path-empty
28
29 authority = [ userinfo "@" ] host [ ":" port ]
30 userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
31 host = IP-literal / IPv4address / reg-name
32 port = *DIGIT
33
34 IP-literal = "[" ( IPv6address / IPvFuture ) "]"
35 IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
36 IPv6address = 6( h16 ":" ) ls32
37 / "::" 5( h16 ":" ) ls32
38 / [ h16 ] "::" 4( h16 ":" ) ls32
39 / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
40 / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
41 / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
42 / [ *4( h16 ":" ) h16 ] "::" ls32
43 / [ *5( h16 ":" ) h16 ] "::" h16
44 / [ *6( h16 ":" ) h16 ] "::"
45 h16 = 1*4HEXDIG
46 ls32 = ( h16 ":" h16 ) / IPv4address
47 IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
48 dec-octet = DIGIT ; 0-9
49 / %x31-39 DIGIT ; 10-99
50 / "1" 2DIGIT ; 100-199
51 / "2" %x30-34 DIGIT ; 200-249
52 / "25" %x30-35 ; 250-255
53 reg-name = *( unreserved / pct-encoded / sub-delims )
54
55 path = path-abempty ; begins with "/" or is empty
56 / path-absolute ; begins with "/" but not "//"
57 / path-noscheme ; begins with a non-colon segment
58 / path-rootless ; begins with a segment
59 / path-empty ; zero characters
60 path-abempty = *( "/" segment )
61 path-absolute = "/" [ segment-nz *( "/" segment ) ]
62 path-noscheme = segment-nz-nc *( "/" segment )
63 path-rootless = segment-nz *( "/" segment )
64 path-empty = 0<pchar>
65
66 segment = *pchar
67 segment-nz = 1*pchar
68 segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
69 ; non-zero-length segment without any colon ":"
70 pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
71
72 query = *( pchar / "/" / "?" )
73 fragment = *( pchar / "/" / "?" )
74
75 pct-encoded = "%" HEXDIG HEXDIG
76 unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
77 reserved = gen-delims / sub-delims
78 gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
79 sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
80 / "*" / "+" / "," / ";" / "="
81 */
82
/* Maximum number of scheme characters uri_parse_scheme() scans before it
   requires the ':' delimiter. */
#define URI_MAX_SCHEME_NAME_LEN 64

/* Character lookup table
 *
 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" [bit0]
 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
 * / "*" / "+" / "," / ";" / "=" [bit1]
 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" [bit2]
 * pchar = unreserved / sub-delims / ":" / "@" [bit0|bit1|bit3]
 * 'pfchar' = unreserved / sub-delims / ":" / "@" / "/"
 * [bit0|bit1|bit3|bit5]
 * 'uchar' = unreserved / sub-delims / ":" [bit0|bit1|bit4]
 * 'qchar' = pchar / "/" / "?" [bit0|bit1|bit3|bit5|bit6]
 *
 * In the table below the individual extra bits are carried by:
 *   bit3: ':' and '@'   bit4: ':'   bit5: '/'   bit6: '?'
 *
 * pct-encoded ("%XX") is not represented in the table ('%' maps to 0).
 */

/* A character belongs to a class when its table entry has at least one bit
   in common with the class mask. */
#define CHAR_MASK_UNRESERVED (1<<0)
#define CHAR_MASK_SUB_DELIMS (1<<1)
#define CHAR_MASK_PCHAR ((1<<0)|(1<<1)|(1<<3))
#define CHAR_MASK_PFCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5))
#define CHAR_MASK_UCHAR ((1<<0)|(1<<1)|(1<<4))
#define CHAR_MASK_QCHAR ((1<<0)|(1<<1)|(1<<3)|(1<<5)|(1<<6))
/* unreserved characters plus '/' */
#define CHAR_MASK_UNRESERVED_PATH ((1<<0)|(1<<5))

/* Only the first 128 entries are listed; the remaining entries are
   implicitly zero. The parsers in this file additionally reject bytes with
   the high bit set before consulting the table. */
static unsigned const char _uri_char_lookup[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10
	0, 2, 0, 4, 2, 0, 2, 2, 2, 2, 2, 2, 2, 1, 1, 36, // 20
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 28, 2, 0, 2, 0, 68, // 30
	12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 4, 0, 1, // 50
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 70
};
117
/* Convert one hexadecimal digit character (0-9, a-f, A-F) to its numeric
   value 0..15. Returns -1 for any other character. */
static inline int _decode_hex_digit(const unsigned char digit)
{
	if (digit >= '0' && digit <= '9')
		return digit - '0';
	if (digit >= 'a' && digit <= 'f')
		return digit - 'a' + 0x0a;
	if (digit >= 'A' && digit <= 'F')
		return digit - 'A' + 0x0A;
	return -1;
}
133
134 static int
uri_parse_pct_encoded_data(struct uri_parser * parser,const unsigned char ** p,const unsigned char * pend,unsigned char * ch_r)135 uri_parse_pct_encoded_data(struct uri_parser *parser,
136 const unsigned char **p, const unsigned char *pend,
137 unsigned char *ch_r) ATTR_NULL(3)
138 {
139 int value;
140
141 if (**p != '%' || (pend != NULL && *p >= pend))
142 return 0;
143 *p += 1;
144
145 if (**p == 0 || *(*p+1) == 0 || (pend != NULL && *p+1 >= pend)) {
146 parser->error = "Unexpected URI boundary after '%'";
147 return -1;
148 }
149
150 if ((value = _decode_hex_digit(**p)) < 0) {
151 parser->error = p_strdup_printf(parser->pool,
152 "Expecting hex digit after '%%', but found '%c'", **p);
153 return -1;
154 }
155
156 *ch_r = (value & 0x0f) << 4;
157 *p += 1;
158
159 if ((value = _decode_hex_digit(**p)) < 0) {
160 parser->error = p_strdup_printf(parser->pool,
161 "Expecting hex digit after '%%%c', but found '%c'", *((*p)-1), **p);
162 return -1;
163 }
164
165 *ch_r |= (value & 0x0f);
166 *p += 1;
167
168 if (!parser->allow_pct_nul && *ch_r == '\0') {
169 parser->error =
170 "Percent encoding is not allowed to encode NUL character";
171 return -1;
172 }
173 return 1;
174 }
175
uri_parse_pct_encoded(struct uri_parser * parser,unsigned char * ch_r)176 int uri_parse_pct_encoded(struct uri_parser *parser,
177 unsigned char *ch_r)
178 {
179 return uri_parse_pct_encoded_data
180 (parser, &parser->cur, parser->end, ch_r);
181 }
182
183 static int
uri_parse_unreserved_char(struct uri_parser * parser,unsigned char * ch_r)184 uri_parse_unreserved_char(struct uri_parser *parser, unsigned char *ch_r)
185 {
186 if ((*parser->cur & 0x80) != 0)
187 return 0;
188
189 if ((_uri_char_lookup[*parser->cur] & CHAR_MASK_UNRESERVED) != 0) {
190 *ch_r = *parser->cur;
191 parser->cur++;
192 return 1;
193 }
194 return 0;
195 }
196
/* Consume a run of 'unreserved' characters. Each consumed character is
   appended to part when part is not NULL. Returns 1 when at least one
   character was consumed, 0 when none was, -1 when the character parser
   reports an error. */
int uri_parse_unreserved(struct uri_parser *parser, string_t *part)
{
	int consumed = 0;

	for (; parser->cur < parser->end; consumed++) {
		unsigned char ch = 0;
		int ret = uri_parse_unreserved_char(parser, &ch);

		if (ret < 0)
			return -1;
		if (ret == 0)
			break;
		if (part != NULL)
			str_append_c(part, ch);
	}

	if (consumed > 0)
		return 1;
	return 0;
}
217
/* Consume a run of 'unreserved' characters and percent-encoded octets.
   Percent-encoded octets are appended to part in decoded form, unreserved
   characters as they are (part may be NULL). Returns 1 when at least one
   item was consumed, 0 when none was, -1 on a malformed percent encoding. */
int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part)
{
	int consumed = 0;

	while (parser->cur < parser->end) {
		unsigned char ch = 0;
		int ret;

		/* try pct-encoded first, then a plain unreserved character */
		ret = uri_parse_pct_encoded(parser, &ch);
		if (ret == 0)
			ret = uri_parse_unreserved_char(parser, &ch);
		if (ret < 0)
			return -1;
		if (ret == 0)
			break;

		if (part != NULL)
			str_append_c(part, ch);
		consumed++;
	}

	if (consumed > 0)
		return 1;
	return 0;
}
241
uri_data_decode(struct uri_parser * parser,const char * data,const char * until,const char ** decoded_r)242 bool uri_data_decode(struct uri_parser *parser, const char *data,
243 const char *until, const char **decoded_r)
244 {
245 const unsigned char *p = (const unsigned char *)data;
246 const unsigned char *pend = (const unsigned char *)until;
247 string_t *decoded;
248 int ret;
249
250 if (pend == NULL) {
251 /* NULL means unlimited; solely rely on '\0' */
252 pend = (const unsigned char *)SIZE_MAX;
253 }
254
255 if (p >= pend || *p == '\0') {
256 if (decoded_r != NULL)
257 *decoded_r = "";
258 return TRUE;
259 }
260
261 decoded = uri_parser_get_tmpbuf(parser, 256);
262 while (p < pend && *p != '\0') {
263 unsigned char ch;
264
265 if ((ret=uri_parse_pct_encoded_data
266 (parser, &p, NULL, &ch)) != 0) {
267 if (ret < 0)
268 return FALSE;
269 str_append_c(decoded, ch);
270 } else {
271 str_append_c(decoded, *p);
272 p++;
273 }
274 }
275
276 if (decoded_r != NULL)
277 *decoded_r = p_strdup(parser->pool, str_c(decoded));
278 return TRUE;
279 }
280
uri_parse_scheme(struct uri_parser * parser,const char ** scheme_r)281 int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
282 {
283 const unsigned char *first = parser->cur;
284 size_t len = 1;
285
286 /* RFC 3968:
287 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
288 */
289
290 if (parser->cur >= parser->end || !i_isalpha(*parser->cur))
291 return 0;
292 parser->cur++;
293
294 while (len < URI_MAX_SCHEME_NAME_LEN &&
295 parser->cur < parser->end) {
296 if (!i_isalnum(*parser->cur) &&
297 *parser->cur != '+' && *parser->cur != '-' &&
298 *parser->cur != '.')
299 break;
300 parser->cur++;
301 len++;
302 }
303
304 if (parser->cur >= parser->end || *parser->cur != ':') {
305 parser->error = "Invalid URI scheme";
306 return -1;
307 }
308 if (scheme_r != NULL)
309 *scheme_r = t_strndup(first, parser->cur - first);
310 parser->cur++;
311 return 1;
312 }
313
uri_cut_scheme(const char ** uri_p,const char ** scheme_r)314 int uri_cut_scheme(const char **uri_p, const char **scheme_r)
315 {
316 struct uri_parser parser;
317
318 uri_parser_init(&parser, NULL, *uri_p);
319 if (uri_parse_scheme(&parser, scheme_r) <= 0)
320 return -1;
321 *uri_p = (const char *)parser.cur;
322 return 0;
323 }
324
325 static int
uri_parse_dec_octet(struct uri_parser * parser,string_t * literal,uint8_t * octet_r)326 uri_parse_dec_octet(struct uri_parser *parser, string_t *literal,
327 uint8_t *octet_r) ATTR_NULL(2)
328 {
329 unsigned int octet = 0;
330 int count = 0;
331
332 /* RFC 3986:
333 *
334 * dec-octet = DIGIT ; 0-9
335 * / %x31-39 DIGIT ; 10-99
336 * / "1" 2DIGIT ; 100-199
337 * / "2" %x30-34 DIGIT ; 200-249
338 * / "25" %x30-35 ; 250-255
339 */
340
341 while (parser->cur < parser->end && i_isdigit(*parser->cur)) {
342 octet = octet * 10 + (parser->cur[0] - '0');
343 if (octet > 255)
344 return -1;
345
346 if (literal != NULL)
347 str_append_c(literal, *parser->cur);
348
349 parser->cur++;
350 count++;
351 }
352
353 if (count > 0) {
354 *octet_r = octet;
355 return 1;
356 }
357 return 0;
358 }
359
360 static int
uri_parse_ipv4address(struct uri_parser * parser,string_t * literal,struct in_addr * ip4_r)361 uri_parse_ipv4address(struct uri_parser *parser, string_t *literal,
362 struct in_addr *ip4_r) ATTR_NULL(2,3)
363 {
364 uint8_t octet;
365 uint32_t ip = 0;
366 int ret;
367 int i;
368
369 /* RFC 3986:
370 *
371 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
372 */
373
374 if ((ret = uri_parse_dec_octet(parser, literal, &octet)) <= 0)
375 return ret;
376 ip = octet;
377
378 for (i = 0; i < 3 && parser->cur < parser->end; i++) {
379 if (*parser->cur != '.')
380 return -1;
381
382 if (literal != NULL)
383 str_append_c(literal, '.');
384 parser->cur++;
385
386 if (uri_parse_dec_octet(parser, literal, &octet) <= 0)
387 return -1;
388 ip = (ip << 8) + octet;
389 }
390
391 if (ip4_r != NULL)
392 ip4_r->s_addr = htonl(ip);
393 return 1;
394 }
395
396 static int
uri_do_parse_reg_name(struct uri_parser * parser,string_t * reg_name)397 uri_do_parse_reg_name(struct uri_parser *parser,
398 string_t *reg_name) ATTR_NULL(2)
399 {
400 /* RFC 3986:
401 *
402 * reg-name = *( unreserved / pct-encoded / sub-delims )
403 */
404
405 while (parser->cur < parser->end) {
406 int ret;
407 unsigned char c;
408
409 /* unreserved / pct-encoded */
410 if ((ret=uri_parse_pct_encoded(parser, &c)) < 0)
411 return -1;
412 else if (ret == 0 &&
413 (ret=uri_parse_unreserved_char(parser, &c)) < 0)
414 return -1;
415
416 if (ret > 0) {
417 if (reg_name != NULL)
418 str_append_c(reg_name, c);
419 continue;
420 }
421
422 /* sub-delims */
423 c = *parser->cur;
424 if ((c & 0x80) == 0 && (_uri_char_lookup[c] & CHAR_MASK_SUB_DELIMS) != 0) {
425 if (reg_name != NULL)
426 str_append_c(reg_name, *parser->cur);
427 parser->cur++;
428 continue;
429 }
430 break;
431 }
432 return 0;
433 }
434
uri_parse_reg_name(struct uri_parser * parser,const char ** reg_name_r)435 int uri_parse_reg_name(struct uri_parser *parser,
436 const char **reg_name_r)
437 {
438 string_t *reg_name = NULL;
439 int ret;
440
441 if (reg_name_r != NULL)
442 reg_name = uri_parser_get_tmpbuf(parser, 256);
443
444 if ((ret=uri_do_parse_reg_name(parser, reg_name)) <= 0)
445 return ret;
446
447 if (reg_name_r != NULL)
448 *reg_name_r = str_c(reg_name);
449 return 1;
450 }
451
/* Parse a DNS-style host name (labels separated by '.').
 *
 * Returns 1 when the parser position advanced, 0 when nothing was consumed,
 * and -1 on an invalid name or percent encoding. Not every -1 path in this
 * function sets parser->error itself. When host_name is not NULL the
 * name is appended to it with percent encodings decoded, and a trailing '.'
 * is removed from it before returning 1.
 */
static int uri_do_parse_host_name(struct uri_parser *parser,
				  string_t *host_name) ATTR_NULL(2)
{
	const unsigned char *first, *part;
	int ret;

	/* RFC 3986, Section 3.2.2:

	   A registered name intended for lookup in the DNS uses the syntax
	   defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123].
	   Such a name consists of a sequence of domain labels separated by ".",
	   each domain label starting and ending with an alphanumeric character
	   and possibly also containing "-" characters. The rightmost domain
	   label of a fully qualified domain name in DNS may be followed by a
	   single "." and should be if it is necessary to distinguish between
	   the complete domain name and some local domain.

	   RFC 2396, Section 3.2.2 (old URI specification):

	   hostname = *( domainlabel "." ) toplabel [ "." ]
	   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
	   toplabel = alpha | alpha *( alphanum | "-" ) alphanum

	   The description in RFC 3986 is more liberal, so:

	   hostname = *( domainlabel "." ) domainlabel [ "." ]
	   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum

	   We also support percent encoding in spirit of the generic reg-name,
	   even though this should explicitly not be used according to the RFC.
	   It is, however, not strictly forbidden (unlike older RFC), so we
	   support it.
	 */

	/* 'part' marks the start of the raw input that has not been copied
	   to host_name yet; it is moved forward whenever a decoded octet or
	   a '.' has been appended. */
	first = part = parser->cur;
	for (;;) {
		const unsigned char *offset;
		unsigned char ch, pch;

		/* Check the boundary before reading the next character, so
		   that nothing at parser->end is dereferenced. */
		if (parser->cur >= parser->end)
			break;

		/* alphanum */
		offset = parser->cur;
		ch = pch = *parser->cur;
		if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
			return -1;
		} else if (ret > 0) {
			if (!i_isalnum(ch))
				return -1;
			if (host_name != NULL)
				str_append_c(host_name, ch);
			part = parser->cur;
		} else {
			if (!i_isalnum(*parser->cur))
				break;
			parser->cur++;
		}

		if (parser->cur < parser->end) {
			/* *( alphanum | "-" ) alphanum */
			do {
				offset = parser->cur;

				if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0) {
					return -1;
				} else if (ret > 0) {
					if (!i_isalnum(ch) && ch != '-')
						break;
					if (host_name != NULL) {
						/* flush the raw text before the
						   encoded octet, then the
						   decoded octet itself */
						if (offset > part)
							str_append_data(host_name, part, offset - part);
						str_append_c(host_name, ch);
					}
					part = parser->cur;
				} else {
					ch = *parser->cur;
					if (!i_isalnum(ch) && ch != '-')
						break;
					parser->cur++;
				}
				pch = ch;
			} while (parser->cur < parser->end);

			/* pch is the last character accepted for this label;
			   a label may not end in '-' */
			if (!i_isalnum(pch)) {
				parser->error = "Invalid domain label in hostname";
				return -1;
			}
		}

		if (host_name != NULL && parser->cur > part)
			str_append_data(host_name, part, parser->cur - part);

		/* "." */
		if (parser->cur >= parser->end || ch != '.')
			break;
		if (host_name != NULL)
			str_append_c(host_name, '.');
		/* a literal '.' has not been consumed yet */
		if (parser->cur == offset)
			parser->cur++;
		part = parser->cur;
	}

	if (parser->cur == first)
		return 0;

	/* remove trailing '.' */
	if (host_name != NULL) {
		const char *name = str_c(host_name);

		i_assert(str_len(host_name) > 0);
		if (name[str_len(host_name)-1] == '.')
			str_truncate(host_name, str_len(host_name)-1);
	}
	return 1;
}
567
uri_parse_host_name(struct uri_parser * parser,const char ** host_name_r)568 int uri_parse_host_name(struct uri_parser *parser,
569 const char **host_name_r)
570 {
571 string_t *host_name = NULL;
572 int ret;
573
574 if (host_name_r != NULL)
575 host_name = uri_parser_get_tmpbuf(parser, 256);
576
577 if ((ret=uri_do_parse_host_name(parser, host_name)) <= 0)
578 return ret;
579
580 if (host_name_r != NULL)
581 *host_name_r = str_c(host_name);
582 return 1;
583 }
584
585 static int
uri_parse_ip_literal(struct uri_parser * parser,string_t * literal,struct in6_addr * ip6_r)586 uri_parse_ip_literal(struct uri_parser *parser, string_t *literal,
587 struct in6_addr *ip6_r) ATTR_NULL(2,3)
588 {
589 const unsigned char *p;
590 const char *address;
591 struct in6_addr ip6;
592
593 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]"
594 * IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
595 * IPv6address = ; Syntax not relevant: parsed using inet_pton()
596 */
597
598 /* "[" already verified */
599
600 /* Scan for end of address */
601 for (p = parser->cur+1; p < parser->end; p++) {
602 if (*p == ']')
603 break;
604 }
605
606 if (p >= parser->end || *p != ']') {
607 parser->error = "Expecting ']' at end of IP-literal";
608 return -1;
609 }
610
611 if (literal != NULL)
612 str_append_data(literal, parser->cur, p-parser->cur+1);
613 address = t_strdup_until(parser->cur+1, p);
614 parser->cur = p + 1;
615
616 if (*address == '\0') {
617 parser->error = "Empty IPv6 host address";
618 return -1;
619 }
620 if (*address == 'v') {
621 parser->error = p_strdup_printf(parser->pool,
622 "Future IP host address '%s' not supported", address);
623 return -1;
624 }
625 if (inet_pton(AF_INET6, address, &ip6) <= 0) {
626 parser->error = p_strdup_printf(parser->pool,
627 "Invalid IPv6 host address '%s'", address);
628 return -1;
629 }
630 if (ip6_r != NULL)
631 *ip6_r = ip6;
632 return 1;
633 }
634
635 static int
uri_do_parse_host(struct uri_parser * parser,struct uri_host * host,bool host_name)636 uri_do_parse_host(struct uri_parser *parser,
637 struct uri_host *host, bool host_name)
638 ATTR_NULL(2)
639 {
640 const unsigned char *preserve;
641 struct in_addr ip4;
642 struct in6_addr ip6;
643 string_t *literal = NULL;
644 int ret;
645
646 /* RFC 3986:
647 *
648 * host = IP-literal / IPv4address / reg-name
649 */
650
651 if (host != NULL)
652 i_zero(host);
653
654 literal = uri_parser_get_tmpbuf(parser, 256);
655
656 /* IP-literal / */
657 if (parser->cur < parser->end && *parser->cur == '[') {
658 if (uri_parse_ip_literal(parser, literal, &ip6) <= 0)
659 return -1;
660
661 if (host != NULL) {
662 host->name = p_strdup(parser->pool, str_c(literal));;
663 host->ip.family = AF_INET6;
664 host->ip.u.ip6 = ip6;
665 }
666 return 1;
667 }
668
669 /* IPv4address /
670 *
671 * If it fails to parse, we try to parse it as a reg-name
672 */
673 preserve = parser->cur;
674 if ((ret = uri_parse_ipv4address(parser, literal, &ip4)) > 0) {
675 if (host != NULL) {
676 host->name = p_strdup(parser->pool, str_c(literal));
677 host->ip.family = AF_INET;
678 host->ip.u.ip4 = ip4;
679 }
680 return ret;
681 }
682 parser->cur = preserve;
683 str_truncate(literal, 0);
684
685 /* reg-name */
686 if (host_name) {
687 if (uri_do_parse_host_name(parser, literal) < 0)
688 return -1;
689 } else if (uri_do_parse_reg_name(parser, literal) < 0)
690 return -1;
691 if (host != NULL)
692 host->name = p_strdup(parser->pool, str_c(literal));
693 return 0;
694 }
695
uri_parse_host(struct uri_parser * parser,struct uri_host * host)696 int uri_parse_host(struct uri_parser *parser,
697 struct uri_host *host)
698 {
699 return uri_do_parse_host(parser, host, TRUE);
700 }
701
702 static int
uri_parse_port(struct uri_parser * parser,struct uri_authority * auth)703 uri_parse_port(struct uri_parser *parser,
704 struct uri_authority *auth) ATTR_NULL(2)
705 {
706 const unsigned char *first;
707 in_port_t port;
708
709 /* RFC 3986:
710 *
711 * port = *DIGIT
712 */
713
714 first = parser->cur;
715 while (parser->cur < parser->end && i_isdigit(*parser->cur))
716 parser->cur++;
717
718 if (parser->cur == first)
719 return 0;
720 if (net_str2port(t_strdup_until(first, parser->cur), &port) < 0) {
721 parser->error = "Invalid port number";
722 return -1;
723 }
724
725 if (auth != NULL)
726 auth->port = port;
727 return 1;
728 }
729
730 static int
uri_do_parse_authority(struct uri_parser * parser,struct uri_authority * auth,bool host_name)731 uri_do_parse_authority(struct uri_parser *parser,
732 struct uri_authority *auth, bool host_name) ATTR_NULL(2)
733 {
734 const unsigned char *p;
735 int ret;
736
737 /*
738 * authority = [ userinfo "@" ] host [ ":" port ]
739 */
740
741 if (auth != NULL)
742 i_zero(auth);
743
744 /* Scan ahead to check whether there is a [userinfo "@"] uri component */
745 for (p = parser->cur; p < parser->end; p++){
746 /* refuse 8bit characters */
747 if ((*p & 0x80) != 0)
748 break;
749
750 /* break at first delimiter */
751 if (*p != '%' && (_uri_char_lookup[*p] & CHAR_MASK_UCHAR) == 0)
752 break;
753 }
754
755 /* Extract userinfo */
756 if (p < parser->end && *p == '@') {
757 if (auth != NULL)
758 auth->enc_userinfo = p_strdup_until(parser->pool, parser->cur, p);
759 parser->cur = p+1;
760 }
761
762 /* host */
763 if (uri_do_parse_host(parser,
764 (auth == NULL ? NULL : &auth->host), host_name) < 0)
765 return -1;
766 if (parser->cur == parser->end)
767 return 1;
768 switch (*parser->cur) {
769 case ':': case '/': case '?': case '#':
770 break;
771 default:
772 parser->error = "Invalid host identifier";
773 return -1;
774 }
775
776 /* [":" port] */
777 if (*parser->cur == ':') {
778 parser->cur++;
779
780 if ((ret = uri_parse_port(parser, auth)) < 0)
781 return ret;
782 if (parser->cur == parser->end)
783 return 1;
784 switch (*parser->cur) {
785 case '/': case '?': case '#':
786 break;
787 default:
788 parser->error = "Invalid host port";
789 return -1;
790 }
791 }
792
793 return 1;
794 }
795
796 static int
uri_do_parse_slashslash_authority(struct uri_parser * parser,struct uri_authority * auth,bool host_name)797 uri_do_parse_slashslash_authority(struct uri_parser *parser,
798 struct uri_authority *auth, bool host_name)
799 ATTR_NULL(2)
800 {
801 /* "//" authority */
802
803 if ((parser->end - parser->cur) <= 2 || parser->cur[0] != '/' ||
804 parser->cur[1] != '/')
805 return 0;
806
807 parser->cur += 2;
808 return uri_do_parse_authority(parser, auth, host_name);
809 }
810
uri_parse_authority(struct uri_parser * parser,struct uri_authority * auth)811 int uri_parse_authority(struct uri_parser *parser,
812 struct uri_authority *auth)
813 {
814 return uri_do_parse_authority(parser, auth, FALSE);
815 }
816
uri_parse_slashslash_authority(struct uri_parser * parser,struct uri_authority * auth)817 int uri_parse_slashslash_authority(struct uri_parser *parser,
818 struct uri_authority *auth)
819 {
820 return uri_do_parse_slashslash_authority(parser, auth, FALSE);
821 }
822
uri_parse_host_authority(struct uri_parser * parser,struct uri_authority * auth)823 int uri_parse_host_authority(struct uri_parser *parser,
824 struct uri_authority *auth)
825 {
826 return uri_do_parse_authority(parser, auth, TRUE);
827 }
828
uri_parse_slashslash_host_authority(struct uri_parser * parser,struct uri_authority * auth)829 int uri_parse_slashslash_host_authority(struct uri_parser *parser,
830 struct uri_authority *auth)
831 {
832 return uri_do_parse_slashslash_authority(parser, auth, TRUE);
833 }
834
uri_parse_path_segment(struct uri_parser * parser,const char ** segment_r)835 int uri_parse_path_segment(struct uri_parser *parser, const char **segment_r)
836 {
837 const unsigned char *first = parser->cur;
838 int ret;
839
840 while (parser->cur < parser->end) {
841 if (*parser->cur == '%') {
842 unsigned char ch = 0;
843 if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0)
844 return -1;
845 if (ret > 0)
846 continue;
847 }
848
849 if ((*parser->cur & 0x80) != 0 ||
850 (_uri_char_lookup[*parser->cur] & CHAR_MASK_PCHAR) == 0)
851 break;
852
853 parser->cur++;
854 }
855
856 if (parser->cur < parser->end &&
857 *parser->cur != '/' && *parser->cur != '?' && *parser->cur != '#' ) {
858 parser->error =
859 "Path component contains invalid character";
860 return -1;
861 }
862
863 if (first == parser->cur)
864 return 0;
865
866 if (segment_r != NULL)
867 *segment_r = p_strdup_until(parser->pool, first, parser->cur);
868 return 1;
869 }
870
uri_parse_path(struct uri_parser * parser,int * relative_r,const char * const ** path_r)871 int uri_parse_path(struct uri_parser *parser,
872 int *relative_r, const char *const **path_r)
873 {
874 const unsigned char *pbegin = parser->cur;
875 ARRAY_TYPE(const_string) segments;
876 const char *segment = NULL;
877 unsigned int count;
878 int relative = 1;
879 int ret;
880
881 count = 0;
882 if (path_r != NULL)
883 p_array_init(&segments, parser->pool, 16);
884 else
885 i_zero(&segments);
886
887 /* check for a leading '/' and indicate absolute path
888 when it is present
889 */
890 if (parser->cur < parser->end && *parser->cur == '/') {
891 parser->cur++;
892 relative = 0;
893 }
894
895 /* parse first segment */
896 if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
897 return -1;
898
899 for (;;) {
900 if (ret > 0) {
901 /* strip dot segments */
902 if (segment[0] == '.') {
903 if (segment[1] == '.') {
904 if (segment[2] == '\0') {
905 /* '..' -> skip and... */
906 segment = NULL;
907
908 /* ... pop last segment (if any) */
909 if (count > 0) {
910 if (path_r != NULL) {
911 i_assert(count == array_count(&segments));
912 array_delete(&segments, count-1, 1);
913 }
914 count--;
915 } else if ( relative > 0 ) {
916 relative++;
917 }
918 }
919 } else if (segment[1] == '\0') {
920 /* '.' -> skip */
921 segment = NULL;
922 }
923 }
924 } else {
925 segment = "";
926 }
927
928 if (segment != NULL) {
929 if (path_r != NULL)
930 array_push_back(&segments, &segment);
931 count++;
932 }
933
934 if (parser->cur >= parser->end || *parser->cur != '/')
935 break;
936 parser->cur++;
937
938 /* parse next path segment */
939 if ((ret = uri_parse_path_segment(parser, &segment)) < 0)
940 return -1;
941 }
942
943 if (relative_r != NULL)
944 *relative_r = relative;
945 if (path_r != NULL)
946 *path_r = NULL;
947
948 if (parser->cur == pbegin) {
949 /* path part of URI is empty */
950 return 0;
951 }
952
953 if (path_r != NULL) {
954 /* special treatment for a trailing '..' or '.' */
955 if (segment == NULL) {
956 segment = "";
957 array_push_back(&segments, &segment);
958 }
959 array_append_zero(&segments);
960 *path_r = array_get(&segments, &count);
961 }
962 if (parser->cur < parser->end &&
963 *parser->cur != '?' && *parser->cur != '#') {
964 parser->error = "Path component contains invalid character";
965 return -1;
966 }
967 return 1;
968 }
969
uri_parse_query(struct uri_parser * parser,const char ** query_r)970 int uri_parse_query(struct uri_parser *parser, const char **query_r)
971 {
972 const unsigned char *first = parser->cur;
973 int ret;
974
975 /* RFC 3986:
976 *
977 * URI = { ... } [ "?" query ] { ... }
978 * query = *( pchar / "/" / "?" )
979 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
980 */
981 if (parser->cur >= parser->end || *parser->cur != '?')
982 return 0;
983 parser->cur++;
984
985 while (parser->cur < parser->end) {
986 if (*parser->cur == '%') {
987 unsigned char ch = 0;
988 if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0)
989 return -1;
990 if (ret > 0)
991 continue;
992 }
993
994 if ((*parser->cur & 0x80) != 0 ||
995 (_uri_char_lookup[*parser->cur] & CHAR_MASK_QCHAR) == 0)
996 break;
997 parser->cur++;
998 }
999
1000 if (parser->cur < parser->end && *parser->cur != '#') {
1001 parser->error = "Query component contains invalid character";
1002 return -1;
1003 }
1004
1005 if (query_r != NULL)
1006 *query_r = p_strdup_until(parser->pool, first+1, parser->cur);
1007 return 1;
1008 }
1009
uri_parse_fragment(struct uri_parser * parser,const char ** fragment_r)1010 int uri_parse_fragment(struct uri_parser *parser, const char **fragment_r)
1011 {
1012 const unsigned char *first = parser->cur;
1013 int ret;
1014
1015 /* RFC 3986:
1016 *
1017 * URI = { ... } [ "#" fragment ]
1018 * fragment = *( pchar / "/" / "?" )
1019 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
1020 */
1021
1022 if (parser->cur >= parser->end || *parser->cur != '#')
1023 return 0;
1024 parser->cur++;
1025
1026 while (parser->cur < parser->end) {
1027 if (*parser->cur == '%') {
1028 unsigned char ch = 0;
1029 if ((ret=uri_parse_pct_encoded(parser, &ch)) < 0)
1030 return -1;
1031 if (ret > 0)
1032 continue;
1033 }
1034
1035 if ((*parser->cur & 0x80) != 0 ||
1036 (_uri_char_lookup[*parser->cur] & CHAR_MASK_QCHAR) == 0)
1037 break;
1038 parser->cur++;
1039 }
1040
1041 if (parser->cur < parser->end) {
1042 parser->error = "Fragment component contains invalid character";
1043 return -1;
1044 }
1045
1046 if (fragment_r != NULL)
1047 *fragment_r = p_strdup_until(parser->pool, first+1, parser->cur);
1048 return 1;
1049 }
1050
/* Initialize a parser over the size bytes starting at data. All parser
   fields are zeroed first; pool is stored in the parser and the begin and
   current positions both start at data. */
void uri_parser_init_data(struct uri_parser *parser,
			  pool_t pool, const unsigned char *data, size_t size)
{
	i_zero(parser);
	parser->pool = pool;
	parser->begin = data;
	parser->cur = data;
	parser->end = data + size;
}
1059
/* Initialize a parser over a NUL-terminated URI string; the terminator is
   not part of the parsed data. */
void uri_parser_init(struct uri_parser *parser,
		     pool_t pool, const char *uri)
{
	const unsigned char *data = (const unsigned char *)uri;
	size_t size = strlen(uri);

	uri_parser_init_data(parser, pool, data, size);
}
1066
uri_parser_get_tmpbuf(struct uri_parser * parser,size_t size)1067 string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, size_t size)
1068 {
1069 if (parser->tmpbuf == NULL)
1070 parser->tmpbuf = str_new(parser->pool, size);
1071 else
1072 str_truncate(parser->tmpbuf, 0);
1073 return parser->tmpbuf;
1074 }
1075
uri_parse_absolute_generic(struct uri_parser * parser,enum uri_parse_flags flags)1076 int uri_parse_absolute_generic(struct uri_parser *parser,
1077 enum uri_parse_flags flags)
1078 {
1079 int relative, aret, ret = 0;
1080
1081 /*
1082 URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
1083
1084 hier-part = "//" authority path-abempty
1085 / path-absolute
1086 / path-rootless
1087 / path-empty
1088 path-abempty = *( "/" segment )
1089 path-absolute = "/" [ segment-nz *( "/" segment ) ]
1090 path-rootless = segment-nz *( "/" segment )
1091 path-empty = 0<pchar>
1092
1093 segment = *pchar
1094 segment-nz = 1*pchar
1095 */
1096
1097 /* scheme ":" */
1098 if ((flags & URI_PARSE_SCHEME_EXTERNAL) == 0 &&
1099 (ret=uri_parse_scheme(parser, NULL)) <= 0) {
1100 if (ret == 0)
1101 parser->error = "Missing scheme";
1102 return -1;
1103 }
1104
1105 /* "//" authority */
1106 if ((aret=uri_parse_slashslash_authority
1107 (parser, NULL)) < 0)
1108 return -1;
1109
1110 /* path-absolute / path-rootless / path-empty */
1111 if (aret == 0) {
1112 ret = uri_parse_path(parser, &relative, NULL);
1113 /* path-abempty */
1114 } else if (parser->cur < parser->end && *parser->cur == '/') {
1115 ret = uri_parse_path(parser, &relative, NULL);
1116 i_assert(ret <= 0 || relative == 0);
1117 }
1118 if (ret < 0)
1119 return -1;
1120
1121 /* [ "?" query ] */
1122 if (uri_parse_query(parser, NULL) < 0)
1123 return -1;
1124
1125 /* [ "#" fragment ] */
1126 if ((ret=uri_parse_fragment(parser, NULL)) < 0)
1127 return ret;
1128 if (ret > 0 && (flags & URI_PARSE_ALLOW_FRAGMENT_PART) == 0) {
1129 parser->error = "Fragment part not allowed";
1130 return -1;
1131 }
1132
1133 i_assert(parser->cur == parser->end);
1134 return 0;
1135 }
1136
1137 /*
1138 * Generic URI manipulation
1139 */
1140
/* Copy *src into *dest, duplicating the host name string into pool.

   If src has no name but does carry an IP address (ip.family != 0), the
   textual form of that address from net_ip2addr() is used as the name of
   the copy. */
void uri_host_copy(pool_t pool, struct uri_host *dest,
		   const struct uri_host *src)
{
	const char *name;

	if (src->name != NULL || src->ip.family == 0) {
		name = src->name;
	} else {
		/* caller provided only an IP; derive the name from it */
		name = net_ip2addr(&src->ip);
		i_assert(*name != '\0');
	}

	*dest = *src;
	dest->name = p_strdup(pool, name);
}
1155
1156 /*
1157 * Check generic URI
1158 */
1159
uri_check_data(const unsigned char * data,size_t size,enum uri_parse_flags flags,const char ** error_r)1160 int uri_check_data(const unsigned char *data, size_t size,
1161 enum uri_parse_flags flags, const char **error_r)
1162 {
1163 struct uri_parser parser;
1164 int ret;
1165
1166 i_zero(&parser);
1167 parser.pool = pool_datastack_create();
1168 parser.begin = parser.cur = data;
1169 parser.end = data + size;
1170
1171 ret = uri_parse_absolute_generic(&parser, flags);
1172 *error_r = parser.error;
1173 return ret;
1174 }
1175
/* Same as uri_check_data(), but for a NUL-terminated string: the length
   is taken with strlen() and the result is passed through unchanged. */
int uri_check(const char *uri, enum uri_parse_flags flags,
	      const char **error_r)
{
	const unsigned char *data = (const unsigned char *)uri;
	size_t size = strlen(uri);

	return uri_check_data(data, size, flags, error_r);
}
1182
1183 /*
1184 * Generic URI construction
1185 */
1186
/* Append the NUL-terminated string data to out, percent-encoding bytes as
   needed.

   A byte is written as "%" followed by two lowercase hex digits when any
   of the following holds:
     - its high bit (0x80) is set
     - esc_table[byte] has none of the bits in esc_mask set
     - esc_extra is not NULL and contains the byte
   All other bytes are copied unchanged, in runs. */
void uri_data_encode(string_t *out,
		     const unsigned char esc_table[256],
		     unsigned char esc_mask, const char *esc_extra,
		     const char *data)
{
	const unsigned char *run_start, *cur;

	run_start = (const unsigned char *)data;
	for (cur = run_start; *cur != '\0'; cur++) {
		if ((*cur & 0x80) == 0 &&
		    (esc_table[*cur] & esc_mask) != 0 &&
		    (esc_extra == NULL ||
		     strchr(esc_extra, (char)*cur) == NULL)) {
			/* literal byte; it stays part of the pending run */
			continue;
		}

		/* flush the literal run collected so far, then the escape */
		if (cur > run_start)
			str_append_data(out, run_start, cur - run_start);
		str_printfa(out, "%%%02x", *cur);
		run_start = cur + 1;
	}

	/* trailing literal run, if any */
	if (cur > run_start)
		str_append_data(out, run_start, cur - run_start);
}
1210
/* Append scheme followed by ':' to out. The scheme string is written
   as-is; no encoding or validation is applied here. */
void uri_append_scheme(string_t *out, const char *scheme)
{
	str_append(out, scheme);
	/* delimiter between scheme and the rest of the URI */
	str_append_c(out, ':');
}
1216
/* Append data to out, percent-encoding it with uri_data_encode() using
   the CHAR_MASK_UCHAR character class. Any characters listed in esc
   (which may be NULL) are encoded as well. */
void uri_append_user_data(string_t *out, const char *esc,
			  const char *data)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_UCHAR,
			esc, data);
}
1222
/* Append the encoded userinfo followed by the '@' delimiter to out.
   Encoding uses the CHAR_MASK_UCHAR character class with no extra
   escapes, as uri_append_user_data(out, NULL, userinfo) does. */
void uri_append_userinfo(string_t *out, const char *userinfo)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_UCHAR,
			NULL, userinfo);
	str_append_c(out, '@');
}
1228
/* Append a host name to out, percent-encoding every byte that is not in
   the unreserved or sub-delims character classes. */
void uri_append_host_name(string_t *out, const char *name)
{
	const unsigned char allowed_mask =
		CHAR_MASK_UNRESERVED | CHAR_MASK_SUB_DELIMS;

	uri_data_encode(out, _uri_char_lookup, allowed_mask, NULL, name);
}
1234
/* Append the textual form of host_ip to out. An AF_INET address is
   written bare; an AF_INET6 address is wrapped in '[' and ']'. The
   address family must be one of these two (asserted). */
void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip)
{
	const char *addr_text = net_ip2addr(host_ip);

	i_assert(host_ip->family != 0);

	if (host_ip->family != AF_INET) {
		/* only IPv6 remains; emit it as a bracketed literal */
		i_assert(host_ip->family == AF_INET6);
		str_append_c(out, '[');
		str_append(out, addr_text);
		str_append_c(out, ']');
	} else {
		str_append(out, addr_text);
	}
}
1251
/* Append a host to out. When host->name is set it takes precedence: a
   name starting with '[' is assumed to be an IP literal and is written
   verbatim, any other name is encoded with uri_append_host_name().
   Without a name, host->ip is written with uri_append_host_ip(). */
void uri_append_host(string_t *out, const struct uri_host *host)
{
	if (host->name == NULL) {
		uri_append_host_ip(out, &host->ip);
		return;
	}

	if (*host->name == '[') {
		/* assume IPv6 literal; avoid encoding */
		str_append(out, host->name);
	} else {
		uri_append_host_name(out, host->name);
	}
}
1263
/* Append ":" and the decimal port number to out. A port of 0 appends
   nothing. */
void uri_append_port(string_t *out, in_port_t port)
{
	if (port == 0)
		return;

	str_printfa(out, ":%u", port);
}
1269
/* Append data to out, percent-encoding it with uri_data_encode() using
   the CHAR_MASK_PCHAR character class. Any characters listed in esc
   (which may be NULL) are encoded as well. No leading '/' is added. */
void uri_append_path_segment_data(string_t *out, const char *esc,
				  const char *data)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_PCHAR,
			esc, data);
}
1275
/* Append "/" followed by a single encoded path segment to out. An empty
   segment appends only the "/".

   The segment is encoded with uri_append_path_segment_data(), i.e. with
   the segment character class (RFC 3986: segment = *pchar), rather than
   with the whole-path encoder uri_append_path_data(). This keeps the
   function consistent with the other uri_append_*() / uri_append_*_data()
   pairs in this file. */
void uri_append_path_segment(string_t *out, const char *segment)
{
	str_append_c(out, '/');
	if (*segment != '\0')
		uri_append_path_segment_data(out, NULL, segment);
}
1282
/* Append data to out, percent-encoding it with uri_data_encode() using
   the CHAR_MASK_PFCHAR character class. Any characters listed in esc
   (which may be NULL) are encoded as well. No leading '/' is added. */
void uri_append_path_data(string_t *out, const char *esc,
			  const char *data)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_PFCHAR,
			esc, data);
}
1288
/* Append "/" followed by the encoded path to out. The path is encoded
   with uri_append_path_data() and no extra escapes; an empty path
   appends only the "/". */
void uri_append_path(string_t *out, const char *path)
{
	str_append_c(out, '/');
	if (*path == '\0')
		return;

	uri_append_path_data(out, NULL, path);
}
1295
/* Append data to out, percent-encoding it with uri_data_encode() using
   the CHAR_MASK_QCHAR character class. Any characters listed in esc
   (which may be NULL) are encoded as well. No leading '?' is added. */
void uri_append_query_data(string_t *out, const char *esc,
			   const char *data)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_QCHAR,
			esc, data);
}
1301
/* Append "?" followed by the encoded query to out. The query is encoded
   with uri_append_query_data() and no extra escapes; an empty query
   appends only the "?". */
void uri_append_query(string_t *out, const char *query)
{
	str_append_c(out, '?');
	if (*query == '\0')
		return;

	uri_append_query_data(out, NULL, query);
}
1308
/* Append data to out, percent-encoding it with uri_data_encode() using
   the CHAR_MASK_QCHAR character class (the same class the query encoder
   uses). Any characters listed in esc (which may be NULL) are encoded as
   well. No leading '#' is added. */
void uri_append_fragment_data(string_t *out, const char *esc,
			      const char *data)
{
	uri_data_encode(out, _uri_char_lookup, CHAR_MASK_QCHAR,
			esc, data);
}
1314
/* Append "#" followed by the encoded fragment to out. The fragment is
   encoded with uri_append_fragment_data() and no extra escapes; an empty
   fragment appends only the "#". */
void uri_append_fragment(string_t *out, const char *fragment)
{
	str_append_c(out, '#');
	if (*fragment == '\0')
		return;

	uri_append_fragment_data(out, NULL, fragment);
}
1321
/* Append data to out, percent-encoding every byte that is not in the
   CHAR_MASK_UNRESERVED character class. */
void uri_append_unreserved(string_t *out, const char *data)
{
	uri_data_encode(out, _uri_char_lookup,
			CHAR_MASK_UNRESERVED, NULL, data);
}
1327
/* Append data to out, percent-encoding every byte that is not in the
   CHAR_MASK_UNRESERVED_PATH character class. */
void uri_append_unreserved_path(string_t *out, const char *data)
{
	uri_data_encode(out, _uri_char_lookup,
			CHAR_MASK_UNRESERVED_PATH, NULL, data);
}
1333