1 /*
2  * Copyright (c) 2014,2015 DeNA Co., Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to
6  * deal in the Software without restriction, including without limitation the
7  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8  * sell copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20  * IN THE SOFTWARE.
21  */
22 #include <inttypes.h>
23 #include <sys/socket.h>
24 #include <sys/types.h>
25 #include <sys/un.h>
26 #include "h2o/memory.h"
27 #include "h2o/string_.h"
28 #include "h2o/url.h"
29 
30 const h2o_url_scheme_t H2O_URL_SCHEME_HTTP = {{H2O_STRLIT("http")}, 80, 0};
31 const h2o_url_scheme_t H2O_URL_SCHEME_HTTPS = {{H2O_STRLIT("https")}, 443, 1};
32 const h2o_url_scheme_t H2O_URL_SCHEME_MASQUE = {{H2O_STRLIT("masque")}, 65535, 0 /* ??? masque might or might not be over TLS */};
33 const h2o_url_scheme_t H2O_URL_SCHEME_FASTCGI = {{H2O_STRLIT("fastcgi")}, 65535, 0};
34 
/* Returns the value (0-15) of the hexadecimal digit `ch`, accepting both upper and lower case, or -1 if `ch` is not a
 * hexadecimal digit. */
static int decode_hex(int ch)
{
    int value = -1;

    if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
        value = 10 + (ch - 'A');
    }
    return value;
}
45 
/* Inspects the path segment that was just written to `path` and tells the caller how many bytes to drop from the end.
 *
 * `off` is the current write offset and `last_slash` the offset of the '/' that opened the trailing segment, so the segment
 * (including its leading '/') occupies path[last_slash .. off).
 *
 *  - segment is "/."  : returns 1, so that only the '/' is kept
 *  - segment is "/.." : returns enough to rewind to just after the '/' that precedes the parent segment; at the root
 *                       (nothing left to remove) only the two dots are dropped
 *  - anything else    : returns 0
 */
static size_t handle_special_paths(const char *path, size_t off, size_t last_slash)
{
    const size_t seg_len = off - last_slash;
    size_t keep = off;

    if (seg_len == 2) {
        if (path[off - 1] == '.')
            keep = off - 1;
    } else if (seg_len == 3) {
        if (path[off - 2] == '.' && path[off - 1] == '.') {
            keep = off - 2;
            if (keep > 1) {
                /* step over the '/' in front of ".." and walk back to the one before it */
                --keep;
                while (path[keep - 1] != '/')
                    --keep;
            }
        }
    }
    return off - keep;
}
61 
62 /* Perform path normalization and URL decoding in one pass.
63  * See h2o_req_t for the purpose of @norm_indexes. */
rebuild_path(h2o_mem_pool_t * pool,const char * src,size_t src_len,size_t * query_at,size_t ** norm_indexes)64 static h2o_iovec_t rebuild_path(h2o_mem_pool_t *pool, const char *src, size_t src_len, size_t *query_at, size_t **norm_indexes)
65 {
66     char *dst;
67     size_t src_off = 0, dst_off = 0, last_slash, rewind;
68 
69     { /* locate '?', and set len to the end of input path */
70         const char *q = memchr(src, '?', src_len);
71         if (q != NULL) {
72             src_len = *query_at = q - src;
73         } else {
74             *query_at = SIZE_MAX;
75         }
76     }
77 
78     /* dst can be 1 byte more than src if src is missing the prefixing '/' */
79     dst = h2o_mem_alloc_pool(pool, char, src_len + 1);
80     *norm_indexes = h2o_mem_alloc_pool(pool, *norm_indexes[0], (src_len + 1));
81 
82     if (src[0] == '/')
83         src_off++;
84     last_slash = dst_off;
85     dst[dst_off] = '/';
86     (*norm_indexes)[dst_off] = src_off;
87     dst_off++;
88 
89     /* decode %xx */
90     while (src_off < src_len) {
91         int hi, lo;
92         char decoded;
93 
94         if (src[src_off] == '%' && (src_off + 2 < src_len) && (hi = decode_hex(src[src_off + 1])) != -1 &&
95             (lo = decode_hex(src[src_off + 2])) != -1) {
96             decoded = (hi << 4) | lo;
97             src_off += 3;
98         } else {
99             decoded = src[src_off++];
100         }
101         if (decoded == '/') {
102             rewind = handle_special_paths(dst, dst_off, last_slash);
103             if (rewind > 0) {
104                 dst_off -= rewind;
105                 last_slash = dst_off - 1;
106                 continue;
107             }
108             last_slash = dst_off;
109         }
110         dst[dst_off] = decoded;
111         (*norm_indexes)[dst_off] = src_off;
112         dst_off++;
113     }
114     rewind = handle_special_paths(dst, dst_off, last_slash);
115     dst_off -= rewind;
116 
117     return h2o_iovec_init(dst, dst_off);
118 }
119 
h2o_url_normalize_path(h2o_mem_pool_t * pool,const char * path,size_t len,size_t * query_at,size_t ** norm_indexes)120 h2o_iovec_t h2o_url_normalize_path(h2o_mem_pool_t *pool, const char *path, size_t len, size_t *query_at, size_t **norm_indexes)
121 {
122     const char *p = path, *end = path + len;
123     h2o_iovec_t ret;
124 
125     *query_at = SIZE_MAX;
126     *norm_indexes = NULL;
127 
128     if (len == 0) {
129         ret = h2o_iovec_init("/", 1);
130         return ret;
131     }
132 
133     if (path[0] != '/')
134         goto Rewrite;
135 
136     for (; p + 1 < end; ++p) {
137         if ((p[0] == '/' && p[1] == '.') || p[0] == '%') {
138             /* detect false positives as well */
139             goto Rewrite;
140         } else if (p[0] == '?') {
141             *query_at = p - path;
142             goto Return;
143         }
144     }
145     for (; p < end; ++p) {
146         if (p[0] == '?') {
147             *query_at = p - path;
148             goto Return;
149         }
150     }
151 
152 Return:
153     ret.base = (char *)path;
154     ret.len = p - path;
155     return ret;
156 
157 Rewrite:
158     ret = rebuild_path(pool, path, len, query_at, norm_indexes);
159     if (ret.len == 0)
160         goto RewriteError;
161     if (ret.base[0] != '/')
162         goto RewriteError;
163     if (h2o_strstr(ret.base, ret.len, H2O_STRLIT("/../")) != SIZE_MAX)
164         goto RewriteError;
165     if (ret.len >= 3 && memcmp(ret.base + ret.len - 3, "/..", 3) == 0)
166         goto RewriteError;
167     return ret;
168 RewriteError:
169     h2o_error_printf("failed to normalize path: `%.*s` => `%.*s`\n", (int)len, path, (int)ret.len, ret.base);
170     ret = h2o_iovec_init("/", 1);
171     return ret;
172 }
173 
parse_scheme(const char * s,const char * end,const h2o_url_scheme_t ** scheme)174 static const char *parse_scheme(const char *s, const char *end, const h2o_url_scheme_t **scheme)
175 {
176     if (end - s >= 5 && memcmp(s, "http:", 5) == 0) {
177         *scheme = &H2O_URL_SCHEME_HTTP;
178         return s + 5;
179     } else if (end - s >= 6 && memcmp(s, "https:", 6) == 0) {
180         *scheme = &H2O_URL_SCHEME_HTTPS;
181         return s + 6;
182     } else if (end - s >= 7 && memcmp(s, "masque:", 7) == 0) {
183         *scheme = &H2O_URL_SCHEME_MASQUE;
184         return s + 7;
185     }
186     return NULL;
187 }
188 
/* Parses "host", "host:port", "[v6addr]" or "[v6addr]:port" found at the beginning of [s, s + len).
 *
 * @param host  receives the host, pointing into `s`; for the bracketed form the brackets are not included
 * @param port  receives the port number, or 65535 if no port was given
 * @return the position at which parsing stopped (the first '/' or the end of input; for the bracketed form without a port,
 *         the byte following ']'), or NULL if the input is empty, the host is empty, a '[' is not closed, or the port is not
 *         a number below 65535. `*port` and possibly `*host` are modified even when NULL is returned.
 */
const char *h2o_url_parse_hostport(const char *s, size_t len, h2o_iovec_t *host, uint16_t *port)
{
    const char *const end = s + len;
    const char *cur = s, *stop;

    *port = 65535;

    if (cur == end)
        return NULL;

    if (*cur != '[') {
        /* name or IPv4 address: runs until ':', '/' or the end of input */
        stop = cur;
        while (stop != end && *stop != '/' && *stop != ':')
            ++stop;
        *host = h2o_iovec_init(cur, stop - cur);
        cur = stop;
    } else {
        /* is IPv6 address: take whatever is found between the brackets */
        ++cur;
        stop = memchr(cur, ']', end - cur);
        if (stop == NULL)
            return NULL;
        *host = h2o_iovec_init(cur, stop - cur);
        cur = stop + 1;
    }

    /* disallow zero-length host */
    if (host->len == 0)
        return NULL;

    /* parse port */
    if (cur != end && *cur == ':') {
        size_t value;
        ++cur;
        stop = memchr(cur, '/', end - cur);
        if (stop == NULL)
            stop = end;
        value = h2o_strtosize(cur, stop - cur);
        /* 65535 itself is refused too, as the value is reserved for "not specified" */
        if (value >= 65535)
            return NULL;
        *port = (uint16_t)value;
        cur = stop;
    }

    return cur;
}
230 
/* Parses the "authority[/path...]" part of a URL found in [src, url_end) and fills `host`, `_port`, `authority` and `path`
 * of `parsed`, all of which point into the input (except for the "/" that is supplied when the path is missing).
 * Returns 0 if successful, or -1 if the host/port is invalid or is followed by something other than '/'. */
static int parse_authority_and_path(const char *src, const char *url_end, h2o_url_t *parsed)
{
    const char *rest = h2o_url_parse_hostport(src, url_end - src, &parsed->host, &parsed->_port);

    if (rest == NULL)
        return -1;
    parsed->authority = h2o_iovec_init(src, rest - src);

    if (rest != url_end) {
        if (*rest != '/')
            return -1;
        parsed->path = h2o_iovec_init(rest, url_end - rest);
    } else {
        /* nothing follows the authority; use the root */
        parsed->path = h2o_iovec_init(H2O_STRLIT("/"));
    }
    return 0;
}
246 
/* Parses an absolute URL of the form "scheme://authority[/path]".
 *
 * @param url_len  length of `url`, or SIZE_MAX to have it determined using strlen
 * @param parsed   receives the result; the vectors point into `url`. May be partially written on failure.
 * @return 0 if successful, -1 if the scheme is not recognized, "//" is missing, or the authority is invalid
 */
int h2o_url_parse(const char *url, size_t url_len, h2o_url_t *parsed)
{
    const char *url_end, *after_scheme;

    if (url_len == SIZE_MAX)
        url_len = strlen(url);
    url_end = url + url_len;

    /* check and skip scheme */
    after_scheme = parse_scheme(url, url_end, &parsed->scheme);
    if (after_scheme == NULL)
        return -1;

    /* "//" must follow */
    if (url_end - after_scheme < 2 || after_scheme[0] != '/' || after_scheme[1] != '/')
        return -1;

    return parse_authority_and_path(after_scheme + 2, url_end, parsed);
}
266 
/* Parses a URL reference in which the scheme and the authority are both optional.
 *
 * @param url_len  length of `url`, or SIZE_MAX to have it determined using strlen
 * @param parsed   receives the result. `scheme` is NULL if the input does not start with a recognized scheme. When there is
 *                 no "//authority" part, `authority` and `host` are set to null vectors, `_port` to 65535, and `path` to
 *                 everything that follows the scheme.
 * @return 0 if successful, -1 if an authority is present but invalid
 */
int h2o_url_parse_relative(const char *url, size_t url_len, h2o_url_t *parsed)
{
    const char *url_end, *rest;

    if (url_len == SIZE_MAX)
        url_len = strlen(url);
    url_end = url + url_len;

    /* obtain scheme, if there is one */
    rest = parse_scheme(url, url_end, &parsed->scheme);
    if (rest == NULL) {
        parsed->scheme = NULL;
        rest = url;
    }

    if (url_end - rest < 2 || rest[0] != '/' || rest[1] != '/') {
        /* no authority; reset authority, host, port, and set path */
        parsed->authority = (h2o_iovec_t){NULL};
        parsed->host = (h2o_iovec_t){NULL};
        parsed->_port = 65535;
        parsed->path = h2o_iovec_init(rest, url_end - rest);
        return 0;
    }

    /* handle "//" */
    return parse_authority_and_path(rest + 2, url_end, parsed);
}
293 
h2o_url_resolve(h2o_mem_pool_t * pool,const h2o_url_t * base,const h2o_url_t * relative,h2o_url_t * dest)294 h2o_iovec_t h2o_url_resolve(h2o_mem_pool_t *pool, const h2o_url_t *base, const h2o_url_t *relative, h2o_url_t *dest)
295 {
296     h2o_iovec_t base_path, relative_path, ret;
297 
298     assert(base->path.len != 0);
299     assert(base->path.base[0] == '/');
300 
301     if (relative == NULL) {
302         /* build URL using base copied to dest */
303         *dest = *base;
304         base_path = base->path;
305         relative_path = h2o_iovec_init(NULL, 0);
306         goto Build;
307     }
308 
309     /* scheme */
310     dest->scheme = relative->scheme != NULL ? relative->scheme : base->scheme;
311 
312     /* authority (and host:port) */
313     if (relative->authority.base != NULL) {
314         assert(relative->host.base != NULL);
315         dest->authority = relative->authority;
316         dest->host = relative->host;
317         dest->_port = relative->_port;
318     } else {
319         assert(relative->host.base == NULL);
320         assert(relative->_port == 65535);
321         dest->authority = base->authority;
322         dest->host = base->host;
323         dest->_port = base->_port;
324     }
325 
326     /* path */
327     base_path = base->path;
328     if (relative->path.base != NULL) {
329         relative_path = relative->path;
330         h2o_url_resolve_path(&base_path, &relative_path);
331     } else {
332         assert(relative->path.len == 0);
333         relative_path = (h2o_iovec_t){NULL};
334     }
335 
336 Build:
337     /* build the output */
338     ret = h2o_concat(pool, dest->scheme->name, h2o_iovec_init(H2O_STRLIT("://")), dest->authority, base_path, relative_path);
339     /* adjust dest */
340     dest->authority.base = ret.base + dest->scheme->name.len + 3;
341     dest->host.base = dest->authority.base;
342     if (dest->authority.len != 0 && dest->authority.base[0] == '[')
343         ++dest->host.base;
344     dest->path.base = dest->authority.base + dest->authority.len;
345     dest->path.len = ret.base + ret.len - dest->path.base;
346 
347     return ret;
348 }
349 
/* Trims `base` and `relative` in place, so that concatenating the two yields `relative` resolved against `base`.
 *
 * If `relative` starts with '/', `base` becomes empty. Otherwise, the last segment of `base` is removed, and then every
 * leading "./" and "../" of `relative` (or a bare "." / ".." that ends it) is consumed, with each ".." removing one more
 * segment from `base` unless the root has been reached. `base` then ends with '/'.
 *
 * No memory is allocated; only the lengths and the start of `relative` are changed. `base` must contain a '/' (the search for
 * it is not bounded). */
void h2o_url_resolve_path(h2o_iovec_t *base, h2o_iovec_t *relative)
{
    size_t keep = base->len, skip = 0;

    if (relative->len != 0 && relative->base[0] == '/') {
        /* absolute path; nothing of base is used */
        keep = 0;
    } else {
        /* drop the last segment of base, leaving `keep` at the offset of the last '/' */
        do {
            --keep;
        } while (base->base[keep] != '/');

        for (;;) {
            const size_t remaining = relative->len - skip;
            const char *seg;

            if (remaining == 0)
                break;
            seg = relative->base + skip;
            if (seg[0] != '.')
                break;

            if (remaining >= 2 && seg[1] == '.' && (remaining == 2 || seg[2] == '/')) {
                /* ".." or "../": go up by one segment, unless already at root */
                if (keep != 0) {
                    do {
                        --keep;
                    } while (base->base[keep] != '/');
                }
                skip += remaining == 2 ? 2 : 3;
            } else if (remaining == 1) {
                /* "." at the end */
                skip += 1;
            } else if (seg[1] == '/') {
                /* "./" */
                skip += 2;
            } else {
                /* an ordinary segment that happens to start with a dot */
                break;
            }
        }

        /* include the trailing '/' */
        keep += 1;
    }

    base->len = keep;
    *relative = h2o_iovec_init(relative->base + skip, relative->len - skip);
}
387 
/* Copies `src` to `dest`, duplicating the authority, host and path using h2o_strdup with `pool`.
 * Each of the three is duplicated separately; i.e., `dest->host` does not point into `dest->authority`.
 * The scheme pointer and the port are copied as-is. */
void h2o_url_copy(h2o_mem_pool_t *pool, h2o_url_t *dest, const h2o_url_t *src)
{
    const h2o_iovec_t authority = src->authority, host = src->host, path = src->path;

    dest->scheme = src->scheme;
    dest->authority = h2o_strdup(pool, authority.base, authority.len);
    dest->host = h2o_strdup(pool, host.base, host.len);
    dest->path = h2o_strdup(pool, path.base, path.len);
    dest->_port = src->_port;
}
396 
/* Converts a host of the form "unix:<path>" to a unix-domain socket address.
 *
 * @param host  the host; need not be NUL-terminated, as only `host.len` bytes are read
 * @param sa    receives the address: the structure is zero-filled, then `sun_family` is set to AF_UNIX and `<path>` is
 *              copied to `sun_path`. Left untouched if an error is returned.
 * @return NULL if successful. Otherwise a pointer to a static error string, which is
 *         `h2o_url_host_to_sun_err_is_not_unix_socket` if `host` does not start with "unix:".
 *
 * The path must be shorter than `sizeof(sa->sun_path)`, so that the zero-filled `sun_path` stays NUL-terminated. An empty
 * path (i.e. host being exactly "unix:") is accepted and yields an empty `sun_path`.
 */
const char *h2o_url_host_to_sun(h2o_iovec_t host, struct sockaddr_un *sa)
{
#define PREFIX "unix:"
    const size_t prefix_len = sizeof(PREFIX) - 1;
    size_t path_len;

    if (host.len < prefix_len || memcmp(host.base, PREFIX, prefix_len) != 0)
        return h2o_url_host_to_sun_err_is_not_unix_socket;

    /* Exactly the length of the prefix has to be subtracted. Note that `host.len - sizeof(PREFIX) - 1` would subtract two
     * bytes too many, which wraps around for one-letter paths and lets through paths that overrun sun_path by up to two
     * bytes. */
    path_len = host.len - prefix_len;
    if (path_len >= sizeof(sa->sun_path))
        return "unix-domain socket path is too long";

    memset(sa, 0, sizeof(*sa));
    sa->sun_family = AF_UNIX;
    memcpy(sa->sun_path, host.base + prefix_len, path_len);
    return NULL;

#undef PREFIX
}

/* returned by h2o_url_host_to_sun when the supplied host does not start with "unix:" */
const char h2o_url_host_to_sun_err_is_not_unix_socket[] = "supplied name does not look like an unix-domain socket";
416 
/* Initializes `url` from the given scheme, host, port and path.
 *
 * If `port` is the default port of the scheme, the authority is a copy of `host` (made by h2o_strdup) and `_port` is set to
 * 65535. Otherwise the authority becomes "host:port", allocated from `pool` (or by h2o_mem_alloc if `pool` is NULL) without a
 * terminating NUL, and `_port` is set to `port`. In both cases `url->host` refers to the host part of the authority, while
 * `path` is stored as-is (not copied).
 *
 * @return 0 if successful, -1 if the port number could not be formatted
 */
int h2o_url_init_with_hostport(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t host,
                               uint16_t port, h2o_iovec_t path)
{
    char portbuf[sizeof(H2O_UINT16_LONGEST_STR)];
    int ndigits;
    char *auth;

    url->scheme = scheme;
    url->path = path;

    if (port == scheme->default_port) {
        /* the port is omitted from the authority */
        url->_port = 65535;
        url->authority = h2o_strdup(pool, host.base, host.len);
        url->host = url->authority;
        return 0;
    }

    url->_port = port;
    ndigits = sprintf(portbuf, "%" PRIu16, port);
    if (ndigits < 0)
        return -1;

    /* authority is host + ':' + port */
    url->authority.len = host.len + 1 + ndigits;
    auth = pool == NULL ? h2o_mem_alloc(url->authority.len) : h2o_mem_alloc_pool(pool, char, url->authority.len);
    memcpy(auth, host.base, host.len);
    auth[host.len] = ':';
    memcpy(auth + host.len + 1, portbuf, ndigits);
    url->authority.base = auth;

    url->host = h2o_iovec_init(auth, host.len);

    return 0;
}
444 
/* Initializes `url` so that it refers to a unix-domain socket.
 *
 * The authority becomes "[unix:" + sun_path + "]", allocated from `pool` (or by h2o_mem_alloc if `pool` is NULL) without a
 * terminating NUL. `url->host` refers to the part between the brackets (i.e. "unix:" + sun_path), `_port` is set to 65535,
 * and `path` is stored as-is (not copied).
 *
 * @return always 0
 */
int h2o_url_init_with_sun_path(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t sun_path,
                               h2o_iovec_t path)
{
    const size_t prefix_len = sizeof("[unix:") - 1;
    const size_t total = prefix_len + sun_path.len + 1; /* +1 for the closing bracket */
    char *auth;

    url->scheme = scheme;
    url->path = path;
    url->_port = 65535;

    url->authority.len = total;
    auth = pool == NULL ? h2o_mem_alloc(url->authority.len) : h2o_mem_alloc_pool(pool, char, url->authority.len);
    memcpy(auth, "[unix:", prefix_len);
    memcpy(auth + prefix_len, sun_path.base, sun_path.len);
    auth[total - 1] = ']';
    url->authority.base = auth;

    /* host is the authority without the brackets */
    url->host = h2o_iovec_init(auth + 1, total - 2);

    return 0;
}
466