1 /*
2 * Copyright (c) 2014,2015 DeNA Co., Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to
6 * deal in the Software without restriction, including without limitation the
7 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8 * sell copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 * IN THE SOFTWARE.
21 */
22 #include <inttypes.h>
23 #include <sys/socket.h>
24 #include <sys/types.h>
25 #include <sys/un.h>
26 #include "h2o/memory.h"
27 #include "h2o/string_.h"
28 #include "h2o/url.h"
29
/* Built-in URL scheme descriptors. Each initializer supplies, in order: the scheme name (without the trailing ':'),
 * the scheme's default port, and a third flag -- presumably "always runs over TLS", judging by the masque note below;
 * confirm against the declaration of h2o_url_scheme_t.
 * Schemes that have no well-known port use 65535, the same value the parsers in this file store when a URL carries
 * no explicit port. */
const h2o_url_scheme_t H2O_URL_SCHEME_HTTP = {{H2O_STRLIT("http")}, 80, 0};
const h2o_url_scheme_t H2O_URL_SCHEME_HTTPS = {{H2O_STRLIT("https")}, 443, 1};
const h2o_url_scheme_t H2O_URL_SCHEME_MASQUE = {{H2O_STRLIT("masque")}, 65535, 0 /* ??? masque might or might not be over TLS */};
const h2o_url_scheme_t H2O_URL_SCHEME_FASTCGI = {{H2O_STRLIT("fastcgi")}, 65535, 0};
34
/* Translate one hexadecimal digit character into its numeric value.
 * Returns 0..15 for [0-9A-Fa-f], or -1 when `ch` is not a hex digit. */
static int decode_hex(int ch)
{
    int value = -1;

    if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    } else if (ch >= 'a' && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if (ch >= 'A' && ch <= 'F') {
        value = 10 + (ch - 'A');
    }
    return value;
}
45
/* Inspect the path segment that has just been completed and report how many trailing bytes of `path` must be dropped
 * to collapse it.
 *  - `path`       : output buffer built so far; path[0] is always '/'
 *  - `off`        : number of bytes currently in `path`
 *  - `last_slash` : index of the '/' that opens the segment being inspected
 * Returns 1 for a "/." segment (the dot is removed, the slash stays), the size of "<parent>/.." for a "/.." segment
 * (only the two dots when the segment sits directly under the root), and 0 for any other segment. */
static size_t handle_special_paths(const char *path, size_t off, size_t last_slash)
{
    const size_t segment_len = off - last_slash; /* counts the opening '/' */
    size_t keep;

    /* "/." */
    if (segment_len == 2 && path[off - 1] == '.')
        return 1;

    /* everything that is not "/.." is left untouched */
    if (!(segment_len == 3 && path[off - 2] == '.' && path[off - 1] == '.'))
        return 0;

    /* "/..": first discard the two dots ... */
    keep = off - 2;
    /* ... then, unless only the root slash remains, walk back to just after the slash opening the parent segment */
    if (keep > 1) {
        --keep;
        while (path[keep - 1] != '/')
            --keep;
    }
    return off - keep;
}
61
62 /* Perform path normalization and URL decoding in one pass.
63 * See h2o_req_t for the purpose of @norm_indexes. */
rebuild_path(h2o_mem_pool_t * pool,const char * src,size_t src_len,size_t * query_at,size_t ** norm_indexes)64 static h2o_iovec_t rebuild_path(h2o_mem_pool_t *pool, const char *src, size_t src_len, size_t *query_at, size_t **norm_indexes)
65 {
66 char *dst;
67 size_t src_off = 0, dst_off = 0, last_slash, rewind;
68
69 { /* locate '?', and set len to the end of input path */
70 const char *q = memchr(src, '?', src_len);
71 if (q != NULL) {
72 src_len = *query_at = q - src;
73 } else {
74 *query_at = SIZE_MAX;
75 }
76 }
77
78 /* dst can be 1 byte more than src if src is missing the prefixing '/' */
79 dst = h2o_mem_alloc_pool(pool, char, src_len + 1);
80 *norm_indexes = h2o_mem_alloc_pool(pool, *norm_indexes[0], (src_len + 1));
81
82 if (src[0] == '/')
83 src_off++;
84 last_slash = dst_off;
85 dst[dst_off] = '/';
86 (*norm_indexes)[dst_off] = src_off;
87 dst_off++;
88
89 /* decode %xx */
90 while (src_off < src_len) {
91 int hi, lo;
92 char decoded;
93
94 if (src[src_off] == '%' && (src_off + 2 < src_len) && (hi = decode_hex(src[src_off + 1])) != -1 &&
95 (lo = decode_hex(src[src_off + 2])) != -1) {
96 decoded = (hi << 4) | lo;
97 src_off += 3;
98 } else {
99 decoded = src[src_off++];
100 }
101 if (decoded == '/') {
102 rewind = handle_special_paths(dst, dst_off, last_slash);
103 if (rewind > 0) {
104 dst_off -= rewind;
105 last_slash = dst_off - 1;
106 continue;
107 }
108 last_slash = dst_off;
109 }
110 dst[dst_off] = decoded;
111 (*norm_indexes)[dst_off] = src_off;
112 dst_off++;
113 }
114 rewind = handle_special_paths(dst, dst_off, last_slash);
115 dst_off -= rewind;
116
117 return h2o_iovec_init(dst, dst_off);
118 }
119
h2o_url_normalize_path(h2o_mem_pool_t * pool,const char * path,size_t len,size_t * query_at,size_t ** norm_indexes)120 h2o_iovec_t h2o_url_normalize_path(h2o_mem_pool_t *pool, const char *path, size_t len, size_t *query_at, size_t **norm_indexes)
121 {
122 const char *p = path, *end = path + len;
123 h2o_iovec_t ret;
124
125 *query_at = SIZE_MAX;
126 *norm_indexes = NULL;
127
128 if (len == 0) {
129 ret = h2o_iovec_init("/", 1);
130 return ret;
131 }
132
133 if (path[0] != '/')
134 goto Rewrite;
135
136 for (; p + 1 < end; ++p) {
137 if ((p[0] == '/' && p[1] == '.') || p[0] == '%') {
138 /* detect false positives as well */
139 goto Rewrite;
140 } else if (p[0] == '?') {
141 *query_at = p - path;
142 goto Return;
143 }
144 }
145 for (; p < end; ++p) {
146 if (p[0] == '?') {
147 *query_at = p - path;
148 goto Return;
149 }
150 }
151
152 Return:
153 ret.base = (char *)path;
154 ret.len = p - path;
155 return ret;
156
157 Rewrite:
158 ret = rebuild_path(pool, path, len, query_at, norm_indexes);
159 if (ret.len == 0)
160 goto RewriteError;
161 if (ret.base[0] != '/')
162 goto RewriteError;
163 if (h2o_strstr(ret.base, ret.len, H2O_STRLIT("/../")) != SIZE_MAX)
164 goto RewriteError;
165 if (ret.len >= 3 && memcmp(ret.base + ret.len - 3, "/..", 3) == 0)
166 goto RewriteError;
167 return ret;
168 RewriteError:
169 h2o_error_printf("failed to normalize path: `%.*s` => `%.*s`\n", (int)len, path, (int)ret.len, ret.base);
170 ret = h2o_iovec_init("/", 1);
171 return ret;
172 }
173
parse_scheme(const char * s,const char * end,const h2o_url_scheme_t ** scheme)174 static const char *parse_scheme(const char *s, const char *end, const h2o_url_scheme_t **scheme)
175 {
176 if (end - s >= 5 && memcmp(s, "http:", 5) == 0) {
177 *scheme = &H2O_URL_SCHEME_HTTP;
178 return s + 5;
179 } else if (end - s >= 6 && memcmp(s, "https:", 6) == 0) {
180 *scheme = &H2O_URL_SCHEME_HTTPS;
181 return s + 6;
182 } else if (end - s >= 7 && memcmp(s, "masque:", 7) == 0) {
183 *scheme = &H2O_URL_SCHEME_MASQUE;
184 return s + 7;
185 }
186 return NULL;
187 }
188
/* Parse "host", "host:port", "[v6-literal]" or "[v6-literal]:port" found at the beginning of `s` (`len` bytes).
 *
 * `*host` is set to refer to the host inside `s` (without the brackets for a bracketed literal) and `*port` to the
 * port number, or to 65535 when no port is given.
 * Returns a pointer to the first byte not consumed (either the end of input or whatever follows the host/port), or
 * NULL when the input is empty, the host is empty, a '[' is not closed, or the port is not a number below 65535.
 * `*port` is always written; `*host` may have been written even when NULL is returned. */
const char *h2o_url_parse_hostport(const char *s, size_t len, h2o_iovec_t *host, uint16_t *port)
{
    const char *const end = s + len;
    const char *cur = s;

    *port = 65535;

    if (cur == end)
        return NULL;

    if (*cur != '[') {
        /* plain host: runs up to the first ':' or '/', or to the end of input */
        const char *stop = cur;
        while (stop != end && *stop != '/' && *stop != ':')
            ++stop;
        *host = h2o_iovec_init(cur, stop - cur);
        cur = stop;
    } else {
        /* is IPv6 address: the host is whatever sits between the brackets */
        const char *bracket;
        ++cur;
        bracket = memchr(cur, ']', end - cur);
        if (bracket == NULL)
            return NULL;
        *host = h2o_iovec_init(cur, bracket - cur);
        cur = bracket + 1;
    }

    /* disallow zero-length host */
    if (host->len == 0)
        return NULL;

    /* optional ":port", extending to the next '/' or to the end of input */
    if (cur != end && *cur == ':') {
        const char *port_end;
        size_t value;

        ++cur;
        port_end = memchr(cur, '/', end - cur);
        if (port_end == NULL)
            port_end = end;
        value = h2o_strtosize(cur, port_end - cur);
        if (value >= 65535)
            return NULL;
        *port = (uint16_t)value;
        cur = port_end;
    }

    return cur;
}
230
/* Parse "authority[/path...]" in [src, url_end) and fill the authority, host, _port and path members of `parsed`.
 * The path defaults to "/" when nothing follows the authority. All vectors except that default refer to the input.
 * Returns 0 on success, -1 when the authority is malformed or is followed by something other than '/'. */
static int parse_authority_and_path(const char *src, const char *url_end, h2o_url_t *parsed)
{
    const char *rest = h2o_url_parse_hostport(src, url_end - src, &parsed->host, &parsed->_port);

    if (rest == NULL)
        return -1;
    parsed->authority = h2o_iovec_init(src, rest - src);

    if (rest != url_end) {
        /* whatever follows the authority must be an absolute path */
        if (*rest != '/')
            return -1;
        parsed->path = h2o_iovec_init(rest, url_end - rest);
        return 0;
    }

    parsed->path = h2o_iovec_init(H2O_STRLIT("/"));
    return 0;
}
246
/* Parse an absolute URL of the form "<scheme>://<authority>[<path>]" into `parsed`.
 * Pass SIZE_MAX as `url_len` to have the length taken with strlen().
 * Returns 0 on success, -1 if the scheme is not recognized, "//" is missing, or the authority/path is malformed. */
int h2o_url_parse(const char *url, size_t url_len, h2o_url_t *parsed)
{
    const char *end, *after_scheme;

    if (url_len == SIZE_MAX)
        url_len = strlen(url);
    end = url + url_len;

    /* the scheme is mandatory */
    after_scheme = parse_scheme(url, end, &parsed->scheme);
    if (after_scheme == NULL)
        return -1;

    /* and so is the "//" introducing the authority */
    if (end - after_scheme < 2 || after_scheme[0] != '/' || after_scheme[1] != '/')
        return -1;

    return parse_authority_and_path(after_scheme + 2, end, parsed);
}
266
/* Parse a URL reference in which the scheme and the authority are both optional.
 * Pass SIZE_MAX as `url_len` to have the length taken with strlen().
 *  - no recognized scheme : parsed->scheme is set to NULL
 *  - no "//authority"     : authority and host are cleared, _port is set to 65535, and everything after the scheme
 *                           (if any) becomes the path
 * Returns 0 on success, -1 when an authority is present but malformed. */
int h2o_url_parse_relative(const char *url, size_t url_len, h2o_url_t *parsed)
{
    const char *end, *cur;

    if (url_len == SIZE_MAX)
        url_len = strlen(url);
    end = url + url_len;

    /* an unrecognized or missing scheme is not an error here */
    cur = parse_scheme(url, end, &parsed->scheme);
    if (cur == NULL) {
        parsed->scheme = NULL;
        cur = url;
    }

    /* "//" introduces an authority, which is parsed just like in an absolute URL */
    if (end - cur >= 2 && cur[0] == '/' && cur[1] == '/')
        return parse_authority_and_path(cur + 2, end, parsed);

    /* path-only reference: there is no authority, host nor port */
    parsed->path = h2o_iovec_init(cur, end - cur);
    parsed->_port = 65535;
    parsed->host = (h2o_iovec_t){NULL};
    parsed->authority = (h2o_iovec_t){NULL};

    return 0;
}
293
h2o_url_resolve(h2o_mem_pool_t * pool,const h2o_url_t * base,const h2o_url_t * relative,h2o_url_t * dest)294 h2o_iovec_t h2o_url_resolve(h2o_mem_pool_t *pool, const h2o_url_t *base, const h2o_url_t *relative, h2o_url_t *dest)
295 {
296 h2o_iovec_t base_path, relative_path, ret;
297
298 assert(base->path.len != 0);
299 assert(base->path.base[0] == '/');
300
301 if (relative == NULL) {
302 /* build URL using base copied to dest */
303 *dest = *base;
304 base_path = base->path;
305 relative_path = h2o_iovec_init(NULL, 0);
306 goto Build;
307 }
308
309 /* scheme */
310 dest->scheme = relative->scheme != NULL ? relative->scheme : base->scheme;
311
312 /* authority (and host:port) */
313 if (relative->authority.base != NULL) {
314 assert(relative->host.base != NULL);
315 dest->authority = relative->authority;
316 dest->host = relative->host;
317 dest->_port = relative->_port;
318 } else {
319 assert(relative->host.base == NULL);
320 assert(relative->_port == 65535);
321 dest->authority = base->authority;
322 dest->host = base->host;
323 dest->_port = base->_port;
324 }
325
326 /* path */
327 base_path = base->path;
328 if (relative->path.base != NULL) {
329 relative_path = relative->path;
330 h2o_url_resolve_path(&base_path, &relative_path);
331 } else {
332 assert(relative->path.len == 0);
333 relative_path = (h2o_iovec_t){NULL};
334 }
335
336 Build:
337 /* build the output */
338 ret = h2o_concat(pool, dest->scheme->name, h2o_iovec_init(H2O_STRLIT("://")), dest->authority, base_path, relative_path);
339 /* adjust dest */
340 dest->authority.base = ret.base + dest->scheme->name.len + 3;
341 dest->host.base = dest->authority.base;
342 if (dest->authority.len != 0 && dest->authority.base[0] == '[')
343 ++dest->host.base;
344 dest->path.base = dest->authority.base + dest->authority.len;
345 dest->path.len = ret.base + ret.len - dest->path.base;
346
347 return ret;
348 }
349
/* Prepare `base` and `relative` so that concatenating the two yields the resolved path; nothing is copied.
 *  - `base` keeps its pointer, only its length is reduced.
 *  - `relative` is advanced past the leading "." and ".." segments that have been consumed.
 * If `relative` starts with '/', it replaces the base path entirely (base->len becomes 0). Otherwise `base` is cut
 * right after its last '/', and once more for each leading ".." of `relative` (never above the first '/').
 * Only dot segments at the very beginning of `relative` are handled; scanning stops at the first other segment.
 * In the non-absolute case `base` must contain a '/', as the backward scans have no other stopping condition. */
void h2o_url_resolve_path(h2o_iovec_t *base, h2o_iovec_t *relative)
{
    size_t base_path_len = base->len, rel_path_offset = 0;

    if (relative->len != 0 && relative->base[0] == '/') {
        /* absolute reference: nothing of the base path survives */
        base_path_len = 0;
    } else {
        /* relative path */
        /* drop the last segment of base; base_path_len becomes the index of the last '/' */
        while (base->base[--base_path_len] != '/')
            ;
        while (rel_path_offset != relative->len) {
            if (relative->base[rel_path_offset] == '.') {
                /* ".." terminating the input, or "../" */
                if (relative->len - rel_path_offset >= 2 && relative->base[rel_path_offset + 1] == '.' &&
                    (relative->len - rel_path_offset == 2 || relative->base[rel_path_offset + 2] == '/')) {
                    /* go up one directory unless already at the first '/' of base */
                    if (base_path_len != 0) {
                        while (base->base[--base_path_len] != '/')
                            ;
                    }
                    rel_path_offset += relative->len - rel_path_offset == 2 ? 2 : 3;
                    continue;
                }
                if (relative->len - rel_path_offset == 1) {
                    /* "." terminating the input */
                    rel_path_offset += 1;
                    continue;
                } else if (relative->base[rel_path_offset + 1] == '/') {
                    /* "./" */
                    rel_path_offset += 2;
                    continue;
                }
            }
            /* an ordinary segment: the rest of relative is used as-is */
            break;
        }
        /* keep the '/' that base_path_len currently points at */
        base_path_len += 1;
    }

    base->len = base_path_len;
    *relative = h2o_iovec_init(relative->base + rel_path_offset, relative->len - rel_path_offset);
}
387
/* Copy `src` into `dest`. The scheme pointer and the port are copied as-is, while authority, host and path are each
 * duplicated separately with h2o_strdup() using `pool`; the copied host therefore does not point into the copied
 * authority. */
void h2o_url_copy(h2o_mem_pool_t *pool, h2o_url_t *dest, const h2o_url_t *src)
{
    /* plain values */
    dest->scheme = src->scheme;
    dest->_port = src->_port;

    /* strings */
    dest->authority = h2o_strdup(pool, src->authority.base, src->authority.len);
    dest->host = h2o_strdup(pool, src->host.base, src->host.len);
    dest->path = h2o_strdup(pool, src->path.base, src->path.len);
}
396
/* Convert a host of the form "unix:<path>" into a unix-domain socket address.
 *
 * On success `*sa` is zero-filled, its family is set to AF_UNIX, <path> is copied into sun_path, and NULL is returned.
 * Because the path must be strictly shorter than sun_path, the zero fill always leaves it NUL-terminated.
 * On failure `*sa` is left untouched and an error string is returned:
 *  - h2o_url_host_to_sun_err_is_not_unix_socket if `host` does not start with "unix:"
 *  - a "too long" message if <path> does not fit into sun_path together with its terminating NUL
 * An empty <path> is not rejected here. */
const char *h2o_url_host_to_sun(h2o_iovec_t host, struct sockaddr_un *sa)
{
#define PREFIX "unix:"

    const size_t prefix_len = sizeof(PREFIX) - 1;
    size_t path_len;

    if (host.len < prefix_len || memcmp(host.base, PREFIX, prefix_len) != 0)
        return h2o_url_host_to_sun_err_is_not_unix_socket;

    /* The length of the path is host.len minus the prefix length. Computing it once (instead of spelling out
     * `host.len - sizeof(PREFIX) - 1`, which subtracts two bytes too many) keeps this check consistent with the
     * number of bytes copied below, and cannot underflow thanks to the prefix check above. */
    path_len = host.len - prefix_len;
    if (path_len >= sizeof(sa->sun_path))
        return "unix-domain socket path is too long";

    memset(sa, 0, sizeof(*sa));
    sa->sun_family = AF_UNIX;
    memcpy(sa->sun_path, host.base + prefix_len, path_len);
    return NULL;

#undef PREFIX
}
414
/* Error returned by h2o_url_host_to_sun() when the supplied host does not start with "unix:". It is a named object
 * rather than an inline literal, presumably so that callers can recognize this particular error by comparing the
 * returned pointer -- confirm against the callers. */
const char h2o_url_host_to_sun_err_is_not_unix_socket[] = "supplied name does not look like an unix-domain socket";
416
/* Initialize `url` from its components.
 *
 * If `port` equals the default port of `scheme`, the authority is just a copy of `host` (made with h2o_strdup) and
 * _port is set to 65535. Otherwise the authority becomes "<host>:<port>", allocated from `pool` or, when `pool` is
 * NULL, with h2o_mem_alloc(); that buffer is not NUL-terminated. In both cases url->host refers to the host part of
 * the authority. `path` is stored as given, without being copied.
 * Returns 0 on success, -1 if the port number could not be formatted. */
int h2o_url_init_with_hostport(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t host,
                               uint16_t port, h2o_iovec_t path)
{
    char port_str[sizeof(H2O_UINT16_LONGEST_STR)];
    int port_str_len;
    char *buf;

    url->scheme = scheme;
    url->path = path;

    if (port == scheme->default_port) {
        /* the default port is left out of the authority */
        url->_port = 65535;
        url->authority = h2o_strdup(pool, host.base, host.len);
        url->host = url->authority;
        return 0;
    }

    url->_port = port;
    port_str_len = sprintf(port_str, "%" PRIu16, port);
    if (port_str_len < 0)
        return -1;

    /* assemble "<host>:<port>" */
    url->authority.len = host.len + 1 + port_str_len;
    buf = pool == NULL ? h2o_mem_alloc(url->authority.len) : h2o_mem_alloc_pool(pool, char, url->authority.len);
    memcpy(buf, host.base, host.len);
    buf[host.len] = ':';
    memcpy(buf + host.len + 1, port_str, port_str_len);
    url->authority.base = buf;

    /* the host is the leading part of the authority */
    url->host = h2o_iovec_init(buf, host.len);

    return 0;
}
444
/* Initialize `url` so that it designates a unix-domain socket.
 *
 * The authority becomes "[unix:<sun_path>]", allocated from `pool` or, when `pool` is NULL, with h2o_mem_alloc(); the
 * buffer is not NUL-terminated. url->host refers to the part between the brackets ("unix:<sun_path>"), _port is set
 * to 65535, and `path` is stored as given, without being copied.
 * Always returns 0. */
int h2o_url_init_with_sun_path(h2o_url_t *url, h2o_mem_pool_t *pool, const h2o_url_scheme_t *scheme, h2o_iovec_t sun_path,
                               h2o_iovec_t path)
{
#define PREFIX "[unix:"
#define SUFFIX "]"
    const size_t prefix_len = sizeof(PREFIX) - 1, suffix_len = sizeof(SUFFIX) - 1;
    char *buf, *cursor;

    url->scheme = scheme;
    url->path = path;
    url->_port = 65535;

    url->authority.len = prefix_len + sun_path.len + suffix_len;
    buf = pool == NULL ? h2o_mem_alloc(url->authority.len) : h2o_mem_alloc_pool(pool, char, url->authority.len);

    /* lay out the three pieces back to back */
    cursor = buf;
    memcpy(cursor, PREFIX, prefix_len);
    cursor += prefix_len;
    memcpy(cursor, sun_path.base, sun_path.len);
    cursor += sun_path.len;
    memcpy(cursor, SUFFIX, suffix_len);
    url->authority.base = buf;
#undef PREFIX
#undef SUFFIX

    /* the host is the authority without the enclosing brackets */
    url->host = h2o_iovec_init(buf + 1, url->authority.len - 2);

    return 0;
}
466