1 /* URL parser and translator; implementation of RFC 2396. */
2
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
6
7 #include <ctype.h>
8 #include <errno.h>
9 #ifdef HAVE_IDNA_H
10 #include <idna.h>
11 #endif
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <sys/types.h>
16 #ifdef HAVE_NETDB_H
17 #include <netdb.h> /* OS/2 needs this after sys/types.h */
18 #endif
19
20 #ifdef HAVE_SYS_SOCKET_H
21 #include <sys/socket.h>
22 #endif
23 #ifdef HAVE_NETINET_IN_H
24 #include <netinet/in.h>
25 #endif
26 #ifdef HAVE_ARPA_INET_H
27 #include <arpa/inet.h>
28 #endif
29
30 #include "elinks.h"
31
32 #include "main/object.h"
33 #include "protocol/protocol.h"
34 #include "protocol/uri.h"
35 #include "util/conv.h"
36 #include "util/error.h"
37 #include "util/file.h"
38 #include "util/hash.h"
39 #include "util/memory.h"
40 #include "util/string.h"
41
42
43 static inline int
end_of_dir(unsigned char c)44 end_of_dir(unsigned char c)
45 {
46 return c == POST_CHAR || c == '#' || c == ';' || c == '?';
47 }
48
49 static inline int
is_uri_dir_sep(struct uri * uri,unsigned char pos)50 is_uri_dir_sep(struct uri *uri, unsigned char pos)
51 {
52 return (uri->protocol == PROTOCOL_FILE ? dir_sep(pos) : pos == '/');
53 }
54
55
56 int
is_ip_address(unsigned char * address,int addresslen)57 is_ip_address(unsigned char *address, int addresslen)
58 {
59 /* The @address has well defined limits so it would be a shame to
60 * allocate it. */
61 unsigned char buffer[IP_ADDRESS_BUFFER_SIZE];
62
63 if (addresslen >= sizeof(buffer))
64 return 0;
65
66 safe_strncpy(buffer, address, addresslen + 1);
67
68 #ifdef HAVE_INET_PTON
69 #ifdef CONFIG_IPV6
70 {
71 struct sockaddr_in6 addr6;
72
73 if (inet_pton(AF_INET6, buffer, &addr6.sin6_addr) > 0)
74 return 1;
75 }
76 #endif /* CONFIG_IPV6 */
77 {
78 struct in_addr addr4;
79
80 if (inet_pton(AF_INET, buffer, &addr4) > 0)
81 return 1;
82 }
83
84 return 0;
85 #else
86 /* FIXME: Is this ever the case? */
87 return 0;
88 #endif /* HAVE_INET_PTON */
89 }
90
91
/* Look for a known top level domain at the very end of @s.
 *
 * @slen is the length of @s; a negative value means "use strlen(@s)".
 * The comparison is a case-insensitive suffix match against a fixed list.
 *
 * Returns the offset in @s where the matching TLD starts, or -1 when @slen
 * is zero or no listed TLD terminates the string. */
int
end_with_known_tld(unsigned char *s, int slen)
{
	static const unsigned char *tld[] =
	{ "com", "edu", "net",
	  "org", "gov", "mil",
	  "int", "biz", "arpa",
	  "aero", "coop",
	  "info", "museum",
	  "name", "pro", NULL };
	const unsigned char **entry;

	if (!slen) return -1;
	if (slen < 0) slen = strlen(s);

	for (entry = tld; *entry; entry++) {
		int len = strlen(*entry);
		int offset = slen - len;

		/* The candidate TLD is longer than the string itself. */
		if (offset < 0)
			continue;

		if (c_strncasecmp(&s[offset], *entry, len) == 0)
			return offset;
	}

	return -1;
}
117
118 /* XXX: this function writes to @name. */
119 static int
check_whether_file_exists(unsigned char * name)120 check_whether_file_exists(unsigned char *name)
121 {
122 /* Check POST_CHAR etc ... */
123 static const unsigned char chars[] = POST_CHAR_S "#?";
124 int i;
125 int namelen = strlen(name);
126
127 if (file_exists(name))
128 return namelen;
129
130 for (i = 0; i < sizeof(chars) - 1; i++) {
131 unsigned char *pos = memchr(name, chars[i], namelen);
132 int exists;
133
134 if (!pos) continue;
135
136 *pos = 0;
137 exists = file_exists(name);
138 *pos = chars[i];
139
140 if (exists) {
141 return pos - name;
142 }
143 }
144
145 return -1;
146 }
147
148 static int
check_uri_file(unsigned char * name)149 check_uri_file(unsigned char *name)
150 {
151 /* Check POST_CHAR etc ... */
152 static const unsigned char chars[] = POST_CHAR_S "#";
153
154 return strcspn(name, chars);
155 }
156
/* Encodes URIs without encoding stuff like fragments and query separators.
 *
 * The length reported by check_whether_file_exists() is handed to
 * encode_uri_string(); when that length is positive, whatever follows it in
 * @uristring is appended to @string verbatim. */
static void
encode_file_uri_string(struct string *string, unsigned char *uristring)
{
	int filenamelen = check_whether_file_exists(uristring);

	encode_uri_string(string, uristring, filenamelen, 0);

	if (filenamelen <= 0)
		return;

	add_to_string(string, uristring + filenamelen);
}
166
167
/* Measure the protocol (scheme) name at the start of @url.
 *
 * RFC1738:
 *	scheme = 1*[ lowalpha | digit | "+" | "-" | "." ]
 * (but per its recommendations we accept "upalpha" too)
 *
 * A single trailing digit is not counted as part of the name; it supports
 * the "IP version in protocol scheme name" hack.
 *
 * Returns the length of the name when it is directly followed by ':' or by
 * a digit, and 0 otherwise (which includes the case of no name at all). */
static inline int
get_protocol_length(const unsigned char *url)
{
	const unsigned char *scan = url;

	/* Seek the end of the protocol name if any. */
	for (;;) {
		unsigned char ch = *scan;

		if (!isalnum(ch) && ch != '+' && ch != '-' && ch != '.')
			break;
		scan++;
	}

	/* Silently chop off the last digit if it's there. */
	if (scan != url && isdigit(scan[-1]))
		scan--;

	if (*scan != ':' && !isdigit(*scan))
		return 0;

	return scan - url;
}
189
190 enum uri_errno
parse_uri(struct uri * uri,unsigned char * uristring)191 parse_uri(struct uri *uri, unsigned char *uristring)
192 {
193 unsigned char *prefix_end, *host_end;
194 #ifdef CONFIG_IPV6
195 unsigned char *lbracket, *rbracket;
196 #endif
197
198 assertm(uristring, "No uri to parse.");
199 memset(uri, 0, sizeof(*uri));
200
201 /* Nothing to do for an empty url. */
202 if_assert_failed return 0;
203 if (!*uristring) return URI_ERRNO_EMPTY;
204
205 uri->string = uristring;
206 uri->protocollen = get_protocol_length(uristring);
207
208 /* Invalid */
209 if (!uri->protocollen) return URI_ERRNO_INVALID_PROTOCOL;
210
211 /* Figure out whether the protocol is known */
212 uri->protocol = get_protocol(struri(uri), uri->protocollen);
213
214 prefix_end = uristring + uri->protocollen; /* ':' */
215
216 /* Check if there's a digit after the protocol name. */
217 if (isdigit(*prefix_end)) {
218 uri->ip_family = uristring[uri->protocollen] - '0';
219 prefix_end++;
220 }
221 if (*prefix_end != ':')
222 return URI_ERRNO_INVALID_PROTOCOL;
223 prefix_end++;
224
225 /* Skip slashes */
226
227 if (prefix_end[0] == '/' && prefix_end[1] == '/') {
228 if (prefix_end[2] == '/'
229 && get_protocol_need_slash_after_host(uri->protocol))
230 return URI_ERRNO_TOO_MANY_SLASHES;
231
232 prefix_end += 2;
233
234 } else if (get_protocol_need_slashes(uri->protocol)) {
235 return URI_ERRNO_NO_SLASHES;
236 }
237
238 if (get_protocol_free_syntax(uri->protocol)) {
239 uri->data = prefix_end;
240 uri->datalen = strlen(prefix_end);
241 return URI_ERRNO_OK;
242
243 } else if (uri->protocol == PROTOCOL_FILE) {
244 int datalen = check_uri_file(prefix_end);
245 unsigned char *frag_or_post = prefix_end + datalen;
246
247 /* Extract the fragment part. */
248 if (datalen >= 0) {
249 if (*frag_or_post == '#') {
250 uri->fragment = frag_or_post + 1;
251 uri->fragmentlen = strcspn(uri->fragment, POST_CHAR_S);
252 frag_or_post = uri->fragment + uri->fragmentlen;
253 }
254 if (*frag_or_post == POST_CHAR) {
255 uri->post = frag_or_post + 1;
256 }
257 } else {
258 datalen = strlen(prefix_end);
259 }
260
261 uri->data = prefix_end;
262 uri->datalen = datalen;
263
264 return URI_ERRNO_OK;
265 }
266
267 /* Isolate host */
268
269 #ifdef CONFIG_IPV6
270 /* Get brackets enclosing IPv6 address */
271 lbracket = strchr(prefix_end, '[');
272 if (lbracket) {
273 rbracket = strchr(lbracket, ']');
274 /* [address] is handled only inside of hostname part (surprisingly). */
275 if (rbracket && rbracket < prefix_end + strcspn(prefix_end, "/"))
276 uri->ipv6 = 1;
277 else
278 lbracket = rbracket = NULL;
279 } else {
280 rbracket = NULL;
281 }
282 #endif
283
284 /* Possibly skip auth part */
285 host_end = prefix_end + strcspn(prefix_end, "@");
286
287 if (prefix_end + strcspn(prefix_end, "/") > host_end
288 && *host_end) { /* we have auth info here */
289 unsigned char *user_end;
290
291 /* Allow '@' in the password component */
292 while (strcspn(host_end + 1, "@") < strcspn(host_end + 1, "/?"))
293 host_end = host_end + 1 + strcspn(host_end + 1, "@");
294
295 user_end = strchr(prefix_end, ':');
296
297 if (!user_end || user_end > host_end) {
298 uri->user = prefix_end;
299 uri->userlen = host_end - prefix_end;
300 } else {
301 uri->user = prefix_end;
302 uri->userlen = user_end - prefix_end;
303 uri->password = user_end + 1;
304 uri->passwordlen = host_end - user_end - 1;
305 }
306 prefix_end = host_end + 1;
307 }
308
309 #ifdef CONFIG_IPV6
310 if (uri->ipv6)
311 host_end = rbracket + strcspn(rbracket, ":/?");
312 else
313 #endif
314 host_end = prefix_end + strcspn(prefix_end, ":/?");
315
316 #ifdef CONFIG_IPV6
317 if (uri->ipv6) {
318 int addrlen = rbracket - lbracket - 1;
319
320 /* Check for valid length.
321 * addrlen >= sizeof(hostbuf) is theorically impossible
322 * but i keep the test in case of... Safer, imho --Zas */
323 assertm(addrlen >= 0 && addrlen < NI_MAXHOST,
324 "parse_uri(): addrlen value is bad (%d) for URL '%s'. "
325 "Problems are likely to be encountered. Please report "
326 "this, it is a security bug!", addrlen, uristring);
327 if_assert_failed return URI_ERRNO_IPV6_SECURITY;
328
329 uri->host = lbracket + 1;
330 uri->hostlen = addrlen;
331 } else
332 #endif
333 {
334 uri->host = prefix_end;
335 uri->hostlen = host_end - prefix_end;
336
337 /* Trim trailing '.'s */
338 if (uri->hostlen && uri->host[uri->hostlen - 1] == '.')
339 return URI_ERRNO_TRAILING_DOTS;
340 }
341
342 if (*host_end == ':') { /* we have port here */
343 unsigned char *port_end = host_end + 1 + strcspn(host_end + 1, "/");
344
345 host_end++;
346
347 uri->port = host_end;
348 uri->portlen = port_end - host_end;
349
350 if (uri->portlen == 0)
351 return URI_ERRNO_NO_PORT_COLON;
352
353 /* We only use 8 bits for portlen so better check */
354 if (uri->portlen != port_end - host_end)
355 return URI_ERRNO_INVALID_PORT;
356
357 /* test if port is number */
358 /* TODO: possibly lookup for the service otherwise? --pasky */
359 for (; host_end < port_end; host_end++)
360 if (!isdigit(*host_end))
361 return URI_ERRNO_INVALID_PORT;
362
363 /* Check valid port value, and let show an error message
364 * about invalid url syntax. */
365 if (uri->port && uri->portlen) {
366 int n;
367
368 errno = 0;
369 n = strtol(uri->port, NULL, 10);
370 if (errno || !uri_port_is_valid(n))
371 return URI_ERRNO_INVALID_PORT;
372 }
373 }
374
375 if (*host_end == '/') {
376 host_end++;
377
378 } else if (get_protocol_need_slash_after_host(uri->protocol)) {
379 /* The need for slash after the host component depends on the
380 * need for a host component. -- The dangerous mind of Jonah */
381 if (!uri->hostlen)
382 return URI_ERRNO_NO_HOST;
383
384 return URI_ERRNO_NO_HOST_SLASH;
385 }
386
387 /* Look for #fragment or POST_CHAR */
388 prefix_end = host_end + strcspn(host_end, "#" POST_CHAR_S);
389 uri->data = host_end;
390 uri->datalen = prefix_end - host_end;
391
392 if (*prefix_end == '#') {
393 uri->fragment = prefix_end + 1;
394 uri->fragmentlen = strcspn(uri->fragment, POST_CHAR_S);
395 prefix_end = uri->fragment + uri->fragmentlen;
396 }
397
398 if (*prefix_end == POST_CHAR) {
399 uri->post = prefix_end + 1;
400 }
401
402 return URI_ERRNO_OK;
403 }
404
405 int
get_uri_port(struct uri * uri)406 get_uri_port(struct uri *uri)
407 {
408 if (uri->port && uri->portlen) {
409 unsigned char *end = uri->port;
410 int port = strtol(uri->port, (char **) &end, 10);
411
412 if (end != uri->port) {
413 assert(uri_port_is_valid(port));
414 return port;
415 }
416 }
417
418 return get_protocol_port(uri->protocol);
419 }
420
/* True when @comp requests neither URI_SPECIAL nor URI_IDN, the components
 * compare_uri() does not support. The expansion is fully parenthesized so it
 * can be used safely inside larger expressions. */
#define can_compare_uri_components(comp) (!((comp) & (URI_SPECIAL | URI_IDN)))
422
/* Compare two URI components given as pointer plus length.
 *
 * Returns 1 when they match: the lengths are equal, both pointers are set
 * or both are unset, and (when set with a non-zero length) the bytes are
 * identical. Returns 0 otherwise. */
static inline int
compare_component(unsigned char *a, int alen, unsigned char *b, int blen)
{
	int a_set = !!a;
	int b_set = !!b;

	/* The lengths and the "is set" state have to agree. */
	if (alen != blen)
		return 0;
	if (a_set != b_set)
		return 0;

	/* Nothing to compare byte-wise, so that is a perfect match. */
	if (!a_set || alen == 0)
		return 1;

	return memcmp(a, b, blen) == 0;
}
435
436 #define wants(x) (components & (x))
437
438 int
compare_uri(struct uri * a,struct uri * b,enum uri_component components)439 compare_uri(struct uri *a, struct uri *b, enum uri_component components)
440 {
441 if (a == b) return 1;
442 if (!components) return 0;
443
444 assertm(can_compare_uri_components(components),
445 "compare_uri() is a work in progress. Component unsupported");
446
447 return (!wants(URI_PROTOCOL) || a->protocol == b->protocol)
448 && (!wants(URI_IP_FAMILY) || a->ip_family == b->ip_family)
449 && (!wants(URI_USER)
450 || compare_component(a->user, a->userlen, b->user, b->userlen))
451 && (!wants(URI_PASSWORD)
452 || compare_component(a->password, a->passwordlen, b->password, b->passwordlen))
453 && (!wants(URI_HOST)
454 || compare_component(a->host, a->hostlen, b->host, b->hostlen))
455 && (!wants(URI_PORT)
456 || compare_component(a->port, a->portlen, b->port, b->portlen))
457 && (!wants(URI_DATA)
458 || compare_component(a->data, a->datalen, b->data, b->datalen))
459 && (!wants(URI_FRAGMENT)
460 || compare_component(a->fragment, a->fragmentlen, b->fragment, b->fragmentlen))
461 && (!wants(URI_POST)
462 || compare_component(a->post, a->post ? strlen(a->post) : 0, b->post, b->post ? strlen(b->post) : 0));
463 }
464
465
466 /* We might need something more intelligent than this Swiss army knife. */
467 struct string *
add_uri_to_string(struct string * string,struct uri * uri,enum uri_component components)468 add_uri_to_string(struct string *string, struct uri *uri,
469 enum uri_component components)
470 {
471 /* Custom or unknown keep the URI untouched. */
472 if (uri->protocol == PROTOCOL_UNKNOWN)
473 return add_to_string(string, struri(uri));
474
475 if (wants(URI_PROTOCOL)) {
476 add_bytes_to_string(string, uri->string, uri->protocollen);
477 if (wants(URI_IP_FAMILY) && uri->ip_family)
478 add_long_to_string(string, uri->ip_family);
479 add_char_to_string(string, ':');
480 if (get_protocol_need_slashes(uri->protocol))
481 add_to_string(string, "//");
482 }
483
484 if (wants(URI_USER) && uri->userlen) {
485 add_bytes_to_string(string, uri->user, uri->userlen);
486
487 if (wants(URI_PASSWORD) && uri->passwordlen) {
488 add_char_to_string(string, ':');
489 add_bytes_to_string(string, uri->password,
490 uri->passwordlen);
491 }
492
493 add_char_to_string(string, '@');
494 }
495
496 if (wants(URI_HOST) && uri->hostlen) {
497 int add_host = 1;
498
499 #ifdef CONFIG_IPV6
500 /* Rationale for wants(URI_PORT): The [notation] was invented
501 * so that you can have an IPv6 addy and a port together. So
502 * we want to use it when that happens, otherwise we need not
503 * bother (that happens only when we want it for DNS anyway).
504 * I insist on an implied elegancy of this way, but YMMV. ;-)
505 * --pasky */
506 if (uri->ipv6 && wants(URI_PORT)) add_char_to_string(string, '[');
507 #endif
508 #ifdef CONFIG_IDN
509 /* Support for the GNU International Domain Name library.
510 *
511 * http://www.gnu.org/software/libidn/manual/html_node/IDNA-Functions.html
512 *
513 * Now it is probably not perfect because idna_to_ascii_lz()
514 * will be using a ``zero terminated input string encoded in
515 * the current locale's character set''. Anyway I don't know
516 * how to convert anything to UTF-8 or Unicode. --jonas */
517 if (wants(URI_IDN)) {
518 unsigned char *host = memacpy(uri->host, uri->hostlen);
519
520 if (host) {
521 char *idname;
522 int code = idna_to_ascii_lz(host, &idname, 0);
523
524 /* FIXME: Return NULL if it coughed? --jonas */
525 if (code == IDNA_SUCCESS) {
526 add_to_string(string, idname);
527 free(idname);
528 add_host = 0;
529 }
530
531 mem_free(host);
532 }
533 }
534
535 #endif
536 if (add_host)
537 add_bytes_to_string(string, uri->host, uri->hostlen);
538
539 #ifdef CONFIG_IPV6
540 if (uri->ipv6 && wants(URI_PORT)) add_char_to_string(string, ']');
541 #endif
542 }
543
544 if (wants(URI_PORT) || wants(URI_DEFAULT_PORT)) {
545 if (uri->portlen) {
546 add_char_to_string(string, ':');
547 add_bytes_to_string(string, uri->port, uri->portlen);
548
549 } else if (wants(URI_DEFAULT_PORT)
550 && uri->protocol != PROTOCOL_USER) {
551 /* For user protocols we don't know a default port.
552 * Should user protocols ports be configurable? */
553 int port = get_protocol_port(uri->protocol);
554
555 add_char_to_string(string, ':');
556 add_long_to_string(string, port);
557 }
558 }
559
560 /* Only add slash if we need to separate */
561 if ((wants(URI_DATA) || wants(URI_POST) || components == URI_HTTP_REFERRER_HOST)
562 && wants(~(URI_DATA | URI_PORT))
563 && get_protocol_need_slash_after_host(uri->protocol))
564 add_char_to_string(string, '/');
565
566 if (wants(URI_DATA) && uri->datalen)
567 add_bytes_to_string(string, uri->data, uri->datalen);
568
569 /* We can not test uri->datalen here since we need to always
570 * add '/'. */
571 if (wants(URI_PATH) || wants(URI_FILENAME)) {
572 unsigned char *filename = uri->data;
573 unsigned char *pos;
574
575 assertm(!wants(URI_FILENAME) || components == URI_FILENAME,
576 "URI_FILENAME should be used alone %d", components);
577
578 if (wants(URI_PATH) && !is_uri_dir_sep(uri, *filename)) {
579 /* FIXME: Add correct separator */
580 add_char_to_string(string, '/');
581 }
582
583 if (!uri->datalen) return string;
584
585 for (pos = filename; *pos && !end_of_dir(*pos); pos++)
586 if (wants(URI_FILENAME) && is_uri_dir_sep(uri, *pos))
587 filename = pos + 1;
588
589 return add_bytes_to_string(string, filename, pos - filename);
590 }
591
592 if (wants(URI_QUERY) && uri->datalen) {
593 unsigned char *query = memchr(uri->data, '?', uri->datalen);
594
595 assertm(URI_QUERY == components,
596 "URI_QUERY should be used alone %d", components);
597
598 if (!query) return string;
599
600 query++;
601 /* Check fragment and POST_CHAR */
602 return add_bytes_to_string(string, query, strcspn(query, "#" POST_CHAR_S));
603 }
604
605 if (wants(URI_FRAGMENT) && uri->fragmentlen) {
606 add_char_to_string(string, '#');
607 add_bytes_to_string(string, uri->fragment, uri->fragmentlen);
608 }
609
610 if (wants(URI_POST) && uri->post) {
611 add_char_to_string(string, POST_CHAR);
612 add_to_string(string, uri->post);
613
614 } else if (wants(URI_POST_INFO) && uri->post) {
615 if (!strncmp(uri->post, "text/plain", 10)) {
616 add_to_string(string, " (PLAIN TEXT DATA)");
617
618 } else if (!strncmp(uri->post, "multipart/form-data;", 20)) {
619 add_to_string(string, " (MULTIPART FORM DATA)");
620
621 } else {
622 add_to_string(string, " (POST DATA)");
623 }
624
625 }
626
627 return string;
628 }
629
630 #undef wants
631
632 unsigned char *
get_uri_string(struct uri * uri,enum uri_component components)633 get_uri_string(struct uri *uri, enum uri_component components)
634 {
635 struct string string;
636
637 if (init_string(&string)
638 && add_uri_to_string(&string, uri, components))
639 return string.source;
640
641 done_string(&string);
642 return NULL;
643 }
644
645
646 struct string *
add_string_uri_to_string(struct string * string,unsigned char * uristring,enum uri_component components)647 add_string_uri_to_string(struct string *string, unsigned char *uristring,
648 enum uri_component components)
649 {
650 struct uri uri;
651
652 if (parse_uri(&uri, uristring) != URI_ERRNO_OK)
653 return NULL;
654
655 return add_uri_to_string(string, &uri, components);
656 }
657
658
659 #define normalize_uri_reparse(str) normalize_uri(NULL, str)
660 #define normalize_uri_noparse(uri) normalize_uri(uri, struri(uri))
661
662 unsigned char *
normalize_uri(struct uri * uri,unsigned char * uristring)663 normalize_uri(struct uri *uri, unsigned char *uristring)
664 {
665 unsigned char *parse_string = uristring;
666 unsigned char *src, *dest, *path;
667 int need_slash = 0;
668 int parse = (uri == NULL);
669 struct uri uri_struct;
670
671 if (!uri) uri = &uri_struct;
672
673 /* We need to get the real (proxied) URI but lowercase relevant URI
674 * parts along the way. */
675 do {
676 if (parse && parse_uri(uri, parse_string) != URI_ERRNO_OK)
677 return uristring;
678
679 assert(uri->data);
680
681 /* This is a maybe not the right place but both join_urls() and
682 * get_translated_uri() through translate_url() calls this
683 * function and then it already works on and modifies an
684 * allocated copy. */
685 convert_to_lowercase_locale_indep(uri->string, uri->protocollen);
686 if (uri->hostlen) convert_to_lowercase(uri->host, uri->hostlen);
687
688 parse = 1;
689 parse_string = uri->data;
690 } while (uri->protocol == PROTOCOL_PROXY);
691
692 if (get_protocol_free_syntax(uri->protocol))
693 return uristring;
694
695 if (uri->protocol != PROTOCOL_UNKNOWN)
696 need_slash = get_protocol_need_slash_after_host(uri->protocol);
697
698 /* We want to start at the first slash to also reduce URIs like
699 * http://host//index.html to http://host/index.html */
700 path = uri->data - need_slash;
701 dest = src = path;
702
703 /* This loop mangles the URI string by removing directory elevators and
704 * other cruft. Example: /.././etc////..//usr/ -> /usr/ */
705 while (*dest) {
706 /* If the following pieces are the LAST parts of URL, we remove
707 * them as well. See RFC 1808 for details. */
708
709 if (end_of_dir(src[0])) {
710 /* URL data contains no more path. */
711 memmove(dest, src, strlen(src) + 1);
712 break;
713 }
714
715 if (!is_uri_dir_sep(uri, src[0])) {
716 /* This is to reduce indentation */
717
718 } else if (src[1] == '.') {
719 if (!src[2]) {
720 /* /. - skip the dot */
721 *dest++ = *src;
722 *dest = 0;
723 break;
724
725 } else if (is_uri_dir_sep(uri, src[2])) {
726 /* /./ - strip that.. */
727 src += 2;
728 continue;
729
730 } else if (src[2] == '.'
731 && (is_uri_dir_sep(uri, src[3]) || !src[3])) {
732 /* /../ or /.. - skip it and preceding element. */
733
734 /* First back out the last incrementation of
735 * @dest (dest++) to get the position that was
736 * last asigned to. */
737 if (dest > path) dest--;
738
739 /* @dest might be pointing to a dir separator
740 * so we decrement before any testing. */
741 while (dest > path) {
742 dest--;
743 if (is_uri_dir_sep(uri, *dest)) break;
744 }
745
746 if (!src[3]) {
747 /* /.. - add ending slash and stop */
748 *dest++ = *src;
749 *dest = 0;
750 break;
751 }
752
753 src += 3;
754 continue;
755 }
756
757 } else if (is_uri_dir_sep(uri, src[1])) {
758 /* // - ignore first '/'. */
759 src += 1;
760 continue;
761 }
762
763 /* We don't want to access memory past the NUL char. */
764 *dest = *src++;
765 if (*dest) dest++;
766 }
767
768 return uristring;
769 }
770
771 /* The 'file' scheme URI comes in and bastardized URI comes out which consists
772 * of just the complete path to file/directory, which the dumb 'file' protocol
773 * backend can understand. No host parts etc, that is what this function is
774 * supposed to chew. */
775 static struct uri *
transform_file_url(struct uri * uri,unsigned char * cwd)776 transform_file_url(struct uri *uri, unsigned char *cwd)
777 {
778 unsigned char *path = uri->data;
779
780 assert(uri->protocol == PROTOCOL_FILE && uri->data);
781
782 /* Sort out the host part. We currently support only host "localhost"
783 * (plus empty host part will be assumed to be "localhost" as well).
784 * As our extensions, '.' will reference to the cwd on localhost
785 * (originally, when the first thing after file:// wasn't "localhost/",
786 * we assumed the cwd as well, and pretended that there's no host part
787 * at all) and '..' to the directory parent to cwd. Another extension
788 * is that if this is a DOS-like system, the first char in two-char
789 * host part is uppercase letter and the second char is a colon, it is
790 * assumed to be a local disk specification. */
791 /* TODO: Use FTP for non-localhost hosts. --pasky */
792
793 /* For URL "file://", we open the current directory. Some other
794 * browsers instead open root directory, but AFAIK the standard does
795 * not specify that and this was the original behaviour and it is more
796 * consistent with our file://./ notation. */
797
798 /* Who would name their file/dir '...' ? */
799 if (*path == '.' || !*path) {
800 struct string dir;
801
802 if (!init_string(&dir))
803 return NULL;
804
805 encode_uri_string(&dir, cwd, -1, 0);
806
807 /* Either we will end up with '//' and translate_directories()
808 * will shorten it or the '/' will mark the inserted cwd as a
809 * directory. */
810 if (*path == '.') *path = '/';
811
812 /* Insert the current working directory. */
813 /* The offset is 7 == sizeof("file://") - 1. */
814 insert_in_string(&struri(uri), 7, dir.source, dir.length);
815
816 done_string(&dir);
817 return uri;
818 }
819
820 #ifdef DOS_FS
821 if (isasciialpha(path[0]) && path[1] == ':' && dir_sep(path[2]))
822 return NULL;
823 #endif
824
825 for (; *path && !dir_sep(*path); path++);
826
827 /* FIXME: We will in fact assume localhost even for non-local hosts,
828 * until we will support the FTP transformation. --pasky */
829
830 memmove(uri->data, path, strlen(path) + 1);
831 return uri;
832 }
833
834 static unsigned char *translate_url(unsigned char *url, unsigned char *cwd);
835
836 unsigned char *
join_urls(struct uri * base,unsigned char * rel)837 join_urls(struct uri *base, unsigned char *rel)
838 {
839 unsigned char *uristring, *path;
840 int add_slash = 0;
841 int translate = 0;
842 int length = 0;
843
844 /* See RFC 1808 */
845 /* TODO: Support for ';' ? (see the RFC) --pasky */
846
847 /* For '#', '?' and '//' we could use get_uri_string() but it might be
848 * too expensive since it uses granular allocation scheme. I wouldn't
849 * personally mind tho' because it would be cleaner. --jonas */
850 if (rel[0] == '#') {
851 /* Strip fragment and post part from the base URI and append
852 * the fragment string in @rel. */
853 length = base->fragment
854 ? base->fragment - struri(base) - 1
855 : get_real_uri_length(base);
856
857 } else if (rel[0] == '?') {
858 /* Strip query, fragment and post part from the base URI and
859 * append the query string in @rel. */
860 length = base->fragment ? base->fragment - struri(base) - 1
861 : get_real_uri_length(base);
862
863 uristring = memchr(base->data, '?', base->datalen);
864 if (uristring) length = uristring - struri(base);
865
866 } else if (rel[0] == '/' && rel[1] == '/') {
867 if (!get_protocol_need_slashes(base->protocol))
868 return NULL;
869
870 /* Get `<protocol>:' from the base URI and append the `//' part
871 * from @rel. */
872 length = base->protocollen + 1;
873
874 /* We need to sanitize the relative part and add stuff like
875 * host slash. */
876 translate = 1;
877 }
878
879 /* If one of the tests above set @length to something useful */
880 if (length) {
881 uristring = memacpy(struri(base), length);
882 if (!uristring) return NULL;
883
884 add_to_strn(&uristring, rel);
885
886 if (translate) {
887 unsigned char *translated;
888
889 translated = translate_url(uristring, NULL);
890 mem_free(uristring);
891 return translated;
892 }
893 return normalize_uri_reparse(uristring);
894 }
895
896 /* Check if there is some protocol name to go for */
897 length = get_protocol_length(rel);
898 if (length) {
899 switch (get_protocol(rel, length)) {
900 case PROTOCOL_UNKNOWN:
901 case PROTOCOL_PROXY:
902 /* Mysteriously proxy URIs are breaking here ... */
903 break;
904
905 case PROTOCOL_FILE:
906 /* FIXME: Use get_uri_string(base, URI_PATH) as cwd arg
907 * to translate_url(). */
908 default:
909 uristring = translate_url(rel, NULL);
910 if (uristring) return uristring;
911 }
912 }
913
914 assertm(base->data, "bad base url");
915 if_assert_failed return NULL;
916
917 path = base->data;
918
919 /* Either is path blank, but we've slash char before, or path is not
920 * blank, but doesn't start by a slash (if we'd just stay along with
921 * is_uri_dir_sep(&uri, path[-1]) w/o all the surrounding crap, it
922 * should be enough, but I'm not sure and I don't want to break
923 * anything --pasky). */
924 /* We skip first char of URL ('/') in parse_url() (ARGH). This
925 * is reason of all this bug-bearing magic.. */
926 if (*path) {
927 if (!is_uri_dir_sep(base, *path)) path--;
928 } else {
929 if (is_uri_dir_sep(base, path[-1])) path--;
930 }
931
932 if (!is_uri_dir_sep(base, rel[0])) {
933 unsigned char *path_end;
934
935 /* The URL is relative. */
936
937 if (!*path) {
938 /* There's no path in the URL, but we're going to add
939 * something there, and the something doesn't start by
940 * a slash. So we need to insert a slash after the base
941 * URL. Clever, eh? ;) */
942 add_slash = 1;
943 }
944
945 for (path_end = path; *path_end; path_end++) {
946 if (end_of_dir(*path_end)) break;
947 /* Modify the path pointer, so that it'll always point
948 * above the last '/' in the URL; later, we'll copy the
949 * URL only _TO_ this point, and anything after last
950 * slash will be substituted by 'rel'. */
951 if (is_uri_dir_sep(base, *path_end))
952 path = path_end + 1;
953 }
954 }
955
956 length = path - struri(base);
957 uristring = mem_alloc(length + strlen(rel) + add_slash + 1);
958 if (!uristring) return NULL;
959
960 memcpy(uristring, struri(base), length);
961 if (add_slash) uristring[length] = '/';
962 strcpy(uristring + length + add_slash, rel);
963
964 return normalize_uri_reparse(uristring);
965 }
966
967
968 /* Tries to figure out what protocol @newurl might be specifying by checking if
969 * it exists as a file locally or by checking parts of the host name. */
970 static enum protocol
find_uri_protocol(unsigned char * newurl)971 find_uri_protocol(unsigned char *newurl)
972 {
973 unsigned char *ch;
974
975 /* First see if it is a file so filenames that look like hostnames
976 * won't confuse us below. */
977 if (check_whether_file_exists(newurl) >= 0) return PROTOCOL_FILE;
978
979 /* Yes, it would be simpler to make test for IPv6 address first,
980 * but it would result in confusing mix of ifdefs ;-). */
981 /* FIXME: Ideas for improve protocol detection
982 *
983 * - Handle common hostnames. It could be part of the protocol backend
984 * structure. [ www -> http, irc -> irc, news -> nntp, ... ]
985 *
986 * - Resolve using port number. [ 119 -> nntp, 443 -> https, ... ]
987 */
988
989 ch = newurl + strcspn(newurl, ".:/@");
990 if (*ch == '@'
991 || (*ch == ':' && *newurl != '[' && strchr(newurl, '@'))
992 || !c_strncasecmp(newurl, "ftp.", 4)) {
993 /* Contains user/password/ftp-hostname */
994 return PROTOCOL_FTP;
995
996 #ifdef CONFIG_IPV6
997 } else if (*newurl == '[' && *ch == ':') {
998 /* Candidate for IPv6 address */
999 unsigned char *bracket2, *colon2;
1000
1001 ch++;
1002 bracket2 = strchr(ch, ']');
1003 colon2 = strchr(ch, ':');
1004 if (bracket2 && colon2 && bracket2 > colon2)
1005 return PROTOCOL_HTTP;
1006 #endif
1007
1008 } else if (*newurl != '.' && *ch == '.') {
1009 /* Contains domain name? */
1010 unsigned char *host_end, *domain;
1011 unsigned char *ipscan;
1012
1013 /* Process the hostname */
1014 for (domain = ch + 1;
1015 *(host_end = domain + strcspn(domain, ".:/?")) == '.';
1016 domain = host_end + 1);
1017
1018 /* It's IP? */
1019 for (ipscan = ch; isdigit(*ipscan) || *ipscan == '.';
1020 ipscan++);
1021
1022 if (!*ipscan || *ipscan == ':' || *ipscan == '/')
1023 return PROTOCOL_HTTP;
1024
1025 /* It's two-letter or known TLD? */
1026 if (host_end - domain == 2
1027 || end_with_known_tld(domain, host_end - domain) >= 0)
1028 return PROTOCOL_HTTP;
1029 }
1030
1031 return PROTOCOL_UNKNOWN;
1032 }
1033
1034
1035 #define MAX_TRANSLATION_ATTEMPTS 32
1036
1037 /* Returns an URI string that can be used internally. Adding protocol prefix,
1038 * missing slashes etc. */
1039 static unsigned char *
translate_url(unsigned char * url,unsigned char * cwd)1040 translate_url(unsigned char *url, unsigned char *cwd)
1041 {
1042 unsigned char *newurl;
1043 struct uri uri;
1044 enum uri_errno uri_errno, prev_errno = URI_ERRNO_EMPTY;
1045 int retries = 0;
1046
1047 /* Strip starting spaces */
1048 while (*url == ' ') url++;
1049 if (!*url) return NULL;
1050
1051 newurl = expand_tilde(url); /* XXX: Post data copy. */
1052 if (!newurl) return NULL;
1053
1054 parse_uri:
1055 /* Yay a goto loop. If we get some URI parse error and try to
1056 * fix it we go back to here and try again. */
1057 /* Ordinary parse */
1058 uri_errno = parse_uri(&uri, newurl);
1059
1060 /* Bail out if the same error occurs twice */
1061 if (uri_errno == prev_errno || retries++ > MAX_TRANSLATION_ATTEMPTS) {
1062 if (retries > MAX_TRANSLATION_ATTEMPTS) {
1063 ERROR("Maximum number of parsing attempts exceeded "
1064 "for %s.", url);
1065 }
1066 mem_free(newurl);
1067 return NULL;
1068 }
1069
1070 prev_errno = uri_errno;
1071
1072 switch (uri_errno) {
1073 case URI_ERRNO_OK:
1074 /* Fix translation of 1.2.3.4:5 so IP address part won't be
1075 * interpreted as the protocol name. */
1076 if (uri.protocol == PROTOCOL_UNKNOWN) {
1077 enum protocol protocol = find_uri_protocol(newurl);
1078
1079 /* Code duplication with the URI_ERRNO_INVALID_PROTOCOL
1080 * case. */
1081 if (protocol != PROTOCOL_UNKNOWN) {
1082 struct string str;
1083
1084 if (!init_string(&str)) return NULL;
1085
1086 switch (protocol) {
1087 case PROTOCOL_FTP:
1088 add_to_string(&str, "ftp://");
1089 encode_uri_string(&str, newurl, -1, 0);
1090 break;
1091
1092 case PROTOCOL_HTTP:
1093 add_to_string(&str, "http://");
1094 add_to_string(&str, newurl);
1095 break;
1096
1097 case PROTOCOL_UNKNOWN:
1098 break;
1099
1100 case PROTOCOL_FILE:
1101 default:
1102 add_to_string(&str, "file://");
1103 if (!dir_sep(*newurl))
1104 add_to_string(&str, "./");
1105
1106 add_to_string(&str, newurl);
1107 }
1108
1109 mem_free(newurl);
1110 newurl = str.source;
1111
1112 /* Work around the infinite loop prevention */
1113 prev_errno = URI_ERRNO_EMPTY;
1114 goto parse_uri;
1115 }
1116 }
1117
1118 /* If file:// URI is transformed we need to reparse. */
1119 if (uri.protocol == PROTOCOL_FILE && cwd && *cwd
1120 && transform_file_url(&uri, cwd))
1121 return normalize_uri_reparse(struri(&uri));
1122
1123 /* Translate the proxied URI too if proxy:// */
1124 if (uri.protocol == PROTOCOL_PROXY) {
1125 unsigned char *data = translate_url(uri.data, cwd);
1126 int pos = uri.data - struri(&uri);
1127
1128 if (!data) break;
1129 struri(&uri)[pos] = 0;
1130 insert_in_string(&struri(&uri), pos, data, strlen(data));
1131 mem_free(data);
1132 return normalize_uri_reparse(struri(&uri));
1133 }
1134
1135 return normalize_uri_noparse(&uri);
1136
1137 case URI_ERRNO_TOO_MANY_SLASHES:
1138 {
1139 unsigned char *from, *to;
1140
1141 assert(uri.string[uri.protocollen] == ':'
1142 && uri.string[uri.protocollen + 1] == '/'
1143 && uri.string[uri.protocollen + 2] == '/');
1144
1145 from = to = uri.string + uri.protocollen + 3;
1146 while (*from == '/') from++;
1147
1148 assert(to < from);
1149 memmove(to, from, strlen(from) + 1);
1150 goto parse_uri;
1151 }
1152 case URI_ERRNO_NO_SLASHES:
1153 {
1154 /* Try prefix:some.url -> prefix://some.url.. */
1155 int slashes = 2;
1156
1157 /* Check if only one '/' is needed. */
1158 if (uri.string[uri.protocollen + 1] == '/')
1159 slashes--;
1160
1161 insert_in_string(&newurl, uri.protocollen + 1, "//", slashes);
1162 goto parse_uri;
1163 }
1164 case URI_ERRNO_TRAILING_DOTS:
1165 {
1166 /* Trim trailing '.'s */
1167 unsigned char *from = uri.host + uri.hostlen;
1168 unsigned char *to = from;
1169
1170 assert(uri.host < to && to[-1] == '.' && *from != '.');
1171
1172 while (uri.host < to && to[-1] == '.') to--;
1173
1174 assert(to < from);
1175 memmove(to, from, strlen(from) + 1);
1176 goto parse_uri;
1177 }
1178 case URI_ERRNO_NO_PORT_COLON:
1179 assert(uri.portlen == 0
1180 && uri.string < uri.port
1181 && uri.port[-1] == ':');
1182
1183 memmove(uri.port - 1, uri.port, strlen(uri.port) + 1);
1184 goto parse_uri;
1185
1186 case URI_ERRNO_NO_HOST_SLASH:
1187 {
1188 int offset = uri.port
1189 ? uri.port + uri.portlen - struri(&uri)
1190 : uri.host + uri.hostlen - struri(&uri) + uri.ipv6 /* ']' */;
1191
1192 assertm(uri.host, "uri.host not set after no host slash error");
1193 insert_in_string(&newurl, offset, "/", 1);
1194 goto parse_uri;
1195 }
1196 case URI_ERRNO_INVALID_PROTOCOL:
1197 {
1198 /* No protocol name */
1199 enum protocol protocol = find_uri_protocol(newurl);
1200 struct string str;
1201
1202 if (!init_string(&str)) return NULL;
1203
1204 switch (protocol) {
1205 case PROTOCOL_FTP:
1206 add_to_string(&str, "ftp://");
1207 encode_uri_string(&str, newurl, -1, 0);
1208 break;
1209
1210 case PROTOCOL_HTTP:
1211 add_to_string(&str, "http://");
1212 add_to_string(&str, newurl);
1213 break;
1214
1215 case PROTOCOL_UNKNOWN:
1216 /* We default to file:// even though we already
1217 * tested if the file existed since it will give
1218 * a "No such file or directory" error. which
1219 * might better hint the user that there was
1220 * problem figuring out the URI. */
1221 case PROTOCOL_FILE:
1222 default:
1223 add_to_string(&str, "file://");
1224 if (!dir_sep(*newurl))
1225 add_to_string(&str, "./");
1226
1227 encode_file_uri_string(&str, newurl);
1228 }
1229
1230 mem_free(newurl);
1231 newurl = str.source;
1232
1233 goto parse_uri;
1234 }
1235 case URI_ERRNO_EMPTY:
1236 case URI_ERRNO_IPV6_SECURITY:
1237 case URI_ERRNO_NO_HOST:
1238 case URI_ERRNO_INVALID_PORT:
1239 case URI_ERRNO_INVALID_PORT_RANGE:
1240 /* None of these can be handled properly. */
1241 break;
1242 }
1243
1244 mem_free(newurl);
1245 return NULL;
1246 }
1247
1248
1249 struct uri *
get_composed_uri(struct uri * uri,enum uri_component components)1250 get_composed_uri(struct uri *uri, enum uri_component components)
1251 {
1252 unsigned char *string;
1253
1254 assert(uri);
1255 if_assert_failed return NULL;
1256
1257 string = get_uri_string(uri, components);
1258 if (!string) return NULL;
1259
1260 uri = get_uri(string, 0);
1261 mem_free(string);
1262
1263 return uri;
1264 }
1265
1266 struct uri *
get_translated_uri(unsigned char * uristring,unsigned char * cwd)1267 get_translated_uri(unsigned char *uristring, unsigned char *cwd)
1268 {
1269 struct uri *uri;
1270
1271 uristring = translate_url(uristring, cwd);
1272 if (!uristring) return NULL;
1273
1274 uri = get_uri(uristring, 0);
1275 mem_free(uristring);
1276
1277 return uri;
1278 }
1279
1280
1281 unsigned char *
get_extension_from_uri(struct uri * uri)1282 get_extension_from_uri(struct uri *uri)
1283 {
1284 unsigned char *extension = NULL;
1285 int afterslash = 1;
1286 unsigned char *pos = uri->data;
1287
1288 assert(pos);
1289
1290 for (; *pos && !end_of_dir(*pos); pos++) {
1291 if (!afterslash && !extension && *pos == '.') {
1292 extension = pos;
1293 } else if (is_uri_dir_sep(uri, *pos)) {
1294 extension = NULL;
1295 afterslash = 1;
1296 } else {
1297 afterslash = 0;
1298 }
1299 }
1300
1301 if (extension && extension < pos)
1302 return memacpy(extension, pos - extension);
1303
1304 return NULL;
1305 }
1306
/* URI encoding, escaping unallowed characters. */
/* Returns 1 if @c may be emitted unescaped, 0 if it has to be hex encoded:
 * characters accepted by isident() plus the punctuation listed below. */
static inline int
safe_char(unsigned char c)
{
	/* RFC 2396, Page 8, Section 2.3 ;-) */
	switch (c) {
	case '.':
	case '!':
	case '~':
	case '*':
	case '\'':
	case '(':
	case ')':
		return 1;

	default:
		return isident(c) != 0;
	}
}
1315
/* Appends @name to @string, replacing every character rejected by
 * safe_char() with a "%XY" hex escape. '/' is copied verbatim unless
 * @convert_slashes is non-zero. A negative @namelen means that @name is
 * NUL terminated and its length is taken with strlen(). */
void
encode_uri_string(struct string *string, unsigned char *name, int namelen,
		  int convert_slashes)
{
	unsigned char escape[4] = { '%', '\0', '\0', '\0' };
	unsigned char *stop;

	if (namelen < 0)
		namelen = strlen(name);

	stop = name + namelen;

	while (name < stop) {
		unsigned char c = *name++;

		/* Note: ' ' is not turned into '+'; that is probably correct
		 * only for the query part of an URI. */
		if (safe_char(c) || (c == '/' && !convert_slashes)) {
			add_char_to_string(string, c);
			continue;
		}

		/* Hex it. */
		escape[1] = hx((((int) c) & 0xF0) >> 4);
		escape[2] = hx(((int) c) & 0xF);
		add_bytes_to_string(string, escape, sizeof(escape) - 1);
	}
}
1344
/* This function is evil, it modifies its parameter. */
/* XXX: but decoded string is _never_ longer than encoded string so it's an
 * efficient way to do that, imho. --Zas */
/* Decodes "%XY" escapes of the NUL terminated string @src in place. An
 * escape that is not followed by two hex digits, or that would decode to
 * a NUL byte, is copied through unchanged. '+' is not decoded to a space
 * (see the note about the query part in encode_uri_string()). */
void
decode_uri(unsigned char *src)
{
	unsigned char *out = src;

	for (;;) {
		unsigned char c = *src++;

		if (c == '%') {
			int high = unhx(src[0]);
			/* Only look at the second digit when the first one
			 * is valid; src[0] may be the terminator. */
			int low = (high >= 0) ? unhx(src[1]) : -1;

			if (high >= 0 && low >= 0) {
				int value = (high << 4) + low;

				if (value != 0) { /* don't allow %00 */
					c = (unsigned char) value;
					src += 2;
				}
			}
		}

		*out++ = c;

		/* The terminator has been copied as well; done. */
		if (c == '\0')
			break;
	}
}
1385
1386 void
decode_uri_string(struct string * string)1387 decode_uri_string(struct string *string)
1388 {
1389 decode_uri(string->source);
1390 string->length = strlen(string->source);
1391 }
1392
/* Decodes @src in place with decode_uri() and then overwrites every
 * character that is not printable, or that is a control character, with
 * '*'. */
void
decode_uri_for_display(unsigned char *src)
{
	unsigned char *p;

	decode_uri(src);

	for (p = src; *p != '\0'; p++) {
		if (isprint(*p) && !iscntrl(*p))
			continue;

		*p = '*';
	}
}
1402
1403 void
decode_uri_string_for_display(struct string * string)1404 decode_uri_string_for_display(struct string *string)
1405 {
1406 decode_uri_for_display(string->source);
1407 string->length = strlen(string->source);
1408 }
1409
1410
1411 /* URI list */
1412
1413 #define URI_LIST_GRANULARITY 0x3
1414
1415 #define realloc_uri_list(list) \
1416 mem_align_alloc(&(list)->uris, (list)->size, (list)->size + 1, \
1417 struct uri *, URI_LIST_GRANULARITY)
1418
1419 struct uri *
add_to_uri_list(struct uri_list * list,struct uri * uri)1420 add_to_uri_list(struct uri_list *list, struct uri *uri)
1421 {
1422 if (!realloc_uri_list(list))
1423 return NULL;
1424
1425 list->uris[list->size++] = get_uri_reference(uri);
1426
1427 return uri;
1428 };
1429
1430 void
free_uri_list(struct uri_list * list)1431 free_uri_list(struct uri_list *list)
1432 {
1433 struct uri *uri;
1434 int index;
1435
1436 if (!list->uris) return;
1437
1438 foreach_uri (uri, index, list) {
1439 done_uri(uri);
1440 }
1441
1442 mem_free_set(&list->uris, NULL);
1443 list->size = 0;
1444 }
1445
1446 /* URI cache */
1447
/* One cached URI. get_uri_cache_entry() allocates an entry with
 * sizeof(entry) + length bytes, copies the URI text into @string and parses
 * it into @uri, so @string really holds the whole URI text and not just one
 * character. */
struct uri_cache_entry {
	struct uri uri;			/* Parsed from @string below. */
	unsigned char string[1];	/* Start of the copied URI text. */
};

/* The cache itself. Entries are stored in @map keyed by their URI string;
 * @object is locked once for every entry added and unlocked when an entry
 * is released, and the map is freed when it is no longer in use. */
struct uri_cache {
	struct hash *map;
	struct object object;
};

/* The single cache instance used by get_uri() and done_uri(). */
static struct uri_cache uri_cache;
1459
#ifdef CONFIG_DEBUG
/* Debug check: reports an internal error if the protocol or host part of
 * @uri contains a character for which c_isupper() is true. */
static inline void
check_uri_sanity(struct uri *uri)
{
	int found_upper = 0;
	int i;

	/* Protocol name sits at the start of the URI string. */
	for (i = 0; !found_upper && i < uri->protocollen; i++)
		found_upper = c_isupper(uri->string[i]);

	/* Nothing is read from the host when hostlen is zero. */
	for (i = 0; !found_upper && i < uri->hostlen; i++)
		found_upper = c_isupper(uri->host[i]);

	if (found_upper)
		INTERNAL("Uppercase letters detected in protocol or host part (%s).", struri(uri));
}
#else
#define check_uri_sanity(uri)
#endif
1479
1480 static inline struct uri_cache_entry *
get_uri_cache_entry(unsigned char * string,int length)1481 get_uri_cache_entry(unsigned char *string, int length)
1482 {
1483 struct uri_cache_entry *entry;
1484 struct hash_item *item;
1485
1486 assert(string && length > 0);
1487 if_assert_failed return NULL;
1488
1489 item = get_hash_item(uri_cache.map, string, length);
1490 if (item) return item->value;
1491
1492 /* Setup a new entry */
1493
1494 entry = mem_calloc(1, sizeof(*entry) + length);
1495 if (!entry) return NULL;
1496
1497 object_nolock(&entry->uri, "uri");
1498 memcpy(&entry->string, string, length);
1499 string = entry->string;
1500
1501 if (parse_uri(&entry->uri, string) != URI_ERRNO_OK
1502 || !add_hash_item(uri_cache.map, string, length, entry)) {
1503 mem_free(entry);
1504 return NULL;
1505 }
1506
1507 object_lock(&uri_cache);
1508
1509 return entry;
1510 }
1511
1512 struct uri *
get_uri(unsigned char * string,enum uri_component components)1513 get_uri(unsigned char *string, enum uri_component components)
1514 {
1515 struct uri_cache_entry *entry;
1516
1517 assert(string);
1518
1519 if (components) {
1520 struct uri uri;
1521
1522 if (parse_uri(&uri, string) != URI_ERRNO_OK)
1523 return NULL;
1524
1525 return get_composed_uri(&uri, components);
1526 }
1527
1528 if (!is_object_used(&uri_cache)) {
1529 uri_cache.map = init_hash(hash_size(3), strhash);
1530 if (!uri_cache.map) return NULL;
1531 object_nolock(&uri_cache, "uri_cache");
1532 }
1533
1534 entry = get_uri_cache_entry(string, strlen(string));
1535 if (!entry) {
1536 if (!is_object_used(&uri_cache))
1537 free_hash(uri_cache.map);
1538 return NULL;
1539 }
1540
1541 check_uri_sanity(&entry->uri);
1542 object_nolock(&entry->uri, "uri");
1543 object_lock(&entry->uri);
1544
1545 return &entry->uri;
1546 }
1547
1548 void
done_uri(struct uri * uri)1549 done_uri(struct uri *uri)
1550 {
1551 unsigned char *string = struri(uri);
1552 int length = strlen(string);
1553 struct hash_item *item;
1554 struct uri_cache_entry *entry;
1555
1556 assert(is_object_used(&uri_cache));
1557
1558 object_unlock(uri);
1559 if (is_object_used(uri)) return;
1560
1561 item = get_hash_item(uri_cache.map, string, length);
1562 entry = item ? item->value : NULL;
1563
1564 assertm(entry, "Releasing unknown URI [%s]", string);
1565 del_hash_item(uri_cache.map, item);
1566 mem_free(entry);
1567
1568 /* Last URI frees the cache */
1569 object_unlock(&uri_cache);
1570 if (!is_object_used(&uri_cache))
1571 free_hash(uri_cache.map);
1572 }
1573