1 /***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22
23 #include "curl_setup.h"
24
25 #include "urldata.h"
26 #include "urlapi-int.h"
27 #include "strcase.h"
28 #include "dotdot.h"
29 #include "url.h"
30 #include "escape.h"
31 #include "curl_ctype.h"
32 #include "inet_pton.h"
33
34 /* The last 3 #include files should be in this order */
35 #include "curl_printf.h"
36 #include "curl_memory.h"
37 #include "memdebug.h"
38
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * The argument is parenthesized at every use so that expressions such as
 * 'p + 1' can be passed. Note that 'str' is evaluated several times, so it
 * must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
44
/* A drive letter as it may appear in a URL: one ASCII letter, then ':' or
 * '|', then a forward slash, a backslash or the end of the string.
 * 'str' is evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == '|' || (str)[1] == ':') && \
   ((str)[2] == 0 || (str)[2] == '/' || (str)[2] == '\\'))
52
/* Internal representation of CURLU. Point to URL-encoded strings.
   All the char pointers below are released by free_urlhandle(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port; /* the port number as a decimal string */
  char *path;
  char *query; /* what follows the '?', not including the '?' itself */
  char *fragment; /* what follows the '#', not including the '#' itself */

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version */
};
70
/* scheme used for URLs given without one when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
72
free_urlhandle(struct Curl_URL * u)73 static void free_urlhandle(struct Curl_URL *u)
74 {
75 free(u->scheme);
76 free(u->user);
77 free(u->password);
78 free(u->options);
79 free(u->host);
80 free(u->zoneid);
81 free(u->port);
82 free(u->path);
83 free(u->query);
84 free(u->fragment);
85 free(u->scratch);
86 free(u->temppath);
87 }
88
89 /* move the full contents of one handle onto another and
90 free the original */
mv_urlhandle(struct Curl_URL * from,struct Curl_URL * to)91 static void mv_urlhandle(struct Curl_URL *from,
92 struct Curl_URL *to)
93 {
94 free_urlhandle(to);
95 *to = *from;
96 free(from);
97 }
98
/*
 * Return a pointer to the first '/' or '?' found after the host name starts,
 * whichever comes first, to also cope with URLs like
 * http://www.url.com?id=2380
 *
 * The host name is considered to start right after the first "//", or at
 * the beginning of the string if there is no "//". If neither separator is
 * present, the returned pointer points to the terminating zero byte.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *dslash = strstr(url, "//");
  const char *host = dslash ? dslash + 2 : url;
  const char *slash = strchr(host, '/');
  const char *qmark = strchr(host, '?');

  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
126
127 /*
128 * Decide in an encoding-independent manner whether a character in an
129 * URL must be escaped. The same criterion must be used in strlen_url()
130 * and strcpy_url().
131 */
urlchar_needs_escaping(int c)132 static bool urlchar_needs_escaping(int c)
133 {
134 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
135 }
136
137 /*
138 * strlen_url() returns the length of the given URL if the spaces within the
139 * URL were properly URL encoded.
140 * URL encoding should be skipped for host names, otherwise IDN resolution
141 * will fail.
142 */
strlen_url(const char * url,bool relative)143 static size_t strlen_url(const char *url, bool relative)
144 {
145 const unsigned char *ptr;
146 size_t newlen = 0;
147 bool left = TRUE; /* left side of the ? */
148 const unsigned char *host_sep = (const unsigned char *) url;
149
150 if(!relative)
151 host_sep = (const unsigned char *) find_host_sep(url);
152
153 for(ptr = (unsigned char *)url; *ptr; ptr++) {
154
155 if(ptr < host_sep) {
156 ++newlen;
157 continue;
158 }
159
160 if(*ptr == ' ') {
161 if(left)
162 newlen += 3;
163 else
164 newlen++;
165 continue;
166 }
167
168 if (*ptr == '?')
169 left = FALSE;
170
171 if(urlchar_needs_escaping(*ptr))
172 newlen += 2;
173
174 newlen++;
175 }
176
177 return newlen;
178 }
179
180 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
181 * the source URL accordingly.
182 * URL encoding should be skipped for host names, otherwise IDN resolution
183 * will fail.
184 */
strcpy_url(char * output,const char * url,bool relative)185 static void strcpy_url(char *output, const char *url, bool relative)
186 {
187 /* we must add this with whitespace-replacing */
188 bool left = TRUE;
189 const unsigned char *iptr;
190 char *optr = output;
191 const unsigned char *host_sep = (const unsigned char *) url;
192
193 if(!relative)
194 host_sep = (const unsigned char *) find_host_sep(url);
195
196 for(iptr = (unsigned char *)url; /* read from here */
197 *iptr; /* until zero byte */
198 iptr++) {
199
200 if(iptr < host_sep) {
201 *optr++ = *iptr;
202 continue;
203 }
204
205 if(*iptr == ' ') {
206 if(left) {
207 *optr++='%'; /* add a '%' */
208 *optr++='2'; /* add a '2' */
209 *optr++='0'; /* add a '0' */
210 }
211 else
212 *optr++='+'; /* add a '+' here */
213 continue;
214 }
215
216 if(*iptr == '?')
217 left = FALSE;
218
219 if(urlchar_needs_escaping(*iptr)) {
220 msnprintf(optr, 4, "%%%02x", *iptr);
221 optr += 3;
222 }
223 else
224 *optr++ = *iptr;
225 }
226 *optr = 0; /* null-terminate output buffer */
227
228 }
229
230 /*
231 * Returns true if the given URL is absolute (as opposed to relative) within
232 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
233 * non-NULL.
234 */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen)235 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
236 {
237 size_t i;
238 #ifdef WIN32
239 if(STARTS_WITH_DRIVE_PREFIX(url))
240 return FALSE;
241 #endif
242 for(i = 0; i < buflen && url[i]; ++i) {
243 char s = url[i];
244 if((s == ':') && (url[i + 1] == '/')) {
245 if(buf)
246 buf[i] = 0;
247 return TRUE;
248 }
249 /* RFC 3986 3.1 explains:
250 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
251 */
252 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
253 if(buf)
254 buf[i] = (char)TOLOWER(s);
255 }
256 else
257 break;
258 }
259 return FALSE;
260 }
261
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * URL-encodes any spaces.
 * The returned pointer must be freed by the caller unless NULL
 * (returns NULL on out of memory).
 *
 * Three kinds of 'relurl' are told apart:
 *  - one starting with "//" replaces everything after the scheme of 'base'
 *  - one starting with a single "/" replaces everything after the host
 *  - anything else is appended after the last slash of the base path (or
 *    directly at its end if it starts with '?'), after a single leading
 *    "./" and any number of leading "../" have been resolved
 *
 * The work is done by cutting a private copy of 'base' short in place, and
 * then appending the remaining part of 'relurl' to it.
 */
static char *concat_url(const char *base, const char *relurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  char *newest;
  char *protsep;
  char *pathsep;
  size_t newlen;
  bool host_changed = FALSE;

  const char *useurl = relurl;
  size_t urllen;

  /* we must make our own copy of the URL to play with, as it may
     point to read-only data */
  char *url_clone = strdup(base);

  if(!url_clone)
    return NULL; /* skip out of this NOW */

  /* protsep points to the start of the host name */
  protsep = strstr(url_clone, "//");
  if(!protsep)
    protsep = url_clone;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?') we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep + 1;
    else
      protsep = NULL; /* the base has no path left to cut levels from */

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl += 2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl += 3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep = 0;
        else {
          /* no more levels to remove, the path is now empty */
          *protsep = 0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.url.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.url.com?id=2380" which doesn't
           use a slash separator as it is supposed to, we need to check for a
           ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  /* If the new part contains a space, this is a mighty stupid redirect
     but we still make an effort to do "right". To the left of a '?'
     letter we replace each space with %20 while it is replaced with '+'
     on the right side of the '?' letter.

     When the host was replaced, the new part starts with a host name, so
     it is then NOT treated as relative by strlen_url() and strcpy_url().
  */
  newlen = strlen_url(useurl, !host_changed);

  urllen = strlen(url_clone);

  newest = malloc(urllen + 1 + /* possible slash */
                  newlen + 1 /* zero byte */);

  if(!newest) {
    free(url_clone); /* don't leak this */
    return NULL;
  }

  /* copy over the root url part */
  memcpy(newest, url_clone, urllen);

  /* check if we need to append a slash */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else
    newest[urllen++]='/';

  /* then append the new piece on the right side */
  strcpy_url(&newest[urllen], useurl, !host_changed);

  free(url_clone);

  return newest;
}
420
421 /*
422 * parse_hostname_login()
423 *
424 * Parse the login details (user name, password and options) from the URL and
425 * strip them out of the host name
426 *
427 */
parse_hostname_login(struct Curl_URL * u,char ** hostname,unsigned int flags)428 static CURLUcode parse_hostname_login(struct Curl_URL *u,
429 char **hostname,
430 unsigned int flags)
431 {
432 CURLUcode result = CURLUE_OK;
433 CURLcode ccode;
434 char *userp = NULL;
435 char *passwdp = NULL;
436 char *optionsp = NULL;
437 const struct Curl_handler *h = NULL;
438
439 /* At this point, we're hoping all the other special cases have
440 * been taken care of, so conn->host.name is at most
441 * [user[:password][;options]]@]hostname
442 *
443 * We need somewhere to put the embedded details, so do that first.
444 */
445
446 char *ptr = strchr(*hostname, '@');
447 char *login = *hostname;
448
449 if(!ptr)
450 goto out;
451
452 /* We will now try to extract the
453 * possible login information in a string like:
454 * ftp://user:password@ftp.my.site:8021/README */
455 *hostname = ++ptr;
456
457 /* if this is a known scheme, get some details */
458 if(u->scheme)
459 h = Curl_builtin_scheme(u->scheme);
460
461 /* We could use the login information in the URL so extract it. Only parse
462 options if the handler says we should. Note that 'h' might be NULL! */
463 ccode = Curl_parse_login_details(login, ptr - login - 1,
464 &userp, &passwdp,
465 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
466 &optionsp:NULL);
467 if(ccode) {
468 result = CURLUE_MALFORMED_INPUT;
469 goto out;
470 }
471
472 if(userp) {
473 if(flags & CURLU_DISALLOW_USER) {
474 /* Option DISALLOW_USER is set and url contains username. */
475 result = CURLUE_USER_NOT_ALLOWED;
476 goto out;
477 }
478
479 u->user = userp;
480 }
481
482 if(passwdp)
483 u->password = passwdp;
484
485 if(optionsp)
486 u->options = optionsp;
487
488 return CURLUE_OK;
489 out:
490
491 free(userp);
492 free(passwdp);
493 free(optionsp);
494
495 return result;
496 }
497
Curl_parse_port(struct Curl_URL * u,char * hostname,bool has_scheme)498 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
499 bool has_scheme)
500 {
501 char *portptr = NULL;
502 char endbracket;
503 int len;
504
505 /*
506 * Find the end of an IPv6 address, either on the ']' ending bracket or
507 * a percent-encoded zone index.
508 */
509 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
510 &endbracket, &len)) {
511 if(']' == endbracket)
512 portptr = &hostname[len];
513 else if('%' == endbracket) {
514 int zonelen = len;
515 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
516 if(']' != endbracket)
517 return CURLUE_MALFORMED_INPUT;
518 portptr = &hostname[--zonelen + len + 1];
519 }
520 else
521 return CURLUE_MALFORMED_INPUT;
522 }
523 else
524 return CURLUE_MALFORMED_INPUT;
525
526 /* this is a RFC2732-style specified IP-address */
527 if(portptr && *portptr) {
528 if(*portptr != ':')
529 return CURLUE_MALFORMED_INPUT;
530 }
531 else
532 portptr = NULL;
533 }
534 else
535 portptr = strchr(hostname, ':');
536
537 if(portptr) {
538 char *rest;
539 long port;
540 char portbuf[7];
541
542 /* Browser behavior adaptation. If there's a colon with no digits after,
543 just cut off the name there which makes us ignore the colon and just
544 use the default port. Firefox, Chrome and Safari all do that.
545
546 Don't do it if the URL has no scheme, to make something that looks like
547 a scheme not work!
548 */
549 if(!portptr[1]) {
550 *portptr = '\0';
551 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552 }
553
554 if(!ISDIGIT(portptr[1]))
555 return CURLUE_BAD_PORT_NUMBER;
556
557 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
558
559 if((port <= 0) || (port > 0xffff))
560 /* Single unix standard says port numbers are 16 bits long, but we don't
561 treat port zero as OK. */
562 return CURLUE_BAD_PORT_NUMBER;
563
564 if(rest[0])
565 return CURLUE_BAD_PORT_NUMBER;
566
567 *portptr++ = '\0'; /* cut off the name there */
568 *rest = 0;
569 /* generate a new port number string to get rid of leading zeroes etc */
570 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
571 u->portnum = port;
572 u->port = strdup(portbuf);
573 if(!u->port)
574 return CURLUE_OUT_OF_MEMORY;
575 }
576
577 return CURLUE_OK;
578 }
579
580 /* scan for byte values < 31 or 127 */
junkscan(const char * part,unsigned int flags)581 static bool junkscan(const char *part, unsigned int flags)
582 {
583 if(part) {
584 static const char badbytes[]={
585 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
586 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
587 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
588 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
589 0x7f, 0x00 /* null-terminate */
590 };
591 size_t n = strlen(part);
592 size_t nfine = strcspn(part, badbytes);
593 if(nfine != n)
594 /* since we don't know which part is scanned, return a generic error
595 code */
596 return TRUE;
597 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
598 return TRUE;
599 }
600 return FALSE;
601 }
602
/*
 * hostname_check() verifies that 'hostname' looks like a valid host name or
 * bracketed IPv6 numerical address.
 *
 * For an IPv6 address with a zone id ("[addr%zone]" or "[addr%25zone]"), the
 * zone id is stored in u->zoneid and cut out of 'hostname' in place, leaving
 * just "[addr]". In builds with ENABLE_IPV6 the address is also verified
 * with Curl_inet_pton().
 *
 * Other host names are only rejected if they contain a space, CR or LF.
 *
 * Returns CURLUE_OK, CURLUE_MALFORMED_INPUT, CURLUE_NO_HOST for an empty
 * host name or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
{
  size_t len;
  size_t hlen = strlen(hostname);

  if(hostname[0] == '[') {
#ifdef ENABLE_IPV6
    char dest[16]; /* fits a binary IPv6 address */
#endif
    const char *l = "0123456789abcdefABCDEF:.";
    if(hlen < 4) /* '[::]' is the shortest possible valid string */
      return CURLUE_MALFORMED_INPUT;
    /* from here on, 'hostname' points past the opening bracket and 'hlen' is
       the length of what is between the two brackets */
    hostname++;
    hlen -= 2;

    if(hostname[hlen] != ']')
      return CURLUE_MALFORMED_INPUT;

    /* only valid letters are ok */
    len = strspn(hostname, l);
    if(hlen != len) {
      /* something else than address letters before the end bracket */
      hlen = len;
      if(hostname[len] == '%') {
        /* this could now be '%[zone id]' */
        char zoneid[16];
        int i = 0;
        char *h = &hostname[len + 1];
        /* pass '25' if present and is a url encoded percent sign */
        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
          h += 2;
        /* copy at most 15 bytes of zone id, a longer one is rejected below
           since the end bracket is then not reached */
        while(*h && (*h != ']') && (i < 15))
          zoneid[i++] = *h++;
        if(!i || (']' != *h))
          return CURLUE_MALFORMED_INPUT;
        zoneid[i] = 0;
        u->zoneid = strdup(zoneid);
        if(!u->zoneid)
          return CURLUE_OUT_OF_MEMORY;
        hostname[len] = ']'; /* insert end bracket */
        hostname[len + 1] = 0; /* terminate the hostname */
      }
      else
        return CURLUE_MALFORMED_INPUT;
      /* hostname is fine */
    }
#ifdef ENABLE_IPV6
    hostname[hlen] = 0; /* end the address there */
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
      return CURLUE_MALFORMED_INPUT;
    hostname[hlen] = ']'; /* restore ending bracket */
#endif
  }
  else {
    /* letters from the second string is not ok */
    len = strcspn(hostname, " \r\n");
    if(hlen != len)
      /* hostname with bad content */
      return CURLUE_MALFORMED_INPUT;
  }
  if(!hostname[0])
    return CURLUE_NO_HOST;
  return CURLUE_OK;
}
666
/* true for the characters that end the host name part of a URL: the start
   of the path, the query or the fragment */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
668
669 /*
670 * Handle partial IPv4 numerical addresses and different bases, like
671 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
672 *
673 * If the given input string is syntactically wrong or any part for example is
674 * too big, this function returns FALSE and doesn't create any output.
675 *
676 * Output the "normalized" version of that input string in plain quad decimal
677 * integers and return TRUE.
678 */
ipv4_normalize(const char * hostname,char * outp,size_t olen)679 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
680 {
681 bool done = FALSE;
682 int n = 0;
683 const char *c = hostname;
684 unsigned long parts[4] = {0, 0, 0, 0};
685
686 while(!done) {
687 char *endp;
688 unsigned long l;
689 if((*c < '0') || (*c > '9'))
690 /* most importantly this doesn't allow a leading plus or minus */
691 return FALSE;
692 l = strtoul(c, &endp, 0);
693
694 /* overflow or nothing parsed at all */
695 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
696 return FALSE;
697
698 #if SIZEOF_LONG > 4
699 /* a value larger than 32 bits */
700 if(l > UINT_MAX)
701 return FALSE;
702 #endif
703
704 parts[n] = l;
705 c = endp;
706
707 switch (*c) {
708 case '.' :
709 if(n == 3)
710 return FALSE;
711 n++;
712 c++;
713 break;
714
715 case '\0':
716 done = TRUE;
717 break;
718
719 default:
720 return FALSE;
721 }
722 }
723
724 /* this is deemed a valid IPv4 numerical address */
725
726 switch(n) {
727 case 0: /* a -- 32 bits */
728 msnprintf(outp, olen, "%u.%u.%u.%u",
729 parts[0] >> 24, (parts[0] >> 16) & 0xff,
730 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
731 break;
732 case 1: /* a.b -- 8.24 bits */
733 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
734 return FALSE;
735 msnprintf(outp, olen, "%u.%u.%u.%u",
736 parts[0], (parts[1] >> 16) & 0xff,
737 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
738 break;
739 case 2: /* a.b.c -- 8.8.16 bits */
740 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
741 return FALSE;
742 msnprintf(outp, olen, "%u.%u.%u.%u",
743 parts[0], parts[1], (parts[2] >> 8) & 0xff,
744 parts[2] & 0xff);
745 break;
746 case 3: /* a.b.c.d -- 8.8.8.8 bits */
747 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
748 (parts[3] > 0xff))
749 return FALSE;
750 msnprintf(outp, olen, "%u.%u.%u.%u",
751 parts[0], parts[1], parts[2], parts[3]);
752 break;
753 }
754 return TRUE;
755 }
756
757 /* return strdup'ed version in 'outp', possibly percent decoded */
decode_host(char * hostname,char ** outp)758 static CURLUcode decode_host(char *hostname, char **outp)
759 {
760 char *per = NULL;
761 if(hostname[0] != '[')
762 /* only decode if not an ipv6 numerical */
763 per = strchr(hostname, '%');
764 if(!per) {
765 *outp = strdup(hostname);
766 if(!*outp)
767 return CURLUE_OUT_OF_MEMORY;
768 }
769 else {
770 /* might be encoded */
771 size_t dlen;
772 CURLcode result = Curl_urldecode(NULL, hostname, 0,
773 outp, &dlen, REJECT_CTRL);
774 if(result)
775 return CURLUE_MALFORMED_INPUT;
776 }
777
778 return CURLUE_OK;
779 }
780
/*
 * seturl() parses 'url' and stores its individual parts in 'u', as directed
 * by the CURLU_* bits in 'flags'.
 *
 * A single scratch buffer of (2 * strlen(url) + 2) bytes is allocated and
 * stored in u->scratch. Its first half is used for working on the path and
 * its second half, starting at offset strlen(url) + 1, for working on the
 * host name.
 *
 * On success the scratch area is freed again and CURLUE_OK is returned. On
 * failure an error code is returned right away WITHOUT releasing what has
 * been stored in 'u' so far (scheme, scratch, temppath etc), so the caller
 * must clean up the handle. parseurl() does that.
 */
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
{
  char *path;
  bool path_alloced = FALSE; /* TRUE when 'path' is a separate allocation */
  bool uncpath = FALSE;
  char *hostname;
  char *query = NULL;
  char *fragment = NULL;
  CURLUcode result;
  bool url_has_scheme = FALSE;
  char schemebuf[MAX_SCHEME_LEN + 1];
  const char *schemep = NULL;
  size_t schemelen = 0;
  size_t urllen;

  DEBUGASSERT(url);

  /*************************************************************
   * Parse the URL.
   ************************************************************/
  /* allocate scratch area */
  urllen = strlen(url);
  if(urllen > CURL_MAX_INPUT_LENGTH)
    /* excessive input length */
    return CURLUE_MALFORMED_INPUT;

  path = u->scratch = malloc(urllen * 2 + 2);
  if(!path)
    return CURLUE_OUT_OF_MEMORY;

  /* the host name lives in the second half of the scratch area */
  hostname = &path[urllen + 1];
  hostname[0] = 0;

  if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
    url_has_scheme = TRUE;
    schemelen = strlen(schemebuf);
  }

  /* handle the file: scheme */
  if(url_has_scheme && strcasecompare(schemebuf, "file")) {
    /* path has been allocated large enough to hold this. The 5 skips the
       leading "file:" */
    strcpy(path, &url[5]);

    u->scheme = strdup("file");
    if(!u->scheme)
      return CURLUE_OUT_OF_MEMORY;

    /* Extra handling URLs with an authority component (i.e. that start with
     * "file://")
     *
     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
     * RFC 8089, but not the (current) WHAT-WG URL spec.
     */
    if(path[0] == '/' && path[1] == '/') {
      /* swallow the two slashes */
      char *ptr = &path[2];

      /*
       * According to RFC 8089, a file: URL can be reliably dereferenced if:
       *
       * o it has no/blank hostname, or
       *
       * o the hostname matches "localhost" (case-insensitively), or
       *
       * o the hostname is a FQDN that resolves to this machine, or
       *
       * o it is an UNC String transformed to an URI (Windows only, RFC 8089
       * Appendix E.3).
       *
       * For brevity, we only consider URLs with empty, "localhost", or
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
       *
       * Additionally, there is an exception for URLs with a Windows drive
       * letter in the authority (which was accidentally omitted from RFC 8089
       * Appendix E, but believe me, it was meant to be there. --MK)
       */
      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
        /* the URL includes a host name, it must match "localhost" or
           "127.0.0.1" to be valid */
        if(checkprefix("localhost/", ptr) ||
           checkprefix("127.0.0.1/", ptr)) {
          /* both accepted host names are nine characters long */
          ptr += 9; /* now points to the slash after the host */
        }
        else {
#if defined(WIN32)
          size_t len;

          /* the host name, NetBIOS computer name, can not contain disallowed
             chars, and the delimiting slash character must be appended to the
             host name */
          path = strpbrk(ptr, "/\\:*?\"<>|");
          if(!path || *path != '/')
            return CURLUE_MALFORMED_INPUT;

          len = path - ptr;
          if(len) {
            memcpy(hostname, ptr, len);
            hostname[len] = 0;
            uncpath = TRUE;
          }

          ptr -= 2; /* now points to the // before the host in UNC */
#else
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
             none */
          return CURLUE_MALFORMED_INPUT;
#endif
        }
      }

      path = ptr;
    }

    if(!uncpath)
      hostname = NULL; /* no host for file: URLs by default */

#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
    /* Don't allow Windows drive letters when not in Windows.
     * This catches both "file:/c:" and "file:c:" */
    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
      /* File drive letters are only accepted in MSDOS/Windows */
      return CURLUE_MALFORMED_INPUT;
    }
#else
    /* If the path starts with a slash and a drive letter, ditch the slash */
    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
      /* This cannot be done with strcpy, as the memory chunks overlap! */
      memmove(path, &path[1], strlen(&path[1]) + 1);
    }
#endif

  }
  else {
    /* clear path */
    const char *p;
    const char *hostp;
    size_t len;
    path[0] = 0;

    if(url_has_scheme) {
      int i = 0;
      /* skip the scheme and the colon, then count the slashes */
      p = &url[schemelen + 1];
      while(p && (*p == '/') && (i < 4)) {
        p++;
        i++;
      }
      if((i < 1) || (i>3))
        /* less than one or more than three slashes */
        return CURLUE_MALFORMED_INPUT;

      schemep = schemebuf;
      if(!Curl_builtin_scheme(schemep) &&
         !(flags & CURLU_NON_SUPPORT_SCHEME))
        return CURLUE_UNSUPPORTED_SCHEME;

      if(junkscan(schemep, flags))
        return CURLUE_MALFORMED_INPUT;
    }
    else {
      /* no scheme! */

      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
        return CURLUE_MALFORMED_INPUT;
      if(flags & CURLU_DEFAULT_SCHEME)
        schemep = DEFAULT_SCHEME;

      /*
       * The URL was badly formatted, let's try without scheme specified.
       */
      p = url;
    }
    hostp = p; /* host name starts here */

    while(*p && !HOSTNAME_END(*p)) /* find end of host name */
      p++;

    len = p - hostp;
    if(len) {
      memcpy(hostname, hostp, len);
      hostname[len] = 0;
    }
    else {
      /* an empty host name is only fine if explicitly allowed */
      if(!(flags & CURLU_NO_AUTHORITY))
        return CURLUE_MALFORMED_INPUT;
    }

    /* everything after the host name goes into the path for now, the
       query and fragment are split off from it further down */
    len = strlen(p);
    memcpy(path, p, len);
    path[len] = 0;

    if(schemep) {
      u->scheme = strdup(schemep);
      if(!u->scheme)
        return CURLUE_OUT_OF_MEMORY;
    }
  }

  if(junkscan(path, flags))
    return CURLUE_MALFORMED_INPUT;

  if((flags & CURLU_URLENCODE) && path[0]) {
    /* worst case output length is 3x the original! */
    /* NOTE(review): this leaves no room for the terminating zero should
       every single byte get escaped. It looks like 'path' always starts with
       a byte that is copied as is at this point, which would make it fit,
       but confirm. */
    char *newp = malloc(strlen(path) * 3);
    if(!newp)
      return CURLUE_OUT_OF_MEMORY;
    path_alloced = TRUE;
    strcpy_url(newp, path, TRUE); /* consider it relative */
    /* keep it in the handle so that it gets freed on failure */
    u->temppath = path = newp;
  }

  /* the fragment must be cut off before looking for the query, since a '?'
     within a fragment is part of the fragment */
  fragment = strchr(path, '#');
  if(fragment) {
    *fragment++ = 0;
    if(fragment[0]) {
      u->fragment = strdup(fragment);
      if(!u->fragment)
        return CURLUE_OUT_OF_MEMORY;
    }
  }

  query = strchr(path, '?');
  if(query) {
    *query++ = 0;
    /* done even if the query part is a blank string */
    u->query = strdup(query);
    if(!u->query)
      return CURLUE_OUT_OF_MEMORY;
  }

  if(!path[0])
    /* if there's no path left set, unset */
    path = NULL;
  else {
    if(!(flags & CURLU_PATH_AS_IS)) {
      /* remove ../ and ./ sequences according to RFC3986 */
      char *newp = Curl_dedotdotify(path);
      if(!newp)
        return CURLUE_OUT_OF_MEMORY;

      if(strcmp(newp, path)) {
        /* if we got a new version */
        if(path_alloced)
          Curl_safefree(u->temppath);
        u->temppath = path = newp;
        path_alloced = TRUE;
      }
      else
        free(newp);
    }

    /* an allocated path is handed over as is, a path in the scratch area
       needs to be copied */
    u->path = path_alloced?path:strdup(path);
    if(!u->path)
      return CURLUE_OUT_OF_MEMORY;
    u->temppath = NULL; /* used now */
  }

  if(hostname) {
    char normalized_ipv4[sizeof("255.255.255.255") + 1];
    /*
     * Parse the login details and strip them out of the host name.
     */
    if(junkscan(hostname, flags))
      return CURLUE_MALFORMED_INPUT;

    result = parse_hostname_login(u, &hostname, flags);
    if(result)
      return result;

    /* this cuts off the port number from the host name */
    result = Curl_parse_port(u, hostname, url_has_scheme);
    if(result)
      return result;

    if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
      /* Skip hostname check, it's allowed to be empty. */
      u->host = strdup("");
    }
    else {
      if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
        u->host = strdup(normalized_ipv4);
      else {
        result = decode_host(hostname, &u->host);
        if(result)
          return result;
        result = hostname_check(u, u->host);
        if(result)
          return result;
      }
    }
    /* catches failures of the two strdup() calls above */
    if(!u->host)
      return CURLUE_OUT_OF_MEMORY;
    if((flags & CURLU_GUESS_SCHEME) && !schemep) {
      /* legacy curl-style guess based on host name */
      if(checkprefix("ftp.", hostname))
        schemep = "ftp";
      else if(checkprefix("dict.", hostname))
        schemep = "dict";
      else if(checkprefix("ldap.", hostname))
        schemep = "ldap";
      else if(checkprefix("imap.", hostname))
        schemep = "imap";
      else if(checkprefix("smtp.", hostname))
        schemep = "smtp";
      else if(checkprefix("pop3.", hostname))
        schemep = "pop3";
      else
        schemep = "http";

      u->scheme = strdup(schemep);
      if(!u->scheme)
        return CURLUE_OUT_OF_MEMORY;
    }
  }

  /* the work areas are not needed any more */
  Curl_safefree(u->scratch);
  Curl_safefree(u->temppath);

  return CURLUE_OK;
}
1100
1101 /*
1102 * Parse the URL and set the relevant members of the Curl_URL struct.
1103 */
parseurl(const char * url,CURLU * u,unsigned int flags)1104 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1105 {
1106 CURLUcode result = seturl(url, u, flags);
1107 if(result) {
1108 free_urlhandle(u);
1109 memset(u, 0, sizeof(struct Curl_URL));
1110 }
1111 return result;
1112 }
1113
1114 /*
1115 */
curl_url(void)1116 CURLU *curl_url(void)
1117 {
1118 return calloc(sizeof(struct Curl_URL), 1);
1119 }
1120
/*
 * Free a URL handle together with all the strings it owns. Passing in a
 * NULL handle is fine and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1128
/*
 * Duplicate the string member 'name' from the struct 'src' points to into
 * the struct 'dest' points to. A NULL member is left alone. If the
 * duplication fails, this jumps to the label 'fail', which must exist in
 * the function using the macro.
 *
 * 'dest' and 'src' are parenthesized so that any pointer expression can be
 * passed. They are evaluated more than once.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1137
/*
 * Create a copy of the handle 'in'. All the URL parts, including the IPv6
 * zone id, are duplicated so that the copy can be modified and cleaned up
 * independently of the original. The scratch and temppath work areas are
 * not copied.
 *
 * Returns the new handle, to be freed with curl_url_cleanup(), or NULL if
 * out of memory.
 */
CURLU *curl_url_dup(CURLU *in)
{
  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
  if(u) {
    /* each DUP jumps to 'fail' if the string cannot be duplicated */
    DUP(u, in, scheme);
    DUP(u, in, user);
    DUP(u, in, password);
    DUP(u, in, options);
    DUP(u, in, host);
    DUP(u, in, zoneid);
    DUP(u, in, port);
    DUP(u, in, path);
    DUP(u, in, query);
    DUP(u, in, fragment);
    u->portnum = in->portnum;
  }
  return u;
  fail:
  /* frees the parts duplicated so far as well as the handle itself */
  curl_url_cleanup(u);
  return NULL;
}
1158
/*
 * curl_url_get() extracts the part 'what' from the handle 'u' and stores a
 * separately allocated string in '*part'. '*part' is set to NULL up front, so
 * it is NULL on every error return that happens after the argument checks.
 *
 * Bits in 'flags' that are looked at here:
 *
 * CURLU_URLDECODE       - URL decode the part before returning it. Never
 *                         applied to the scheme or the port, and not applied
 *                         to CURLUPART_URL (that case returns on its own).
 *                         For the query, '+' is turned into space first.
 * CURLU_URLENCODE       - when building a full URL, escape the host name
 *                         with curl_easy_escape() (non-bracketed hosts only)
 * CURLU_DEFAULT_PORT    - if no port is stored, deliver the default port of
 *                         the scheme when the scheme is a known one
 * CURLU_NO_DEFAULT_PORT - treat a stored port as absent if it equals the
 *                         default port of the scheme
 * CURLU_DEFAULT_SCHEME  - when building a full URL without a stored scheme,
 *                         use DEFAULT_SCHEME instead of failing
 *
 * Returns CURLUE_OK on success, CURLUE_BAD_HANDLE for a NULL 'u',
 * CURLUE_BAD_PARTPOINTER for a NULL 'part', CURLUE_OUT_OF_MEMORY when an
 * allocation fails, CURLUE_URLDECODE when decoding is rejected, and otherwise
 * the CURLUE_NO_* code matching the part that is not present. A missing zone
 * id and an unrecognized 'what' both yield CURLUE_UNKNOWN_PART.
 */
CURLUcode curl_url_get(CURLU *u, CURLUPart what,
                       char **part, unsigned int flags)
{
  char *ptr;
  CURLUcode ifmissing = CURLUE_UNKNOWN_PART; /* returned if 'ptr' ends up
                                                NULL */
  char portbuf[7]; /* holds a default port number rendered as a string */
  bool urldecode = (flags & CURLU_URLDECODE)?1:0;
  bool urlencode = (flags & CURLU_URLENCODE)?1:0;
  bool plusdecode = FALSE;
  /* NOTE(review): 'flags' is used throughout this function, so this cast
     looks redundant */
  (void)flags;
  if(!u)
    return CURLUE_BAD_HANDLE;
  if(!part)
    return CURLUE_BAD_PARTPOINTER;
  *part = NULL;

  /* Every case except CURLUPART_URL only selects the source string in 'ptr'
     and the error code to use when it is absent; the copy and the optional
     decoding are done in common code after the switch. */
  switch(what) {
  case CURLUPART_SCHEME:
    ptr = u->scheme;
    ifmissing = CURLUE_NO_SCHEME;
    urldecode = FALSE; /* never for schemes */
    break;
  case CURLUPART_USER:
    ptr = u->user;
    ifmissing = CURLUE_NO_USER;
    break;
  case CURLUPART_PASSWORD:
    ptr = u->password;
    ifmissing = CURLUE_NO_PASSWORD;
    break;
  case CURLUPART_OPTIONS:
    ptr = u->options;
    ifmissing = CURLUE_NO_OPTIONS;
    break;
  case CURLUPART_HOST:
    ptr = u->host;
    ifmissing = CURLUE_NO_HOST;
    break;
  case CURLUPART_ZONEID:
    /* 'ifmissing' is left at CURLUE_UNKNOWN_PART for this part */
    ptr = u->zoneid;
    break;
  case CURLUPART_PORT:
    ptr = u->port;
    ifmissing = CURLUE_NO_PORT;
    urldecode = FALSE; /* never for port */
    if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
      /* there's no stored port number, but asked to deliver
         a default one for the scheme */
      const struct Curl_handler *h =
        Curl_builtin_scheme(u->scheme);
      if(h) {
        msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
        ptr = portbuf;
      }
    }
    else if(ptr && u->scheme) {
      /* there is a stored port number, but ask to inhibit if
         it matches the default one for the scheme */
      const struct Curl_handler *h =
        Curl_builtin_scheme(u->scheme);
      if(h && (h->defport == u->portnum) &&
         (flags & CURLU_NO_DEFAULT_PORT))
        ptr = NULL;
    }
    break;
  case CURLUPART_PATH:
    ptr = u->path;
    if(!ptr) {
      /* no path stored: store "/" in the handle itself and return that */
      ptr = u->path = strdup("/");
      if(!u->path)
        return CURLUE_OUT_OF_MEMORY;
    }
    break;
  case CURLUPART_QUERY:
    ptr = u->query;
    ifmissing = CURLUE_NO_QUERY;
    /* '+' is only converted to space when URL decoding was asked for */
    plusdecode = urldecode;
    break;
  case CURLUPART_FRAGMENT:
    ptr = u->fragment;
    ifmissing = CURLUE_NO_FRAGMENT;
    break;
  case CURLUPART_URL: {
    /* Assemble a full URL from the stored parts. This case returns directly
       and never reaches the copy/decode code after the switch. */
    char *url;
    char *scheme;
    char *options = u->options;
    char *port = u->port;
    char *allochost = NULL; /* set when the host needs a rewritten copy */
    if(u->scheme && strcasecompare("file", u->scheme)) {
      /* file: URLs are built from path and fragment only.
         NOTE(review): 'u->path' is passed unchecked here - confirm it
         cannot be NULL when the scheme is "file" */
      url = aprintf("file://%s%s%s",
                    u->path,
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
    }
    else if(!u->host)
      return CURLUE_NO_HOST;
    else {
      const struct Curl_handler *h = NULL;
      if(u->scheme)
        scheme = u->scheme;
      else if(flags & CURLU_DEFAULT_SCHEME)
        scheme = (char *) DEFAULT_SCHEME;
      else
        return CURLUE_NO_SCHEME;

      h = Curl_builtin_scheme(scheme);
      if(!port && (flags & CURLU_DEFAULT_PORT)) {
        /* there's no stored port number, but asked to deliver
           a default one for the scheme */
        if(h) {
          msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
          port = portbuf;
        }
      }
      else if(port) {
        /* there is a stored port number, but asked to inhibit if it matches
           the default one for the scheme */
        if(h && (h->defport == u->portnum) &&
           (flags & CURLU_NO_DEFAULT_PORT))
          port = NULL;
      }

      /* leave out the options part for known schemes whose handler does not
         have PROTOPT_URLOPTIONS set */
      if(h && !(h->flags & PROTOPT_URLOPTIONS))
        options = NULL;

      if(u->host[0] == '[') {
        if(u->zoneid) {
          /* make it '[ host %25 zoneid ]' */
          size_t hostlen = strlen(u->host);
          /* room for the host, "%25", the zone id and the terminator */
          size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
          allochost = malloc(alen);
          if(!allochost)
            return CURLUE_OUT_OF_MEMORY;
          /* copy the host without its last byte (presumably the closing
             ']'); the format string below appends a new ']' after the zone
             id */
          memcpy(allochost, u->host, hostlen - 1);
          msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
                    "%%25%s]", u->zoneid);
        }
      }
      else if(urlencode) {
        allochost = curl_easy_escape(NULL, u->host, 0);
        if(!allochost)
          return CURLUE_OUT_OF_MEMORY;
      }
      else {
        /* only encode '%' in output host name */
        char *host = u->host;
        size_t pcount = 0;
        /* first, count number of percents present in the name */
        while(*host) {
          if(*host == '%')
            pcount++;
          host++;
        }
        /* if there were percents, encode the host name */
        if(pcount) {
          size_t hostlen = strlen(u->host);
          /* each '%' grows from one byte to the three bytes of "%25" */
          size_t alen = hostlen + 2 * pcount + 1;
          char *o = allochost = malloc(alen);
          if(!allochost)
            return CURLUE_OUT_OF_MEMORY;

          host = u->host;
          while(*host) {
            if(*host == '%') {
              memcpy(o, "%25", 3);
              o += 3;
              host++;
              continue;
            }
            *o++ = *host++;
          }
          *o = '\0';
        }
      }

      /* scheme://user:password;options@host:port/path?query#fragment, where
         each separator is only emitted together with the piece it belongs
         to. An empty query is left out entirely, and a '/' is inserted when
         the stored path does not start with one. */
      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                    scheme,
                    u->user ? u->user : "",
                    u->password ? ":": "",
                    u->password ? u->password : "",
                    options ? ";" : "",
                    options ? options : "",
                    (u->user || u->password || options) ? "@": "",
                    allochost ? allochost : u->host,
                    port ? ":": "",
                    port ? port : "",
                    (u->path && (u->path[0] != '/')) ? "/": "",
                    u->path ? u->path : "/",
                    (u->query && u->query[0]) ? "?": "",
                    (u->query && u->query[0]) ? u->query : "",
                    u->fragment? "#": "",
                    u->fragment? u->fragment : "");
      free(allochost);
    }
    if(!url)
      return CURLUE_OUT_OF_MEMORY;
    *part = url;
    return CURLUE_OK;
  }
  default:
    /* unknown part: falls through to returning CURLUE_UNKNOWN_PART */
    ptr = NULL;
    break;
  }
  if(ptr) {
    /* hand out a copy, never the string stored in the handle */
    *part = strdup(ptr);
    if(!*part)
      return CURLUE_OUT_OF_MEMORY;
    if(plusdecode) {
      /* convert + to space */
      char *plus;
      for(plus = *part; *plus; ++plus) {
        if(*plus == '+')
          *plus = ' ';
      }
    }
    if(urldecode) {
      char *decoded;
      size_t dlen;
      /* this unconditional rejection of control bytes is documented
         API behavior */
      CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen,
                                    REJECT_CTRL);
      /* the undecoded copy is not needed any more, whatever the outcome */
      free(*part);
      if(res) {
        *part = NULL;
        return CURLUE_URLDECODE;
      }
      *part = decoded;
    }
    return CURLUE_OK;
  }
  else
    return ifmissing;
}
1393
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1394 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1395 const char *part, unsigned int flags)
1396 {
1397 char **storep = NULL;
1398 long port = 0;
1399 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1400 bool plusencode = FALSE;
1401 bool urlskipslash = FALSE;
1402 bool appendquery = FALSE;
1403 bool equalsencode = FALSE;
1404
1405 if(!u)
1406 return CURLUE_BAD_HANDLE;
1407 if(!part) {
1408 /* setting a part to NULL clears it */
1409 switch(what) {
1410 case CURLUPART_URL:
1411 break;
1412 case CURLUPART_SCHEME:
1413 storep = &u->scheme;
1414 break;
1415 case CURLUPART_USER:
1416 storep = &u->user;
1417 break;
1418 case CURLUPART_PASSWORD:
1419 storep = &u->password;
1420 break;
1421 case CURLUPART_OPTIONS:
1422 storep = &u->options;
1423 break;
1424 case CURLUPART_HOST:
1425 storep = &u->host;
1426 break;
1427 case CURLUPART_ZONEID:
1428 storep = &u->zoneid;
1429 break;
1430 case CURLUPART_PORT:
1431 u->portnum = 0;
1432 storep = &u->port;
1433 break;
1434 case CURLUPART_PATH:
1435 storep = &u->path;
1436 break;
1437 case CURLUPART_QUERY:
1438 storep = &u->query;
1439 break;
1440 case CURLUPART_FRAGMENT:
1441 storep = &u->fragment;
1442 break;
1443 default:
1444 return CURLUE_UNKNOWN_PART;
1445 }
1446 if(storep && *storep) {
1447 Curl_safefree(*storep);
1448 }
1449 return CURLUE_OK;
1450 }
1451
1452 switch(what) {
1453 case CURLUPART_SCHEME:
1454 if(strlen(part) > MAX_SCHEME_LEN)
1455 /* too long */
1456 return CURLUE_MALFORMED_INPUT;
1457 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1458 /* verify that it is a fine scheme */
1459 !Curl_builtin_scheme(part))
1460 return CURLUE_UNSUPPORTED_SCHEME;
1461 storep = &u->scheme;
1462 urlencode = FALSE; /* never */
1463 break;
1464 case CURLUPART_USER:
1465 storep = &u->user;
1466 break;
1467 case CURLUPART_PASSWORD:
1468 storep = &u->password;
1469 break;
1470 case CURLUPART_OPTIONS:
1471 storep = &u->options;
1472 break;
1473 case CURLUPART_HOST: {
1474 size_t len = strcspn(part, " \r\n");
1475 if(strlen(part) != len)
1476 /* hostname with bad content */
1477 return CURLUE_MALFORMED_INPUT;
1478 storep = &u->host;
1479 Curl_safefree(u->zoneid);
1480 break;
1481 }
1482 case CURLUPART_ZONEID:
1483 storep = &u->zoneid;
1484 break;
1485 case CURLUPART_PORT:
1486 {
1487 char *endp;
1488 urlencode = FALSE; /* never */
1489 port = strtol(part, &endp, 10); /* Port number must be decimal */
1490 if((port <= 0) || (port > 0xffff))
1491 return CURLUE_BAD_PORT_NUMBER;
1492 if(*endp)
1493 /* weirdly provided number, not good! */
1494 return CURLUE_MALFORMED_INPUT;
1495 storep = &u->port;
1496 }
1497 break;
1498 case CURLUPART_PATH:
1499 urlskipslash = TRUE;
1500 storep = &u->path;
1501 break;
1502 case CURLUPART_QUERY:
1503 plusencode = urlencode;
1504 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1505 equalsencode = appendquery;
1506 storep = &u->query;
1507 break;
1508 case CURLUPART_FRAGMENT:
1509 storep = &u->fragment;
1510 break;
1511 case CURLUPART_URL: {
1512 /*
1513 * Allow a new URL to replace the existing (if any) contents.
1514 *
1515 * If the existing contents is enough for a URL, allow a relative URL to
1516 * replace it.
1517 */
1518 CURLUcode result;
1519 char *oldurl;
1520 char *redired_url;
1521 CURLU *handle2;
1522
1523 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) {
1524 handle2 = curl_url();
1525 if(!handle2)
1526 return CURLUE_OUT_OF_MEMORY;
1527 result = parseurl(part, handle2, flags);
1528 if(!result)
1529 mv_urlhandle(handle2, u);
1530 else
1531 curl_url_cleanup(handle2);
1532 return result;
1533 }
1534 /* extract the full "old" URL to do the redirect on */
1535 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1536 if(result) {
1537 /* couldn't get the old URL, just use the new! */
1538 handle2 = curl_url();
1539 if(!handle2)
1540 return CURLUE_OUT_OF_MEMORY;
1541 result = parseurl(part, handle2, flags);
1542 if(!result)
1543 mv_urlhandle(handle2, u);
1544 else
1545 curl_url_cleanup(handle2);
1546 return result;
1547 }
1548
1549 /* apply the relative part to create a new URL */
1550 redired_url = concat_url(oldurl, part);
1551 free(oldurl);
1552 if(!redired_url)
1553 return CURLUE_OUT_OF_MEMORY;
1554
1555 /* now parse the new URL */
1556 handle2 = curl_url();
1557 if(!handle2) {
1558 free(redired_url);
1559 return CURLUE_OUT_OF_MEMORY;
1560 }
1561 result = parseurl(redired_url, handle2, flags);
1562 free(redired_url);
1563 if(!result)
1564 mv_urlhandle(handle2, u);
1565 else
1566 curl_url_cleanup(handle2);
1567 return result;
1568 }
1569 default:
1570 return CURLUE_UNKNOWN_PART;
1571 }
1572 DEBUGASSERT(storep);
1573 {
1574 const char *newp = part;
1575 size_t nalloc = strlen(part);
1576
1577 if(nalloc > CURL_MAX_INPUT_LENGTH)
1578 /* excessive input length */
1579 return CURLUE_MALFORMED_INPUT;
1580
1581 if(urlencode) {
1582 const unsigned char *i;
1583 char *o;
1584 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1585 if(!enc)
1586 return CURLUE_OUT_OF_MEMORY;
1587 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1588 if((*i == ' ') && plusencode) {
1589 *o = '+';
1590 o++;
1591 }
1592 else if(Curl_isunreserved(*i) ||
1593 ((*i == '/') && urlskipslash) ||
1594 ((*i == '=') && equalsencode)) {
1595 if((*i == '=') && equalsencode)
1596 /* only skip the first equals sign */
1597 equalsencode = FALSE;
1598 *o = *i;
1599 o++;
1600 }
1601 else {
1602 msnprintf(o, 4, "%%%02x", *i);
1603 o += 3;
1604 }
1605 }
1606 *o = 0; /* null-terminate */
1607 newp = enc;
1608 }
1609 else {
1610 char *p;
1611 newp = strdup(part);
1612 if(!newp)
1613 return CURLUE_OUT_OF_MEMORY;
1614 p = (char *)newp;
1615 while(*p) {
1616 /* make sure percent encoded are lower case */
1617 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1618 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1619 p[1] = (char)TOLOWER(p[1]);
1620 p[2] = (char)TOLOWER(p[2]);
1621 p += 3;
1622 }
1623 else
1624 p++;
1625 }
1626 }
1627
1628 if(appendquery) {
1629 /* Append the string onto the old query. Add a '&' separator if none is
1630 present at the end of the exsting query already */
1631 size_t querylen = u->query ? strlen(u->query) : 0;
1632 bool addamperand = querylen && (u->query[querylen -1] != '&');
1633 if(querylen) {
1634 size_t newplen = strlen(newp);
1635 char *p = malloc(querylen + addamperand + newplen + 1);
1636 if(!p) {
1637 free((char *)newp);
1638 return CURLUE_OUT_OF_MEMORY;
1639 }
1640 strcpy(p, u->query); /* original query */
1641 if(addamperand)
1642 p[querylen] = '&'; /* ampersand */
1643 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1644 free((char *)newp);
1645 free(*storep);
1646 *storep = p;
1647 return CURLUE_OK;
1648 }
1649 }
1650
1651 if(what == CURLUPART_HOST) {
1652 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1653 /* Skip hostname check, it's allowed to be empty. */
1654 }
1655 else {
1656 if(hostname_check(u, (char *)newp)) {
1657 free((char *)newp);
1658 return CURLUE_MALFORMED_INPUT;
1659 }
1660 }
1661 }
1662
1663 free(*storep);
1664 *storep = (char *)newp;
1665 }
1666 /* set after the string, to make it not assigned if the allocation above
1667 fails */
1668 if(port)
1669 u->portnum = port;
1670 return CURLUE_OK;
1671 }
1672