1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at https://curl.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  ***************************************************************************/
22 
23 #include "curl_setup.h"
24 
25 #include "urldata.h"
26 #include "urlapi-int.h"
27 #include "strcase.h"
28 #include "dotdot.h"
29 #include "url.h"
30 #include "escape.h"
31 #include "curl_ctype.h"
32 #include "inet_pton.h"
33 
34 /* The last 3 #include files should be in this order */
35 #include "curl_printf.h"
36 #include "curl_memory.h"
37 #include "memdebug.h"
38 
39   /* MSDOS/Windows style drive prefix, eg c: in c:foo */
/* Every use of the macro argument is parenthesized so that any pointer
   expression (for example "p + 1"), not only a plain identifier, can be
   passed in. The argument is evaluated more than once, so it must not have
   side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))

  /* MSDOS/Windows style drive prefix as it may appear in a URL: a letter,
   * then ':' or '|', followed by a slash, a backslash or the terminating
   * NUL. The argument is evaluated more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
52 
53 /* Internal representation of CURLU. Point to URL-encoded strings. */
struct Curl_URL {
  /* Every string member below is an individually heap-allocated string (or
     NULL) that free_urlhandle() releases. */
  char *scheme;
  char *user;     /* login name, stored by parse_hostname_login() */
  char *password; /* login password, stored by parse_hostname_login() */
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;     /* port number as a decimal string, see Curl_parse_port() */
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version of 'port' */
};
70 
/* scheme used for a URL given without one when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
72 
free_urlhandle(struct Curl_URL * u)73 static void free_urlhandle(struct Curl_URL *u)
74 {
75   free(u->scheme);
76   free(u->user);
77   free(u->password);
78   free(u->options);
79   free(u->host);
80   free(u->zoneid);
81   free(u->port);
82   free(u->path);
83   free(u->query);
84   free(u->fragment);
85   free(u->scratch);
86   free(u->temppath);
87 }
88 
89 /* move the full contents of one handle onto another and
90    free the original */
mv_urlhandle(struct Curl_URL * from,struct Curl_URL * to)91 static void mv_urlhandle(struct Curl_URL *from,
92                          struct Curl_URL *to)
93 {
94   free_urlhandle(to);
95   *to = *from;
96   free(from);
97 }
98 
/*
 * Return a pointer to the character that ends the host name in 'url': the
 * first '/' or '?' found after the leading "//" (or after the start of the
 * string when there is no "//"), whichever comes first. This covers URLs
 * like http://www.url.com?id=2380. If neither is present, the pointer to
 * the terminating zero byte is returned.
 */
static const char *find_host_sep(const char *url)
{
  const char *host;
  const char *slash;
  const char *qmark;
  const char *end = url + strlen(url);

  /* the host name begins right after "//" when present */
  host = strstr(url, "//");
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  /* a missing separator counts as being at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
126 
127 /*
128  * Decide in an encoding-independent manner whether a character in an
129  * URL must be escaped. The same criterion must be used in strlen_url()
130  * and strcpy_url().
131  */
urlchar_needs_escaping(int c)132 static bool urlchar_needs_escaping(int c)
133 {
134   return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
135 }
136 
137 /*
138  * strlen_url() returns the length of the given URL if the spaces within the
139  * URL were properly URL encoded.
140  * URL encoding should be skipped for host names, otherwise IDN resolution
141  * will fail.
142  */
strlen_url(const char * url,bool relative)143 static size_t strlen_url(const char *url, bool relative)
144 {
145   const unsigned char *ptr;
146   size_t newlen = 0;
147   bool left = TRUE; /* left side of the ? */
148   const unsigned char *host_sep = (const unsigned char *) url;
149 
150   if(!relative)
151     host_sep = (const unsigned char *) find_host_sep(url);
152 
153   for(ptr = (unsigned char *)url; *ptr; ptr++) {
154 
155     if(ptr < host_sep) {
156       ++newlen;
157       continue;
158     }
159 
160     if(*ptr == ' ') {
161       if(left)
162         newlen += 3;
163       else
164         newlen++;
165       continue;
166     }
167 
168     if (*ptr == '?')
169       left = FALSE;
170 
171     if(urlchar_needs_escaping(*ptr))
172       newlen += 2;
173 
174     newlen++;
175   }
176 
177   return newlen;
178 }
179 
180 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
181  * the source URL accordingly.
182  * URL encoding should be skipped for host names, otherwise IDN resolution
183  * will fail.
184  */
strcpy_url(char * output,const char * url,bool relative)185 static void strcpy_url(char *output, const char *url, bool relative)
186 {
187   /* we must add this with whitespace-replacing */
188   bool left = TRUE;
189   const unsigned char *iptr;
190   char *optr = output;
191   const unsigned char *host_sep = (const unsigned char *) url;
192 
193   if(!relative)
194     host_sep = (const unsigned char *) find_host_sep(url);
195 
196   for(iptr = (unsigned char *)url;    /* read from here */
197       *iptr;         /* until zero byte */
198       iptr++) {
199 
200     if(iptr < host_sep) {
201       *optr++ = *iptr;
202       continue;
203     }
204 
205     if(*iptr == ' ') {
206       if(left) {
207         *optr++='%'; /* add a '%' */
208         *optr++='2'; /* add a '2' */
209         *optr++='0'; /* add a '0' */
210       }
211       else
212         *optr++='+'; /* add a '+' here */
213       continue;
214     }
215 
216     if(*iptr == '?')
217       left = FALSE;
218 
219     if(urlchar_needs_escaping(*iptr)) {
220       msnprintf(optr, 4, "%%%02x", *iptr);
221       optr += 3;
222     }
223     else
224       *optr++ = *iptr;
225   }
226   *optr = 0; /* null-terminate output buffer */
227 
228 }
229 
230 /*
231  * Returns true if the given URL is absolute (as opposed to relative) within
232  * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
233  * non-NULL.
234  */
Curl_is_absolute_url(const char * url,char * buf,size_t buflen)235 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
236 {
237   size_t i;
238 #ifdef WIN32
239   if(STARTS_WITH_DRIVE_PREFIX(url))
240     return FALSE;
241 #endif
242   for(i = 0; i < buflen && url[i]; ++i) {
243     char s = url[i];
244     if((s == ':') && (url[i + 1] == '/')) {
245       if(buf)
246         buf[i] = 0;
247       return TRUE;
248     }
249     /* RFC 3986 3.1 explains:
250       scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
251     */
252     else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
253       if(buf)
254         buf[i] = (char)TOLOWER(s);
255     }
256     else
257       break;
258   }
259   return FALSE;
260 }
261 
/*
 * Concatenate a relative URL to a base URL making it absolute.
 *
 * Three shapes of 'relurl' are told apart:
 *  - it starts with "//": only the part of 'base' up to and including its
 *    "//" is kept and the rest of 'relurl' supplies host and path
 *  - it starts with a single '/': 'base' is cut where its path (or query)
 *    begins and 'relurl' replaces it
 *  - anything else: the query and the last path segment of 'base' are
 *    dropped (the path segment is kept if 'relurl' starts with '?'), one
 *    leading "./" is skipped and each leading "../" removes one more path
 *    segment from 'base'
 *
 * The appended part is URL-encoded by strcpy_url(), which among other
 * things encodes spaces.
 *
 * The returned pointer must be freed by the caller unless NULL
 * (returns NULL on out of memory).
 */
static char *concat_url(const char *base, const char *relurl)
{
  /***
   The approach: append the new path to the old URL to the right of the
   host part. This is plain string surgery and not a full URL parser.
  */
  char *newest;
  char *protsep;
  char *pathsep;
  size_t newlen;
  bool host_changed = FALSE;

  const char *useurl = relurl;
  size_t urllen;

  /* we must make our own copy of the URL to play with, as it may
     point to read-only data */
  char *url_clone = strdup(base);

  if(!url_clone)
    return NULL; /* skip out of this NOW */

  /* protsep points to the start of the host name */
  protsep = strstr(url_clone, "//");
  if(!protsep)
    protsep = url_clone;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?')  we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead. From here on protsep is either the first
       character of the path or NULL when the base has no path left. */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep + 1;
    else
      protsep = NULL;

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl += 2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl += 3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep = 0;
        else {
          /* nothing more to cut: empty the path but keep the slash in
             front of it, which the slash check further down relies on */
          *protsep = 0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.url.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.url.com?id=2380" which doesn't
           use a slash separator as it is supposed to, we need to check for a
           ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  /* If the new part contains a space, this is a mighty stupid redirect
     but we still make an effort to do "right". To the left of a '?'
     letter we replace each space with %20 while it is replaced with '+'
     on the right side of the '?' letter.

     The new part is treated as "relative" (nothing exempt from encoding)
     unless it brings its own host name.
  */
  newlen = strlen_url(useurl, !host_changed);

  urllen = strlen(url_clone);

  newest = malloc(urllen + 1 + /* possible slash */
                  newlen + 1 /* zero byte */);

  if(!newest) {
    free(url_clone); /* don't leak this */
    return NULL;
  }

  /* copy over the root url part */
  memcpy(newest, url_clone, urllen);

  /* check if we need to append a slash: not when the new part brings its
     own leading slash or is a query, and not when the kept part already
     ends with the slash in front of an emptied path */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else
    newest[urllen++]='/';

  /* then append the new piece on the right side */
  strcpy_url(&newest[urllen], useurl, !host_changed);

  free(url_clone);

  return newest;
}
420 
421 /*
422  * parse_hostname_login()
423  *
424  * Parse the login details (user name, password and options) from the URL and
425  * strip them out of the host name
426  *
427  */
parse_hostname_login(struct Curl_URL * u,char ** hostname,unsigned int flags)428 static CURLUcode parse_hostname_login(struct Curl_URL *u,
429                                       char **hostname,
430                                       unsigned int flags)
431 {
432   CURLUcode result = CURLUE_OK;
433   CURLcode ccode;
434   char *userp = NULL;
435   char *passwdp = NULL;
436   char *optionsp = NULL;
437   const struct Curl_handler *h = NULL;
438 
439   /* At this point, we're hoping all the other special cases have
440    * been taken care of, so conn->host.name is at most
441    *    [user[:password][;options]]@]hostname
442    *
443    * We need somewhere to put the embedded details, so do that first.
444    */
445 
446   char *ptr = strchr(*hostname, '@');
447   char *login = *hostname;
448 
449   if(!ptr)
450     goto out;
451 
452   /* We will now try to extract the
453    * possible login information in a string like:
454    * ftp://user:password@ftp.my.site:8021/README */
455   *hostname = ++ptr;
456 
457   /* if this is a known scheme, get some details */
458   if(u->scheme)
459     h = Curl_builtin_scheme(u->scheme);
460 
461   /* We could use the login information in the URL so extract it. Only parse
462      options if the handler says we should. Note that 'h' might be NULL! */
463   ccode = Curl_parse_login_details(login, ptr - login - 1,
464                                    &userp, &passwdp,
465                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
466                                    &optionsp:NULL);
467   if(ccode) {
468     result = CURLUE_MALFORMED_INPUT;
469     goto out;
470   }
471 
472   if(userp) {
473     if(flags & CURLU_DISALLOW_USER) {
474       /* Option DISALLOW_USER is set and url contains username. */
475       result = CURLUE_USER_NOT_ALLOWED;
476       goto out;
477     }
478 
479     u->user = userp;
480   }
481 
482   if(passwdp)
483     u->password = passwdp;
484 
485   if(optionsp)
486     u->options = optionsp;
487 
488   return CURLUE_OK;
489   out:
490 
491   free(userp);
492   free(passwdp);
493   free(optionsp);
494 
495   return result;
496 }
497 
Curl_parse_port(struct Curl_URL * u,char * hostname,bool has_scheme)498 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
499                                    bool has_scheme)
500 {
501   char *portptr = NULL;
502   char endbracket;
503   int len;
504 
505   /*
506    * Find the end of an IPv6 address, either on the ']' ending bracket or
507    * a percent-encoded zone index.
508    */
509   if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
510                  &endbracket, &len)) {
511     if(']' == endbracket)
512       portptr = &hostname[len];
513     else if('%' == endbracket) {
514       int zonelen = len;
515       if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
516         if(']' != endbracket)
517           return CURLUE_MALFORMED_INPUT;
518         portptr = &hostname[--zonelen + len + 1];
519       }
520       else
521         return CURLUE_MALFORMED_INPUT;
522     }
523     else
524       return CURLUE_MALFORMED_INPUT;
525 
526     /* this is a RFC2732-style specified IP-address */
527     if(portptr && *portptr) {
528       if(*portptr != ':')
529         return CURLUE_MALFORMED_INPUT;
530     }
531     else
532       portptr = NULL;
533   }
534   else
535     portptr = strchr(hostname, ':');
536 
537   if(portptr) {
538     char *rest;
539     long port;
540     char portbuf[7];
541 
542     /* Browser behavior adaptation. If there's a colon with no digits after,
543        just cut off the name there which makes us ignore the colon and just
544        use the default port. Firefox, Chrome and Safari all do that.
545 
546        Don't do it if the URL has no scheme, to make something that looks like
547        a scheme not work!
548     */
549     if(!portptr[1]) {
550       *portptr = '\0';
551       return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
552     }
553 
554     if(!ISDIGIT(portptr[1]))
555       return CURLUE_BAD_PORT_NUMBER;
556 
557     port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */
558 
559     if((port <= 0) || (port > 0xffff))
560       /* Single unix standard says port numbers are 16 bits long, but we don't
561          treat port zero as OK. */
562       return CURLUE_BAD_PORT_NUMBER;
563 
564     if(rest[0])
565       return CURLUE_BAD_PORT_NUMBER;
566 
567     *portptr++ = '\0'; /* cut off the name there */
568     *rest = 0;
569     /* generate a new port number string to get rid of leading zeroes etc */
570     msnprintf(portbuf, sizeof(portbuf), "%ld", port);
571     u->portnum = port;
572     u->port = strdup(portbuf);
573     if(!u->port)
574       return CURLUE_OUT_OF_MEMORY;
575   }
576 
577   return CURLUE_OK;
578 }
579 
580 /* scan for byte values < 31 or 127 */
junkscan(const char * part,unsigned int flags)581 static bool junkscan(const char *part, unsigned int flags)
582 {
583   if(part) {
584     static const char badbytes[]={
585       /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
586       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
587       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
588       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
589       0x7f, 0x00 /* null-terminate */
590     };
591     size_t n = strlen(part);
592     size_t nfine = strcspn(part, badbytes);
593     if(nfine != n)
594       /* since we don't know which part is scanned, return a generic error
595          code */
596       return TRUE;
597     if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
598       return TRUE;
599   }
600   return FALSE;
601 }
602 
/*
 * Validate a host name and, for a bracketed numerical IPv6 address, extract
 * an optional zone id.
 *
 * A name starting with '[' must end with ']' and hold only hexadecimal
 * digits, ':' and '.' in between, optionally followed by '%' (or "%25") and
 * a zone id of 1 to 15 characters. The zone id is stored as a new string in
 * u->zoneid and removed from 'hostname', which is modified in place so that
 * the ']' follows the address directly. When built with ENABLE_IPV6 the
 * address must also be accepted by Curl_inet_pton().
 *
 * Any other name is only checked for not containing space, CR or LF and for
 * not being empty.
 *
 * Returns CURLUE_OK, CURLUE_MALFORMED_INPUT, CURLUE_NO_HOST or
 * CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
{
  size_t len;
  size_t hlen = strlen(hostname);

  if(hostname[0] == '[') {
#ifdef ENABLE_IPV6
    char dest[16]; /* fits a binary IPv6 address */
#endif
    const char *l = "0123456789abcdefABCDEF:.";
    if(hlen < 4) /* '[::]' is the shortest possible valid string */
      return CURLUE_MALFORMED_INPUT;
    /* step past the '[' and make hlen the index of the last character,
       which is where the closing bracket has to be */
    hostname++;
    hlen -= 2;

    if(hostname[hlen] != ']')
      return CURLUE_MALFORMED_INPUT;

    /* only valid letters are ok */
    len = strspn(hostname, l);
    if(hlen != len) {
      /* something else than address characters precedes the ']'; from here
         on hlen marks the end of the address part */
      hlen = len;
      if(hostname[len] == '%') {
        /* this could now be '%[zone id]' */
        char zoneid[16];
        int i = 0;
        char *h = &hostname[len + 1];
        /* pass '25' if present and is a url encoded percent sign */
        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
          h += 2;
        /* copy at most 15 characters of zone id */
        while(*h && (*h != ']') && (i < 15))
          zoneid[i++] = *h++;
        /* an empty zone id, or one that did not end on the bracket (which
           includes one longer than 15 characters) is an error */
        if(!i || (']' != *h))
          return CURLUE_MALFORMED_INPUT;
        zoneid[i] = 0;
        u->zoneid = strdup(zoneid);
        if(!u->zoneid)
          return CURLUE_OUT_OF_MEMORY;
        hostname[len] = ']'; /* insert end bracket */
        hostname[len + 1] = 0; /* terminate the hostname */
      }
      else
        return CURLUE_MALFORMED_INPUT;
      /* hostname is fine */
    }
#ifdef ENABLE_IPV6
    hostname[hlen] = 0; /* end the address there */
    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
      return CURLUE_MALFORMED_INPUT;
    hostname[hlen] = ']'; /* restore ending bracket */
#endif
  }
  else {
    /* letters from the second string is not ok */
    len = strcspn(hostname, " \r\n");
    if(hlen != len)
      /* hostname with bad content */
      return CURLUE_MALFORMED_INPUT;
  }
  if(!hostname[0])
    return CURLUE_NO_HOST;
  return CURLUE_OK;
}
666 
/* true for a character that ends the host name part of a URL string: '/',
   '?' or '#'. The argument is evaluated more than once. */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
668 
669 /*
670  * Handle partial IPv4 numerical addresses and different bases, like
671  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
672  *
673  * If the given input string is syntactically wrong or any part for example is
674  * too big, this function returns FALSE and doesn't create any output.
675  *
676  * Output the "normalized" version of that input string in plain quad decimal
677  * integers and return TRUE.
678  */
ipv4_normalize(const char * hostname,char * outp,size_t olen)679 static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
680 {
681   bool done = FALSE;
682   int n = 0;
683   const char *c = hostname;
684   unsigned long parts[4] = {0, 0, 0, 0};
685 
686   while(!done) {
687     char *endp;
688     unsigned long l;
689     if((*c < '0') || (*c > '9'))
690       /* most importantly this doesn't allow a leading plus or minus */
691       return FALSE;
692     l = strtoul(c, &endp, 0);
693 
694     /* overflow or nothing parsed at all */
695     if(((l == ULONG_MAX) && (errno == ERANGE)) ||  (endp == c))
696       return FALSE;
697 
698 #if SIZEOF_LONG > 4
699     /* a value larger than 32 bits */
700     if(l > UINT_MAX)
701       return FALSE;
702 #endif
703 
704     parts[n] = l;
705     c = endp;
706 
707     switch (*c) {
708     case '.' :
709       if(n == 3)
710         return FALSE;
711       n++;
712       c++;
713       break;
714 
715     case '\0':
716       done = TRUE;
717       break;
718 
719     default:
720       return FALSE;
721     }
722   }
723 
724   /* this is deemed a valid IPv4 numerical address */
725 
726   switch(n) {
727   case 0: /* a -- 32 bits */
728     msnprintf(outp, olen, "%u.%u.%u.%u",
729               parts[0] >> 24, (parts[0] >> 16) & 0xff,
730               (parts[0] >> 8) & 0xff, parts[0] & 0xff);
731     break;
732   case 1: /* a.b -- 8.24 bits */
733     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
734       return FALSE;
735     msnprintf(outp, olen, "%u.%u.%u.%u",
736               parts[0], (parts[1] >> 16) & 0xff,
737               (parts[1] >> 8) & 0xff, parts[1] & 0xff);
738     break;
739   case 2: /* a.b.c -- 8.8.16 bits */
740     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
741       return FALSE;
742     msnprintf(outp, olen, "%u.%u.%u.%u",
743               parts[0], parts[1], (parts[2] >> 8) & 0xff,
744               parts[2] & 0xff);
745     break;
746   case 3: /* a.b.c.d -- 8.8.8.8 bits */
747     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
748        (parts[3] > 0xff))
749       return FALSE;
750     msnprintf(outp, olen, "%u.%u.%u.%u",
751               parts[0], parts[1], parts[2], parts[3]);
752     break;
753   }
754   return TRUE;
755 }
756 
757 /* return strdup'ed version in 'outp', possibly percent decoded */
decode_host(char * hostname,char ** outp)758 static CURLUcode decode_host(char *hostname, char **outp)
759 {
760   char *per = NULL;
761   if(hostname[0] != '[')
762     /* only decode if not an ipv6 numerical */
763     per = strchr(hostname, '%');
764   if(!per) {
765     *outp = strdup(hostname);
766     if(!*outp)
767       return CURLUE_OUT_OF_MEMORY;
768   }
769   else {
770     /* might be encoded */
771     size_t dlen;
772     CURLcode result = Curl_urldecode(NULL, hostname, 0,
773                                      outp, &dlen, REJECT_CTRL);
774     if(result)
775       return CURLUE_MALFORMED_INPUT;
776   }
777 
778   return CURLUE_OK;
779 }
780 
seturl(const char * url,CURLU * u,unsigned int flags)781 static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
782 {
783   char *path;
784   bool path_alloced = FALSE;
785   bool uncpath = FALSE;
786   char *hostname;
787   char *query = NULL;
788   char *fragment = NULL;
789   CURLUcode result;
790   bool url_has_scheme = FALSE;
791   char schemebuf[MAX_SCHEME_LEN + 1];
792   const char *schemep = NULL;
793   size_t schemelen = 0;
794   size_t urllen;
795 
796   DEBUGASSERT(url);
797 
798   /*************************************************************
799    * Parse the URL.
800    ************************************************************/
801   /* allocate scratch area */
802   urllen = strlen(url);
803   if(urllen > CURL_MAX_INPUT_LENGTH)
804     /* excessive input length */
805     return CURLUE_MALFORMED_INPUT;
806 
807   path = u->scratch = malloc(urllen * 2 + 2);
808   if(!path)
809     return CURLUE_OUT_OF_MEMORY;
810 
811   hostname = &path[urllen + 1];
812   hostname[0] = 0;
813 
814   if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
815     url_has_scheme = TRUE;
816     schemelen = strlen(schemebuf);
817   }
818 
819   /* handle the file: scheme */
820   if(url_has_scheme && strcasecompare(schemebuf, "file")) {
821     /* path has been allocated large enough to hold this */
822     strcpy(path, &url[5]);
823 
824     u->scheme = strdup("file");
825     if(!u->scheme)
826       return CURLUE_OUT_OF_MEMORY;
827 
828     /* Extra handling URLs with an authority component (i.e. that start with
829      * "file://")
830      *
831      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
832      * RFC 8089, but not the (current) WHAT-WG URL spec.
833      */
834     if(path[0] == '/' && path[1] == '/') {
835       /* swallow the two slashes */
836       char *ptr = &path[2];
837 
838       /*
839        * According to RFC 8089, a file: URL can be reliably dereferenced if:
840        *
841        *  o it has no/blank hostname, or
842        *
843        *  o the hostname matches "localhost" (case-insensitively), or
844        *
845        *  o the hostname is a FQDN that resolves to this machine, or
846        *
847        *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
848        *    Appendix E.3).
849        *
850        * For brevity, we only consider URLs with empty, "localhost", or
851        * "127.0.0.1" hostnames as local, otherwise as an UNC String.
852        *
853        * Additionally, there is an exception for URLs with a Windows drive
854        * letter in the authority (which was accidentally omitted from RFC 8089
855        * Appendix E, but believe me, it was meant to be there. --MK)
856        */
857       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
858         /* the URL includes a host name, it must match "localhost" or
859            "127.0.0.1" to be valid */
860         if(checkprefix("localhost/", ptr) ||
861            checkprefix("127.0.0.1/", ptr)) {
862           ptr += 9; /* now points to the slash after the host */
863         }
864         else {
865 #if defined(WIN32)
866           size_t len;
867 
868           /* the host name, NetBIOS computer name, can not contain disallowed
869              chars, and the delimiting slash character must be appended to the
870              host name */
871           path = strpbrk(ptr, "/\\:*?\"<>|");
872           if(!path || *path != '/')
873             return CURLUE_MALFORMED_INPUT;
874 
875           len = path - ptr;
876           if(len) {
877             memcpy(hostname, ptr, len);
878             hostname[len] = 0;
879             uncpath = TRUE;
880           }
881 
882           ptr -= 2; /* now points to the // before the host in UNC */
883 #else
884           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
885              none */
886           return CURLUE_MALFORMED_INPUT;
887 #endif
888         }
889       }
890 
891       path = ptr;
892     }
893 
894     if(!uncpath)
895         hostname = NULL; /* no host for file: URLs by default */
896 
897 #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
898     /* Don't allow Windows drive letters when not in Windows.
899      * This catches both "file:/c:" and "file:c:" */
900     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
901        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
902       /* File drive letters are only accepted in MSDOS/Windows */
903       return CURLUE_MALFORMED_INPUT;
904     }
905 #else
906     /* If the path starts with a slash and a drive letter, ditch the slash */
907     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
908       /* This cannot be done with strcpy, as the memory chunks overlap! */
909       memmove(path, &path[1], strlen(&path[1]) + 1);
910     }
911 #endif
912 
913   }
914   else {
915     /* clear path */
916     const char *p;
917     const char *hostp;
918     size_t len;
919     path[0] = 0;
920 
921     if(url_has_scheme) {
922       int i = 0;
923       p = &url[schemelen + 1];
924       while(p && (*p == '/') && (i < 4)) {
925         p++;
926         i++;
927       }
928       if((i < 1) || (i>3))
929         /* less than one or more than three slashes */
930         return CURLUE_MALFORMED_INPUT;
931 
932       schemep = schemebuf;
933       if(!Curl_builtin_scheme(schemep) &&
934          !(flags & CURLU_NON_SUPPORT_SCHEME))
935         return CURLUE_UNSUPPORTED_SCHEME;
936 
937       if(junkscan(schemep, flags))
938         return CURLUE_MALFORMED_INPUT;
939     }
940     else {
941       /* no scheme! */
942 
943       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
944         return CURLUE_MALFORMED_INPUT;
945       if(flags & CURLU_DEFAULT_SCHEME)
946         schemep = DEFAULT_SCHEME;
947 
948       /*
949        * The URL was badly formatted, let's try without scheme specified.
950        */
951       p = url;
952     }
953     hostp = p; /* host name starts here */
954 
955     while(*p && !HOSTNAME_END(*p)) /* find end of host name */
956       p++;
957 
958     len = p - hostp;
959     if(len) {
960       memcpy(hostname, hostp, len);
961       hostname[len] = 0;
962     }
963     else {
964       if(!(flags & CURLU_NO_AUTHORITY))
965         return CURLUE_MALFORMED_INPUT;
966     }
967 
968     len = strlen(p);
969     memcpy(path, p, len);
970     path[len] = 0;
971 
972     if(schemep) {
973       u->scheme = strdup(schemep);
974       if(!u->scheme)
975         return CURLUE_OUT_OF_MEMORY;
976     }
977   }
978 
979   if(junkscan(path, flags))
980     return CURLUE_MALFORMED_INPUT;
981 
982   if((flags & CURLU_URLENCODE) && path[0]) {
983     /* worst case output length is 3x the original! */
984     char *newp = malloc(strlen(path) * 3);
985     if(!newp)
986       return CURLUE_OUT_OF_MEMORY;
987     path_alloced = TRUE;
988     strcpy_url(newp, path, TRUE); /* consider it relative */
989     u->temppath = path = newp;
990   }
991 
992   fragment = strchr(path, '#');
993   if(fragment) {
994     *fragment++ = 0;
995     if(fragment[0]) {
996       u->fragment = strdup(fragment);
997       if(!u->fragment)
998         return CURLUE_OUT_OF_MEMORY;
999     }
1000   }
1001 
1002   query = strchr(path, '?');
1003   if(query) {
1004     *query++ = 0;
1005     /* done even if the query part is a blank string */
1006     u->query = strdup(query);
1007     if(!u->query)
1008       return CURLUE_OUT_OF_MEMORY;
1009   }
1010 
1011   if(!path[0])
1012     /* if there's no path left set, unset */
1013     path = NULL;
1014   else {
1015     if(!(flags & CURLU_PATH_AS_IS)) {
1016       /* remove ../ and ./ sequences according to RFC3986 */
1017       char *newp = Curl_dedotdotify(path);
1018       if(!newp)
1019         return CURLUE_OUT_OF_MEMORY;
1020 
1021       if(strcmp(newp, path)) {
1022         /* if we got a new version */
1023         if(path_alloced)
1024           Curl_safefree(u->temppath);
1025         u->temppath = path = newp;
1026         path_alloced = TRUE;
1027       }
1028       else
1029         free(newp);
1030     }
1031 
1032     u->path = path_alloced?path:strdup(path);
1033     if(!u->path)
1034       return CURLUE_OUT_OF_MEMORY;
1035     u->temppath = NULL; /* used now */
1036   }
1037 
1038   if(hostname) {
1039     char normalized_ipv4[sizeof("255.255.255.255") + 1];
1040     /*
1041      * Parse the login details and strip them out of the host name.
1042      */
1043     if(junkscan(hostname, flags))
1044       return CURLUE_MALFORMED_INPUT;
1045 
1046     result = parse_hostname_login(u, &hostname, flags);
1047     if(result)
1048       return result;
1049 
1050     result = Curl_parse_port(u, hostname, url_has_scheme);
1051     if(result)
1052       return result;
1053 
1054     if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1055       /* Skip hostname check, it's allowed to be empty. */
1056       u->host = strdup("");
1057     }
1058     else {
1059       if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1060         u->host = strdup(normalized_ipv4);
1061       else {
1062         result = decode_host(hostname, &u->host);
1063         if(result)
1064           return result;
1065         result = hostname_check(u, u->host);
1066         if(result)
1067           return result;
1068       }
1069     }
1070     if(!u->host)
1071       return CURLUE_OUT_OF_MEMORY;
1072     if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1073       /* legacy curl-style guess based on host name */
1074       if(checkprefix("ftp.", hostname))
1075         schemep = "ftp";
1076       else if(checkprefix("dict.", hostname))
1077         schemep = "dict";
1078       else if(checkprefix("ldap.", hostname))
1079         schemep = "ldap";
1080       else if(checkprefix("imap.", hostname))
1081         schemep = "imap";
1082       else if(checkprefix("smtp.", hostname))
1083         schemep = "smtp";
1084       else if(checkprefix("pop3.", hostname))
1085         schemep = "pop3";
1086       else
1087         schemep = "http";
1088 
1089       u->scheme = strdup(schemep);
1090       if(!u->scheme)
1091         return CURLUE_OUT_OF_MEMORY;
1092     }
1093   }
1094 
1095   Curl_safefree(u->scratch);
1096   Curl_safefree(u->temppath);
1097 
1098   return CURLUE_OK;
1099 }
1100 
1101 /*
1102  * Parse the URL and set the relevant members of the Curl_URL struct.
1103  */
parseurl(const char * url,CURLU * u,unsigned int flags)1104 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1105 {
1106   CURLUcode result = seturl(url, u, flags);
1107   if(result) {
1108     free_urlhandle(u);
1109     memset(u, 0, sizeof(struct Curl_URL));
1110   }
1111   return result;
1112 }
1113 
1114 /*
1115  */
curl_url(void)1116 CURLU *curl_url(void)
1117 {
1118   return calloc(sizeof(struct Curl_URL), 1);
1119 }
1120 
/*
 * curl_url_cleanup() runs free_urlhandle() on the handle and then frees the
 * handle struct itself. Passing in NULL is allowed and does nothing.
 */
void curl_url_cleanup(CURLU *u)
{
  if(!u)
    return;

  free_urlhandle(u);
  free(u);
}
1128 
/*
 * DUP() duplicates the string member 'name' of the struct that 'src' points
 * to into the same member of the struct that 'dest' points to, using
 * strdup(). A NULL source member is skipped and leaves the destination
 * member as it was.
 *
 * If strdup() fails, control jumps to a label called 'fail', which the
 * function using this macro must provide.
 *
 * The pointer arguments are parenthesized so that any pointer expression
 * (for example '&obj') can be passed, not only plain identifiers.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1137 
/*
 * curl_url_dup() creates a new URL handle holding copies of the string
 * members of 'in' plus its numerical port.
 *
 * Every string is duplicated with strdup(), so the returned handle shares
 * no memory with 'in'. The zone id is copied as well, so that a duplicated
 * handle describes the same IPv6 scoped address as the source handle.
 *
 * Returns the new handle, or NULL if any allocation failed; in the failure
 * case everything allocated so far is released with curl_url_cleanup().
 */
CURLU *curl_url_dup(CURLU *in)
{
  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
  if(u) {
    DUP(u, in, scheme);
    DUP(u, in, user);
    DUP(u, in, password);
    DUP(u, in, options);
    DUP(u, in, host);
    DUP(u, in, zoneid);
    DUP(u, in, port);
    DUP(u, in, path);
    DUP(u, in, query);
    DUP(u, in, fragment);
    u->portnum = in->portnum;
  }
  return u;
  fail:
  /* DUP() jumps here when a strdup() failed */
  curl_url_cleanup(u);
  return NULL;
}
1158 
curl_url_get(CURLU * u,CURLUPart what,char ** part,unsigned int flags)1159 CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1160                        char **part, unsigned int flags)
1161 {
1162   char *ptr;
1163   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1164   char portbuf[7];
1165   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1166   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1167   bool plusdecode = FALSE;
1168   (void)flags;
1169   if(!u)
1170     return CURLUE_BAD_HANDLE;
1171   if(!part)
1172     return CURLUE_BAD_PARTPOINTER;
1173   *part = NULL;
1174 
1175   switch(what) {
1176   case CURLUPART_SCHEME:
1177     ptr = u->scheme;
1178     ifmissing = CURLUE_NO_SCHEME;
1179     urldecode = FALSE; /* never for schemes */
1180     break;
1181   case CURLUPART_USER:
1182     ptr = u->user;
1183     ifmissing = CURLUE_NO_USER;
1184     break;
1185   case CURLUPART_PASSWORD:
1186     ptr = u->password;
1187     ifmissing = CURLUE_NO_PASSWORD;
1188     break;
1189   case CURLUPART_OPTIONS:
1190     ptr = u->options;
1191     ifmissing = CURLUE_NO_OPTIONS;
1192     break;
1193   case CURLUPART_HOST:
1194     ptr = u->host;
1195     ifmissing = CURLUE_NO_HOST;
1196     break;
1197   case CURLUPART_ZONEID:
1198     ptr = u->zoneid;
1199     break;
1200   case CURLUPART_PORT:
1201     ptr = u->port;
1202     ifmissing = CURLUE_NO_PORT;
1203     urldecode = FALSE; /* never for port */
1204     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1205       /* there's no stored port number, but asked to deliver
1206          a default one for the scheme */
1207       const struct Curl_handler *h =
1208         Curl_builtin_scheme(u->scheme);
1209       if(h) {
1210         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1211         ptr = portbuf;
1212       }
1213     }
1214     else if(ptr && u->scheme) {
1215       /* there is a stored port number, but ask to inhibit if
1216          it matches the default one for the scheme */
1217       const struct Curl_handler *h =
1218         Curl_builtin_scheme(u->scheme);
1219       if(h && (h->defport == u->portnum) &&
1220          (flags & CURLU_NO_DEFAULT_PORT))
1221         ptr = NULL;
1222     }
1223     break;
1224   case CURLUPART_PATH:
1225     ptr = u->path;
1226     if(!ptr) {
1227       ptr = u->path = strdup("/");
1228       if(!u->path)
1229         return CURLUE_OUT_OF_MEMORY;
1230     }
1231     break;
1232   case CURLUPART_QUERY:
1233     ptr = u->query;
1234     ifmissing = CURLUE_NO_QUERY;
1235     plusdecode = urldecode;
1236     break;
1237   case CURLUPART_FRAGMENT:
1238     ptr = u->fragment;
1239     ifmissing = CURLUE_NO_FRAGMENT;
1240     break;
1241   case CURLUPART_URL: {
1242     char *url;
1243     char *scheme;
1244     char *options = u->options;
1245     char *port = u->port;
1246     char *allochost = NULL;
1247     if(u->scheme && strcasecompare("file", u->scheme)) {
1248       url = aprintf("file://%s%s%s",
1249                     u->path,
1250                     u->fragment? "#": "",
1251                     u->fragment? u->fragment : "");
1252     }
1253     else if(!u->host)
1254       return CURLUE_NO_HOST;
1255     else {
1256       const struct Curl_handler *h = NULL;
1257       if(u->scheme)
1258         scheme = u->scheme;
1259       else if(flags & CURLU_DEFAULT_SCHEME)
1260         scheme = (char *) DEFAULT_SCHEME;
1261       else
1262         return CURLUE_NO_SCHEME;
1263 
1264       h = Curl_builtin_scheme(scheme);
1265       if(!port && (flags & CURLU_DEFAULT_PORT)) {
1266         /* there's no stored port number, but asked to deliver
1267            a default one for the scheme */
1268         if(h) {
1269           msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1270           port = portbuf;
1271         }
1272       }
1273       else if(port) {
1274         /* there is a stored port number, but asked to inhibit if it matches
1275            the default one for the scheme */
1276         if(h && (h->defport == u->portnum) &&
1277            (flags & CURLU_NO_DEFAULT_PORT))
1278           port = NULL;
1279       }
1280 
1281       if(h && !(h->flags & PROTOPT_URLOPTIONS))
1282         options = NULL;
1283 
1284       if(u->host[0] == '[') {
1285         if(u->zoneid) {
1286           /* make it '[ host %25 zoneid ]' */
1287           size_t hostlen = strlen(u->host);
1288           size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1289           allochost = malloc(alen);
1290           if(!allochost)
1291             return CURLUE_OUT_OF_MEMORY;
1292           memcpy(allochost, u->host, hostlen - 1);
1293           msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1294                     "%%25%s]", u->zoneid);
1295         }
1296       }
1297       else if(urlencode) {
1298         allochost = curl_easy_escape(NULL, u->host, 0);
1299         if(!allochost)
1300           return CURLUE_OUT_OF_MEMORY;
1301       }
1302       else {
1303         /* only encode '%' in output host name */
1304         char *host = u->host;
1305         size_t pcount = 0;
1306         /* first, count number of percents present in the name */
1307         while(*host) {
1308           if(*host == '%')
1309             pcount++;
1310           host++;
1311         }
1312         /* if there were percents, encode the host name */
1313         if(pcount) {
1314           size_t hostlen = strlen(u->host);
1315           size_t alen = hostlen + 2 * pcount + 1;
1316           char *o = allochost = malloc(alen);
1317           if(!allochost)
1318             return CURLUE_OUT_OF_MEMORY;
1319 
1320           host = u->host;
1321           while(*host) {
1322             if(*host == '%') {
1323               memcpy(o, "%25", 3);
1324               o += 3;
1325               host++;
1326               continue;
1327             }
1328             *o++ = *host++;
1329           }
1330           *o = '\0';
1331         }
1332       }
1333 
1334       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1335                     scheme,
1336                     u->user ? u->user : "",
1337                     u->password ? ":": "",
1338                     u->password ? u->password : "",
1339                     options ? ";" : "",
1340                     options ? options : "",
1341                     (u->user || u->password || options) ? "@": "",
1342                     allochost ? allochost : u->host,
1343                     port ? ":": "",
1344                     port ? port : "",
1345                     (u->path && (u->path[0] != '/')) ? "/": "",
1346                     u->path ? u->path : "/",
1347                     (u->query && u->query[0]) ? "?": "",
1348                     (u->query && u->query[0]) ? u->query : "",
1349                     u->fragment? "#": "",
1350                     u->fragment? u->fragment : "");
1351       free(allochost);
1352     }
1353     if(!url)
1354       return CURLUE_OUT_OF_MEMORY;
1355     *part = url;
1356     return CURLUE_OK;
1357   }
1358   default:
1359     ptr = NULL;
1360     break;
1361   }
1362   if(ptr) {
1363     *part = strdup(ptr);
1364     if(!*part)
1365       return CURLUE_OUT_OF_MEMORY;
1366     if(plusdecode) {
1367       /* convert + to space */
1368       char *plus;
1369       for(plus = *part; *plus; ++plus) {
1370         if(*plus == '+')
1371           *plus = ' ';
1372       }
1373     }
1374     if(urldecode) {
1375       char *decoded;
1376       size_t dlen;
1377       /* this unconditional rejection of control bytes is documented
1378          API behavior */
1379       CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen,
1380                                     REJECT_CTRL);
1381       free(*part);
1382       if(res) {
1383         *part = NULL;
1384         return CURLUE_URLDECODE;
1385       }
1386       *part = decoded;
1387     }
1388     return CURLUE_OK;
1389   }
1390   else
1391     return ifmissing;
1392 }
1393 
curl_url_set(CURLU * u,CURLUPart what,const char * part,unsigned int flags)1394 CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1395                        const char *part, unsigned int flags)
1396 {
1397   char **storep = NULL;
1398   long port = 0;
1399   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1400   bool plusencode = FALSE;
1401   bool urlskipslash = FALSE;
1402   bool appendquery = FALSE;
1403   bool equalsencode = FALSE;
1404 
1405   if(!u)
1406     return CURLUE_BAD_HANDLE;
1407   if(!part) {
1408     /* setting a part to NULL clears it */
1409     switch(what) {
1410     case CURLUPART_URL:
1411       break;
1412     case CURLUPART_SCHEME:
1413       storep = &u->scheme;
1414       break;
1415     case CURLUPART_USER:
1416       storep = &u->user;
1417       break;
1418     case CURLUPART_PASSWORD:
1419       storep = &u->password;
1420       break;
1421     case CURLUPART_OPTIONS:
1422       storep = &u->options;
1423       break;
1424     case CURLUPART_HOST:
1425       storep = &u->host;
1426       break;
1427     case CURLUPART_ZONEID:
1428       storep = &u->zoneid;
1429       break;
1430     case CURLUPART_PORT:
1431       u->portnum = 0;
1432       storep = &u->port;
1433       break;
1434     case CURLUPART_PATH:
1435       storep = &u->path;
1436       break;
1437     case CURLUPART_QUERY:
1438       storep = &u->query;
1439       break;
1440     case CURLUPART_FRAGMENT:
1441       storep = &u->fragment;
1442       break;
1443     default:
1444       return CURLUE_UNKNOWN_PART;
1445     }
1446     if(storep && *storep) {
1447       Curl_safefree(*storep);
1448     }
1449     return CURLUE_OK;
1450   }
1451 
1452   switch(what) {
1453   case CURLUPART_SCHEME:
1454     if(strlen(part) > MAX_SCHEME_LEN)
1455       /* too long */
1456       return CURLUE_MALFORMED_INPUT;
1457     if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1458        /* verify that it is a fine scheme */
1459        !Curl_builtin_scheme(part))
1460       return CURLUE_UNSUPPORTED_SCHEME;
1461     storep = &u->scheme;
1462     urlencode = FALSE; /* never */
1463     break;
1464   case CURLUPART_USER:
1465     storep = &u->user;
1466     break;
1467   case CURLUPART_PASSWORD:
1468     storep = &u->password;
1469     break;
1470   case CURLUPART_OPTIONS:
1471     storep = &u->options;
1472     break;
1473   case CURLUPART_HOST: {
1474     size_t len = strcspn(part, " \r\n");
1475     if(strlen(part) != len)
1476       /* hostname with bad content */
1477       return CURLUE_MALFORMED_INPUT;
1478     storep = &u->host;
1479     Curl_safefree(u->zoneid);
1480     break;
1481   }
1482   case CURLUPART_ZONEID:
1483     storep = &u->zoneid;
1484     break;
1485   case CURLUPART_PORT:
1486   {
1487     char *endp;
1488     urlencode = FALSE; /* never */
1489     port = strtol(part, &endp, 10);  /* Port number must be decimal */
1490     if((port <= 0) || (port > 0xffff))
1491       return CURLUE_BAD_PORT_NUMBER;
1492     if(*endp)
1493       /* weirdly provided number, not good! */
1494       return CURLUE_MALFORMED_INPUT;
1495     storep = &u->port;
1496   }
1497   break;
1498   case CURLUPART_PATH:
1499     urlskipslash = TRUE;
1500     storep = &u->path;
1501     break;
1502   case CURLUPART_QUERY:
1503     plusencode = urlencode;
1504     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1505     equalsencode = appendquery;
1506     storep = &u->query;
1507     break;
1508   case CURLUPART_FRAGMENT:
1509     storep = &u->fragment;
1510     break;
1511   case CURLUPART_URL: {
1512     /*
1513      * Allow a new URL to replace the existing (if any) contents.
1514      *
1515      * If the existing contents is enough for a URL, allow a relative URL to
1516      * replace it.
1517      */
1518     CURLUcode result;
1519     char *oldurl;
1520     char *redired_url;
1521     CURLU *handle2;
1522 
1523     if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) {
1524       handle2 = curl_url();
1525       if(!handle2)
1526         return CURLUE_OUT_OF_MEMORY;
1527       result = parseurl(part, handle2, flags);
1528       if(!result)
1529         mv_urlhandle(handle2, u);
1530       else
1531         curl_url_cleanup(handle2);
1532       return result;
1533     }
1534     /* extract the full "old" URL to do the redirect on */
1535     result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1536     if(result) {
1537       /* couldn't get the old URL, just use the new! */
1538       handle2 = curl_url();
1539       if(!handle2)
1540         return CURLUE_OUT_OF_MEMORY;
1541       result = parseurl(part, handle2, flags);
1542       if(!result)
1543         mv_urlhandle(handle2, u);
1544       else
1545         curl_url_cleanup(handle2);
1546       return result;
1547     }
1548 
1549     /* apply the relative part to create a new URL */
1550     redired_url = concat_url(oldurl, part);
1551     free(oldurl);
1552     if(!redired_url)
1553       return CURLUE_OUT_OF_MEMORY;
1554 
1555     /* now parse the new URL */
1556     handle2 = curl_url();
1557     if(!handle2) {
1558       free(redired_url);
1559       return CURLUE_OUT_OF_MEMORY;
1560     }
1561     result = parseurl(redired_url, handle2, flags);
1562     free(redired_url);
1563     if(!result)
1564       mv_urlhandle(handle2, u);
1565     else
1566       curl_url_cleanup(handle2);
1567     return result;
1568   }
1569   default:
1570     return CURLUE_UNKNOWN_PART;
1571   }
1572   DEBUGASSERT(storep);
1573   {
1574     const char *newp = part;
1575     size_t nalloc = strlen(part);
1576 
1577     if(nalloc > CURL_MAX_INPUT_LENGTH)
1578       /* excessive input length */
1579       return CURLUE_MALFORMED_INPUT;
1580 
1581     if(urlencode) {
1582       const unsigned char *i;
1583       char *o;
1584       char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1585       if(!enc)
1586         return CURLUE_OUT_OF_MEMORY;
1587       for(i = (const unsigned char *)part, o = enc; *i; i++) {
1588         if((*i == ' ') && plusencode) {
1589           *o = '+';
1590           o++;
1591         }
1592         else if(Curl_isunreserved(*i) ||
1593                 ((*i == '/') && urlskipslash) ||
1594                 ((*i == '=') && equalsencode)) {
1595           if((*i == '=') && equalsencode)
1596             /* only skip the first equals sign */
1597             equalsencode = FALSE;
1598           *o = *i;
1599           o++;
1600         }
1601         else {
1602           msnprintf(o, 4, "%%%02x", *i);
1603           o += 3;
1604         }
1605       }
1606       *o = 0; /* null-terminate */
1607       newp = enc;
1608     }
1609     else {
1610       char *p;
1611       newp = strdup(part);
1612       if(!newp)
1613         return CURLUE_OUT_OF_MEMORY;
1614       p = (char *)newp;
1615       while(*p) {
1616         /* make sure percent encoded are lower case */
1617         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1618            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1619           p[1] = (char)TOLOWER(p[1]);
1620           p[2] = (char)TOLOWER(p[2]);
1621           p += 3;
1622         }
1623         else
1624           p++;
1625       }
1626     }
1627 
1628     if(appendquery) {
1629       /* Append the string onto the old query. Add a '&' separator if none is
1630          present at the end of the exsting query already */
1631       size_t querylen = u->query ? strlen(u->query) : 0;
1632       bool addamperand = querylen && (u->query[querylen -1] != '&');
1633       if(querylen) {
1634         size_t newplen = strlen(newp);
1635         char *p = malloc(querylen + addamperand + newplen + 1);
1636         if(!p) {
1637           free((char *)newp);
1638           return CURLUE_OUT_OF_MEMORY;
1639         }
1640         strcpy(p, u->query); /* original query */
1641         if(addamperand)
1642           p[querylen] = '&'; /* ampersand */
1643         strcpy(&p[querylen + addamperand], newp); /* new suffix */
1644         free((char *)newp);
1645         free(*storep);
1646         *storep = p;
1647         return CURLUE_OK;
1648       }
1649     }
1650 
1651     if(what == CURLUPART_HOST) {
1652       if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1653         /* Skip hostname check, it's allowed to be empty. */
1654       }
1655       else {
1656         if(hostname_check(u, (char *)newp)) {
1657           free((char *)newp);
1658           return CURLUE_MALFORMED_INPUT;
1659         }
1660       }
1661     }
1662 
1663     free(*storep);
1664     *storep = (char *)newp;
1665   }
1666   /* set after the string, to make it not assigned if the allocation above
1667      fails */
1668   if(port)
1669     u->portnum = port;
1670   return CURLUE_OK;
1671 }
1672