/*
 * Copyright 2010 Jacek Caban for CodeWeavers
 * Copyright 2010 Thomas Mullaly
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include <limits.h>

#include "urlmon_main.h"
#include "wine/debug.h"

#define NO_SHLWAPI_REG
#include "shlwapi.h"

#include "strsafe.h"

#define URI_DISPLAY_NO_ABSOLUTE_URI         0x1
#define URI_DISPLAY_NO_DEFAULT_PORT_AUTH    0x2

/* Bits for the 'extras' argument of the parse_* helpers.  The three
 * ALLOW_NULL_TERM_* bits let parse_scheme_name(), parse_username() and
 * parse_password() accept a NUL where a delimiter is normally required.
 * NOTE(review): the remaining bits are consumed outside this excerpt.
 */
#define ALLOW_NULL_TERM_SCHEME          0x01
#define ALLOW_NULL_TERM_USER_NAME       0x02
#define ALLOW_NULL_TERM_PASSWORD        0x04
#define ALLOW_BRACKETLESS_IP_LITERAL    0x08
#define SKIP_IP_FUTURE_CHECK            0x10
#define IGNORE_PORT_DELIMITER           0x20

#define RAW_URI_FORCE_PORT_DISP     0x1
#define RAW_URI_CONVERT_TO_DOS_PATH 0x2

#define COMBINE_URI_FORCE_FLAG_USE  0x1

WINE_DEFAULT_DEBUG_CHANNEL(urlmon);

/* Private interface id; get_uri_obj() queries for it to obtain the Uri
 * implementation behind an IUri pointer.
 */
static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}};

/* Implementation object behind the IUri family of interfaces.
 * NOTE(review): the *_start / *_offset / *_len members look like
 * positions inside canon_uri -- confirm against the canonicalization
 * code, which is outside this excerpt.
 */
typedef struct {
    IUri                IUri_iface;
    IUriBuilderFactory  IUriBuilderFactory_iface;
    IPersistStream      IPersistStream_iface;
    IMarshal            IMarshal_iface;

    LONG ref;

    BSTR            raw_uri;

    /* Information about the canonicalized URI's buffer. */
    WCHAR           *canon_uri;
    DWORD           canon_size;
    DWORD           canon_len;
    BOOL            display_modifiers;
    DWORD           create_flags;

    INT             scheme_start;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    INT             userinfo_start;
    DWORD           userinfo_len;
    INT             userinfo_split;

    INT             host_start;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    INT             port_offset;
    DWORD           port;
    BOOL            has_port;

    INT             authority_start;
    DWORD           authority_len;

    INT             domain_offset;

    INT             path_start;
    DWORD           path_len;
    INT             extension_offset;

    INT             query_start;
    DWORD           query_len;

    INT             fragment_start;
    DWORD           fragment_len;
} Uri;

/* Implementation object behind IUriBuilder: one string/length pair per
 * URI component plus a port.
 * NOTE(review): 'modified_props' presumably records which components
 * were overridden relative to 'uri' -- confirm with the builder code.
 */
typedef struct {
    IUriBuilder IUriBuilder_iface;
    LONG ref;

    Uri *uri;
    DWORD modified_props;

    WCHAR *fragment;
    DWORD fragment_len;

    WCHAR *host;
    DWORD host_len;

    WCHAR *password;
    DWORD password_len;

    WCHAR *path;
    DWORD path_len;

    BOOL has_port;
    DWORD port;

    WCHAR *query;
    DWORD query_len;

    WCHAR *scheme;
    DWORD scheme_len;

    WCHAR *username;
    DWORD username_len;
} UriBuilder;

/* One hexadecimal group of an IPv6 address: 'len' characters starting
 * at 'str' (see h16tous()).
 */
typedef struct {
    const WCHAR *str;
    DWORD       len;
} h16;

typedef struct {
    /* IPv6 addresses can hold up to 8 h16 components. */
    h16         components[8];
    DWORD       h16_count;

    /* An IPv6 can have 1 elision ("::"). */
    const WCHAR *elision;

    /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
    const WCHAR *ipv4;
    DWORD       ipv4_len;

    /* Both sizes are in bytes and are filled in by compute_ipv6_comps_size(). */
    INT         components_size;
    INT         elision_size;
} ipv6_address;

/* Scratch state filled in by the parse_* functions.  The scheme,
 * username and password pointers are assigned directly from the string
 * being parsed (or from a static string for implicit schemes), so they
 * are not separately allocated copies.
 */
typedef struct {
    BSTR            uri;

    BOOL            is_relative;
    BOOL            is_opaque;
    BOOL            has_implicit_scheme;
    BOOL            has_implicit_ip;
    UINT            implicit_ipv4;
    BOOL            must_have_path;

    const WCHAR     *scheme;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    const WCHAR     *username;
    DWORD           username_len;

    const WCHAR     *password;
    DWORD           password_len;

    const WCHAR     *host;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    BOOL            has_ipv6;
    ipv6_address    ipv6_address;

    BOOL            has_port;
    const WCHAR     *port;
    DWORD           port_len;
    DWORD           port_value;

    const WCHAR     *path;
    DWORD           path_len;

    const WCHAR     *query;
    DWORD           query_len;

    const WCHAR     *fragment;
    DWORD           fragment_len;
} parse_data;

/* Upper-case hex digits; indexed by nibble value in pct_encode_val(). */
static const CHAR hexDigits[] = "0123456789ABCDEF";

/* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7.
 *
 * parse_scheme_type() matches a parsed scheme against scheme_name
 * case-insensitively.  The array is sized for the longest entries
 * (15 characters plus the terminating 0).
 */
static const struct {
    URL_SCHEME  scheme;
    WCHAR       scheme_name[16];
} recognized_schemes[] = {
    {URL_SCHEME_FTP,            {'f','t','p',0}},
    {URL_SCHEME_HTTP,           {'h','t','t','p',0}},
    {URL_SCHEME_GOPHER,         {'g','o','p','h','e','r',0}},
    {URL_SCHEME_MAILTO,         {'m','a','i','l','t','o',0}},
    {URL_SCHEME_NEWS,           {'n','e','w','s',0}},
    {URL_SCHEME_NNTP,           {'n','n','t','p',0}},
    {URL_SCHEME_TELNET,         {'t','e','l','n','e','t',0}},
    {URL_SCHEME_WAIS,           {'w','a','i','s',0}},
    {URL_SCHEME_FILE,           {'f','i','l','e',0}},
    {URL_SCHEME_MK,             {'m','k',0}},
    {URL_SCHEME_HTTPS,          {'h','t','t','p','s',0}},
    {URL_SCHEME_SHELL,          {'s','h','e','l','l',0}},
    {URL_SCHEME_SNEWS,          {'s','n','e','w','s',0}},
    {URL_SCHEME_LOCAL,          {'l','o','c','a','l',0}},
    {URL_SCHEME_JAVASCRIPT,     {'j','a','v','a','s','c','r','i','p','t',0}},
    {URL_SCHEME_VBSCRIPT,       {'v','b','s','c','r','i','p','t',0}},
    {URL_SCHEME_ABOUT,          {'a','b','o','u','t',0}},
    {URL_SCHEME_RES,            {'r','e','s',0}},
    {URL_SCHEME_MSSHELLROOTED,  {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
    {URL_SCHEME_MSSHELLIDLIST,  {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
    {URL_SCHEME_MSHELP,         {'h','c','p',0}},
    {URL_SCHEME_WILDCARD,       {'*',0}}
};

/* List of default ports Windows recognizes.  Consulted by is_default_port(). */
static const struct {
    URL_SCHEME  scheme;
    USHORT      port;
} default_ports[] = {
    {URL_SCHEME_FTP,    21},
    {URL_SCHEME_HTTP,   80},
    {URL_SCHEME_GOPHER, 70},
    {URL_SCHEME_NNTP,   119},
    {URL_SCHEME_TELNET, 23},
    {URL_SCHEME_WAIS,   210},
    {URL_SCHEME_HTTPS,  443},
};

/* List of 3-character top level domain names Windows seems to recognize.
 * There might be more, but, these are the only ones I've found so far.
240 */ 241 static const struct { 242 WCHAR tld_name[4]; 243 } recognized_tlds[] = { 244 {{'c','o','m',0}}, 245 {{'e','d','u',0}}, 246 {{'g','o','v',0}}, 247 {{'i','n','t',0}}, 248 {{'m','i','l',0}}, 249 {{'n','e','t',0}}, 250 {{'o','r','g',0}} 251 }; 252 253 static Uri *get_uri_obj(IUri *uri) 254 { 255 Uri *ret; 256 HRESULT hres; 257 258 hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret); 259 return SUCCEEDED(hres) ? ret : NULL; 260 } 261 262 static inline BOOL is_alpha(WCHAR val) { 263 return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z')); 264 } 265 266 static inline BOOL is_num(WCHAR val) { 267 return (val >= '0' && val <= '9'); 268 } 269 270 static inline BOOL is_drive_path(const WCHAR *str) { 271 return (is_alpha(str[0]) && (str[1] == ':' || str[1] == '|')); 272 } 273 274 static inline BOOL is_unc_path(const WCHAR *str) { 275 return (str[0] == '\\' && str[1] == '\\'); 276 } 277 278 static inline BOOL is_forbidden_dos_path_char(WCHAR val) { 279 return (val == '>' || val == '<' || val == '\"'); 280 } 281 282 /* A URI is implicitly a file path if it begins with 283 * a drive letter (e.g. X:) or starts with "\\" (UNC path). 284 */ 285 static inline BOOL is_implicit_file_path(const WCHAR *str) { 286 return (is_unc_path(str) || (is_alpha(str[0]) && str[1] == ':')); 287 } 288 289 /* Checks if the URI is a hierarchical URI. A hierarchical 290 * URI is one that has "//" after the scheme. 291 */ 292 static BOOL check_hierarchical(const WCHAR **ptr) { 293 const WCHAR *start = *ptr; 294 295 if(**ptr != '/') 296 return FALSE; 297 298 ++(*ptr); 299 if(**ptr != '/') { 300 *ptr = start; 301 return FALSE; 302 } 303 304 ++(*ptr); 305 return TRUE; 306 } 307 308 /* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */ 309 static inline BOOL is_unreserved(WCHAR val) { 310 return (is_alpha(val) || is_num(val) || val == '-' || val == '.' || 311 val == '_' || val == '~'); 312 } 313 314 /* sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 315 * / "*" / "+" / "," / ";" / "=" 316 */ 317 static inline BOOL is_subdelim(WCHAR val) { 318 return (val == '!' || val == '$' || val == '&' || 319 val == '\'' || val == '(' || val == ')' || 320 val == '*' || val == '+' || val == ',' || 321 val == ';' || val == '='); 322 } 323 324 /* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */ 325 static inline BOOL is_gendelim(WCHAR val) { 326 return (val == ':' || val == '/' || val == '?' || 327 val == '#' || val == '[' || val == ']' || 328 val == '@'); 329 } 330 331 /* Characters that delimit the end of the authority 332 * section of a URI. Sometimes a '\\' is considered 333 * an authority delimiter. 334 */ 335 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) { 336 return (val == '#' || val == '/' || val == '?' || 337 val == '\0' || (acceptSlash && val == '\\')); 338 } 339 340 /* reserved = gen-delims / sub-delims */ 341 static inline BOOL is_reserved(WCHAR val) { 342 return (is_subdelim(val) || is_gendelim(val)); 343 } 344 345 static inline BOOL is_hexdigit(WCHAR val) { 346 return ((val >= 'a' && val <= 'f') || 347 (val >= 'A' && val <= 'F') || 348 (val >= '0' && val <= '9')); 349 } 350 351 static inline BOOL is_path_delim(URL_SCHEME scheme, WCHAR val) { 352 return (!val || (val == '#' && scheme != URL_SCHEME_FILE) || val == '?'); 353 } 354 355 static inline BOOL is_slash(WCHAR c) 356 { 357 return c == '/' || c == '\\'; 358 } 359 360 static inline BOOL is_ascii(WCHAR c) 361 { 362 return c < 0x80; 363 } 364 365 static BOOL is_default_port(URL_SCHEME scheme, DWORD port) { 366 DWORD i; 367 368 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 369 if(default_ports[i].scheme == scheme && default_ports[i].port) 370 return TRUE; 371 } 372 373 return FALSE; 374 } 375 376 /* List of schemes types Windows seems to expect to be hierarchical. 
*/ 377 static inline BOOL is_hierarchical_scheme(URL_SCHEME type) { 378 return(type == URL_SCHEME_HTTP || type == URL_SCHEME_FTP || 379 type == URL_SCHEME_GOPHER || type == URL_SCHEME_NNTP || 380 type == URL_SCHEME_TELNET || type == URL_SCHEME_WAIS || 381 type == URL_SCHEME_FILE || type == URL_SCHEME_HTTPS || 382 type == URL_SCHEME_RES); 383 } 384 385 /* Checks if 'flags' contains an invalid combination of Uri_CREATE flags. */ 386 static inline BOOL has_invalid_flag_combination(DWORD flags) { 387 return((flags & Uri_CREATE_DECODE_EXTRA_INFO && flags & Uri_CREATE_NO_DECODE_EXTRA_INFO) || 388 (flags & Uri_CREATE_CANONICALIZE && flags & Uri_CREATE_NO_CANONICALIZE) || 389 (flags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) || 390 (flags & Uri_CREATE_PRE_PROCESS_HTML_URI && flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) || 391 (flags & Uri_CREATE_IE_SETTINGS && flags & Uri_CREATE_NO_IE_SETTINGS)); 392 } 393 394 /* Applies each default Uri_CREATE flags to 'flags' if it 395 * doesn't cause a flag conflict. 396 */ 397 static void apply_default_flags(DWORD *flags) { 398 if(!(*flags & Uri_CREATE_NO_CANONICALIZE)) 399 *flags |= Uri_CREATE_CANONICALIZE; 400 if(!(*flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) 401 *flags |= Uri_CREATE_DECODE_EXTRA_INFO; 402 if(!(*flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) 403 *flags |= Uri_CREATE_CRACK_UNKNOWN_SCHEMES; 404 if(!(*flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 405 *flags |= Uri_CREATE_PRE_PROCESS_HTML_URI; 406 if(!(*flags & Uri_CREATE_IE_SETTINGS)) 407 *flags |= Uri_CREATE_NO_IE_SETTINGS; 408 } 409 410 /* Determines if the URI is hierarchical using the information already parsed into 411 * data and using the current location of parsing in the URI string. 412 * 413 * Windows considers a URI hierarchical if one of the following is true: 414 * A.) It's a wildcard scheme. 415 * B.) It's an implicit file scheme. 416 * C.) It's a known hierarchical scheme and it has two '\\' after the scheme name. 
417 * (the '\\' will be converted into "//" during canonicalization). 418 * D.) "//" appears after the scheme name (or at the beginning if no scheme is given). 419 */ 420 static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data) { 421 const WCHAR *start = *ptr; 422 423 if(data->scheme_type == URL_SCHEME_WILDCARD) 424 return TRUE; 425 else if(data->scheme_type == URL_SCHEME_FILE && data->has_implicit_scheme) 426 return TRUE; 427 else if(is_hierarchical_scheme(data->scheme_type) && (*ptr)[0] == '\\' && (*ptr)[1] == '\\') { 428 *ptr += 2; 429 return TRUE; 430 } else if(data->scheme_type != URL_SCHEME_MAILTO && check_hierarchical(ptr)) 431 return TRUE; 432 433 *ptr = start; 434 return FALSE; 435 } 436 437 /* Computes the size of the given IPv6 address. 438 * Each h16 component is 16 bits. If there is an IPv4 address, it's 439 * 32 bits. If there's an elision it can be 16 to 128 bits, depending 440 * on the number of other components. 441 * 442 * Modeled after google-url's CheckIPv6ComponentsSize function 443 */ 444 static void compute_ipv6_comps_size(ipv6_address *address) { 445 address->components_size = address->h16_count * 2; 446 447 if(address->ipv4) 448 /* IPv4 address is 4 bytes. */ 449 address->components_size += 4; 450 451 if(address->elision) { 452 /* An elision can be anywhere from 2 bytes up to 16 bytes. 453 * Its size depends on the size of the h16 and IPv4 components. 
454 */ 455 address->elision_size = 16 - address->components_size; 456 if(address->elision_size < 2) 457 address->elision_size = 2; 458 } else 459 address->elision_size = 0; 460 } 461 462 /* Taken from dlls/jscript/lex.c */ 463 static int hex_to_int(WCHAR val) { 464 if(val >= '0' && val <= '9') 465 return val - '0'; 466 else if(val >= 'a' && val <= 'f') 467 return val - 'a' + 10; 468 else if(val >= 'A' && val <= 'F') 469 return val - 'A' + 10; 470 471 return -1; 472 } 473 474 /* Helper function for converting a percent encoded string 475 * representation of a WCHAR value into its actual WCHAR value. If 476 * the two characters following the '%' aren't valid hex values then 477 * this function returns the NULL character. 478 * 479 * E.g. 480 * "%2E" will result in '.' being returned by this function. 481 */ 482 static WCHAR decode_pct_val(const WCHAR *ptr) { 483 WCHAR ret = '\0'; 484 485 if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) { 486 INT a = hex_to_int(*(ptr + 1)); 487 INT b = hex_to_int(*(ptr + 2)); 488 489 ret = a << 4; 490 ret += b; 491 } 492 493 return ret; 494 } 495 496 /* Helper function for percent encoding a given character 497 * and storing the encoded value into a given buffer (dest). 498 * 499 * It's up to the calling function to ensure that there is 500 * at least enough space in 'dest' for the percent encoded 501 * value to be stored (so dest + 3 spaces available). 502 */ 503 static inline void pct_encode_val(WCHAR val, WCHAR *dest) { 504 dest[0] = '%'; 505 dest[1] = hexDigits[(val >> 4) & 0xf]; 506 dest[2] = hexDigits[val & 0xf]; 507 } 508 509 /* Attempts to parse the domain name from the host. 510 * 511 * This function also includes the Top-level Domain (TLD) name 512 * of the host when it tries to find the domain name. If it finds 513 * a valid domain name it will assign 'domain_start' the offset 514 * into 'host' where the domain name starts. 
 *
 * It's implied that if there is a domain name its range is:
 * [host+domain_start, host+host_len).
 *
 * When no domain name is found 'domain_start' is set to -1.
 */
void find_domain_name(const WCHAR *host, DWORD host_len,
                             INT *domain_start) {
    const WCHAR *last_tld, *sec_last_tld, *end;

    /* 'end' points at the last character of the host, not one past it. */
    end = host+host_len-1;

    *domain_start = -1;

    /* There has to be at least enough room for a '.' followed by a
     * 3-character TLD for a domain to even exist in the host name.
     */
    if(host_len < 4)
        return;

    /* Position of the last '.' in the host. */
    last_tld = memrchrW(host, '.', host_len);
    if(!last_tld)
        /* http://hostname -> has no domain name. */
        return;

    /* Position of the '.' before that one, if any. */
    sec_last_tld = memrchrW(host, '.', last_tld-host);
    if(!sec_last_tld) {
        /* If the '.' is at the beginning of the host there
         * has to be at least 3 characters in the TLD for it
         * to be valid.
         *  Ex: .com -> .com as the domain name.
         *      .co  -> has no domain name.
         */
        if(last_tld-host == 0) {
            if(end-(last_tld-1) < 3)
                return;
        } else if(last_tld-host == 3) {
            DWORD i;

            /* If there are three characters in front of last_tld and
             * they are on the list of recognized TLDs, then this
             * host doesn't have a domain (since the host only contains
             * a TLD name.
             *  Ex: edu.uk -> has no domain name.
             *      foo.uk -> foo.uk as the domain name.
             */
            for(i = 0; i < ARRAY_SIZE(recognized_tlds); ++i) {
                if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
                    return;
            }
        } else if(last_tld-host < 3)
            /* Anything less than 3 characters is considered part
             * of the TLD name.
             *  Ex: ak.uk -> Has no domain name.
             */
            return;

        /* Otherwise the domain name is the whole host name. */
        *domain_start = 0;
    } else if(end+1-last_tld > 3) {
        /* If the last_tld has more than 3 characters, then it's automatically
         * considered the TLD of the domain name.
         *  Ex: www.winehq.org.uk.test -> uk.test as the domain name.
         */
        *domain_start = (sec_last_tld+1)-host;
    } else if(last_tld - (sec_last_tld+1) < 4) {
        DWORD i;
        /* If the sec_last_tld is 3 characters long it HAS to be on the list of
         * recognized to still be considered part of the TLD name, otherwise
         * it's considered the domain name.
         *  Ex: www.google.com.uk -> google.com.uk as the domain name.
         *      www.google.foo.uk -> foo.uk as the domain name.
         */
        if(last_tld - (sec_last_tld+1) == 3) {
            for(i = 0; i < ARRAY_SIZE(recognized_tlds); ++i) {
                if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
                    /* The label is part of the TLD, so the domain starts one
                     * label further to the left (or at the start of the host).
                     */
                    const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host);

                    if(!domain)
                        *domain_start = 0;
                    else
                        *domain_start = (domain+1) - host;
                    TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
                                                        (host+host_len)-(host+*domain_start)));
                    return;
                }
            }

            *domain_start = (sec_last_tld+1)-host;
        } else {
            /* Since the sec_last_tld is less than 3 characters it's considered
             * part of the TLD.
             *  Ex: www.google.fo.uk -> google.fo.uk as the domain name.
             */
            const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host);

            if(!domain)
                *domain_start = 0;
            else
                *domain_start = (domain+1) - host;
        }
    } else {
        /* The second to last TLD has more than 3 characters making it
         * the domain name.
         *  Ex: www.google.test.us -> test.us as the domain name.
         */
        *domain_start = (sec_last_tld+1)-host;
    }

    TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
                                        (host+host_len)-(host+*domain_start)));
}

/* Removes the dot segments from a hierarchical URIs path component. This
 * function performs the removal in place.
 *
 * This function returns the new length of the path string.
 *
 * Both '/' and '\\' are treated as segment separators (see is_slash()).
 * 'out' never runs ahead of 'in', so the copy can share one buffer.
 */
static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) {
    WCHAR *out = path;
    const WCHAR *in = out;
    const WCHAR *end = out + path_len;
    DWORD len;

    while(in < end) {
        /* Move the first path segment in the input buffer to the end of
         * the output buffer, and any subsequent characters up to, including
         * the next "/" character (if any) or the end of the input buffer.
         */
        while(in < end && !is_slash(*in))
            *out++ = *in++;
        if(in == end)
            break;
        *out++ = *in++;

        /* 'in' now sits just after a slash; consume any run of "." and
         * ".." segments that follows it.
         */
        while(in < end) {
            if(*in != '.')
                break;

            /* Handle ending "/." */
            if(in + 1 == end) {
                ++in;
                break;
            }

            /* Handle "/./" */
            if(is_slash(in[1])) {
                in += 2;
                continue;
            }

            /* If we don't have "/../" or ending "/.." */
            if(in[1] != '.' || (in + 2 != end && !is_slash(in[2])))
                break;

            /* Find the slash preceding out pointer and move out pointer to it */
            if(out > path+1 && is_slash(*--out))
                --out;
            while(out > path && !is_slash(*(--out)));
            if(is_slash(*out))
                ++out;
            /* Skip the ".." and, unless it ended the input, the slash after it. */
            in += 2;
            if(in != end)
                ++in;
        }
    }

    len = out - path;
    TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len,
        debugstr_wn(path, len), len);
    return len;
}

/* Attempts to find the file extension in a given path.
 *
 * Scans backwards from the end of the path and stops at the first
 * slash.  Returns the offset of the last '.' in the final segment, or
 * -1 if that segment has none.
 */
static INT find_file_extension(const WCHAR *path, DWORD path_len) {
    const WCHAR *end;

    for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) {
        if(*end == '.')
            return end-path;
    }

    return -1;
}

/* Computes the location where the elision should occur in the IPv6
 * address using the numerical values of each component stored in
 * 'values'. If the address shouldn't contain an elision then 'index'
 * is assigned -1 as its value.
 * Otherwise 'index' will contain the
 * starting index (into values) where the elision should be, and 'count'
 * will contain the number of cells the elision covers.
 *
 * NOTES:
 *  Windows will expand an elision if the elision only represents one h16
 *  component of the address.
 *
 *  Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7]
 *
 *  If the IPv6 address contains an IPv4 address, the IPv4 address is also
 *  considered for being included as part of an elision if all its components
 *  are zeros.
 *
 *  Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::]
 *
 *  When several runs of zeros have the same length the first one wins,
 *  because a later run must be strictly longer to replace it.
 */
static void compute_elision_location(const ipv6_address *address, const USHORT values[8],
                                     INT *index, DWORD *count) {
    DWORD i, max_len, cur_len;
    INT max_index, cur_index;

    max_len = cur_len = 0;
    max_index = cur_index = -1;
    for(i = 0; i < 8; ++i) {
        /* With an embedded IPv4 address, cells 6 and 7 are handled as a unit. */
        BOOL check_ipv4 = (address->ipv4 && i == 6);
        BOOL is_end = (check_ipv4 || i == 7);

        if(check_ipv4) {
            /* Check if the IPv4 address contains only zeros. */
            if(values[i] == 0 && values[i+1] == 0) {
                if(cur_index == -1)
                    cur_index = i;

                cur_len += 2;
                /* Step onto cell 7 so the loop terminates after this pass. */
                ++i;
            }
        } else if(values[i] == 0) {
            if(cur_index == -1)
                cur_index = i;

            ++cur_len;
        }

        /* A run of zeros ends at a non-zero cell or at the end of the address. */
        if(is_end || values[i] != 0) {
            /* We only consider it for an elision if it's
             * more than 1 component long.
             */
            if(cur_len > 1 && cur_len > max_len) {
                /* Found the new elision location. */
                max_len = cur_len;
                max_index = cur_index;
            }

            /* Reset the current range for the next range of zeros. */
            cur_index = -1;
            cur_len = 0;
        }
    }

    *index = max_index;
    *count = max_len;
}

/* Removes all the leading and trailing white spaces or
 * control characters from the URI and removes all control
 * characters inside of the URI string.
767 */ 768 static BSTR pre_process_uri(LPCWSTR uri) { 769 const WCHAR *start, *end, *ptr; 770 WCHAR *ptr2; 771 DWORD len; 772 BSTR ret; 773 774 start = uri; 775 /* Skip leading controls and whitespace. */ 776 while(*start && (iscntrlW(*start) || isspaceW(*start))) ++start; 777 778 /* URI consisted only of control/whitespace. */ 779 if(!*start) 780 return SysAllocStringLen(NULL, 0); 781 782 end = start + strlenW(start); 783 while(--end > start && (iscntrlW(*end) || isspaceW(*end))); 784 785 len = ++end - start; 786 for(ptr = start; ptr < end; ptr++) { 787 if(iscntrlW(*ptr)) 788 len--; 789 } 790 791 ret = SysAllocStringLen(NULL, len); 792 if(!ret) 793 return NULL; 794 795 for(ptr = start, ptr2=ret; ptr < end; ptr++) { 796 if(!iscntrlW(*ptr)) 797 *ptr2++ = *ptr; 798 } 799 800 return ret; 801 } 802 803 /* Converts the specified IPv4 address into an uint value. 804 * 805 * This function assumes that the IPv4 address has already been validated. 806 */ 807 static UINT ipv4toui(const WCHAR *ip, DWORD len) { 808 UINT ret = 0; 809 DWORD comp_value = 0; 810 const WCHAR *ptr; 811 812 for(ptr = ip; ptr < ip+len; ++ptr) { 813 if(*ptr == '.') { 814 ret <<= 8; 815 ret += comp_value; 816 comp_value = 0; 817 } else 818 comp_value = comp_value*10 + (*ptr-'0'); 819 } 820 821 ret <<= 8; 822 ret += comp_value; 823 824 return ret; 825 } 826 827 /* Converts an IPv4 address in numerical form into its fully qualified 828 * string form. This function returns the number of characters written 829 * to 'dest'. If 'dest' is NULL this function will return the number of 830 * characters that would have been written. 831 * 832 * It's up to the caller to ensure there's enough space in 'dest' for the 833 * address. 
834 */ 835 static DWORD ui2ipv4(WCHAR *dest, UINT address) { 836 static const WCHAR formatW[] = 837 {'%','u','.','%','u','.','%','u','.','%','u',0}; 838 DWORD ret = 0; 839 UCHAR digits[4]; 840 841 digits[0] = (address >> 24) & 0xff; 842 digits[1] = (address >> 16) & 0xff; 843 digits[2] = (address >> 8) & 0xff; 844 digits[3] = address & 0xff; 845 846 if(!dest) { 847 WCHAR tmp[16]; 848 ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]); 849 } else 850 ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]); 851 852 return ret; 853 } 854 855 static DWORD ui2str(WCHAR *dest, UINT value) { 856 static const WCHAR formatW[] = {'%','u',0}; 857 DWORD ret = 0; 858 859 if(!dest) { 860 WCHAR tmp[11]; 861 ret = sprintfW(tmp, formatW, value); 862 } else 863 ret = sprintfW(dest, formatW, value); 864 865 return ret; 866 } 867 868 /* Converts a h16 component (from an IPv6 address) into its 869 * numerical value. 870 * 871 * This function assumes that the h16 component has already been validated. 872 */ 873 static USHORT h16tous(h16 component) { 874 DWORD i; 875 USHORT ret = 0; 876 877 for(i = 0; i < component.len; ++i) { 878 ret <<= 4; 879 ret += hex_to_int(component.str[i]); 880 } 881 882 return ret; 883 } 884 885 /* Converts an IPv6 address into its 128 bits (16 bytes) numerical value. 886 * 887 * This function assumes that the ipv6_address has already been validated. 888 */ 889 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) { 890 DWORD i, cur_component = 0; 891 BOOL already_passed_elision = FALSE; 892 893 for(i = 0; i < address->h16_count; ++i) { 894 if(address->elision) { 895 if(address->components[i].str > address->elision && !already_passed_elision) { 896 /* Means we just passed the elision and need to add its values to 897 * 'number' before we do anything else. 
898 */ 899 INT j; 900 for(j = 0; j < address->elision_size; j+=2) 901 number[cur_component++] = 0; 902 903 already_passed_elision = TRUE; 904 } 905 } 906 907 number[cur_component++] = h16tous(address->components[i]); 908 } 909 910 /* Case when the elision appears after the h16 components. */ 911 if(!already_passed_elision && address->elision) { 912 INT j; 913 for(j = 0; j < address->elision_size; j+=2) 914 number[cur_component++] = 0; 915 } 916 917 if(address->ipv4) { 918 UINT value = ipv4toui(address->ipv4, address->ipv4_len); 919 920 if(cur_component != 6) { 921 ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component); 922 return FALSE; 923 } 924 925 number[cur_component++] = (value >> 16) & 0xffff; 926 number[cur_component] = value & 0xffff; 927 } 928 929 return TRUE; 930 } 931 932 /* Checks if the characters pointed to by 'ptr' are 933 * a percent encoded data octet. 934 * 935 * pct-encoded = "%" HEXDIG HEXDIG 936 */ 937 static BOOL check_pct_encoded(const WCHAR **ptr) { 938 const WCHAR *start = *ptr; 939 940 if(**ptr != '%') 941 return FALSE; 942 943 ++(*ptr); 944 if(!is_hexdigit(**ptr)) { 945 *ptr = start; 946 return FALSE; 947 } 948 949 ++(*ptr); 950 if(!is_hexdigit(**ptr)) { 951 *ptr = start; 952 return FALSE; 953 } 954 955 ++(*ptr); 956 return TRUE; 957 } 958 959 /* dec-octet = DIGIT ; 0-9 960 * / %x31-39 DIGIT ; 10-99 961 * / "1" 2DIGIT ; 100-199 962 * / "2" %x30-34 DIGIT ; 200-249 963 * / "25" %x30-35 ; 250-255 964 */ 965 static BOOL check_dec_octet(const WCHAR **ptr) { 966 const WCHAR *c1, *c2, *c3; 967 968 c1 = *ptr; 969 /* A dec-octet must be at least 1 digit long. */ 970 if(*c1 < '0' || *c1 > '9') 971 return FALSE; 972 973 ++(*ptr); 974 975 c2 = *ptr; 976 /* Since the 1-digit requirement was met, it doesn't 977 * matter if this is a DIGIT value, it's considered a 978 * dec-octet. 979 */ 980 if(*c2 < '0' || *c2 > '9') 981 return TRUE; 982 983 ++(*ptr); 984 985 c3 = *ptr; 986 /* Same explanation as above. 
*/ 987 if(*c3 < '0' || *c3 > '9') 988 return TRUE; 989 990 /* Anything > 255 isn't a valid IP dec-octet. */ 991 if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') { 992 *ptr = c1; 993 return FALSE; 994 } 995 996 ++(*ptr); 997 return TRUE; 998 } 999 1000 /* Checks if there is an implicit IPv4 address in the host component of the URI. 1001 * The max value of an implicit IPv4 address is UINT_MAX. 1002 * 1003 * Ex: 1004 * "234567" would be considered an implicit IPv4 address. 1005 */ 1006 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) { 1007 const WCHAR *start = *ptr; 1008 ULONGLONG ret = 0; 1009 *val = 0; 1010 1011 while(is_num(**ptr)) { 1012 ret = ret*10 + (**ptr - '0'); 1013 1014 if(ret > UINT_MAX) { 1015 *ptr = start; 1016 return FALSE; 1017 } 1018 ++(*ptr); 1019 } 1020 1021 if(*ptr == start) 1022 return FALSE; 1023 1024 *val = ret; 1025 return TRUE; 1026 } 1027 1028 /* Checks if the string contains an IPv4 address. 1029 * 1030 * This function has a strict mode or a non-strict mode of operation 1031 * When 'strict' is set to FALSE this function will return TRUE if 1032 * the string contains at least 'dec-octet "." dec-octet' since partial 1033 * IPv4 addresses will be normalized out into full IPv4 addresses. When 1034 * 'strict' is set this function expects there to be a full IPv4 address. 1035 * 1036 * IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet 1037 */ 1038 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) { 1039 const WCHAR *start = *ptr; 1040 1041 if(!check_dec_octet(ptr)) { 1042 *ptr = start; 1043 return FALSE; 1044 } 1045 1046 if(**ptr != '.') { 1047 *ptr = start; 1048 return FALSE; 1049 } 1050 1051 ++(*ptr); 1052 if(!check_dec_octet(ptr)) { 1053 *ptr = start; 1054 return FALSE; 1055 } 1056 1057 if(**ptr != '.') { 1058 if(strict) { 1059 *ptr = start; 1060 return FALSE; 1061 } else 1062 return TRUE; 1063 } 1064 1065 ++(*ptr); 1066 if(!check_dec_octet(ptr)) { 1067 *ptr = start; 1068 return FALSE; 1069 } 1070 1071 if(**ptr != '.') { 1072 if(strict) { 1073 *ptr = start; 1074 return FALSE; 1075 } else 1076 return TRUE; 1077 } 1078 1079 ++(*ptr); 1080 if(!check_dec_octet(ptr)) { 1081 *ptr = start; 1082 return FALSE; 1083 } 1084 1085 /* Found a four digit ip address. */ 1086 return TRUE; 1087 } 1088 /* Tries to parse the scheme name of the URI. 1089 * 1090 * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896. 1091 * NOTE: Windows accepts a number as the first character of a scheme. 1092 */ 1093 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data, DWORD extras) { 1094 const WCHAR *start = *ptr; 1095 1096 data->scheme = NULL; 1097 data->scheme_len = 0; 1098 1099 while(**ptr) { 1100 if(**ptr == '*' && *ptr == start) { 1101 /* Might have found a wildcard scheme. 
If it is the next 1102 * char has to be a ':' for it to be a valid URI 1103 */ 1104 ++(*ptr); 1105 break; 1106 } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' && 1107 **ptr != '-' && **ptr != '.') 1108 break; 1109 1110 (*ptr)++; 1111 } 1112 1113 if(*ptr == start) 1114 return FALSE; 1115 1116 /* Schemes must end with a ':' */ 1117 if(**ptr != ':' && !((extras & ALLOW_NULL_TERM_SCHEME) && !**ptr)) { 1118 *ptr = start; 1119 return FALSE; 1120 } 1121 1122 data->scheme = start; 1123 data->scheme_len = *ptr - start; 1124 1125 ++(*ptr); 1126 return TRUE; 1127 } 1128 1129 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores 1130 * the deduced URL_SCHEME in data->scheme_type. 1131 */ 1132 static BOOL parse_scheme_type(parse_data *data) { 1133 /* If there's scheme data then see if it's a recognized scheme. */ 1134 if(data->scheme && data->scheme_len) { 1135 DWORD i; 1136 1137 for(i = 0; i < ARRAY_SIZE(recognized_schemes); ++i) { 1138 if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) { 1139 /* Has to be a case insensitive compare. */ 1140 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) { 1141 data->scheme_type = recognized_schemes[i].scheme; 1142 return TRUE; 1143 } 1144 } 1145 } 1146 1147 /* If we get here it means it's not a recognized scheme. */ 1148 data->scheme_type = URL_SCHEME_UNKNOWN; 1149 return TRUE; 1150 } else if(data->is_relative) { 1151 /* Relative URI's have no scheme. */ 1152 data->scheme_type = URL_SCHEME_UNKNOWN; 1153 return TRUE; 1154 } else { 1155 /* Should never reach here! what happened... */ 1156 FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri)); 1157 return FALSE; 1158 } 1159 } 1160 1161 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't 1162 * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type 1163 * using the flags specified in 'flags' (if any). 
Flags that affect how this function 1164 * operates are the Uri_CREATE_ALLOW_* flags. 1165 * 1166 * All parsed/deduced information will be stored in 'data' when the function returns. 1167 * 1168 * Returns TRUE if it was able to successfully parse the information. 1169 */ 1170 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1171 static const WCHAR fileW[] = {'f','i','l','e',0}; 1172 static const WCHAR wildcardW[] = {'*',0}; 1173 1174 /* First check to see if the uri could implicitly be a file path. */ 1175 if(is_implicit_file_path(*ptr)) { 1176 if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) { 1177 data->scheme = fileW; 1178 data->scheme_len = lstrlenW(fileW); 1179 data->has_implicit_scheme = TRUE; 1180 1181 TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags); 1182 } else { 1183 /* Windows does not consider anything that can implicitly be a file 1184 * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set... 1185 */ 1186 TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n", 1187 ptr, data, flags); 1188 return FALSE; 1189 } 1190 } else if(!parse_scheme_name(ptr, data, extras)) { 1191 /* No scheme was found, this means it could be: 1192 * a) an implicit Wildcard scheme 1193 * b) a relative URI 1194 * c) an invalid URI. 1195 */ 1196 if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) { 1197 data->scheme = wildcardW; 1198 data->scheme_len = lstrlenW(wildcardW); 1199 data->has_implicit_scheme = TRUE; 1200 1201 TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags); 1202 } else if (flags & Uri_CREATE_ALLOW_RELATIVE) { 1203 data->is_relative = TRUE; 1204 TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags); 1205 } else { 1206 TRACE("(%p %p %x): Malformed URI found. 
Unable to deduce scheme name.\n", ptr, data, flags); 1207 return FALSE; 1208 } 1209 } 1210 1211 if(!data->is_relative) 1212 TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags, 1213 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 1214 1215 if(!parse_scheme_type(data)) 1216 return FALSE; 1217 1218 TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type); 1219 return TRUE; 1220 } 1221 1222 static BOOL parse_username(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1223 data->username = *ptr; 1224 1225 while(**ptr != ':' && **ptr != '@') { 1226 if(**ptr == '%') { 1227 if(!check_pct_encoded(ptr)) { 1228 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1229 *ptr = data->username; 1230 data->username = NULL; 1231 return FALSE; 1232 } 1233 } else 1234 continue; 1235 } else if(extras & ALLOW_NULL_TERM_USER_NAME && !**ptr) 1236 break; 1237 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1238 *ptr = data->username; 1239 data->username = NULL; 1240 return FALSE; 1241 } 1242 1243 ++(*ptr); 1244 } 1245 1246 data->username_len = *ptr - data->username; 1247 return TRUE; 1248 } 1249 1250 static BOOL parse_password(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1251 data->password = *ptr; 1252 1253 while(**ptr != '@') { 1254 if(**ptr == '%') { 1255 if(!check_pct_encoded(ptr)) { 1256 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1257 *ptr = data->password; 1258 data->password = NULL; 1259 return FALSE; 1260 } 1261 } else 1262 continue; 1263 } else if(extras & ALLOW_NULL_TERM_PASSWORD && !**ptr) 1264 break; 1265 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1266 *ptr = data->password; 1267 data->password = NULL; 1268 return FALSE; 1269 } 1270 1271 ++(*ptr); 1272 } 1273 1274 data->password_len = *ptr - data->password; 1275 return TRUE; 1276 } 1277 1278 /* Parses the userinfo part of the URI (if it exists). 
The userinfo field of 1279 * a URI can consist of "username:password@", or just "username@". 1280 * 1281 * RFC def: 1282 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 1283 * 1284 * NOTES: 1285 * 1) If there is more than one ':' in the userinfo part of the URI Windows 1286 * uses the first occurrence of ':' to delimit the username and password 1287 * components. 1288 * 1289 * ex: 1290 * ftp://user:pass:word@winehq.org 1291 * 1292 * would yield "user" as the username and "pass:word" as the password. 1293 * 1294 * 2) Windows allows any character to appear in the "userinfo" part of 1295 * a URI, as long as it's not an authority delimiter character set. 1296 */ 1297 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) { 1298 const WCHAR *start = *ptr; 1299 1300 if(!parse_username(ptr, data, flags, 0)) { 1301 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1302 return; 1303 } 1304 1305 if(**ptr == ':') { 1306 ++(*ptr); 1307 if(!parse_password(ptr, data, flags, 0)) { 1308 *ptr = start; 1309 data->username = NULL; 1310 data->username_len = 0; 1311 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1312 return; 1313 } 1314 } 1315 1316 if(**ptr != '@') { 1317 *ptr = start; 1318 data->username = NULL; 1319 data->username_len = 0; 1320 data->password = NULL; 1321 data->password_len = 0; 1322 1323 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1324 return; 1325 } 1326 1327 if(data->username) 1328 TRACE("(%p %p %x): Found username %s len=%d.\n", ptr, data, flags, 1329 debugstr_wn(data->username, data->username_len), data->username_len); 1330 1331 if(data->password) 1332 TRACE("(%p %p %x): Found password %s len=%d.\n", ptr, data, flags, 1333 debugstr_wn(data->password, data->password_len), data->password_len); 1334 1335 ++(*ptr); 1336 } 1337 1338 /* Attempts to parse a port from the URI. 
1339 * 1340 * NOTES: 1341 * Windows seems to have a cap on what the maximum value 1342 * for a port can be. The max value is USHORT_MAX. 1343 * 1344 * port = *DIGIT 1345 */ 1346 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) { 1347 UINT port = 0; 1348 data->port = *ptr; 1349 1350 while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1351 if(!is_num(**ptr)) { 1352 *ptr = data->port; 1353 data->port = NULL; 1354 return FALSE; 1355 } 1356 1357 port = port*10 + (**ptr-'0'); 1358 1359 if(port > USHRT_MAX) { 1360 *ptr = data->port; 1361 data->port = NULL; 1362 return FALSE; 1363 } 1364 1365 ++(*ptr); 1366 } 1367 1368 data->has_port = TRUE; 1369 data->port_value = port; 1370 data->port_len = *ptr - data->port; 1371 1372 TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags, 1373 debugstr_wn(data->port, data->port_len), data->port_len, data->port_value); 1374 return TRUE; 1375 } 1376 1377 /* Attempts to parse a IPv4 address from the URI. 1378 * 1379 * NOTES: 1380 * Windows normalizes IPv4 addresses, This means there are three 1381 * possibilities for the URI to contain an IPv4 address. 1382 * 1) A well formed address (ex. 192.2.2.2). 1383 * 2) A partially formed address. For example "192.0" would 1384 * normalize to "192.0.0.0" during canonicalization. 1385 * 3) An implicit IPv4 address. For example "256" would 1386 * normalize to "0.0.1.0" during canonicalization. Also 1387 * note that the maximum value for an implicit IP address 1388 * is UINT_MAX, if the value in the URI exceeds this then 1389 * it is not considered an IPv4 address. 
1390 */ 1391 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) { 1392 const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN; 1393 data->host = *ptr; 1394 1395 if(!check_ipv4address(ptr, FALSE)) { 1396 if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) { 1397 TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n", 1398 ptr, data, flags); 1399 *ptr = data->host; 1400 data->host = NULL; 1401 return FALSE; 1402 } else 1403 data->has_implicit_ip = TRUE; 1404 } 1405 1406 data->host_len = *ptr - data->host; 1407 data->host_type = Uri_HOST_IPV4; 1408 1409 /* Check if what we found is the only part of the host name (if it isn't 1410 * we don't have an IPv4 address). 1411 */ 1412 if(**ptr == ':') { 1413 ++(*ptr); 1414 if(!parse_port(ptr, data, flags)) { 1415 *ptr = data->host; 1416 data->host = NULL; 1417 return FALSE; 1418 } 1419 } else if(!is_auth_delim(**ptr, !is_unknown)) { 1420 /* Found more data which belongs to the host, so this isn't an IPv4. */ 1421 *ptr = data->host; 1422 data->host = NULL; 1423 data->has_implicit_ip = FALSE; 1424 return FALSE; 1425 } 1426 1427 TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n", 1428 ptr, data, flags, debugstr_wn(data->host, data->host_len), 1429 data->host_len, data->host_type); 1430 return TRUE; 1431 } 1432 1433 /* Attempts to parse the reg-name from the URI. 1434 * 1435 * Because of the way Windows handles ':' this function also 1436 * handles parsing the port. 1437 * 1438 * reg-name = *( unreserved / pct-encoded / sub-delims ) 1439 * 1440 * NOTE: 1441 * Windows allows everything, but, the characters in "auth_delims" and ':' 1442 * to appear in a reg-name, unless it's an unknown scheme type then ':' is 1443 * allowed to appear (even if a valid port isn't after it). 1444 * 1445 * Windows doesn't like host names which start with '[' and end with ']' 1446 * and don't contain a valid IP literal address in between them. 
1447 * 1448 * On Windows if a '[' is encountered in the host name the ':' no longer 1449 * counts as a delimiter until you reach the next ']' or an "authority delimiter". 1450 * 1451 * A reg-name CAN be empty. 1452 */ 1453 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1454 const BOOL has_start_bracket = **ptr == '['; 1455 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 1456 const BOOL is_res = data->scheme_type == URL_SCHEME_RES; 1457 BOOL inside_brackets = has_start_bracket; 1458 1459 /* res URIs don't have ports. */ 1460 BOOL ignore_col = (extras & IGNORE_PORT_DELIMITER) || is_res; 1461 1462 /* We have to be careful with file schemes. */ 1463 if(data->scheme_type == URL_SCHEME_FILE) { 1464 /* This is because an implicit file scheme could be "C:\\test" and it 1465 * would trick this function into thinking the host is "C", when after 1466 * canonicalization the host would end up being an empty string. A drive 1467 * path can also have a '|' instead of a ':' after the drive letter. 1468 */ 1469 if(is_drive_path(*ptr)) { 1470 /* Regular old drive paths have no host type (or host name). */ 1471 data->host_type = Uri_HOST_UNKNOWN; 1472 data->host = *ptr; 1473 data->host_len = 0; 1474 return TRUE; 1475 } else if(is_unc_path(*ptr)) 1476 /* Skip past the "\\" of a UNC path. */ 1477 *ptr += 2; 1478 } 1479 1480 data->host = *ptr; 1481 1482 /* For res URIs, everything before the first '/' is 1483 * considered the host. 1484 */ 1485 while((!is_res && !is_auth_delim(**ptr, known_scheme)) || 1486 (is_res && **ptr && **ptr != '/')) { 1487 if(**ptr == ':' && !ignore_col) { 1488 /* We can ignore ':' if we are inside brackets.*/ 1489 if(!inside_brackets) { 1490 const WCHAR *tmp = (*ptr)++; 1491 1492 /* Attempt to parse the port. */ 1493 if(!parse_port(ptr, data, flags)) { 1494 /* Windows expects there to be a valid port for known scheme types. 
*/ 1495 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1496 *ptr = data->host; 1497 data->host = NULL; 1498 TRACE("(%p %p %x %x): Expected valid port\n", ptr, data, flags, extras); 1499 return FALSE; 1500 } else 1501 /* Windows gives up on trying to parse a port when it 1502 * encounters an invalid port. 1503 */ 1504 ignore_col = TRUE; 1505 } else { 1506 data->host_len = tmp - data->host; 1507 break; 1508 } 1509 } 1510 } else if(**ptr == '%' && (known_scheme && !is_res)) { 1511 /* Has to be a legit % encoded value. */ 1512 if(!check_pct_encoded(ptr)) { 1513 *ptr = data->host; 1514 data->host = NULL; 1515 return FALSE; 1516 } else 1517 continue; 1518 } else if(is_res && is_forbidden_dos_path_char(**ptr)) { 1519 *ptr = data->host; 1520 data->host = NULL; 1521 return FALSE; 1522 } else if(**ptr == ']') 1523 inside_brackets = FALSE; 1524 else if(**ptr == '[') 1525 inside_brackets = TRUE; 1526 1527 ++(*ptr); 1528 } 1529 1530 if(has_start_bracket) { 1531 /* Make sure the last character of the host wasn't a ']'. */ 1532 if(*(*ptr-1) == ']') { 1533 TRACE("(%p %p %x %x): Expected an IP literal inside of the host\n", 1534 ptr, data, flags, extras); 1535 *ptr = data->host; 1536 data->host = NULL; 1537 return FALSE; 1538 } 1539 } 1540 1541 /* Don't overwrite our length if we found a port earlier. */ 1542 if(!data->port) 1543 data->host_len = *ptr - data->host; 1544 1545 /* If the host is empty, then it's an unknown host type. */ 1546 if(data->host_len == 0 || is_res) 1547 data->host_type = Uri_HOST_UNKNOWN; 1548 else 1549 data->host_type = Uri_HOST_DNS; 1550 1551 TRACE("(%p %p %x %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags, extras, 1552 debugstr_wn(data->host, data->host_len), data->host_len); 1553 return TRUE; 1554 } 1555 1556 /* Attempts to parse an IPv6 address out of the URI. 
1557 * 1558 * IPv6address = 6( h16 ":" ) ls32 1559 * / "::" 5( h16 ":" ) ls32 1560 * / [ h16 ] "::" 4( h16 ":" ) ls32 1561 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 1562 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 1563 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 1564 * / [ *4( h16 ":" ) h16 ] "::" ls32 1565 * / [ *5( h16 ":" ) h16 ] "::" h16 1566 * / [ *6( h16 ":" ) h16 ] "::" 1567 * 1568 * ls32 = ( h16 ":" h16 ) / IPv4address 1569 * ; least-significant 32 bits of address. 1570 * 1571 * h16 = 1*4HEXDIG 1572 * ; 16 bits of address represented in hexadecimal. 1573 * 1574 * Modeled after google-url's 'DoParseIPv6' function. 1575 */ 1576 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) { 1577 const WCHAR *start, *cur_start; 1578 ipv6_address ip; 1579 1580 start = cur_start = *ptr; 1581 memset(&ip, 0, sizeof(ipv6_address)); 1582 1583 for(;; ++(*ptr)) { 1584 /* Check if we're on the last character of the host. */ 1585 BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN) 1586 || **ptr == ']'); 1587 1588 BOOL is_split = (**ptr == ':'); 1589 BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':'); 1590 1591 /* Check if we're at the end of a component, or 1592 * if we're at the end of the IPv6 address. 1593 */ 1594 if(is_split || is_end) { 1595 DWORD cur_len = 0; 1596 1597 cur_len = *ptr - cur_start; 1598 1599 /* h16 can't have a length > 4. */ 1600 if(cur_len > 4) { 1601 *ptr = start; 1602 1603 TRACE("(%p %p %x): h16 component to long.\n", 1604 ptr, data, flags); 1605 return FALSE; 1606 } 1607 1608 if(cur_len == 0) { 1609 /* An h16 component can't have the length of 0 unless 1610 * the elision is at the beginning of the address, or 1611 * at the end of the address. 
1612 */ 1613 if(!((*ptr == start && is_elision) || 1614 (is_end && (*ptr-2) == ip.elision))) { 1615 *ptr = start; 1616 TRACE("(%p %p %x): IPv6 component cannot have a length of 0.\n", 1617 ptr, data, flags); 1618 return FALSE; 1619 } 1620 } 1621 1622 if(cur_len > 0) { 1623 /* An IPv6 address can have no more than 8 h16 components. */ 1624 if(ip.h16_count >= 8) { 1625 *ptr = start; 1626 TRACE("(%p %p %x): Not a IPv6 address, too many h16 components.\n", 1627 ptr, data, flags); 1628 return FALSE; 1629 } 1630 1631 ip.components[ip.h16_count].str = cur_start; 1632 ip.components[ip.h16_count].len = cur_len; 1633 1634 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n", 1635 ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len, 1636 ip.h16_count); 1637 ++ip.h16_count; 1638 } 1639 } 1640 1641 if(is_end) 1642 break; 1643 1644 if(is_elision) { 1645 /* A IPv6 address can only have 1 elision ('::'). */ 1646 if(ip.elision) { 1647 *ptr = start; 1648 1649 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n", 1650 ptr, data, flags); 1651 return FALSE; 1652 } 1653 1654 ip.elision = *ptr; 1655 ++(*ptr); 1656 } 1657 1658 if(is_split) 1659 cur_start = *ptr+1; 1660 else { 1661 if(!check_ipv4address(ptr, TRUE)) { 1662 if(!is_hexdigit(**ptr)) { 1663 /* Not a valid character for an IPv6 address. */ 1664 *ptr = start; 1665 return FALSE; 1666 } 1667 } else { 1668 /* Found an IPv4 address. */ 1669 ip.ipv4 = cur_start; 1670 ip.ipv4_len = *ptr - cur_start; 1671 1672 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n", 1673 ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len), 1674 ip.ipv4_len); 1675 1676 /* IPv4 addresses can only appear at the end of a IPv6. */ 1677 break; 1678 } 1679 } 1680 } 1681 1682 compute_ipv6_comps_size(&ip); 1683 1684 /* Make sure the IPv6 address adds up to 16 bytes. 
*/ 1685 if(ip.components_size + ip.elision_size != 16) { 1686 *ptr = start; 1687 TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n", 1688 ptr, data, flags); 1689 return FALSE; 1690 } 1691 1692 if(ip.elision_size == 2) { 1693 /* For some reason on Windows if an elision that represents 1694 * only one h16 component is encountered at the very begin or 1695 * end of an IPv6 address, Windows does not consider it a 1696 * valid IPv6 address. 1697 * 1698 * Ex: [::2:3:4:5:6:7] is not valid, even though the sum 1699 * of all the components == 128bits. 1700 */ 1701 if(ip.elision < ip.components[0].str || 1702 ip.elision > ip.components[ip.h16_count-1].str) { 1703 *ptr = start; 1704 TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n", 1705 ptr, data, flags); 1706 return FALSE; 1707 } 1708 } 1709 1710 data->host_type = Uri_HOST_IPV6; 1711 data->has_ipv6 = TRUE; 1712 data->ipv6_address = ip; 1713 1714 TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n", 1715 ptr, data, flags, debugstr_wn(start, *ptr-start), 1716 (int)(*ptr-start)); 1717 return TRUE; 1718 } 1719 1720 /* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */ 1721 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) { 1722 const WCHAR *start = *ptr; 1723 1724 /* IPvFuture has to start with a 'v' or 'V'. */ 1725 if(**ptr != 'v' && **ptr != 'V') 1726 return FALSE; 1727 1728 /* Following the v there must be at least 1 hex digit. */ 1729 ++(*ptr); 1730 if(!is_hexdigit(**ptr)) { 1731 *ptr = start; 1732 return FALSE; 1733 } 1734 1735 ++(*ptr); 1736 while(is_hexdigit(**ptr)) 1737 ++(*ptr); 1738 1739 /* End of the hexdigit sequence must be a '.' 
*/ 1740 if(**ptr != '.') { 1741 *ptr = start; 1742 return FALSE; 1743 } 1744 1745 ++(*ptr); 1746 if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') { 1747 *ptr = start; 1748 return FALSE; 1749 } 1750 1751 ++(*ptr); 1752 while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':') 1753 ++(*ptr); 1754 1755 data->host_type = Uri_HOST_UNKNOWN; 1756 1757 TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags, 1758 debugstr_wn(start, *ptr-start), (int)(*ptr-start)); 1759 1760 return TRUE; 1761 } 1762 1763 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]" */ 1764 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1765 data->host = *ptr; 1766 1767 if(**ptr != '[' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 1768 data->host = NULL; 1769 return FALSE; 1770 } else if(**ptr == '[') 1771 ++(*ptr); 1772 1773 if(!parse_ipv6address(ptr, data, flags)) { 1774 if(extras & SKIP_IP_FUTURE_CHECK || !parse_ipvfuture(ptr, data, flags)) { 1775 *ptr = data->host; 1776 data->host = NULL; 1777 return FALSE; 1778 } 1779 } 1780 1781 if(**ptr != ']' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 1782 *ptr = data->host; 1783 data->host = NULL; 1784 return FALSE; 1785 } else if(!**ptr && extras & ALLOW_BRACKETLESS_IP_LITERAL) { 1786 /* The IP literal didn't contain brackets and was followed by 1787 * a NULL terminator, so no reason to even check the port. 1788 */ 1789 data->host_len = *ptr - data->host; 1790 return TRUE; 1791 } 1792 1793 ++(*ptr); 1794 if(**ptr == ':') { 1795 ++(*ptr); 1796 /* If a valid port is not found, then let it trickle down to 1797 * parse_reg_name. 1798 */ 1799 if(!parse_port(ptr, data, flags)) { 1800 *ptr = data->host; 1801 data->host = NULL; 1802 return FALSE; 1803 } 1804 } else 1805 data->host_len = *ptr - data->host; 1806 1807 return TRUE; 1808 } 1809 1810 /* Parses the host information from the URI. 
1811 * 1812 * host = IP-literal / IPv4address / reg-name 1813 */ 1814 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1815 if(!parse_ip_literal(ptr, data, flags, extras)) { 1816 if(!parse_ipv4address(ptr, data, flags)) { 1817 if(!parse_reg_name(ptr, data, flags, extras)) { 1818 TRACE("(%p %p %x %x): Malformed URI, Unknown host type.\n", 1819 ptr, data, flags, extras); 1820 return FALSE; 1821 } 1822 } 1823 } 1824 1825 return TRUE; 1826 } 1827 1828 /* Parses the authority information from the URI. 1829 * 1830 * authority = [ userinfo "@" ] host [ ":" port ] 1831 */ 1832 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) { 1833 parse_userinfo(ptr, data, flags); 1834 1835 /* Parsing the port will happen during one of the host parsing 1836 * routines (if the URI has a port). 1837 */ 1838 if(!parse_host(ptr, data, flags, 0)) 1839 return FALSE; 1840 1841 return TRUE; 1842 } 1843 1844 /* Attempts to parse the path information of a hierarchical URI. */ 1845 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) { 1846 const WCHAR *start = *ptr; 1847 static const WCHAR slash[] = {'/',0}; 1848 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 1849 1850 if(is_path_delim(data->scheme_type, **ptr)) { 1851 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->must_have_path) { 1852 data->path = NULL; 1853 data->path_len = 0; 1854 } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 1855 /* If the path component is empty, then a '/' is added. 
*/ 1856 data->path = slash; 1857 data->path_len = 1; 1858 } 1859 } else { 1860 while(!is_path_delim(data->scheme_type, **ptr)) { 1861 if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && !is_file) { 1862 if(!check_pct_encoded(ptr)) { 1863 *ptr = start; 1864 return FALSE; 1865 } else 1866 continue; 1867 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 1868 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 1869 /* File schemes with USE_DOS_PATH set aren't allowed to have 1870 * a '<' or '>' or '\"' appear in them. 1871 */ 1872 *ptr = start; 1873 return FALSE; 1874 } else if(**ptr == '\\') { 1875 /* Not allowed to have a backslash if NO_CANONICALIZE is set 1876 * and the scheme is known type (but not a file scheme). 1877 */ 1878 if(flags & Uri_CREATE_NO_CANONICALIZE) { 1879 if(data->scheme_type != URL_SCHEME_FILE && 1880 data->scheme_type != URL_SCHEME_UNKNOWN) { 1881 *ptr = start; 1882 return FALSE; 1883 } 1884 } 1885 } 1886 1887 ++(*ptr); 1888 } 1889 1890 /* The only time a URI doesn't have a path is when 1891 * the NO_CANONICALIZE flag is set and the raw URI 1892 * didn't contain one. 1893 */ 1894 if(*ptr == start) { 1895 data->path = NULL; 1896 data->path_len = 0; 1897 } else { 1898 data->path = start; 1899 data->path_len = *ptr - start; 1900 } 1901 } 1902 1903 if(data->path) 1904 TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags, 1905 debugstr_wn(data->path, data->path_len), data->path_len); 1906 else 1907 TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags); 1908 1909 return TRUE; 1910 } 1911 1912 /* Parses the path of an opaque URI (much less strict than the parser 1913 * for a hierarchical URI). 1914 * 1915 * NOTE: 1916 * Windows allows invalid % encoded data to appear in opaque URI paths 1917 * for unknown scheme types. 1918 * 1919 * File schemes with USE_DOS_PATH set aren't allowed to have '<', '>', or '\"' 1920 * appear in them. 
1921 */ 1922 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) { 1923 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 1924 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 1925 const BOOL is_mailto = data->scheme_type == URL_SCHEME_MAILTO; 1926 1927 if (is_mailto && (*ptr)[0] == '/' && (*ptr)[1] == '/') 1928 { 1929 if ((*ptr)[2]) data->path = *ptr + 2; 1930 else data->path = NULL; 1931 } 1932 else 1933 data->path = *ptr; 1934 1935 while(!is_path_delim(data->scheme_type, **ptr)) { 1936 if(**ptr == '%' && known_scheme) { 1937 if(!check_pct_encoded(ptr)) { 1938 *ptr = data->path; 1939 data->path = NULL; 1940 return FALSE; 1941 } else 1942 continue; 1943 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 1944 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 1945 *ptr = data->path; 1946 data->path = NULL; 1947 return FALSE; 1948 } 1949 1950 ++(*ptr); 1951 } 1952 1953 if (data->path) data->path_len = *ptr - data->path; 1954 TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags, 1955 debugstr_wn(data->path, data->path_len), data->path_len); 1956 return TRUE; 1957 } 1958 1959 /* Determines how the URI should be parsed after the scheme information. 1960 * 1961 * If the scheme is followed by "//", then it is treated as a hierarchical URI 1962 * which then the authority and path information will be parsed out. Otherwise, the 1963 * URI will be treated as an opaque URI which the authority information is not parsed 1964 * out. 
1965 * 1966 * RFC 3896 definition of hier-part: 1967 * 1968 * hier-part = "//" authority path-abempty 1969 * / path-absolute 1970 * / path-rootless 1971 * / path-empty 1972 * 1973 * MSDN opaque URI definition: 1974 * scheme ":" path [ "#" fragment ] 1975 * 1976 * NOTES: 1977 * If the URI is of an unknown scheme type and has a "//" following the scheme then it 1978 * is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is 1979 * set then it is considered an opaque URI regardless of what follows the scheme information 1980 * (per MSDN documentation). 1981 */ 1982 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { 1983 const WCHAR *start = *ptr; 1984 1985 data->must_have_path = FALSE; 1986 1987 /* For javascript: URIs, simply set everything as a path */ 1988 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) { 1989 data->path = *ptr; 1990 data->path_len = strlenW(*ptr); 1991 data->is_opaque = TRUE; 1992 *ptr += data->path_len; 1993 return TRUE; 1994 } 1995 1996 /* Checks if the authority information needs to be parsed. */ 1997 if(is_hierarchical_uri(ptr, data)) { 1998 /* Only treat it as a hierarchical URI if the scheme_type is known or 1999 * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set. 2000 */ 2001 if(data->scheme_type != URL_SCHEME_UNKNOWN || 2002 !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) { 2003 TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags); 2004 data->is_opaque = FALSE; 2005 2006 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->has_implicit_scheme) { 2007 if(**ptr == '/' && *(*ptr+1) == '/') { 2008 data->must_have_path = TRUE; 2009 *ptr += 2; 2010 } 2011 } 2012 2013 /* TODO: Handle hierarchical URI's, parse authority then parse the path. 
*/ 2014 if(!parse_authority(ptr, data, flags)) 2015 return FALSE; 2016 2017 return parse_path_hierarchical(ptr, data, flags); 2018 } else 2019 /* Reset ptr to its starting position so opaque path parsing 2020 * begins at the correct location. 2021 */ 2022 *ptr = start; 2023 } 2024 2025 /* If it reaches here, then the URI will be treated as an opaque 2026 * URI. 2027 */ 2028 2029 TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags); 2030 2031 data->is_opaque = TRUE; 2032 if(!parse_path_opaque(ptr, data, flags)) 2033 return FALSE; 2034 2035 return TRUE; 2036 } 2037 2038 /* Attempts to parse the query string from the URI. 2039 * 2040 * NOTES: 2041 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 2042 * data is allowed to appear in the query string. For unknown scheme types 2043 * invalid percent encoded data is allowed to appear regardless. 2044 */ 2045 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) { 2046 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2047 2048 if(**ptr != '?') { 2049 TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags); 2050 return TRUE; 2051 } 2052 2053 data->query = *ptr; 2054 2055 ++(*ptr); 2056 while(**ptr && **ptr != '#') { 2057 if(**ptr == '%' && known_scheme && 2058 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 2059 if(!check_pct_encoded(ptr)) { 2060 *ptr = data->query; 2061 data->query = NULL; 2062 return FALSE; 2063 } else 2064 continue; 2065 } 2066 2067 ++(*ptr); 2068 } 2069 2070 data->query_len = *ptr - data->query; 2071 2072 TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags, 2073 debugstr_wn(data->query, data->query_len), data->query_len); 2074 return TRUE; 2075 } 2076 2077 /* Attempts to parse the fragment from the URI. 2078 * 2079 * NOTES: 2080 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 2081 * data is allowed to appear in the query string. 
For unknown scheme types
 *  invalid percent encoded data is allowed to appear regardless.
 *
 *  The stored fragment includes the leading '#' and runs to the end of the
 *  string. A URI without a fragment is not an error.
 */
static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
    const BOOL validate_pct = data->scheme_type != URL_SCHEME_UNKNOWN &&
                              !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO);
    const WCHAR *begin = *ptr;

    if(*begin != '#') {
        TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
        return TRUE;
    }

    data->fragment = begin;

    /* Skip the '#'. */
    ++(*ptr);

    while(**ptr) {
        if(**ptr == '%' && validate_pct) {
            /* A valid sequence has already been skipped by the check. */
            if(check_pct_encoded(ptr))
                continue;

            *ptr = begin;
            data->fragment = NULL;
            return FALSE;
        }

        ++(*ptr);
    }

    data->fragment_len = *ptr - begin;

    TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
        debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
    return TRUE;
}

/* Parses and validates the components of the specified by data->uri
 * and stores the information it parses into 'data'.
 *
 * Returns TRUE if it successfully parsed the URI. False otherwise.
2120 */ 2121 static BOOL parse_uri(parse_data *data, DWORD flags) { 2122 const WCHAR *ptr; 2123 const WCHAR **pptr; 2124 2125 ptr = data->uri; 2126 pptr = &ptr; 2127 2128 TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri)); 2129 2130 if(!parse_scheme(pptr, data, flags, 0)) 2131 return FALSE; 2132 2133 if(!parse_hierpart(pptr, data, flags)) 2134 return FALSE; 2135 2136 if(!parse_query(pptr, data, flags)) 2137 return FALSE; 2138 2139 if(!parse_fragment(pptr, data, flags)) 2140 return FALSE; 2141 2142 TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags); 2143 return TRUE; 2144 } 2145 2146 static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2147 const WCHAR *ptr; 2148 2149 if(!data->username) { 2150 uri->userinfo_start = -1; 2151 return TRUE; 2152 } 2153 2154 uri->userinfo_start = uri->canon_len; 2155 for(ptr = data->username; ptr < data->username+data->username_len; ++ptr) { 2156 if(*ptr == '%') { 2157 /* Only decode % encoded values for known scheme types. */ 2158 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2159 /* See if the value really needs decoding. */ 2160 WCHAR val = decode_pct_val(ptr); 2161 if(is_unreserved(val)) { 2162 if(!computeOnly) 2163 uri->canon_uri[uri->canon_len] = val; 2164 2165 ++uri->canon_len; 2166 2167 /* Move pass the hex characters. */ 2168 ptr += 2; 2169 continue; 2170 } 2171 } 2172 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 2173 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 2174 * is NOT set. 2175 */ 2176 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 2177 if(!computeOnly) 2178 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 2179 2180 uri->canon_len += 3; 2181 continue; 2182 } 2183 } 2184 2185 if(!computeOnly) 2186 /* Nothing special, so just copy the character over. 
*/ 2187 uri->canon_uri[uri->canon_len] = *ptr; 2188 ++uri->canon_len; 2189 } 2190 2191 return TRUE; 2192 } 2193 2194 static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2195 const WCHAR *ptr; 2196 2197 if(!data->password) { 2198 uri->userinfo_split = -1; 2199 return TRUE; 2200 } 2201 2202 if(uri->userinfo_start == -1) 2203 /* Has a password, but, doesn't have a username. */ 2204 uri->userinfo_start = uri->canon_len; 2205 2206 uri->userinfo_split = uri->canon_len - uri->userinfo_start; 2207 2208 /* Add the ':' to the userinfo component. */ 2209 if(!computeOnly) 2210 uri->canon_uri[uri->canon_len] = ':'; 2211 ++uri->canon_len; 2212 2213 for(ptr = data->password; ptr < data->password+data->password_len; ++ptr) { 2214 if(*ptr == '%') { 2215 /* Only decode % encoded values for known scheme types. */ 2216 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2217 /* See if the value really needs decoding. */ 2218 WCHAR val = decode_pct_val(ptr); 2219 if(is_unreserved(val)) { 2220 if(!computeOnly) 2221 uri->canon_uri[uri->canon_len] = val; 2222 2223 ++uri->canon_len; 2224 2225 /* Move pass the hex characters. */ 2226 ptr += 2; 2227 continue; 2228 } 2229 } 2230 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 2231 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 2232 * is NOT set. 2233 */ 2234 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 2235 if(!computeOnly) 2236 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 2237 2238 uri->canon_len += 3; 2239 continue; 2240 } 2241 } 2242 2243 if(!computeOnly) 2244 /* Nothing special, so just copy the character over. */ 2245 uri->canon_uri[uri->canon_len] = *ptr; 2246 ++uri->canon_len; 2247 } 2248 2249 return TRUE; 2250 } 2251 2252 /* Canonicalizes the userinfo of the URI represented by the parse_data. 2253 * 2254 * Canonicalization of the userinfo is a simple process. 
If there are any percent
 * encoded characters that fall in the "unreserved" character set, they are decoded
 * to their actual value. If a character is not in the "unreserved" or "reserved" sets
 * then it is percent encoded. Other than that the characters are copied over without
 * change.
 */
static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
    const BOOL has_userinfo = data->username || data->password;

    /* Start out as "no userinfo"; the helpers below fill these in. */
    uri->userinfo_start = -1;
    uri->userinfo_split = -1;
    uri->userinfo_len = 0;

    if(!has_userinfo)
        return TRUE;

    if(!canonicalize_username(data, uri, flags, computeOnly) ||
       !canonicalize_password(data, uri, flags, computeOnly))
        return FALSE;

    /* The length is taken before the '@' is added, so it excludes it. */
    uri->userinfo_len = uri->canon_len - uri->userinfo_start;

    if(!computeOnly) {
        TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
                data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
                uri->userinfo_split, uri->userinfo_len);

        /* Terminate the userinfo with its '@' delimiter. */
        uri->canon_uri[uri->canon_len] = '@';
    }
    ++uri->canon_len;

    return TRUE;
}

/* Attempts to canonicalize a reg_name.
 *
 * Things that happen:
 *  1) If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
 *     lower cased. Unless it's an unknown scheme type, in which case it's
 *     not lower cased regardless.
 *
 *  2) Unreserved % encoded characters are decoded for known
 *     scheme types.
 *
 *  3) Forbidden characters are % encoded as long as
 *     Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
 *     it isn't an unknown scheme type.
 *
 *  4) If it's a file scheme and the host is "localhost" it's removed.
2303 * 2304 * 5) If it's a file scheme and Uri_CREATE_FILE_USE_DOS_PATH is set, 2305 * then the UNC path characters are added before the host name. 2306 */ 2307 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri, 2308 DWORD flags, BOOL computeOnly) { 2309 static const WCHAR localhostW[] = 2310 {'l','o','c','a','l','h','o','s','t',0}; 2311 const WCHAR *ptr; 2312 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2313 2314 if(data->scheme_type == URL_SCHEME_FILE && 2315 data->host_len == lstrlenW(localhostW)) { 2316 if(!StrCmpNIW(data->host, localhostW, data->host_len)) { 2317 uri->host_start = -1; 2318 uri->host_len = 0; 2319 uri->host_type = Uri_HOST_UNKNOWN; 2320 return TRUE; 2321 } 2322 } 2323 2324 if(data->scheme_type == URL_SCHEME_FILE && flags & Uri_CREATE_FILE_USE_DOS_PATH) { 2325 if(!computeOnly) { 2326 uri->canon_uri[uri->canon_len] = '\\'; 2327 uri->canon_uri[uri->canon_len+1] = '\\'; 2328 } 2329 uri->canon_len += 2; 2330 uri->authority_start = uri->canon_len; 2331 } 2332 2333 uri->host_start = uri->canon_len; 2334 2335 for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) { 2336 if(*ptr == '%' && known_scheme) { 2337 WCHAR val = decode_pct_val(ptr); 2338 if(is_unreserved(val)) { 2339 /* If NO_CANONICALIZE is not set, then windows lower cases the 2340 * decoded value. 2341 */ 2342 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) { 2343 if(!computeOnly) 2344 uri->canon_uri[uri->canon_len] = tolowerW(val); 2345 } else { 2346 if(!computeOnly) 2347 uri->canon_uri[uri->canon_len] = val; 2348 } 2349 ++uri->canon_len; 2350 2351 /* Skip past the % encoded character. */ 2352 ptr += 2; 2353 continue; 2354 } else { 2355 /* Just copy the % over. */ 2356 if(!computeOnly) 2357 uri->canon_uri[uri->canon_len] = *ptr; 2358 ++uri->canon_len; 2359 } 2360 } else if(*ptr == '\\') { 2361 /* Only unknown scheme types could have made it here with a '\\' in the host name. 
*/ 2362 if(!computeOnly) 2363 uri->canon_uri[uri->canon_len] = *ptr; 2364 ++uri->canon_len; 2365 } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && is_ascii(*ptr) && 2366 !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) { 2367 if(!computeOnly) { 2368 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 2369 2370 /* The percent encoded value gets lower cased also. */ 2371 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 2372 uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]); 2373 uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]); 2374 } 2375 } 2376 2377 uri->canon_len += 3; 2378 } else { 2379 if(!computeOnly) { 2380 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme) 2381 uri->canon_uri[uri->canon_len] = tolowerW(*ptr); 2382 else 2383 uri->canon_uri[uri->canon_len] = *ptr; 2384 } 2385 2386 ++uri->canon_len; 2387 } 2388 } 2389 2390 uri->host_len = uri->canon_len - uri->host_start; 2391 2392 if(!computeOnly) 2393 TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags, 2394 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2395 uri->host_len); 2396 2397 if(!computeOnly) 2398 find_domain_name(uri->canon_uri+uri->host_start, uri->host_len, 2399 &(uri->domain_offset)); 2400 2401 return TRUE; 2402 } 2403 2404 /* Attempts to canonicalize an implicit IPv4 address. */ 2405 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2406 uri->host_start = uri->canon_len; 2407 2408 TRACE("%u\n", data->implicit_ipv4); 2409 /* For unknown scheme types Windows doesn't convert 2410 * the value into an IP address, but it still considers 2411 * it an IPv4 address. 
2412 */ 2413 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 2414 if(!computeOnly) 2415 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2416 uri->canon_len += data->host_len; 2417 } else { 2418 if(!computeOnly) 2419 uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4); 2420 else 2421 uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4); 2422 } 2423 2424 uri->host_len = uri->canon_len - uri->host_start; 2425 uri->host_type = Uri_HOST_IPV4; 2426 2427 if(!computeOnly) 2428 TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n", 2429 data, uri, flags, computeOnly, 2430 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2431 uri->host_len); 2432 2433 return TRUE; 2434 } 2435 2436 /* Attempts to canonicalize an IPv4 address. 2437 * 2438 * If the parse_data represents a URI that has an implicit IPv4 address 2439 * (ex. http://256/, this function will convert 256 into 0.0.1.0). If 2440 * the implicit IP address exceeds the value of UINT_MAX (maximum value 2441 * for an IPv4 address) it's canonicalized as if it were a reg-name. 2442 * 2443 * If the parse_data contains a partial or full IPv4 address it normalizes it. 2444 * A partial IPv4 address is something like "192.0" and would be normalized to 2445 * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would 2446 * be normalized to "192.2.1.3". 2447 * 2448 * NOTES: 2449 * Windows ONLY normalizes IPv4 address for known scheme types (one that isn't 2450 * URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from 2451 * the original URI into the canonicalized URI, but, it still recognizes URI's 2452 * host type as HOST_IPV4. 
2453 */ 2454 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2455 if(data->has_implicit_ip) 2456 return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly); 2457 else { 2458 uri->host_start = uri->canon_len; 2459 2460 /* Windows only normalizes for known scheme types. */ 2461 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2462 /* parse_data contains a partial or full IPv4 address, so normalize it. */ 2463 DWORD i, octetDigitCount = 0, octetCount = 0; 2464 BOOL octetHasDigit = FALSE; 2465 2466 for(i = 0; i < data->host_len; ++i) { 2467 if(data->host[i] == '0' && !octetHasDigit) { 2468 /* Can ignore leading zeros if: 2469 * 1) It isn't the last digit of the octet. 2470 * 2) i+1 != data->host_len 2471 * 3) i+1 != '.' 2472 */ 2473 if(octetDigitCount == 2 || 2474 i+1 == data->host_len || 2475 data->host[i+1] == '.') { 2476 if(!computeOnly) 2477 uri->canon_uri[uri->canon_len] = data->host[i]; 2478 ++uri->canon_len; 2479 TRACE("Adding zero\n"); 2480 } 2481 } else if(data->host[i] == '.') { 2482 if(!computeOnly) 2483 uri->canon_uri[uri->canon_len] = data->host[i]; 2484 ++uri->canon_len; 2485 2486 octetDigitCount = 0; 2487 octetHasDigit = FALSE; 2488 ++octetCount; 2489 } else { 2490 if(!computeOnly) 2491 uri->canon_uri[uri->canon_len] = data->host[i]; 2492 ++uri->canon_len; 2493 2494 ++octetDigitCount; 2495 octetHasDigit = TRUE; 2496 } 2497 } 2498 2499 /* Make sure the canonicalized IP address has 4 dec-octets. 2500 * If doesn't add "0" ones until there is 4; 2501 */ 2502 for( ; octetCount < 3; ++octetCount) { 2503 if(!computeOnly) { 2504 uri->canon_uri[uri->canon_len] = '.'; 2505 uri->canon_uri[uri->canon_len+1] = '0'; 2506 } 2507 2508 uri->canon_len += 2; 2509 } 2510 } else { 2511 /* Windows doesn't normalize addresses in unknown schemes. 
*/ 2512 if(!computeOnly) 2513 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2514 uri->canon_len += data->host_len; 2515 } 2516 2517 uri->host_len = uri->canon_len - uri->host_start; 2518 if(!computeOnly) 2519 TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n", 2520 data, uri, flags, computeOnly, 2521 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2522 uri->host_len); 2523 } 2524 2525 return TRUE; 2526 } 2527 2528 /* Attempts to canonicalize the IPv6 address of the URI. 2529 * 2530 * Multiple things happen during the canonicalization of an IPv6 address: 2531 * 1) Any leading zero's in a h16 component are removed. 2532 * Ex: [0001:0022::] -> [1:22::] 2533 * 2534 * 2) The longest sequence of zero h16 components are compressed 2535 * into a "::" (elision). If there's a tie, the first is chosen. 2536 * 2537 * Ex: [0:0:0:0:1:6:7:8] -> [::1:6:7:8] 2538 * [0:0:0:0:1:2::] -> [::1:2:0:0] 2539 * [0:0:1:2:0:0:7:8] -> [::1:2:0:0:7:8] 2540 * 2541 * 3) If an IPv4 address is attached to the IPv6 address, it's 2542 * also normalized. 2543 * Ex: [::001.002.022.000] -> [::1.2.22.0] 2544 * 2545 * 4) If an elision is present, but, only represents one h16 component 2546 * it's expanded. 2547 * 2548 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 2549 * 2550 * 5) If the IPv6 address contains an IPv4 address and there exists 2551 * at least 1 non-zero h16 component the IPv4 address is converted 2552 * into two h16 components, otherwise it's normalized and kept as is. 2553 * 2554 * Ex: [::192.200.003.4] -> [::192.200.3.4] 2555 * [ffff::192.200.003.4] -> [ffff::c0c8:3041] 2556 * 2557 * NOTE: 2558 * For unknown scheme types Windows simply copies the address over without any 2559 * changes. 2560 * 2561 * IPv4 address can be included in an elision if all its components are 0's. 
2562 */ 2563 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri, 2564 DWORD flags, BOOL computeOnly) { 2565 uri->host_start = uri->canon_len; 2566 2567 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 2568 if(!computeOnly) 2569 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2570 uri->canon_len += data->host_len; 2571 } else { 2572 USHORT values[8]; 2573 INT elision_start; 2574 DWORD i, elision_len; 2575 2576 if(!ipv6_to_number(&(data->ipv6_address), values)) { 2577 TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n", 2578 data, uri, flags, computeOnly); 2579 return FALSE; 2580 } 2581 2582 if(!computeOnly) 2583 uri->canon_uri[uri->canon_len] = '['; 2584 ++uri->canon_len; 2585 2586 /* Find where the elision should occur (if any). */ 2587 compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len); 2588 2589 TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags, 2590 computeOnly, elision_start, elision_len); 2591 2592 for(i = 0; i < 8; ++i) { 2593 BOOL in_elision = (elision_start > -1 && i >= elision_start && 2594 i < elision_start+elision_len); 2595 BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision && 2596 data->ipv6_address.h16_count == 0); 2597 2598 if(i == elision_start) { 2599 if(!computeOnly) { 2600 uri->canon_uri[uri->canon_len] = ':'; 2601 uri->canon_uri[uri->canon_len+1] = ':'; 2602 } 2603 uri->canon_len += 2; 2604 } 2605 2606 /* We can ignore the current component if we're in the elision. */ 2607 if(in_elision) 2608 continue; 2609 2610 /* We only add a ':' if we're not at i == 0, or when we're at 2611 * the very end of elision range since the ':' colon was handled 2612 * earlier. Otherwise we would end up with ":::" after elision. 
2613 */ 2614 if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) { 2615 if(!computeOnly) 2616 uri->canon_uri[uri->canon_len] = ':'; 2617 ++uri->canon_len; 2618 } 2619 2620 if(do_ipv4) { 2621 UINT val; 2622 DWORD len; 2623 2624 /* Combine the two parts of the IPv4 address values. */ 2625 val = values[i]; 2626 val <<= 16; 2627 val += values[i+1]; 2628 2629 if(!computeOnly) 2630 len = ui2ipv4(uri->canon_uri+uri->canon_len, val); 2631 else 2632 len = ui2ipv4(NULL, val); 2633 2634 uri->canon_len += len; 2635 ++i; 2636 } else { 2637 /* Write a regular h16 component to the URI. */ 2638 2639 /* Short circuit for the trivial case. */ 2640 if(values[i] == 0) { 2641 if(!computeOnly) 2642 uri->canon_uri[uri->canon_len] = '0'; 2643 ++uri->canon_len; 2644 } else { 2645 static const WCHAR formatW[] = {'%','x',0}; 2646 2647 if(!computeOnly) 2648 uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len, 2649 formatW, values[i]); 2650 else { 2651 WCHAR tmp[5]; 2652 uri->canon_len += sprintfW(tmp, formatW, values[i]); 2653 } 2654 } 2655 } 2656 } 2657 2658 /* Add the closing ']'. */ 2659 if(!computeOnly) 2660 uri->canon_uri[uri->canon_len] = ']'; 2661 ++uri->canon_len; 2662 } 2663 2664 uri->host_len = uri->canon_len - uri->host_start; 2665 2666 if(!computeOnly) 2667 TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags, 2668 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2669 uri->host_len); 2670 2671 return TRUE; 2672 } 2673 2674 /* Attempts to canonicalize the host of the URI (if any). 
*/ 2675 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2676 uri->host_start = -1; 2677 uri->host_len = 0; 2678 uri->domain_offset = -1; 2679 2680 if(data->host) { 2681 switch(data->host_type) { 2682 case Uri_HOST_DNS: 2683 uri->host_type = Uri_HOST_DNS; 2684 if(!canonicalize_reg_name(data, uri, flags, computeOnly)) 2685 return FALSE; 2686 2687 break; 2688 case Uri_HOST_IPV4: 2689 uri->host_type = Uri_HOST_IPV4; 2690 if(!canonicalize_ipv4address(data, uri, flags, computeOnly)) 2691 return FALSE; 2692 2693 break; 2694 case Uri_HOST_IPV6: 2695 if(!canonicalize_ipv6address(data, uri, flags, computeOnly)) 2696 return FALSE; 2697 2698 uri->host_type = Uri_HOST_IPV6; 2699 break; 2700 case Uri_HOST_UNKNOWN: 2701 if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) { 2702 uri->host_start = uri->canon_len; 2703 2704 /* Nothing happens to unknown host types. */ 2705 if(!computeOnly) 2706 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2707 uri->canon_len += data->host_len; 2708 uri->host_len = data->host_len; 2709 } 2710 2711 uri->host_type = Uri_HOST_UNKNOWN; 2712 break; 2713 default: 2714 FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data, 2715 uri, flags, computeOnly, data->host_type); 2716 return FALSE; 2717 } 2718 } 2719 2720 return TRUE; 2721 } 2722 2723 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2724 BOOL has_default_port = FALSE; 2725 USHORT default_port = 0; 2726 DWORD i; 2727 2728 uri->port_offset = -1; 2729 2730 /* Check if the scheme has a default port. 
*/ 2731 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 2732 if(default_ports[i].scheme == data->scheme_type) { 2733 has_default_port = TRUE; 2734 default_port = default_ports[i].port; 2735 break; 2736 } 2737 } 2738 2739 uri->has_port = data->has_port || has_default_port; 2740 2741 /* Possible cases: 2742 * 1) Has a port which is the default port. 2743 * 2) Has a port (not the default). 2744 * 3) Doesn't have a port, but, scheme has a default port. 2745 * 4) No port. 2746 */ 2747 if(has_default_port && data->has_port && data->port_value == default_port) { 2748 /* If it's the default port and this flag isn't set, don't do anything. */ 2749 if(flags & Uri_CREATE_NO_CANONICALIZE) { 2750 uri->port_offset = uri->canon_len-uri->authority_start; 2751 if(!computeOnly) 2752 uri->canon_uri[uri->canon_len] = ':'; 2753 ++uri->canon_len; 2754 2755 if(data->port) { 2756 /* Copy the original port over. */ 2757 if(!computeOnly) 2758 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 2759 uri->canon_len += data->port_len; 2760 } else { 2761 if(!computeOnly) 2762 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 2763 else 2764 uri->canon_len += ui2str(NULL, data->port_value); 2765 } 2766 } 2767 2768 uri->port = default_port; 2769 } else if(data->has_port) { 2770 uri->port_offset = uri->canon_len-uri->authority_start; 2771 if(!computeOnly) 2772 uri->canon_uri[uri->canon_len] = ':'; 2773 ++uri->canon_len; 2774 2775 if(flags & Uri_CREATE_NO_CANONICALIZE && data->port) { 2776 /* Copy the original over without changes. 
*/ 2777 if(!computeOnly) 2778 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 2779 uri->canon_len += data->port_len; 2780 } else { 2781 if(!computeOnly) 2782 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 2783 else 2784 uri->canon_len += ui2str(NULL, data->port_value); 2785 } 2786 2787 uri->port = data->port_value; 2788 } else if(has_default_port) 2789 uri->port = default_port; 2790 2791 return TRUE; 2792 } 2793 2794 /* Canonicalizes the authority of the URI represented by the parse_data. */ 2795 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2796 uri->authority_start = uri->canon_len; 2797 uri->authority_len = 0; 2798 2799 if(!canonicalize_userinfo(data, uri, flags, computeOnly)) 2800 return FALSE; 2801 2802 if(!canonicalize_host(data, uri, flags, computeOnly)) 2803 return FALSE; 2804 2805 if(!canonicalize_port(data, uri, flags, computeOnly)) 2806 return FALSE; 2807 2808 if(uri->host_start != -1 || (data->is_relative && (data->password || data->username))) 2809 uri->authority_len = uri->canon_len - uri->authority_start; 2810 else 2811 uri->authority_start = -1; 2812 2813 return TRUE; 2814 } 2815 2816 /* Attempts to canonicalize the path of a hierarchical URI. 2817 * 2818 * Things that happen: 2819 * 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN 2820 * flag is set or it's a file URI. Forbidden characters are always encoded 2821 * for file schemes regardless and forbidden characters are never encoded 2822 * for unknown scheme types. 2823 * 2824 * 2). For known scheme types '\\' are changed to '/'. 2825 * 2826 * 3). Percent encoded, unreserved characters are decoded to their actual values. 2827 * Unless the scheme type is unknown. For file schemes any percent encoded 2828 * character in the unreserved or reserved set is decoded. 2829 * 2830 * 4). 
For File schemes if the path is starts with a drive letter and doesn't 2831 * start with a '/' then one is appended. 2832 * Ex: file://c:/test.mp3 -> file:///c:/test.mp3 2833 * 2834 * 5). Dot segments are removed from the path for all scheme types 2835 * unless NO_CANONICALIZE flag is set. Dot segments aren't removed 2836 * for wildcard scheme types. 2837 * 2838 * NOTES: 2839 * file://c:/test%20test -> file:///c:/test%2520test 2840 * file://c:/test%3Etest -> file:///c:/test%253Etest 2841 * if Uri_CREATE_FILE_USE_DOS_PATH is not set: 2842 * file:///c:/test%20test -> file:///c:/test%20test 2843 * file:///c:/test%test -> file:///c:/test%25test 2844 */ 2845 static DWORD canonicalize_path_hierarchical(const WCHAR *path, DWORD path_len, URL_SCHEME scheme_type, BOOL has_host, DWORD flags, 2846 BOOL is_implicit_scheme, WCHAR *ret_path) { 2847 const BOOL known_scheme = scheme_type != URL_SCHEME_UNKNOWN; 2848 const BOOL is_file = scheme_type == URL_SCHEME_FILE; 2849 const BOOL is_res = scheme_type == URL_SCHEME_RES; 2850 const WCHAR *ptr; 2851 BOOL escape_pct = FALSE; 2852 DWORD len = 0; 2853 2854 if(!path) 2855 return 0; 2856 2857 ptr = path; 2858 2859 if(is_file && !has_host) { 2860 /* Check if a '/' needs to be appended for the file scheme. */ 2861 if(path_len > 1 && is_drive_path(ptr) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2862 if(ret_path) 2863 ret_path[len] = '/'; 2864 len++; 2865 escape_pct = TRUE; 2866 } else if(*ptr == '/') { 2867 if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2868 /* Copy the extra '/' over. */ 2869 if(ret_path) 2870 ret_path[len] = '/'; 2871 len++; 2872 } 2873 ++ptr; 2874 } 2875 2876 if(is_drive_path(ptr)) { 2877 if(ret_path) { 2878 ret_path[len] = *ptr; 2879 /* If there's a '|' after the drive letter, convert it to a ':'. */ 2880 ret_path[len+1] = ':'; 2881 } 2882 ptr += 2; 2883 len += 2; 2884 } 2885 } 2886 2887 if(!is_file && *path && *path != '/') { 2888 /* Prepend a '/' to the path if it doesn't have one. 
*/ 2889 if(ret_path) 2890 ret_path[len] = '/'; 2891 len++; 2892 } 2893 2894 for(; ptr < path+path_len; ++ptr) { 2895 BOOL do_default_action = TRUE; 2896 2897 if(*ptr == '%' && !is_res) { 2898 const WCHAR *tmp = ptr; 2899 WCHAR val; 2900 2901 /* Check if the % represents a valid encoded char, or if it needs encoding. */ 2902 BOOL force_encode = !check_pct_encoded(&tmp) && is_file && !(flags&Uri_CREATE_FILE_USE_DOS_PATH); 2903 val = decode_pct_val(ptr); 2904 2905 if(force_encode || escape_pct) { 2906 /* Escape the percent sign in the file URI. */ 2907 if(ret_path) 2908 pct_encode_val(*ptr, ret_path+len); 2909 len += 3; 2910 do_default_action = FALSE; 2911 } else if((is_unreserved(val) && known_scheme) || 2912 (is_file && !is_implicit_scheme && (is_unreserved(val) || is_reserved(val) || 2913 (val && flags&Uri_CREATE_FILE_USE_DOS_PATH && !is_forbidden_dos_path_char(val))))) { 2914 if(ret_path) 2915 ret_path[len] = val; 2916 len++; 2917 2918 ptr += 2; 2919 continue; 2920 } 2921 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2922 /* Convert the '/' back to a '\\'. */ 2923 if(ret_path) 2924 ret_path[len] = '\\'; 2925 len++; 2926 do_default_action = FALSE; 2927 } else if(*ptr == '\\' && known_scheme) { 2928 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 2929 /* Convert '\\' into a '/'. */ 2930 if(ret_path) 2931 ret_path[len] = '/'; 2932 len++; 2933 do_default_action = FALSE; 2934 } 2935 } else if(known_scheme && !is_res && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) && 2936 (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) { 2937 if(!is_file || !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2938 /* Escape the forbidden character. 
*/ 2939 if(ret_path) 2940 pct_encode_val(*ptr, ret_path+len); 2941 len += 3; 2942 do_default_action = FALSE; 2943 } 2944 } 2945 2946 if(do_default_action) { 2947 if(ret_path) 2948 ret_path[len] = *ptr; 2949 len++; 2950 } 2951 } 2952 2953 /* Removing the dot segments only happens when it's not in 2954 * computeOnly mode and it's not a wildcard scheme. File schemes 2955 * with USE_DOS_PATH set don't get dot segments removed. 2956 */ 2957 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) && 2958 scheme_type != URL_SCHEME_WILDCARD) { 2959 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && ret_path) { 2960 /* Remove the dot segments (if any) and reset everything to the new 2961 * correct length. 2962 */ 2963 len = remove_dot_segments(ret_path, len); 2964 } 2965 } 2966 2967 if(ret_path) 2968 TRACE("Canonicalized path %s len=%d\n", debugstr_wn(ret_path, len), len); 2969 return len; 2970 } 2971 2972 /* Attempts to canonicalize the path for an opaque URI. 2973 * 2974 * For known scheme types: 2975 * 1) forbidden characters are percent encoded if 2976 * NO_ENCODE_FORBIDDEN_CHARACTERS isn't set. 2977 * 2978 * 2) Percent encoded, unreserved characters are decoded 2979 * to their actual values, for known scheme types. 2980 * 2981 * 3) '\\' are changed to '/' for known scheme types 2982 * except for mailto schemes. 2983 * 2984 * 4) For file schemes, if USE_DOS_PATH is set all '/' 2985 * are converted to backslashes. 2986 * 2987 * 5) For file schemes, if USE_DOS_PATH isn't set all '\' 2988 * are converted to forward slashes. 
2989 */ 2990 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2991 const WCHAR *ptr; 2992 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2993 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 2994 const BOOL is_mk = data->scheme_type == URL_SCHEME_MK; 2995 2996 if(!data->path) { 2997 uri->path_start = -1; 2998 uri->path_len = 0; 2999 return TRUE; 3000 } 3001 3002 uri->path_start = uri->canon_len; 3003 3004 if(is_mk){ 3005 /* hijack this flag for SCHEME_MK to tell the function when to start 3006 * converting slashes */ 3007 flags |= Uri_CREATE_FILE_USE_DOS_PATH; 3008 } 3009 3010 /* For javascript: URIs, simply copy path part without any canonicalization */ 3011 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) { 3012 if(!computeOnly) 3013 memcpy(uri->canon_uri+uri->canon_len, data->path, data->path_len*sizeof(WCHAR)); 3014 uri->path_len = data->path_len; 3015 uri->canon_len += data->path_len; 3016 return TRUE; 3017 } 3018 3019 /* Windows doesn't allow a "//" to appear after the scheme 3020 * of a URI, if it's an opaque URI. 3021 */ 3022 if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') { 3023 /* So it inserts a "/." before the "//" if it exists. 
*/ 3024 if(!computeOnly) { 3025 uri->canon_uri[uri->canon_len] = '/'; 3026 uri->canon_uri[uri->canon_len+1] = '.'; 3027 } 3028 3029 uri->canon_len += 2; 3030 } 3031 3032 for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) { 3033 BOOL do_default_action = TRUE; 3034 3035 if(*ptr == '%' && known_scheme) { 3036 WCHAR val = decode_pct_val(ptr); 3037 3038 if(is_unreserved(val)) { 3039 if(!computeOnly) 3040 uri->canon_uri[uri->canon_len] = val; 3041 ++uri->canon_len; 3042 3043 ptr += 2; 3044 continue; 3045 } 3046 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 3047 if(!computeOnly) 3048 uri->canon_uri[uri->canon_len] = '\\'; 3049 ++uri->canon_len; 3050 do_default_action = FALSE; 3051 } else if(*ptr == '\\') { 3052 if((data->is_relative || is_mk || is_file) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 3053 /* Convert to a '/'. */ 3054 if(!computeOnly) 3055 uri->canon_uri[uri->canon_len] = '/'; 3056 ++uri->canon_len; 3057 do_default_action = FALSE; 3058 } 3059 } else if(is_mk && *ptr == ':' && ptr + 1 < data->path + data->path_len && *(ptr + 1) == ':') { 3060 flags &= ~Uri_CREATE_FILE_USE_DOS_PATH; 3061 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) && 3062 !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 3063 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 3064 if(!computeOnly) 3065 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3066 uri->canon_len += 3; 3067 do_default_action = FALSE; 3068 } 3069 } 3070 3071 if(do_default_action) { 3072 if(!computeOnly) 3073 uri->canon_uri[uri->canon_len] = *ptr; 3074 ++uri->canon_len; 3075 } 3076 } 3077 3078 if(is_mk && !computeOnly && !(flags & Uri_CREATE_NO_CANONICALIZE)) { 3079 DWORD new_len = remove_dot_segments(uri->canon_uri + uri->path_start, 3080 uri->canon_len - uri->path_start); 3081 uri->canon_len = uri->path_start + new_len; 3082 } 3083 3084 uri->path_len = uri->canon_len - uri->path_start; 3085 3086 if(!computeOnly) 3087 
TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly, 3088 debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len); 3089 return TRUE; 3090 } 3091 3092 /* Determines how the URI represented by the parse_data should be canonicalized. 3093 * 3094 * Essentially, if the parse_data represents an hierarchical URI then it calls 3095 * canonicalize_authority and the canonicalization functions for the path. If the 3096 * URI is opaque it canonicalizes the path of the URI. 3097 */ 3098 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3099 if(!data->is_opaque || (data->is_relative && (data->password || data->username))) { 3100 /* "//" is only added for non-wildcard scheme types. 3101 * 3102 * A "//" is only added to a relative URI if it has a 3103 * host or port component (this only happens if a IUriBuilder 3104 * is generating an IUri). 3105 */ 3106 if((data->is_relative && (data->host || data->has_port)) || 3107 (!data->is_relative && data->scheme_type != URL_SCHEME_WILDCARD)) { 3108 if(data->scheme_type == URL_SCHEME_WILDCARD) 3109 FIXME("Here\n"); 3110 3111 if(!computeOnly) { 3112 INT pos = uri->canon_len; 3113 3114 uri->canon_uri[pos] = '/'; 3115 uri->canon_uri[pos+1] = '/'; 3116 } 3117 uri->canon_len += 2; 3118 } 3119 3120 if(!canonicalize_authority(data, uri, flags, computeOnly)) 3121 return FALSE; 3122 3123 if(data->is_relative && (data->password || data->username)) { 3124 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 3125 return FALSE; 3126 } else { 3127 if(!computeOnly) 3128 uri->path_start = uri->canon_len; 3129 uri->path_len = canonicalize_path_hierarchical(data->path, data->path_len, data->scheme_type, data->host_len != 0, 3130 flags, data->has_implicit_scheme, computeOnly ? 
NULL : uri->canon_uri+uri->canon_len); 3131 uri->canon_len += uri->path_len; 3132 if(!computeOnly && !uri->path_len) 3133 uri->path_start = -1; 3134 } 3135 } else { 3136 /* Opaque URI's don't have an authority. */ 3137 uri->userinfo_start = uri->userinfo_split = -1; 3138 uri->userinfo_len = 0; 3139 uri->host_start = -1; 3140 uri->host_len = 0; 3141 uri->host_type = Uri_HOST_UNKNOWN; 3142 uri->has_port = FALSE; 3143 uri->authority_start = -1; 3144 uri->authority_len = 0; 3145 uri->domain_offset = -1; 3146 uri->port_offset = -1; 3147 3148 if(is_hierarchical_scheme(data->scheme_type)) { 3149 DWORD i; 3150 3151 /* Absolute URIs aren't displayed for known scheme types 3152 * which should be hierarchical URIs. 3153 */ 3154 uri->display_modifiers |= URI_DISPLAY_NO_ABSOLUTE_URI; 3155 3156 /* Windows also sets the port for these (if they have one). */ 3157 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 3158 if(data->scheme_type == default_ports[i].scheme) { 3159 uri->has_port = TRUE; 3160 uri->port = default_ports[i].port; 3161 break; 3162 } 3163 } 3164 } 3165 3166 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 3167 return FALSE; 3168 } 3169 3170 if(uri->path_start > -1 && !computeOnly) 3171 /* Finding file extensions happens for both types of URIs. */ 3172 uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len); 3173 else 3174 uri->extension_offset = -1; 3175 3176 return TRUE; 3177 } 3178 3179 /* Attempts to canonicalize the query string of the URI. 3180 * 3181 * Things that happen: 3182 * 1) For known scheme types forbidden characters 3183 * are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set 3184 * or NO_ENCODE_FORBIDDEN_CHARACTERS is set. 3185 * 3186 * 2) For known scheme types, percent encoded, unreserved characters 3187 * are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set. 
3188 */ 3189 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3190 const WCHAR *ptr, *end; 3191 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 3192 3193 if(!data->query) { 3194 uri->query_start = -1; 3195 uri->query_len = 0; 3196 return TRUE; 3197 } 3198 3199 uri->query_start = uri->canon_len; 3200 3201 end = data->query+data->query_len; 3202 for(ptr = data->query; ptr < end; ++ptr) { 3203 if(*ptr == '%') { 3204 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3205 WCHAR val = decode_pct_val(ptr); 3206 if(is_unreserved(val)) { 3207 if(!computeOnly) 3208 uri->canon_uri[uri->canon_len] = val; 3209 ++uri->canon_len; 3210 3211 ptr += 2; 3212 continue; 3213 } 3214 } 3215 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 3216 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 3217 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3218 if(!computeOnly) 3219 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3220 uri->canon_len += 3; 3221 continue; 3222 } 3223 } 3224 3225 if(!computeOnly) 3226 uri->canon_uri[uri->canon_len] = *ptr; 3227 ++uri->canon_len; 3228 } 3229 3230 uri->query_len = uri->canon_len - uri->query_start; 3231 3232 if(!computeOnly) 3233 TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags, 3234 computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len), 3235 uri->query_len); 3236 return TRUE; 3237 } 3238 3239 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3240 const WCHAR *ptr, *end; 3241 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 3242 3243 if(!data->fragment) { 3244 uri->fragment_start = -1; 3245 uri->fragment_len = 0; 3246 return TRUE; 3247 } 3248 3249 uri->fragment_start = uri->canon_len; 3250 3251 end = data->fragment + data->fragment_len; 3252 for(ptr = data->fragment; ptr < end; ++ptr) { 3253 if(*ptr == 
'%') { 3254 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3255 WCHAR val = decode_pct_val(ptr); 3256 if(is_unreserved(val)) { 3257 if(!computeOnly) 3258 uri->canon_uri[uri->canon_len] = val; 3259 ++uri->canon_len; 3260 3261 ptr += 2; 3262 continue; 3263 } 3264 } 3265 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 3266 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 3267 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3268 if(!computeOnly) 3269 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3270 uri->canon_len += 3; 3271 continue; 3272 } 3273 } 3274 3275 if(!computeOnly) 3276 uri->canon_uri[uri->canon_len] = *ptr; 3277 ++uri->canon_len; 3278 } 3279 3280 uri->fragment_len = uri->canon_len - uri->fragment_start; 3281 3282 if(!computeOnly) 3283 TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags, 3284 computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len), 3285 uri->fragment_len); 3286 return TRUE; 3287 } 3288 3289 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */ 3290 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3291 uri->scheme_start = -1; 3292 uri->scheme_len = 0; 3293 3294 if(!data->scheme) { 3295 /* The only type of URI that doesn't have to have a scheme is a relative 3296 * URI. 3297 */ 3298 if(!data->is_relative) { 3299 FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data, 3300 uri, flags, debugstr_w(data->uri)); 3301 return FALSE; 3302 } 3303 } else { 3304 if(!computeOnly) { 3305 DWORD i; 3306 INT pos = uri->canon_len; 3307 3308 for(i = 0; i < data->scheme_len; ++i) { 3309 /* Scheme name must be lower case after canonicalization. 
*/ 3310 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]); 3311 } 3312 3313 uri->canon_uri[i + pos] = ':'; 3314 uri->scheme_start = pos; 3315 3316 TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags, 3317 debugstr_wn(uri->canon_uri+uri->scheme_start, data->scheme_len), data->scheme_len); 3318 } 3319 3320 /* This happens in both computation modes. */ 3321 uri->canon_len += data->scheme_len + 1; 3322 uri->scheme_len = data->scheme_len; 3323 } 3324 return TRUE; 3325 } 3326 3327 /* Computes what the length of the URI specified by the parse_data will be 3328 * after canonicalization occurs using the specified flags. 3329 * 3330 * This function will return a non-zero value indicating the length of the canonicalized 3331 * URI, or -1 on error. 3332 */ 3333 static int compute_canonicalized_length(const parse_data *data, DWORD flags) { 3334 Uri uri; 3335 3336 memset(&uri, 0, sizeof(Uri)); 3337 3338 TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags, 3339 debugstr_w(data->uri)); 3340 3341 if(!canonicalize_scheme(data, &uri, flags, TRUE)) { 3342 ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags); 3343 return -1; 3344 } 3345 3346 if(!canonicalize_hierpart(data, &uri, flags, TRUE)) { 3347 ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags); 3348 return -1; 3349 } 3350 3351 if(!canonicalize_query(data, &uri, flags, TRUE)) { 3352 ERR("(%p %x): Failed to compute query string length.\n", data, flags); 3353 return -1; 3354 } 3355 3356 if(!canonicalize_fragment(data, &uri, flags, TRUE)) { 3357 ERR("(%p %x): Failed to compute fragment length.\n", data, flags); 3358 return -1; 3359 } 3360 3361 TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len); 3362 3363 return uri.canon_len; 3364 } 3365 3366 /* Canonicalizes the URI data specified in the parse_data, using the given flags. 
If the 3367 * canonicalization succeeds it will store all the canonicalization information 3368 * in the pointer to the Uri. 3369 * 3370 * To canonicalize a URI this function first computes what the length of the URI 3371 * specified by the parse_data will be. Once this is done it will then perform the actual 3372 * canonicalization of the URI. 3373 */ 3374 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { 3375 INT len; 3376 3377 uri->canon_uri = NULL; 3378 uri->canon_size = uri->canon_len = 0; 3379 3380 TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri)); 3381 3382 /* First try to compute the length of the URI. */ 3383 len = compute_canonicalized_length(data, flags); 3384 if(len == -1) { 3385 ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags, 3386 debugstr_w(data->uri)); 3387 return E_INVALIDARG; 3388 } 3389 3390 uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR)); 3391 if(!uri->canon_uri) 3392 return E_OUTOFMEMORY; 3393 3394 uri->canon_size = len; 3395 if(!canonicalize_scheme(data, uri, flags, FALSE)) { 3396 ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags); 3397 return E_INVALIDARG; 3398 } 3399 uri->scheme_type = data->scheme_type; 3400 3401 if(!canonicalize_hierpart(data, uri, flags, FALSE)) { 3402 ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags); 3403 return E_INVALIDARG; 3404 } 3405 3406 if(!canonicalize_query(data, uri, flags, FALSE)) { 3407 ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n", 3408 data, uri, flags); 3409 return E_INVALIDARG; 3410 } 3411 3412 if(!canonicalize_fragment(data, uri, flags, FALSE)) { 3413 ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n", 3414 data, uri, flags); 3415 return E_INVALIDARG; 3416 } 3417 3418 /* There's a possibility we didn't use all the space we allocated 3419 * earlier. 
3420 */ 3421 if(uri->canon_len < uri->canon_size) { 3422 /* This happens if the URI is hierarchical and dot 3423 * segments were removed from its path. 3424 */ 3425 WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR)); 3426 if(!tmp) 3427 return E_OUTOFMEMORY; 3428 3429 uri->canon_uri = tmp; 3430 uri->canon_size = uri->canon_len; 3431 } 3432 3433 uri->canon_uri[uri->canon_len] = '\0'; 3434 TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri)); 3435 3436 return S_OK; 3437 } 3438 3439 static HRESULT get_builder_component(LPWSTR *component, DWORD *component_len, 3440 LPCWSTR source, DWORD source_len, 3441 LPCWSTR *output, DWORD *output_len) 3442 { 3443 if(!output_len) { 3444 if(output) 3445 *output = NULL; 3446 return E_POINTER; 3447 } 3448 3449 if(!output) { 3450 *output_len = 0; 3451 return E_POINTER; 3452 } 3453 3454 if(!(*component) && source) { 3455 /* Allocate 'component', and copy the contents from 'source' 3456 * into the new allocation. 3457 */ 3458 *component = heap_alloc((source_len+1)*sizeof(WCHAR)); 3459 if(!(*component)) 3460 return E_OUTOFMEMORY; 3461 3462 memcpy(*component, source, source_len*sizeof(WCHAR)); 3463 (*component)[source_len] = '\0'; 3464 *component_len = source_len; 3465 } 3466 3467 *output = *component; 3468 *output_len = *component_len; 3469 return *output ? S_OK : S_FALSE; 3470 } 3471 3472 /* Allocates 'component' and copies the string from 'new_value' into 'component'. 3473 * If 'prefix' is set and 'new_value' isn't NULL, then it checks if 'new_value' 3474 * starts with 'prefix'. If it doesn't then 'prefix' is prepended to 'component'. 3475 * 3476 * If everything is successful, then will set 'success_flag' in 'flags'. 
3477 */ 3478 static HRESULT set_builder_component(LPWSTR *component, DWORD *component_len, LPCWSTR new_value, 3479 WCHAR prefix, DWORD *flags, DWORD success_flag) 3480 { 3481 heap_free(*component); 3482 3483 if(!new_value) { 3484 *component = NULL; 3485 *component_len = 0; 3486 } else { 3487 BOOL add_prefix = FALSE; 3488 DWORD len = lstrlenW(new_value); 3489 DWORD pos = 0; 3490 3491 if(prefix && *new_value != prefix) { 3492 add_prefix = TRUE; 3493 *component = heap_alloc((len+2)*sizeof(WCHAR)); 3494 } else 3495 *component = heap_alloc((len+1)*sizeof(WCHAR)); 3496 3497 if(!(*component)) 3498 return E_OUTOFMEMORY; 3499 3500 if(add_prefix) 3501 (*component)[pos++] = prefix; 3502 3503 memcpy(*component+pos, new_value, (len+1)*sizeof(WCHAR)); 3504 *component_len = len+pos; 3505 } 3506 3507 *flags |= success_flag; 3508 return S_OK; 3509 } 3510 3511 static void reset_builder(UriBuilder *builder) { 3512 if(builder->uri) 3513 IUri_Release(&builder->uri->IUri_iface); 3514 builder->uri = NULL; 3515 3516 heap_free(builder->fragment); 3517 builder->fragment = NULL; 3518 builder->fragment_len = 0; 3519 3520 heap_free(builder->host); 3521 builder->host = NULL; 3522 builder->host_len = 0; 3523 3524 heap_free(builder->password); 3525 builder->password = NULL; 3526 builder->password_len = 0; 3527 3528 heap_free(builder->path); 3529 builder->path = NULL; 3530 builder->path_len = 0; 3531 3532 heap_free(builder->query); 3533 builder->query = NULL; 3534 builder->query_len = 0; 3535 3536 heap_free(builder->scheme); 3537 builder->scheme = NULL; 3538 builder->scheme_len = 0; 3539 3540 heap_free(builder->username); 3541 builder->username = NULL; 3542 builder->username_len = 0; 3543 3544 builder->has_port = FALSE; 3545 builder->port = 0; 3546 builder->modified_props = 0; 3547 } 3548 3549 static HRESULT validate_scheme_name(const UriBuilder *builder, parse_data *data, DWORD flags) { 3550 const WCHAR *component; 3551 const WCHAR *ptr; 3552 const WCHAR **pptr; 3553 DWORD expected_len; 3554 3555 
if(builder->scheme) { 3556 ptr = builder->scheme; 3557 expected_len = builder->scheme_len; 3558 } else if(builder->uri && builder->uri->scheme_start > -1) { 3559 ptr = builder->uri->canon_uri+builder->uri->scheme_start; 3560 expected_len = builder->uri->scheme_len; 3561 } else { 3562 static const WCHAR nullW[] = {0}; 3563 ptr = nullW; 3564 expected_len = 0; 3565 } 3566 3567 component = ptr; 3568 pptr = &ptr; 3569 if(parse_scheme(pptr, data, flags, ALLOW_NULL_TERM_SCHEME) && 3570 data->scheme_len == expected_len) { 3571 if(data->scheme) 3572 TRACE("(%p %p %x): Found valid scheme component %s len=%d.\n", builder, data, flags, 3573 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 3574 } else { 3575 TRACE("(%p %p %x): Invalid scheme component found %s.\n", builder, data, flags, 3576 debugstr_wn(component, expected_len)); 3577 return INET_E_INVALID_URL; 3578 } 3579 3580 return S_OK; 3581 } 3582 3583 static HRESULT validate_username(const UriBuilder *builder, parse_data *data, DWORD flags) { 3584 const WCHAR *ptr; 3585 const WCHAR **pptr; 3586 DWORD expected_len; 3587 3588 if(builder->username) { 3589 ptr = builder->username; 3590 expected_len = builder->username_len; 3591 } else if(!(builder->modified_props & Uri_HAS_USER_NAME) && builder->uri && 3592 builder->uri->userinfo_start > -1 && builder->uri->userinfo_split != 0) { 3593 /* Just use the username from the base Uri. */ 3594 data->username = builder->uri->canon_uri+builder->uri->userinfo_start; 3595 data->username_len = (builder->uri->userinfo_split > -1) ? 
3596 builder->uri->userinfo_split : builder->uri->userinfo_len; 3597 ptr = NULL; 3598 } else { 3599 ptr = NULL; 3600 expected_len = 0; 3601 } 3602 3603 if(ptr) { 3604 const WCHAR *component = ptr; 3605 pptr = &ptr; 3606 if(parse_username(pptr, data, flags, ALLOW_NULL_TERM_USER_NAME) && 3607 data->username_len == expected_len) 3608 TRACE("(%p %p %x): Found valid username component %s len=%d.\n", builder, data, flags, 3609 debugstr_wn(data->username, data->username_len), data->username_len); 3610 else { 3611 TRACE("(%p %p %x): Invalid username component found %s.\n", builder, data, flags, 3612 debugstr_wn(component, expected_len)); 3613 return INET_E_INVALID_URL; 3614 } 3615 } 3616 3617 return S_OK; 3618 } 3619 3620 static HRESULT validate_password(const UriBuilder *builder, parse_data *data, DWORD flags) { 3621 const WCHAR *ptr; 3622 const WCHAR **pptr; 3623 DWORD expected_len; 3624 3625 if(builder->password) { 3626 ptr = builder->password; 3627 expected_len = builder->password_len; 3628 } else if(!(builder->modified_props & Uri_HAS_PASSWORD) && builder->uri && 3629 builder->uri->userinfo_split > -1) { 3630 data->password = builder->uri->canon_uri+builder->uri->userinfo_start+builder->uri->userinfo_split+1; 3631 data->password_len = builder->uri->userinfo_len-builder->uri->userinfo_split-1; 3632 ptr = NULL; 3633 } else { 3634 ptr = NULL; 3635 expected_len = 0; 3636 } 3637 3638 if(ptr) { 3639 const WCHAR *component = ptr; 3640 pptr = &ptr; 3641 if(parse_password(pptr, data, flags, ALLOW_NULL_TERM_PASSWORD) && 3642 data->password_len == expected_len) 3643 TRACE("(%p %p %x): Found valid password component %s len=%d.\n", builder, data, flags, 3644 debugstr_wn(data->password, data->password_len), data->password_len); 3645 else { 3646 TRACE("(%p %p %x): Invalid password component found %s.\n", builder, data, flags, 3647 debugstr_wn(component, expected_len)); 3648 return INET_E_INVALID_URL; 3649 } 3650 } 3651 3652 return S_OK; 3653 } 3654 3655 static HRESULT 
validate_userinfo(const UriBuilder *builder, parse_data *data, DWORD flags) { 3656 HRESULT hr; 3657 3658 hr = validate_username(builder, data, flags); 3659 if(FAILED(hr)) 3660 return hr; 3661 3662 hr = validate_password(builder, data, flags); 3663 if(FAILED(hr)) 3664 return hr; 3665 3666 return S_OK; 3667 } 3668 3669 static HRESULT validate_host(const UriBuilder *builder, parse_data *data, DWORD flags) { 3670 const WCHAR *ptr; 3671 const WCHAR **pptr; 3672 DWORD expected_len; 3673 3674 if(builder->host) { 3675 ptr = builder->host; 3676 expected_len = builder->host_len; 3677 } else if(!(builder->modified_props & Uri_HAS_HOST) && builder->uri && builder->uri->host_start > -1) { 3678 ptr = builder->uri->canon_uri + builder->uri->host_start; 3679 expected_len = builder->uri->host_len; 3680 } else 3681 ptr = NULL; 3682 3683 if(ptr) { 3684 const WCHAR *component = ptr; 3685 DWORD extras = ALLOW_BRACKETLESS_IP_LITERAL|IGNORE_PORT_DELIMITER|SKIP_IP_FUTURE_CHECK; 3686 pptr = &ptr; 3687 3688 if(parse_host(pptr, data, flags, extras) && data->host_len == expected_len) 3689 TRACE("(%p %p %x): Found valid host name %s len=%d type=%d.\n", builder, data, flags, 3690 debugstr_wn(data->host, data->host_len), data->host_len, data->host_type); 3691 else { 3692 TRACE("(%p %p %x): Invalid host name found %s.\n", builder, data, flags, 3693 debugstr_wn(component, expected_len)); 3694 return INET_E_INVALID_URL; 3695 } 3696 } 3697 3698 return S_OK; 3699 } 3700 3701 static void setup_port(const UriBuilder *builder, parse_data *data, DWORD flags) { 3702 if(builder->modified_props & Uri_HAS_PORT) { 3703 if(builder->has_port) { 3704 data->has_port = TRUE; 3705 data->port_value = builder->port; 3706 } 3707 } else if(builder->uri && builder->uri->has_port) { 3708 data->has_port = TRUE; 3709 data->port_value = builder->uri->port; 3710 } 3711 3712 if(data->has_port) 3713 TRACE("(%p %p %x): Using %u as port for IUri.\n", builder, data, flags, data->port_value); 3714 } 3715 3716 static HRESULT 
validate_path(const UriBuilder *builder, parse_data *data, DWORD flags) { 3717 const WCHAR *ptr = NULL; 3718 const WCHAR *component; 3719 const WCHAR **pptr; 3720 DWORD expected_len; 3721 BOOL check_len = TRUE; 3722 BOOL valid = FALSE; 3723 3724 if(builder->path) { 3725 ptr = builder->path; 3726 expected_len = builder->path_len; 3727 } else if(!(builder->modified_props & Uri_HAS_PATH) && 3728 builder->uri && builder->uri->path_start > -1) { 3729 ptr = builder->uri->canon_uri+builder->uri->path_start; 3730 expected_len = builder->uri->path_len; 3731 } else { 3732 static const WCHAR nullW[] = {0}; 3733 ptr = nullW; 3734 check_len = FALSE; 3735 expected_len = -1; 3736 } 3737 3738 component = ptr; 3739 pptr = &ptr; 3740 3741 /* How the path is validated depends on what type of 3742 * URI it is. 3743 */ 3744 valid = data->is_opaque ? 3745 parse_path_opaque(pptr, data, flags) : parse_path_hierarchical(pptr, data, flags); 3746 3747 if(!valid || (check_len && expected_len != data->path_len)) { 3748 TRACE("(%p %p %x): Invalid path component %s.\n", builder, data, flags, 3749 debugstr_wn(component, expected_len) ); 3750 return INET_E_INVALID_URL; 3751 } 3752 3753 TRACE("(%p %p %x): Valid path component %s len=%d.\n", builder, data, flags, 3754 debugstr_wn(data->path, data->path_len), data->path_len); 3755 3756 return S_OK; 3757 } 3758 3759 static HRESULT validate_query(const UriBuilder *builder, parse_data *data, DWORD flags) { 3760 const WCHAR *ptr = NULL; 3761 const WCHAR **pptr; 3762 DWORD expected_len; 3763 3764 if(builder->query) { 3765 ptr = builder->query; 3766 expected_len = builder->query_len; 3767 } else if(!(builder->modified_props & Uri_HAS_QUERY) && builder->uri && 3768 builder->uri->query_start > -1) { 3769 ptr = builder->uri->canon_uri+builder->uri->query_start; 3770 expected_len = builder->uri->query_len; 3771 } 3772 3773 if(ptr) { 3774 const WCHAR *component = ptr; 3775 pptr = &ptr; 3776 3777 if(parse_query(pptr, data, flags) && expected_len == 
data->query_len) 3778 TRACE("(%p %p %x): Valid query component %s len=%d.\n", builder, data, flags, 3779 debugstr_wn(data->query, data->query_len), data->query_len); 3780 else { 3781 TRACE("(%p %p %x): Invalid query component %s.\n", builder, data, flags, 3782 debugstr_wn(component, expected_len)); 3783 return INET_E_INVALID_URL; 3784 } 3785 } 3786 3787 return S_OK; 3788 } 3789 3790 static HRESULT validate_fragment(const UriBuilder *builder, parse_data *data, DWORD flags) { 3791 const WCHAR *ptr = NULL; 3792 const WCHAR **pptr; 3793 DWORD expected_len; 3794 3795 if(builder->fragment) { 3796 ptr = builder->fragment; 3797 expected_len = builder->fragment_len; 3798 } else if(!(builder->modified_props & Uri_HAS_FRAGMENT) && builder->uri && 3799 builder->uri->fragment_start > -1) { 3800 ptr = builder->uri->canon_uri+builder->uri->fragment_start; 3801 expected_len = builder->uri->fragment_len; 3802 } 3803 3804 if(ptr) { 3805 const WCHAR *component = ptr; 3806 pptr = &ptr; 3807 3808 if(parse_fragment(pptr, data, flags) && expected_len == data->fragment_len) 3809 TRACE("(%p %p %x): Valid fragment component %s len=%d.\n", builder, data, flags, 3810 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len); 3811 else { 3812 TRACE("(%p %p %x): Invalid fragment component %s.\n", builder, data, flags, 3813 debugstr_wn(component, expected_len)); 3814 return INET_E_INVALID_URL; 3815 } 3816 } 3817 3818 return S_OK; 3819 } 3820 3821 static HRESULT validate_components(const UriBuilder *builder, parse_data *data, DWORD flags) { 3822 HRESULT hr; 3823 3824 memset(data, 0, sizeof(parse_data)); 3825 3826 TRACE("(%p %p %x): Beginning to validate builder components.\n", builder, data, flags); 3827 3828 hr = validate_scheme_name(builder, data, flags); 3829 if(FAILED(hr)) 3830 return hr; 3831 3832 /* Extra validation for file schemes. 
*/ 3833 if(data->scheme_type == URL_SCHEME_FILE) { 3834 if((builder->password || (builder->uri && builder->uri->userinfo_split > -1)) || 3835 (builder->username || (builder->uri && builder->uri->userinfo_start > -1))) { 3836 TRACE("(%p %p %x): File schemes can't contain a username or password.\n", 3837 builder, data, flags); 3838 return INET_E_INVALID_URL; 3839 } 3840 } 3841 3842 hr = validate_userinfo(builder, data, flags); 3843 if(FAILED(hr)) 3844 return hr; 3845 3846 hr = validate_host(builder, data, flags); 3847 if(FAILED(hr)) 3848 return hr; 3849 3850 setup_port(builder, data, flags); 3851 3852 /* The URI is opaque if it doesn't have an authority component. */ 3853 if(!data->is_relative) 3854 data->is_opaque = !data->username && !data->password && !data->host && !data->has_port 3855 && data->scheme_type != URL_SCHEME_FILE; 3856 else 3857 data->is_opaque = !data->host && !data->has_port; 3858 3859 hr = validate_path(builder, data, flags); 3860 if(FAILED(hr)) 3861 return hr; 3862 3863 hr = validate_query(builder, data, flags); 3864 if(FAILED(hr)) 3865 return hr; 3866 3867 hr = validate_fragment(builder, data, flags); 3868 if(FAILED(hr)) 3869 return hr; 3870 3871 TRACE("(%p %p %x): Finished validating builder components.\n", builder, data, flags); 3872 3873 return S_OK; 3874 } 3875 3876 static HRESULT compare_file_paths(const Uri *a, const Uri *b, BOOL *ret) 3877 { 3878 WCHAR *canon_path_a, *canon_path_b; 3879 DWORD len_a, len_b; 3880 3881 if(!a->path_len) { 3882 *ret = !b->path_len; 3883 return S_OK; 3884 } 3885 3886 if(!b->path_len) { 3887 *ret = FALSE; 3888 return S_OK; 3889 } 3890 3891 /* Fast path */ 3892 if(a->path_len == b->path_len && !memicmpW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) { 3893 *ret = TRUE; 3894 return S_OK; 3895 } 3896 3897 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, NULL); 3898 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, 
b->path_len, b->scheme_type, FALSE, 0, FALSE, NULL); 3899 3900 canon_path_a = heap_alloc(len_a*sizeof(WCHAR)); 3901 if(!canon_path_a) 3902 return E_OUTOFMEMORY; 3903 canon_path_b = heap_alloc(len_b*sizeof(WCHAR)); 3904 if(!canon_path_b) { 3905 heap_free(canon_path_a); 3906 return E_OUTOFMEMORY; 3907 } 3908 3909 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, canon_path_a); 3910 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, b->path_len, b->scheme_type, FALSE, 0, FALSE, canon_path_b); 3911 3912 *ret = len_a == len_b && !memicmpW(canon_path_a, canon_path_b, len_a); 3913 3914 heap_free(canon_path_a); 3915 heap_free(canon_path_b); 3916 return S_OK; 3917 } 3918 3919 /* Checks if the two Uri's are logically equivalent. It's a simple 3920 * comparison, since they are both of type Uri, and it can access 3921 * the properties of each Uri directly without the need to go 3922 * through the "IUri_Get*" interface calls. 3923 */ 3924 static HRESULT compare_uris(const Uri *a, const Uri *b, BOOL *ret) { 3925 const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN; 3926 const BOOL are_hierarchical = a->authority_start > -1 && b->authority_start > -1; 3927 HRESULT hres; 3928 3929 *ret = FALSE; 3930 3931 if(a->scheme_type != b->scheme_type) 3932 return S_OK; 3933 3934 /* Only compare the scheme names (if any) if their unknown scheme types. */ 3935 if(!known_scheme) { 3936 if((a->scheme_start > -1 && b->scheme_start > -1) && 3937 (a->scheme_len == b->scheme_len)) { 3938 /* Make sure the schemes are the same. */ 3939 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len)) 3940 return S_OK; 3941 } else if(a->scheme_len != b->scheme_len) 3942 /* One of the Uri's has a scheme name, while the other doesn't. */ 3943 return S_OK; 3944 } 3945 3946 /* If they have a userinfo component, perform case sensitive compare. 
*/ 3947 if((a->userinfo_start > -1 && b->userinfo_start > -1) && 3948 (a->userinfo_len == b->userinfo_len)) { 3949 if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len)) 3950 return S_OK; 3951 } else if(a->userinfo_len != b->userinfo_len) 3952 /* One of the Uri's had a userinfo, while the other one doesn't. */ 3953 return S_OK; 3954 3955 /* Check if they have a host name. */ 3956 if((a->host_start > -1 && b->host_start > -1) && 3957 (a->host_len == b->host_len)) { 3958 /* Perform a case insensitive compare if they are a known scheme type. */ 3959 if(known_scheme) { 3960 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 3961 return S_OK; 3962 } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 3963 return S_OK; 3964 } else if(a->host_len != b->host_len) 3965 /* One of the Uri's had a host, while the other one didn't. */ 3966 return S_OK; 3967 3968 if(a->has_port && b->has_port) { 3969 if(a->port != b->port) 3970 return S_OK; 3971 } else if(a->has_port || b->has_port) 3972 /* One had a port, while the other one didn't. */ 3973 return S_OK; 3974 3975 /* Windows is weird with how it handles paths. For example 3976 * One URI could be "http://google.com" (after canonicalization) 3977 * and one could be "http://google.com/" and the IsEqual function 3978 * would still evaluate to TRUE, but, only if they are both hierarchical 3979 * URIs. 
3980 */ 3981 if(a->scheme_type == URL_SCHEME_FILE) { 3982 BOOL cmp; 3983 3984 hres = compare_file_paths(a, b, &cmp); 3985 if(FAILED(hres) || !cmp) 3986 return hres; 3987 } else if((a->path_start > -1 && b->path_start > -1) && 3988 (a->path_len == b->path_len)) { 3989 if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) 3990 return S_OK; 3991 } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) { 3992 if(*(a->canon_uri+a->path_start) != '/') 3993 return S_OK; 3994 } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) { 3995 if(*(b->canon_uri+b->path_start) != '/') 3996 return S_OK; 3997 } else if(a->path_len != b->path_len) 3998 return S_OK; 3999 4000 /* Compare the query strings of the two URIs. */ 4001 if((a->query_start > -1 && b->query_start > -1) && 4002 (a->query_len == b->query_len)) { 4003 if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len)) 4004 return S_OK; 4005 } else if(a->query_len != b->query_len) 4006 return S_OK; 4007 4008 if((a->fragment_start > -1 && b->fragment_start > -1) && 4009 (a->fragment_len == b->fragment_len)) { 4010 if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len)) 4011 return S_OK; 4012 } else if(a->fragment_len != b->fragment_len) 4013 return S_OK; 4014 4015 /* If we get here, the two URIs are equivalent. */ 4016 *ret = TRUE; 4017 return S_OK; 4018 } 4019 4020 static void convert_to_dos_path(const WCHAR *path, DWORD path_len, 4021 WCHAR *output, DWORD *output_len) 4022 { 4023 const WCHAR *ptr = path; 4024 4025 if(path_len > 3 && *ptr == '/' && is_drive_path(path+1)) 4026 /* Skip over the leading / before the drive path. 
*/ 4027 ++ptr; 4028 4029 for(; ptr < path+path_len; ++ptr) { 4030 if(*ptr == '/') { 4031 if(output) 4032 *output++ = '\\'; 4033 (*output_len)++; 4034 } else { 4035 if(output) 4036 *output++ = *ptr; 4037 (*output_len)++; 4038 } 4039 } 4040 } 4041 4042 /* Generates a raw uri string using the parse_data. */ 4043 static DWORD generate_raw_uri(const parse_data *data, BSTR uri, DWORD flags) { 4044 DWORD length = 0; 4045 4046 if(data->scheme) { 4047 if(uri) { 4048 memcpy(uri, data->scheme, data->scheme_len*sizeof(WCHAR)); 4049 uri[data->scheme_len] = ':'; 4050 } 4051 length += data->scheme_len+1; 4052 } 4053 4054 if(!data->is_opaque) { 4055 /* For the "//" which appears before the authority component. */ 4056 if(uri) { 4057 uri[length] = '/'; 4058 uri[length+1] = '/'; 4059 } 4060 length += 2; 4061 4062 /* Check if we need to add the "\\" before the host name 4063 * of a UNC server name in a DOS path. 4064 */ 4065 if(flags & RAW_URI_CONVERT_TO_DOS_PATH && 4066 data->scheme_type == URL_SCHEME_FILE && data->host) { 4067 if(uri) { 4068 uri[length] = '\\'; 4069 uri[length+1] = '\\'; 4070 } 4071 length += 2; 4072 } 4073 } 4074 4075 if(data->username) { 4076 if(uri) 4077 memcpy(uri+length, data->username, data->username_len*sizeof(WCHAR)); 4078 length += data->username_len; 4079 } 4080 4081 if(data->password) { 4082 if(uri) { 4083 uri[length] = ':'; 4084 memcpy(uri+length+1, data->password, data->password_len*sizeof(WCHAR)); 4085 } 4086 length += data->password_len+1; 4087 } 4088 4089 if(data->password || data->username) { 4090 if(uri) 4091 uri[length] = '@'; 4092 ++length; 4093 } 4094 4095 if(data->host) { 4096 /* IPv6 addresses get the brackets added around them if they don't already 4097 * have them. 
4098 */ 4099 const BOOL add_brackets = data->host_type == Uri_HOST_IPV6 && *(data->host) != '['; 4100 if(add_brackets) { 4101 if(uri) 4102 uri[length] = '['; 4103 ++length; 4104 } 4105 4106 if(uri) 4107 memcpy(uri+length, data->host, data->host_len*sizeof(WCHAR)); 4108 length += data->host_len; 4109 4110 if(add_brackets) { 4111 if(uri) 4112 uri[length] = ']'; 4113 length++; 4114 } 4115 } 4116 4117 if(data->has_port) { 4118 /* The port isn't included in the raw uri if it's the default 4119 * port for the scheme type. 4120 */ 4121 DWORD i; 4122 BOOL is_default = FALSE; 4123 4124 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 4125 if(data->scheme_type == default_ports[i].scheme && 4126 data->port_value == default_ports[i].port) 4127 is_default = TRUE; 4128 } 4129 4130 if(!is_default || flags & RAW_URI_FORCE_PORT_DISP) { 4131 if(uri) 4132 uri[length] = ':'; 4133 ++length; 4134 4135 if(uri) 4136 length += ui2str(uri+length, data->port_value); 4137 else 4138 length += ui2str(NULL, data->port_value); 4139 } 4140 } 4141 4142 /* Check if a '/' should be added before the path for hierarchical URIs. 
*/ 4143 if(!data->is_opaque && data->path && *(data->path) != '/') { 4144 if(uri) 4145 uri[length] = '/'; 4146 ++length; 4147 } 4148 4149 if(data->path) { 4150 if(!data->is_opaque && data->scheme_type == URL_SCHEME_FILE && 4151 flags & RAW_URI_CONVERT_TO_DOS_PATH) { 4152 DWORD len = 0; 4153 4154 if(uri) 4155 convert_to_dos_path(data->path, data->path_len, uri+length, &len); 4156 else 4157 convert_to_dos_path(data->path, data->path_len, NULL, &len); 4158 4159 length += len; 4160 } else { 4161 if(uri) 4162 memcpy(uri+length, data->path, data->path_len*sizeof(WCHAR)); 4163 length += data->path_len; 4164 } 4165 } 4166 4167 if(data->query) { 4168 if(uri) 4169 memcpy(uri+length, data->query, data->query_len*sizeof(WCHAR)); 4170 length += data->query_len; 4171 } 4172 4173 if(data->fragment) { 4174 if(uri) 4175 memcpy(uri+length, data->fragment, data->fragment_len*sizeof(WCHAR)); 4176 length += data->fragment_len; 4177 } 4178 4179 if(uri) 4180 TRACE("(%p %p): Generated raw uri=%s len=%d\n", data, uri, debugstr_wn(uri, length), length); 4181 else 4182 TRACE("(%p %p): Computed raw uri len=%d\n", data, uri, length); 4183 4184 return length; 4185 } 4186 4187 static HRESULT generate_uri(const UriBuilder *builder, const parse_data *data, Uri *uri, DWORD flags) { 4188 HRESULT hr; 4189 DWORD length = generate_raw_uri(data, NULL, 0); 4190 uri->raw_uri = SysAllocStringLen(NULL, length); 4191 if(!uri->raw_uri) 4192 return E_OUTOFMEMORY; 4193 4194 generate_raw_uri(data, uri->raw_uri, 0); 4195 4196 hr = canonicalize_uri(data, uri, flags); 4197 if(FAILED(hr)) { 4198 if(hr == E_INVALIDARG) 4199 return INET_E_INVALID_URL; 4200 return hr; 4201 } 4202 4203 uri->create_flags = flags; 4204 return S_OK; 4205 } 4206 4207 static inline Uri* impl_from_IUri(IUri *iface) 4208 { 4209 return CONTAINING_RECORD(iface, Uri, IUri_iface); 4210 } 4211 4212 static inline void destroy_uri_obj(Uri *This) 4213 { 4214 SysFreeString(This->raw_uri); 4215 heap_free(This->canon_uri); 4216 heap_free(This); 4217 } 
4218 4219 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv) 4220 { 4221 Uri *This = impl_from_IUri(iface); 4222 4223 if(IsEqualGUID(&IID_IUnknown, riid)) { 4224 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 4225 *ppv = &This->IUri_iface; 4226 }else if(IsEqualGUID(&IID_IUri, riid)) { 4227 TRACE("(%p)->(IID_IUri %p)\n", This, ppv); 4228 *ppv = &This->IUri_iface; 4229 }else if(IsEqualGUID(&IID_IUriBuilderFactory, riid)) { 4230 TRACE("(%p)->(IID_IUriBuilderFactory %p)\n", This, ppv); 4231 *ppv = &This->IUriBuilderFactory_iface; 4232 }else if(IsEqualGUID(&IID_IPersistStream, riid)) { 4233 TRACE("(%p)->(IID_IPersistStream %p)\n", This, ppv); 4234 *ppv = &This->IPersistStream_iface; 4235 }else if(IsEqualGUID(&IID_IMarshal, riid)) { 4236 TRACE("(%p)->(IID_IMarshal %p)\n", This, ppv); 4237 *ppv = &This->IMarshal_iface; 4238 }else if(IsEqualGUID(&IID_IUriObj, riid)) { 4239 TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv); 4240 *ppv = This; 4241 return S_OK; 4242 }else { 4243 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 4244 *ppv = NULL; 4245 return E_NOINTERFACE; 4246 } 4247 4248 IUnknown_AddRef((IUnknown*)*ppv); 4249 return S_OK; 4250 } 4251 4252 static ULONG WINAPI Uri_AddRef(IUri *iface) 4253 { 4254 Uri *This = impl_from_IUri(iface); 4255 LONG ref = InterlockedIncrement(&This->ref); 4256 4257 TRACE("(%p) ref=%d\n", This, ref); 4258 4259 return ref; 4260 } 4261 4262 static ULONG WINAPI Uri_Release(IUri *iface) 4263 { 4264 Uri *This = impl_from_IUri(iface); 4265 LONG ref = InterlockedDecrement(&This->ref); 4266 4267 TRACE("(%p) ref=%d\n", This, ref); 4268 4269 if(!ref) 4270 destroy_uri_obj(This); 4271 4272 return ref; 4273 } 4274 4275 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags) 4276 { 4277 Uri *This = impl_from_IUri(iface); 4278 HRESULT hres; 4279 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pbstrProperty, dwFlags); 4280 4281 
if(!This->create_flags) 4282 return E_UNEXPECTED; 4283 if(!pbstrProperty) 4284 return E_POINTER; 4285 4286 if(uriProp > Uri_PROPERTY_STRING_LAST) { 4287 /* It only returns S_FALSE for the ZONE property... */ 4288 if(uriProp == Uri_PROPERTY_ZONE) { 4289 *pbstrProperty = SysAllocStringLen(NULL, 0); 4290 if(!(*pbstrProperty)) 4291 return E_OUTOFMEMORY; 4292 return S_FALSE; 4293 } 4294 4295 *pbstrProperty = NULL; 4296 return E_INVALIDARG; 4297 } 4298 4299 /* Don't have support for flags yet. */ 4300 if(dwFlags) { 4301 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 4302 return E_NOTIMPL; 4303 } 4304 4305 switch(uriProp) { 4306 case Uri_PROPERTY_ABSOLUTE_URI: 4307 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 4308 *pbstrProperty = SysAllocStringLen(NULL, 0); 4309 hres = S_FALSE; 4310 } else { 4311 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 4312 if(This->userinfo_len == 0) { 4313 /* Don't include the '@' after the userinfo component. */ 4314 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-1); 4315 hres = S_OK; 4316 if(*pbstrProperty) { 4317 /* Copy everything before it. */ 4318 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4319 4320 /* And everything after it. 
*/ 4321 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+1, 4322 (This->canon_len-This->userinfo_start-1)*sizeof(WCHAR)); 4323 } 4324 } else if(This->userinfo_split == 0 && This->userinfo_len == 1) { 4325 /* Don't include the ":@" */ 4326 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-2); 4327 hres = S_OK; 4328 if(*pbstrProperty) { 4329 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4330 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+2, 4331 (This->canon_len-This->userinfo_start-2)*sizeof(WCHAR)); 4332 } 4333 } else { 4334 *pbstrProperty = SysAllocString(This->canon_uri); 4335 hres = S_OK; 4336 } 4337 } else { 4338 *pbstrProperty = SysAllocString(This->canon_uri); 4339 hres = S_OK; 4340 } 4341 } 4342 4343 if(!(*pbstrProperty)) 4344 hres = E_OUTOFMEMORY; 4345 4346 break; 4347 case Uri_PROPERTY_AUTHORITY: 4348 if(This->authority_start > -1) { 4349 if(This->port_offset > -1 && is_default_port(This->scheme_type, This->port) && 4350 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH) 4351 /* Don't include the port in the authority component. */ 4352 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->port_offset); 4353 else 4354 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len); 4355 hres = S_OK; 4356 } else { 4357 *pbstrProperty = SysAllocStringLen(NULL, 0); 4358 hres = S_FALSE; 4359 } 4360 4361 if(!(*pbstrProperty)) 4362 hres = E_OUTOFMEMORY; 4363 4364 break; 4365 case Uri_PROPERTY_DISPLAY_URI: 4366 /* The Display URI contains everything except for the userinfo for known 4367 * scheme types. 4368 */ 4369 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 4370 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len); 4371 4372 if(*pbstrProperty) { 4373 /* Copy everything before the userinfo over. 
*/ 4374 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4375 /* Copy everything after the userinfo over. */ 4376 memcpy(*pbstrProperty+This->userinfo_start, 4377 This->canon_uri+This->userinfo_start+This->userinfo_len+1, 4378 (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR)); 4379 } 4380 } else 4381 *pbstrProperty = SysAllocString(This->canon_uri); 4382 4383 if(!(*pbstrProperty)) 4384 hres = E_OUTOFMEMORY; 4385 else 4386 hres = S_OK; 4387 4388 break; 4389 case Uri_PROPERTY_DOMAIN: 4390 if(This->domain_offset > -1) { 4391 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset, 4392 This->host_len-This->domain_offset); 4393 hres = S_OK; 4394 } else { 4395 *pbstrProperty = SysAllocStringLen(NULL, 0); 4396 hres = S_FALSE; 4397 } 4398 4399 if(!(*pbstrProperty)) 4400 hres = E_OUTOFMEMORY; 4401 4402 break; 4403 case Uri_PROPERTY_EXTENSION: 4404 if(This->extension_offset > -1) { 4405 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset, 4406 This->path_len-This->extension_offset); 4407 hres = S_OK; 4408 } else { 4409 *pbstrProperty = SysAllocStringLen(NULL, 0); 4410 hres = S_FALSE; 4411 } 4412 4413 if(!(*pbstrProperty)) 4414 hres = E_OUTOFMEMORY; 4415 4416 break; 4417 case Uri_PROPERTY_FRAGMENT: 4418 if(This->fragment_start > -1) { 4419 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len); 4420 hres = S_OK; 4421 } else { 4422 *pbstrProperty = SysAllocStringLen(NULL, 0); 4423 hres = S_FALSE; 4424 } 4425 4426 if(!(*pbstrProperty)) 4427 hres = E_OUTOFMEMORY; 4428 4429 break; 4430 case Uri_PROPERTY_HOST: 4431 if(This->host_start > -1) { 4432 /* The '[' and ']' aren't included for IPv6 addresses. 
*/ 4433 if(This->host_type == Uri_HOST_IPV6) 4434 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2); 4435 else 4436 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len); 4437 4438 hres = S_OK; 4439 } else { 4440 *pbstrProperty = SysAllocStringLen(NULL, 0); 4441 hres = S_FALSE; 4442 } 4443 4444 if(!(*pbstrProperty)) 4445 hres = E_OUTOFMEMORY; 4446 4447 break; 4448 case Uri_PROPERTY_PASSWORD: 4449 if(This->userinfo_split > -1) { 4450 *pbstrProperty = SysAllocStringLen( 4451 This->canon_uri+This->userinfo_start+This->userinfo_split+1, 4452 This->userinfo_len-This->userinfo_split-1); 4453 hres = S_OK; 4454 } else { 4455 *pbstrProperty = SysAllocStringLen(NULL, 0); 4456 hres = S_FALSE; 4457 } 4458 4459 if(!(*pbstrProperty)) 4460 return E_OUTOFMEMORY; 4461 4462 break; 4463 case Uri_PROPERTY_PATH: 4464 if(This->path_start > -1) { 4465 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len); 4466 hres = S_OK; 4467 } else { 4468 *pbstrProperty = SysAllocStringLen(NULL, 0); 4469 hres = S_FALSE; 4470 } 4471 4472 if(!(*pbstrProperty)) 4473 hres = E_OUTOFMEMORY; 4474 4475 break; 4476 case Uri_PROPERTY_PATH_AND_QUERY: 4477 if(This->path_start > -1) { 4478 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len); 4479 hres = S_OK; 4480 } else if(This->query_start > -1) { 4481 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 4482 hres = S_OK; 4483 } else { 4484 *pbstrProperty = SysAllocStringLen(NULL, 0); 4485 hres = S_FALSE; 4486 } 4487 4488 if(!(*pbstrProperty)) 4489 hres = E_OUTOFMEMORY; 4490 4491 break; 4492 case Uri_PROPERTY_QUERY: 4493 if(This->query_start > -1) { 4494 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 4495 hres = S_OK; 4496 } else { 4497 *pbstrProperty = SysAllocStringLen(NULL, 0); 4498 hres = S_FALSE; 4499 } 4500 4501 if(!(*pbstrProperty)) 
4502 hres = E_OUTOFMEMORY; 4503 4504 break; 4505 case Uri_PROPERTY_RAW_URI: 4506 *pbstrProperty = SysAllocString(This->raw_uri); 4507 if(!(*pbstrProperty)) 4508 hres = E_OUTOFMEMORY; 4509 else 4510 hres = S_OK; 4511 break; 4512 case Uri_PROPERTY_SCHEME_NAME: 4513 if(This->scheme_start > -1) { 4514 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len); 4515 hres = S_OK; 4516 } else { 4517 *pbstrProperty = SysAllocStringLen(NULL, 0); 4518 hres = S_FALSE; 4519 } 4520 4521 if(!(*pbstrProperty)) 4522 hres = E_OUTOFMEMORY; 4523 4524 break; 4525 case Uri_PROPERTY_USER_INFO: 4526 if(This->userinfo_start > -1) { 4527 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len); 4528 hres = S_OK; 4529 } else { 4530 *pbstrProperty = SysAllocStringLen(NULL, 0); 4531 hres = S_FALSE; 4532 } 4533 4534 if(!(*pbstrProperty)) 4535 hres = E_OUTOFMEMORY; 4536 4537 break; 4538 case Uri_PROPERTY_USER_NAME: 4539 if(This->userinfo_start > -1 && This->userinfo_split != 0) { 4540 /* If userinfo_split is set, that means a password exists 4541 * so the username is only from userinfo_start to userinfo_split. 
4542 */ 4543 if(This->userinfo_split > -1) { 4544 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split); 4545 hres = S_OK; 4546 } else { 4547 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len); 4548 hres = S_OK; 4549 } 4550 } else { 4551 *pbstrProperty = SysAllocStringLen(NULL, 0); 4552 hres = S_FALSE; 4553 } 4554 4555 if(!(*pbstrProperty)) 4556 return E_OUTOFMEMORY; 4557 4558 break; 4559 default: 4560 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 4561 hres = E_NOTIMPL; 4562 } 4563 4564 return hres; 4565 } 4566 4567 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 4568 { 4569 Uri *This = impl_from_IUri(iface); 4570 HRESULT hres; 4571 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags); 4572 4573 if(!This->create_flags) 4574 return E_UNEXPECTED; 4575 if(!pcchProperty) 4576 return E_INVALIDARG; 4577 4578 /* Can only return a length for a property if it's a string. */ 4579 if(uriProp > Uri_PROPERTY_STRING_LAST) 4580 return E_INVALIDARG; 4581 4582 /* Don't have support for flags yet. */ 4583 if(dwFlags) { 4584 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4585 return E_NOTIMPL; 4586 } 4587 4588 switch(uriProp) { 4589 case Uri_PROPERTY_ABSOLUTE_URI: 4590 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 4591 *pcchProperty = 0; 4592 hres = S_FALSE; 4593 } else { 4594 if(This->scheme_type != URL_SCHEME_UNKNOWN) { 4595 if(This->userinfo_start > -1 && This->userinfo_len == 0) 4596 /* Don't include the '@' in the length. */ 4597 *pcchProperty = This->canon_len-1; 4598 else if(This->userinfo_start > -1 && This->userinfo_len == 1 && 4599 This->userinfo_split == 0) 4600 /* Don't include the ":@" in the length. 
*/ 4601 *pcchProperty = This->canon_len-2; 4602 else 4603 *pcchProperty = This->canon_len; 4604 } else 4605 *pcchProperty = This->canon_len; 4606 4607 hres = S_OK; 4608 } 4609 4610 break; 4611 case Uri_PROPERTY_AUTHORITY: 4612 if(This->port_offset > -1 && 4613 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH && 4614 is_default_port(This->scheme_type, This->port)) 4615 /* Only count up until the port in the authority. */ 4616 *pcchProperty = This->port_offset; 4617 else 4618 *pcchProperty = This->authority_len; 4619 hres = (This->authority_start > -1) ? S_OK : S_FALSE; 4620 break; 4621 case Uri_PROPERTY_DISPLAY_URI: 4622 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) 4623 *pcchProperty = This->canon_len-This->userinfo_len-1; 4624 else 4625 *pcchProperty = This->canon_len; 4626 4627 hres = S_OK; 4628 break; 4629 case Uri_PROPERTY_DOMAIN: 4630 if(This->domain_offset > -1) 4631 *pcchProperty = This->host_len - This->domain_offset; 4632 else 4633 *pcchProperty = 0; 4634 4635 hres = (This->domain_offset > -1) ? S_OK : S_FALSE; 4636 break; 4637 case Uri_PROPERTY_EXTENSION: 4638 if(This->extension_offset > -1) { 4639 *pcchProperty = This->path_len - This->extension_offset; 4640 hres = S_OK; 4641 } else { 4642 *pcchProperty = 0; 4643 hres = S_FALSE; 4644 } 4645 4646 break; 4647 case Uri_PROPERTY_FRAGMENT: 4648 *pcchProperty = This->fragment_len; 4649 hres = (This->fragment_start > -1) ? S_OK : S_FALSE; 4650 break; 4651 case Uri_PROPERTY_HOST: 4652 *pcchProperty = This->host_len; 4653 4654 /* '[' and ']' aren't included in the length. */ 4655 if(This->host_type == Uri_HOST_IPV6) 4656 *pcchProperty -= 2; 4657 4658 hres = (This->host_start > -1) ? S_OK : S_FALSE; 4659 break; 4660 case Uri_PROPERTY_PASSWORD: 4661 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0; 4662 hres = (This->userinfo_split > -1) ? 
S_OK : S_FALSE; 4663 break; 4664 case Uri_PROPERTY_PATH: 4665 *pcchProperty = This->path_len; 4666 hres = (This->path_start > -1) ? S_OK : S_FALSE; 4667 break; 4668 case Uri_PROPERTY_PATH_AND_QUERY: 4669 *pcchProperty = This->path_len+This->query_len; 4670 hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE; 4671 break; 4672 case Uri_PROPERTY_QUERY: 4673 *pcchProperty = This->query_len; 4674 hres = (This->query_start > -1) ? S_OK : S_FALSE; 4675 break; 4676 case Uri_PROPERTY_RAW_URI: 4677 *pcchProperty = SysStringLen(This->raw_uri); 4678 hres = S_OK; 4679 break; 4680 case Uri_PROPERTY_SCHEME_NAME: 4681 *pcchProperty = This->scheme_len; 4682 hres = (This->scheme_start > -1) ? S_OK : S_FALSE; 4683 break; 4684 case Uri_PROPERTY_USER_INFO: 4685 *pcchProperty = This->userinfo_len; 4686 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 4687 break; 4688 case Uri_PROPERTY_USER_NAME: 4689 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len; 4690 if(This->userinfo_split == 0) 4691 hres = S_FALSE; 4692 else 4693 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 4694 break; 4695 default: 4696 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4697 hres = E_NOTIMPL; 4698 } 4699 4700 return hres; 4701 } 4702 4703 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 4704 { 4705 Uri *This = impl_from_IUri(iface); 4706 HRESULT hres; 4707 4708 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags); 4709 4710 if(!This->create_flags) 4711 return E_UNEXPECTED; 4712 if(!pcchProperty) 4713 return E_INVALIDARG; 4714 4715 /* Microsoft's implementation for the ZONE property of a URI seems to be lacking... 4716 * From what I can tell, instead of checking which URLZONE the URI belongs to it 4717 * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone 4718 * function. 
4719 */ 4720 if(uriProp == Uri_PROPERTY_ZONE) { 4721 *pcchProperty = URLZONE_INVALID; 4722 return E_NOTIMPL; 4723 } 4724 4725 if(uriProp < Uri_PROPERTY_DWORD_START) { 4726 *pcchProperty = 0; 4727 return E_INVALIDARG; 4728 } 4729 4730 switch(uriProp) { 4731 case Uri_PROPERTY_HOST_TYPE: 4732 *pcchProperty = This->host_type; 4733 hres = S_OK; 4734 break; 4735 case Uri_PROPERTY_PORT: 4736 if(!This->has_port) { 4737 *pcchProperty = 0; 4738 hres = S_FALSE; 4739 } else { 4740 *pcchProperty = This->port; 4741 hres = S_OK; 4742 } 4743 4744 break; 4745 case Uri_PROPERTY_SCHEME: 4746 *pcchProperty = This->scheme_type; 4747 hres = S_OK; 4748 break; 4749 default: 4750 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4751 hres = E_NOTIMPL; 4752 } 4753 4754 return hres; 4755 } 4756 4757 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty) 4758 { 4759 Uri *This = impl_from_IUri(iface); 4760 4761 TRACE("(%p %s)->(%d %p)\n", This, debugstr_w(This->canon_uri), uriProp, pfHasProperty); 4762 4763 if(!pfHasProperty) 4764 return E_INVALIDARG; 4765 4766 switch(uriProp) { 4767 case Uri_PROPERTY_ABSOLUTE_URI: 4768 *pfHasProperty = !(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI); 4769 break; 4770 case Uri_PROPERTY_AUTHORITY: 4771 *pfHasProperty = This->authority_start > -1; 4772 break; 4773 case Uri_PROPERTY_DISPLAY_URI: 4774 *pfHasProperty = TRUE; 4775 break; 4776 case Uri_PROPERTY_DOMAIN: 4777 *pfHasProperty = This->domain_offset > -1; 4778 break; 4779 case Uri_PROPERTY_EXTENSION: 4780 *pfHasProperty = This->extension_offset > -1; 4781 break; 4782 case Uri_PROPERTY_FRAGMENT: 4783 *pfHasProperty = This->fragment_start > -1; 4784 break; 4785 case Uri_PROPERTY_HOST: 4786 *pfHasProperty = This->host_start > -1; 4787 break; 4788 case Uri_PROPERTY_PASSWORD: 4789 *pfHasProperty = This->userinfo_split > -1; 4790 break; 4791 case Uri_PROPERTY_PATH: 4792 *pfHasProperty = This->path_start > -1; 4793 break; 4794 case 
Uri_PROPERTY_PATH_AND_QUERY: 4795 *pfHasProperty = (This->path_start > -1 || This->query_start > -1); 4796 break; 4797 case Uri_PROPERTY_QUERY: 4798 *pfHasProperty = This->query_start > -1; 4799 break; 4800 case Uri_PROPERTY_RAW_URI: 4801 *pfHasProperty = TRUE; 4802 break; 4803 case Uri_PROPERTY_SCHEME_NAME: 4804 *pfHasProperty = This->scheme_start > -1; 4805 break; 4806 case Uri_PROPERTY_USER_INFO: 4807 *pfHasProperty = This->userinfo_start > -1; 4808 break; 4809 case Uri_PROPERTY_USER_NAME: 4810 if(This->userinfo_split == 0) 4811 *pfHasProperty = FALSE; 4812 else 4813 *pfHasProperty = This->userinfo_start > -1; 4814 break; 4815 case Uri_PROPERTY_HOST_TYPE: 4816 *pfHasProperty = TRUE; 4817 break; 4818 case Uri_PROPERTY_PORT: 4819 *pfHasProperty = This->has_port; 4820 break; 4821 case Uri_PROPERTY_SCHEME: 4822 *pfHasProperty = TRUE; 4823 break; 4824 case Uri_PROPERTY_ZONE: 4825 *pfHasProperty = FALSE; 4826 break; 4827 default: 4828 FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty); 4829 return E_NOTIMPL; 4830 } 4831 4832 return S_OK; 4833 } 4834 4835 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri) 4836 { 4837 TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri); 4838 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0); 4839 } 4840 4841 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority) 4842 { 4843 TRACE("(%p)->(%p)\n", iface, pstrAuthority); 4844 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0); 4845 } 4846 4847 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri) 4848 { 4849 TRACE("(%p)->(%p)\n", iface, pstrDisplayUri); 4850 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0); 4851 } 4852 4853 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain) 4854 { 4855 TRACE("(%p)->(%p)\n", iface, pstrDomain); 4856 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, 
pstrDomain, 0); 4857 } 4858 4859 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension) 4860 { 4861 TRACE("(%p)->(%p)\n", iface, pstrExtension); 4862 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0); 4863 } 4864 4865 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment) 4866 { 4867 TRACE("(%p)->(%p)\n", iface, pstrFragment); 4868 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0); 4869 } 4870 4871 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost) 4872 { 4873 TRACE("(%p)->(%p)\n", iface, pstrHost); 4874 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0); 4875 } 4876 4877 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword) 4878 { 4879 TRACE("(%p)->(%p)\n", iface, pstrPassword); 4880 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0); 4881 } 4882 4883 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath) 4884 { 4885 TRACE("(%p)->(%p)\n", iface, pstrPath); 4886 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0); 4887 } 4888 4889 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery) 4890 { 4891 TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery); 4892 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0); 4893 } 4894 4895 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery) 4896 { 4897 TRACE("(%p)->(%p)\n", iface, pstrQuery); 4898 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0); 4899 } 4900 4901 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri) 4902 { 4903 TRACE("(%p)->(%p)\n", iface, pstrRawUri); 4904 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0); 4905 } 4906 4907 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName) 4908 { 4909 TRACE("(%p)->(%p)\n", iface, pstrSchemeName); 4910 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, 
pstrSchemeName, 0); 4911 } 4912 4913 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo) 4914 { 4915 TRACE("(%p)->(%p)\n", iface, pstrUserInfo); 4916 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0); 4917 } 4918 4919 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName) 4920 { 4921 TRACE("(%p)->(%p)\n", iface, pstrUserName); 4922 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0); 4923 } 4924 4925 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType) 4926 { 4927 TRACE("(%p)->(%p)\n", iface, pdwHostType); 4928 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0); 4929 } 4930 4931 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort) 4932 { 4933 TRACE("(%p)->(%p)\n", iface, pdwPort); 4934 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0); 4935 } 4936 4937 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme) 4938 { 4939 TRACE("(%p)->(%p)\n", iface, pdwScheme); 4940 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0); 4941 } 4942 4943 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone) 4944 { 4945 TRACE("(%p)->(%p)\n", iface, pdwZone); 4946 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0); 4947 } 4948 4949 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties) 4950 { 4951 Uri *This = impl_from_IUri(iface); 4952 TRACE("(%p %s)->(%p)\n", This, debugstr_w(This->canon_uri), pdwProperties); 4953 4954 if(!This->create_flags) 4955 return E_UNEXPECTED; 4956 if(!pdwProperties) 4957 return E_INVALIDARG; 4958 4959 /* All URIs have these. 
*/ 4960 *pdwProperties = Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE; 4961 4962 if(!(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI)) 4963 *pdwProperties |= Uri_HAS_ABSOLUTE_URI; 4964 4965 if(This->scheme_start > -1) 4966 *pdwProperties |= Uri_HAS_SCHEME_NAME; 4967 4968 if(This->authority_start > -1) { 4969 *pdwProperties |= Uri_HAS_AUTHORITY; 4970 if(This->userinfo_start > -1) { 4971 *pdwProperties |= Uri_HAS_USER_INFO; 4972 if(This->userinfo_split != 0) 4973 *pdwProperties |= Uri_HAS_USER_NAME; 4974 } 4975 if(This->userinfo_split > -1) 4976 *pdwProperties |= Uri_HAS_PASSWORD; 4977 if(This->host_start > -1) 4978 *pdwProperties |= Uri_HAS_HOST; 4979 if(This->domain_offset > -1) 4980 *pdwProperties |= Uri_HAS_DOMAIN; 4981 } 4982 4983 if(This->has_port) 4984 *pdwProperties |= Uri_HAS_PORT; 4985 if(This->path_start > -1) 4986 *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY; 4987 if(This->query_start > -1) 4988 *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY; 4989 4990 if(This->extension_offset > -1) 4991 *pdwProperties |= Uri_HAS_EXTENSION; 4992 4993 if(This->fragment_start > -1) 4994 *pdwProperties |= Uri_HAS_FRAGMENT; 4995 4996 return S_OK; 4997 } 4998 4999 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual) 5000 { 5001 Uri *This = impl_from_IUri(iface); 5002 Uri *other; 5003 5004 TRACE("(%p %s)->(%p %p)\n", This, debugstr_w(This->canon_uri), pUri, pfEqual); 5005 5006 if(!This->create_flags) 5007 return E_UNEXPECTED; 5008 if(!pfEqual) 5009 return E_POINTER; 5010 5011 if(!pUri) { 5012 *pfEqual = FALSE; 5013 5014 /* For some reason Windows returns S_OK here... */ 5015 return S_OK; 5016 } 5017 5018 /* Try to convert it to a Uri (allows for a more simple comparison). 
*/ 5019 if(!(other = get_uri_obj(pUri))) { 5020 FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual); 5021 return E_NOTIMPL; 5022 } 5023 5024 TRACE("comparing to %s\n", debugstr_w(other->canon_uri)); 5025 return compare_uris(This, other, pfEqual); 5026 } 5027 5028 static const IUriVtbl UriVtbl = { 5029 Uri_QueryInterface, 5030 Uri_AddRef, 5031 Uri_Release, 5032 Uri_GetPropertyBSTR, 5033 Uri_GetPropertyLength, 5034 Uri_GetPropertyDWORD, 5035 Uri_HasProperty, 5036 Uri_GetAbsoluteUri, 5037 Uri_GetAuthority, 5038 Uri_GetDisplayUri, 5039 Uri_GetDomain, 5040 Uri_GetExtension, 5041 Uri_GetFragment, 5042 Uri_GetHost, 5043 Uri_GetPassword, 5044 Uri_GetPath, 5045 Uri_GetPathAndQuery, 5046 Uri_GetQuery, 5047 Uri_GetRawUri, 5048 Uri_GetSchemeName, 5049 Uri_GetUserInfo, 5050 Uri_GetUserName, 5051 Uri_GetHostType, 5052 Uri_GetPort, 5053 Uri_GetScheme, 5054 Uri_GetZone, 5055 Uri_GetProperties, 5056 Uri_IsEqual 5057 }; 5058 5059 static inline Uri* impl_from_IUriBuilderFactory(IUriBuilderFactory *iface) 5060 { 5061 return CONTAINING_RECORD(iface, Uri, IUriBuilderFactory_iface); 5062 } 5063 5064 static HRESULT WINAPI UriBuilderFactory_QueryInterface(IUriBuilderFactory *iface, REFIID riid, void **ppv) 5065 { 5066 Uri *This = impl_from_IUriBuilderFactory(iface); 5067 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5068 } 5069 5070 static ULONG WINAPI UriBuilderFactory_AddRef(IUriBuilderFactory *iface) 5071 { 5072 Uri *This = impl_from_IUriBuilderFactory(iface); 5073 return IUri_AddRef(&This->IUri_iface); 5074 } 5075 5076 static ULONG WINAPI UriBuilderFactory_Release(IUriBuilderFactory *iface) 5077 { 5078 Uri *This = impl_from_IUriBuilderFactory(iface); 5079 return IUri_Release(&This->IUri_iface); 5080 } 5081 5082 static HRESULT WINAPI UriBuilderFactory_CreateIUriBuilder(IUriBuilderFactory *iface, 5083 DWORD dwFlags, 5084 DWORD_PTR dwReserved, 5085 IUriBuilder **ppIUriBuilder) 5086 { 5087 Uri *This = impl_from_IUriBuilderFactory(iface); 5088 
TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 5089 5090 if(!ppIUriBuilder) 5091 return E_POINTER; 5092 5093 if(dwFlags || dwReserved) { 5094 *ppIUriBuilder = NULL; 5095 return E_INVALIDARG; 5096 } 5097 5098 return CreateIUriBuilder(NULL, 0, 0, ppIUriBuilder); 5099 } 5100 5101 static HRESULT WINAPI UriBuilderFactory_CreateInitializedIUriBuilder(IUriBuilderFactory *iface, 5102 DWORD dwFlags, 5103 DWORD_PTR dwReserved, 5104 IUriBuilder **ppIUriBuilder) 5105 { 5106 Uri *This = impl_from_IUriBuilderFactory(iface); 5107 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 5108 5109 if(!ppIUriBuilder) 5110 return E_POINTER; 5111 5112 if(dwFlags || dwReserved) { 5113 *ppIUriBuilder = NULL; 5114 return E_INVALIDARG; 5115 } 5116 5117 return CreateIUriBuilder(&This->IUri_iface, 0, 0, ppIUriBuilder); 5118 } 5119 5120 static const IUriBuilderFactoryVtbl UriBuilderFactoryVtbl = { 5121 UriBuilderFactory_QueryInterface, 5122 UriBuilderFactory_AddRef, 5123 UriBuilderFactory_Release, 5124 UriBuilderFactory_CreateIUriBuilder, 5125 UriBuilderFactory_CreateInitializedIUriBuilder 5126 }; 5127 5128 static inline Uri* impl_from_IPersistStream(IPersistStream *iface) 5129 { 5130 return CONTAINING_RECORD(iface, Uri, IPersistStream_iface); 5131 } 5132 5133 static HRESULT WINAPI PersistStream_QueryInterface(IPersistStream *iface, REFIID riid, void **ppvObject) 5134 { 5135 Uri *This = impl_from_IPersistStream(iface); 5136 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject); 5137 } 5138 5139 static ULONG WINAPI PersistStream_AddRef(IPersistStream *iface) 5140 { 5141 Uri *This = impl_from_IPersistStream(iface); 5142 return IUri_AddRef(&This->IUri_iface); 5143 } 5144 5145 static ULONG WINAPI PersistStream_Release(IPersistStream *iface) 5146 { 5147 Uri *This = impl_from_IPersistStream(iface); 5148 return IUri_Release(&This->IUri_iface); 5149 } 5150 5151 static HRESULT WINAPI PersistStream_GetClassID(IPersistStream *iface, 
CLSID *pClassID) 5152 { 5153 Uri *This = impl_from_IPersistStream(iface); 5154 TRACE("(%p)->(%p)\n", This, pClassID); 5155 5156 if(!pClassID) 5157 return E_INVALIDARG; 5158 5159 *pClassID = CLSID_CUri; 5160 return S_OK; 5161 } 5162 5163 static HRESULT WINAPI PersistStream_IsDirty(IPersistStream *iface) 5164 { 5165 Uri *This = impl_from_IPersistStream(iface); 5166 TRACE("(%p)\n", This); 5167 return S_FALSE; 5168 } 5169 5170 struct persist_uri { 5171 DWORD size; 5172 DWORD unk1[2]; 5173 DWORD create_flags; 5174 DWORD unk2[3]; 5175 DWORD fields_no; 5176 BYTE data[1]; 5177 }; 5178 5179 static HRESULT WINAPI PersistStream_Load(IPersistStream *iface, IStream *pStm) 5180 { 5181 Uri *This = impl_from_IPersistStream(iface); 5182 struct persist_uri *data; 5183 parse_data parse; 5184 DWORD size; 5185 HRESULT hr; 5186 5187 TRACE("(%p)->(%p)\n", This, pStm); 5188 5189 if(This->create_flags) 5190 return E_UNEXPECTED; 5191 if(!pStm) 5192 return E_INVALIDARG; 5193 5194 hr = IStream_Read(pStm, &size, sizeof(DWORD), NULL); 5195 if(FAILED(hr)) 5196 return hr; 5197 data = heap_alloc(size); 5198 if(!data) 5199 return E_OUTOFMEMORY; 5200 hr = IStream_Read(pStm, data->unk1, size-sizeof(DWORD)-2, NULL); 5201 if(FAILED(hr)) { 5202 heap_free(data); 5203 return hr; 5204 } 5205 5206 if(size < sizeof(struct persist_uri)) { 5207 heap_free(data); 5208 return S_OK; 5209 } 5210 5211 if(*(DWORD*)data->data != Uri_PROPERTY_RAW_URI) { 5212 heap_free(data); 5213 ERR("Can't find raw_uri\n"); 5214 return E_UNEXPECTED; 5215 } 5216 5217 This->raw_uri = SysAllocString((WCHAR*)(data->data+sizeof(DWORD)*2)); 5218 if(!This->raw_uri) { 5219 heap_free(data); 5220 return E_OUTOFMEMORY; 5221 } 5222 This->create_flags = data->create_flags; 5223 heap_free(data); 5224 TRACE("%x %s\n", This->create_flags, debugstr_w(This->raw_uri)); 5225 5226 memset(&parse, 0, sizeof(parse_data)); 5227 parse.uri = This->raw_uri; 5228 if(!parse_uri(&parse, This->create_flags)) { 5229 SysFreeString(This->raw_uri); 5230 
This->create_flags = 0; 5231 return E_UNEXPECTED; 5232 } 5233 5234 hr = canonicalize_uri(&parse, This, This->create_flags); 5235 if(FAILED(hr)) { 5236 SysFreeString(This->raw_uri); 5237 This->create_flags = 0; 5238 return hr; 5239 } 5240 5241 return S_OK; 5242 } 5243 5244 static inline BYTE* persist_stream_add_strprop(Uri *This, BYTE *p, DWORD type, DWORD len, WCHAR *data) 5245 { 5246 len *= sizeof(WCHAR); 5247 *(DWORD*)p = type; 5248 p += sizeof(DWORD); 5249 *(DWORD*)p = len+sizeof(WCHAR); 5250 p += sizeof(DWORD); 5251 memcpy(p, data, len); 5252 p += len; 5253 *(WCHAR*)p = 0; 5254 return p+sizeof(WCHAR); 5255 } 5256 5257 static inline void persist_stream_save(Uri *This, IStream *pStm, BOOL marshal, struct persist_uri *data) 5258 { 5259 BYTE *p = NULL; 5260 5261 data->create_flags = This->create_flags; 5262 5263 if(This->create_flags) { 5264 data->fields_no = 1; 5265 p = persist_stream_add_strprop(This, data->data, Uri_PROPERTY_RAW_URI, 5266 SysStringLen(This->raw_uri), This->raw_uri); 5267 } 5268 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS 5269 && This->scheme_type!=URL_SCHEME_FTP) 5270 return; 5271 5272 if(This->fragment_len) { 5273 data->fields_no++; 5274 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_FRAGMENT, 5275 This->fragment_len, This->canon_uri+This->fragment_start); 5276 } 5277 5278 if(This->host_len) { 5279 data->fields_no++; 5280 if(This->host_type == Uri_HOST_IPV6) 5281 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST, 5282 This->host_len-2, This->canon_uri+This->host_start+1); 5283 else 5284 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST, 5285 This->host_len, This->canon_uri+This->host_start); 5286 } 5287 5288 if(This->userinfo_split > -1) { 5289 data->fields_no++; 5290 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PASSWORD, 5291 This->userinfo_len-This->userinfo_split-1, 5292 This->canon_uri+This->userinfo_start+This->userinfo_split+1); 5293 } 5294 5295 if(This->path_len) { 5296 
data->fields_no++; 5297 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 5298 This->path_len, This->canon_uri+This->path_start); 5299 } else if(marshal) { 5300 WCHAR no_path = '/'; 5301 data->fields_no++; 5302 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 1, &no_path); 5303 } 5304 5305 if(This->has_port) { 5306 data->fields_no++; 5307 *(DWORD*)p = Uri_PROPERTY_PORT; 5308 p += sizeof(DWORD); 5309 *(DWORD*)p = sizeof(DWORD); 5310 p += sizeof(DWORD); 5311 *(DWORD*)p = This->port; 5312 p += sizeof(DWORD); 5313 } 5314 5315 if(This->query_len) { 5316 data->fields_no++; 5317 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_QUERY, 5318 This->query_len, This->canon_uri+This->query_start); 5319 } 5320 5321 if(This->scheme_len) { 5322 data->fields_no++; 5323 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_SCHEME_NAME, 5324 This->scheme_len, This->canon_uri+This->scheme_start); 5325 } 5326 5327 if(This->userinfo_start>-1 && This->userinfo_split!=0) { 5328 data->fields_no++; 5329 if(This->userinfo_split > -1) 5330 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME, 5331 This->userinfo_split, This->canon_uri+This->userinfo_start); 5332 else 5333 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME, 5334 This->userinfo_len, This->canon_uri+This->userinfo_start); 5335 } 5336 } 5337 5338 static HRESULT WINAPI PersistStream_Save(IPersistStream *iface, IStream *pStm, BOOL fClearDirty) 5339 { 5340 Uri *This = impl_from_IPersistStream(iface); 5341 struct persist_uri *data; 5342 ULARGE_INTEGER size; 5343 HRESULT hres; 5344 5345 TRACE("(%p)->(%p %x)\n", This, pStm, fClearDirty); 5346 5347 if(!pStm) 5348 return E_INVALIDARG; 5349 5350 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size); 5351 if(FAILED(hres)) 5352 return hres; 5353 5354 data = heap_alloc_zero(size.u.LowPart); 5355 if(!data) 5356 return E_OUTOFMEMORY; 5357 data->size = size.u.LowPart; 5358 persist_stream_save(This, pStm, FALSE, data); 5359 5360 hres = 
IStream_Write(pStm, data, data->size-2, NULL); 5361 heap_free(data); 5362 return hres; 5363 } 5364 5365 static HRESULT WINAPI PersistStream_GetSizeMax(IPersistStream *iface, ULARGE_INTEGER *pcbSize) 5366 { 5367 Uri *This = impl_from_IPersistStream(iface); 5368 TRACE("(%p)->(%p)\n", This, pcbSize); 5369 5370 if(!pcbSize) 5371 return E_INVALIDARG; 5372 5373 pcbSize->u.LowPart = 2+sizeof(struct persist_uri); 5374 pcbSize->u.HighPart = 0; 5375 if(This->create_flags) 5376 pcbSize->u.LowPart += (SysStringLen(This->raw_uri)+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5377 else /* there's no place for fields no */ 5378 pcbSize->u.LowPart -= sizeof(DWORD); 5379 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS 5380 && This->scheme_type!=URL_SCHEME_FTP) 5381 return S_OK; 5382 5383 if(This->fragment_len) 5384 pcbSize->u.LowPart += (This->fragment_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5385 if(This->host_len) { 5386 if(This->host_type == Uri_HOST_IPV6) 5387 pcbSize->u.LowPart += (This->host_len-1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5388 else 5389 pcbSize->u.LowPart += (This->host_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5390 } 5391 if(This->userinfo_split > -1) 5392 pcbSize->u.LowPart += (This->userinfo_len-This->userinfo_split)*sizeof(WCHAR) + 2*sizeof(DWORD); 5393 if(This->path_len) 5394 pcbSize->u.LowPart += (This->path_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5395 if(This->has_port) 5396 pcbSize->u.LowPart += 3*sizeof(DWORD); 5397 if(This->query_len) 5398 pcbSize->u.LowPart += (This->query_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5399 if(This->scheme_len) 5400 pcbSize->u.LowPart += (This->scheme_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5401 if(This->userinfo_start>-1 && This->userinfo_split!=0) { 5402 if(This->userinfo_split > -1) 5403 pcbSize->u.LowPart += (This->userinfo_split+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5404 else 5405 pcbSize->u.LowPart += (This->userinfo_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5406 } 5407 return S_OK; 5408 } 5409 5410 static const 
IPersistStreamVtbl PersistStreamVtbl = { 5411 PersistStream_QueryInterface, 5412 PersistStream_AddRef, 5413 PersistStream_Release, 5414 PersistStream_GetClassID, 5415 PersistStream_IsDirty, 5416 PersistStream_Load, 5417 PersistStream_Save, 5418 PersistStream_GetSizeMax 5419 }; 5420 5421 static inline Uri* impl_from_IMarshal(IMarshal *iface) 5422 { 5423 return CONTAINING_RECORD(iface, Uri, IMarshal_iface); 5424 } 5425 5426 static HRESULT WINAPI Marshal_QueryInterface(IMarshal *iface, REFIID riid, void **ppvObject) 5427 { 5428 Uri *This = impl_from_IMarshal(iface); 5429 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject); 5430 } 5431 5432 static ULONG WINAPI Marshal_AddRef(IMarshal *iface) 5433 { 5434 Uri *This = impl_from_IMarshal(iface); 5435 return IUri_AddRef(&This->IUri_iface); 5436 } 5437 5438 static ULONG WINAPI Marshal_Release(IMarshal *iface) 5439 { 5440 Uri *This = impl_from_IMarshal(iface); 5441 return IUri_Release(&This->IUri_iface); 5442 } 5443 5444 static HRESULT WINAPI Marshal_GetUnmarshalClass(IMarshal *iface, REFIID riid, void *pv, 5445 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, CLSID *pCid) 5446 { 5447 Uri *This = impl_from_IMarshal(iface); 5448 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv, 5449 dwDestContext, pvDestContext, mshlflags, pCid); 5450 5451 if(!pCid || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM 5452 && dwDestContext!=MSHCTX_INPROC)) 5453 return E_INVALIDARG; 5454 5455 *pCid = CLSID_CUri; 5456 return S_OK; 5457 } 5458 5459 struct inproc_marshal_uri { 5460 DWORD size; 5461 DWORD mshlflags; 5462 DWORD unk[4]; /* process identifier? 
*/ 5463 Uri *uri; 5464 }; 5465 5466 static HRESULT WINAPI Marshal_GetMarshalSizeMax(IMarshal *iface, REFIID riid, void *pv, 5467 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, DWORD *pSize) 5468 { 5469 Uri *This = impl_from_IMarshal(iface); 5470 ULARGE_INTEGER size; 5471 HRESULT hres; 5472 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv, 5473 dwDestContext, pvDestContext, mshlflags, pSize); 5474 5475 if(!pSize || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM 5476 && dwDestContext!=MSHCTX_INPROC)) 5477 return E_INVALIDARG; 5478 5479 if(dwDestContext == MSHCTX_INPROC) { 5480 *pSize = sizeof(struct inproc_marshal_uri); 5481 return S_OK; 5482 } 5483 5484 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size); 5485 if(FAILED(hres)) 5486 return hres; 5487 if(!This->path_len && (This->scheme_type==URL_SCHEME_HTTP 5488 || This->scheme_type==URL_SCHEME_HTTPS 5489 || This->scheme_type==URL_SCHEME_FTP)) 5490 size.u.LowPart += 3*sizeof(DWORD); 5491 *pSize = size.u.LowPart+2*sizeof(DWORD); 5492 return S_OK; 5493 } 5494 5495 static HRESULT WINAPI Marshal_MarshalInterface(IMarshal *iface, IStream *pStm, REFIID riid, 5496 void *pv, DWORD dwDestContext, void *pvDestContext, DWORD mshlflags) 5497 { 5498 Uri *This = impl_from_IMarshal(iface); 5499 DWORD *data; 5500 DWORD size; 5501 HRESULT hres; 5502 5503 TRACE("(%p)->(%p %s %p %x %p %x)\n", This, pStm, debugstr_guid(riid), pv, 5504 dwDestContext, pvDestContext, mshlflags); 5505 5506 if(!pStm || mshlflags!=MSHLFLAGS_NORMAL || (dwDestContext!=MSHCTX_LOCAL 5507 && dwDestContext!=MSHCTX_NOSHAREDMEM && dwDestContext!=MSHCTX_INPROC)) 5508 return E_INVALIDARG; 5509 5510 if(dwDestContext == MSHCTX_INPROC) { 5511 struct inproc_marshal_uri data; 5512 5513 data.size = sizeof(data); 5514 data.mshlflags = MSHCTX_INPROC; 5515 data.unk[0] = 0; 5516 data.unk[1] = 0; 5517 data.unk[2] = 0; 5518 data.unk[3] = 0; 5519 data.uri = This; 5520 5521 hres = IStream_Write(pStm, &data, data.size, 
NULL); 5522 if(FAILED(hres)) 5523 return hres; 5524 5525 IUri_AddRef(&This->IUri_iface); 5526 return S_OK; 5527 } 5528 5529 hres = IMarshal_GetMarshalSizeMax(iface, riid, pv, dwDestContext, 5530 pvDestContext, mshlflags, &size); 5531 if(FAILED(hres)) 5532 return hres; 5533 5534 data = heap_alloc_zero(size); 5535 if(!data) 5536 return E_OUTOFMEMORY; 5537 5538 data[0] = size; 5539 data[1] = dwDestContext; 5540 data[2] = size-2*sizeof(DWORD); 5541 persist_stream_save(This, pStm, TRUE, (struct persist_uri*)(data+2)); 5542 5543 hres = IStream_Write(pStm, data, data[0]-2, NULL); 5544 heap_free(data); 5545 return hres; 5546 } 5547 5548 static HRESULT WINAPI Marshal_UnmarshalInterface(IMarshal *iface, 5549 IStream *pStm, REFIID riid, void **ppv) 5550 { 5551 Uri *This = impl_from_IMarshal(iface); 5552 DWORD header[2]; 5553 HRESULT hres; 5554 5555 TRACE("(%p)->(%p %s %p)\n", This, pStm, debugstr_guid(riid), ppv); 5556 5557 if(This->create_flags) 5558 return E_UNEXPECTED; 5559 if(!pStm || !riid || !ppv) 5560 return E_INVALIDARG; 5561 5562 hres = IStream_Read(pStm, header, sizeof(header), NULL); 5563 if(FAILED(hres)) 5564 return hres; 5565 5566 if(header[1]!=MSHCTX_LOCAL && header[1]!=MSHCTX_NOSHAREDMEM 5567 && header[1]!=MSHCTX_INPROC) 5568 return E_UNEXPECTED; 5569 5570 if(header[1] == MSHCTX_INPROC) { 5571 struct inproc_marshal_uri data; 5572 parse_data parse; 5573 5574 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL); 5575 if(FAILED(hres)) 5576 return hres; 5577 5578 This->raw_uri = SysAllocString(data.uri->raw_uri); 5579 if(!This->raw_uri) { 5580 return E_OUTOFMEMORY; 5581 } 5582 5583 memset(&parse, 0, sizeof(parse_data)); 5584 parse.uri = This->raw_uri; 5585 5586 if(!parse_uri(&parse, data.uri->create_flags)) 5587 return E_INVALIDARG; 5588 5589 hres = canonicalize_uri(&parse, This, data.uri->create_flags); 5590 if(FAILED(hres)) 5591 return hres; 5592 5593 This->create_flags = data.uri->create_flags; 5594 IUri_Release(&data.uri->IUri_iface); 5595 
5596 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5597 } 5598 5599 hres = IPersistStream_Load(&This->IPersistStream_iface, pStm); 5600 if(FAILED(hres)) 5601 return hres; 5602 5603 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5604 } 5605 5606 static HRESULT WINAPI Marshal_ReleaseMarshalData(IMarshal *iface, IStream *pStm) 5607 { 5608 Uri *This = impl_from_IMarshal(iface); 5609 LARGE_INTEGER off; 5610 DWORD header[2]; 5611 HRESULT hres; 5612 5613 TRACE("(%p)->(%p)\n", This, pStm); 5614 5615 if(!pStm) 5616 return E_INVALIDARG; 5617 5618 hres = IStream_Read(pStm, header, 2*sizeof(DWORD), NULL); 5619 if(FAILED(hres)) 5620 return hres; 5621 5622 if(header[1] == MSHCTX_INPROC) { 5623 struct inproc_marshal_uri data; 5624 5625 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL); 5626 if(FAILED(hres)) 5627 return hres; 5628 5629 IUri_Release(&data.uri->IUri_iface); 5630 return S_OK; 5631 } 5632 5633 off.u.LowPart = header[0]-sizeof(header)-2; 5634 off.u.HighPart = 0; 5635 return IStream_Seek(pStm, off, STREAM_SEEK_CUR, NULL); 5636 } 5637 5638 static HRESULT WINAPI Marshal_DisconnectObject(IMarshal *iface, DWORD dwReserved) 5639 { 5640 Uri *This = impl_from_IMarshal(iface); 5641 TRACE("(%p)->(%x)\n", This, dwReserved); 5642 return S_OK; 5643 } 5644 5645 static const IMarshalVtbl MarshalVtbl = { 5646 Marshal_QueryInterface, 5647 Marshal_AddRef, 5648 Marshal_Release, 5649 Marshal_GetUnmarshalClass, 5650 Marshal_GetMarshalSizeMax, 5651 Marshal_MarshalInterface, 5652 Marshal_UnmarshalInterface, 5653 Marshal_ReleaseMarshalData, 5654 Marshal_DisconnectObject 5655 }; 5656 5657 HRESULT Uri_Construct(IUnknown *pUnkOuter, LPVOID *ppobj) 5658 { 5659 Uri *ret = heap_alloc_zero(sizeof(Uri)); 5660 5661 TRACE("(%p %p)\n", pUnkOuter, ppobj); 5662 5663 *ppobj = ret; 5664 if(!ret) 5665 return E_OUTOFMEMORY; 5666 5667 ret->IUri_iface.lpVtbl = &UriVtbl; 5668 ret->IUriBuilderFactory_iface.lpVtbl = &UriBuilderFactoryVtbl; 5669 
ret->IPersistStream_iface.lpVtbl = &PersistStreamVtbl; 5670 ret->IMarshal_iface.lpVtbl = &MarshalVtbl; 5671 ret->ref = 1; 5672 5673 *ppobj = &ret->IUri_iface; 5674 return S_OK; 5675 } 5676 5677 /*********************************************************************** 5678 * CreateUri (urlmon.@) 5679 * 5680 * Creates a new IUri object using the URI represented by pwzURI. This function 5681 * parses and validates the components of pwzURI and then canonicalizes the 5682 * parsed components. 5683 * 5684 * PARAMS 5685 * pwzURI [I] The URI to parse, validate, and canonicalize. 5686 * dwFlags [I] Flags which can affect how the parsing/canonicalization is performed. 5687 * dwReserved [I] Reserved (not used). 5688 * ppURI [O] The resulting IUri after parsing/canonicalization occurs. 5689 * 5690 * RETURNS 5691 * Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri. 5692 * Failure: E_INVALIDARG if there are invalid flag combinations in dwFlags, or an 5693 * invalid parameter, or pwzURI doesn't represent a valid URI. 5694 * E_OUTOFMEMORY if any memory allocation fails. 5695 * 5696 * NOTES 5697 * Default flags: 5698 * Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES, 5699 * Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS. 
5700 */ 5701 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI) 5702 { 5703 const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME| 5704 Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE| 5705 Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES| 5706 Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI| 5707 Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS|Uri_CREATE_FILE_USE_DOS_PATH; 5708 Uri *ret; 5709 HRESULT hr; 5710 parse_data data; 5711 5712 TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI); 5713 5714 if(!ppURI) 5715 return E_INVALIDARG; 5716 5717 if(!pwzURI) { 5718 *ppURI = NULL; 5719 return E_INVALIDARG; 5720 } 5721 5722 /* Check for invalid flags. */ 5723 if(has_invalid_flag_combination(dwFlags)) { 5724 *ppURI = NULL; 5725 return E_INVALIDARG; 5726 } 5727 5728 /* Currently unsupported. */ 5729 if(dwFlags & ~supported_flags) 5730 FIXME("Ignoring unsupported flag(s) %x\n", dwFlags & ~supported_flags); 5731 5732 hr = Uri_Construct(NULL, (void**)&ret); 5733 if(FAILED(hr)) { 5734 *ppURI = NULL; 5735 return hr; 5736 } 5737 5738 /* Explicitly set the default flags if it doesn't cause a flag conflict. */ 5739 apply_default_flags(&dwFlags); 5740 5741 /* Pre process the URI, unless told otherwise. */ 5742 if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 5743 ret->raw_uri = pre_process_uri(pwzURI); 5744 else 5745 ret->raw_uri = SysAllocString(pwzURI); 5746 5747 if(!ret->raw_uri) { 5748 heap_free(ret); 5749 return E_OUTOFMEMORY; 5750 } 5751 5752 memset(&data, 0, sizeof(parse_data)); 5753 data.uri = ret->raw_uri; 5754 5755 /* Validate and parse the URI into its components. 
*/ 5756 if(!parse_uri(&data, dwFlags)) { 5757 /* Encountered an unsupported or invalid URI */ 5758 IUri_Release(&ret->IUri_iface); 5759 *ppURI = NULL; 5760 return E_INVALIDARG; 5761 } 5762 5763 /* Canonicalize the URI. */ 5764 hr = canonicalize_uri(&data, ret, dwFlags); 5765 if(FAILED(hr)) { 5766 IUri_Release(&ret->IUri_iface); 5767 *ppURI = NULL; 5768 return hr; 5769 } 5770 5771 ret->create_flags = dwFlags; 5772 5773 *ppURI = &ret->IUri_iface; 5774 return S_OK; 5775 } 5776 5777 /*********************************************************************** 5778 * CreateUriWithFragment (urlmon.@) 5779 * 5780 * Creates a new IUri object. This is almost the same as CreateUri, expect that 5781 * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI. 5782 * 5783 * PARAMS 5784 * pwzURI [I] The URI to parse and perform canonicalization on. 5785 * pwzFragment [I] The explicit fragment string which should be added to pwzURI. 5786 * dwFlags [I] The flags which will be passed to CreateUri. 5787 * dwReserved [I] Reserved (not used). 5788 * ppURI [O] The resulting IUri after parsing/canonicalization. 5789 * 5790 * RETURNS 5791 * Success: S_OK. ppURI contains the pointer to the newly allocated IUri. 5792 * Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment 5793 * isn't NULL. Will also return E_INVALIDARG for the same reasons as 5794 * CreateUri will. E_OUTOFMEMORY if any allocation fails. 5795 */ 5796 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags, 5797 DWORD_PTR dwReserved, IUri **ppURI) 5798 { 5799 HRESULT hres; 5800 TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI); 5801 5802 if(!ppURI) 5803 return E_INVALIDARG; 5804 5805 if(!pwzURI) { 5806 *ppURI = NULL; 5807 return E_INVALIDARG; 5808 } 5809 5810 /* Check if a fragment should be appended to the URI string. 
*/ 5811 if(pwzFragment) { 5812 WCHAR *uriW; 5813 DWORD uri_len, frag_len; 5814 BOOL add_pound; 5815 5816 /* Check if the original URI already has a fragment component. */ 5817 if(StrChrW(pwzURI, '#')) { 5818 *ppURI = NULL; 5819 return E_INVALIDARG; 5820 } 5821 5822 uri_len = lstrlenW(pwzURI); 5823 frag_len = lstrlenW(pwzFragment); 5824 5825 /* If the fragment doesn't start with a '#', one will be added. */ 5826 add_pound = *pwzFragment != '#'; 5827 5828 if(add_pound) 5829 uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR)); 5830 else 5831 uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR)); 5832 5833 if(!uriW) 5834 return E_OUTOFMEMORY; 5835 5836 memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR)); 5837 if(add_pound) 5838 uriW[uri_len++] = '#'; 5839 memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR)); 5840 5841 hres = CreateUri(uriW, dwFlags, 0, ppURI); 5842 5843 heap_free(uriW); 5844 } else 5845 /* A fragment string wasn't specified, so just forward the call. */ 5846 hres = CreateUri(pwzURI, dwFlags, 0, ppURI); 5847 5848 return hres; 5849 } 5850 5851 static HRESULT build_uri(const UriBuilder *builder, IUri **uri, DWORD create_flags, 5852 DWORD use_orig_flags, DWORD encoding_mask) 5853 { 5854 HRESULT hr; 5855 parse_data data; 5856 Uri *ret; 5857 5858 if(!uri) 5859 return E_POINTER; 5860 5861 if(encoding_mask && (!builder->uri || builder->modified_props)) { 5862 *uri = NULL; 5863 return E_NOTIMPL; 5864 } 5865 5866 /* Decide what flags should be used when creating the Uri. */ 5867 if((use_orig_flags & UriBuilder_USE_ORIGINAL_FLAGS) && builder->uri) 5868 create_flags = builder->uri->create_flags; 5869 else { 5870 if(has_invalid_flag_combination(create_flags)) { 5871 *uri = NULL; 5872 return E_INVALIDARG; 5873 } 5874 5875 /* Set the default flags if they don't cause a conflict. */ 5876 apply_default_flags(&create_flags); 5877 } 5878 5879 /* Return the base IUri if no changes have been made and the create_flags match. 
*/ 5880 if(builder->uri && !builder->modified_props && builder->uri->create_flags == create_flags) { 5881 *uri = &builder->uri->IUri_iface; 5882 IUri_AddRef(*uri); 5883 return S_OK; 5884 } 5885 5886 hr = validate_components(builder, &data, create_flags); 5887 if(FAILED(hr)) { 5888 *uri = NULL; 5889 return hr; 5890 } 5891 5892 hr = Uri_Construct(NULL, (void**)&ret); 5893 if(FAILED(hr)) { 5894 *uri = NULL; 5895 return hr; 5896 } 5897 5898 hr = generate_uri(builder, &data, ret, create_flags); 5899 if(FAILED(hr)) { 5900 IUri_Release(&ret->IUri_iface); 5901 *uri = NULL; 5902 return hr; 5903 } 5904 5905 *uri = &ret->IUri_iface; 5906 return S_OK; 5907 } 5908 5909 static inline UriBuilder* impl_from_IUriBuilder(IUriBuilder *iface) 5910 { 5911 return CONTAINING_RECORD(iface, UriBuilder, IUriBuilder_iface); 5912 } 5913 5914 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv) 5915 { 5916 UriBuilder *This = impl_from_IUriBuilder(iface); 5917 5918 if(IsEqualGUID(&IID_IUnknown, riid)) { 5919 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 5920 *ppv = &This->IUriBuilder_iface; 5921 }else if(IsEqualGUID(&IID_IUriBuilder, riid)) { 5922 TRACE("(%p)->(IID_IUriBuilder %p)\n", This, ppv); 5923 *ppv = &This->IUriBuilder_iface; 5924 }else { 5925 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 5926 *ppv = NULL; 5927 return E_NOINTERFACE; 5928 } 5929 5930 IUnknown_AddRef((IUnknown*)*ppv); 5931 return S_OK; 5932 } 5933 5934 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface) 5935 { 5936 UriBuilder *This = impl_from_IUriBuilder(iface); 5937 LONG ref = InterlockedIncrement(&This->ref); 5938 5939 TRACE("(%p) ref=%d\n", This, ref); 5940 5941 return ref; 5942 } 5943 5944 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface) 5945 { 5946 UriBuilder *This = impl_from_IUriBuilder(iface); 5947 LONG ref = InterlockedDecrement(&This->ref); 5948 5949 TRACE("(%p) ref=%d\n", This, ref); 5950 5951 if(!ref) { 5952 if(This->uri) 
IUri_Release(&This->uri->IUri_iface); 5953 heap_free(This->fragment); 5954 heap_free(This->host); 5955 heap_free(This->password); 5956 heap_free(This->path); 5957 heap_free(This->query); 5958 heap_free(This->scheme); 5959 heap_free(This->username); 5960 heap_free(This); 5961 } 5962 5963 return ref; 5964 } 5965 5966 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface, 5967 DWORD dwAllowEncodingPropertyMask, 5968 DWORD_PTR dwReserved, 5969 IUri **ppIUri) 5970 { 5971 UriBuilder *This = impl_from_IUriBuilder(iface); 5972 HRESULT hr; 5973 TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5974 5975 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 5976 if(hr == E_NOTIMPL) 5977 FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5978 return hr; 5979 } 5980 5981 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface, 5982 DWORD dwCreateFlags, 5983 DWORD dwAllowEncodingPropertyMask, 5984 DWORD_PTR dwReserved, 5985 IUri **ppIUri) 5986 { 5987 UriBuilder *This = impl_from_IUriBuilder(iface); 5988 HRESULT hr; 5989 TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5990 5991 if(dwCreateFlags == -1) 5992 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 5993 else 5994 hr = build_uri(This, ppIUri, dwCreateFlags, 0, dwAllowEncodingPropertyMask); 5995 5996 if(hr == E_NOTIMPL) 5997 FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5998 return hr; 5999 } 6000 6001 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface, 6002 DWORD dwCreateFlags, 6003 DWORD dwUriBuilderFlags, 6004 DWORD dwAllowEncodingPropertyMask, 6005 DWORD_PTR dwReserved, 6006 IUri **ppIUri) 6007 { 6008 UriBuilder *This = impl_from_IUriBuilder(iface); 6009 HRESULT hr; 6010 TRACE("(%p)->(0x%08x 
0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 6011 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 6012 6013 hr = build_uri(This, ppIUri, dwCreateFlags, dwUriBuilderFlags, dwAllowEncodingPropertyMask); 6014 if(hr == E_NOTIMPL) 6015 FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 6016 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 6017 return hr; 6018 } 6019 6020 static HRESULT WINAPI UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri) 6021 { 6022 UriBuilder *This = impl_from_IUriBuilder(iface); 6023 TRACE("(%p)->(%p)\n", This, ppIUri); 6024 6025 if(!ppIUri) 6026 return E_POINTER; 6027 6028 if(This->uri) { 6029 IUri *uri = &This->uri->IUri_iface; 6030 IUri_AddRef(uri); 6031 *ppIUri = uri; 6032 } else 6033 *ppIUri = NULL; 6034 6035 return S_OK; 6036 } 6037 6038 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri) 6039 { 6040 UriBuilder *This = impl_from_IUriBuilder(iface); 6041 TRACE("(%p)->(%p)\n", This, pIUri); 6042 6043 if(pIUri) { 6044 Uri *uri; 6045 6046 if((uri = get_uri_obj(pIUri))) { 6047 /* Only reset the builder if its Uri isn't the same as 6048 * the Uri passed to the function. 6049 */ 6050 if(This->uri != uri) { 6051 reset_builder(This); 6052 6053 This->uri = uri; 6054 if(uri->has_port) 6055 This->port = uri->port; 6056 6057 IUri_AddRef(pIUri); 6058 } 6059 } else { 6060 FIXME("(%p)->(%p) Unknown IUri types not supported yet.\n", This, pIUri); 6061 return E_NOTIMPL; 6062 } 6063 } else if(This->uri) 6064 /* Only reset the builder if its Uri isn't NULL. 
*/ 6065 reset_builder(This); 6066 6067 return S_OK; 6068 } 6069 6070 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment) 6071 { 6072 UriBuilder *This = impl_from_IUriBuilder(iface); 6073 TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment); 6074 6075 if(!This->uri || This->uri->fragment_start == -1 || This->modified_props & Uri_HAS_FRAGMENT) 6076 return get_builder_component(&This->fragment, &This->fragment_len, NULL, 0, ppwzFragment, pcchFragment); 6077 else 6078 return get_builder_component(&This->fragment, &This->fragment_len, This->uri->canon_uri+This->uri->fragment_start, 6079 This->uri->fragment_len, ppwzFragment, pcchFragment); 6080 } 6081 6082 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost) 6083 { 6084 UriBuilder *This = impl_from_IUriBuilder(iface); 6085 TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost); 6086 6087 if(!This->uri || This->uri->host_start == -1 || This->modified_props & Uri_HAS_HOST) 6088 return get_builder_component(&This->host, &This->host_len, NULL, 0, ppwzHost, pcchHost); 6089 else { 6090 if(This->uri->host_type == Uri_HOST_IPV6) 6091 /* Don't include the '[' and ']' around the address. 
*/ 6092 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start+1, 6093 This->uri->host_len-2, ppwzHost, pcchHost); 6094 else 6095 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start, 6096 This->uri->host_len, ppwzHost, pcchHost); 6097 } 6098 } 6099 6100 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword) 6101 { 6102 UriBuilder *This = impl_from_IUriBuilder(iface); 6103 TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword); 6104 6105 if(!This->uri || This->uri->userinfo_split == -1 || This->modified_props & Uri_HAS_PASSWORD) 6106 return get_builder_component(&This->password, &This->password_len, NULL, 0, ppwzPassword, pcchPassword); 6107 else { 6108 const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start+This->uri->userinfo_split+1; 6109 DWORD len = This->uri->userinfo_len-This->uri->userinfo_split-1; 6110 return get_builder_component(&This->password, &This->password_len, start, len, ppwzPassword, pcchPassword); 6111 } 6112 } 6113 6114 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath) 6115 { 6116 UriBuilder *This = impl_from_IUriBuilder(iface); 6117 TRACE("(%p)->(%p %p)\n", This, pcchPath, ppwzPath); 6118 6119 if(!This->uri || This->uri->path_start == -1 || This->modified_props & Uri_HAS_PATH) 6120 return get_builder_component(&This->path, &This->path_len, NULL, 0, ppwzPath, pcchPath); 6121 else 6122 return get_builder_component(&This->path, &This->path_len, This->uri->canon_uri+This->uri->path_start, 6123 This->uri->path_len, ppwzPath, pcchPath); 6124 } 6125 6126 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort) 6127 { 6128 UriBuilder *This = impl_from_IUriBuilder(iface); 6129 TRACE("(%p)->(%p %p)\n", This, pfHasPort, pdwPort); 6130 6131 if(!pfHasPort) { 6132 if(pdwPort) 6133 *pdwPort = 0; 6134 return 
E_POINTER; 6135 } 6136 6137 if(!pdwPort) { 6138 *pfHasPort = FALSE; 6139 return E_POINTER; 6140 } 6141 6142 *pfHasPort = This->has_port; 6143 *pdwPort = This->port; 6144 return S_OK; 6145 } 6146 6147 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery) 6148 { 6149 UriBuilder *This = impl_from_IUriBuilder(iface); 6150 TRACE("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery); 6151 6152 if(!This->uri || This->uri->query_start == -1 || This->modified_props & Uri_HAS_QUERY) 6153 return get_builder_component(&This->query, &This->query_len, NULL, 0, ppwzQuery, pcchQuery); 6154 else 6155 return get_builder_component(&This->query, &This->query_len, This->uri->canon_uri+This->uri->query_start, 6156 This->uri->query_len, ppwzQuery, pcchQuery); 6157 } 6158 6159 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName) 6160 { 6161 UriBuilder *This = impl_from_IUriBuilder(iface); 6162 TRACE("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName); 6163 6164 if(!This->uri || This->uri->scheme_start == -1 || This->modified_props & Uri_HAS_SCHEME_NAME) 6165 return get_builder_component(&This->scheme, &This->scheme_len, NULL, 0, ppwzSchemeName, pcchSchemeName); 6166 else 6167 return get_builder_component(&This->scheme, &This->scheme_len, This->uri->canon_uri+This->uri->scheme_start, 6168 This->uri->scheme_len, ppwzSchemeName, pcchSchemeName); 6169 } 6170 6171 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName) 6172 { 6173 UriBuilder *This = impl_from_IUriBuilder(iface); 6174 TRACE("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName); 6175 6176 if(!This->uri || This->uri->userinfo_start == -1 || This->uri->userinfo_split == 0 || 6177 This->modified_props & Uri_HAS_USER_NAME) 6178 return get_builder_component(&This->username, &This->username_len, NULL, 0, ppwzUserName, pcchUserName); 6179 else { 6180 const WCHAR *start = 
This->uri->canon_uri+This->uri->userinfo_start; 6181 6182 /* Check if there's a password in the userinfo section. */ 6183 if(This->uri->userinfo_split > -1) 6184 /* Don't include the password. */ 6185 return get_builder_component(&This->username, &This->username_len, start, 6186 This->uri->userinfo_split, ppwzUserName, pcchUserName); 6187 else 6188 return get_builder_component(&This->username, &This->username_len, start, 6189 This->uri->userinfo_len, ppwzUserName, pcchUserName); 6190 } 6191 } 6192 6193 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue) 6194 { 6195 UriBuilder *This = impl_from_IUriBuilder(iface); 6196 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6197 return set_builder_component(&This->fragment, &This->fragment_len, pwzNewValue, '#', 6198 &This->modified_props, Uri_HAS_FRAGMENT); 6199 } 6200 6201 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue) 6202 { 6203 UriBuilder *This = impl_from_IUriBuilder(iface); 6204 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6205 6206 /* Host name can't be set to NULL. 
*/ 6207 if(!pwzNewValue) 6208 return E_INVALIDARG; 6209 6210 return set_builder_component(&This->host, &This->host_len, pwzNewValue, 0, 6211 &This->modified_props, Uri_HAS_HOST); 6212 } 6213 6214 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue) 6215 { 6216 UriBuilder *This = impl_from_IUriBuilder(iface); 6217 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6218 return set_builder_component(&This->password, &This->password_len, pwzNewValue, 0, 6219 &This->modified_props, Uri_HAS_PASSWORD); 6220 } 6221 6222 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue) 6223 { 6224 UriBuilder *This = impl_from_IUriBuilder(iface); 6225 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6226 return set_builder_component(&This->path, &This->path_len, pwzNewValue, 0, 6227 &This->modified_props, Uri_HAS_PATH); 6228 } 6229 6230 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue) 6231 { 6232 UriBuilder *This = impl_from_IUriBuilder(iface); 6233 TRACE("(%p)->(%d %d)\n", This, fHasPort, dwNewValue); 6234 6235 This->has_port = fHasPort; 6236 This->port = dwNewValue; 6237 This->modified_props |= Uri_HAS_PORT; 6238 return S_OK; 6239 } 6240 6241 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue) 6242 { 6243 UriBuilder *This = impl_from_IUriBuilder(iface); 6244 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6245 return set_builder_component(&This->query, &This->query_len, pwzNewValue, '?', 6246 &This->modified_props, Uri_HAS_QUERY); 6247 } 6248 6249 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue) 6250 { 6251 UriBuilder *This = impl_from_IUriBuilder(iface); 6252 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6253 6254 /* Only set the scheme name if it's not NULL or empty. 
*/ 6255 if(!pwzNewValue || !*pwzNewValue) 6256 return E_INVALIDARG; 6257 6258 return set_builder_component(&This->scheme, &This->scheme_len, pwzNewValue, 0, 6259 &This->modified_props, Uri_HAS_SCHEME_NAME); 6260 } 6261 6262 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue) 6263 { 6264 UriBuilder *This = impl_from_IUriBuilder(iface); 6265 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6266 return set_builder_component(&This->username, &This->username_len, pwzNewValue, 0, 6267 &This->modified_props, Uri_HAS_USER_NAME); 6268 } 6269 6270 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask) 6271 { 6272 const DWORD accepted_flags = Uri_HAS_AUTHORITY|Uri_HAS_DOMAIN|Uri_HAS_EXTENSION|Uri_HAS_FRAGMENT|Uri_HAS_HOST| 6273 Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY| 6274 Uri_HAS_USER_INFO|Uri_HAS_USER_NAME; 6275 6276 UriBuilder *This = impl_from_IUriBuilder(iface); 6277 TRACE("(%p)->(0x%08x)\n", This, dwPropertyMask); 6278 6279 if(dwPropertyMask & ~accepted_flags) 6280 return E_INVALIDARG; 6281 6282 if(dwPropertyMask & Uri_HAS_FRAGMENT) 6283 UriBuilder_SetFragment(iface, NULL); 6284 6285 /* Even though you can't set the host name to NULL or an 6286 * empty string, you can still remove it... for some reason. 
6287 */ 6288 if(dwPropertyMask & Uri_HAS_HOST) 6289 set_builder_component(&This->host, &This->host_len, NULL, 0, 6290 &This->modified_props, Uri_HAS_HOST); 6291 6292 if(dwPropertyMask & Uri_HAS_PASSWORD) 6293 UriBuilder_SetPassword(iface, NULL); 6294 6295 if(dwPropertyMask & Uri_HAS_PATH) 6296 UriBuilder_SetPath(iface, NULL); 6297 6298 if(dwPropertyMask & Uri_HAS_PORT) 6299 UriBuilder_SetPort(iface, FALSE, 0); 6300 6301 if(dwPropertyMask & Uri_HAS_QUERY) 6302 UriBuilder_SetQuery(iface, NULL); 6303 6304 if(dwPropertyMask & Uri_HAS_USER_NAME) 6305 UriBuilder_SetUserName(iface, NULL); 6306 6307 return S_OK; 6308 } 6309 6310 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified) 6311 { 6312 UriBuilder *This = impl_from_IUriBuilder(iface); 6313 TRACE("(%p)->(%p)\n", This, pfModified); 6314 6315 if(!pfModified) 6316 return E_POINTER; 6317 6318 *pfModified = This->modified_props > 0; 6319 return S_OK; 6320 } 6321 6322 static const IUriBuilderVtbl UriBuilderVtbl = { 6323 UriBuilder_QueryInterface, 6324 UriBuilder_AddRef, 6325 UriBuilder_Release, 6326 UriBuilder_CreateUriSimple, 6327 UriBuilder_CreateUri, 6328 UriBuilder_CreateUriWithFlags, 6329 UriBuilder_GetIUri, 6330 UriBuilder_SetIUri, 6331 UriBuilder_GetFragment, 6332 UriBuilder_GetHost, 6333 UriBuilder_GetPassword, 6334 UriBuilder_GetPath, 6335 UriBuilder_GetPort, 6336 UriBuilder_GetQuery, 6337 UriBuilder_GetSchemeName, 6338 UriBuilder_GetUserName, 6339 UriBuilder_SetFragment, 6340 UriBuilder_SetHost, 6341 UriBuilder_SetPassword, 6342 UriBuilder_SetPath, 6343 UriBuilder_SetPort, 6344 UriBuilder_SetQuery, 6345 UriBuilder_SetSchemeName, 6346 UriBuilder_SetUserName, 6347 UriBuilder_RemoveProperties, 6348 UriBuilder_HasBeenModified, 6349 }; 6350 6351 /*********************************************************************** 6352 * CreateIUriBuilder (urlmon.@) 6353 */ 6354 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder) 6355 { 
6356 UriBuilder *ret; 6357 6358 TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 6359 6360 if(!ppIUriBuilder) 6361 return E_POINTER; 6362 6363 ret = heap_alloc_zero(sizeof(UriBuilder)); 6364 if(!ret) 6365 return E_OUTOFMEMORY; 6366 6367 ret->IUriBuilder_iface.lpVtbl = &UriBuilderVtbl; 6368 ret->ref = 1; 6369 6370 if(pIUri) { 6371 Uri *uri; 6372 6373 if((uri = get_uri_obj(pIUri))) { 6374 if(!uri->create_flags) { 6375 heap_free(ret); 6376 return E_UNEXPECTED; 6377 } 6378 IUri_AddRef(pIUri); 6379 ret->uri = uri; 6380 6381 if(uri->has_port) 6382 /* Windows doesn't set 'has_port' to TRUE in this case. */ 6383 ret->port = uri->port; 6384 6385 } else { 6386 heap_free(ret); 6387 *ppIUriBuilder = NULL; 6388 FIXME("(%p %x %x %p): Unknown IUri types not supported yet.\n", pIUri, dwFlags, 6389 (DWORD)dwReserved, ppIUriBuilder); 6390 return E_NOTIMPL; 6391 } 6392 } 6393 6394 *ppIUriBuilder = &ret->IUriBuilder_iface; 6395 return S_OK; 6396 } 6397 6398 /* Merges the base path with the relative path and stores the resulting path 6399 * and path len in 'result' and 'result_len'. 6400 */ 6401 static HRESULT merge_paths(parse_data *data, const WCHAR *base, DWORD base_len, const WCHAR *relative, 6402 DWORD relative_len, WCHAR **result, DWORD *result_len, DWORD flags) 6403 { 6404 const WCHAR *end = NULL; 6405 DWORD base_copy_len = 0; 6406 WCHAR *ptr; 6407 6408 if(base_len) { 6409 if(data->scheme_type == URL_SCHEME_MK && *relative == '/') { 6410 /* Find '::' segment */ 6411 for(end = base; end < base+base_len-1; end++) { 6412 if(end[0] == ':' && end[1] == ':') { 6413 end++; 6414 break; 6415 } 6416 } 6417 6418 /* If not found, try finding the end of @xxx: */ 6419 if(end == base+base_len-1) 6420 end = *base == '@' ? memchr(base, ':', base_len) : NULL; 6421 }else { 6422 /* Find the characters that will be copied over from the base path. 
*/ 6423 end = memrchrW(base, '/', base_len); 6424 if(!end && data->scheme_type == URL_SCHEME_FILE) 6425 /* Try looking for a '\\'. */ 6426 end = memrchrW(base, '\\', base_len); 6427 } 6428 } 6429 6430 if(end) { 6431 base_copy_len = (end+1)-base; 6432 *result = heap_alloc((base_copy_len+relative_len+1)*sizeof(WCHAR)); 6433 } else 6434 *result = heap_alloc((relative_len+1)*sizeof(WCHAR)); 6435 6436 if(!(*result)) { 6437 *result_len = 0; 6438 return E_OUTOFMEMORY; 6439 } 6440 6441 ptr = *result; 6442 if(end) { 6443 memcpy(ptr, base, base_copy_len*sizeof(WCHAR)); 6444 ptr += base_copy_len; 6445 } 6446 6447 memcpy(ptr, relative, relative_len*sizeof(WCHAR)); 6448 ptr += relative_len; 6449 *ptr = '\0'; 6450 6451 *result_len = (ptr-*result); 6452 TRACE("ret %s\n", debugstr_wn(*result, *result_len)); 6453 return S_OK; 6454 } 6455 6456 static HRESULT combine_uri(Uri *base, Uri *relative, DWORD flags, IUri **result, DWORD extras) { 6457 Uri *ret; 6458 HRESULT hr; 6459 parse_data data; 6460 Uri *proc_uri = base; 6461 DWORD create_flags = 0, len = 0; 6462 6463 memset(&data, 0, sizeof(parse_data)); 6464 6465 /* Base case is when the relative Uri has a scheme name, 6466 * if it does, then 'result' will contain the same data 6467 * as the relative Uri. 
6468 */ 6469 if(relative->scheme_start > -1) { 6470 data.uri = SysAllocString(relative->raw_uri); 6471 if(!data.uri) { 6472 *result = NULL; 6473 return E_OUTOFMEMORY; 6474 } 6475 6476 parse_uri(&data, Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME); 6477 6478 hr = Uri_Construct(NULL, (void**)&ret); 6479 if(FAILED(hr)) { 6480 *result = NULL; 6481 return hr; 6482 } 6483 6484 if(extras & COMBINE_URI_FORCE_FLAG_USE) { 6485 if(flags & URL_DONT_SIMPLIFY) 6486 create_flags |= Uri_CREATE_NO_CANONICALIZE; 6487 if(flags & URL_DONT_UNESCAPE_EXTRA_INFO) 6488 create_flags |= Uri_CREATE_NO_DECODE_EXTRA_INFO; 6489 } 6490 6491 ret->raw_uri = data.uri; 6492 hr = canonicalize_uri(&data, ret, create_flags); 6493 if(FAILED(hr)) { 6494 IUri_Release(&ret->IUri_iface); 6495 *result = NULL; 6496 return hr; 6497 } 6498 6499 apply_default_flags(&create_flags); 6500 ret->create_flags = create_flags; 6501 6502 *result = &ret->IUri_iface; 6503 } else { 6504 WCHAR *path = NULL; 6505 DWORD raw_flags = 0; 6506 6507 if(base->scheme_start > -1) { 6508 data.scheme = base->canon_uri+base->scheme_start; 6509 data.scheme_len = base->scheme_len; 6510 data.scheme_type = base->scheme_type; 6511 } else { 6512 data.is_relative = TRUE; 6513 data.scheme_type = URL_SCHEME_UNKNOWN; 6514 create_flags |= Uri_CREATE_ALLOW_RELATIVE; 6515 } 6516 6517 if(relative->authority_start > -1) 6518 proc_uri = relative; 6519 6520 if(proc_uri->authority_start > -1) { 6521 if(proc_uri->userinfo_start > -1 && proc_uri->userinfo_split != 0) { 6522 data.username = proc_uri->canon_uri+proc_uri->userinfo_start; 6523 data.username_len = (proc_uri->userinfo_split > -1) ? 
proc_uri->userinfo_split : proc_uri->userinfo_len; 6524 } 6525 6526 if(proc_uri->userinfo_split > -1) { 6527 data.password = proc_uri->canon_uri+proc_uri->userinfo_start+proc_uri->userinfo_split+1; 6528 data.password_len = proc_uri->userinfo_len-proc_uri->userinfo_split-1; 6529 } 6530 6531 if(proc_uri->host_start > -1) { 6532 data.host = proc_uri->canon_uri+proc_uri->host_start; 6533 data.host_len = proc_uri->host_len; 6534 data.host_type = proc_uri->host_type; 6535 } 6536 6537 if(proc_uri->has_port) { 6538 data.has_port = TRUE; 6539 data.port_value = proc_uri->port; 6540 } 6541 } else if(base->scheme_type != URL_SCHEME_FILE) 6542 data.is_opaque = TRUE; 6543 6544 if(proc_uri == relative || relative->path_start == -1 || !relative->path_len) { 6545 if(proc_uri->path_start > -1) { 6546 data.path = proc_uri->canon_uri+proc_uri->path_start; 6547 data.path_len = proc_uri->path_len; 6548 } else if(!data.is_opaque) { 6549 /* Just set the path as a '/' if the base didn't have 6550 * one and if it's a hierarchical URI. 6551 */ 6552 static const WCHAR slashW[] = {'/',0}; 6553 data.path = slashW; 6554 data.path_len = 1; 6555 } 6556 6557 if(relative->query_start > -1) 6558 proc_uri = relative; 6559 6560 if(proc_uri->query_start > -1) { 6561 data.query = proc_uri->canon_uri+proc_uri->query_start; 6562 data.query_len = proc_uri->query_len; 6563 } 6564 } else { 6565 const WCHAR *ptr, **pptr; 6566 DWORD path_offset = 0, path_len = 0; 6567 6568 /* There's two possibilities on what will happen to the path component 6569 * of the result IUri. First, if the relative path begins with a '/' 6570 * then the resulting path will just be the relative path. Second, if 6571 * relative path doesn't begin with a '/' then the base path and relative 6572 * path are merged together. 
6573 */ 6574 if(relative->path_len && *(relative->canon_uri+relative->path_start) == '/' && data.scheme_type != URL_SCHEME_MK) { 6575 WCHAR *tmp = NULL; 6576 BOOL copy_drive_path = FALSE; 6577 6578 /* If the relative IUri's path starts with a '/', then we 6579 * don't use the base IUri's path. Unless the base IUri 6580 * is a file URI, in which case it uses the drive path of 6581 * the base IUri (if it has any) in the new path. 6582 */ 6583 if(base->scheme_type == URL_SCHEME_FILE) { 6584 if(base->path_len > 3 && *(base->canon_uri+base->path_start) == '/' && 6585 is_drive_path(base->canon_uri+base->path_start+1)) { 6586 path_len += 3; 6587 copy_drive_path = TRUE; 6588 } 6589 } 6590 6591 path_len += relative->path_len; 6592 6593 path = heap_alloc((path_len+1)*sizeof(WCHAR)); 6594 if(!path) { 6595 *result = NULL; 6596 return E_OUTOFMEMORY; 6597 } 6598 6599 tmp = path; 6600 6601 /* Copy the base paths, drive path over. */ 6602 if(copy_drive_path) { 6603 memcpy(tmp, base->canon_uri+base->path_start, 3*sizeof(WCHAR)); 6604 tmp += 3; 6605 } 6606 6607 memcpy(tmp, relative->canon_uri+relative->path_start, relative->path_len*sizeof(WCHAR)); 6608 path[path_len] = '\0'; 6609 } else { 6610 /* Merge the base path with the relative path. */ 6611 hr = merge_paths(&data, base->canon_uri+base->path_start, base->path_len, 6612 relative->canon_uri+relative->path_start, relative->path_len, 6613 &path, &path_len, flags); 6614 if(FAILED(hr)) { 6615 *result = NULL; 6616 return hr; 6617 } 6618 6619 /* If the resulting IUri is a file URI, the drive path isn't 6620 * reduced out when the dot segments are removed. 6621 */ 6622 if(path_len >= 3 && data.scheme_type == URL_SCHEME_FILE && !data.host) { 6623 if(*path == '/' && is_drive_path(path+1)) 6624 path_offset = 2; 6625 else if(is_drive_path(path)) 6626 path_offset = 1; 6627 } 6628 } 6629 6630 /* Check if the dot segments need to be removed from the path. 
*/ 6631 if(!(flags & URL_DONT_SIMPLIFY) && !data.is_opaque) { 6632 DWORD offset = (path_offset > 0) ? path_offset+1 : 0; 6633 DWORD new_len = remove_dot_segments(path+offset,path_len-offset); 6634 6635 if(new_len != path_len) { 6636 WCHAR *tmp = heap_realloc(path, (offset+new_len+1)*sizeof(WCHAR)); 6637 if(!tmp) { 6638 heap_free(path); 6639 *result = NULL; 6640 return E_OUTOFMEMORY; 6641 } 6642 6643 tmp[new_len+offset] = '\0'; 6644 path = tmp; 6645 path_len = new_len+offset; 6646 } 6647 } 6648 6649 if(relative->query_start > -1) { 6650 data.query = relative->canon_uri+relative->query_start; 6651 data.query_len = relative->query_len; 6652 } 6653 6654 /* Make sure the path component is valid. */ 6655 ptr = path; 6656 pptr = &ptr; 6657 if((data.is_opaque && !parse_path_opaque(pptr, &data, 0)) || 6658 (!data.is_opaque && !parse_path_hierarchical(pptr, &data, 0))) { 6659 heap_free(path); 6660 *result = NULL; 6661 return E_INVALIDARG; 6662 } 6663 } 6664 6665 if(relative->fragment_start > -1) { 6666 data.fragment = relative->canon_uri+relative->fragment_start; 6667 data.fragment_len = relative->fragment_len; 6668 } 6669 6670 if(flags & URL_DONT_SIMPLIFY) 6671 raw_flags |= RAW_URI_FORCE_PORT_DISP; 6672 if(flags & URL_FILE_USE_PATHURL) 6673 raw_flags |= RAW_URI_CONVERT_TO_DOS_PATH; 6674 6675 len = generate_raw_uri(&data, data.uri, raw_flags); 6676 data.uri = SysAllocStringLen(NULL, len); 6677 if(!data.uri) { 6678 heap_free(path); 6679 *result = NULL; 6680 return E_OUTOFMEMORY; 6681 } 6682 6683 generate_raw_uri(&data, data.uri, raw_flags); 6684 6685 hr = Uri_Construct(NULL, (void**)&ret); 6686 if(FAILED(hr)) { 6687 SysFreeString(data.uri); 6688 heap_free(path); 6689 *result = NULL; 6690 return hr; 6691 } 6692 6693 if(flags & URL_DONT_SIMPLIFY) 6694 create_flags |= Uri_CREATE_NO_CANONICALIZE; 6695 if(flags & URL_FILE_USE_PATHURL) 6696 create_flags |= Uri_CREATE_FILE_USE_DOS_PATH; 6697 6698 ret->raw_uri = data.uri; 6699 hr = canonicalize_uri(&data, ret, create_flags); 6700 
if(FAILED(hr)) { 6701 IUri_Release(&ret->IUri_iface); 6702 *result = NULL; 6703 return hr; 6704 } 6705 6706 if(flags & URL_DONT_SIMPLIFY) 6707 ret->display_modifiers |= URI_DISPLAY_NO_DEFAULT_PORT_AUTH; 6708 6709 apply_default_flags(&create_flags); 6710 ret->create_flags = create_flags; 6711 *result = &ret->IUri_iface; 6712 6713 heap_free(path); 6714 } 6715 6716 return S_OK; 6717 } 6718 6719 /*********************************************************************** 6720 * CoInternetCombineIUri (urlmon.@) 6721 */ 6722 HRESULT WINAPI CoInternetCombineIUri(IUri *pBaseUri, IUri *pRelativeUri, DWORD dwCombineFlags, 6723 IUri **ppCombinedUri, DWORD_PTR dwReserved) 6724 { 6725 HRESULT hr; 6726 IInternetProtocolInfo *info; 6727 Uri *relative, *base; 6728 TRACE("(%p %p %x %p %x)\n", pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6729 6730 if(!ppCombinedUri) 6731 return E_INVALIDARG; 6732 6733 if(!pBaseUri || !pRelativeUri) { 6734 *ppCombinedUri = NULL; 6735 return E_INVALIDARG; 6736 } 6737 6738 relative = get_uri_obj(pRelativeUri); 6739 base = get_uri_obj(pBaseUri); 6740 if(!relative || !base) { 6741 *ppCombinedUri = NULL; 6742 FIXME("(%p %p %x %p %x) Unknown IUri types not supported yet.\n", 6743 pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6744 return E_NOTIMPL; 6745 } 6746 6747 info = get_protocol_info(base->canon_uri); 6748 if(info) { 6749 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 6750 DWORD result_len = 0; 6751 6752 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, relative->canon_uri, dwCombineFlags, 6753 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 6754 IInternetProtocolInfo_Release(info); 6755 if(SUCCEEDED(hr)) { 6756 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 6757 if(SUCCEEDED(hr)) 6758 return hr; 6759 } 6760 } 6761 6762 return combine_uri(base, relative, dwCombineFlags, ppCombinedUri, 0); 6763 } 6764 6765 
/*********************************************************************** 6766 * CoInternetCombineUrlEx (urlmon.@) 6767 */ 6768 HRESULT WINAPI CoInternetCombineUrlEx(IUri *pBaseUri, LPCWSTR pwzRelativeUrl, DWORD dwCombineFlags, 6769 IUri **ppCombinedUri, DWORD_PTR dwReserved) 6770 { 6771 IUri *relative; 6772 Uri *base; 6773 HRESULT hr; 6774 IInternetProtocolInfo *info; 6775 6776 TRACE("(%p %s %x %p %x)\n", pBaseUri, debugstr_w(pwzRelativeUrl), dwCombineFlags, 6777 ppCombinedUri, (DWORD)dwReserved); 6778 6779 if(!ppCombinedUri) 6780 return E_POINTER; 6781 6782 if(!pwzRelativeUrl) { 6783 *ppCombinedUri = NULL; 6784 return E_UNEXPECTED; 6785 } 6786 6787 if(!pBaseUri) { 6788 *ppCombinedUri = NULL; 6789 return E_INVALIDARG; 6790 } 6791 6792 base = get_uri_obj(pBaseUri); 6793 if(!base) { 6794 *ppCombinedUri = NULL; 6795 FIXME("(%p %s %x %p %x) Unknown IUri's not supported yet.\n", pBaseUri, debugstr_w(pwzRelativeUrl), 6796 dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6797 return E_NOTIMPL; 6798 } 6799 6800 info = get_protocol_info(base->canon_uri); 6801 if(info) { 6802 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 6803 DWORD result_len = 0; 6804 6805 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, pwzRelativeUrl, dwCombineFlags, 6806 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 6807 IInternetProtocolInfo_Release(info); 6808 if(SUCCEEDED(hr)) { 6809 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 6810 if(SUCCEEDED(hr)) 6811 return hr; 6812 } 6813 } 6814 6815 hr = CreateUri(pwzRelativeUrl, Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME, 0, &relative); 6816 if(FAILED(hr)) { 6817 *ppCombinedUri = NULL; 6818 return hr; 6819 } 6820 6821 hr = combine_uri(base, get_uri_obj(relative), dwCombineFlags, ppCombinedUri, COMBINE_URI_FORCE_FLAG_USE); 6822 6823 IUri_Release(relative); 6824 return hr; 6825 } 6826 6827 static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, 6828 DWORD output_len, DWORD 
*result_len) 6829 { 6830 const WCHAR *ptr = NULL; 6831 WCHAR *path = NULL; 6832 const WCHAR **pptr; 6833 DWORD len = 0; 6834 BOOL reduce_path; 6835 6836 /* URL_UNESCAPE only has effect if none of the URL_ESCAPE flags are set. */ 6837 const BOOL allow_unescape = !(flags & URL_ESCAPE_UNSAFE) && 6838 !(flags & URL_ESCAPE_SPACES_ONLY) && 6839 !(flags & URL_ESCAPE_PERCENT); 6840 6841 6842 /* Check if the dot segments need to be removed from the 6843 * path component. 6844 */ 6845 if(uri->scheme_start > -1 && uri->path_start > -1) { 6846 ptr = uri->canon_uri+uri->scheme_start+uri->scheme_len+1; 6847 pptr = &ptr; 6848 } 6849 reduce_path = !(flags & URL_DONT_SIMPLIFY) && 6850 ptr && check_hierarchical(pptr); 6851 6852 for(ptr = uri->canon_uri; ptr < uri->canon_uri+uri->canon_len; ++ptr) { 6853 BOOL do_default_action = TRUE; 6854 6855 /* Keep track of the path if we need to remove dot segments from 6856 * it later. 6857 */ 6858 if(reduce_path && !path && ptr == uri->canon_uri+uri->path_start) 6859 path = output+len; 6860 6861 /* Check if it's time to reduce the path. */ 6862 if(reduce_path && ptr == uri->canon_uri+uri->path_start+uri->path_len) { 6863 DWORD current_path_len = (output+len) - path; 6864 DWORD new_path_len = remove_dot_segments(path, current_path_len); 6865 6866 /* Update the current length. */ 6867 len -= (current_path_len-new_path_len); 6868 reduce_path = FALSE; 6869 } 6870 6871 if(*ptr == '%') { 6872 const WCHAR decoded = decode_pct_val(ptr); 6873 if(decoded) { 6874 if(allow_unescape && (flags & URL_UNESCAPE)) { 6875 if(len < output_len) 6876 output[len] = decoded; 6877 len++; 6878 ptr += 2; 6879 do_default_action = FALSE; 6880 } 6881 } 6882 6883 /* See if %'s needed to encoded. 
*/ 6884 if(do_default_action && (flags & URL_ESCAPE_PERCENT)) { 6885 if(len + 3 < output_len) 6886 pct_encode_val(*ptr, output+len); 6887 len += 3; 6888 do_default_action = FALSE; 6889 } 6890 } else if(*ptr == ' ') { 6891 if((flags & URL_ESCAPE_SPACES_ONLY) && 6892 !(flags & URL_ESCAPE_UNSAFE)) { 6893 if(len + 3 < output_len) 6894 pct_encode_val(*ptr, output+len); 6895 len += 3; 6896 do_default_action = FALSE; 6897 } 6898 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr)) { 6899 if(flags & URL_ESCAPE_UNSAFE) { 6900 if(len + 3 < output_len) 6901 pct_encode_val(*ptr, output+len); 6902 len += 3; 6903 do_default_action = FALSE; 6904 } 6905 } 6906 6907 if(do_default_action) { 6908 if(len < output_len) 6909 output[len] = *ptr; 6910 len++; 6911 } 6912 } 6913 6914 /* Sometimes the path is the very last component of the IUri, so 6915 * see if the dot segments need to be reduced now. 6916 */ 6917 if(reduce_path && path) { 6918 DWORD current_path_len = (output+len) - path; 6919 DWORD new_path_len = remove_dot_segments(path, current_path_len); 6920 6921 /* Update the current length. */ 6922 len -= (current_path_len-new_path_len); 6923 } 6924 6925 if(len < output_len) 6926 output[len] = 0; 6927 else 6928 output[output_len-1] = 0; 6929 6930 /* The null terminator isn't included in the length. 
*/ 6931 *result_len = len; 6932 if(len >= output_len) 6933 return STRSAFE_E_INSUFFICIENT_BUFFER; 6934 6935 return S_OK; 6936 } 6937 6938 static HRESULT parse_friendly(IUri *uri, LPWSTR output, DWORD output_len, 6939 DWORD *result_len) 6940 { 6941 HRESULT hr; 6942 DWORD display_len; 6943 BSTR display; 6944 6945 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DISPLAY_URI, &display_len, 0); 6946 if(FAILED(hr)) { 6947 *result_len = 0; 6948 return hr; 6949 } 6950 6951 *result_len = display_len; 6952 if(display_len+1 > output_len) 6953 return STRSAFE_E_INSUFFICIENT_BUFFER; 6954 6955 hr = IUri_GetDisplayUri(uri, &display); 6956 if(FAILED(hr)) { 6957 *result_len = 0; 6958 return hr; 6959 } 6960 6961 memcpy(output, display, (display_len+1)*sizeof(WCHAR)); 6962 SysFreeString(display); 6963 return S_OK; 6964 } 6965 6966 static HRESULT parse_rootdocument(const Uri *uri, LPWSTR output, DWORD output_len, 6967 DWORD *result_len) 6968 { 6969 static const WCHAR colon_slashesW[] = {':','/','/'}; 6970 6971 WCHAR *ptr; 6972 DWORD len = 0; 6973 6974 /* Windows only returns the root document if the URI has an authority 6975 * and it's not an unknown scheme type or a file scheme type. 6976 */ 6977 if(uri->authority_start == -1 || 6978 uri->scheme_type == URL_SCHEME_UNKNOWN || 6979 uri->scheme_type == URL_SCHEME_FILE) { 6980 *result_len = 0; 6981 if(!output_len) 6982 return STRSAFE_E_INSUFFICIENT_BUFFER; 6983 6984 output[0] = 0; 6985 return S_OK; 6986 } 6987 6988 len = uri->scheme_len+uri->authority_len; 6989 /* For the "://" and '/' which will be added. */ 6990 len += 4; 6991 6992 if(len+1 > output_len) { 6993 *result_len = len; 6994 return STRSAFE_E_INSUFFICIENT_BUFFER; 6995 } 6996 6997 ptr = output; 6998 memcpy(ptr, uri->canon_uri+uri->scheme_start, uri->scheme_len*sizeof(WCHAR)); 6999 7000 /* Add the "://". */ 7001 ptr += uri->scheme_len; 7002 memcpy(ptr, colon_slashesW, sizeof(colon_slashesW)); 7003 7004 /* Add the authority. 
*/ 7005 ptr += ARRAY_SIZE(colon_slashesW); 7006 memcpy(ptr, uri->canon_uri+uri->authority_start, uri->authority_len*sizeof(WCHAR)); 7007 7008 /* Add the '/' after the authority. */ 7009 ptr += uri->authority_len; 7010 *ptr = '/'; 7011 ptr[1] = 0; 7012 7013 *result_len = len; 7014 return S_OK; 7015 } 7016 7017 static HRESULT parse_document(const Uri *uri, LPWSTR output, DWORD output_len, 7018 DWORD *result_len) 7019 { 7020 DWORD len = 0; 7021 7022 /* It has to be a known scheme type, but, it can't be a file 7023 * scheme. It also has to hierarchical. 7024 */ 7025 if(uri->scheme_type == URL_SCHEME_UNKNOWN || 7026 uri->scheme_type == URL_SCHEME_FILE || 7027 uri->authority_start == -1) { 7028 *result_len = 0; 7029 if(output_len < 1) 7030 return STRSAFE_E_INSUFFICIENT_BUFFER; 7031 7032 output[0] = 0; 7033 return S_OK; 7034 } 7035 7036 if(uri->fragment_start > -1) 7037 len = uri->fragment_start; 7038 else 7039 len = uri->canon_len; 7040 7041 *result_len = len; 7042 if(len+1 > output_len) 7043 return STRSAFE_E_INSUFFICIENT_BUFFER; 7044 7045 memcpy(output, uri->canon_uri, len*sizeof(WCHAR)); 7046 output[len] = 0; 7047 return S_OK; 7048 } 7049 7050 static HRESULT parse_path_from_url(const Uri *uri, LPWSTR output, DWORD output_len, 7051 DWORD *result_len) 7052 { 7053 const WCHAR *path_ptr; 7054 WCHAR buffer[INTERNET_MAX_URL_LENGTH+1]; 7055 WCHAR *ptr; 7056 7057 if(uri->scheme_type != URL_SCHEME_FILE) { 7058 *result_len = 0; 7059 if(output_len > 0) 7060 output[0] = 0; 7061 return E_INVALIDARG; 7062 } 7063 7064 ptr = buffer; 7065 if(uri->host_start > -1) { 7066 static const WCHAR slash_slashW[] = {'\\','\\'}; 7067 7068 memcpy(ptr, slash_slashW, sizeof(slash_slashW)); 7069 ptr += ARRAY_SIZE(slash_slashW); 7070 memcpy(ptr, uri->canon_uri+uri->host_start, uri->host_len*sizeof(WCHAR)); 7071 ptr += uri->host_len; 7072 } 7073 7074 path_ptr = uri->canon_uri+uri->path_start; 7075 if(uri->path_len > 3 && *path_ptr == '/' && is_drive_path(path_ptr+1)) 7076 /* Skip past the '/' in front 
of the drive path. */ 7077 ++path_ptr; 7078 7079 for(; path_ptr < uri->canon_uri+uri->path_start+uri->path_len; ++path_ptr, ++ptr) { 7080 BOOL do_default_action = TRUE; 7081 7082 if(*path_ptr == '%') { 7083 const WCHAR decoded = decode_pct_val(path_ptr); 7084 if(decoded) { 7085 *ptr = decoded; 7086 path_ptr += 2; 7087 do_default_action = FALSE; 7088 } 7089 } else if(*path_ptr == '/') { 7090 *ptr = '\\'; 7091 do_default_action = FALSE; 7092 } 7093 7094 if(do_default_action) 7095 *ptr = *path_ptr; 7096 } 7097 7098 *ptr = 0; 7099 7100 *result_len = ptr-buffer; 7101 if(*result_len+1 > output_len) 7102 return STRSAFE_E_INSUFFICIENT_BUFFER; 7103 7104 memcpy(output, buffer, (*result_len+1)*sizeof(WCHAR)); 7105 return S_OK; 7106 } 7107 7108 static HRESULT parse_url_from_path(IUri *uri, LPWSTR output, DWORD output_len, 7109 DWORD *result_len) 7110 { 7111 HRESULT hr; 7112 BSTR received; 7113 DWORD len = 0; 7114 7115 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_ABSOLUTE_URI, &len, 0); 7116 if(FAILED(hr)) { 7117 *result_len = 0; 7118 return hr; 7119 } 7120 7121 *result_len = len; 7122 if(len+1 > output_len) 7123 return STRSAFE_E_INSUFFICIENT_BUFFER; 7124 7125 hr = IUri_GetAbsoluteUri(uri, &received); 7126 if(FAILED(hr)) { 7127 *result_len = 0; 7128 return hr; 7129 } 7130 7131 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7132 SysFreeString(received); 7133 7134 return S_OK; 7135 } 7136 7137 static HRESULT parse_schema(IUri *uri, LPWSTR output, DWORD output_len, 7138 DWORD *result_len) 7139 { 7140 HRESULT hr; 7141 DWORD len; 7142 BSTR received; 7143 7144 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_SCHEME_NAME, &len, 0); 7145 if(FAILED(hr)) { 7146 *result_len = 0; 7147 return hr; 7148 } 7149 7150 *result_len = len; 7151 if(len+1 > output_len) 7152 return STRSAFE_E_INSUFFICIENT_BUFFER; 7153 7154 hr = IUri_GetSchemeName(uri, &received); 7155 if(FAILED(hr)) { 7156 *result_len = 0; 7157 return hr; 7158 } 7159 7160 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7161 
SysFreeString(received); 7162 7163 return S_OK; 7164 } 7165 7166 static HRESULT parse_site(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7167 { 7168 HRESULT hr; 7169 DWORD len; 7170 BSTR received; 7171 7172 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_HOST, &len, 0); 7173 if(FAILED(hr)) { 7174 *result_len = 0; 7175 return hr; 7176 } 7177 7178 *result_len = len; 7179 if(len+1 > output_len) 7180 return STRSAFE_E_INSUFFICIENT_BUFFER; 7181 7182 hr = IUri_GetHost(uri, &received); 7183 if(FAILED(hr)) { 7184 *result_len = 0; 7185 return hr; 7186 } 7187 7188 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7189 SysFreeString(received); 7190 7191 return S_OK; 7192 } 7193 7194 static HRESULT parse_domain(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7195 { 7196 HRESULT hr; 7197 DWORD len; 7198 BSTR received; 7199 7200 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DOMAIN, &len, 0); 7201 if(FAILED(hr)) { 7202 *result_len = 0; 7203 return hr; 7204 } 7205 7206 *result_len = len; 7207 if(len+1 > output_len) 7208 return STRSAFE_E_INSUFFICIENT_BUFFER; 7209 7210 hr = IUri_GetDomain(uri, &received); 7211 if(FAILED(hr)) { 7212 *result_len = 0; 7213 return hr; 7214 } 7215 7216 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7217 SysFreeString(received); 7218 7219 return S_OK; 7220 } 7221 7222 static HRESULT parse_anchor(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7223 { 7224 HRESULT hr; 7225 DWORD len; 7226 BSTR received; 7227 7228 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_FRAGMENT, &len, 0); 7229 if(FAILED(hr)) { 7230 *result_len = 0; 7231 return hr; 7232 } 7233 7234 *result_len = len; 7235 if(len+1 > output_len) 7236 return STRSAFE_E_INSUFFICIENT_BUFFER; 7237 7238 hr = IUri_GetFragment(uri, &received); 7239 if(FAILED(hr)) { 7240 *result_len = 0; 7241 return hr; 7242 } 7243 7244 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7245 SysFreeString(received); 7246 7247 return S_OK; 7248 } 7249 7250 
/*********************************************************************** 7251 * CoInternetParseIUri (urlmon.@) 7252 */ 7253 HRESULT WINAPI CoInternetParseIUri(IUri *pIUri, PARSEACTION ParseAction, DWORD dwFlags, 7254 LPWSTR pwzResult, DWORD cchResult, DWORD *pcchResult, 7255 DWORD_PTR dwReserved) 7256 { 7257 HRESULT hr; 7258 Uri *uri; 7259 IInternetProtocolInfo *info; 7260 7261 TRACE("(%p %d %x %p %d %p %x)\n", pIUri, ParseAction, dwFlags, pwzResult, 7262 cchResult, pcchResult, (DWORD)dwReserved); 7263 7264 if(!pcchResult) 7265 return E_POINTER; 7266 7267 if(!pwzResult || !pIUri) { 7268 *pcchResult = 0; 7269 return E_INVALIDARG; 7270 } 7271 7272 if(!(uri = get_uri_obj(pIUri))) { 7273 *pcchResult = 0; 7274 FIXME("(%p %d %x %p %d %p %x) Unknown IUri's not supported for this action.\n", 7275 pIUri, ParseAction, dwFlags, pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 7276 return E_NOTIMPL; 7277 } 7278 7279 info = get_protocol_info(uri->canon_uri); 7280 if(info) { 7281 hr = IInternetProtocolInfo_ParseUrl(info, uri->canon_uri, ParseAction, dwFlags, 7282 pwzResult, cchResult, pcchResult, 0); 7283 IInternetProtocolInfo_Release(info); 7284 if(SUCCEEDED(hr)) return hr; 7285 } 7286 7287 switch(ParseAction) { 7288 case PARSE_CANONICALIZE: 7289 hr = parse_canonicalize(uri, dwFlags, pwzResult, cchResult, pcchResult); 7290 break; 7291 case PARSE_FRIENDLY: 7292 hr = parse_friendly(pIUri, pwzResult, cchResult, pcchResult); 7293 break; 7294 case PARSE_ROOTDOCUMENT: 7295 hr = parse_rootdocument(uri, pwzResult, cchResult, pcchResult); 7296 break; 7297 case PARSE_DOCUMENT: 7298 hr = parse_document(uri, pwzResult, cchResult, pcchResult); 7299 break; 7300 case PARSE_PATH_FROM_URL: 7301 hr = parse_path_from_url(uri, pwzResult, cchResult, pcchResult); 7302 break; 7303 case PARSE_URL_FROM_PATH: 7304 hr = parse_url_from_path(pIUri, pwzResult, cchResult, pcchResult); 7305 break; 7306 case PARSE_SCHEMA: 7307 hr = parse_schema(pIUri, pwzResult, cchResult, pcchResult); 7308 break; 7309 
case PARSE_SITE: 7310 hr = parse_site(pIUri, pwzResult, cchResult, pcchResult); 7311 break; 7312 case PARSE_DOMAIN: 7313 hr = parse_domain(pIUri, pwzResult, cchResult, pcchResult); 7314 break; 7315 case PARSE_LOCATION: 7316 case PARSE_ANCHOR: 7317 hr = parse_anchor(pIUri, pwzResult, cchResult, pcchResult); 7318 break; 7319 case PARSE_SECURITY_URL: 7320 case PARSE_MIME: 7321 case PARSE_SERVER: 7322 case PARSE_SECURITY_DOMAIN: 7323 *pcchResult = 0; 7324 hr = E_FAIL; 7325 break; 7326 default: 7327 *pcchResult = 0; 7328 hr = E_NOTIMPL; 7329 FIXME("(%p %d %x %p %d %p %x) Partial stub.\n", pIUri, ParseAction, dwFlags, 7330 pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 7331 } 7332 7333 return hr; 7334 } 7335