/*
 * Copyright 2010 Jacek Caban for CodeWeavers
 * Copyright 2010 Thomas Mullaly
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include <limits.h>
#include <wchar.h>

#include "urlmon_main.h"
#include "wine/debug.h"

#define NO_SHLWAPI_REG
#include "shlwapi.h"

#include "strsafe.h"

/* Values stored in Uri.display_modifiers.
 * NOTE(review): the code that sets/reads them is outside this section - confirm.
 */
#define URI_DISPLAY_NO_ABSOLUTE_URI         0x1
#define URI_DISPLAY_NO_DEFAULT_PORT_AUTH    0x2

/* Bits tested against the 'extras' argument of the parse_* helpers
 * (parse_scheme_name, parse_username and parse_password test the first
 * three). NOTE(review): users of the remaining bits are not visible in
 * this part of the file.
 */
#define ALLOW_NULL_TERM_SCHEME          0x01
#define ALLOW_NULL_TERM_USER_NAME       0x02
#define ALLOW_NULL_TERM_PASSWORD        0x04
#define ALLOW_BRACKETLESS_IP_LITERAL    0x08
#define SKIP_IP_FUTURE_CHECK            0x10
#define IGNORE_PORT_DELIMITER           0x20

#define RAW_URI_FORCE_PORT_DISP     0x1
#define RAW_URI_CONVERT_TO_DOS_PATH 0x2

#define COMBINE_URI_FORCE_FLAG_USE  0x1

WINE_DEFAULT_DEBUG_CHANNEL(urlmon);

/* Private IID; get_uri_obj() queries an IUri for it to obtain the
 * underlying Uri object.
 */
static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}};

/* Object backing the IUri interface (and the other interfaces embedded
 * below).
 *
 * NOTE(review): the *_start / *_offset members look like indices into
 * canon_uri, with the matching *_len giving the component length -
 * confirm against the canonicalization code, which is not in this section.
 */
typedef struct {
    IUri                IUri_iface;
    IUriBuilderFactory  IUriBuilderFactory_iface;
    IPersistStream      IPersistStream_iface;
    IMarshal            IMarshal_iface;

    LONG ref;

    BSTR            raw_uri;

    /* Information about the canonicalized URI's buffer. */
    WCHAR           *canon_uri;
    DWORD           canon_size;
    DWORD           canon_len;
    BOOL            display_modifiers;
    DWORD           create_flags;

    INT             scheme_start;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    INT             userinfo_start;
    DWORD           userinfo_len;
    INT             userinfo_split;

    INT             host_start;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    INT             port_offset;
    DWORD           port;
    BOOL            has_port;

    INT             authority_start;
    DWORD           authority_len;

    INT             domain_offset;

    INT             path_start;
    DWORD           path_len;
    INT             extension_offset;

    INT             query_start;
    DWORD           query_len;

    INT             fragment_start;
    DWORD           fragment_len;
} Uri;

/* Object backing the IUriBuilder interface: an optional base Uri plus one
 * string/length pair per component.
 * NOTE(review): 'modified_props' presumably records which components were
 * changed through the builder - confirm against the IUriBuilder methods.
 */
typedef struct {
    IUriBuilder IUriBuilder_iface;
    LONG ref;

    Uri         *uri;
    DWORD       modified_props;

    WCHAR       *fragment;
    DWORD       fragment_len;

    WCHAR       *host;
    DWORD       host_len;

    WCHAR       *password;
    DWORD       password_len;

    WCHAR       *path;
    DWORD       path_len;

    BOOL        has_port;
    DWORD       port;

    WCHAR       *query;
    DWORD       query_len;

    WCHAR       *scheme;
    DWORD       scheme_len;

    WCHAR       *username;
    DWORD       username_len;
} UriBuilder;

/* One h16 (hex group) of an IPv6 address: 'len' characters starting at
 * 'str'. h16tous() converts it to its numeric value.
 */
typedef struct {
    const WCHAR *str;
    DWORD       len;
} h16;

typedef struct {
    /* IPv6 addresses can hold up to 8 h16 components. */
    h16         components[8];
    DWORD       h16_count;

    /* An IPv6 can have 1 elision ("::"). */
    const WCHAR *elision;

    /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. */
    const WCHAR *ipv4;
    DWORD       ipv4_len;

    /* Both sizes are in bytes and are filled in by compute_ipv6_comps_size(). */
    INT         components_size;
    INT         elision_size;
} ipv6_address;

/* Scratch state filled in by the parse_* functions while a URI string is
 * being split into components. The component pointers are const and come
 * with an explicit length; the parsers in this file point them either into
 * the string being parsed or at static strings (see parse_scheme()).
 */
typedef struct {
    BSTR            uri;

    BOOL            is_relative;
    BOOL            is_opaque;
    BOOL            has_implicit_scheme;
    BOOL            has_implicit_ip;
    UINT            implicit_ipv4;
    BOOL            must_have_path;

    const WCHAR     *scheme;
    DWORD           scheme_len;
    URL_SCHEME      scheme_type;

    const WCHAR     *username;
    DWORD           username_len;

    const WCHAR     *password;
    DWORD           password_len;

    const WCHAR     *host;
    DWORD           host_len;
    Uri_HOST_TYPE   host_type;

    BOOL            has_ipv6;
    ipv6_address    ipv6_address;

    BOOL            has_port;
    const WCHAR     *port;
    DWORD           port_len;
    DWORD           port_value;

    const WCHAR     *path;
    DWORD           path_len;

    const WCHAR     *query;
    DWORD           query_len;

    const WCHAR     *fragment;
    DWORD           fragment_len;
} parse_data;

/* Upper-case hex digits, indexed by nibble value (see pct_encode_val()). */
static const CHAR hexDigits[] = "0123456789ABCDEF";

/* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7.
 *
 * scheme_name holds at most 15 characters plus the terminating NUL, which
 * is exactly what the longest entries ("ms-shell-rooted", "ms-shell-idlist")
 * need. parse_scheme_type() matches names from this table case-insensitively.
 */
static const struct {
    URL_SCHEME  scheme;
    WCHAR       scheme_name[16];
} recognized_schemes[] = {
    {URL_SCHEME_FTP,            {'f','t','p',0}},
    {URL_SCHEME_HTTP,           {'h','t','t','p',0}},
    {URL_SCHEME_GOPHER,         {'g','o','p','h','e','r',0}},
    {URL_SCHEME_MAILTO,         {'m','a','i','l','t','o',0}},
    {URL_SCHEME_NEWS,           {'n','e','w','s',0}},
    {URL_SCHEME_NNTP,           {'n','n','t','p',0}},
    {URL_SCHEME_TELNET,         {'t','e','l','n','e','t',0}},
    {URL_SCHEME_WAIS,           {'w','a','i','s',0}},
    {URL_SCHEME_FILE,           {'f','i','l','e',0}},
    {URL_SCHEME_MK,             {'m','k',0}},
    {URL_SCHEME_HTTPS,          {'h','t','t','p','s',0}},
    {URL_SCHEME_SHELL,          {'s','h','e','l','l',0}},
    {URL_SCHEME_SNEWS,          {'s','n','e','w','s',0}},
    {URL_SCHEME_LOCAL,          {'l','o','c','a','l',0}},
    {URL_SCHEME_JAVASCRIPT,     {'j','a','v','a','s','c','r','i','p','t',0}},
    {URL_SCHEME_VBSCRIPT,       {'v','b','s','c','r','i','p','t',0}},
    {URL_SCHEME_ABOUT,          {'a','b','o','u','t',0}},
    {URL_SCHEME_RES,            {'r','e','s',0}},
    {URL_SCHEME_MSSHELLROOTED,  {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}},
    {URL_SCHEME_MSSHELLIDLIST,  {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}},
    /* Note: the MSHELP scheme type is spelled "hcp" in URIs. */
    {URL_SCHEME_MSHELP,         {'h','c','p',0}},
    {URL_SCHEME_WILDCARD,       {'*',0}}
};

/* List of default ports Windows recognizes. */
static const struct {
    URL_SCHEME  scheme;
    USHORT      port;
} default_ports[] = {
    {URL_SCHEME_FTP,    21},
    {URL_SCHEME_HTTP,   80},
    {URL_SCHEME_GOPHER, 70},
    {URL_SCHEME_NNTP,   119},
    {URL_SCHEME_TELNET, 23},
    {URL_SCHEME_WAIS,   210},
    {URL_SCHEME_HTTPS,  443},
};

/* List of 3-character top level domain names Windows seems to recognize.
 * There might be more, but, these are the only ones I've found so far.
241 */ 242 static const struct { 243 WCHAR tld_name[4]; 244 } recognized_tlds[] = { 245 {{'c','o','m',0}}, 246 {{'e','d','u',0}}, 247 {{'g','o','v',0}}, 248 {{'i','n','t',0}}, 249 {{'m','i','l',0}}, 250 {{'n','e','t',0}}, 251 {{'o','r','g',0}} 252 }; 253 254 static Uri *get_uri_obj(IUri *uri) 255 { 256 Uri *ret; 257 HRESULT hres; 258 259 hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret); 260 return SUCCEEDED(hres) ? ret : NULL; 261 } 262 263 static inline BOOL is_alpha(WCHAR val) { 264 return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z')); 265 } 266 267 static inline BOOL is_num(WCHAR val) { 268 return (val >= '0' && val <= '9'); 269 } 270 271 static inline BOOL is_drive_path(const WCHAR *str) { 272 return (is_alpha(str[0]) && (str[1] == ':' || str[1] == '|')); 273 } 274 275 static inline BOOL is_unc_path(const WCHAR *str) { 276 return (str[0] == '\\' && str[1] == '\\'); 277 } 278 279 static inline BOOL is_forbidden_dos_path_char(WCHAR val) { 280 return (val == '>' || val == '<' || val == '\"'); 281 } 282 283 /* A URI is implicitly a file path if it begins with 284 * a drive letter (e.g. X:) or starts with "\\" (UNC path). 285 */ 286 static inline BOOL is_implicit_file_path(const WCHAR *str) { 287 return (is_unc_path(str) || (is_alpha(str[0]) && str[1] == ':')); 288 } 289 290 /* Checks if the URI is a hierarchical URI. A hierarchical 291 * URI is one that has "//" after the scheme. 292 */ 293 static BOOL check_hierarchical(const WCHAR **ptr) { 294 const WCHAR *start = *ptr; 295 296 if(**ptr != '/') 297 return FALSE; 298 299 ++(*ptr); 300 if(**ptr != '/') { 301 *ptr = start; 302 return FALSE; 303 } 304 305 ++(*ptr); 306 return TRUE; 307 } 308 309 /* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */ 310 static inline BOOL is_unreserved(WCHAR val) { 311 return (is_alpha(val) || is_num(val) || val == '-' || val == '.' || 312 val == '_' || val == '~'); 313 } 314 315 /* sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 316 * / "*" / "+" / "," / ";" / "=" 317 */ 318 static inline BOOL is_subdelim(WCHAR val) { 319 return (val == '!' || val == '$' || val == '&' || 320 val == '\'' || val == '(' || val == ')' || 321 val == '*' || val == '+' || val == ',' || 322 val == ';' || val == '='); 323 } 324 325 /* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */ 326 static inline BOOL is_gendelim(WCHAR val) { 327 return (val == ':' || val == '/' || val == '?' || 328 val == '#' || val == '[' || val == ']' || 329 val == '@'); 330 } 331 332 /* Characters that delimit the end of the authority 333 * section of a URI. Sometimes a '\\' is considered 334 * an authority delimiter. 335 */ 336 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) { 337 return (val == '#' || val == '/' || val == '?' || 338 val == '\0' || (acceptSlash && val == '\\')); 339 } 340 341 /* reserved = gen-delims / sub-delims */ 342 static inline BOOL is_reserved(WCHAR val) { 343 return (is_subdelim(val) || is_gendelim(val)); 344 } 345 346 static inline BOOL is_hexdigit(WCHAR val) { 347 return ((val >= 'a' && val <= 'f') || 348 (val >= 'A' && val <= 'F') || 349 (val >= '0' && val <= '9')); 350 } 351 352 static inline BOOL is_path_delim(URL_SCHEME scheme, WCHAR val) { 353 return (!val || (val == '#' && scheme != URL_SCHEME_FILE) || val == '?'); 354 } 355 356 static inline BOOL is_slash(WCHAR c) 357 { 358 return c == '/' || c == '\\'; 359 } 360 361 static inline BOOL is_ascii(WCHAR c) 362 { 363 return c < 0x80; 364 } 365 366 static BOOL is_default_port(URL_SCHEME scheme, DWORD port) { 367 DWORD i; 368 369 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 370 if(default_ports[i].scheme == scheme && default_ports[i].port) 371 return TRUE; 372 } 373 374 return FALSE; 375 } 376 377 /* List of schemes types Windows seems to expect to be hierarchical. 
*/ 378 static inline BOOL is_hierarchical_scheme(URL_SCHEME type) { 379 return(type == URL_SCHEME_HTTP || type == URL_SCHEME_FTP || 380 type == URL_SCHEME_GOPHER || type == URL_SCHEME_NNTP || 381 type == URL_SCHEME_TELNET || type == URL_SCHEME_WAIS || 382 type == URL_SCHEME_FILE || type == URL_SCHEME_HTTPS || 383 type == URL_SCHEME_RES); 384 } 385 386 /* Checks if 'flags' contains an invalid combination of Uri_CREATE flags. */ 387 static inline BOOL has_invalid_flag_combination(DWORD flags) { 388 return((flags & Uri_CREATE_DECODE_EXTRA_INFO && flags & Uri_CREATE_NO_DECODE_EXTRA_INFO) || 389 (flags & Uri_CREATE_CANONICALIZE && flags & Uri_CREATE_NO_CANONICALIZE) || 390 (flags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) || 391 (flags & Uri_CREATE_PRE_PROCESS_HTML_URI && flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) || 392 (flags & Uri_CREATE_IE_SETTINGS && flags & Uri_CREATE_NO_IE_SETTINGS)); 393 } 394 395 /* Applies each default Uri_CREATE flags to 'flags' if it 396 * doesn't cause a flag conflict. 397 */ 398 static void apply_default_flags(DWORD *flags) { 399 if(!(*flags & Uri_CREATE_NO_CANONICALIZE)) 400 *flags |= Uri_CREATE_CANONICALIZE; 401 if(!(*flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) 402 *flags |= Uri_CREATE_DECODE_EXTRA_INFO; 403 if(!(*flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) 404 *flags |= Uri_CREATE_CRACK_UNKNOWN_SCHEMES; 405 if(!(*flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 406 *flags |= Uri_CREATE_PRE_PROCESS_HTML_URI; 407 if(!(*flags & Uri_CREATE_IE_SETTINGS)) 408 *flags |= Uri_CREATE_NO_IE_SETTINGS; 409 } 410 411 /* Determines if the URI is hierarchical using the information already parsed into 412 * data and using the current location of parsing in the URI string. 413 * 414 * Windows considers a URI hierarchical if one of the following is true: 415 * A.) It's a wildcard scheme. 416 * B.) It's an implicit file scheme. 417 * C.) It's a known hierarchical scheme and it has two '\\' after the scheme name. 
418 * (the '\\' will be converted into "//" during canonicalization). 419 * D.) "//" appears after the scheme name (or at the beginning if no scheme is given). 420 */ 421 static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data) { 422 const WCHAR *start = *ptr; 423 424 if(data->scheme_type == URL_SCHEME_WILDCARD) 425 return TRUE; 426 else if(data->scheme_type == URL_SCHEME_FILE && data->has_implicit_scheme) 427 return TRUE; 428 else if(is_hierarchical_scheme(data->scheme_type) && (*ptr)[0] == '\\' && (*ptr)[1] == '\\') { 429 *ptr += 2; 430 return TRUE; 431 } else if(data->scheme_type != URL_SCHEME_MAILTO && check_hierarchical(ptr)) 432 return TRUE; 433 434 *ptr = start; 435 return FALSE; 436 } 437 438 /* Computes the size of the given IPv6 address. 439 * Each h16 component is 16 bits. If there is an IPv4 address, it's 440 * 32 bits. If there's an elision it can be 16 to 128 bits, depending 441 * on the number of other components. 442 * 443 * Modeled after google-url's CheckIPv6ComponentsSize function 444 */ 445 static void compute_ipv6_comps_size(ipv6_address *address) { 446 address->components_size = address->h16_count * 2; 447 448 if(address->ipv4) 449 /* IPv4 address is 4 bytes. */ 450 address->components_size += 4; 451 452 if(address->elision) { 453 /* An elision can be anywhere from 2 bytes up to 16 bytes. 454 * Its size depends on the size of the h16 and IPv4 components. 
455 */ 456 address->elision_size = 16 - address->components_size; 457 if(address->elision_size < 2) 458 address->elision_size = 2; 459 } else 460 address->elision_size = 0; 461 } 462 463 /* Taken from dlls/jscript/lex.c */ 464 static int hex_to_int(WCHAR val) { 465 if(val >= '0' && val <= '9') 466 return val - '0'; 467 else if(val >= 'a' && val <= 'f') 468 return val - 'a' + 10; 469 else if(val >= 'A' && val <= 'F') 470 return val - 'A' + 10; 471 472 return -1; 473 } 474 475 /* Helper function for converting a percent encoded string 476 * representation of a WCHAR value into its actual WCHAR value. If 477 * the two characters following the '%' aren't valid hex values then 478 * this function returns the NULL character. 479 * 480 * E.g. 481 * "%2E" will result in '.' being returned by this function. 482 */ 483 static WCHAR decode_pct_val(const WCHAR *ptr) { 484 WCHAR ret = '\0'; 485 486 if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) { 487 INT a = hex_to_int(*(ptr + 1)); 488 INT b = hex_to_int(*(ptr + 2)); 489 490 ret = a << 4; 491 ret += b; 492 } 493 494 return ret; 495 } 496 497 /* Helper function for percent encoding a given character 498 * and storing the encoded value into a given buffer (dest). 499 * 500 * It's up to the calling function to ensure that there is 501 * at least enough space in 'dest' for the percent encoded 502 * value to be stored (so dest + 3 spaces available). 503 */ 504 static inline void pct_encode_val(WCHAR val, WCHAR *dest) { 505 dest[0] = '%'; 506 dest[1] = hexDigits[(val >> 4) & 0xf]; 507 dest[2] = hexDigits[val & 0xf]; 508 } 509 510 /* Attempts to parse the domain name from the host. 511 * 512 * This function also includes the Top-level Domain (TLD) name 513 * of the host when it tries to find the domain name. If it finds 514 * a valid domain name it will assign 'domain_start' the offset 515 * into 'host' where the domain name starts. 
 *
 * It's implied that if there is a domain name its range is:
 *  [host+domain_start, host+host_len).
 *
 * If no domain name is found 'domain_start' is set to -1.
 */
void find_domain_name(const WCHAR *host, DWORD host_len,
                             INT *domain_start) {
    const WCHAR *last_tld, *sec_last_tld, *end, *p;

    /* 'end' points at the last character of the host (inclusive). */
    end = host+host_len-1;

    *domain_start = -1;

    /* There has to be at least enough room for a '.' followed by a
     * 3-character TLD for a domain to even exist in the host name.
     */
    if(host_len < 4)
        return;

    /* Remember the positions of the last and the second to last '.'. */
    for (last_tld = sec_last_tld = NULL, p = host; p <= end; p++)
    {
        if (*p == '.')
        {
            sec_last_tld = last_tld;
            last_tld = p;
        }
    }
    if(!last_tld)
        /* http://hostname -> has no domain name. */
        return;

    if(!sec_last_tld) {
        /* Only one '.' in the host. */

        /* If the '.' is at the beginning of the host there
         * has to be at least 3 characters in the TLD for it
         * to be valid.
         *  Ex: .com -> .com as the domain name.
         *      .co  -> has no domain name.
         *
         * NOTE(review): with last_tld == host the expression below equals
         * host_len, which is already known to be >= 4 here, so this return
         * looks unreachable (".co" is rejected by the length check above).
         */
        if(last_tld-host == 0) {
            if(end-(last_tld-1) < 3)
                return;
        } else if(last_tld-host == 3) {
            DWORD i;

            /* If there are three characters in front of last_tld and
             * they are on the list of recognized TLDs, then this
             * host doesn't have a domain (since the host only contains
             * a TLD name.
             *  Ex: edu.uk -> has no domain name.
             *      foo.uk -> foo.uk as the domain name.
             */
            for(i = 0; i < ARRAY_SIZE(recognized_tlds); ++i) {
                if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3))
                    return;
            }
        } else if(last_tld-host < 3)
            /* Anything less than 3 characters is considered part
             * of the TLD name.
             *  Ex: ak.uk -> Has no domain name.
             */
            return;

        /* Otherwise the domain name is the whole host name. */
        *domain_start = 0;
    } else if(end+1-last_tld > 3) {
        /* If the last_tld has more than 3 characters, then it's automatically
         * considered the TLD of the domain name.
         *  Ex: www.winehq.org.uk.test -> uk.test as the domain name.
         *
         * Note that end+1-last_tld counts the '.' as well, so this branch is
         * taken as soon as 3 or more characters follow the last '.'.
         */
        *domain_start = (sec_last_tld+1)-host;
    } else if(last_tld - (sec_last_tld+1) < 4) {
        DWORD i;
        /* If the sec_last_tld is 3 characters long it HAS to be on the list of
         * recognized to still be considered part of the TLD name, otherwise
         * it's considered the domain name.
         *  Ex: www.google.com.uk -> google.com.uk as the domain name.
         *      www.google.foo.uk -> foo.uk as the domain name.
         */
        if(last_tld - (sec_last_tld+1) == 3) {
            for(i = 0; i < ARRAY_SIZE(recognized_tlds); ++i) {
                if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) {
                    /* Walk back to the character following the previous '.'
                     * (or to the start of the host if there is none).
                     */
                    for (p = sec_last_tld; p > host; p--) if (p[-1] == '.') break;
                    *domain_start = p - host;
                    TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
                                                                (host+host_len)-(host+*domain_start)));
                    return;
                }
            }

            *domain_start = (sec_last_tld+1)-host;
        } else {
            /* Since the sec_last_tld is less than 3 characters it's considered
             * part of the TLD.
             *  Ex: www.google.fo.uk -> google.fo.uk as the domain name.
             */
            for (p = sec_last_tld; p > host; p--) if (p[-1] == '.') break;
            *domain_start = p - host;
        }
    } else {
        /* The second to last TLD has more than 3 characters making it
         * the domain name.
         *  Ex: www.google.test.us -> test.us as the domain name.
         */
        *domain_start = (sec_last_tld+1)-host;
    }

    TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start,
                                                (host+host_len)-(host+*domain_start)));
}

/* Removes the dot segments from a hierarchical URIs path component. This
 * function performs the removal in place.
 *
 * This function returns the new length of the path string.
629 */ 630 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) { 631 WCHAR *out = path; 632 const WCHAR *in = out; 633 const WCHAR *end = out + path_len; 634 DWORD len; 635 636 while(in < end) { 637 /* Move the first path segment in the input buffer to the end of 638 * the output buffer, and any subsequent characters up to, including 639 * the next "/" character (if any) or the end of the input buffer. 640 */ 641 while(in < end && !is_slash(*in)) 642 *out++ = *in++; 643 if(in == end) 644 break; 645 *out++ = *in++; 646 647 while(in < end) { 648 if(*in != '.') 649 break; 650 651 /* Handle ending "/." */ 652 if(in + 1 == end) { 653 ++in; 654 break; 655 } 656 657 /* Handle "/./" */ 658 if(is_slash(in[1])) { 659 in += 2; 660 continue; 661 } 662 663 /* If we don't have "/../" or ending "/.." */ 664 if(in[1] != '.' || (in + 2 != end && !is_slash(in[2]))) 665 break; 666 667 /* Find the slash preceding out pointer and move out pointer to it */ 668 if(out > path+1 && is_slash(*--out)) 669 --out; 670 while(out > path && !is_slash(*(--out))); 671 if(is_slash(*out)) 672 ++out; 673 in += 2; 674 if(in != end) 675 ++in; 676 } 677 } 678 679 len = out - path; 680 TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len, 681 debugstr_wn(path, len), len); 682 return len; 683 } 684 685 /* Attempts to find the file extension in a given path. */ 686 static INT find_file_extension(const WCHAR *path, DWORD path_len) { 687 const WCHAR *end; 688 689 for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) { 690 if(*end == '.') 691 return end-path; 692 } 693 694 return -1; 695 } 696 697 /* Computes the location where the elision should occur in the IPv6 698 * address using the numerical values of each component stored in 699 * 'values'. If the address shouldn't contain an elision then 'index' 700 * is assigned -1 as its value. 
Otherwise 'index' will contain the 701 * starting index (into values) where the elision should be, and 'count' 702 * will contain the number of cells the elision covers. 703 * 704 * NOTES: 705 * Windows will expand an elision if the elision only represents one h16 706 * component of the address. 707 * 708 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 709 * 710 * If the IPv6 address contains an IPv4 address, the IPv4 address is also 711 * considered for being included as part of an elision if all its components 712 * are zeros. 713 * 714 * Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::] 715 */ 716 static void compute_elision_location(const ipv6_address *address, const USHORT values[8], 717 INT *index, DWORD *count) { 718 DWORD i, max_len, cur_len; 719 INT max_index, cur_index; 720 721 max_len = cur_len = 0; 722 max_index = cur_index = -1; 723 for(i = 0; i < 8; ++i) { 724 BOOL check_ipv4 = (address->ipv4 && i == 6); 725 BOOL is_end = (check_ipv4 || i == 7); 726 727 if(check_ipv4) { 728 /* Check if the IPv4 address contains only zeros. */ 729 if(values[i] == 0 && values[i+1] == 0) { 730 if(cur_index == -1) 731 cur_index = i; 732 733 cur_len += 2; 734 ++i; 735 } 736 } else if(values[i] == 0) { 737 if(cur_index == -1) 738 cur_index = i; 739 740 ++cur_len; 741 } 742 743 if(is_end || values[i] != 0) { 744 /* We only consider it for an elision if it's 745 * more than 1 component long. 746 */ 747 if(cur_len > 1 && cur_len > max_len) { 748 /* Found the new elision location. */ 749 max_len = cur_len; 750 max_index = cur_index; 751 } 752 753 /* Reset the current range for the next range of zeros. */ 754 cur_index = -1; 755 cur_len = 0; 756 } 757 } 758 759 *index = max_index; 760 *count = max_len; 761 } 762 763 /* Removes all the leading and trailing white spaces or 764 * control characters from the URI and removes all control 765 * characters inside of the URI string. 
766 */ 767 static BSTR pre_process_uri(LPCWSTR uri) { 768 const WCHAR *start, *end, *ptr; 769 WCHAR *ptr2; 770 DWORD len; 771 BSTR ret; 772 773 start = uri; 774 /* Skip leading controls and whitespace. */ 775 while(*start && (iswcntrl(*start) || iswspace(*start))) ++start; 776 777 /* URI consisted only of control/whitespace. */ 778 if(!*start) 779 return SysAllocStringLen(NULL, 0); 780 781 end = start + lstrlenW(start); 782 while(--end > start && (iswcntrl(*end) || iswspace(*end))); 783 784 len = ++end - start; 785 for(ptr = start; ptr < end; ptr++) { 786 if(iswcntrl(*ptr)) 787 len--; 788 } 789 790 ret = SysAllocStringLen(NULL, len); 791 if(!ret) 792 return NULL; 793 794 for(ptr = start, ptr2=ret; ptr < end; ptr++) { 795 if(!iswcntrl(*ptr)) 796 *ptr2++ = *ptr; 797 } 798 799 return ret; 800 } 801 802 /* Converts the specified IPv4 address into an uint value. 803 * 804 * This function assumes that the IPv4 address has already been validated. 805 */ 806 static UINT ipv4toui(const WCHAR *ip, DWORD len) { 807 UINT ret = 0; 808 DWORD comp_value = 0; 809 const WCHAR *ptr; 810 811 for(ptr = ip; ptr < ip+len; ++ptr) { 812 if(*ptr == '.') { 813 ret <<= 8; 814 ret += comp_value; 815 comp_value = 0; 816 } else 817 comp_value = comp_value*10 + (*ptr-'0'); 818 } 819 820 ret <<= 8; 821 ret += comp_value; 822 823 return ret; 824 } 825 826 /* Converts an IPv4 address in numerical form into its fully qualified 827 * string form. This function returns the number of characters written 828 * to 'dest'. If 'dest' is NULL this function will return the number of 829 * characters that would have been written. 830 * 831 * It's up to the caller to ensure there's enough space in 'dest' for the 832 * address. 
833 */ 834 static DWORD ui2ipv4(WCHAR *dest, UINT address) { 835 static const WCHAR formatW[] = 836 {'%','u','.','%','u','.','%','u','.','%','u',0}; 837 DWORD ret = 0; 838 UCHAR digits[4]; 839 840 digits[0] = (address >> 24) & 0xff; 841 digits[1] = (address >> 16) & 0xff; 842 digits[2] = (address >> 8) & 0xff; 843 digits[3] = address & 0xff; 844 845 if(!dest) { 846 WCHAR tmp[16]; 847 ret = swprintf(tmp, formatW, digits[0], digits[1], digits[2], digits[3]); 848 } else 849 ret = swprintf(dest, formatW, digits[0], digits[1], digits[2], digits[3]); 850 851 return ret; 852 } 853 854 static DWORD ui2str(WCHAR *dest, UINT value) { 855 static const WCHAR formatW[] = {'%','u',0}; 856 DWORD ret = 0; 857 858 if(!dest) { 859 WCHAR tmp[11]; 860 ret = swprintf(tmp, formatW, value); 861 } else 862 ret = swprintf(dest, formatW, value); 863 864 return ret; 865 } 866 867 /* Converts a h16 component (from an IPv6 address) into its 868 * numerical value. 869 * 870 * This function assumes that the h16 component has already been validated. 871 */ 872 static USHORT h16tous(h16 component) { 873 DWORD i; 874 USHORT ret = 0; 875 876 for(i = 0; i < component.len; ++i) { 877 ret <<= 4; 878 ret += hex_to_int(component.str[i]); 879 } 880 881 return ret; 882 } 883 884 /* Converts an IPv6 address into its 128 bits (16 bytes) numerical value. 885 * 886 * This function assumes that the ipv6_address has already been validated. 887 */ 888 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) { 889 DWORD i, cur_component = 0; 890 BOOL already_passed_elision = FALSE; 891 892 for(i = 0; i < address->h16_count; ++i) { 893 if(address->elision) { 894 if(address->components[i].str > address->elision && !already_passed_elision) { 895 /* Means we just passed the elision and need to add its values to 896 * 'number' before we do anything else. 
897 */ 898 INT j; 899 for(j = 0; j < address->elision_size; j+=2) 900 number[cur_component++] = 0; 901 902 already_passed_elision = TRUE; 903 } 904 } 905 906 number[cur_component++] = h16tous(address->components[i]); 907 } 908 909 /* Case when the elision appears after the h16 components. */ 910 if(!already_passed_elision && address->elision) { 911 INT j; 912 for(j = 0; j < address->elision_size; j+=2) 913 number[cur_component++] = 0; 914 } 915 916 if(address->ipv4) { 917 UINT value = ipv4toui(address->ipv4, address->ipv4_len); 918 919 if(cur_component != 6) { 920 ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component); 921 return FALSE; 922 } 923 924 number[cur_component++] = (value >> 16) & 0xffff; 925 number[cur_component] = value & 0xffff; 926 } 927 928 return TRUE; 929 } 930 931 /* Checks if the characters pointed to by 'ptr' are 932 * a percent encoded data octet. 933 * 934 * pct-encoded = "%" HEXDIG HEXDIG 935 */ 936 static BOOL check_pct_encoded(const WCHAR **ptr) { 937 const WCHAR *start = *ptr; 938 939 if(**ptr != '%') 940 return FALSE; 941 942 ++(*ptr); 943 if(!is_hexdigit(**ptr)) { 944 *ptr = start; 945 return FALSE; 946 } 947 948 ++(*ptr); 949 if(!is_hexdigit(**ptr)) { 950 *ptr = start; 951 return FALSE; 952 } 953 954 ++(*ptr); 955 return TRUE; 956 } 957 958 /* dec-octet = DIGIT ; 0-9 959 * / %x31-39 DIGIT ; 10-99 960 * / "1" 2DIGIT ; 100-199 961 * / "2" %x30-34 DIGIT ; 200-249 962 * / "25" %x30-35 ; 250-255 963 */ 964 static BOOL check_dec_octet(const WCHAR **ptr) { 965 const WCHAR *c1, *c2, *c3; 966 967 c1 = *ptr; 968 /* A dec-octet must be at least 1 digit long. */ 969 if(*c1 < '0' || *c1 > '9') 970 return FALSE; 971 972 ++(*ptr); 973 974 c2 = *ptr; 975 /* Since the 1-digit requirement was met, it doesn't 976 * matter if this is a DIGIT value, it's considered a 977 * dec-octet. 978 */ 979 if(*c2 < '0' || *c2 > '9') 980 return TRUE; 981 982 ++(*ptr); 983 984 c3 = *ptr; 985 /* Same explanation as above. 
*/ 986 if(*c3 < '0' || *c3 > '9') 987 return TRUE; 988 989 /* Anything > 255 isn't a valid IP dec-octet. */ 990 if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') { 991 *ptr = c1; 992 return FALSE; 993 } 994 995 ++(*ptr); 996 return TRUE; 997 } 998 999 /* Checks if there is an implicit IPv4 address in the host component of the URI. 1000 * The max value of an implicit IPv4 address is UINT_MAX. 1001 * 1002 * Ex: 1003 * "234567" would be considered an implicit IPv4 address. 1004 */ 1005 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) { 1006 const WCHAR *start = *ptr; 1007 ULONGLONG ret = 0; 1008 *val = 0; 1009 1010 while(is_num(**ptr)) { 1011 ret = ret*10 + (**ptr - '0'); 1012 1013 if(ret > UINT_MAX) { 1014 *ptr = start; 1015 return FALSE; 1016 } 1017 ++(*ptr); 1018 } 1019 1020 if(*ptr == start) 1021 return FALSE; 1022 1023 *val = ret; 1024 return TRUE; 1025 } 1026 1027 /* Checks if the string contains an IPv4 address. 1028 * 1029 * This function has a strict mode or a non-strict mode of operation 1030 * When 'strict' is set to FALSE this function will return TRUE if 1031 * the string contains at least 'dec-octet "." dec-octet' since partial 1032 * IPv4 addresses will be normalized out into full IPv4 addresses. When 1033 * 'strict' is set this function expects there to be a full IPv4 address. 1034 * 1035 * IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet 1036 */ 1037 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) { 1038 const WCHAR *start = *ptr; 1039 1040 if(!check_dec_octet(ptr)) { 1041 *ptr = start; 1042 return FALSE; 1043 } 1044 1045 if(**ptr != '.') { 1046 *ptr = start; 1047 return FALSE; 1048 } 1049 1050 ++(*ptr); 1051 if(!check_dec_octet(ptr)) { 1052 *ptr = start; 1053 return FALSE; 1054 } 1055 1056 if(**ptr != '.') { 1057 if(strict) { 1058 *ptr = start; 1059 return FALSE; 1060 } else 1061 return TRUE; 1062 } 1063 1064 ++(*ptr); 1065 if(!check_dec_octet(ptr)) { 1066 *ptr = start; 1067 return FALSE; 1068 } 1069 1070 if(**ptr != '.') { 1071 if(strict) { 1072 *ptr = start; 1073 return FALSE; 1074 } else 1075 return TRUE; 1076 } 1077 1078 ++(*ptr); 1079 if(!check_dec_octet(ptr)) { 1080 *ptr = start; 1081 return FALSE; 1082 } 1083 1084 /* Found a four digit ip address. */ 1085 return TRUE; 1086 } 1087 /* Tries to parse the scheme name of the URI. 1088 * 1089 * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896. 1090 * NOTE: Windows accepts a number as the first character of a scheme. 1091 */ 1092 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data, DWORD extras) { 1093 const WCHAR *start = *ptr; 1094 1095 data->scheme = NULL; 1096 data->scheme_len = 0; 1097 1098 while(**ptr) { 1099 if(**ptr == '*' && *ptr == start) { 1100 /* Might have found a wildcard scheme. 
If it is the next 1101 * char has to be a ':' for it to be a valid URI 1102 */ 1103 ++(*ptr); 1104 break; 1105 } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' && 1106 **ptr != '-' && **ptr != '.') 1107 break; 1108 1109 (*ptr)++; 1110 } 1111 1112 if(*ptr == start) 1113 return FALSE; 1114 1115 /* Schemes must end with a ':' */ 1116 if(**ptr != ':' && !((extras & ALLOW_NULL_TERM_SCHEME) && !**ptr)) { 1117 *ptr = start; 1118 return FALSE; 1119 } 1120 1121 data->scheme = start; 1122 data->scheme_len = *ptr - start; 1123 1124 ++(*ptr); 1125 return TRUE; 1126 } 1127 1128 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores 1129 * the deduced URL_SCHEME in data->scheme_type. 1130 */ 1131 static BOOL parse_scheme_type(parse_data *data) { 1132 /* If there's scheme data then see if it's a recognized scheme. */ 1133 if(data->scheme && data->scheme_len) { 1134 DWORD i; 1135 1136 for(i = 0; i < ARRAY_SIZE(recognized_schemes); ++i) { 1137 if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) { 1138 /* Has to be a case insensitive compare. */ 1139 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) { 1140 data->scheme_type = recognized_schemes[i].scheme; 1141 return TRUE; 1142 } 1143 } 1144 } 1145 1146 /* If we get here it means it's not a recognized scheme. */ 1147 data->scheme_type = URL_SCHEME_UNKNOWN; 1148 return TRUE; 1149 } else if(data->is_relative) { 1150 /* Relative URI's have no scheme. */ 1151 data->scheme_type = URL_SCHEME_UNKNOWN; 1152 return TRUE; 1153 } else { 1154 /* Should never reach here! what happened... */ 1155 FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri)); 1156 return FALSE; 1157 } 1158 } 1159 1160 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't 1161 * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type 1162 * using the flags specified in 'flags' (if any). 
Flags that affect how this function 1163 * operates are the Uri_CREATE_ALLOW_* flags. 1164 * 1165 * All parsed/deduced information will be stored in 'data' when the function returns. 1166 * 1167 * Returns TRUE if it was able to successfully parse the information. 1168 */ 1169 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1170 static const WCHAR fileW[] = {'f','i','l','e',0}; 1171 static const WCHAR wildcardW[] = {'*',0}; 1172 1173 /* First check to see if the uri could implicitly be a file path. */ 1174 if(is_implicit_file_path(*ptr)) { 1175 if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) { 1176 data->scheme = fileW; 1177 data->scheme_len = lstrlenW(fileW); 1178 data->has_implicit_scheme = TRUE; 1179 1180 TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags); 1181 } else { 1182 /* Windows does not consider anything that can implicitly be a file 1183 * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set... 1184 */ 1185 TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n", 1186 ptr, data, flags); 1187 return FALSE; 1188 } 1189 } else if(!parse_scheme_name(ptr, data, extras)) { 1190 /* No scheme was found, this means it could be: 1191 * a) an implicit Wildcard scheme 1192 * b) a relative URI 1193 * c) an invalid URI. 1194 */ 1195 if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) { 1196 data->scheme = wildcardW; 1197 data->scheme_len = lstrlenW(wildcardW); 1198 data->has_implicit_scheme = TRUE; 1199 1200 TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags); 1201 } else if (flags & Uri_CREATE_ALLOW_RELATIVE) { 1202 data->is_relative = TRUE; 1203 TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags); 1204 } else { 1205 TRACE("(%p %p %x): Malformed URI found. 
Unable to deduce scheme name.\n", ptr, data, flags); 1206 return FALSE; 1207 } 1208 } 1209 1210 if(!data->is_relative) 1211 TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags, 1212 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 1213 1214 if(!parse_scheme_type(data)) 1215 return FALSE; 1216 1217 TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type); 1218 return TRUE; 1219 } 1220 1221 static BOOL parse_username(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1222 data->username = *ptr; 1223 1224 while(**ptr != ':' && **ptr != '@') { 1225 if(**ptr == '%') { 1226 if(!check_pct_encoded(ptr)) { 1227 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1228 *ptr = data->username; 1229 data->username = NULL; 1230 return FALSE; 1231 } 1232 } else 1233 continue; 1234 } else if(extras & ALLOW_NULL_TERM_USER_NAME && !**ptr) 1235 break; 1236 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1237 *ptr = data->username; 1238 data->username = NULL; 1239 return FALSE; 1240 } 1241 1242 ++(*ptr); 1243 } 1244 1245 data->username_len = *ptr - data->username; 1246 return TRUE; 1247 } 1248 1249 static BOOL parse_password(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1250 data->password = *ptr; 1251 1252 while(**ptr != '@') { 1253 if(**ptr == '%') { 1254 if(!check_pct_encoded(ptr)) { 1255 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1256 *ptr = data->password; 1257 data->password = NULL; 1258 return FALSE; 1259 } 1260 } else 1261 continue; 1262 } else if(extras & ALLOW_NULL_TERM_PASSWORD && !**ptr) 1263 break; 1264 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1265 *ptr = data->password; 1266 data->password = NULL; 1267 return FALSE; 1268 } 1269 1270 ++(*ptr); 1271 } 1272 1273 data->password_len = *ptr - data->password; 1274 return TRUE; 1275 } 1276 1277 /* Parses the userinfo part of the URI (if it exists). 
The userinfo field of 1278 * a URI can consist of "username:password@", or just "username@". 1279 * 1280 * RFC def: 1281 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 1282 * 1283 * NOTES: 1284 * 1) If there is more than one ':' in the userinfo part of the URI Windows 1285 * uses the first occurrence of ':' to delimit the username and password 1286 * components. 1287 * 1288 * ex: 1289 * ftp://user:pass:word@winehq.org 1290 * 1291 * would yield "user" as the username and "pass:word" as the password. 1292 * 1293 * 2) Windows allows any character to appear in the "userinfo" part of 1294 * a URI, as long as it's not an authority delimiter character set. 1295 */ 1296 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) { 1297 const WCHAR *start = *ptr; 1298 1299 if(!parse_username(ptr, data, flags, 0)) { 1300 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1301 return; 1302 } 1303 1304 if(**ptr == ':') { 1305 ++(*ptr); 1306 if(!parse_password(ptr, data, flags, 0)) { 1307 *ptr = start; 1308 data->username = NULL; 1309 data->username_len = 0; 1310 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1311 return; 1312 } 1313 } 1314 1315 if(**ptr != '@') { 1316 *ptr = start; 1317 data->username = NULL; 1318 data->username_len = 0; 1319 data->password = NULL; 1320 data->password_len = 0; 1321 1322 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1323 return; 1324 } 1325 1326 if(data->username) 1327 TRACE("(%p %p %x): Found username %s len=%d.\n", ptr, data, flags, 1328 debugstr_wn(data->username, data->username_len), data->username_len); 1329 1330 if(data->password) 1331 TRACE("(%p %p %x): Found password %s len=%d.\n", ptr, data, flags, 1332 debugstr_wn(data->password, data->password_len), data->password_len); 1333 1334 ++(*ptr); 1335 } 1336 1337 /* Attempts to parse a port from the URI. 
1338 * 1339 * NOTES: 1340 * Windows seems to have a cap on what the maximum value 1341 * for a port can be. The max value is USHORT_MAX. 1342 * 1343 * port = *DIGIT 1344 */ 1345 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) { 1346 UINT port = 0; 1347 data->port = *ptr; 1348 1349 while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1350 if(!is_num(**ptr)) { 1351 *ptr = data->port; 1352 data->port = NULL; 1353 return FALSE; 1354 } 1355 1356 port = port*10 + (**ptr-'0'); 1357 1358 if(port > USHRT_MAX) { 1359 *ptr = data->port; 1360 data->port = NULL; 1361 return FALSE; 1362 } 1363 1364 ++(*ptr); 1365 } 1366 1367 data->has_port = TRUE; 1368 data->port_value = port; 1369 data->port_len = *ptr - data->port; 1370 1371 TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags, 1372 debugstr_wn(data->port, data->port_len), data->port_len, data->port_value); 1373 return TRUE; 1374 } 1375 1376 /* Attempts to parse a IPv4 address from the URI. 1377 * 1378 * NOTES: 1379 * Windows normalizes IPv4 addresses, This means there are three 1380 * possibilities for the URI to contain an IPv4 address. 1381 * 1) A well formed address (ex. 192.2.2.2). 1382 * 2) A partially formed address. For example "192.0" would 1383 * normalize to "192.0.0.0" during canonicalization. 1384 * 3) An implicit IPv4 address. For example "256" would 1385 * normalize to "0.0.1.0" during canonicalization. Also 1386 * note that the maximum value for an implicit IP address 1387 * is UINT_MAX, if the value in the URI exceeds this then 1388 * it is not considered an IPv4 address. 
1389 */ 1390 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) { 1391 const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN; 1392 data->host = *ptr; 1393 1394 if(!check_ipv4address(ptr, FALSE)) { 1395 if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) { 1396 TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n", 1397 ptr, data, flags); 1398 *ptr = data->host; 1399 data->host = NULL; 1400 return FALSE; 1401 } else 1402 data->has_implicit_ip = TRUE; 1403 } 1404 1405 data->host_len = *ptr - data->host; 1406 data->host_type = Uri_HOST_IPV4; 1407 1408 /* Check if what we found is the only part of the host name (if it isn't 1409 * we don't have an IPv4 address). 1410 */ 1411 if(**ptr == ':') { 1412 ++(*ptr); 1413 if(!parse_port(ptr, data, flags)) { 1414 *ptr = data->host; 1415 data->host = NULL; 1416 return FALSE; 1417 } 1418 } else if(!is_auth_delim(**ptr, !is_unknown)) { 1419 /* Found more data which belongs to the host, so this isn't an IPv4. */ 1420 *ptr = data->host; 1421 data->host = NULL; 1422 data->has_implicit_ip = FALSE; 1423 return FALSE; 1424 } 1425 1426 TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n", 1427 ptr, data, flags, debugstr_wn(data->host, data->host_len), 1428 data->host_len, data->host_type); 1429 return TRUE; 1430 } 1431 1432 /* Attempts to parse the reg-name from the URI. 1433 * 1434 * Because of the way Windows handles ':' this function also 1435 * handles parsing the port. 1436 * 1437 * reg-name = *( unreserved / pct-encoded / sub-delims ) 1438 * 1439 * NOTE: 1440 * Windows allows everything, but, the characters in "auth_delims" and ':' 1441 * to appear in a reg-name, unless it's an unknown scheme type then ':' is 1442 * allowed to appear (even if a valid port isn't after it). 1443 * 1444 * Windows doesn't like host names which start with '[' and end with ']' 1445 * and don't contain a valid IP literal address in between them. 
1446 * 1447 * On Windows if a '[' is encountered in the host name the ':' no longer 1448 * counts as a delimiter until you reach the next ']' or an "authority delimiter". 1449 * 1450 * A reg-name CAN be empty. 1451 */ 1452 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1453 const BOOL has_start_bracket = **ptr == '['; 1454 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 1455 const BOOL is_res = data->scheme_type == URL_SCHEME_RES; 1456 BOOL inside_brackets = has_start_bracket; 1457 1458 /* res URIs don't have ports. */ 1459 BOOL ignore_col = (extras & IGNORE_PORT_DELIMITER) || is_res; 1460 1461 /* We have to be careful with file schemes. */ 1462 if(data->scheme_type == URL_SCHEME_FILE) { 1463 /* This is because an implicit file scheme could be "C:\\test" and it 1464 * would trick this function into thinking the host is "C", when after 1465 * canonicalization the host would end up being an empty string. A drive 1466 * path can also have a '|' instead of a ':' after the drive letter. 1467 */ 1468 if(is_drive_path(*ptr)) { 1469 /* Regular old drive paths have no host type (or host name). */ 1470 data->host_type = Uri_HOST_UNKNOWN; 1471 data->host = *ptr; 1472 data->host_len = 0; 1473 return TRUE; 1474 } else if(is_unc_path(*ptr)) 1475 /* Skip past the "\\" of a UNC path. */ 1476 *ptr += 2; 1477 } 1478 1479 data->host = *ptr; 1480 1481 /* For res URIs, everything before the first '/' is 1482 * considered the host. 1483 */ 1484 while((!is_res && !is_auth_delim(**ptr, known_scheme)) || 1485 (is_res && **ptr && **ptr != '/')) { 1486 if(**ptr == ':' && !ignore_col) { 1487 /* We can ignore ':' if we are inside brackets.*/ 1488 if(!inside_brackets) { 1489 const WCHAR *tmp = (*ptr)++; 1490 1491 /* Attempt to parse the port. */ 1492 if(!parse_port(ptr, data, flags)) { 1493 /* Windows expects there to be a valid port for known scheme types. 
*/ 1494 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1495 *ptr = data->host; 1496 data->host = NULL; 1497 TRACE("(%p %p %x %x): Expected valid port\n", ptr, data, flags, extras); 1498 return FALSE; 1499 } else 1500 /* Windows gives up on trying to parse a port when it 1501 * encounters an invalid port. 1502 */ 1503 ignore_col = TRUE; 1504 } else { 1505 data->host_len = tmp - data->host; 1506 break; 1507 } 1508 } 1509 } else if(**ptr == '%' && (known_scheme && !is_res)) { 1510 /* Has to be a legit % encoded value. */ 1511 if(!check_pct_encoded(ptr)) { 1512 *ptr = data->host; 1513 data->host = NULL; 1514 return FALSE; 1515 } else 1516 continue; 1517 } else if(is_res && is_forbidden_dos_path_char(**ptr)) { 1518 *ptr = data->host; 1519 data->host = NULL; 1520 return FALSE; 1521 } else if(**ptr == ']') 1522 inside_brackets = FALSE; 1523 else if(**ptr == '[') 1524 inside_brackets = TRUE; 1525 1526 ++(*ptr); 1527 } 1528 1529 if(has_start_bracket) { 1530 /* Make sure the last character of the host wasn't a ']'. */ 1531 if(*(*ptr-1) == ']') { 1532 TRACE("(%p %p %x %x): Expected an IP literal inside of the host\n", 1533 ptr, data, flags, extras); 1534 *ptr = data->host; 1535 data->host = NULL; 1536 return FALSE; 1537 } 1538 } 1539 1540 /* Don't overwrite our length if we found a port earlier. */ 1541 if(!data->port) 1542 data->host_len = *ptr - data->host; 1543 1544 /* If the host is empty, then it's an unknown host type. */ 1545 if(data->host_len == 0 || is_res) 1546 data->host_type = Uri_HOST_UNKNOWN; 1547 else 1548 data->host_type = Uri_HOST_DNS; 1549 1550 TRACE("(%p %p %x %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags, extras, 1551 debugstr_wn(data->host, data->host_len), data->host_len); 1552 return TRUE; 1553 } 1554 1555 /* Attempts to parse an IPv6 address out of the URI. 
1556 * 1557 * IPv6address = 6( h16 ":" ) ls32 1558 * / "::" 5( h16 ":" ) ls32 1559 * / [ h16 ] "::" 4( h16 ":" ) ls32 1560 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 1561 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 1562 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 1563 * / [ *4( h16 ":" ) h16 ] "::" ls32 1564 * / [ *5( h16 ":" ) h16 ] "::" h16 1565 * / [ *6( h16 ":" ) h16 ] "::" 1566 * 1567 * ls32 = ( h16 ":" h16 ) / IPv4address 1568 * ; least-significant 32 bits of address. 1569 * 1570 * h16 = 1*4HEXDIG 1571 * ; 16 bits of address represented in hexadecimal. 1572 * 1573 * Modeled after google-url's 'DoParseIPv6' function. 1574 */ 1575 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) { 1576 const WCHAR *start, *cur_start; 1577 ipv6_address ip; 1578 1579 start = cur_start = *ptr; 1580 memset(&ip, 0, sizeof(ipv6_address)); 1581 1582 for(;; ++(*ptr)) { 1583 /* Check if we're on the last character of the host. */ 1584 BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN) 1585 || **ptr == ']'); 1586 1587 BOOL is_split = (**ptr == ':'); 1588 BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':'); 1589 1590 /* Check if we're at the end of a component, or 1591 * if we're at the end of the IPv6 address. 1592 */ 1593 if(is_split || is_end) { 1594 DWORD cur_len = 0; 1595 1596 cur_len = *ptr - cur_start; 1597 1598 /* h16 can't have a length > 4. */ 1599 if(cur_len > 4) { 1600 *ptr = start; 1601 1602 TRACE("(%p %p %x): h16 component to long.\n", 1603 ptr, data, flags); 1604 return FALSE; 1605 } 1606 1607 if(cur_len == 0) { 1608 /* An h16 component can't have the length of 0 unless 1609 * the elision is at the beginning of the address, or 1610 * at the end of the address. 
1611 */ 1612 if(!((*ptr == start && is_elision) || 1613 (is_end && (*ptr-2) == ip.elision))) { 1614 *ptr = start; 1615 TRACE("(%p %p %x): IPv6 component cannot have a length of 0.\n", 1616 ptr, data, flags); 1617 return FALSE; 1618 } 1619 } 1620 1621 if(cur_len > 0) { 1622 /* An IPv6 address can have no more than 8 h16 components. */ 1623 if(ip.h16_count >= 8) { 1624 *ptr = start; 1625 TRACE("(%p %p %x): Not a IPv6 address, too many h16 components.\n", 1626 ptr, data, flags); 1627 return FALSE; 1628 } 1629 1630 ip.components[ip.h16_count].str = cur_start; 1631 ip.components[ip.h16_count].len = cur_len; 1632 1633 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n", 1634 ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len, 1635 ip.h16_count); 1636 ++ip.h16_count; 1637 } 1638 } 1639 1640 if(is_end) 1641 break; 1642 1643 if(is_elision) { 1644 /* A IPv6 address can only have 1 elision ('::'). */ 1645 if(ip.elision) { 1646 *ptr = start; 1647 1648 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n", 1649 ptr, data, flags); 1650 return FALSE; 1651 } 1652 1653 ip.elision = *ptr; 1654 ++(*ptr); 1655 } 1656 1657 if(is_split) 1658 cur_start = *ptr+1; 1659 else { 1660 if(!check_ipv4address(ptr, TRUE)) { 1661 if(!is_hexdigit(**ptr)) { 1662 /* Not a valid character for an IPv6 address. */ 1663 *ptr = start; 1664 return FALSE; 1665 } 1666 } else { 1667 /* Found an IPv4 address. */ 1668 ip.ipv4 = cur_start; 1669 ip.ipv4_len = *ptr - cur_start; 1670 1671 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n", 1672 ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len), 1673 ip.ipv4_len); 1674 1675 /* IPv4 addresses can only appear at the end of a IPv6. */ 1676 break; 1677 } 1678 } 1679 } 1680 1681 compute_ipv6_comps_size(&ip); 1682 1683 /* Make sure the IPv6 address adds up to 16 bytes. 
*/ 1684 if(ip.components_size + ip.elision_size != 16) { 1685 *ptr = start; 1686 TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n", 1687 ptr, data, flags); 1688 return FALSE; 1689 } 1690 1691 if(ip.elision_size == 2) { 1692 /* For some reason on Windows if an elision that represents 1693 * only one h16 component is encountered at the very begin or 1694 * end of an IPv6 address, Windows does not consider it a 1695 * valid IPv6 address. 1696 * 1697 * Ex: [::2:3:4:5:6:7] is not valid, even though the sum 1698 * of all the components == 128bits. 1699 */ 1700 if(ip.elision < ip.components[0].str || 1701 ip.elision > ip.components[ip.h16_count-1].str) { 1702 *ptr = start; 1703 TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n", 1704 ptr, data, flags); 1705 return FALSE; 1706 } 1707 } 1708 1709 data->host_type = Uri_HOST_IPV6; 1710 data->has_ipv6 = TRUE; 1711 data->ipv6_address = ip; 1712 1713 TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n", 1714 ptr, data, flags, debugstr_wn(start, *ptr-start), 1715 (int)(*ptr-start)); 1716 return TRUE; 1717 } 1718 1719 /* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */ 1720 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) { 1721 const WCHAR *start = *ptr; 1722 1723 /* IPvFuture has to start with a 'v' or 'V'. */ 1724 if(**ptr != 'v' && **ptr != 'V') 1725 return FALSE; 1726 1727 /* Following the v there must be at least 1 hex digit. */ 1728 ++(*ptr); 1729 if(!is_hexdigit(**ptr)) { 1730 *ptr = start; 1731 return FALSE; 1732 } 1733 1734 ++(*ptr); 1735 while(is_hexdigit(**ptr)) 1736 ++(*ptr); 1737 1738 /* End of the hexdigit sequence must be a '.' 
*/ 1739 if(**ptr != '.') { 1740 *ptr = start; 1741 return FALSE; 1742 } 1743 1744 ++(*ptr); 1745 if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') { 1746 *ptr = start; 1747 return FALSE; 1748 } 1749 1750 ++(*ptr); 1751 while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':') 1752 ++(*ptr); 1753 1754 data->host_type = Uri_HOST_UNKNOWN; 1755 1756 TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags, 1757 debugstr_wn(start, *ptr-start), (int)(*ptr-start)); 1758 1759 return TRUE; 1760 } 1761 1762 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]" */ 1763 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1764 data->host = *ptr; 1765 1766 if(**ptr != '[' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 1767 data->host = NULL; 1768 return FALSE; 1769 } else if(**ptr == '[') 1770 ++(*ptr); 1771 1772 if(!parse_ipv6address(ptr, data, flags)) { 1773 if(extras & SKIP_IP_FUTURE_CHECK || !parse_ipvfuture(ptr, data, flags)) { 1774 *ptr = data->host; 1775 data->host = NULL; 1776 return FALSE; 1777 } 1778 } 1779 1780 if(**ptr != ']' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 1781 *ptr = data->host; 1782 data->host = NULL; 1783 return FALSE; 1784 } else if(!**ptr && extras & ALLOW_BRACKETLESS_IP_LITERAL) { 1785 /* The IP literal didn't contain brackets and was followed by 1786 * a NULL terminator, so no reason to even check the port. 1787 */ 1788 data->host_len = *ptr - data->host; 1789 return TRUE; 1790 } 1791 1792 ++(*ptr); 1793 if(**ptr == ':') { 1794 ++(*ptr); 1795 /* If a valid port is not found, then let it trickle down to 1796 * parse_reg_name. 1797 */ 1798 if(!parse_port(ptr, data, flags)) { 1799 *ptr = data->host; 1800 data->host = NULL; 1801 return FALSE; 1802 } 1803 } else 1804 data->host_len = *ptr - data->host; 1805 1806 return TRUE; 1807 } 1808 1809 /* Parses the host information from the URI. 
1810 * 1811 * host = IP-literal / IPv4address / reg-name 1812 */ 1813 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1814 if(!parse_ip_literal(ptr, data, flags, extras)) { 1815 if(!parse_ipv4address(ptr, data, flags)) { 1816 if(!parse_reg_name(ptr, data, flags, extras)) { 1817 TRACE("(%p %p %x %x): Malformed URI, Unknown host type.\n", 1818 ptr, data, flags, extras); 1819 return FALSE; 1820 } 1821 } 1822 } 1823 1824 return TRUE; 1825 } 1826 1827 /* Parses the authority information from the URI. 1828 * 1829 * authority = [ userinfo "@" ] host [ ":" port ] 1830 */ 1831 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) { 1832 parse_userinfo(ptr, data, flags); 1833 1834 /* Parsing the port will happen during one of the host parsing 1835 * routines (if the URI has a port). 1836 */ 1837 if(!parse_host(ptr, data, flags, 0)) 1838 return FALSE; 1839 1840 return TRUE; 1841 } 1842 1843 /* Attempts to parse the path information of a hierarchical URI. */ 1844 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) { 1845 const WCHAR *start = *ptr; 1846 static const WCHAR slash[] = {'/',0}; 1847 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 1848 1849 if(is_path_delim(data->scheme_type, **ptr)) { 1850 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->must_have_path) { 1851 data->path = NULL; 1852 data->path_len = 0; 1853 } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 1854 /* If the path component is empty, then a '/' is added. 
*/ 1855 data->path = slash; 1856 data->path_len = 1; 1857 } 1858 } else { 1859 while(!is_path_delim(data->scheme_type, **ptr)) { 1860 if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && !is_file) { 1861 if(!check_pct_encoded(ptr)) { 1862 *ptr = start; 1863 return FALSE; 1864 } else 1865 continue; 1866 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 1867 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 1868 /* File schemes with USE_DOS_PATH set aren't allowed to have 1869 * a '<' or '>' or '\"' appear in them. 1870 */ 1871 *ptr = start; 1872 return FALSE; 1873 } else if(**ptr == '\\') { 1874 /* Not allowed to have a backslash if NO_CANONICALIZE is set 1875 * and the scheme is known type (but not a file scheme). 1876 */ 1877 if(flags & Uri_CREATE_NO_CANONICALIZE) { 1878 if(data->scheme_type != URL_SCHEME_FILE && 1879 data->scheme_type != URL_SCHEME_UNKNOWN) { 1880 *ptr = start; 1881 return FALSE; 1882 } 1883 } 1884 } 1885 1886 ++(*ptr); 1887 } 1888 1889 /* The only time a URI doesn't have a path is when 1890 * the NO_CANONICALIZE flag is set and the raw URI 1891 * didn't contain one. 1892 */ 1893 if(*ptr == start) { 1894 data->path = NULL; 1895 data->path_len = 0; 1896 } else { 1897 data->path = start; 1898 data->path_len = *ptr - start; 1899 } 1900 } 1901 1902 if(data->path) 1903 TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags, 1904 debugstr_wn(data->path, data->path_len), data->path_len); 1905 else 1906 TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags); 1907 1908 return TRUE; 1909 } 1910 1911 /* Parses the path of an opaque URI (much less strict than the parser 1912 * for a hierarchical URI). 1913 * 1914 * NOTE: 1915 * Windows allows invalid % encoded data to appear in opaque URI paths 1916 * for unknown scheme types. 1917 * 1918 * File schemes with USE_DOS_PATH set aren't allowed to have '<', '>', or '\"' 1919 * appear in them. 
1920 */ 1921 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) { 1922 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 1923 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 1924 const BOOL is_mailto = data->scheme_type == URL_SCHEME_MAILTO; 1925 1926 if (is_mailto && (*ptr)[0] == '/' && (*ptr)[1] == '/') 1927 { 1928 if ((*ptr)[2]) data->path = *ptr + 2; 1929 else data->path = NULL; 1930 } 1931 else 1932 data->path = *ptr; 1933 1934 while(!is_path_delim(data->scheme_type, **ptr)) { 1935 if(**ptr == '%' && known_scheme) { 1936 if(!check_pct_encoded(ptr)) { 1937 *ptr = data->path; 1938 data->path = NULL; 1939 return FALSE; 1940 } else 1941 continue; 1942 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 1943 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 1944 *ptr = data->path; 1945 data->path = NULL; 1946 return FALSE; 1947 } 1948 1949 ++(*ptr); 1950 } 1951 1952 if (data->path) data->path_len = *ptr - data->path; 1953 TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags, 1954 debugstr_wn(data->path, data->path_len), data->path_len); 1955 return TRUE; 1956 } 1957 1958 /* Determines how the URI should be parsed after the scheme information. 1959 * 1960 * If the scheme is followed by "//", then it is treated as a hierarchical URI 1961 * which then the authority and path information will be parsed out. Otherwise, the 1962 * URI will be treated as an opaque URI which the authority information is not parsed 1963 * out. 
1964 * 1965 * RFC 3896 definition of hier-part: 1966 * 1967 * hier-part = "//" authority path-abempty 1968 * / path-absolute 1969 * / path-rootless 1970 * / path-empty 1971 * 1972 * MSDN opaque URI definition: 1973 * scheme ":" path [ "#" fragment ] 1974 * 1975 * NOTES: 1976 * If the URI is of an unknown scheme type and has a "//" following the scheme then it 1977 * is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is 1978 * set then it is considered an opaque URI regardless of what follows the scheme information 1979 * (per MSDN documentation). 1980 */ 1981 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { 1982 const WCHAR *start = *ptr; 1983 1984 data->must_have_path = FALSE; 1985 1986 /* For javascript: URIs, simply set everything as a path */ 1987 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) { 1988 data->path = *ptr; 1989 data->path_len = lstrlenW(*ptr); 1990 data->is_opaque = TRUE; 1991 *ptr += data->path_len; 1992 return TRUE; 1993 } 1994 1995 /* Checks if the authority information needs to be parsed. */ 1996 if(is_hierarchical_uri(ptr, data)) { 1997 /* Only treat it as a hierarchical URI if the scheme_type is known or 1998 * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set. 1999 */ 2000 if(data->scheme_type != URL_SCHEME_UNKNOWN || 2001 !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) { 2002 TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags); 2003 data->is_opaque = FALSE; 2004 2005 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->has_implicit_scheme) { 2006 if(**ptr == '/' && *(*ptr+1) == '/') { 2007 data->must_have_path = TRUE; 2008 *ptr += 2; 2009 } 2010 } 2011 2012 /* TODO: Handle hierarchical URI's, parse authority then parse the path. 
*/ 2013 if(!parse_authority(ptr, data, flags)) 2014 return FALSE; 2015 2016 return parse_path_hierarchical(ptr, data, flags); 2017 } else 2018 /* Reset ptr to its starting position so opaque path parsing 2019 * begins at the correct location. 2020 */ 2021 *ptr = start; 2022 } 2023 2024 /* If it reaches here, then the URI will be treated as an opaque 2025 * URI. 2026 */ 2027 2028 TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags); 2029 2030 data->is_opaque = TRUE; 2031 if(!parse_path_opaque(ptr, data, flags)) 2032 return FALSE; 2033 2034 return TRUE; 2035 } 2036 2037 /* Attempts to parse the query string from the URI. 2038 * 2039 * NOTES: 2040 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 2041 * data is allowed to appear in the query string. For unknown scheme types 2042 * invalid percent encoded data is allowed to appear regardless. 2043 */ 2044 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) { 2045 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2046 2047 if(**ptr != '?') { 2048 TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags); 2049 return TRUE; 2050 } 2051 2052 data->query = *ptr; 2053 2054 ++(*ptr); 2055 while(**ptr && **ptr != '#') { 2056 if(**ptr == '%' && known_scheme && 2057 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 2058 if(!check_pct_encoded(ptr)) { 2059 *ptr = data->query; 2060 data->query = NULL; 2061 return FALSE; 2062 } else 2063 continue; 2064 } 2065 2066 ++(*ptr); 2067 } 2068 2069 data->query_len = *ptr - data->query; 2070 2071 TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags, 2072 debugstr_wn(data->query, data->query_len), data->query_len); 2073 return TRUE; 2074 } 2075 2076 /* Attempts to parse the fragment from the URI. 2077 * 2078 * NOTES: 2079 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 2080 * data is allowed to appear in the query string. 
For unknown scheme types
 *  invalid percent encoded data is allowed to appear regardless.
 *
 * The leading '#' is part of data->fragment. On failure *ptr is rewound to
 * the '#' and data->fragment is NULL.
 */
static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
    const BOOL validate_pct = data->scheme_type != URL_SCHEME_UNKNOWN &&
                              !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO);

    if(**ptr != '#') {
        TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
        return TRUE;
    }

    data->fragment = *ptr;

    /* The fragment runs all the way to the end of the string. */
    ++(*ptr);
    while(**ptr) {
        if(**ptr == '%' && validate_pct) {
            if(!check_pct_encoded(ptr)) {
                *ptr = data->fragment;
                data->fragment = NULL;
                return FALSE;
            }

            /* The escape sequence has already been stepped over. */
            continue;
        }

        ++(*ptr);
    }

    data->fragment_len = *ptr - data->fragment;

    TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
        debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
    return TRUE;
}

/* Parses and validates the components of the URI specified by data->uri
 * and stores the information it parses into 'data'.
 *
 * Returns TRUE if it successfully parsed the URI. False otherwise.
2119 */ 2120 static BOOL parse_uri(parse_data *data, DWORD flags) { 2121 const WCHAR *ptr; 2122 const WCHAR **pptr; 2123 2124 ptr = data->uri; 2125 pptr = &ptr; 2126 2127 TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri)); 2128 2129 if(!parse_scheme(pptr, data, flags, 0)) 2130 return FALSE; 2131 2132 if(!parse_hierpart(pptr, data, flags)) 2133 return FALSE; 2134 2135 if(!parse_query(pptr, data, flags)) 2136 return FALSE; 2137 2138 if(!parse_fragment(pptr, data, flags)) 2139 return FALSE; 2140 2141 TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags); 2142 return TRUE; 2143 } 2144 2145 static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2146 const WCHAR *ptr; 2147 2148 if(!data->username) { 2149 uri->userinfo_start = -1; 2150 return TRUE; 2151 } 2152 2153 uri->userinfo_start = uri->canon_len; 2154 for(ptr = data->username; ptr < data->username+data->username_len; ++ptr) { 2155 if(*ptr == '%') { 2156 /* Only decode % encoded values for known scheme types. */ 2157 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2158 /* See if the value really needs decoding. */ 2159 WCHAR val = decode_pct_val(ptr); 2160 if(is_unreserved(val)) { 2161 if(!computeOnly) 2162 uri->canon_uri[uri->canon_len] = val; 2163 2164 ++uri->canon_len; 2165 2166 /* Move pass the hex characters. */ 2167 ptr += 2; 2168 continue; 2169 } 2170 } 2171 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 2172 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 2173 * is NOT set. 2174 */ 2175 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 2176 if(!computeOnly) 2177 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 2178 2179 uri->canon_len += 3; 2180 continue; 2181 } 2182 } 2183 2184 if(!computeOnly) 2185 /* Nothing special, so just copy the character over. 
*/ 2186 uri->canon_uri[uri->canon_len] = *ptr; 2187 ++uri->canon_len; 2188 } 2189 2190 return TRUE; 2191 } 2192 2193 static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2194 const WCHAR *ptr; 2195 2196 if(!data->password) { 2197 uri->userinfo_split = -1; 2198 return TRUE; 2199 } 2200 2201 if(uri->userinfo_start == -1) 2202 /* Has a password, but, doesn't have a username. */ 2203 uri->userinfo_start = uri->canon_len; 2204 2205 uri->userinfo_split = uri->canon_len - uri->userinfo_start; 2206 2207 /* Add the ':' to the userinfo component. */ 2208 if(!computeOnly) 2209 uri->canon_uri[uri->canon_len] = ':'; 2210 ++uri->canon_len; 2211 2212 for(ptr = data->password; ptr < data->password+data->password_len; ++ptr) { 2213 if(*ptr == '%') { 2214 /* Only decode % encoded values for known scheme types. */ 2215 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2216 /* See if the value really needs decoding. */ 2217 WCHAR val = decode_pct_val(ptr); 2218 if(is_unreserved(val)) { 2219 if(!computeOnly) 2220 uri->canon_uri[uri->canon_len] = val; 2221 2222 ++uri->canon_len; 2223 2224 /* Move pass the hex characters. */ 2225 ptr += 2; 2226 continue; 2227 } 2228 } 2229 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 2230 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 2231 * is NOT set. 2232 */ 2233 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 2234 if(!computeOnly) 2235 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 2236 2237 uri->canon_len += 3; 2238 continue; 2239 } 2240 } 2241 2242 if(!computeOnly) 2243 /* Nothing special, so just copy the character over. */ 2244 uri->canon_uri[uri->canon_len] = *ptr; 2245 ++uri->canon_len; 2246 } 2247 2248 return TRUE; 2249 } 2250 2251 /* Canonicalizes the userinfo of the URI represented by the parse_data. 2252 * 2253 * Canonicalization of the userinfo is a simple process. 
If there are any percent
 * encoded characters that fall in the "unreserved" character set, they are decoded
 * to their actual value. If a character is not in the "unreserved" or "reserved" sets
 * then it is percent encoded. Other than that the characters are copied over without
 * change.
 */
static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) {
    /* Start from "no userinfo" so every exit leaves consistent values. */
    uri->userinfo_start = -1;
    uri->userinfo_split = -1;
    uri->userinfo_len = 0;

    if(data->username || data->password) {
        /* Username first, then the password; the first failure aborts. */
        if(!canonicalize_username(data, uri, flags, computeOnly) ||
           !canonicalize_password(data, uri, flags, computeOnly))
            return FALSE;

        /* The length covers the username, the ':' and the password, but
         * not the '@' appended below.
         */
        uri->userinfo_len = uri->canon_len - uri->userinfo_start;

        if(!computeOnly) {
            TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n",
                  data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len),
                  uri->userinfo_split, uri->userinfo_len);

            /* Now insert the '@' after the userinfo. */
            uri->canon_uri[uri->canon_len] = '@';
        }
        ++uri->canon_len;
    }

    /* A URI without userinfo has nothing to canonicalize. */
    return TRUE;
}

/* Attempts to canonicalize a reg_name.
 *
 * Things that happen:
 *  1) If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is
 *     lower cased. Unless it's an unknown scheme type, in which case it's
 *     not lower cased regardless.
 *
 *  2) Unreserved % encoded characters are decoded for known
 *     scheme types.
 *
 *  3) Forbidden characters are % encoded as long as
 *     Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and
 *     it isn't an unknown scheme type.
 *
 *  4) If it's a file scheme and the host is "localhost" it's removed.
2302 * 2303 * 5) If it's a file scheme and Uri_CREATE_FILE_USE_DOS_PATH is set, 2304 * then the UNC path characters are added before the host name. 2305 */ 2306 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri, 2307 DWORD flags, BOOL computeOnly) { 2308 static const WCHAR localhostW[] = 2309 {'l','o','c','a','l','h','o','s','t',0}; 2310 const WCHAR *ptr; 2311 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2312 2313 if(data->scheme_type == URL_SCHEME_FILE && 2314 data->host_len == lstrlenW(localhostW)) { 2315 if(!StrCmpNIW(data->host, localhostW, data->host_len)) { 2316 uri->host_start = -1; 2317 uri->host_len = 0; 2318 uri->host_type = Uri_HOST_UNKNOWN; 2319 return TRUE; 2320 } 2321 } 2322 2323 if(data->scheme_type == URL_SCHEME_FILE && flags & Uri_CREATE_FILE_USE_DOS_PATH) { 2324 if(!computeOnly) { 2325 uri->canon_uri[uri->canon_len] = '\\'; 2326 uri->canon_uri[uri->canon_len+1] = '\\'; 2327 } 2328 uri->canon_len += 2; 2329 uri->authority_start = uri->canon_len; 2330 } 2331 2332 uri->host_start = uri->canon_len; 2333 2334 for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) { 2335 if(*ptr == '%' && known_scheme) { 2336 WCHAR val = decode_pct_val(ptr); 2337 if(is_unreserved(val)) { 2338 /* If NO_CANONICALIZE is not set, then windows lower cases the 2339 * decoded value. 2340 */ 2341 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && iswupper(val)) { 2342 if(!computeOnly) 2343 uri->canon_uri[uri->canon_len] = towlower(val); 2344 } else { 2345 if(!computeOnly) 2346 uri->canon_uri[uri->canon_len] = val; 2347 } 2348 ++uri->canon_len; 2349 2350 /* Skip past the % encoded character. */ 2351 ptr += 2; 2352 continue; 2353 } else { 2354 /* Just copy the % over. */ 2355 if(!computeOnly) 2356 uri->canon_uri[uri->canon_len] = *ptr; 2357 ++uri->canon_len; 2358 } 2359 } else if(*ptr == '\\') { 2360 /* Only unknown scheme types could have made it here with a '\\' in the host name. 
*/ 2361 if(!computeOnly) 2362 uri->canon_uri[uri->canon_len] = *ptr; 2363 ++uri->canon_len; 2364 } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && is_ascii(*ptr) && 2365 !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) { 2366 if(!computeOnly) { 2367 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 2368 2369 /* The percent encoded value gets lower cased also. */ 2370 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 2371 uri->canon_uri[uri->canon_len+1] = towlower(uri->canon_uri[uri->canon_len+1]); 2372 uri->canon_uri[uri->canon_len+2] = towlower(uri->canon_uri[uri->canon_len+2]); 2373 } 2374 } 2375 2376 uri->canon_len += 3; 2377 } else { 2378 if(!computeOnly) { 2379 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme) 2380 uri->canon_uri[uri->canon_len] = towlower(*ptr); 2381 else 2382 uri->canon_uri[uri->canon_len] = *ptr; 2383 } 2384 2385 ++uri->canon_len; 2386 } 2387 } 2388 2389 uri->host_len = uri->canon_len - uri->host_start; 2390 2391 if(!computeOnly) 2392 TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags, 2393 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2394 uri->host_len); 2395 2396 if(!computeOnly) 2397 find_domain_name(uri->canon_uri+uri->host_start, uri->host_len, 2398 &(uri->domain_offset)); 2399 2400 return TRUE; 2401 } 2402 2403 /* Attempts to canonicalize an implicit IPv4 address. */ 2404 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2405 uri->host_start = uri->canon_len; 2406 2407 TRACE("%u\n", data->implicit_ipv4); 2408 /* For unknown scheme types Windows doesn't convert 2409 * the value into an IP address, but it still considers 2410 * it an IPv4 address. 
2411 */ 2412 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 2413 if(!computeOnly) 2414 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2415 uri->canon_len += data->host_len; 2416 } else { 2417 if(!computeOnly) 2418 uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4); 2419 else 2420 uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4); 2421 } 2422 2423 uri->host_len = uri->canon_len - uri->host_start; 2424 uri->host_type = Uri_HOST_IPV4; 2425 2426 if(!computeOnly) 2427 TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n", 2428 data, uri, flags, computeOnly, 2429 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2430 uri->host_len); 2431 2432 return TRUE; 2433 } 2434 2435 /* Attempts to canonicalize an IPv4 address. 2436 * 2437 * If the parse_data represents a URI that has an implicit IPv4 address 2438 * (ex. http://256/, this function will convert 256 into 0.0.1.0). If 2439 * the implicit IP address exceeds the value of UINT_MAX (maximum value 2440 * for an IPv4 address) it's canonicalized as if it were a reg-name. 2441 * 2442 * If the parse_data contains a partial or full IPv4 address it normalizes it. 2443 * A partial IPv4 address is something like "192.0" and would be normalized to 2444 * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would 2445 * be normalized to "192.2.1.3". 2446 * 2447 * NOTES: 2448 * Windows ONLY normalizes IPv4 address for known scheme types (one that isn't 2449 * URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from 2450 * the original URI into the canonicalized URI, but, it still recognizes URI's 2451 * host type as HOST_IPV4. 
2452 */ 2453 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2454 if(data->has_implicit_ip) 2455 return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly); 2456 else { 2457 uri->host_start = uri->canon_len; 2458 2459 /* Windows only normalizes for known scheme types. */ 2460 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2461 /* parse_data contains a partial or full IPv4 address, so normalize it. */ 2462 DWORD i, octetDigitCount = 0, octetCount = 0; 2463 BOOL octetHasDigit = FALSE; 2464 2465 for(i = 0; i < data->host_len; ++i) { 2466 if(data->host[i] == '0' && !octetHasDigit) { 2467 /* Can ignore leading zeros if: 2468 * 1) It isn't the last digit of the octet. 2469 * 2) i+1 != data->host_len 2470 * 3) i+1 != '.' 2471 */ 2472 if(octetDigitCount == 2 || 2473 i+1 == data->host_len || 2474 data->host[i+1] == '.') { 2475 if(!computeOnly) 2476 uri->canon_uri[uri->canon_len] = data->host[i]; 2477 ++uri->canon_len; 2478 TRACE("Adding zero\n"); 2479 } 2480 } else if(data->host[i] == '.') { 2481 if(!computeOnly) 2482 uri->canon_uri[uri->canon_len] = data->host[i]; 2483 ++uri->canon_len; 2484 2485 octetDigitCount = 0; 2486 octetHasDigit = FALSE; 2487 ++octetCount; 2488 } else { 2489 if(!computeOnly) 2490 uri->canon_uri[uri->canon_len] = data->host[i]; 2491 ++uri->canon_len; 2492 2493 ++octetDigitCount; 2494 octetHasDigit = TRUE; 2495 } 2496 } 2497 2498 /* Make sure the canonicalized IP address has 4 dec-octets. 2499 * If doesn't add "0" ones until there is 4; 2500 */ 2501 for( ; octetCount < 3; ++octetCount) { 2502 if(!computeOnly) { 2503 uri->canon_uri[uri->canon_len] = '.'; 2504 uri->canon_uri[uri->canon_len+1] = '0'; 2505 } 2506 2507 uri->canon_len += 2; 2508 } 2509 } else { 2510 /* Windows doesn't normalize addresses in unknown schemes. 
*/ 2511 if(!computeOnly) 2512 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2513 uri->canon_len += data->host_len; 2514 } 2515 2516 uri->host_len = uri->canon_len - uri->host_start; 2517 if(!computeOnly) 2518 TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n", 2519 data, uri, flags, computeOnly, 2520 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2521 uri->host_len); 2522 } 2523 2524 return TRUE; 2525 } 2526 2527 /* Attempts to canonicalize the IPv6 address of the URI. 2528 * 2529 * Multiple things happen during the canonicalization of an IPv6 address: 2530 * 1) Any leading zero's in a h16 component are removed. 2531 * Ex: [0001:0022::] -> [1:22::] 2532 * 2533 * 2) The longest sequence of zero h16 components are compressed 2534 * into a "::" (elision). If there's a tie, the first is chosen. 2535 * 2536 * Ex: [0:0:0:0:1:6:7:8] -> [::1:6:7:8] 2537 * [0:0:0:0:1:2::] -> [::1:2:0:0] 2538 * [0:0:1:2:0:0:7:8] -> [::1:2:0:0:7:8] 2539 * 2540 * 3) If an IPv4 address is attached to the IPv6 address, it's 2541 * also normalized. 2542 * Ex: [::001.002.022.000] -> [::1.2.22.0] 2543 * 2544 * 4) If an elision is present, but, only represents one h16 component 2545 * it's expanded. 2546 * 2547 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 2548 * 2549 * 5) If the IPv6 address contains an IPv4 address and there exists 2550 * at least 1 non-zero h16 component the IPv4 address is converted 2551 * into two h16 components, otherwise it's normalized and kept as is. 2552 * 2553 * Ex: [::192.200.003.4] -> [::192.200.3.4] 2554 * [ffff::192.200.003.4] -> [ffff::c0c8:3041] 2555 * 2556 * NOTE: 2557 * For unknown scheme types Windows simply copies the address over without any 2558 * changes. 2559 * 2560 * IPv4 address can be included in an elision if all its components are 0's. 
2561 */ 2562 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri, 2563 DWORD flags, BOOL computeOnly) { 2564 uri->host_start = uri->canon_len; 2565 2566 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 2567 if(!computeOnly) 2568 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2569 uri->canon_len += data->host_len; 2570 } else { 2571 USHORT values[8]; 2572 INT elision_start; 2573 DWORD i, elision_len; 2574 2575 if(!ipv6_to_number(&(data->ipv6_address), values)) { 2576 TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n", 2577 data, uri, flags, computeOnly); 2578 return FALSE; 2579 } 2580 2581 if(!computeOnly) 2582 uri->canon_uri[uri->canon_len] = '['; 2583 ++uri->canon_len; 2584 2585 /* Find where the elision should occur (if any). */ 2586 compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len); 2587 2588 TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags, 2589 computeOnly, elision_start, elision_len); 2590 2591 for(i = 0; i < 8; ++i) { 2592 BOOL in_elision = (elision_start > -1 && i >= elision_start && 2593 i < elision_start+elision_len); 2594 BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision && 2595 data->ipv6_address.h16_count == 0); 2596 2597 if(i == elision_start) { 2598 if(!computeOnly) { 2599 uri->canon_uri[uri->canon_len] = ':'; 2600 uri->canon_uri[uri->canon_len+1] = ':'; 2601 } 2602 uri->canon_len += 2; 2603 } 2604 2605 /* We can ignore the current component if we're in the elision. */ 2606 if(in_elision) 2607 continue; 2608 2609 /* We only add a ':' if we're not at i == 0, or when we're at 2610 * the very end of elision range since the ':' colon was handled 2611 * earlier. Otherwise we would end up with ":::" after elision. 
2612 */ 2613 if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) { 2614 if(!computeOnly) 2615 uri->canon_uri[uri->canon_len] = ':'; 2616 ++uri->canon_len; 2617 } 2618 2619 if(do_ipv4) { 2620 UINT val; 2621 DWORD len; 2622 2623 /* Combine the two parts of the IPv4 address values. */ 2624 val = values[i]; 2625 val <<= 16; 2626 val += values[i+1]; 2627 2628 if(!computeOnly) 2629 len = ui2ipv4(uri->canon_uri+uri->canon_len, val); 2630 else 2631 len = ui2ipv4(NULL, val); 2632 2633 uri->canon_len += len; 2634 ++i; 2635 } else { 2636 /* Write a regular h16 component to the URI. */ 2637 2638 /* Short circuit for the trivial case. */ 2639 if(values[i] == 0) { 2640 if(!computeOnly) 2641 uri->canon_uri[uri->canon_len] = '0'; 2642 ++uri->canon_len; 2643 } else { 2644 static const WCHAR formatW[] = {'%','x',0}; 2645 2646 if(!computeOnly) 2647 uri->canon_len += swprintf(uri->canon_uri+uri->canon_len, 2648 formatW, values[i]); 2649 else { 2650 WCHAR tmp[5]; 2651 uri->canon_len += swprintf(tmp, formatW, values[i]); 2652 } 2653 } 2654 } 2655 } 2656 2657 /* Add the closing ']'. */ 2658 if(!computeOnly) 2659 uri->canon_uri[uri->canon_len] = ']'; 2660 ++uri->canon_len; 2661 } 2662 2663 uri->host_len = uri->canon_len - uri->host_start; 2664 2665 if(!computeOnly) 2666 TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags, 2667 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2668 uri->host_len); 2669 2670 return TRUE; 2671 } 2672 2673 /* Attempts to canonicalize the host of the URI (if any). 
*/ 2674 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2675 uri->host_start = -1; 2676 uri->host_len = 0; 2677 uri->domain_offset = -1; 2678 2679 if(data->host) { 2680 switch(data->host_type) { 2681 case Uri_HOST_DNS: 2682 uri->host_type = Uri_HOST_DNS; 2683 if(!canonicalize_reg_name(data, uri, flags, computeOnly)) 2684 return FALSE; 2685 2686 break; 2687 case Uri_HOST_IPV4: 2688 uri->host_type = Uri_HOST_IPV4; 2689 if(!canonicalize_ipv4address(data, uri, flags, computeOnly)) 2690 return FALSE; 2691 2692 break; 2693 case Uri_HOST_IPV6: 2694 if(!canonicalize_ipv6address(data, uri, flags, computeOnly)) 2695 return FALSE; 2696 2697 uri->host_type = Uri_HOST_IPV6; 2698 break; 2699 case Uri_HOST_UNKNOWN: 2700 if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) { 2701 uri->host_start = uri->canon_len; 2702 2703 /* Nothing happens to unknown host types. */ 2704 if(!computeOnly) 2705 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2706 uri->canon_len += data->host_len; 2707 uri->host_len = data->host_len; 2708 } 2709 2710 uri->host_type = Uri_HOST_UNKNOWN; 2711 break; 2712 default: 2713 FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data, 2714 uri, flags, computeOnly, data->host_type); 2715 return FALSE; 2716 } 2717 } 2718 2719 return TRUE; 2720 } 2721 2722 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2723 BOOL has_default_port = FALSE; 2724 USHORT default_port = 0; 2725 DWORD i; 2726 2727 uri->port_offset = -1; 2728 2729 /* Check if the scheme has a default port. 
*/ 2730 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 2731 if(default_ports[i].scheme == data->scheme_type) { 2732 has_default_port = TRUE; 2733 default_port = default_ports[i].port; 2734 break; 2735 } 2736 } 2737 2738 uri->has_port = data->has_port || has_default_port; 2739 2740 /* Possible cases: 2741 * 1) Has a port which is the default port. 2742 * 2) Has a port (not the default). 2743 * 3) Doesn't have a port, but, scheme has a default port. 2744 * 4) No port. 2745 */ 2746 if(has_default_port && data->has_port && data->port_value == default_port) { 2747 /* If it's the default port and this flag isn't set, don't do anything. */ 2748 if(flags & Uri_CREATE_NO_CANONICALIZE) { 2749 uri->port_offset = uri->canon_len-uri->authority_start; 2750 if(!computeOnly) 2751 uri->canon_uri[uri->canon_len] = ':'; 2752 ++uri->canon_len; 2753 2754 if(data->port) { 2755 /* Copy the original port over. */ 2756 if(!computeOnly) 2757 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 2758 uri->canon_len += data->port_len; 2759 } else { 2760 if(!computeOnly) 2761 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 2762 else 2763 uri->canon_len += ui2str(NULL, data->port_value); 2764 } 2765 } 2766 2767 uri->port = default_port; 2768 } else if(data->has_port) { 2769 uri->port_offset = uri->canon_len-uri->authority_start; 2770 if(!computeOnly) 2771 uri->canon_uri[uri->canon_len] = ':'; 2772 ++uri->canon_len; 2773 2774 if(flags & Uri_CREATE_NO_CANONICALIZE && data->port) { 2775 /* Copy the original over without changes. 
*/ 2776 if(!computeOnly) 2777 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 2778 uri->canon_len += data->port_len; 2779 } else { 2780 if(!computeOnly) 2781 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 2782 else 2783 uri->canon_len += ui2str(NULL, data->port_value); 2784 } 2785 2786 uri->port = data->port_value; 2787 } else if(has_default_port) 2788 uri->port = default_port; 2789 2790 return TRUE; 2791 } 2792 2793 /* Canonicalizes the authority of the URI represented by the parse_data. */ 2794 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2795 uri->authority_start = uri->canon_len; 2796 uri->authority_len = 0; 2797 2798 if(!canonicalize_userinfo(data, uri, flags, computeOnly)) 2799 return FALSE; 2800 2801 if(!canonicalize_host(data, uri, flags, computeOnly)) 2802 return FALSE; 2803 2804 if(!canonicalize_port(data, uri, flags, computeOnly)) 2805 return FALSE; 2806 2807 if(uri->host_start != -1 || (data->is_relative && (data->password || data->username))) 2808 uri->authority_len = uri->canon_len - uri->authority_start; 2809 else 2810 uri->authority_start = -1; 2811 2812 return TRUE; 2813 } 2814 2815 /* Attempts to canonicalize the path of a hierarchical URI. 2816 * 2817 * Things that happen: 2818 * 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN 2819 * flag is set or it's a file URI. Forbidden characters are always encoded 2820 * for file schemes regardless and forbidden characters are never encoded 2821 * for unknown scheme types. 2822 * 2823 * 2). For known scheme types '\\' are changed to '/'. 2824 * 2825 * 3). Percent encoded, unreserved characters are decoded to their actual values. 2826 * Unless the scheme type is unknown. For file schemes any percent encoded 2827 * character in the unreserved or reserved set is decoded. 2828 * 2829 * 4). 
For File schemes if the path is starts with a drive letter and doesn't 2830 * start with a '/' then one is appended. 2831 * Ex: file://c:/test.mp3 -> file:///c:/test.mp3 2832 * 2833 * 5). Dot segments are removed from the path for all scheme types 2834 * unless NO_CANONICALIZE flag is set. Dot segments aren't removed 2835 * for wildcard scheme types. 2836 * 2837 * NOTES: 2838 * file://c:/test%20test -> file:///c:/test%2520test 2839 * file://c:/test%3Etest -> file:///c:/test%253Etest 2840 * if Uri_CREATE_FILE_USE_DOS_PATH is not set: 2841 * file:///c:/test%20test -> file:///c:/test%20test 2842 * file:///c:/test%test -> file:///c:/test%25test 2843 */ 2844 static DWORD canonicalize_path_hierarchical(const WCHAR *path, DWORD path_len, URL_SCHEME scheme_type, BOOL has_host, DWORD flags, 2845 BOOL is_implicit_scheme, WCHAR *ret_path) { 2846 const BOOL known_scheme = scheme_type != URL_SCHEME_UNKNOWN; 2847 const BOOL is_file = scheme_type == URL_SCHEME_FILE; 2848 const BOOL is_res = scheme_type == URL_SCHEME_RES; 2849 const WCHAR *ptr; 2850 BOOL escape_pct = FALSE; 2851 DWORD len = 0; 2852 2853 if(!path) 2854 return 0; 2855 2856 ptr = path; 2857 2858 if(is_file && !has_host) { 2859 /* Check if a '/' needs to be appended for the file scheme. */ 2860 if(path_len > 1 && is_drive_path(ptr) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2861 if(ret_path) 2862 ret_path[len] = '/'; 2863 len++; 2864 escape_pct = TRUE; 2865 } else if(*ptr == '/') { 2866 if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2867 /* Copy the extra '/' over. */ 2868 if(ret_path) 2869 ret_path[len] = '/'; 2870 len++; 2871 } 2872 ++ptr; 2873 } 2874 2875 if(is_drive_path(ptr)) { 2876 if(ret_path) { 2877 ret_path[len] = *ptr; 2878 /* If there's a '|' after the drive letter, convert it to a ':'. */ 2879 ret_path[len+1] = ':'; 2880 } 2881 ptr += 2; 2882 len += 2; 2883 } 2884 } 2885 2886 if(!is_file && *path && *path != '/') { 2887 /* Prepend a '/' to the path if it doesn't have one. 
*/ 2888 if(ret_path) 2889 ret_path[len] = '/'; 2890 len++; 2891 } 2892 2893 for(; ptr < path+path_len; ++ptr) { 2894 BOOL do_default_action = TRUE; 2895 2896 if(*ptr == '%' && !is_res) { 2897 const WCHAR *tmp = ptr; 2898 WCHAR val; 2899 2900 /* Check if the % represents a valid encoded char, or if it needs encoding. */ 2901 BOOL force_encode = !check_pct_encoded(&tmp) && is_file && !(flags&Uri_CREATE_FILE_USE_DOS_PATH); 2902 val = decode_pct_val(ptr); 2903 2904 if(force_encode || escape_pct) { 2905 /* Escape the percent sign in the file URI. */ 2906 if(ret_path) 2907 pct_encode_val(*ptr, ret_path+len); 2908 len += 3; 2909 do_default_action = FALSE; 2910 } else if((is_unreserved(val) && known_scheme) || 2911 (is_file && !is_implicit_scheme && (is_unreserved(val) || is_reserved(val) || 2912 (val && flags&Uri_CREATE_FILE_USE_DOS_PATH && !is_forbidden_dos_path_char(val))))) { 2913 if(ret_path) 2914 ret_path[len] = val; 2915 len++; 2916 2917 ptr += 2; 2918 continue; 2919 } 2920 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2921 /* Convert the '/' back to a '\\'. */ 2922 if(ret_path) 2923 ret_path[len] = '\\'; 2924 len++; 2925 do_default_action = FALSE; 2926 } else if(*ptr == '\\' && known_scheme) { 2927 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 2928 /* Convert '\\' into a '/'. */ 2929 if(ret_path) 2930 ret_path[len] = '/'; 2931 len++; 2932 do_default_action = FALSE; 2933 } 2934 } else if(known_scheme && !is_res && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) && 2935 (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) { 2936 if(!is_file || !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2937 /* Escape the forbidden character. 
*/ 2938 if(ret_path) 2939 pct_encode_val(*ptr, ret_path+len); 2940 len += 3; 2941 do_default_action = FALSE; 2942 } 2943 } 2944 2945 if(do_default_action) { 2946 if(ret_path) 2947 ret_path[len] = *ptr; 2948 len++; 2949 } 2950 } 2951 2952 /* Removing the dot segments only happens when it's not in 2953 * computeOnly mode and it's not a wildcard scheme. File schemes 2954 * with USE_DOS_PATH set don't get dot segments removed. 2955 */ 2956 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) && 2957 scheme_type != URL_SCHEME_WILDCARD) { 2958 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && ret_path) { 2959 /* Remove the dot segments (if any) and reset everything to the new 2960 * correct length. 2961 */ 2962 len = remove_dot_segments(ret_path, len); 2963 } 2964 } 2965 2966 if(ret_path) 2967 TRACE("Canonicalized path %s len=%d\n", debugstr_wn(ret_path, len), len); 2968 return len; 2969 } 2970 2971 /* Attempts to canonicalize the path for an opaque URI. 2972 * 2973 * For known scheme types: 2974 * 1) forbidden characters are percent encoded if 2975 * NO_ENCODE_FORBIDDEN_CHARACTERS isn't set. 2976 * 2977 * 2) Percent encoded, unreserved characters are decoded 2978 * to their actual values, for known scheme types. 2979 * 2980 * 3) '\\' are changed to '/' for known scheme types 2981 * except for mailto schemes. 2982 * 2983 * 4) For file schemes, if USE_DOS_PATH is set all '/' 2984 * are converted to backslashes. 2985 * 2986 * 5) For file schemes, if USE_DOS_PATH isn't set all '\' 2987 * are converted to forward slashes. 
2988 */ 2989 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2990 const WCHAR *ptr; 2991 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2992 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 2993 const BOOL is_mk = data->scheme_type == URL_SCHEME_MK; 2994 2995 if(!data->path) { 2996 uri->path_start = -1; 2997 uri->path_len = 0; 2998 return TRUE; 2999 } 3000 3001 uri->path_start = uri->canon_len; 3002 3003 if(is_mk){ 3004 /* hijack this flag for SCHEME_MK to tell the function when to start 3005 * converting slashes */ 3006 flags |= Uri_CREATE_FILE_USE_DOS_PATH; 3007 } 3008 3009 /* For javascript: URIs, simply copy path part without any canonicalization */ 3010 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) { 3011 if(!computeOnly) 3012 memcpy(uri->canon_uri+uri->canon_len, data->path, data->path_len*sizeof(WCHAR)); 3013 uri->path_len = data->path_len; 3014 uri->canon_len += data->path_len; 3015 return TRUE; 3016 } 3017 3018 /* Windows doesn't allow a "//" to appear after the scheme 3019 * of a URI, if it's an opaque URI. 3020 */ 3021 if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') { 3022 /* So it inserts a "/." before the "//" if it exists. 
*/ 3023 if(!computeOnly) { 3024 uri->canon_uri[uri->canon_len] = '/'; 3025 uri->canon_uri[uri->canon_len+1] = '.'; 3026 } 3027 3028 uri->canon_len += 2; 3029 } 3030 3031 for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) { 3032 BOOL do_default_action = TRUE; 3033 3034 if(*ptr == '%' && known_scheme) { 3035 WCHAR val = decode_pct_val(ptr); 3036 3037 if(is_unreserved(val)) { 3038 if(!computeOnly) 3039 uri->canon_uri[uri->canon_len] = val; 3040 ++uri->canon_len; 3041 3042 ptr += 2; 3043 continue; 3044 } 3045 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 3046 if(!computeOnly) 3047 uri->canon_uri[uri->canon_len] = '\\'; 3048 ++uri->canon_len; 3049 do_default_action = FALSE; 3050 } else if(*ptr == '\\') { 3051 if((data->is_relative || is_mk || is_file) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 3052 /* Convert to a '/'. */ 3053 if(!computeOnly) 3054 uri->canon_uri[uri->canon_len] = '/'; 3055 ++uri->canon_len; 3056 do_default_action = FALSE; 3057 } 3058 } else if(is_mk && *ptr == ':' && ptr + 1 < data->path + data->path_len && *(ptr + 1) == ':') { 3059 flags &= ~Uri_CREATE_FILE_USE_DOS_PATH; 3060 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr) && 3061 !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 3062 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 3063 if(!computeOnly) 3064 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3065 uri->canon_len += 3; 3066 do_default_action = FALSE; 3067 } 3068 } 3069 3070 if(do_default_action) { 3071 if(!computeOnly) 3072 uri->canon_uri[uri->canon_len] = *ptr; 3073 ++uri->canon_len; 3074 } 3075 } 3076 3077 if(is_mk && !computeOnly && !(flags & Uri_CREATE_NO_CANONICALIZE)) { 3078 DWORD new_len = remove_dot_segments(uri->canon_uri + uri->path_start, 3079 uri->canon_len - uri->path_start); 3080 uri->canon_len = uri->path_start + new_len; 3081 } 3082 3083 uri->path_len = uri->canon_len - uri->path_start; 3084 3085 if(!computeOnly) 3086 
TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly, 3087 debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len); 3088 return TRUE; 3089 } 3090 3091 /* Determines how the URI represented by the parse_data should be canonicalized. 3092 * 3093 * Essentially, if the parse_data represents an hierarchical URI then it calls 3094 * canonicalize_authority and the canonicalization functions for the path. If the 3095 * URI is opaque it canonicalizes the path of the URI. 3096 */ 3097 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3098 if(!data->is_opaque || (data->is_relative && (data->password || data->username))) { 3099 /* "//" is only added for non-wildcard scheme types. 3100 * 3101 * A "//" is only added to a relative URI if it has a 3102 * host or port component (this only happens if a IUriBuilder 3103 * is generating an IUri). 3104 */ 3105 if((data->is_relative && (data->host || data->has_port)) || 3106 (!data->is_relative && data->scheme_type != URL_SCHEME_WILDCARD)) { 3107 if(data->scheme_type == URL_SCHEME_WILDCARD) 3108 FIXME("Here\n"); 3109 3110 if(!computeOnly) { 3111 INT pos = uri->canon_len; 3112 3113 uri->canon_uri[pos] = '/'; 3114 uri->canon_uri[pos+1] = '/'; 3115 } 3116 uri->canon_len += 2; 3117 } 3118 3119 if(!canonicalize_authority(data, uri, flags, computeOnly)) 3120 return FALSE; 3121 3122 if(data->is_relative && (data->password || data->username)) { 3123 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 3124 return FALSE; 3125 } else { 3126 if(!computeOnly) 3127 uri->path_start = uri->canon_len; 3128 uri->path_len = canonicalize_path_hierarchical(data->path, data->path_len, data->scheme_type, data->host_len != 0, 3129 flags, data->has_implicit_scheme, computeOnly ? 
NULL : uri->canon_uri+uri->canon_len); 3130 uri->canon_len += uri->path_len; 3131 if(!computeOnly && !uri->path_len) 3132 uri->path_start = -1; 3133 } 3134 } else { 3135 /* Opaque URI's don't have an authority. */ 3136 uri->userinfo_start = uri->userinfo_split = -1; 3137 uri->userinfo_len = 0; 3138 uri->host_start = -1; 3139 uri->host_len = 0; 3140 uri->host_type = Uri_HOST_UNKNOWN; 3141 uri->has_port = FALSE; 3142 uri->authority_start = -1; 3143 uri->authority_len = 0; 3144 uri->domain_offset = -1; 3145 uri->port_offset = -1; 3146 3147 if(is_hierarchical_scheme(data->scheme_type)) { 3148 DWORD i; 3149 3150 /* Absolute URIs aren't displayed for known scheme types 3151 * which should be hierarchical URIs. 3152 */ 3153 uri->display_modifiers |= URI_DISPLAY_NO_ABSOLUTE_URI; 3154 3155 /* Windows also sets the port for these (if they have one). */ 3156 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 3157 if(data->scheme_type == default_ports[i].scheme) { 3158 uri->has_port = TRUE; 3159 uri->port = default_ports[i].port; 3160 break; 3161 } 3162 } 3163 } 3164 3165 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 3166 return FALSE; 3167 } 3168 3169 if(uri->path_start > -1 && !computeOnly) 3170 /* Finding file extensions happens for both types of URIs. */ 3171 uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len); 3172 else 3173 uri->extension_offset = -1; 3174 3175 return TRUE; 3176 } 3177 3178 /* Attempts to canonicalize the query string of the URI. 3179 * 3180 * Things that happen: 3181 * 1) For known scheme types forbidden characters 3182 * are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set 3183 * or NO_ENCODE_FORBIDDEN_CHARACTERS is set. 3184 * 3185 * 2) For known scheme types, percent encoded, unreserved characters 3186 * are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set. 
3187 */ 3188 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3189 const WCHAR *ptr, *end; 3190 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 3191 3192 if(!data->query) { 3193 uri->query_start = -1; 3194 uri->query_len = 0; 3195 return TRUE; 3196 } 3197 3198 uri->query_start = uri->canon_len; 3199 3200 end = data->query+data->query_len; 3201 for(ptr = data->query; ptr < end; ++ptr) { 3202 if(*ptr == '%') { 3203 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3204 WCHAR val = decode_pct_val(ptr); 3205 if(is_unreserved(val)) { 3206 if(!computeOnly) 3207 uri->canon_uri[uri->canon_len] = val; 3208 ++uri->canon_len; 3209 3210 ptr += 2; 3211 continue; 3212 } 3213 } 3214 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 3215 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 3216 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3217 if(!computeOnly) 3218 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3219 uri->canon_len += 3; 3220 continue; 3221 } 3222 } 3223 3224 if(!computeOnly) 3225 uri->canon_uri[uri->canon_len] = *ptr; 3226 ++uri->canon_len; 3227 } 3228 3229 uri->query_len = uri->canon_len - uri->query_start; 3230 3231 if(!computeOnly) 3232 TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags, 3233 computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len), 3234 uri->query_len); 3235 return TRUE; 3236 } 3237 3238 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3239 const WCHAR *ptr, *end; 3240 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 3241 3242 if(!data->fragment) { 3243 uri->fragment_start = -1; 3244 uri->fragment_len = 0; 3245 return TRUE; 3246 } 3247 3248 uri->fragment_start = uri->canon_len; 3249 3250 end = data->fragment + data->fragment_len; 3251 for(ptr = data->fragment; ptr < end; ++ptr) { 3252 if(*ptr == 
'%') { 3253 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3254 WCHAR val = decode_pct_val(ptr); 3255 if(is_unreserved(val)) { 3256 if(!computeOnly) 3257 uri->canon_uri[uri->canon_len] = val; 3258 ++uri->canon_len; 3259 3260 ptr += 2; 3261 continue; 3262 } 3263 } 3264 } else if(known_scheme && is_ascii(*ptr) && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 3265 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 3266 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3267 if(!computeOnly) 3268 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3269 uri->canon_len += 3; 3270 continue; 3271 } 3272 } 3273 3274 if(!computeOnly) 3275 uri->canon_uri[uri->canon_len] = *ptr; 3276 ++uri->canon_len; 3277 } 3278 3279 uri->fragment_len = uri->canon_len - uri->fragment_start; 3280 3281 if(!computeOnly) 3282 TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags, 3283 computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len), 3284 uri->fragment_len); 3285 return TRUE; 3286 } 3287 3288 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */ 3289 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3290 uri->scheme_start = -1; 3291 uri->scheme_len = 0; 3292 3293 if(!data->scheme) { 3294 /* The only type of URI that doesn't have to have a scheme is a relative 3295 * URI. 3296 */ 3297 if(!data->is_relative) { 3298 FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data, 3299 uri, flags, debugstr_w(data->uri)); 3300 return FALSE; 3301 } 3302 } else { 3303 if(!computeOnly) { 3304 DWORD i; 3305 INT pos = uri->canon_len; 3306 3307 for(i = 0; i < data->scheme_len; ++i) { 3308 /* Scheme name must be lower case after canonicalization. 
*/ 3309 uri->canon_uri[i + pos] = towlower(data->scheme[i]); 3310 } 3311 3312 uri->canon_uri[i + pos] = ':'; 3313 uri->scheme_start = pos; 3314 3315 TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags, 3316 debugstr_wn(uri->canon_uri+uri->scheme_start, data->scheme_len), data->scheme_len); 3317 } 3318 3319 /* This happens in both computation modes. */ 3320 uri->canon_len += data->scheme_len + 1; 3321 uri->scheme_len = data->scheme_len; 3322 } 3323 return TRUE; 3324 } 3325 3326 /* Computes what the length of the URI specified by the parse_data will be 3327 * after canonicalization occurs using the specified flags. 3328 * 3329 * This function will return a non-zero value indicating the length of the canonicalized 3330 * URI, or -1 on error. 3331 */ 3332 static int compute_canonicalized_length(const parse_data *data, DWORD flags) { 3333 Uri uri; 3334 3335 memset(&uri, 0, sizeof(Uri)); 3336 3337 TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags, 3338 debugstr_w(data->uri)); 3339 3340 if(!canonicalize_scheme(data, &uri, flags, TRUE)) { 3341 ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags); 3342 return -1; 3343 } 3344 3345 if(!canonicalize_hierpart(data, &uri, flags, TRUE)) { 3346 ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags); 3347 return -1; 3348 } 3349 3350 if(!canonicalize_query(data, &uri, flags, TRUE)) { 3351 ERR("(%p %x): Failed to compute query string length.\n", data, flags); 3352 return -1; 3353 } 3354 3355 if(!canonicalize_fragment(data, &uri, flags, TRUE)) { 3356 ERR("(%p %x): Failed to compute fragment length.\n", data, flags); 3357 return -1; 3358 } 3359 3360 TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len); 3361 3362 return uri.canon_len; 3363 } 3364 3365 /* Canonicalizes the URI data specified in the parse_data, using the given flags. 
If the 3366 * canonicalization succeeds it will store all the canonicalization information 3367 * in the pointer to the Uri. 3368 * 3369 * To canonicalize a URI this function first computes what the length of the URI 3370 * specified by the parse_data will be. Once this is done it will then perform the actual 3371 * canonicalization of the URI. 3372 */ 3373 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { 3374 INT len; 3375 3376 uri->canon_uri = NULL; 3377 uri->canon_size = uri->canon_len = 0; 3378 3379 TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri)); 3380 3381 /* First try to compute the length of the URI. */ 3382 len = compute_canonicalized_length(data, flags); 3383 if(len == -1) { 3384 ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags, 3385 debugstr_w(data->uri)); 3386 return E_INVALIDARG; 3387 } 3388 3389 uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR)); 3390 if(!uri->canon_uri) 3391 return E_OUTOFMEMORY; 3392 3393 uri->canon_size = len; 3394 if(!canonicalize_scheme(data, uri, flags, FALSE)) { 3395 ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags); 3396 return E_INVALIDARG; 3397 } 3398 uri->scheme_type = data->scheme_type; 3399 3400 if(!canonicalize_hierpart(data, uri, flags, FALSE)) { 3401 ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags); 3402 return E_INVALIDARG; 3403 } 3404 3405 if(!canonicalize_query(data, uri, flags, FALSE)) { 3406 ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n", 3407 data, uri, flags); 3408 return E_INVALIDARG; 3409 } 3410 3411 if(!canonicalize_fragment(data, uri, flags, FALSE)) { 3412 ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n", 3413 data, uri, flags); 3414 return E_INVALIDARG; 3415 } 3416 3417 /* There's a possibility we didn't use all the space we allocated 3418 * earlier. 
3419 */ 3420 if(uri->canon_len < uri->canon_size) { 3421 /* This happens if the URI is hierarchical and dot 3422 * segments were removed from its path. 3423 */ 3424 WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR)); 3425 if(!tmp) 3426 return E_OUTOFMEMORY; 3427 3428 uri->canon_uri = tmp; 3429 uri->canon_size = uri->canon_len; 3430 } 3431 3432 uri->canon_uri[uri->canon_len] = '\0'; 3433 TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri)); 3434 3435 return S_OK; 3436 } 3437 3438 static HRESULT get_builder_component(LPWSTR *component, DWORD *component_len, 3439 LPCWSTR source, DWORD source_len, 3440 LPCWSTR *output, DWORD *output_len) 3441 { 3442 if(!output_len) { 3443 if(output) 3444 *output = NULL; 3445 return E_POINTER; 3446 } 3447 3448 if(!output) { 3449 *output_len = 0; 3450 return E_POINTER; 3451 } 3452 3453 if(!(*component) && source) { 3454 /* Allocate 'component', and copy the contents from 'source' 3455 * into the new allocation. 3456 */ 3457 *component = heap_alloc((source_len+1)*sizeof(WCHAR)); 3458 if(!(*component)) 3459 return E_OUTOFMEMORY; 3460 3461 memcpy(*component, source, source_len*sizeof(WCHAR)); 3462 (*component)[source_len] = '\0'; 3463 *component_len = source_len; 3464 } 3465 3466 *output = *component; 3467 *output_len = *component_len; 3468 return *output ? S_OK : S_FALSE; 3469 } 3470 3471 /* Allocates 'component' and copies the string from 'new_value' into 'component'. 3472 * If 'prefix' is set and 'new_value' isn't NULL, then it checks if 'new_value' 3473 * starts with 'prefix'. If it doesn't then 'prefix' is prepended to 'component'. 3474 * 3475 * If everything is successful, then will set 'success_flag' in 'flags'. 
3476 */ 3477 static HRESULT set_builder_component(LPWSTR *component, DWORD *component_len, LPCWSTR new_value, 3478 WCHAR prefix, DWORD *flags, DWORD success_flag) 3479 { 3480 heap_free(*component); 3481 3482 if(!new_value) { 3483 *component = NULL; 3484 *component_len = 0; 3485 } else { 3486 BOOL add_prefix = FALSE; 3487 DWORD len = lstrlenW(new_value); 3488 DWORD pos = 0; 3489 3490 if(prefix && *new_value != prefix) { 3491 add_prefix = TRUE; 3492 *component = heap_alloc((len+2)*sizeof(WCHAR)); 3493 } else 3494 *component = heap_alloc((len+1)*sizeof(WCHAR)); 3495 3496 if(!(*component)) 3497 return E_OUTOFMEMORY; 3498 3499 if(add_prefix) 3500 (*component)[pos++] = prefix; 3501 3502 memcpy(*component+pos, new_value, (len+1)*sizeof(WCHAR)); 3503 *component_len = len+pos; 3504 } 3505 3506 *flags |= success_flag; 3507 return S_OK; 3508 } 3509 3510 static void reset_builder(UriBuilder *builder) { 3511 if(builder->uri) 3512 IUri_Release(&builder->uri->IUri_iface); 3513 builder->uri = NULL; 3514 3515 heap_free(builder->fragment); 3516 builder->fragment = NULL; 3517 builder->fragment_len = 0; 3518 3519 heap_free(builder->host); 3520 builder->host = NULL; 3521 builder->host_len = 0; 3522 3523 heap_free(builder->password); 3524 builder->password = NULL; 3525 builder->password_len = 0; 3526 3527 heap_free(builder->path); 3528 builder->path = NULL; 3529 builder->path_len = 0; 3530 3531 heap_free(builder->query); 3532 builder->query = NULL; 3533 builder->query_len = 0; 3534 3535 heap_free(builder->scheme); 3536 builder->scheme = NULL; 3537 builder->scheme_len = 0; 3538 3539 heap_free(builder->username); 3540 builder->username = NULL; 3541 builder->username_len = 0; 3542 3543 builder->has_port = FALSE; 3544 builder->port = 0; 3545 builder->modified_props = 0; 3546 } 3547 3548 static HRESULT validate_scheme_name(const UriBuilder *builder, parse_data *data, DWORD flags) { 3549 const WCHAR *component; 3550 const WCHAR *ptr; 3551 const WCHAR **pptr; 3552 DWORD expected_len; 3553 3554 
if(builder->scheme) { 3555 ptr = builder->scheme; 3556 expected_len = builder->scheme_len; 3557 } else if(builder->uri && builder->uri->scheme_start > -1) { 3558 ptr = builder->uri->canon_uri+builder->uri->scheme_start; 3559 expected_len = builder->uri->scheme_len; 3560 } else { 3561 static const WCHAR nullW[] = {0}; 3562 ptr = nullW; 3563 expected_len = 0; 3564 } 3565 3566 component = ptr; 3567 pptr = &ptr; 3568 if(parse_scheme(pptr, data, flags, ALLOW_NULL_TERM_SCHEME) && 3569 data->scheme_len == expected_len) { 3570 if(data->scheme) 3571 TRACE("(%p %p %x): Found valid scheme component %s len=%d.\n", builder, data, flags, 3572 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 3573 } else { 3574 TRACE("(%p %p %x): Invalid scheme component found %s.\n", builder, data, flags, 3575 debugstr_wn(component, expected_len)); 3576 return INET_E_INVALID_URL; 3577 } 3578 3579 return S_OK; 3580 } 3581 3582 static HRESULT validate_username(const UriBuilder *builder, parse_data *data, DWORD flags) { 3583 const WCHAR *ptr; 3584 const WCHAR **pptr; 3585 DWORD expected_len; 3586 3587 if(builder->username) { 3588 ptr = builder->username; 3589 expected_len = builder->username_len; 3590 } else if(!(builder->modified_props & Uri_HAS_USER_NAME) && builder->uri && 3591 builder->uri->userinfo_start > -1 && builder->uri->userinfo_split != 0) { 3592 /* Just use the username from the base Uri. */ 3593 data->username = builder->uri->canon_uri+builder->uri->userinfo_start; 3594 data->username_len = (builder->uri->userinfo_split > -1) ? 
3595 builder->uri->userinfo_split : builder->uri->userinfo_len; 3596 ptr = NULL; 3597 } else { 3598 ptr = NULL; 3599 expected_len = 0; 3600 } 3601 3602 if(ptr) { 3603 const WCHAR *component = ptr; 3604 pptr = &ptr; 3605 if(parse_username(pptr, data, flags, ALLOW_NULL_TERM_USER_NAME) && 3606 data->username_len == expected_len) 3607 TRACE("(%p %p %x): Found valid username component %s len=%d.\n", builder, data, flags, 3608 debugstr_wn(data->username, data->username_len), data->username_len); 3609 else { 3610 TRACE("(%p %p %x): Invalid username component found %s.\n", builder, data, flags, 3611 debugstr_wn(component, expected_len)); 3612 return INET_E_INVALID_URL; 3613 } 3614 } 3615 3616 return S_OK; 3617 } 3618 3619 static HRESULT validate_password(const UriBuilder *builder, parse_data *data, DWORD flags) { 3620 const WCHAR *ptr; 3621 const WCHAR **pptr; 3622 DWORD expected_len; 3623 3624 if(builder->password) { 3625 ptr = builder->password; 3626 expected_len = builder->password_len; 3627 } else if(!(builder->modified_props & Uri_HAS_PASSWORD) && builder->uri && 3628 builder->uri->userinfo_split > -1) { 3629 data->password = builder->uri->canon_uri+builder->uri->userinfo_start+builder->uri->userinfo_split+1; 3630 data->password_len = builder->uri->userinfo_len-builder->uri->userinfo_split-1; 3631 ptr = NULL; 3632 } else { 3633 ptr = NULL; 3634 expected_len = 0; 3635 } 3636 3637 if(ptr) { 3638 const WCHAR *component = ptr; 3639 pptr = &ptr; 3640 if(parse_password(pptr, data, flags, ALLOW_NULL_TERM_PASSWORD) && 3641 data->password_len == expected_len) 3642 TRACE("(%p %p %x): Found valid password component %s len=%d.\n", builder, data, flags, 3643 debugstr_wn(data->password, data->password_len), data->password_len); 3644 else { 3645 TRACE("(%p %p %x): Invalid password component found %s.\n", builder, data, flags, 3646 debugstr_wn(component, expected_len)); 3647 return INET_E_INVALID_URL; 3648 } 3649 } 3650 3651 return S_OK; 3652 } 3653 3654 static HRESULT 
validate_userinfo(const UriBuilder *builder, parse_data *data, DWORD flags) { 3655 HRESULT hr; 3656 3657 hr = validate_username(builder, data, flags); 3658 if(FAILED(hr)) 3659 return hr; 3660 3661 hr = validate_password(builder, data, flags); 3662 if(FAILED(hr)) 3663 return hr; 3664 3665 return S_OK; 3666 } 3667 3668 static HRESULT validate_host(const UriBuilder *builder, parse_data *data, DWORD flags) { 3669 const WCHAR *ptr; 3670 const WCHAR **pptr; 3671 DWORD expected_len; 3672 3673 if(builder->host) { 3674 ptr = builder->host; 3675 expected_len = builder->host_len; 3676 } else if(!(builder->modified_props & Uri_HAS_HOST) && builder->uri && builder->uri->host_start > -1) { 3677 ptr = builder->uri->canon_uri + builder->uri->host_start; 3678 expected_len = builder->uri->host_len; 3679 } else 3680 ptr = NULL; 3681 3682 if(ptr) { 3683 const WCHAR *component = ptr; 3684 DWORD extras = ALLOW_BRACKETLESS_IP_LITERAL|IGNORE_PORT_DELIMITER|SKIP_IP_FUTURE_CHECK; 3685 pptr = &ptr; 3686 3687 if(parse_host(pptr, data, flags, extras) && data->host_len == expected_len) 3688 TRACE("(%p %p %x): Found valid host name %s len=%d type=%d.\n", builder, data, flags, 3689 debugstr_wn(data->host, data->host_len), data->host_len, data->host_type); 3690 else { 3691 TRACE("(%p %p %x): Invalid host name found %s.\n", builder, data, flags, 3692 debugstr_wn(component, expected_len)); 3693 return INET_E_INVALID_URL; 3694 } 3695 } 3696 3697 return S_OK; 3698 } 3699 3700 static void setup_port(const UriBuilder *builder, parse_data *data, DWORD flags) { 3701 if(builder->modified_props & Uri_HAS_PORT) { 3702 if(builder->has_port) { 3703 data->has_port = TRUE; 3704 data->port_value = builder->port; 3705 } 3706 } else if(builder->uri && builder->uri->has_port) { 3707 data->has_port = TRUE; 3708 data->port_value = builder->uri->port; 3709 } 3710 3711 if(data->has_port) 3712 TRACE("(%p %p %x): Using %u as port for IUri.\n", builder, data, flags, data->port_value); 3713 } 3714 3715 static HRESULT 
validate_path(const UriBuilder *builder, parse_data *data, DWORD flags) { 3716 const WCHAR *ptr = NULL; 3717 const WCHAR *component; 3718 const WCHAR **pptr; 3719 DWORD expected_len; 3720 BOOL check_len = TRUE; 3721 BOOL valid = FALSE; 3722 3723 if(builder->path) { 3724 ptr = builder->path; 3725 expected_len = builder->path_len; 3726 } else if(!(builder->modified_props & Uri_HAS_PATH) && 3727 builder->uri && builder->uri->path_start > -1) { 3728 ptr = builder->uri->canon_uri+builder->uri->path_start; 3729 expected_len = builder->uri->path_len; 3730 } else { 3731 static const WCHAR nullW[] = {0}; 3732 ptr = nullW; 3733 check_len = FALSE; 3734 expected_len = -1; 3735 } 3736 3737 component = ptr; 3738 pptr = &ptr; 3739 3740 /* How the path is validated depends on what type of 3741 * URI it is. 3742 */ 3743 valid = data->is_opaque ? 3744 parse_path_opaque(pptr, data, flags) : parse_path_hierarchical(pptr, data, flags); 3745 3746 if(!valid || (check_len && expected_len != data->path_len)) { 3747 TRACE("(%p %p %x): Invalid path component %s.\n", builder, data, flags, 3748 debugstr_wn(component, expected_len) ); 3749 return INET_E_INVALID_URL; 3750 } 3751 3752 TRACE("(%p %p %x): Valid path component %s len=%d.\n", builder, data, flags, 3753 debugstr_wn(data->path, data->path_len), data->path_len); 3754 3755 return S_OK; 3756 } 3757 3758 static HRESULT validate_query(const UriBuilder *builder, parse_data *data, DWORD flags) { 3759 const WCHAR *ptr = NULL; 3760 const WCHAR **pptr; 3761 DWORD expected_len; 3762 3763 if(builder->query) { 3764 ptr = builder->query; 3765 expected_len = builder->query_len; 3766 } else if(!(builder->modified_props & Uri_HAS_QUERY) && builder->uri && 3767 builder->uri->query_start > -1) { 3768 ptr = builder->uri->canon_uri+builder->uri->query_start; 3769 expected_len = builder->uri->query_len; 3770 } 3771 3772 if(ptr) { 3773 const WCHAR *component = ptr; 3774 pptr = &ptr; 3775 3776 if(parse_query(pptr, data, flags) && expected_len == 
data->query_len) 3777 TRACE("(%p %p %x): Valid query component %s len=%d.\n", builder, data, flags, 3778 debugstr_wn(data->query, data->query_len), data->query_len); 3779 else { 3780 TRACE("(%p %p %x): Invalid query component %s.\n", builder, data, flags, 3781 debugstr_wn(component, expected_len)); 3782 return INET_E_INVALID_URL; 3783 } 3784 } 3785 3786 return S_OK; 3787 } 3788 3789 static HRESULT validate_fragment(const UriBuilder *builder, parse_data *data, DWORD flags) { 3790 const WCHAR *ptr = NULL; 3791 const WCHAR **pptr; 3792 DWORD expected_len; 3793 3794 if(builder->fragment) { 3795 ptr = builder->fragment; 3796 expected_len = builder->fragment_len; 3797 } else if(!(builder->modified_props & Uri_HAS_FRAGMENT) && builder->uri && 3798 builder->uri->fragment_start > -1) { 3799 ptr = builder->uri->canon_uri+builder->uri->fragment_start; 3800 expected_len = builder->uri->fragment_len; 3801 } 3802 3803 if(ptr) { 3804 const WCHAR *component = ptr; 3805 pptr = &ptr; 3806 3807 if(parse_fragment(pptr, data, flags) && expected_len == data->fragment_len) 3808 TRACE("(%p %p %x): Valid fragment component %s len=%d.\n", builder, data, flags, 3809 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len); 3810 else { 3811 TRACE("(%p %p %x): Invalid fragment component %s.\n", builder, data, flags, 3812 debugstr_wn(component, expected_len)); 3813 return INET_E_INVALID_URL; 3814 } 3815 } 3816 3817 return S_OK; 3818 } 3819 3820 static HRESULT validate_components(const UriBuilder *builder, parse_data *data, DWORD flags) { 3821 HRESULT hr; 3822 3823 memset(data, 0, sizeof(parse_data)); 3824 3825 TRACE("(%p %p %x): Beginning to validate builder components.\n", builder, data, flags); 3826 3827 hr = validate_scheme_name(builder, data, flags); 3828 if(FAILED(hr)) 3829 return hr; 3830 3831 /* Extra validation for file schemes. 
*/ 3832 if(data->scheme_type == URL_SCHEME_FILE) { 3833 if((builder->password || (builder->uri && builder->uri->userinfo_split > -1)) || 3834 (builder->username || (builder->uri && builder->uri->userinfo_start > -1))) { 3835 TRACE("(%p %p %x): File schemes can't contain a username or password.\n", 3836 builder, data, flags); 3837 return INET_E_INVALID_URL; 3838 } 3839 } 3840 3841 hr = validate_userinfo(builder, data, flags); 3842 if(FAILED(hr)) 3843 return hr; 3844 3845 hr = validate_host(builder, data, flags); 3846 if(FAILED(hr)) 3847 return hr; 3848 3849 setup_port(builder, data, flags); 3850 3851 /* The URI is opaque if it doesn't have an authority component. */ 3852 if(!data->is_relative) 3853 data->is_opaque = !data->username && !data->password && !data->host && !data->has_port 3854 && data->scheme_type != URL_SCHEME_FILE; 3855 else 3856 data->is_opaque = !data->host && !data->has_port; 3857 3858 hr = validate_path(builder, data, flags); 3859 if(FAILED(hr)) 3860 return hr; 3861 3862 hr = validate_query(builder, data, flags); 3863 if(FAILED(hr)) 3864 return hr; 3865 3866 hr = validate_fragment(builder, data, flags); 3867 if(FAILED(hr)) 3868 return hr; 3869 3870 TRACE("(%p %p %x): Finished validating builder components.\n", builder, data, flags); 3871 3872 return S_OK; 3873 } 3874 3875 static HRESULT compare_file_paths(const Uri *a, const Uri *b, BOOL *ret) 3876 { 3877 WCHAR *canon_path_a, *canon_path_b; 3878 DWORD len_a, len_b; 3879 3880 if(!a->path_len) { 3881 *ret = !b->path_len; 3882 return S_OK; 3883 } 3884 3885 if(!b->path_len) { 3886 *ret = FALSE; 3887 return S_OK; 3888 } 3889 3890 /* Fast path */ 3891 if(a->path_len == b->path_len && !_wcsnicmp(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) { 3892 *ret = TRUE; 3893 return S_OK; 3894 } 3895 3896 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, NULL); 3897 len_b = 
canonicalize_path_hierarchical(b->canon_uri+b->path_start, b->path_len, b->scheme_type, FALSE, 0, FALSE, NULL); 3898 3899 canon_path_a = heap_alloc(len_a*sizeof(WCHAR)); 3900 if(!canon_path_a) 3901 return E_OUTOFMEMORY; 3902 canon_path_b = heap_alloc(len_b*sizeof(WCHAR)); 3903 if(!canon_path_b) { 3904 heap_free(canon_path_a); 3905 return E_OUTOFMEMORY; 3906 } 3907 3908 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, canon_path_a); 3909 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, b->path_len, b->scheme_type, FALSE, 0, FALSE, canon_path_b); 3910 3911 *ret = len_a == len_b && !_wcsnicmp(canon_path_a, canon_path_b, len_a); 3912 3913 heap_free(canon_path_a); 3914 heap_free(canon_path_b); 3915 return S_OK; 3916 } 3917 3918 /* Checks if the two Uri's are logically equivalent. It's a simple 3919 * comparison, since they are both of type Uri, and it can access 3920 * the properties of each Uri directly without the need to go 3921 * through the "IUri_Get*" interface calls. 3922 */ 3923 static HRESULT compare_uris(const Uri *a, const Uri *b, BOOL *ret) { 3924 const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN; 3925 const BOOL are_hierarchical = a->authority_start > -1 && b->authority_start > -1; 3926 HRESULT hres; 3927 3928 *ret = FALSE; 3929 3930 if(a->scheme_type != b->scheme_type) 3931 return S_OK; 3932 3933 /* Only compare the scheme names (if any) if their unknown scheme types. */ 3934 if(!known_scheme) { 3935 if((a->scheme_start > -1 && b->scheme_start > -1) && 3936 (a->scheme_len == b->scheme_len)) { 3937 /* Make sure the schemes are the same. */ 3938 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len)) 3939 return S_OK; 3940 } else if(a->scheme_len != b->scheme_len) 3941 /* One of the Uri's has a scheme name, while the other doesn't. 
*/ 3942 return S_OK; 3943 } 3944 3945 /* If they have a userinfo component, perform case sensitive compare. */ 3946 if((a->userinfo_start > -1 && b->userinfo_start > -1) && 3947 (a->userinfo_len == b->userinfo_len)) { 3948 if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len)) 3949 return S_OK; 3950 } else if(a->userinfo_len != b->userinfo_len) 3951 /* One of the Uri's had a userinfo, while the other one doesn't. */ 3952 return S_OK; 3953 3954 /* Check if they have a host name. */ 3955 if((a->host_start > -1 && b->host_start > -1) && 3956 (a->host_len == b->host_len)) { 3957 /* Perform a case insensitive compare if they are a known scheme type. */ 3958 if(known_scheme) { 3959 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 3960 return S_OK; 3961 } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 3962 return S_OK; 3963 } else if(a->host_len != b->host_len) 3964 /* One of the Uri's had a host, while the other one didn't. */ 3965 return S_OK; 3966 3967 if(a->has_port && b->has_port) { 3968 if(a->port != b->port) 3969 return S_OK; 3970 } else if(a->has_port || b->has_port) 3971 /* One had a port, while the other one didn't. */ 3972 return S_OK; 3973 3974 /* Windows is weird with how it handles paths. For example 3975 * One URI could be "http://google.com" (after canonicalization) 3976 * and one could be "http://google.com/" and the IsEqual function 3977 * would still evaluate to TRUE, but, only if they are both hierarchical 3978 * URIs. 
3979 */ 3980 if(a->scheme_type == URL_SCHEME_FILE) { 3981 BOOL cmp; 3982 3983 hres = compare_file_paths(a, b, &cmp); 3984 if(FAILED(hres) || !cmp) 3985 return hres; 3986 } else if((a->path_start > -1 && b->path_start > -1) && 3987 (a->path_len == b->path_len)) { 3988 if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) 3989 return S_OK; 3990 } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) { 3991 if(*(a->canon_uri+a->path_start) != '/') 3992 return S_OK; 3993 } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) { 3994 if(*(b->canon_uri+b->path_start) != '/') 3995 return S_OK; 3996 } else if(a->path_len != b->path_len) 3997 return S_OK; 3998 3999 /* Compare the query strings of the two URIs. */ 4000 if((a->query_start > -1 && b->query_start > -1) && 4001 (a->query_len == b->query_len)) { 4002 if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len)) 4003 return S_OK; 4004 } else if(a->query_len != b->query_len) 4005 return S_OK; 4006 4007 if((a->fragment_start > -1 && b->fragment_start > -1) && 4008 (a->fragment_len == b->fragment_len)) { 4009 if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len)) 4010 return S_OK; 4011 } else if(a->fragment_len != b->fragment_len) 4012 return S_OK; 4013 4014 /* If we get here, the two URIs are equivalent. */ 4015 *ret = TRUE; 4016 return S_OK; 4017 } 4018 4019 static void convert_to_dos_path(const WCHAR *path, DWORD path_len, 4020 WCHAR *output, DWORD *output_len) 4021 { 4022 const WCHAR *ptr = path; 4023 4024 if(path_len > 3 && *ptr == '/' && is_drive_path(path+1)) 4025 /* Skip over the leading / before the drive path. 
*/ 4026 ++ptr; 4027 4028 for(; ptr < path+path_len; ++ptr) { 4029 if(*ptr == '/') { 4030 if(output) 4031 *output++ = '\\'; 4032 (*output_len)++; 4033 } else { 4034 if(output) 4035 *output++ = *ptr; 4036 (*output_len)++; 4037 } 4038 } 4039 } 4040 4041 /* Generates a raw uri string using the parse_data. */ 4042 static DWORD generate_raw_uri(const parse_data *data, BSTR uri, DWORD flags) { 4043 DWORD length = 0; 4044 4045 if(data->scheme) { 4046 if(uri) { 4047 memcpy(uri, data->scheme, data->scheme_len*sizeof(WCHAR)); 4048 uri[data->scheme_len] = ':'; 4049 } 4050 length += data->scheme_len+1; 4051 } 4052 4053 if(!data->is_opaque) { 4054 /* For the "//" which appears before the authority component. */ 4055 if(uri) { 4056 uri[length] = '/'; 4057 uri[length+1] = '/'; 4058 } 4059 length += 2; 4060 4061 /* Check if we need to add the "\\" before the host name 4062 * of a UNC server name in a DOS path. 4063 */ 4064 if(flags & RAW_URI_CONVERT_TO_DOS_PATH && 4065 data->scheme_type == URL_SCHEME_FILE && data->host) { 4066 if(uri) { 4067 uri[length] = '\\'; 4068 uri[length+1] = '\\'; 4069 } 4070 length += 2; 4071 } 4072 } 4073 4074 if(data->username) { 4075 if(uri) 4076 memcpy(uri+length, data->username, data->username_len*sizeof(WCHAR)); 4077 length += data->username_len; 4078 } 4079 4080 if(data->password) { 4081 if(uri) { 4082 uri[length] = ':'; 4083 memcpy(uri+length+1, data->password, data->password_len*sizeof(WCHAR)); 4084 } 4085 length += data->password_len+1; 4086 } 4087 4088 if(data->password || data->username) { 4089 if(uri) 4090 uri[length] = '@'; 4091 ++length; 4092 } 4093 4094 if(data->host) { 4095 /* IPv6 addresses get the brackets added around them if they don't already 4096 * have them. 
4097 */ 4098 const BOOL add_brackets = data->host_type == Uri_HOST_IPV6 && *(data->host) != '['; 4099 if(add_brackets) { 4100 if(uri) 4101 uri[length] = '['; 4102 ++length; 4103 } 4104 4105 if(uri) 4106 memcpy(uri+length, data->host, data->host_len*sizeof(WCHAR)); 4107 length += data->host_len; 4108 4109 if(add_brackets) { 4110 if(uri) 4111 uri[length] = ']'; 4112 length++; 4113 } 4114 } 4115 4116 if(data->has_port) { 4117 /* The port isn't included in the raw uri if it's the default 4118 * port for the scheme type. 4119 */ 4120 DWORD i; 4121 BOOL is_default = FALSE; 4122 4123 for(i = 0; i < ARRAY_SIZE(default_ports); ++i) { 4124 if(data->scheme_type == default_ports[i].scheme && 4125 data->port_value == default_ports[i].port) 4126 is_default = TRUE; 4127 } 4128 4129 if(!is_default || flags & RAW_URI_FORCE_PORT_DISP) { 4130 if(uri) 4131 uri[length] = ':'; 4132 ++length; 4133 4134 if(uri) 4135 length += ui2str(uri+length, data->port_value); 4136 else 4137 length += ui2str(NULL, data->port_value); 4138 } 4139 } 4140 4141 /* Check if a '/' should be added before the path for hierarchical URIs. 
*/ 4142 if(!data->is_opaque && data->path && *(data->path) != '/') { 4143 if(uri) 4144 uri[length] = '/'; 4145 ++length; 4146 } 4147 4148 if(data->path) { 4149 if(!data->is_opaque && data->scheme_type == URL_SCHEME_FILE && 4150 flags & RAW_URI_CONVERT_TO_DOS_PATH) { 4151 DWORD len = 0; 4152 4153 if(uri) 4154 convert_to_dos_path(data->path, data->path_len, uri+length, &len); 4155 else 4156 convert_to_dos_path(data->path, data->path_len, NULL, &len); 4157 4158 length += len; 4159 } else { 4160 if(uri) 4161 memcpy(uri+length, data->path, data->path_len*sizeof(WCHAR)); 4162 length += data->path_len; 4163 } 4164 } 4165 4166 if(data->query) { 4167 if(uri) 4168 memcpy(uri+length, data->query, data->query_len*sizeof(WCHAR)); 4169 length += data->query_len; 4170 } 4171 4172 if(data->fragment) { 4173 if(uri) 4174 memcpy(uri+length, data->fragment, data->fragment_len*sizeof(WCHAR)); 4175 length += data->fragment_len; 4176 } 4177 4178 if(uri) 4179 TRACE("(%p %p): Generated raw uri=%s len=%d\n", data, uri, debugstr_wn(uri, length), length); 4180 else 4181 TRACE("(%p %p): Computed raw uri len=%d\n", data, uri, length); 4182 4183 return length; 4184 } 4185 4186 static HRESULT generate_uri(const UriBuilder *builder, const parse_data *data, Uri *uri, DWORD flags) { 4187 HRESULT hr; 4188 DWORD length = generate_raw_uri(data, NULL, 0); 4189 uri->raw_uri = SysAllocStringLen(NULL, length); 4190 if(!uri->raw_uri) 4191 return E_OUTOFMEMORY; 4192 4193 generate_raw_uri(data, uri->raw_uri, 0); 4194 4195 hr = canonicalize_uri(data, uri, flags); 4196 if(FAILED(hr)) { 4197 if(hr == E_INVALIDARG) 4198 return INET_E_INVALID_URL; 4199 return hr; 4200 } 4201 4202 uri->create_flags = flags; 4203 return S_OK; 4204 } 4205 4206 static inline Uri* impl_from_IUri(IUri *iface) 4207 { 4208 return CONTAINING_RECORD(iface, Uri, IUri_iface); 4209 } 4210 4211 static inline void destroy_uri_obj(Uri *This) 4212 { 4213 SysFreeString(This->raw_uri); 4214 heap_free(This->canon_uri); 4215 heap_free(This); 4216 } 
4217 4218 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv) 4219 { 4220 Uri *This = impl_from_IUri(iface); 4221 4222 if(IsEqualGUID(&IID_IUnknown, riid)) { 4223 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 4224 *ppv = &This->IUri_iface; 4225 }else if(IsEqualGUID(&IID_IUri, riid)) { 4226 TRACE("(%p)->(IID_IUri %p)\n", This, ppv); 4227 *ppv = &This->IUri_iface; 4228 }else if(IsEqualGUID(&IID_IUriBuilderFactory, riid)) { 4229 TRACE("(%p)->(IID_IUriBuilderFactory %p)\n", This, ppv); 4230 *ppv = &This->IUriBuilderFactory_iface; 4231 }else if(IsEqualGUID(&IID_IPersistStream, riid)) { 4232 TRACE("(%p)->(IID_IPersistStream %p)\n", This, ppv); 4233 *ppv = &This->IPersistStream_iface; 4234 }else if(IsEqualGUID(&IID_IMarshal, riid)) { 4235 TRACE("(%p)->(IID_IMarshal %p)\n", This, ppv); 4236 *ppv = &This->IMarshal_iface; 4237 }else if(IsEqualGUID(&IID_IUriObj, riid)) { 4238 TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv); 4239 *ppv = This; 4240 return S_OK; 4241 }else { 4242 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 4243 *ppv = NULL; 4244 return E_NOINTERFACE; 4245 } 4246 4247 IUnknown_AddRef((IUnknown*)*ppv); 4248 return S_OK; 4249 } 4250 4251 static ULONG WINAPI Uri_AddRef(IUri *iface) 4252 { 4253 Uri *This = impl_from_IUri(iface); 4254 LONG ref = InterlockedIncrement(&This->ref); 4255 4256 TRACE("(%p) ref=%d\n", This, ref); 4257 4258 return ref; 4259 } 4260 4261 static ULONG WINAPI Uri_Release(IUri *iface) 4262 { 4263 Uri *This = impl_from_IUri(iface); 4264 LONG ref = InterlockedDecrement(&This->ref); 4265 4266 TRACE("(%p) ref=%d\n", This, ref); 4267 4268 if(!ref) 4269 destroy_uri_obj(This); 4270 4271 return ref; 4272 } 4273 4274 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags) 4275 { 4276 Uri *This = impl_from_IUri(iface); 4277 HRESULT hres; 4278 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pbstrProperty, dwFlags); 4279 4280 
if(!This->create_flags) 4281 return E_UNEXPECTED; 4282 if(!pbstrProperty) 4283 return E_POINTER; 4284 4285 if(uriProp > Uri_PROPERTY_STRING_LAST) { 4286 /* It only returns S_FALSE for the ZONE property... */ 4287 if(uriProp == Uri_PROPERTY_ZONE) { 4288 *pbstrProperty = SysAllocStringLen(NULL, 0); 4289 if(!(*pbstrProperty)) 4290 return E_OUTOFMEMORY; 4291 return S_FALSE; 4292 } 4293 4294 *pbstrProperty = NULL; 4295 return E_INVALIDARG; 4296 } 4297 4298 /* Don't have support for flags yet. */ 4299 if(dwFlags) { 4300 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 4301 return E_NOTIMPL; 4302 } 4303 4304 switch(uriProp) { 4305 case Uri_PROPERTY_ABSOLUTE_URI: 4306 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 4307 *pbstrProperty = SysAllocStringLen(NULL, 0); 4308 hres = S_FALSE; 4309 } else { 4310 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 4311 if(This->userinfo_len == 0) { 4312 /* Don't include the '@' after the userinfo component. */ 4313 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-1); 4314 hres = S_OK; 4315 if(*pbstrProperty) { 4316 /* Copy everything before it. */ 4317 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4318 4319 /* And everything after it. 
*/ 4320 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+1, 4321 (This->canon_len-This->userinfo_start-1)*sizeof(WCHAR)); 4322 } 4323 } else if(This->userinfo_split == 0 && This->userinfo_len == 1) { 4324 /* Don't include the ":@" */ 4325 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-2); 4326 hres = S_OK; 4327 if(*pbstrProperty) { 4328 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4329 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+2, 4330 (This->canon_len-This->userinfo_start-2)*sizeof(WCHAR)); 4331 } 4332 } else { 4333 *pbstrProperty = SysAllocString(This->canon_uri); 4334 hres = S_OK; 4335 } 4336 } else { 4337 *pbstrProperty = SysAllocString(This->canon_uri); 4338 hres = S_OK; 4339 } 4340 } 4341 4342 if(!(*pbstrProperty)) 4343 hres = E_OUTOFMEMORY; 4344 4345 break; 4346 case Uri_PROPERTY_AUTHORITY: 4347 if(This->authority_start > -1) { 4348 if(This->port_offset > -1 && is_default_port(This->scheme_type, This->port) && 4349 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH) 4350 /* Don't include the port in the authority component. */ 4351 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->port_offset); 4352 else 4353 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len); 4354 hres = S_OK; 4355 } else { 4356 *pbstrProperty = SysAllocStringLen(NULL, 0); 4357 hres = S_FALSE; 4358 } 4359 4360 if(!(*pbstrProperty)) 4361 hres = E_OUTOFMEMORY; 4362 4363 break; 4364 case Uri_PROPERTY_DISPLAY_URI: 4365 /* The Display URI contains everything except for the userinfo for known 4366 * scheme types. 4367 */ 4368 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 4369 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len); 4370 4371 if(*pbstrProperty) { 4372 /* Copy everything before the userinfo over. 
*/ 4373 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4374 /* Copy everything after the userinfo over. */ 4375 memcpy(*pbstrProperty+This->userinfo_start, 4376 This->canon_uri+This->userinfo_start+This->userinfo_len+1, 4377 (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR)); 4378 } 4379 } else 4380 *pbstrProperty = SysAllocString(This->canon_uri); 4381 4382 if(!(*pbstrProperty)) 4383 hres = E_OUTOFMEMORY; 4384 else 4385 hres = S_OK; 4386 4387 break; 4388 case Uri_PROPERTY_DOMAIN: 4389 if(This->domain_offset > -1) { 4390 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset, 4391 This->host_len-This->domain_offset); 4392 hres = S_OK; 4393 } else { 4394 *pbstrProperty = SysAllocStringLen(NULL, 0); 4395 hres = S_FALSE; 4396 } 4397 4398 if(!(*pbstrProperty)) 4399 hres = E_OUTOFMEMORY; 4400 4401 break; 4402 case Uri_PROPERTY_EXTENSION: 4403 if(This->extension_offset > -1) { 4404 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset, 4405 This->path_len-This->extension_offset); 4406 hres = S_OK; 4407 } else { 4408 *pbstrProperty = SysAllocStringLen(NULL, 0); 4409 hres = S_FALSE; 4410 } 4411 4412 if(!(*pbstrProperty)) 4413 hres = E_OUTOFMEMORY; 4414 4415 break; 4416 case Uri_PROPERTY_FRAGMENT: 4417 if(This->fragment_start > -1) { 4418 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len); 4419 hres = S_OK; 4420 } else { 4421 *pbstrProperty = SysAllocStringLen(NULL, 0); 4422 hres = S_FALSE; 4423 } 4424 4425 if(!(*pbstrProperty)) 4426 hres = E_OUTOFMEMORY; 4427 4428 break; 4429 case Uri_PROPERTY_HOST: 4430 if(This->host_start > -1) { 4431 /* The '[' and ']' aren't included for IPv6 addresses. 
*/ 4432 if(This->host_type == Uri_HOST_IPV6) 4433 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2); 4434 else 4435 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len); 4436 4437 hres = S_OK; 4438 } else { 4439 *pbstrProperty = SysAllocStringLen(NULL, 0); 4440 hres = S_FALSE; 4441 } 4442 4443 if(!(*pbstrProperty)) 4444 hres = E_OUTOFMEMORY; 4445 4446 break; 4447 case Uri_PROPERTY_PASSWORD: 4448 if(This->userinfo_split > -1) { 4449 *pbstrProperty = SysAllocStringLen( 4450 This->canon_uri+This->userinfo_start+This->userinfo_split+1, 4451 This->userinfo_len-This->userinfo_split-1); 4452 hres = S_OK; 4453 } else { 4454 *pbstrProperty = SysAllocStringLen(NULL, 0); 4455 hres = S_FALSE; 4456 } 4457 4458 if(!(*pbstrProperty)) 4459 return E_OUTOFMEMORY; 4460 4461 break; 4462 case Uri_PROPERTY_PATH: 4463 if(This->path_start > -1) { 4464 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len); 4465 hres = S_OK; 4466 } else { 4467 *pbstrProperty = SysAllocStringLen(NULL, 0); 4468 hres = S_FALSE; 4469 } 4470 4471 if(!(*pbstrProperty)) 4472 hres = E_OUTOFMEMORY; 4473 4474 break; 4475 case Uri_PROPERTY_PATH_AND_QUERY: 4476 if(This->path_start > -1) { 4477 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len); 4478 hres = S_OK; 4479 } else if(This->query_start > -1) { 4480 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 4481 hres = S_OK; 4482 } else { 4483 *pbstrProperty = SysAllocStringLen(NULL, 0); 4484 hres = S_FALSE; 4485 } 4486 4487 if(!(*pbstrProperty)) 4488 hres = E_OUTOFMEMORY; 4489 4490 break; 4491 case Uri_PROPERTY_QUERY: 4492 if(This->query_start > -1) { 4493 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 4494 hres = S_OK; 4495 } else { 4496 *pbstrProperty = SysAllocStringLen(NULL, 0); 4497 hres = S_FALSE; 4498 } 4499 4500 if(!(*pbstrProperty)) 
4501 hres = E_OUTOFMEMORY; 4502 4503 break; 4504 case Uri_PROPERTY_RAW_URI: 4505 *pbstrProperty = SysAllocString(This->raw_uri); 4506 if(!(*pbstrProperty)) 4507 hres = E_OUTOFMEMORY; 4508 else 4509 hres = S_OK; 4510 break; 4511 case Uri_PROPERTY_SCHEME_NAME: 4512 if(This->scheme_start > -1) { 4513 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len); 4514 hres = S_OK; 4515 } else { 4516 *pbstrProperty = SysAllocStringLen(NULL, 0); 4517 hres = S_FALSE; 4518 } 4519 4520 if(!(*pbstrProperty)) 4521 hres = E_OUTOFMEMORY; 4522 4523 break; 4524 case Uri_PROPERTY_USER_INFO: 4525 if(This->userinfo_start > -1) { 4526 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len); 4527 hres = S_OK; 4528 } else { 4529 *pbstrProperty = SysAllocStringLen(NULL, 0); 4530 hres = S_FALSE; 4531 } 4532 4533 if(!(*pbstrProperty)) 4534 hres = E_OUTOFMEMORY; 4535 4536 break; 4537 case Uri_PROPERTY_USER_NAME: 4538 if(This->userinfo_start > -1 && This->userinfo_split != 0) { 4539 /* If userinfo_split is set, that means a password exists 4540 * so the username is only from userinfo_start to userinfo_split. 
4541 */ 4542 if(This->userinfo_split > -1) { 4543 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split); 4544 hres = S_OK; 4545 } else { 4546 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len); 4547 hres = S_OK; 4548 } 4549 } else { 4550 *pbstrProperty = SysAllocStringLen(NULL, 0); 4551 hres = S_FALSE; 4552 } 4553 4554 if(!(*pbstrProperty)) 4555 return E_OUTOFMEMORY; 4556 4557 break; 4558 default: 4559 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 4560 hres = E_NOTIMPL; 4561 } 4562 4563 return hres; 4564 } 4565 4566 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 4567 { 4568 Uri *This = impl_from_IUri(iface); 4569 HRESULT hres; 4570 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags); 4571 4572 if(!This->create_flags) 4573 return E_UNEXPECTED; 4574 if(!pcchProperty) 4575 return E_INVALIDARG; 4576 4577 /* Can only return a length for a property if it's a string. */ 4578 if(uriProp > Uri_PROPERTY_STRING_LAST) 4579 return E_INVALIDARG; 4580 4581 /* Don't have support for flags yet. */ 4582 if(dwFlags) { 4583 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4584 return E_NOTIMPL; 4585 } 4586 4587 switch(uriProp) { 4588 case Uri_PROPERTY_ABSOLUTE_URI: 4589 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 4590 *pcchProperty = 0; 4591 hres = S_FALSE; 4592 } else { 4593 if(This->scheme_type != URL_SCHEME_UNKNOWN) { 4594 if(This->userinfo_start > -1 && This->userinfo_len == 0) 4595 /* Don't include the '@' in the length. */ 4596 *pcchProperty = This->canon_len-1; 4597 else if(This->userinfo_start > -1 && This->userinfo_len == 1 && 4598 This->userinfo_split == 0) 4599 /* Don't include the ":@" in the length. 
*/ 4600 *pcchProperty = This->canon_len-2; 4601 else 4602 *pcchProperty = This->canon_len; 4603 } else 4604 *pcchProperty = This->canon_len; 4605 4606 hres = S_OK; 4607 } 4608 4609 break; 4610 case Uri_PROPERTY_AUTHORITY: 4611 if(This->port_offset > -1 && 4612 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH && 4613 is_default_port(This->scheme_type, This->port)) 4614 /* Only count up until the port in the authority. */ 4615 *pcchProperty = This->port_offset; 4616 else 4617 *pcchProperty = This->authority_len; 4618 hres = (This->authority_start > -1) ? S_OK : S_FALSE; 4619 break; 4620 case Uri_PROPERTY_DISPLAY_URI: 4621 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) 4622 *pcchProperty = This->canon_len-This->userinfo_len-1; 4623 else 4624 *pcchProperty = This->canon_len; 4625 4626 hres = S_OK; 4627 break; 4628 case Uri_PROPERTY_DOMAIN: 4629 if(This->domain_offset > -1) 4630 *pcchProperty = This->host_len - This->domain_offset; 4631 else 4632 *pcchProperty = 0; 4633 4634 hres = (This->domain_offset > -1) ? S_OK : S_FALSE; 4635 break; 4636 case Uri_PROPERTY_EXTENSION: 4637 if(This->extension_offset > -1) { 4638 *pcchProperty = This->path_len - This->extension_offset; 4639 hres = S_OK; 4640 } else { 4641 *pcchProperty = 0; 4642 hres = S_FALSE; 4643 } 4644 4645 break; 4646 case Uri_PROPERTY_FRAGMENT: 4647 *pcchProperty = This->fragment_len; 4648 hres = (This->fragment_start > -1) ? S_OK : S_FALSE; 4649 break; 4650 case Uri_PROPERTY_HOST: 4651 *pcchProperty = This->host_len; 4652 4653 /* '[' and ']' aren't included in the length. */ 4654 if(This->host_type == Uri_HOST_IPV6) 4655 *pcchProperty -= 2; 4656 4657 hres = (This->host_start > -1) ? S_OK : S_FALSE; 4658 break; 4659 case Uri_PROPERTY_PASSWORD: 4660 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0; 4661 hres = (This->userinfo_split > -1) ? 
S_OK : S_FALSE; 4662 break; 4663 case Uri_PROPERTY_PATH: 4664 *pcchProperty = This->path_len; 4665 hres = (This->path_start > -1) ? S_OK : S_FALSE; 4666 break; 4667 case Uri_PROPERTY_PATH_AND_QUERY: 4668 *pcchProperty = This->path_len+This->query_len; 4669 hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE; 4670 break; 4671 case Uri_PROPERTY_QUERY: 4672 *pcchProperty = This->query_len; 4673 hres = (This->query_start > -1) ? S_OK : S_FALSE; 4674 break; 4675 case Uri_PROPERTY_RAW_URI: 4676 *pcchProperty = SysStringLen(This->raw_uri); 4677 hres = S_OK; 4678 break; 4679 case Uri_PROPERTY_SCHEME_NAME: 4680 *pcchProperty = This->scheme_len; 4681 hres = (This->scheme_start > -1) ? S_OK : S_FALSE; 4682 break; 4683 case Uri_PROPERTY_USER_INFO: 4684 *pcchProperty = This->userinfo_len; 4685 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 4686 break; 4687 case Uri_PROPERTY_USER_NAME: 4688 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len; 4689 if(This->userinfo_split == 0) 4690 hres = S_FALSE; 4691 else 4692 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 4693 break; 4694 default: 4695 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4696 hres = E_NOTIMPL; 4697 } 4698 4699 return hres; 4700 } 4701 4702 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 4703 { 4704 Uri *This = impl_from_IUri(iface); 4705 HRESULT hres; 4706 4707 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags); 4708 4709 if(!This->create_flags) 4710 return E_UNEXPECTED; 4711 if(!pcchProperty) 4712 return E_INVALIDARG; 4713 4714 /* Microsoft's implementation for the ZONE property of a URI seems to be lacking... 4715 * From what I can tell, instead of checking which URLZONE the URI belongs to it 4716 * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone 4717 * function. 
4718 */ 4719 if(uriProp == Uri_PROPERTY_ZONE) { 4720 *pcchProperty = URLZONE_INVALID; 4721 return E_NOTIMPL; 4722 } 4723 4724 if(uriProp < Uri_PROPERTY_DWORD_START) { 4725 *pcchProperty = 0; 4726 return E_INVALIDARG; 4727 } 4728 4729 switch(uriProp) { 4730 case Uri_PROPERTY_HOST_TYPE: 4731 *pcchProperty = This->host_type; 4732 hres = S_OK; 4733 break; 4734 case Uri_PROPERTY_PORT: 4735 if(!This->has_port) { 4736 *pcchProperty = 0; 4737 hres = S_FALSE; 4738 } else { 4739 *pcchProperty = This->port; 4740 hres = S_OK; 4741 } 4742 4743 break; 4744 case Uri_PROPERTY_SCHEME: 4745 *pcchProperty = This->scheme_type; 4746 hres = S_OK; 4747 break; 4748 default: 4749 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4750 hres = E_NOTIMPL; 4751 } 4752 4753 return hres; 4754 } 4755 4756 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty) 4757 { 4758 Uri *This = impl_from_IUri(iface); 4759 4760 TRACE("(%p %s)->(%d %p)\n", This, debugstr_w(This->canon_uri), uriProp, pfHasProperty); 4761 4762 if(!pfHasProperty) 4763 return E_INVALIDARG; 4764 4765 switch(uriProp) { 4766 case Uri_PROPERTY_ABSOLUTE_URI: 4767 *pfHasProperty = !(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI); 4768 break; 4769 case Uri_PROPERTY_AUTHORITY: 4770 *pfHasProperty = This->authority_start > -1; 4771 break; 4772 case Uri_PROPERTY_DISPLAY_URI: 4773 *pfHasProperty = TRUE; 4774 break; 4775 case Uri_PROPERTY_DOMAIN: 4776 *pfHasProperty = This->domain_offset > -1; 4777 break; 4778 case Uri_PROPERTY_EXTENSION: 4779 *pfHasProperty = This->extension_offset > -1; 4780 break; 4781 case Uri_PROPERTY_FRAGMENT: 4782 *pfHasProperty = This->fragment_start > -1; 4783 break; 4784 case Uri_PROPERTY_HOST: 4785 *pfHasProperty = This->host_start > -1; 4786 break; 4787 case Uri_PROPERTY_PASSWORD: 4788 *pfHasProperty = This->userinfo_split > -1; 4789 break; 4790 case Uri_PROPERTY_PATH: 4791 *pfHasProperty = This->path_start > -1; 4792 break; 4793 case 
Uri_PROPERTY_PATH_AND_QUERY: 4794 *pfHasProperty = (This->path_start > -1 || This->query_start > -1); 4795 break; 4796 case Uri_PROPERTY_QUERY: 4797 *pfHasProperty = This->query_start > -1; 4798 break; 4799 case Uri_PROPERTY_RAW_URI: 4800 *pfHasProperty = TRUE; 4801 break; 4802 case Uri_PROPERTY_SCHEME_NAME: 4803 *pfHasProperty = This->scheme_start > -1; 4804 break; 4805 case Uri_PROPERTY_USER_INFO: 4806 *pfHasProperty = This->userinfo_start > -1; 4807 break; 4808 case Uri_PROPERTY_USER_NAME: 4809 if(This->userinfo_split == 0) 4810 *pfHasProperty = FALSE; 4811 else 4812 *pfHasProperty = This->userinfo_start > -1; 4813 break; 4814 case Uri_PROPERTY_HOST_TYPE: 4815 *pfHasProperty = TRUE; 4816 break; 4817 case Uri_PROPERTY_PORT: 4818 *pfHasProperty = This->has_port; 4819 break; 4820 case Uri_PROPERTY_SCHEME: 4821 *pfHasProperty = TRUE; 4822 break; 4823 case Uri_PROPERTY_ZONE: 4824 *pfHasProperty = FALSE; 4825 break; 4826 default: 4827 FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty); 4828 return E_NOTIMPL; 4829 } 4830 4831 return S_OK; 4832 } 4833 4834 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri) 4835 { 4836 TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri); 4837 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0); 4838 } 4839 4840 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority) 4841 { 4842 TRACE("(%p)->(%p)\n", iface, pstrAuthority); 4843 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0); 4844 } 4845 4846 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri) 4847 { 4848 TRACE("(%p)->(%p)\n", iface, pstrDisplayUri); 4849 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0); 4850 } 4851 4852 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain) 4853 { 4854 TRACE("(%p)->(%p)\n", iface, pstrDomain); 4855 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, 
pstrDomain, 0); 4856 } 4857 4858 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension) 4859 { 4860 TRACE("(%p)->(%p)\n", iface, pstrExtension); 4861 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0); 4862 } 4863 4864 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment) 4865 { 4866 TRACE("(%p)->(%p)\n", iface, pstrFragment); 4867 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0); 4868 } 4869 4870 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost) 4871 { 4872 TRACE("(%p)->(%p)\n", iface, pstrHost); 4873 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0); 4874 } 4875 4876 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword) 4877 { 4878 TRACE("(%p)->(%p)\n", iface, pstrPassword); 4879 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0); 4880 } 4881 4882 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath) 4883 { 4884 TRACE("(%p)->(%p)\n", iface, pstrPath); 4885 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0); 4886 } 4887 4888 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery) 4889 { 4890 TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery); 4891 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0); 4892 } 4893 4894 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery) 4895 { 4896 TRACE("(%p)->(%p)\n", iface, pstrQuery); 4897 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0); 4898 } 4899 4900 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri) 4901 { 4902 TRACE("(%p)->(%p)\n", iface, pstrRawUri); 4903 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0); 4904 } 4905 4906 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName) 4907 { 4908 TRACE("(%p)->(%p)\n", iface, pstrSchemeName); 4909 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, 
pstrSchemeName, 0); 4910 } 4911 4912 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo) 4913 { 4914 TRACE("(%p)->(%p)\n", iface, pstrUserInfo); 4915 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0); 4916 } 4917 4918 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName) 4919 { 4920 TRACE("(%p)->(%p)\n", iface, pstrUserName); 4921 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0); 4922 } 4923 4924 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType) 4925 { 4926 TRACE("(%p)->(%p)\n", iface, pdwHostType); 4927 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0); 4928 } 4929 4930 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort) 4931 { 4932 TRACE("(%p)->(%p)\n", iface, pdwPort); 4933 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0); 4934 } 4935 4936 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme) 4937 { 4938 TRACE("(%p)->(%p)\n", iface, pdwScheme); 4939 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0); 4940 } 4941 4942 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone) 4943 { 4944 TRACE("(%p)->(%p)\n", iface, pdwZone); 4945 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0); 4946 } 4947 4948 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties) 4949 { 4950 Uri *This = impl_from_IUri(iface); 4951 TRACE("(%p %s)->(%p)\n", This, debugstr_w(This->canon_uri), pdwProperties); 4952 4953 if(!This->create_flags) 4954 return E_UNEXPECTED; 4955 if(!pdwProperties) 4956 return E_INVALIDARG; 4957 4958 /* All URIs have these. 
*/ 4959 *pdwProperties = Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE; 4960 4961 if(!(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI)) 4962 *pdwProperties |= Uri_HAS_ABSOLUTE_URI; 4963 4964 if(This->scheme_start > -1) 4965 *pdwProperties |= Uri_HAS_SCHEME_NAME; 4966 4967 if(This->authority_start > -1) { 4968 *pdwProperties |= Uri_HAS_AUTHORITY; 4969 if(This->userinfo_start > -1) { 4970 *pdwProperties |= Uri_HAS_USER_INFO; 4971 if(This->userinfo_split != 0) 4972 *pdwProperties |= Uri_HAS_USER_NAME; 4973 } 4974 if(This->userinfo_split > -1) 4975 *pdwProperties |= Uri_HAS_PASSWORD; 4976 if(This->host_start > -1) 4977 *pdwProperties |= Uri_HAS_HOST; 4978 if(This->domain_offset > -1) 4979 *pdwProperties |= Uri_HAS_DOMAIN; 4980 } 4981 4982 if(This->has_port) 4983 *pdwProperties |= Uri_HAS_PORT; 4984 if(This->path_start > -1) 4985 *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY; 4986 if(This->query_start > -1) 4987 *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY; 4988 4989 if(This->extension_offset > -1) 4990 *pdwProperties |= Uri_HAS_EXTENSION; 4991 4992 if(This->fragment_start > -1) 4993 *pdwProperties |= Uri_HAS_FRAGMENT; 4994 4995 return S_OK; 4996 } 4997 4998 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual) 4999 { 5000 Uri *This = impl_from_IUri(iface); 5001 Uri *other; 5002 5003 TRACE("(%p %s)->(%p %p)\n", This, debugstr_w(This->canon_uri), pUri, pfEqual); 5004 5005 if(!This->create_flags) 5006 return E_UNEXPECTED; 5007 if(!pfEqual) 5008 return E_POINTER; 5009 5010 if(!pUri) { 5011 *pfEqual = FALSE; 5012 5013 /* For some reason Windows returns S_OK here... */ 5014 return S_OK; 5015 } 5016 5017 /* Try to convert it to a Uri (allows for a more simple comparison). 
*/ 5018 if(!(other = get_uri_obj(pUri))) { 5019 FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual); 5020 return E_NOTIMPL; 5021 } 5022 5023 TRACE("comparing to %s\n", debugstr_w(other->canon_uri)); 5024 return compare_uris(This, other, pfEqual); 5025 } 5026 5027 static const IUriVtbl UriVtbl = { 5028 Uri_QueryInterface, 5029 Uri_AddRef, 5030 Uri_Release, 5031 Uri_GetPropertyBSTR, 5032 Uri_GetPropertyLength, 5033 Uri_GetPropertyDWORD, 5034 Uri_HasProperty, 5035 Uri_GetAbsoluteUri, 5036 Uri_GetAuthority, 5037 Uri_GetDisplayUri, 5038 Uri_GetDomain, 5039 Uri_GetExtension, 5040 Uri_GetFragment, 5041 Uri_GetHost, 5042 Uri_GetPassword, 5043 Uri_GetPath, 5044 Uri_GetPathAndQuery, 5045 Uri_GetQuery, 5046 Uri_GetRawUri, 5047 Uri_GetSchemeName, 5048 Uri_GetUserInfo, 5049 Uri_GetUserName, 5050 Uri_GetHostType, 5051 Uri_GetPort, 5052 Uri_GetScheme, 5053 Uri_GetZone, 5054 Uri_GetProperties, 5055 Uri_IsEqual 5056 }; 5057 5058 static inline Uri* impl_from_IUriBuilderFactory(IUriBuilderFactory *iface) 5059 { 5060 return CONTAINING_RECORD(iface, Uri, IUriBuilderFactory_iface); 5061 } 5062 5063 static HRESULT WINAPI UriBuilderFactory_QueryInterface(IUriBuilderFactory *iface, REFIID riid, void **ppv) 5064 { 5065 Uri *This = impl_from_IUriBuilderFactory(iface); 5066 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5067 } 5068 5069 static ULONG WINAPI UriBuilderFactory_AddRef(IUriBuilderFactory *iface) 5070 { 5071 Uri *This = impl_from_IUriBuilderFactory(iface); 5072 return IUri_AddRef(&This->IUri_iface); 5073 } 5074 5075 static ULONG WINAPI UriBuilderFactory_Release(IUriBuilderFactory *iface) 5076 { 5077 Uri *This = impl_from_IUriBuilderFactory(iface); 5078 return IUri_Release(&This->IUri_iface); 5079 } 5080 5081 static HRESULT WINAPI UriBuilderFactory_CreateIUriBuilder(IUriBuilderFactory *iface, 5082 DWORD dwFlags, 5083 DWORD_PTR dwReserved, 5084 IUriBuilder **ppIUriBuilder) 5085 { 5086 Uri *This = impl_from_IUriBuilderFactory(iface); 5087 
TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 5088 5089 if(!ppIUriBuilder) 5090 return E_POINTER; 5091 5092 if(dwFlags || dwReserved) { 5093 *ppIUriBuilder = NULL; 5094 return E_INVALIDARG; 5095 } 5096 5097 return CreateIUriBuilder(NULL, 0, 0, ppIUriBuilder); 5098 } 5099 5100 static HRESULT WINAPI UriBuilderFactory_CreateInitializedIUriBuilder(IUriBuilderFactory *iface, 5101 DWORD dwFlags, 5102 DWORD_PTR dwReserved, 5103 IUriBuilder **ppIUriBuilder) 5104 { 5105 Uri *This = impl_from_IUriBuilderFactory(iface); 5106 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 5107 5108 if(!ppIUriBuilder) 5109 return E_POINTER; 5110 5111 if(dwFlags || dwReserved) { 5112 *ppIUriBuilder = NULL; 5113 return E_INVALIDARG; 5114 } 5115 5116 return CreateIUriBuilder(&This->IUri_iface, 0, 0, ppIUriBuilder); 5117 } 5118 5119 static const IUriBuilderFactoryVtbl UriBuilderFactoryVtbl = { 5120 UriBuilderFactory_QueryInterface, 5121 UriBuilderFactory_AddRef, 5122 UriBuilderFactory_Release, 5123 UriBuilderFactory_CreateIUriBuilder, 5124 UriBuilderFactory_CreateInitializedIUriBuilder 5125 }; 5126 5127 static inline Uri* impl_from_IPersistStream(IPersistStream *iface) 5128 { 5129 return CONTAINING_RECORD(iface, Uri, IPersistStream_iface); 5130 } 5131 5132 static HRESULT WINAPI PersistStream_QueryInterface(IPersistStream *iface, REFIID riid, void **ppvObject) 5133 { 5134 Uri *This = impl_from_IPersistStream(iface); 5135 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject); 5136 } 5137 5138 static ULONG WINAPI PersistStream_AddRef(IPersistStream *iface) 5139 { 5140 Uri *This = impl_from_IPersistStream(iface); 5141 return IUri_AddRef(&This->IUri_iface); 5142 } 5143 5144 static ULONG WINAPI PersistStream_Release(IPersistStream *iface) 5145 { 5146 Uri *This = impl_from_IPersistStream(iface); 5147 return IUri_Release(&This->IUri_iface); 5148 } 5149 5150 static HRESULT WINAPI PersistStream_GetClassID(IPersistStream *iface, 
CLSID *pClassID) 5151 { 5152 Uri *This = impl_from_IPersistStream(iface); 5153 TRACE("(%p)->(%p)\n", This, pClassID); 5154 5155 if(!pClassID) 5156 return E_INVALIDARG; 5157 5158 *pClassID = CLSID_CUri; 5159 return S_OK; 5160 } 5161 5162 static HRESULT WINAPI PersistStream_IsDirty(IPersistStream *iface) 5163 { 5164 Uri *This = impl_from_IPersistStream(iface); 5165 TRACE("(%p)\n", This); 5166 return S_FALSE; 5167 } 5168 5169 struct persist_uri { 5170 DWORD size; 5171 DWORD unk1[2]; 5172 DWORD create_flags; 5173 DWORD unk2[3]; 5174 DWORD fields_no; 5175 BYTE data[1]; 5176 }; 5177 5178 static HRESULT WINAPI PersistStream_Load(IPersistStream *iface, IStream *pStm) 5179 { 5180 Uri *This = impl_from_IPersistStream(iface); 5181 struct persist_uri *data; 5182 parse_data parse; 5183 DWORD size; 5184 HRESULT hr; 5185 5186 TRACE("(%p)->(%p)\n", This, pStm); 5187 5188 if(This->create_flags) 5189 return E_UNEXPECTED; 5190 if(!pStm) 5191 return E_INVALIDARG; 5192 5193 hr = IStream_Read(pStm, &size, sizeof(DWORD), NULL); 5194 if(FAILED(hr)) 5195 return hr; 5196 data = heap_alloc(size); 5197 if(!data) 5198 return E_OUTOFMEMORY; 5199 hr = IStream_Read(pStm, data->unk1, size-sizeof(DWORD)-2, NULL); 5200 if(FAILED(hr)) { 5201 heap_free(data); 5202 return hr; 5203 } 5204 5205 if(size < sizeof(struct persist_uri)) { 5206 heap_free(data); 5207 return S_OK; 5208 } 5209 5210 if(*(DWORD*)data->data != Uri_PROPERTY_RAW_URI) { 5211 heap_free(data); 5212 ERR("Can't find raw_uri\n"); 5213 return E_UNEXPECTED; 5214 } 5215 5216 This->raw_uri = SysAllocString((WCHAR*)(data->data+sizeof(DWORD)*2)); 5217 if(!This->raw_uri) { 5218 heap_free(data); 5219 return E_OUTOFMEMORY; 5220 } 5221 This->create_flags = data->create_flags; 5222 heap_free(data); 5223 TRACE("%x %s\n", This->create_flags, debugstr_w(This->raw_uri)); 5224 5225 memset(&parse, 0, sizeof(parse_data)); 5226 parse.uri = This->raw_uri; 5227 if(!parse_uri(&parse, This->create_flags)) { 5228 SysFreeString(This->raw_uri); 5229 
This->create_flags = 0; 5230 return E_UNEXPECTED; 5231 } 5232 5233 hr = canonicalize_uri(&parse, This, This->create_flags); 5234 if(FAILED(hr)) { 5235 SysFreeString(This->raw_uri); 5236 This->create_flags = 0; 5237 return hr; 5238 } 5239 5240 return S_OK; 5241 } 5242 5243 static inline BYTE* persist_stream_add_strprop(Uri *This, BYTE *p, DWORD type, DWORD len, WCHAR *data) 5244 { 5245 len *= sizeof(WCHAR); 5246 *(DWORD*)p = type; 5247 p += sizeof(DWORD); 5248 *(DWORD*)p = len+sizeof(WCHAR); 5249 p += sizeof(DWORD); 5250 memcpy(p, data, len); 5251 p += len; 5252 *(WCHAR*)p = 0; 5253 return p+sizeof(WCHAR); 5254 } 5255 5256 static inline void persist_stream_save(Uri *This, IStream *pStm, BOOL marshal, struct persist_uri *data) 5257 { 5258 BYTE *p = NULL; 5259 5260 data->create_flags = This->create_flags; 5261 5262 if(This->create_flags) { 5263 data->fields_no = 1; 5264 p = persist_stream_add_strprop(This, data->data, Uri_PROPERTY_RAW_URI, 5265 SysStringLen(This->raw_uri), This->raw_uri); 5266 } 5267 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS 5268 && This->scheme_type!=URL_SCHEME_FTP) 5269 return; 5270 5271 if(This->fragment_len) { 5272 data->fields_no++; 5273 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_FRAGMENT, 5274 This->fragment_len, This->canon_uri+This->fragment_start); 5275 } 5276 5277 if(This->host_len) { 5278 data->fields_no++; 5279 if(This->host_type == Uri_HOST_IPV6) 5280 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST, 5281 This->host_len-2, This->canon_uri+This->host_start+1); 5282 else 5283 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST, 5284 This->host_len, This->canon_uri+This->host_start); 5285 } 5286 5287 if(This->userinfo_split > -1) { 5288 data->fields_no++; 5289 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PASSWORD, 5290 This->userinfo_len-This->userinfo_split-1, 5291 This->canon_uri+This->userinfo_start+This->userinfo_split+1); 5292 } 5293 5294 if(This->path_len) { 5295 
data->fields_no++; 5296 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 5297 This->path_len, This->canon_uri+This->path_start); 5298 } else if(marshal) { 5299 WCHAR no_path = '/'; 5300 data->fields_no++; 5301 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 1, &no_path); 5302 } 5303 5304 if(This->has_port) { 5305 data->fields_no++; 5306 *(DWORD*)p = Uri_PROPERTY_PORT; 5307 p += sizeof(DWORD); 5308 *(DWORD*)p = sizeof(DWORD); 5309 p += sizeof(DWORD); 5310 *(DWORD*)p = This->port; 5311 p += sizeof(DWORD); 5312 } 5313 5314 if(This->query_len) { 5315 data->fields_no++; 5316 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_QUERY, 5317 This->query_len, This->canon_uri+This->query_start); 5318 } 5319 5320 if(This->scheme_len) { 5321 data->fields_no++; 5322 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_SCHEME_NAME, 5323 This->scheme_len, This->canon_uri+This->scheme_start); 5324 } 5325 5326 if(This->userinfo_start>-1 && This->userinfo_split!=0) { 5327 data->fields_no++; 5328 if(This->userinfo_split > -1) 5329 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME, 5330 This->userinfo_split, This->canon_uri+This->userinfo_start); 5331 else 5332 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME, 5333 This->userinfo_len, This->canon_uri+This->userinfo_start); 5334 } 5335 } 5336 5337 static HRESULT WINAPI PersistStream_Save(IPersistStream *iface, IStream *pStm, BOOL fClearDirty) 5338 { 5339 Uri *This = impl_from_IPersistStream(iface); 5340 struct persist_uri *data; 5341 ULARGE_INTEGER size; 5342 HRESULT hres; 5343 5344 TRACE("(%p)->(%p %x)\n", This, pStm, fClearDirty); 5345 5346 if(!pStm) 5347 return E_INVALIDARG; 5348 5349 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size); 5350 if(FAILED(hres)) 5351 return hres; 5352 5353 data = heap_alloc_zero(size.u.LowPart); 5354 if(!data) 5355 return E_OUTOFMEMORY; 5356 data->size = size.u.LowPart; 5357 persist_stream_save(This, pStm, FALSE, data); 5358 5359 hres = 
IStream_Write(pStm, data, data->size-2, NULL); 5360 heap_free(data); 5361 return hres; 5362 } 5363 5364 static HRESULT WINAPI PersistStream_GetSizeMax(IPersistStream *iface, ULARGE_INTEGER *pcbSize) 5365 { 5366 Uri *This = impl_from_IPersistStream(iface); 5367 TRACE("(%p)->(%p)\n", This, pcbSize); 5368 5369 if(!pcbSize) 5370 return E_INVALIDARG; 5371 5372 pcbSize->u.LowPart = 2+sizeof(struct persist_uri); 5373 pcbSize->u.HighPart = 0; 5374 if(This->create_flags) 5375 pcbSize->u.LowPart += (SysStringLen(This->raw_uri)+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5376 else /* there's no place for fields no */ 5377 pcbSize->u.LowPart -= sizeof(DWORD); 5378 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS 5379 && This->scheme_type!=URL_SCHEME_FTP) 5380 return S_OK; 5381 5382 if(This->fragment_len) 5383 pcbSize->u.LowPart += (This->fragment_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5384 if(This->host_len) { 5385 if(This->host_type == Uri_HOST_IPV6) 5386 pcbSize->u.LowPart += (This->host_len-1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5387 else 5388 pcbSize->u.LowPart += (This->host_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5389 } 5390 if(This->userinfo_split > -1) 5391 pcbSize->u.LowPart += (This->userinfo_len-This->userinfo_split)*sizeof(WCHAR) + 2*sizeof(DWORD); 5392 if(This->path_len) 5393 pcbSize->u.LowPart += (This->path_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5394 if(This->has_port) 5395 pcbSize->u.LowPart += 3*sizeof(DWORD); 5396 if(This->query_len) 5397 pcbSize->u.LowPart += (This->query_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5398 if(This->scheme_len) 5399 pcbSize->u.LowPart += (This->scheme_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5400 if(This->userinfo_start>-1 && This->userinfo_split!=0) { 5401 if(This->userinfo_split > -1) 5402 pcbSize->u.LowPart += (This->userinfo_split+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5403 else 5404 pcbSize->u.LowPart += (This->userinfo_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5405 } 5406 return S_OK; 5407 } 5408 5409 static const 
IPersistStreamVtbl PersistStreamVtbl = { 5410 PersistStream_QueryInterface, 5411 PersistStream_AddRef, 5412 PersistStream_Release, 5413 PersistStream_GetClassID, 5414 PersistStream_IsDirty, 5415 PersistStream_Load, 5416 PersistStream_Save, 5417 PersistStream_GetSizeMax 5418 }; 5419 5420 static inline Uri* impl_from_IMarshal(IMarshal *iface) 5421 { 5422 return CONTAINING_RECORD(iface, Uri, IMarshal_iface); 5423 } 5424 5425 static HRESULT WINAPI Marshal_QueryInterface(IMarshal *iface, REFIID riid, void **ppvObject) 5426 { 5427 Uri *This = impl_from_IMarshal(iface); 5428 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject); 5429 } 5430 5431 static ULONG WINAPI Marshal_AddRef(IMarshal *iface) 5432 { 5433 Uri *This = impl_from_IMarshal(iface); 5434 return IUri_AddRef(&This->IUri_iface); 5435 } 5436 5437 static ULONG WINAPI Marshal_Release(IMarshal *iface) 5438 { 5439 Uri *This = impl_from_IMarshal(iface); 5440 return IUri_Release(&This->IUri_iface); 5441 } 5442 5443 static HRESULT WINAPI Marshal_GetUnmarshalClass(IMarshal *iface, REFIID riid, void *pv, 5444 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, CLSID *pCid) 5445 { 5446 Uri *This = impl_from_IMarshal(iface); 5447 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv, 5448 dwDestContext, pvDestContext, mshlflags, pCid); 5449 5450 if(!pCid || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM 5451 && dwDestContext!=MSHCTX_INPROC)) 5452 return E_INVALIDARG; 5453 5454 *pCid = CLSID_CUri; 5455 return S_OK; 5456 } 5457 5458 struct inproc_marshal_uri { 5459 DWORD size; 5460 DWORD mshlflags; 5461 DWORD unk[4]; /* process identifier? 
*/ 5462 Uri *uri; 5463 }; 5464 5465 static HRESULT WINAPI Marshal_GetMarshalSizeMax(IMarshal *iface, REFIID riid, void *pv, 5466 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, DWORD *pSize) 5467 { 5468 Uri *This = impl_from_IMarshal(iface); 5469 ULARGE_INTEGER size; 5470 HRESULT hres; 5471 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv, 5472 dwDestContext, pvDestContext, mshlflags, pSize); 5473 5474 if(!pSize || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM 5475 && dwDestContext!=MSHCTX_INPROC)) 5476 return E_INVALIDARG; 5477 5478 if(dwDestContext == MSHCTX_INPROC) { 5479 *pSize = sizeof(struct inproc_marshal_uri); 5480 return S_OK; 5481 } 5482 5483 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size); 5484 if(FAILED(hres)) 5485 return hres; 5486 if(!This->path_len && (This->scheme_type==URL_SCHEME_HTTP 5487 || This->scheme_type==URL_SCHEME_HTTPS 5488 || This->scheme_type==URL_SCHEME_FTP)) 5489 size.u.LowPart += 3*sizeof(DWORD); 5490 *pSize = size.u.LowPart+2*sizeof(DWORD); 5491 return S_OK; 5492 } 5493 5494 static HRESULT WINAPI Marshal_MarshalInterface(IMarshal *iface, IStream *pStm, REFIID riid, 5495 void *pv, DWORD dwDestContext, void *pvDestContext, DWORD mshlflags) 5496 { 5497 Uri *This = impl_from_IMarshal(iface); 5498 DWORD *data; 5499 DWORD size; 5500 HRESULT hres; 5501 5502 TRACE("(%p)->(%p %s %p %x %p %x)\n", This, pStm, debugstr_guid(riid), pv, 5503 dwDestContext, pvDestContext, mshlflags); 5504 5505 if(!pStm || mshlflags!=MSHLFLAGS_NORMAL || (dwDestContext!=MSHCTX_LOCAL 5506 && dwDestContext!=MSHCTX_NOSHAREDMEM && dwDestContext!=MSHCTX_INPROC)) 5507 return E_INVALIDARG; 5508 5509 if(dwDestContext == MSHCTX_INPROC) { 5510 struct inproc_marshal_uri data; 5511 5512 data.size = sizeof(data); 5513 data.mshlflags = MSHCTX_INPROC; 5514 data.unk[0] = 0; 5515 data.unk[1] = 0; 5516 data.unk[2] = 0; 5517 data.unk[3] = 0; 5518 data.uri = This; 5519 5520 hres = IStream_Write(pStm, &data, data.size, 
NULL); 5521 if(FAILED(hres)) 5522 return hres; 5523 5524 IUri_AddRef(&This->IUri_iface); 5525 return S_OK; 5526 } 5527 5528 hres = IMarshal_GetMarshalSizeMax(iface, riid, pv, dwDestContext, 5529 pvDestContext, mshlflags, &size); 5530 if(FAILED(hres)) 5531 return hres; 5532 5533 data = heap_alloc_zero(size); 5534 if(!data) 5535 return E_OUTOFMEMORY; 5536 5537 data[0] = size; 5538 data[1] = dwDestContext; 5539 data[2] = size-2*sizeof(DWORD); 5540 persist_stream_save(This, pStm, TRUE, (struct persist_uri*)(data+2)); 5541 5542 hres = IStream_Write(pStm, data, data[0]-2, NULL); 5543 heap_free(data); 5544 return hres; 5545 } 5546 5547 static HRESULT WINAPI Marshal_UnmarshalInterface(IMarshal *iface, 5548 IStream *pStm, REFIID riid, void **ppv) 5549 { 5550 Uri *This = impl_from_IMarshal(iface); 5551 DWORD header[2]; 5552 HRESULT hres; 5553 5554 TRACE("(%p)->(%p %s %p)\n", This, pStm, debugstr_guid(riid), ppv); 5555 5556 if(This->create_flags) 5557 return E_UNEXPECTED; 5558 if(!pStm || !riid || !ppv) 5559 return E_INVALIDARG; 5560 5561 hres = IStream_Read(pStm, header, sizeof(header), NULL); 5562 if(FAILED(hres)) 5563 return hres; 5564 5565 if(header[1]!=MSHCTX_LOCAL && header[1]!=MSHCTX_NOSHAREDMEM 5566 && header[1]!=MSHCTX_INPROC) 5567 return E_UNEXPECTED; 5568 5569 if(header[1] == MSHCTX_INPROC) { 5570 struct inproc_marshal_uri data; 5571 parse_data parse; 5572 5573 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL); 5574 if(FAILED(hres)) 5575 return hres; 5576 5577 This->raw_uri = SysAllocString(data.uri->raw_uri); 5578 if(!This->raw_uri) { 5579 return E_OUTOFMEMORY; 5580 } 5581 5582 memset(&parse, 0, sizeof(parse_data)); 5583 parse.uri = This->raw_uri; 5584 5585 if(!parse_uri(&parse, data.uri->create_flags)) 5586 return E_INVALIDARG; 5587 5588 hres = canonicalize_uri(&parse, This, data.uri->create_flags); 5589 if(FAILED(hres)) 5590 return hres; 5591 5592 This->create_flags = data.uri->create_flags; 5593 IUri_Release(&data.uri->IUri_iface); 5594 
5595 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5596 } 5597 5598 hres = IPersistStream_Load(&This->IPersistStream_iface, pStm); 5599 if(FAILED(hres)) 5600 return hres; 5601 5602 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5603 } 5604 5605 static HRESULT WINAPI Marshal_ReleaseMarshalData(IMarshal *iface, IStream *pStm) 5606 { 5607 Uri *This = impl_from_IMarshal(iface); 5608 LARGE_INTEGER off; 5609 DWORD header[2]; 5610 HRESULT hres; 5611 5612 TRACE("(%p)->(%p)\n", This, pStm); 5613 5614 if(!pStm) 5615 return E_INVALIDARG; 5616 5617 hres = IStream_Read(pStm, header, 2*sizeof(DWORD), NULL); 5618 if(FAILED(hres)) 5619 return hres; 5620 5621 if(header[1] == MSHCTX_INPROC) { 5622 struct inproc_marshal_uri data; 5623 5624 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL); 5625 if(FAILED(hres)) 5626 return hres; 5627 5628 IUri_Release(&data.uri->IUri_iface); 5629 return S_OK; 5630 } 5631 5632 off.u.LowPart = header[0]-sizeof(header)-2; 5633 off.u.HighPart = 0; 5634 return IStream_Seek(pStm, off, STREAM_SEEK_CUR, NULL); 5635 } 5636 5637 static HRESULT WINAPI Marshal_DisconnectObject(IMarshal *iface, DWORD dwReserved) 5638 { 5639 Uri *This = impl_from_IMarshal(iface); 5640 TRACE("(%p)->(%x)\n", This, dwReserved); 5641 return S_OK; 5642 } 5643 5644 static const IMarshalVtbl MarshalVtbl = { 5645 Marshal_QueryInterface, 5646 Marshal_AddRef, 5647 Marshal_Release, 5648 Marshal_GetUnmarshalClass, 5649 Marshal_GetMarshalSizeMax, 5650 Marshal_MarshalInterface, 5651 Marshal_UnmarshalInterface, 5652 Marshal_ReleaseMarshalData, 5653 Marshal_DisconnectObject 5654 }; 5655 5656 HRESULT Uri_Construct(IUnknown *pUnkOuter, LPVOID *ppobj) 5657 { 5658 Uri *ret = heap_alloc_zero(sizeof(Uri)); 5659 5660 TRACE("(%p %p)\n", pUnkOuter, ppobj); 5661 5662 *ppobj = ret; 5663 if(!ret) 5664 return E_OUTOFMEMORY; 5665 5666 ret->IUri_iface.lpVtbl = &UriVtbl; 5667 ret->IUriBuilderFactory_iface.lpVtbl = &UriBuilderFactoryVtbl; 5668 
ret->IPersistStream_iface.lpVtbl = &PersistStreamVtbl; 5669 ret->IMarshal_iface.lpVtbl = &MarshalVtbl; 5670 ret->ref = 1; 5671 5672 *ppobj = &ret->IUri_iface; 5673 return S_OK; 5674 } 5675 5676 /*********************************************************************** 5677 * CreateUri (urlmon.@) 5678 * 5679 * Creates a new IUri object using the URI represented by pwzURI. This function 5680 * parses and validates the components of pwzURI and then canonicalizes the 5681 * parsed components. 5682 * 5683 * PARAMS 5684 * pwzURI [I] The URI to parse, validate, and canonicalize. 5685 * dwFlags [I] Flags which can affect how the parsing/canonicalization is performed. 5686 * dwReserved [I] Reserved (not used). 5687 * ppURI [O] The resulting IUri after parsing/canonicalization occurs. 5688 * 5689 * RETURNS 5690 * Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri. 5691 * Failure: E_INVALIDARG if there are invalid flag combinations in dwFlags, or an 5692 * invalid parameter, or pwzURI doesn't represent a valid URI. 5693 * E_OUTOFMEMORY if any memory allocation fails. 5694 * 5695 * NOTES 5696 * Default flags: 5697 * Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES, 5698 * Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS. 
5699 */ 5700 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI) 5701 { 5702 const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME| 5703 Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE| 5704 Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES| 5705 Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI| 5706 Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS|Uri_CREATE_FILE_USE_DOS_PATH; 5707 Uri *ret; 5708 HRESULT hr; 5709 parse_data data; 5710 5711 TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI); 5712 5713 if(!ppURI) 5714 return E_INVALIDARG; 5715 5716 if(!pwzURI) { 5717 *ppURI = NULL; 5718 return E_INVALIDARG; 5719 } 5720 5721 /* Check for invalid flags. */ 5722 if(has_invalid_flag_combination(dwFlags)) { 5723 *ppURI = NULL; 5724 return E_INVALIDARG; 5725 } 5726 5727 /* Currently unsupported. */ 5728 if(dwFlags & ~supported_flags) 5729 FIXME("Ignoring unsupported flag(s) %x\n", dwFlags & ~supported_flags); 5730 5731 hr = Uri_Construct(NULL, (void**)&ret); 5732 if(FAILED(hr)) { 5733 *ppURI = NULL; 5734 return hr; 5735 } 5736 5737 /* Explicitly set the default flags if it doesn't cause a flag conflict. */ 5738 apply_default_flags(&dwFlags); 5739 5740 /* Pre process the URI, unless told otherwise. */ 5741 if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 5742 ret->raw_uri = pre_process_uri(pwzURI); 5743 else 5744 ret->raw_uri = SysAllocString(pwzURI); 5745 5746 if(!ret->raw_uri) { 5747 heap_free(ret); 5748 return E_OUTOFMEMORY; 5749 } 5750 5751 memset(&data, 0, sizeof(parse_data)); 5752 data.uri = ret->raw_uri; 5753 5754 /* Validate and parse the URI into its components. 
*/ 5755 if(!parse_uri(&data, dwFlags)) { 5756 /* Encountered an unsupported or invalid URI */ 5757 IUri_Release(&ret->IUri_iface); 5758 *ppURI = NULL; 5759 return E_INVALIDARG; 5760 } 5761 5762 /* Canonicalize the URI. */ 5763 hr = canonicalize_uri(&data, ret, dwFlags); 5764 if(FAILED(hr)) { 5765 IUri_Release(&ret->IUri_iface); 5766 *ppURI = NULL; 5767 return hr; 5768 } 5769 5770 ret->create_flags = dwFlags; 5771 5772 *ppURI = &ret->IUri_iface; 5773 return S_OK; 5774 } 5775 5776 /*********************************************************************** 5777 * CreateUriWithFragment (urlmon.@) 5778 * 5779 * Creates a new IUri object. This is almost the same as CreateUri, expect that 5780 * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI. 5781 * 5782 * PARAMS 5783 * pwzURI [I] The URI to parse and perform canonicalization on. 5784 * pwzFragment [I] The explicit fragment string which should be added to pwzURI. 5785 * dwFlags [I] The flags which will be passed to CreateUri. 5786 * dwReserved [I] Reserved (not used). 5787 * ppURI [O] The resulting IUri after parsing/canonicalization. 5788 * 5789 * RETURNS 5790 * Success: S_OK. ppURI contains the pointer to the newly allocated IUri. 5791 * Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment 5792 * isn't NULL. Will also return E_INVALIDARG for the same reasons as 5793 * CreateUri will. E_OUTOFMEMORY if any allocation fails. 5794 */ 5795 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags, 5796 DWORD_PTR dwReserved, IUri **ppURI) 5797 { 5798 HRESULT hres; 5799 TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI); 5800 5801 if(!ppURI) 5802 return E_INVALIDARG; 5803 5804 if(!pwzURI) { 5805 *ppURI = NULL; 5806 return E_INVALIDARG; 5807 } 5808 5809 /* Check if a fragment should be appended to the URI string. 
*/ 5810 if(pwzFragment) { 5811 WCHAR *uriW; 5812 DWORD uri_len, frag_len; 5813 BOOL add_pound; 5814 5815 /* Check if the original URI already has a fragment component. */ 5816 if(StrChrW(pwzURI, '#')) { 5817 *ppURI = NULL; 5818 return E_INVALIDARG; 5819 } 5820 5821 uri_len = lstrlenW(pwzURI); 5822 frag_len = lstrlenW(pwzFragment); 5823 5824 /* If the fragment doesn't start with a '#', one will be added. */ 5825 add_pound = *pwzFragment != '#'; 5826 5827 if(add_pound) 5828 uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR)); 5829 else 5830 uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR)); 5831 5832 if(!uriW) 5833 return E_OUTOFMEMORY; 5834 5835 memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR)); 5836 if(add_pound) 5837 uriW[uri_len++] = '#'; 5838 memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR)); 5839 5840 hres = CreateUri(uriW, dwFlags, 0, ppURI); 5841 5842 heap_free(uriW); 5843 } else 5844 /* A fragment string wasn't specified, so just forward the call. */ 5845 hres = CreateUri(pwzURI, dwFlags, 0, ppURI); 5846 5847 return hres; 5848 } 5849 5850 static HRESULT build_uri(const UriBuilder *builder, IUri **uri, DWORD create_flags, 5851 DWORD use_orig_flags, DWORD encoding_mask) 5852 { 5853 HRESULT hr; 5854 parse_data data; 5855 Uri *ret; 5856 5857 if(!uri) 5858 return E_POINTER; 5859 5860 if(encoding_mask && (!builder->uri || builder->modified_props)) { 5861 *uri = NULL; 5862 return E_NOTIMPL; 5863 } 5864 5865 /* Decide what flags should be used when creating the Uri. */ 5866 if((use_orig_flags & UriBuilder_USE_ORIGINAL_FLAGS) && builder->uri) 5867 create_flags = builder->uri->create_flags; 5868 else { 5869 if(has_invalid_flag_combination(create_flags)) { 5870 *uri = NULL; 5871 return E_INVALIDARG; 5872 } 5873 5874 /* Set the default flags if they don't cause a conflict. */ 5875 apply_default_flags(&create_flags); 5876 } 5877 5878 /* Return the base IUri if no changes have been made and the create_flags match. 
*/ 5879 if(builder->uri && !builder->modified_props && builder->uri->create_flags == create_flags) { 5880 *uri = &builder->uri->IUri_iface; 5881 IUri_AddRef(*uri); 5882 return S_OK; 5883 } 5884 5885 hr = validate_components(builder, &data, create_flags); 5886 if(FAILED(hr)) { 5887 *uri = NULL; 5888 return hr; 5889 } 5890 5891 hr = Uri_Construct(NULL, (void**)&ret); 5892 if(FAILED(hr)) { 5893 *uri = NULL; 5894 return hr; 5895 } 5896 5897 hr = generate_uri(builder, &data, ret, create_flags); 5898 if(FAILED(hr)) { 5899 IUri_Release(&ret->IUri_iface); 5900 *uri = NULL; 5901 return hr; 5902 } 5903 5904 *uri = &ret->IUri_iface; 5905 return S_OK; 5906 } 5907 5908 static inline UriBuilder* impl_from_IUriBuilder(IUriBuilder *iface) 5909 { 5910 return CONTAINING_RECORD(iface, UriBuilder, IUriBuilder_iface); 5911 } 5912 5913 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv) 5914 { 5915 UriBuilder *This = impl_from_IUriBuilder(iface); 5916 5917 if(IsEqualGUID(&IID_IUnknown, riid)) { 5918 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 5919 *ppv = &This->IUriBuilder_iface; 5920 }else if(IsEqualGUID(&IID_IUriBuilder, riid)) { 5921 TRACE("(%p)->(IID_IUriBuilder %p)\n", This, ppv); 5922 *ppv = &This->IUriBuilder_iface; 5923 }else { 5924 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 5925 *ppv = NULL; 5926 return E_NOINTERFACE; 5927 } 5928 5929 IUnknown_AddRef((IUnknown*)*ppv); 5930 return S_OK; 5931 } 5932 5933 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface) 5934 { 5935 UriBuilder *This = impl_from_IUriBuilder(iface); 5936 LONG ref = InterlockedIncrement(&This->ref); 5937 5938 TRACE("(%p) ref=%d\n", This, ref); 5939 5940 return ref; 5941 } 5942 5943 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface) 5944 { 5945 UriBuilder *This = impl_from_IUriBuilder(iface); 5946 LONG ref = InterlockedDecrement(&This->ref); 5947 5948 TRACE("(%p) ref=%d\n", This, ref); 5949 5950 if(!ref) { 5951 if(This->uri) 
IUri_Release(&This->uri->IUri_iface); 5952 heap_free(This->fragment); 5953 heap_free(This->host); 5954 heap_free(This->password); 5955 heap_free(This->path); 5956 heap_free(This->query); 5957 heap_free(This->scheme); 5958 heap_free(This->username); 5959 heap_free(This); 5960 } 5961 5962 return ref; 5963 } 5964 5965 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface, 5966 DWORD dwAllowEncodingPropertyMask, 5967 DWORD_PTR dwReserved, 5968 IUri **ppIUri) 5969 { 5970 UriBuilder *This = impl_from_IUriBuilder(iface); 5971 HRESULT hr; 5972 TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5973 5974 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 5975 if(hr == E_NOTIMPL) 5976 FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5977 return hr; 5978 } 5979 5980 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface, 5981 DWORD dwCreateFlags, 5982 DWORD dwAllowEncodingPropertyMask, 5983 DWORD_PTR dwReserved, 5984 IUri **ppIUri) 5985 { 5986 UriBuilder *This = impl_from_IUriBuilder(iface); 5987 HRESULT hr; 5988 TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5989 5990 if(dwCreateFlags == -1) 5991 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 5992 else 5993 hr = build_uri(This, ppIUri, dwCreateFlags, 0, dwAllowEncodingPropertyMask); 5994 5995 if(hr == E_NOTIMPL) 5996 FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5997 return hr; 5998 } 5999 6000 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface, 6001 DWORD dwCreateFlags, 6002 DWORD dwUriBuilderFlags, 6003 DWORD dwAllowEncodingPropertyMask, 6004 DWORD_PTR dwReserved, 6005 IUri **ppIUri) 6006 { 6007 UriBuilder *This = impl_from_IUriBuilder(iface); 6008 HRESULT hr; 6009 TRACE("(%p)->(0x%08x 
0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 6010 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 6011 6012 hr = build_uri(This, ppIUri, dwCreateFlags, dwUriBuilderFlags, dwAllowEncodingPropertyMask); 6013 if(hr == E_NOTIMPL) 6014 FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 6015 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 6016 return hr; 6017 } 6018 6019 static HRESULT WINAPI UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri) 6020 { 6021 UriBuilder *This = impl_from_IUriBuilder(iface); 6022 TRACE("(%p)->(%p)\n", This, ppIUri); 6023 6024 if(!ppIUri) 6025 return E_POINTER; 6026 6027 if(This->uri) { 6028 IUri *uri = &This->uri->IUri_iface; 6029 IUri_AddRef(uri); 6030 *ppIUri = uri; 6031 } else 6032 *ppIUri = NULL; 6033 6034 return S_OK; 6035 } 6036 6037 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri) 6038 { 6039 UriBuilder *This = impl_from_IUriBuilder(iface); 6040 TRACE("(%p)->(%p)\n", This, pIUri); 6041 6042 if(pIUri) { 6043 Uri *uri; 6044 6045 if((uri = get_uri_obj(pIUri))) { 6046 /* Only reset the builder if its Uri isn't the same as 6047 * the Uri passed to the function. 6048 */ 6049 if(This->uri != uri) { 6050 reset_builder(This); 6051 6052 This->uri = uri; 6053 if(uri->has_port) 6054 This->port = uri->port; 6055 6056 IUri_AddRef(pIUri); 6057 } 6058 } else { 6059 FIXME("(%p)->(%p) Unknown IUri types not supported yet.\n", This, pIUri); 6060 return E_NOTIMPL; 6061 } 6062 } else if(This->uri) 6063 /* Only reset the builder if its Uri isn't NULL. 
*/ 6064 reset_builder(This); 6065 6066 return S_OK; 6067 } 6068 6069 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment) 6070 { 6071 UriBuilder *This = impl_from_IUriBuilder(iface); 6072 TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment); 6073 6074 if(!This->uri || This->uri->fragment_start == -1 || This->modified_props & Uri_HAS_FRAGMENT) 6075 return get_builder_component(&This->fragment, &This->fragment_len, NULL, 0, ppwzFragment, pcchFragment); 6076 else 6077 return get_builder_component(&This->fragment, &This->fragment_len, This->uri->canon_uri+This->uri->fragment_start, 6078 This->uri->fragment_len, ppwzFragment, pcchFragment); 6079 } 6080 6081 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost) 6082 { 6083 UriBuilder *This = impl_from_IUriBuilder(iface); 6084 TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost); 6085 6086 if(!This->uri || This->uri->host_start == -1 || This->modified_props & Uri_HAS_HOST) 6087 return get_builder_component(&This->host, &This->host_len, NULL, 0, ppwzHost, pcchHost); 6088 else { 6089 if(This->uri->host_type == Uri_HOST_IPV6) 6090 /* Don't include the '[' and ']' around the address. 
*/ 6091 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start+1, 6092 This->uri->host_len-2, ppwzHost, pcchHost); 6093 else 6094 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start, 6095 This->uri->host_len, ppwzHost, pcchHost); 6096 } 6097 } 6098 6099 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword) 6100 { 6101 UriBuilder *This = impl_from_IUriBuilder(iface); 6102 TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword); 6103 6104 if(!This->uri || This->uri->userinfo_split == -1 || This->modified_props & Uri_HAS_PASSWORD) 6105 return get_builder_component(&This->password, &This->password_len, NULL, 0, ppwzPassword, pcchPassword); 6106 else { 6107 const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start+This->uri->userinfo_split+1; 6108 DWORD len = This->uri->userinfo_len-This->uri->userinfo_split-1; 6109 return get_builder_component(&This->password, &This->password_len, start, len, ppwzPassword, pcchPassword); 6110 } 6111 } 6112 6113 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath) 6114 { 6115 UriBuilder *This = impl_from_IUriBuilder(iface); 6116 TRACE("(%p)->(%p %p)\n", This, pcchPath, ppwzPath); 6117 6118 if(!This->uri || This->uri->path_start == -1 || This->modified_props & Uri_HAS_PATH) 6119 return get_builder_component(&This->path, &This->path_len, NULL, 0, ppwzPath, pcchPath); 6120 else 6121 return get_builder_component(&This->path, &This->path_len, This->uri->canon_uri+This->uri->path_start, 6122 This->uri->path_len, ppwzPath, pcchPath); 6123 } 6124 6125 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort) 6126 { 6127 UriBuilder *This = impl_from_IUriBuilder(iface); 6128 TRACE("(%p)->(%p %p)\n", This, pfHasPort, pdwPort); 6129 6130 if(!pfHasPort) { 6131 if(pdwPort) 6132 *pdwPort = 0; 6133 return 
E_POINTER; 6134 } 6135 6136 if(!pdwPort) { 6137 *pfHasPort = FALSE; 6138 return E_POINTER; 6139 } 6140 6141 *pfHasPort = This->has_port; 6142 *pdwPort = This->port; 6143 return S_OK; 6144 } 6145 6146 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery) 6147 { 6148 UriBuilder *This = impl_from_IUriBuilder(iface); 6149 TRACE("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery); 6150 6151 if(!This->uri || This->uri->query_start == -1 || This->modified_props & Uri_HAS_QUERY) 6152 return get_builder_component(&This->query, &This->query_len, NULL, 0, ppwzQuery, pcchQuery); 6153 else 6154 return get_builder_component(&This->query, &This->query_len, This->uri->canon_uri+This->uri->query_start, 6155 This->uri->query_len, ppwzQuery, pcchQuery); 6156 } 6157 6158 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName) 6159 { 6160 UriBuilder *This = impl_from_IUriBuilder(iface); 6161 TRACE("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName); 6162 6163 if(!This->uri || This->uri->scheme_start == -1 || This->modified_props & Uri_HAS_SCHEME_NAME) 6164 return get_builder_component(&This->scheme, &This->scheme_len, NULL, 0, ppwzSchemeName, pcchSchemeName); 6165 else 6166 return get_builder_component(&This->scheme, &This->scheme_len, This->uri->canon_uri+This->uri->scheme_start, 6167 This->uri->scheme_len, ppwzSchemeName, pcchSchemeName); 6168 } 6169 6170 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName) 6171 { 6172 UriBuilder *This = impl_from_IUriBuilder(iface); 6173 TRACE("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName); 6174 6175 if(!This->uri || This->uri->userinfo_start == -1 || This->uri->userinfo_split == 0 || 6176 This->modified_props & Uri_HAS_USER_NAME) 6177 return get_builder_component(&This->username, &This->username_len, NULL, 0, ppwzUserName, pcchUserName); 6178 else { 6179 const WCHAR *start = 
This->uri->canon_uri+This->uri->userinfo_start; 6180 6181 /* Check if there's a password in the userinfo section. */ 6182 if(This->uri->userinfo_split > -1) 6183 /* Don't include the password. */ 6184 return get_builder_component(&This->username, &This->username_len, start, 6185 This->uri->userinfo_split, ppwzUserName, pcchUserName); 6186 else 6187 return get_builder_component(&This->username, &This->username_len, start, 6188 This->uri->userinfo_len, ppwzUserName, pcchUserName); 6189 } 6190 } 6191 6192 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue) 6193 { 6194 UriBuilder *This = impl_from_IUriBuilder(iface); 6195 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6196 return set_builder_component(&This->fragment, &This->fragment_len, pwzNewValue, '#', 6197 &This->modified_props, Uri_HAS_FRAGMENT); 6198 } 6199 6200 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue) 6201 { 6202 UriBuilder *This = impl_from_IUriBuilder(iface); 6203 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6204 6205 /* Host name can't be set to NULL. 
*/ 6206 if(!pwzNewValue) 6207 return E_INVALIDARG; 6208 6209 return set_builder_component(&This->host, &This->host_len, pwzNewValue, 0, 6210 &This->modified_props, Uri_HAS_HOST); 6211 } 6212 6213 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue) 6214 { 6215 UriBuilder *This = impl_from_IUriBuilder(iface); 6216 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6217 return set_builder_component(&This->password, &This->password_len, pwzNewValue, 0, 6218 &This->modified_props, Uri_HAS_PASSWORD); 6219 } 6220 6221 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue) 6222 { 6223 UriBuilder *This = impl_from_IUriBuilder(iface); 6224 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6225 return set_builder_component(&This->path, &This->path_len, pwzNewValue, 0, 6226 &This->modified_props, Uri_HAS_PATH); 6227 } 6228 6229 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue) 6230 { 6231 UriBuilder *This = impl_from_IUriBuilder(iface); 6232 TRACE("(%p)->(%d %d)\n", This, fHasPort, dwNewValue); 6233 6234 This->has_port = fHasPort; 6235 This->port = dwNewValue; 6236 This->modified_props |= Uri_HAS_PORT; 6237 return S_OK; 6238 } 6239 6240 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue) 6241 { 6242 UriBuilder *This = impl_from_IUriBuilder(iface); 6243 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6244 return set_builder_component(&This->query, &This->query_len, pwzNewValue, '?', 6245 &This->modified_props, Uri_HAS_QUERY); 6246 } 6247 6248 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue) 6249 { 6250 UriBuilder *This = impl_from_IUriBuilder(iface); 6251 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6252 6253 /* Only set the scheme name if it's not NULL or empty. 
*/ 6254 if(!pwzNewValue || !*pwzNewValue) 6255 return E_INVALIDARG; 6256 6257 return set_builder_component(&This->scheme, &This->scheme_len, pwzNewValue, 0, 6258 &This->modified_props, Uri_HAS_SCHEME_NAME); 6259 } 6260 6261 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue) 6262 { 6263 UriBuilder *This = impl_from_IUriBuilder(iface); 6264 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6265 return set_builder_component(&This->username, &This->username_len, pwzNewValue, 0, 6266 &This->modified_props, Uri_HAS_USER_NAME); 6267 } 6268 6269 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask) 6270 { 6271 const DWORD accepted_flags = Uri_HAS_AUTHORITY|Uri_HAS_DOMAIN|Uri_HAS_EXTENSION|Uri_HAS_FRAGMENT|Uri_HAS_HOST| 6272 Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY| 6273 Uri_HAS_USER_INFO|Uri_HAS_USER_NAME; 6274 6275 UriBuilder *This = impl_from_IUriBuilder(iface); 6276 TRACE("(%p)->(0x%08x)\n", This, dwPropertyMask); 6277 6278 if(dwPropertyMask & ~accepted_flags) 6279 return E_INVALIDARG; 6280 6281 if(dwPropertyMask & Uri_HAS_FRAGMENT) 6282 UriBuilder_SetFragment(iface, NULL); 6283 6284 /* Even though you can't set the host name to NULL or an 6285 * empty string, you can still remove it... for some reason. 
6286 */ 6287 if(dwPropertyMask & Uri_HAS_HOST) 6288 set_builder_component(&This->host, &This->host_len, NULL, 0, 6289 &This->modified_props, Uri_HAS_HOST); 6290 6291 if(dwPropertyMask & Uri_HAS_PASSWORD) 6292 UriBuilder_SetPassword(iface, NULL); 6293 6294 if(dwPropertyMask & Uri_HAS_PATH) 6295 UriBuilder_SetPath(iface, NULL); 6296 6297 if(dwPropertyMask & Uri_HAS_PORT) 6298 UriBuilder_SetPort(iface, FALSE, 0); 6299 6300 if(dwPropertyMask & Uri_HAS_QUERY) 6301 UriBuilder_SetQuery(iface, NULL); 6302 6303 if(dwPropertyMask & Uri_HAS_USER_NAME) 6304 UriBuilder_SetUserName(iface, NULL); 6305 6306 return S_OK; 6307 } 6308 6309 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified) 6310 { 6311 UriBuilder *This = impl_from_IUriBuilder(iface); 6312 TRACE("(%p)->(%p)\n", This, pfModified); 6313 6314 if(!pfModified) 6315 return E_POINTER; 6316 6317 *pfModified = This->modified_props > 0; 6318 return S_OK; 6319 } 6320 6321 static const IUriBuilderVtbl UriBuilderVtbl = { 6322 UriBuilder_QueryInterface, 6323 UriBuilder_AddRef, 6324 UriBuilder_Release, 6325 UriBuilder_CreateUriSimple, 6326 UriBuilder_CreateUri, 6327 UriBuilder_CreateUriWithFlags, 6328 UriBuilder_GetIUri, 6329 UriBuilder_SetIUri, 6330 UriBuilder_GetFragment, 6331 UriBuilder_GetHost, 6332 UriBuilder_GetPassword, 6333 UriBuilder_GetPath, 6334 UriBuilder_GetPort, 6335 UriBuilder_GetQuery, 6336 UriBuilder_GetSchemeName, 6337 UriBuilder_GetUserName, 6338 UriBuilder_SetFragment, 6339 UriBuilder_SetHost, 6340 UriBuilder_SetPassword, 6341 UriBuilder_SetPath, 6342 UriBuilder_SetPort, 6343 UriBuilder_SetQuery, 6344 UriBuilder_SetSchemeName, 6345 UriBuilder_SetUserName, 6346 UriBuilder_RemoveProperties, 6347 UriBuilder_HasBeenModified, 6348 }; 6349 6350 /*********************************************************************** 6351 * CreateIUriBuilder (urlmon.@) 6352 */ 6353 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder) 6354 { 
6355 UriBuilder *ret; 6356 6357 TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 6358 6359 if(!ppIUriBuilder) 6360 return E_POINTER; 6361 6362 ret = heap_alloc_zero(sizeof(UriBuilder)); 6363 if(!ret) 6364 return E_OUTOFMEMORY; 6365 6366 ret->IUriBuilder_iface.lpVtbl = &UriBuilderVtbl; 6367 ret->ref = 1; 6368 6369 if(pIUri) { 6370 Uri *uri; 6371 6372 if((uri = get_uri_obj(pIUri))) { 6373 if(!uri->create_flags) { 6374 heap_free(ret); 6375 return E_UNEXPECTED; 6376 } 6377 IUri_AddRef(pIUri); 6378 ret->uri = uri; 6379 6380 if(uri->has_port) 6381 /* Windows doesn't set 'has_port' to TRUE in this case. */ 6382 ret->port = uri->port; 6383 6384 } else { 6385 heap_free(ret); 6386 *ppIUriBuilder = NULL; 6387 FIXME("(%p %x %x %p): Unknown IUri types not supported yet.\n", pIUri, dwFlags, 6388 (DWORD)dwReserved, ppIUriBuilder); 6389 return E_NOTIMPL; 6390 } 6391 } 6392 6393 *ppIUriBuilder = &ret->IUriBuilder_iface; 6394 return S_OK; 6395 } 6396 6397 /* Merges the base path with the relative path and stores the resulting path 6398 * and path len in 'result' and 'result_len'. 6399 */ 6400 static HRESULT merge_paths(parse_data *data, const WCHAR *base, DWORD base_len, const WCHAR *relative, 6401 DWORD relative_len, WCHAR **result, DWORD *result_len, DWORD flags) 6402 { 6403 const WCHAR *end = NULL; 6404 DWORD base_copy_len = 0; 6405 WCHAR *ptr; 6406 6407 if(base_len) { 6408 if(data->scheme_type == URL_SCHEME_MK && *relative == '/') { 6409 /* Find '::' segment */ 6410 for(end = base; end < base+base_len-1; end++) { 6411 if(end[0] == ':' && end[1] == ':') { 6412 end++; 6413 break; 6414 } 6415 } 6416 6417 /* If not found, try finding the end of @xxx: */ 6418 if(end == base+base_len-1) 6419 end = *base == '@' ? wmemchr(base, ':', base_len) : NULL; 6420 }else { 6421 /* Find the characters that will be copied over from the base path. 
*/ 6422 for (end = base + base_len - 1; end >= base; end--) if (*end == '/') break; 6423 if(end < base && data->scheme_type == URL_SCHEME_FILE) 6424 /* Try looking for a '\\'. */ 6425 for (end = base + base_len - 1; end >= base; end--) if (*end == '\\') break; 6426 } 6427 } 6428 6429 if (end) base_copy_len = (end+1)-base; 6430 *result = heap_alloc((base_copy_len+relative_len+1)*sizeof(WCHAR)); 6431 6432 if(!(*result)) { 6433 *result_len = 0; 6434 return E_OUTOFMEMORY; 6435 } 6436 6437 ptr = *result; 6438 memcpy(ptr, base, base_copy_len*sizeof(WCHAR)); 6439 ptr += base_copy_len; 6440 6441 memcpy(ptr, relative, relative_len*sizeof(WCHAR)); 6442 ptr += relative_len; 6443 *ptr = '\0'; 6444 6445 *result_len = (ptr-*result); 6446 TRACE("ret %s\n", debugstr_wn(*result, *result_len)); 6447 return S_OK; 6448 } 6449 6450 static HRESULT combine_uri(Uri *base, Uri *relative, DWORD flags, IUri **result, DWORD extras) { 6451 Uri *ret; 6452 HRESULT hr; 6453 parse_data data; 6454 Uri *proc_uri = base; 6455 DWORD create_flags = 0, len = 0; 6456 6457 memset(&data, 0, sizeof(parse_data)); 6458 6459 /* Base case is when the relative Uri has a scheme name, 6460 * if it does, then 'result' will contain the same data 6461 * as the relative Uri. 
6462 */ 6463 if(relative->scheme_start > -1) { 6464 data.uri = SysAllocString(relative->raw_uri); 6465 if(!data.uri) { 6466 *result = NULL; 6467 return E_OUTOFMEMORY; 6468 } 6469 6470 parse_uri(&data, Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME); 6471 6472 hr = Uri_Construct(NULL, (void**)&ret); 6473 if(FAILED(hr)) { 6474 *result = NULL; 6475 return hr; 6476 } 6477 6478 if(extras & COMBINE_URI_FORCE_FLAG_USE) { 6479 if(flags & URL_DONT_SIMPLIFY) 6480 create_flags |= Uri_CREATE_NO_CANONICALIZE; 6481 if(flags & URL_DONT_UNESCAPE_EXTRA_INFO) 6482 create_flags |= Uri_CREATE_NO_DECODE_EXTRA_INFO; 6483 } 6484 6485 ret->raw_uri = data.uri; 6486 hr = canonicalize_uri(&data, ret, create_flags); 6487 if(FAILED(hr)) { 6488 IUri_Release(&ret->IUri_iface); 6489 *result = NULL; 6490 return hr; 6491 } 6492 6493 apply_default_flags(&create_flags); 6494 ret->create_flags = create_flags; 6495 6496 *result = &ret->IUri_iface; 6497 } else { 6498 WCHAR *path = NULL; 6499 DWORD raw_flags = 0; 6500 6501 if(base->scheme_start > -1) { 6502 data.scheme = base->canon_uri+base->scheme_start; 6503 data.scheme_len = base->scheme_len; 6504 data.scheme_type = base->scheme_type; 6505 } else { 6506 data.is_relative = TRUE; 6507 data.scheme_type = URL_SCHEME_UNKNOWN; 6508 create_flags |= Uri_CREATE_ALLOW_RELATIVE; 6509 } 6510 6511 if(relative->authority_start > -1) 6512 proc_uri = relative; 6513 6514 if(proc_uri->authority_start > -1) { 6515 if(proc_uri->userinfo_start > -1 && proc_uri->userinfo_split != 0) { 6516 data.username = proc_uri->canon_uri+proc_uri->userinfo_start; 6517 data.username_len = (proc_uri->userinfo_split > -1) ? 
proc_uri->userinfo_split : proc_uri->userinfo_len; 6518 } 6519 6520 if(proc_uri->userinfo_split > -1) { 6521 data.password = proc_uri->canon_uri+proc_uri->userinfo_start+proc_uri->userinfo_split+1; 6522 data.password_len = proc_uri->userinfo_len-proc_uri->userinfo_split-1; 6523 } 6524 6525 if(proc_uri->host_start > -1) { 6526 data.host = proc_uri->canon_uri+proc_uri->host_start; 6527 data.host_len = proc_uri->host_len; 6528 data.host_type = proc_uri->host_type; 6529 } 6530 6531 if(proc_uri->has_port) { 6532 data.has_port = TRUE; 6533 data.port_value = proc_uri->port; 6534 } 6535 } else if(base->scheme_type != URL_SCHEME_FILE) 6536 data.is_opaque = TRUE; 6537 6538 if(proc_uri == relative || relative->path_start == -1 || !relative->path_len) { 6539 if(proc_uri->path_start > -1) { 6540 data.path = proc_uri->canon_uri+proc_uri->path_start; 6541 data.path_len = proc_uri->path_len; 6542 } else if(!data.is_opaque) { 6543 /* Just set the path as a '/' if the base didn't have 6544 * one and if it's a hierarchical URI. 6545 */ 6546 static const WCHAR slashW[] = {'/',0}; 6547 data.path = slashW; 6548 data.path_len = 1; 6549 } 6550 6551 if(relative->query_start > -1) 6552 proc_uri = relative; 6553 6554 if(proc_uri->query_start > -1) { 6555 data.query = proc_uri->canon_uri+proc_uri->query_start; 6556 data.query_len = proc_uri->query_len; 6557 } 6558 } else { 6559 const WCHAR *ptr, **pptr; 6560 DWORD path_offset = 0, path_len = 0; 6561 6562 /* There's two possibilities on what will happen to the path component 6563 * of the result IUri. First, if the relative path begins with a '/' 6564 * then the resulting path will just be the relative path. Second, if 6565 * relative path doesn't begin with a '/' then the base path and relative 6566 * path are merged together. 
6567 */ 6568 if(relative->path_len && *(relative->canon_uri+relative->path_start) == '/' && data.scheme_type != URL_SCHEME_MK) { 6569 WCHAR *tmp = NULL; 6570 BOOL copy_drive_path = FALSE; 6571 6572 /* If the relative IUri's path starts with a '/', then we 6573 * don't use the base IUri's path. Unless the base IUri 6574 * is a file URI, in which case it uses the drive path of 6575 * the base IUri (if it has any) in the new path. 6576 */ 6577 if(base->scheme_type == URL_SCHEME_FILE) { 6578 if(base->path_len > 3 && *(base->canon_uri+base->path_start) == '/' && 6579 is_drive_path(base->canon_uri+base->path_start+1)) { 6580 path_len += 3; 6581 copy_drive_path = TRUE; 6582 } 6583 } 6584 6585 path_len += relative->path_len; 6586 6587 path = heap_alloc((path_len+1)*sizeof(WCHAR)); 6588 if(!path) { 6589 *result = NULL; 6590 return E_OUTOFMEMORY; 6591 } 6592 6593 tmp = path; 6594 6595 /* Copy the base paths, drive path over. */ 6596 if(copy_drive_path) { 6597 memcpy(tmp, base->canon_uri+base->path_start, 3*sizeof(WCHAR)); 6598 tmp += 3; 6599 } 6600 6601 memcpy(tmp, relative->canon_uri+relative->path_start, relative->path_len*sizeof(WCHAR)); 6602 path[path_len] = '\0'; 6603 } else { 6604 /* Merge the base path with the relative path. */ 6605 hr = merge_paths(&data, base->canon_uri+base->path_start, base->path_len, 6606 relative->canon_uri+relative->path_start, relative->path_len, 6607 &path, &path_len, flags); 6608 if(FAILED(hr)) { 6609 *result = NULL; 6610 return hr; 6611 } 6612 6613 /* If the resulting IUri is a file URI, the drive path isn't 6614 * reduced out when the dot segments are removed. 6615 */ 6616 if(path_len >= 3 && data.scheme_type == URL_SCHEME_FILE && !data.host) { 6617 if(*path == '/' && is_drive_path(path+1)) 6618 path_offset = 2; 6619 else if(is_drive_path(path)) 6620 path_offset = 1; 6621 } 6622 } 6623 6624 /* Check if the dot segments need to be removed from the path. 
*/ 6625 if(!(flags & URL_DONT_SIMPLIFY) && !data.is_opaque) { 6626 DWORD offset = (path_offset > 0) ? path_offset+1 : 0; 6627 DWORD new_len = remove_dot_segments(path+offset,path_len-offset); 6628 6629 if(new_len != path_len) { 6630 WCHAR *tmp = heap_realloc(path, (offset+new_len+1)*sizeof(WCHAR)); 6631 if(!tmp) { 6632 heap_free(path); 6633 *result = NULL; 6634 return E_OUTOFMEMORY; 6635 } 6636 6637 tmp[new_len+offset] = '\0'; 6638 path = tmp; 6639 path_len = new_len+offset; 6640 } 6641 } 6642 6643 if(relative->query_start > -1) { 6644 data.query = relative->canon_uri+relative->query_start; 6645 data.query_len = relative->query_len; 6646 } 6647 6648 /* Make sure the path component is valid. */ 6649 ptr = path; 6650 pptr = &ptr; 6651 if((data.is_opaque && !parse_path_opaque(pptr, &data, 0)) || 6652 (!data.is_opaque && !parse_path_hierarchical(pptr, &data, 0))) { 6653 heap_free(path); 6654 *result = NULL; 6655 return E_INVALIDARG; 6656 } 6657 } 6658 6659 if(relative->fragment_start > -1) { 6660 data.fragment = relative->canon_uri+relative->fragment_start; 6661 data.fragment_len = relative->fragment_len; 6662 } 6663 6664 if(flags & URL_DONT_SIMPLIFY) 6665 raw_flags |= RAW_URI_FORCE_PORT_DISP; 6666 if(flags & URL_FILE_USE_PATHURL) 6667 raw_flags |= RAW_URI_CONVERT_TO_DOS_PATH; 6668 6669 len = generate_raw_uri(&data, data.uri, raw_flags); 6670 data.uri = SysAllocStringLen(NULL, len); 6671 if(!data.uri) { 6672 heap_free(path); 6673 *result = NULL; 6674 return E_OUTOFMEMORY; 6675 } 6676 6677 generate_raw_uri(&data, data.uri, raw_flags); 6678 6679 hr = Uri_Construct(NULL, (void**)&ret); 6680 if(FAILED(hr)) { 6681 SysFreeString(data.uri); 6682 heap_free(path); 6683 *result = NULL; 6684 return hr; 6685 } 6686 6687 if(flags & URL_DONT_SIMPLIFY) 6688 create_flags |= Uri_CREATE_NO_CANONICALIZE; 6689 if(flags & URL_FILE_USE_PATHURL) 6690 create_flags |= Uri_CREATE_FILE_USE_DOS_PATH; 6691 6692 ret->raw_uri = data.uri; 6693 hr = canonicalize_uri(&data, ret, create_flags); 6694 
if(FAILED(hr)) { 6695 IUri_Release(&ret->IUri_iface); 6696 *result = NULL; 6697 return hr; 6698 } 6699 6700 if(flags & URL_DONT_SIMPLIFY) 6701 ret->display_modifiers |= URI_DISPLAY_NO_DEFAULT_PORT_AUTH; 6702 6703 apply_default_flags(&create_flags); 6704 ret->create_flags = create_flags; 6705 *result = &ret->IUri_iface; 6706 6707 heap_free(path); 6708 } 6709 6710 return S_OK; 6711 } 6712 6713 /*********************************************************************** 6714 * CoInternetCombineIUri (urlmon.@) 6715 */ 6716 HRESULT WINAPI CoInternetCombineIUri(IUri *pBaseUri, IUri *pRelativeUri, DWORD dwCombineFlags, 6717 IUri **ppCombinedUri, DWORD_PTR dwReserved) 6718 { 6719 HRESULT hr; 6720 IInternetProtocolInfo *info; 6721 Uri *relative, *base; 6722 TRACE("(%p %p %x %p %x)\n", pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6723 6724 if(!ppCombinedUri) 6725 return E_INVALIDARG; 6726 6727 if(!pBaseUri || !pRelativeUri) { 6728 *ppCombinedUri = NULL; 6729 return E_INVALIDARG; 6730 } 6731 6732 relative = get_uri_obj(pRelativeUri); 6733 base = get_uri_obj(pBaseUri); 6734 if(!relative || !base) { 6735 *ppCombinedUri = NULL; 6736 FIXME("(%p %p %x %p %x) Unknown IUri types not supported yet.\n", 6737 pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6738 return E_NOTIMPL; 6739 } 6740 6741 info = get_protocol_info(base->canon_uri); 6742 if(info) { 6743 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 6744 DWORD result_len = 0; 6745 6746 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, relative->canon_uri, dwCombineFlags, 6747 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 6748 IInternetProtocolInfo_Release(info); 6749 if(SUCCEEDED(hr)) { 6750 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 6751 if(SUCCEEDED(hr)) 6752 return hr; 6753 } 6754 } 6755 6756 return combine_uri(base, relative, dwCombineFlags, ppCombinedUri, 0); 6757 } 6758 6759 
/*********************************************************************** 6760 * CoInternetCombineUrlEx (urlmon.@) 6761 */ 6762 HRESULT WINAPI CoInternetCombineUrlEx(IUri *pBaseUri, LPCWSTR pwzRelativeUrl, DWORD dwCombineFlags, 6763 IUri **ppCombinedUri, DWORD_PTR dwReserved) 6764 { 6765 IUri *relative; 6766 Uri *base; 6767 HRESULT hr; 6768 IInternetProtocolInfo *info; 6769 6770 TRACE("(%p %s %x %p %x)\n", pBaseUri, debugstr_w(pwzRelativeUrl), dwCombineFlags, 6771 ppCombinedUri, (DWORD)dwReserved); 6772 6773 if(!ppCombinedUri) 6774 return E_POINTER; 6775 6776 if(!pwzRelativeUrl) { 6777 *ppCombinedUri = NULL; 6778 return E_UNEXPECTED; 6779 } 6780 6781 if(!pBaseUri) { 6782 *ppCombinedUri = NULL; 6783 return E_INVALIDARG; 6784 } 6785 6786 base = get_uri_obj(pBaseUri); 6787 if(!base) { 6788 *ppCombinedUri = NULL; 6789 FIXME("(%p %s %x %p %x) Unknown IUri's not supported yet.\n", pBaseUri, debugstr_w(pwzRelativeUrl), 6790 dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6791 return E_NOTIMPL; 6792 } 6793 6794 info = get_protocol_info(base->canon_uri); 6795 if(info) { 6796 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 6797 DWORD result_len = 0; 6798 6799 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, pwzRelativeUrl, dwCombineFlags, 6800 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 6801 IInternetProtocolInfo_Release(info); 6802 if(SUCCEEDED(hr)) { 6803 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 6804 if(SUCCEEDED(hr)) 6805 return hr; 6806 } 6807 } 6808 6809 hr = CreateUri(pwzRelativeUrl, Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME, 0, &relative); 6810 if(FAILED(hr)) { 6811 *ppCombinedUri = NULL; 6812 return hr; 6813 } 6814 6815 hr = combine_uri(base, get_uri_obj(relative), dwCombineFlags, ppCombinedUri, COMBINE_URI_FORCE_FLAG_USE); 6816 6817 IUri_Release(relative); 6818 return hr; 6819 } 6820 6821 static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, 6822 DWORD output_len, DWORD 
*result_len) 6823 { 6824 const WCHAR *ptr = NULL; 6825 WCHAR *path = NULL; 6826 const WCHAR **pptr; 6827 DWORD len = 0; 6828 BOOL reduce_path; 6829 6830 /* URL_UNESCAPE only has effect if none of the URL_ESCAPE flags are set. */ 6831 const BOOL allow_unescape = !(flags & URL_ESCAPE_UNSAFE) && 6832 !(flags & URL_ESCAPE_SPACES_ONLY) && 6833 !(flags & URL_ESCAPE_PERCENT); 6834 6835 6836 /* Check if the dot segments need to be removed from the 6837 * path component. 6838 */ 6839 if(uri->scheme_start > -1 && uri->path_start > -1) { 6840 ptr = uri->canon_uri+uri->scheme_start+uri->scheme_len+1; 6841 pptr = &ptr; 6842 } 6843 reduce_path = !(flags & URL_DONT_SIMPLIFY) && 6844 ptr && check_hierarchical(pptr); 6845 6846 for(ptr = uri->canon_uri; ptr < uri->canon_uri+uri->canon_len; ++ptr) { 6847 BOOL do_default_action = TRUE; 6848 6849 /* Keep track of the path if we need to remove dot segments from 6850 * it later. 6851 */ 6852 if(reduce_path && !path && ptr == uri->canon_uri+uri->path_start) 6853 path = output+len; 6854 6855 /* Check if it's time to reduce the path. */ 6856 if(reduce_path && ptr == uri->canon_uri+uri->path_start+uri->path_len) { 6857 DWORD current_path_len = (output+len) - path; 6858 DWORD new_path_len = remove_dot_segments(path, current_path_len); 6859 6860 /* Update the current length. */ 6861 len -= (current_path_len-new_path_len); 6862 reduce_path = FALSE; 6863 } 6864 6865 if(*ptr == '%') { 6866 const WCHAR decoded = decode_pct_val(ptr); 6867 if(decoded) { 6868 if(allow_unescape && (flags & URL_UNESCAPE)) { 6869 if(len < output_len) 6870 output[len] = decoded; 6871 len++; 6872 ptr += 2; 6873 do_default_action = FALSE; 6874 } 6875 } 6876 6877 /* See if %'s needed to encoded. 
*/ 6878 if(do_default_action && (flags & URL_ESCAPE_PERCENT)) { 6879 if(len + 3 < output_len) 6880 pct_encode_val(*ptr, output+len); 6881 len += 3; 6882 do_default_action = FALSE; 6883 } 6884 } else if(*ptr == ' ') { 6885 if((flags & URL_ESCAPE_SPACES_ONLY) && 6886 !(flags & URL_ESCAPE_UNSAFE)) { 6887 if(len + 3 < output_len) 6888 pct_encode_val(*ptr, output+len); 6889 len += 3; 6890 do_default_action = FALSE; 6891 } 6892 } else if(is_ascii(*ptr) && !is_reserved(*ptr) && !is_unreserved(*ptr)) { 6893 if(flags & URL_ESCAPE_UNSAFE) { 6894 if(len + 3 < output_len) 6895 pct_encode_val(*ptr, output+len); 6896 len += 3; 6897 do_default_action = FALSE; 6898 } 6899 } 6900 6901 if(do_default_action) { 6902 if(len < output_len) 6903 output[len] = *ptr; 6904 len++; 6905 } 6906 } 6907 6908 /* Sometimes the path is the very last component of the IUri, so 6909 * see if the dot segments need to be reduced now. 6910 */ 6911 if(reduce_path && path) { 6912 DWORD current_path_len = (output+len) - path; 6913 DWORD new_path_len = remove_dot_segments(path, current_path_len); 6914 6915 /* Update the current length. */ 6916 len -= (current_path_len-new_path_len); 6917 } 6918 6919 if(len < output_len) 6920 output[len] = 0; 6921 else 6922 output[output_len-1] = 0; 6923 6924 /* The null terminator isn't included in the length. 
*/ 6925 *result_len = len; 6926 if(len >= output_len) 6927 return STRSAFE_E_INSUFFICIENT_BUFFER; 6928 6929 return S_OK; 6930 } 6931 6932 static HRESULT parse_friendly(IUri *uri, LPWSTR output, DWORD output_len, 6933 DWORD *result_len) 6934 { 6935 HRESULT hr; 6936 DWORD display_len; 6937 BSTR display; 6938 6939 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DISPLAY_URI, &display_len, 0); 6940 if(FAILED(hr)) { 6941 *result_len = 0; 6942 return hr; 6943 } 6944 6945 *result_len = display_len; 6946 if(display_len+1 > output_len) 6947 return STRSAFE_E_INSUFFICIENT_BUFFER; 6948 6949 hr = IUri_GetDisplayUri(uri, &display); 6950 if(FAILED(hr)) { 6951 *result_len = 0; 6952 return hr; 6953 } 6954 6955 memcpy(output, display, (display_len+1)*sizeof(WCHAR)); 6956 SysFreeString(display); 6957 return S_OK; 6958 } 6959 6960 static HRESULT parse_rootdocument(const Uri *uri, LPWSTR output, DWORD output_len, 6961 DWORD *result_len) 6962 { 6963 static const WCHAR colon_slashesW[] = {':','/','/'}; 6964 6965 WCHAR *ptr; 6966 DWORD len = 0; 6967 6968 /* Windows only returns the root document if the URI has an authority 6969 * and it's not an unknown scheme type or a file scheme type. 6970 */ 6971 if(uri->authority_start == -1 || 6972 uri->scheme_type == URL_SCHEME_UNKNOWN || 6973 uri->scheme_type == URL_SCHEME_FILE) { 6974 *result_len = 0; 6975 if(!output_len) 6976 return STRSAFE_E_INSUFFICIENT_BUFFER; 6977 6978 output[0] = 0; 6979 return S_OK; 6980 } 6981 6982 len = uri->scheme_len+uri->authority_len; 6983 /* For the "://" and '/' which will be added. */ 6984 len += 4; 6985 6986 if(len+1 > output_len) { 6987 *result_len = len; 6988 return STRSAFE_E_INSUFFICIENT_BUFFER; 6989 } 6990 6991 ptr = output; 6992 memcpy(ptr, uri->canon_uri+uri->scheme_start, uri->scheme_len*sizeof(WCHAR)); 6993 6994 /* Add the "://". */ 6995 ptr += uri->scheme_len; 6996 memcpy(ptr, colon_slashesW, sizeof(colon_slashesW)); 6997 6998 /* Add the authority. 
*/ 6999 ptr += ARRAY_SIZE(colon_slashesW); 7000 memcpy(ptr, uri->canon_uri+uri->authority_start, uri->authority_len*sizeof(WCHAR)); 7001 7002 /* Add the '/' after the authority. */ 7003 ptr += uri->authority_len; 7004 *ptr = '/'; 7005 ptr[1] = 0; 7006 7007 *result_len = len; 7008 return S_OK; 7009 } 7010 7011 static HRESULT parse_document(const Uri *uri, LPWSTR output, DWORD output_len, 7012 DWORD *result_len) 7013 { 7014 DWORD len = 0; 7015 7016 /* It has to be a known scheme type, but, it can't be a file 7017 * scheme. It also has to hierarchical. 7018 */ 7019 if(uri->scheme_type == URL_SCHEME_UNKNOWN || 7020 uri->scheme_type == URL_SCHEME_FILE || 7021 uri->authority_start == -1) { 7022 *result_len = 0; 7023 if(output_len < 1) 7024 return STRSAFE_E_INSUFFICIENT_BUFFER; 7025 7026 output[0] = 0; 7027 return S_OK; 7028 } 7029 7030 if(uri->fragment_start > -1) 7031 len = uri->fragment_start; 7032 else 7033 len = uri->canon_len; 7034 7035 *result_len = len; 7036 if(len+1 > output_len) 7037 return STRSAFE_E_INSUFFICIENT_BUFFER; 7038 7039 memcpy(output, uri->canon_uri, len*sizeof(WCHAR)); 7040 output[len] = 0; 7041 return S_OK; 7042 } 7043 7044 static HRESULT parse_path_from_url(const Uri *uri, LPWSTR output, DWORD output_len, 7045 DWORD *result_len) 7046 { 7047 const WCHAR *path_ptr; 7048 WCHAR buffer[INTERNET_MAX_URL_LENGTH+1]; 7049 WCHAR *ptr; 7050 7051 if(uri->scheme_type != URL_SCHEME_FILE) { 7052 *result_len = 0; 7053 if(output_len > 0) 7054 output[0] = 0; 7055 return E_INVALIDARG; 7056 } 7057 7058 ptr = buffer; 7059 if(uri->host_start > -1) { 7060 static const WCHAR slash_slashW[] = {'\\','\\'}; 7061 7062 memcpy(ptr, slash_slashW, sizeof(slash_slashW)); 7063 ptr += ARRAY_SIZE(slash_slashW); 7064 memcpy(ptr, uri->canon_uri+uri->host_start, uri->host_len*sizeof(WCHAR)); 7065 ptr += uri->host_len; 7066 } 7067 7068 path_ptr = uri->canon_uri+uri->path_start; 7069 if(uri->path_len > 3 && *path_ptr == '/' && is_drive_path(path_ptr+1)) 7070 /* Skip past the '/' in front 
of the drive path. */ 7071 ++path_ptr; 7072 7073 for(; path_ptr < uri->canon_uri+uri->path_start+uri->path_len; ++path_ptr, ++ptr) { 7074 BOOL do_default_action = TRUE; 7075 7076 if(*path_ptr == '%') { 7077 const WCHAR decoded = decode_pct_val(path_ptr); 7078 if(decoded) { 7079 *ptr = decoded; 7080 path_ptr += 2; 7081 do_default_action = FALSE; 7082 } 7083 } else if(*path_ptr == '/') { 7084 *ptr = '\\'; 7085 do_default_action = FALSE; 7086 } 7087 7088 if(do_default_action) 7089 *ptr = *path_ptr; 7090 } 7091 7092 *ptr = 0; 7093 7094 *result_len = ptr-buffer; 7095 if(*result_len+1 > output_len) 7096 return STRSAFE_E_INSUFFICIENT_BUFFER; 7097 7098 memcpy(output, buffer, (*result_len+1)*sizeof(WCHAR)); 7099 return S_OK; 7100 } 7101 7102 static HRESULT parse_url_from_path(IUri *uri, LPWSTR output, DWORD output_len, 7103 DWORD *result_len) 7104 { 7105 HRESULT hr; 7106 BSTR received; 7107 DWORD len = 0; 7108 7109 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_ABSOLUTE_URI, &len, 0); 7110 if(FAILED(hr)) { 7111 *result_len = 0; 7112 return hr; 7113 } 7114 7115 *result_len = len; 7116 if(len+1 > output_len) 7117 return STRSAFE_E_INSUFFICIENT_BUFFER; 7118 7119 hr = IUri_GetAbsoluteUri(uri, &received); 7120 if(FAILED(hr)) { 7121 *result_len = 0; 7122 return hr; 7123 } 7124 7125 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7126 SysFreeString(received); 7127 7128 return S_OK; 7129 } 7130 7131 static HRESULT parse_schema(IUri *uri, LPWSTR output, DWORD output_len, 7132 DWORD *result_len) 7133 { 7134 HRESULT hr; 7135 DWORD len; 7136 BSTR received; 7137 7138 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_SCHEME_NAME, &len, 0); 7139 if(FAILED(hr)) { 7140 *result_len = 0; 7141 return hr; 7142 } 7143 7144 *result_len = len; 7145 if(len+1 > output_len) 7146 return STRSAFE_E_INSUFFICIENT_BUFFER; 7147 7148 hr = IUri_GetSchemeName(uri, &received); 7149 if(FAILED(hr)) { 7150 *result_len = 0; 7151 return hr; 7152 } 7153 7154 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7155 
SysFreeString(received); 7156 7157 return S_OK; 7158 } 7159 7160 static HRESULT parse_site(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7161 { 7162 HRESULT hr; 7163 DWORD len; 7164 BSTR received; 7165 7166 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_HOST, &len, 0); 7167 if(FAILED(hr)) { 7168 *result_len = 0; 7169 return hr; 7170 } 7171 7172 *result_len = len; 7173 if(len+1 > output_len) 7174 return STRSAFE_E_INSUFFICIENT_BUFFER; 7175 7176 hr = IUri_GetHost(uri, &received); 7177 if(FAILED(hr)) { 7178 *result_len = 0; 7179 return hr; 7180 } 7181 7182 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7183 SysFreeString(received); 7184 7185 return S_OK; 7186 } 7187 7188 static HRESULT parse_domain(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7189 { 7190 HRESULT hr; 7191 DWORD len; 7192 BSTR received; 7193 7194 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DOMAIN, &len, 0); 7195 if(FAILED(hr)) { 7196 *result_len = 0; 7197 return hr; 7198 } 7199 7200 *result_len = len; 7201 if(len+1 > output_len) 7202 return STRSAFE_E_INSUFFICIENT_BUFFER; 7203 7204 hr = IUri_GetDomain(uri, &received); 7205 if(FAILED(hr)) { 7206 *result_len = 0; 7207 return hr; 7208 } 7209 7210 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7211 SysFreeString(received); 7212 7213 return S_OK; 7214 } 7215 7216 static HRESULT parse_anchor(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7217 { 7218 HRESULT hr; 7219 DWORD len; 7220 BSTR received; 7221 7222 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_FRAGMENT, &len, 0); 7223 if(FAILED(hr)) { 7224 *result_len = 0; 7225 return hr; 7226 } 7227 7228 *result_len = len; 7229 if(len+1 > output_len) 7230 return STRSAFE_E_INSUFFICIENT_BUFFER; 7231 7232 hr = IUri_GetFragment(uri, &received); 7233 if(FAILED(hr)) { 7234 *result_len = 0; 7235 return hr; 7236 } 7237 7238 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7239 SysFreeString(received); 7240 7241 return S_OK; 7242 } 7243 7244 
/*********************************************************************** 7245 * CoInternetParseIUri (urlmon.@) 7246 */ 7247 HRESULT WINAPI CoInternetParseIUri(IUri *pIUri, PARSEACTION ParseAction, DWORD dwFlags, 7248 LPWSTR pwzResult, DWORD cchResult, DWORD *pcchResult, 7249 DWORD_PTR dwReserved) 7250 { 7251 HRESULT hr; 7252 Uri *uri; 7253 IInternetProtocolInfo *info; 7254 7255 TRACE("(%p %d %x %p %d %p %x)\n", pIUri, ParseAction, dwFlags, pwzResult, 7256 cchResult, pcchResult, (DWORD)dwReserved); 7257 7258 if(!pcchResult) 7259 return E_POINTER; 7260 7261 if(!pwzResult || !pIUri) { 7262 *pcchResult = 0; 7263 return E_INVALIDARG; 7264 } 7265 7266 if(!(uri = get_uri_obj(pIUri))) { 7267 *pcchResult = 0; 7268 FIXME("(%p %d %x %p %d %p %x) Unknown IUri's not supported for this action.\n", 7269 pIUri, ParseAction, dwFlags, pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 7270 return E_NOTIMPL; 7271 } 7272 7273 info = get_protocol_info(uri->canon_uri); 7274 if(info) { 7275 hr = IInternetProtocolInfo_ParseUrl(info, uri->canon_uri, ParseAction, dwFlags, 7276 pwzResult, cchResult, pcchResult, 0); 7277 IInternetProtocolInfo_Release(info); 7278 if(SUCCEEDED(hr)) return hr; 7279 } 7280 7281 switch(ParseAction) { 7282 case PARSE_CANONICALIZE: 7283 hr = parse_canonicalize(uri, dwFlags, pwzResult, cchResult, pcchResult); 7284 break; 7285 case PARSE_FRIENDLY: 7286 hr = parse_friendly(pIUri, pwzResult, cchResult, pcchResult); 7287 break; 7288 case PARSE_ROOTDOCUMENT: 7289 hr = parse_rootdocument(uri, pwzResult, cchResult, pcchResult); 7290 break; 7291 case PARSE_DOCUMENT: 7292 hr = parse_document(uri, pwzResult, cchResult, pcchResult); 7293 break; 7294 case PARSE_PATH_FROM_URL: 7295 hr = parse_path_from_url(uri, pwzResult, cchResult, pcchResult); 7296 break; 7297 case PARSE_URL_FROM_PATH: 7298 hr = parse_url_from_path(pIUri, pwzResult, cchResult, pcchResult); 7299 break; 7300 case PARSE_SCHEMA: 7301 hr = parse_schema(pIUri, pwzResult, cchResult, pcchResult); 7302 break; 7303 
case PARSE_SITE: 7304 hr = parse_site(pIUri, pwzResult, cchResult, pcchResult); 7305 break; 7306 case PARSE_DOMAIN: 7307 hr = parse_domain(pIUri, pwzResult, cchResult, pcchResult); 7308 break; 7309 case PARSE_LOCATION: 7310 case PARSE_ANCHOR: 7311 hr = parse_anchor(pIUri, pwzResult, cchResult, pcchResult); 7312 break; 7313 case PARSE_SECURITY_URL: 7314 case PARSE_MIME: 7315 case PARSE_SERVER: 7316 case PARSE_SECURITY_DOMAIN: 7317 *pcchResult = 0; 7318 hr = E_FAIL; 7319 break; 7320 default: 7321 *pcchResult = 0; 7322 hr = E_NOTIMPL; 7323 FIXME("(%p %d %x %p %d %p %x) Partial stub.\n", pIUri, ParseAction, dwFlags, 7324 pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 7325 } 7326 7327 return hr; 7328 } 7329