1 /* 2 * Copyright 2010 Jacek Caban for CodeWeavers 3 * Copyright 2010 Thomas Mullaly 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Lesser General Public 7 * License as published by the Free Software Foundation; either 8 * version 2.1 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Lesser General Public License for more details. 14 * 15 * You should have received a copy of the GNU Lesser General Public 16 * License along with this library; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 18 */ 19 20 #include "urlmon_main.h" 21 22 #include <strsafe.h> 23 24 #define URI_DISPLAY_NO_ABSOLUTE_URI 0x1 25 #define URI_DISPLAY_NO_DEFAULT_PORT_AUTH 0x2 26 27 #define ALLOW_NULL_TERM_SCHEME 0x01 28 #define ALLOW_NULL_TERM_USER_NAME 0x02 29 #define ALLOW_NULL_TERM_PASSWORD 0x04 30 #define ALLOW_BRACKETLESS_IP_LITERAL 0x08 31 #define SKIP_IP_FUTURE_CHECK 0x10 32 #define IGNORE_PORT_DELIMITER 0x20 33 34 #define RAW_URI_FORCE_PORT_DISP 0x1 35 #define RAW_URI_CONVERT_TO_DOS_PATH 0x2 36 37 #define COMBINE_URI_FORCE_FLAG_USE 0x1 38 39 static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}}; 40 41 typedef struct { 42 IUri IUri_iface; 43 IUriBuilderFactory IUriBuilderFactory_iface; 44 IPersistStream IPersistStream_iface; 45 IMarshal IMarshal_iface; 46 47 LONG ref; 48 49 BSTR raw_uri; 50 51 /* Information about the canonicalized URI's buffer. 
*/ 52 WCHAR *canon_uri; 53 DWORD canon_size; 54 DWORD canon_len; 55 BOOL display_modifiers; 56 DWORD create_flags; 57 58 INT scheme_start; 59 DWORD scheme_len; 60 URL_SCHEME scheme_type; 61 62 INT userinfo_start; 63 DWORD userinfo_len; 64 INT userinfo_split; 65 66 INT host_start; 67 DWORD host_len; 68 Uri_HOST_TYPE host_type; 69 70 INT port_offset; 71 DWORD port; 72 BOOL has_port; 73 74 INT authority_start; 75 DWORD authority_len; 76 77 INT domain_offset; 78 79 INT path_start; 80 DWORD path_len; 81 INT extension_offset; 82 83 INT query_start; 84 DWORD query_len; 85 86 INT fragment_start; 87 DWORD fragment_len; 88 } Uri; 89 90 typedef struct { 91 IUriBuilder IUriBuilder_iface; 92 LONG ref; 93 94 Uri *uri; 95 DWORD modified_props; 96 97 WCHAR *fragment; 98 DWORD fragment_len; 99 100 WCHAR *host; 101 DWORD host_len; 102 103 WCHAR *password; 104 DWORD password_len; 105 106 WCHAR *path; 107 DWORD path_len; 108 109 BOOL has_port; 110 DWORD port; 111 112 WCHAR *query; 113 DWORD query_len; 114 115 WCHAR *scheme; 116 DWORD scheme_len; 117 118 WCHAR *username; 119 DWORD username_len; 120 } UriBuilder; 121 122 typedef struct { 123 const WCHAR *str; 124 DWORD len; 125 } h16; 126 127 typedef struct { 128 /* IPv6 addresses can hold up to 8 h16 components. */ 129 h16 components[8]; 130 DWORD h16_count; 131 132 /* An IPv6 can have 1 elision ("::"). */ 133 const WCHAR *elision; 134 135 /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. 
*/ 136 const WCHAR *ipv4; 137 DWORD ipv4_len; 138 139 INT components_size; 140 INT elision_size; 141 } ipv6_address; 142 143 typedef struct { 144 BSTR uri; 145 146 BOOL is_relative; 147 BOOL is_opaque; 148 BOOL has_implicit_scheme; 149 BOOL has_implicit_ip; 150 UINT implicit_ipv4; 151 BOOL must_have_path; 152 153 const WCHAR *scheme; 154 DWORD scheme_len; 155 URL_SCHEME scheme_type; 156 157 const WCHAR *username; 158 DWORD username_len; 159 160 const WCHAR *password; 161 DWORD password_len; 162 163 const WCHAR *host; 164 DWORD host_len; 165 Uri_HOST_TYPE host_type; 166 167 BOOL has_ipv6; 168 ipv6_address ipv6_address; 169 170 BOOL has_port; 171 const WCHAR *port; 172 DWORD port_len; 173 DWORD port_value; 174 175 const WCHAR *path; 176 DWORD path_len; 177 178 const WCHAR *query; 179 DWORD query_len; 180 181 const WCHAR *fragment; 182 DWORD fragment_len; 183 } parse_data; 184 185 static const CHAR hexDigits[] = "0123456789ABCDEF"; 186 187 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. 
*/ 188 static const struct { 189 URL_SCHEME scheme; 190 WCHAR scheme_name[16]; 191 } recognized_schemes[] = { 192 {URL_SCHEME_FTP, {'f','t','p',0}}, 193 {URL_SCHEME_HTTP, {'h','t','t','p',0}}, 194 {URL_SCHEME_GOPHER, {'g','o','p','h','e','r',0}}, 195 {URL_SCHEME_MAILTO, {'m','a','i','l','t','o',0}}, 196 {URL_SCHEME_NEWS, {'n','e','w','s',0}}, 197 {URL_SCHEME_NNTP, {'n','n','t','p',0}}, 198 {URL_SCHEME_TELNET, {'t','e','l','n','e','t',0}}, 199 {URL_SCHEME_WAIS, {'w','a','i','s',0}}, 200 {URL_SCHEME_FILE, {'f','i','l','e',0}}, 201 {URL_SCHEME_MK, {'m','k',0}}, 202 {URL_SCHEME_HTTPS, {'h','t','t','p','s',0}}, 203 {URL_SCHEME_SHELL, {'s','h','e','l','l',0}}, 204 {URL_SCHEME_SNEWS, {'s','n','e','w','s',0}}, 205 {URL_SCHEME_LOCAL, {'l','o','c','a','l',0}}, 206 {URL_SCHEME_JAVASCRIPT, {'j','a','v','a','s','c','r','i','p','t',0}}, 207 {URL_SCHEME_VBSCRIPT, {'v','b','s','c','r','i','p','t',0}}, 208 {URL_SCHEME_ABOUT, {'a','b','o','u','t',0}}, 209 {URL_SCHEME_RES, {'r','e','s',0}}, 210 {URL_SCHEME_MSSHELLROOTED, {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}}, 211 {URL_SCHEME_MSSHELLIDLIST, {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}}, 212 {URL_SCHEME_MSHELP, {'h','c','p',0}}, 213 {URL_SCHEME_WILDCARD, {'*',0}} 214 }; 215 216 /* List of default ports Windows recognizes. */ 217 static const struct { 218 URL_SCHEME scheme; 219 USHORT port; 220 } default_ports[] = { 221 {URL_SCHEME_FTP, 21}, 222 {URL_SCHEME_HTTP, 80}, 223 {URL_SCHEME_GOPHER, 70}, 224 {URL_SCHEME_NNTP, 119}, 225 {URL_SCHEME_TELNET, 23}, 226 {URL_SCHEME_WAIS, 210}, 227 {URL_SCHEME_HTTPS, 443}, 228 }; 229 230 /* List of 3-character top level domain names Windows seems to recognize. 231 * There might be more, but, these are the only ones I've found so far. 
232 */ 233 static const struct { 234 WCHAR tld_name[4]; 235 } recognized_tlds[] = { 236 {{'c','o','m',0}}, 237 {{'e','d','u',0}}, 238 {{'g','o','v',0}}, 239 {{'i','n','t',0}}, 240 {{'m','i','l',0}}, 241 {{'n','e','t',0}}, 242 {{'o','r','g',0}} 243 }; 244 245 static Uri *get_uri_obj(IUri *uri) 246 { 247 Uri *ret; 248 HRESULT hres; 249 250 hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret); 251 return SUCCEEDED(hres) ? ret : NULL; 252 } 253 254 static inline BOOL is_alpha(WCHAR val) { 255 return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z')); 256 } 257 258 static inline BOOL is_num(WCHAR val) { 259 return (val >= '0' && val <= '9'); 260 } 261 262 static inline BOOL is_drive_path(const WCHAR *str) { 263 return (is_alpha(str[0]) && (str[1] == ':' || str[1] == '|')); 264 } 265 266 static inline BOOL is_unc_path(const WCHAR *str) { 267 return (str[0] == '\\' && str[1] == '\\'); 268 } 269 270 static inline BOOL is_forbidden_dos_path_char(WCHAR val) { 271 return (val == '>' || val == '<' || val == '\"'); 272 } 273 274 /* A URI is implicitly a file path if it begins with 275 * a drive letter (e.g. X:) or starts with "\\" (UNC path). 276 */ 277 static inline BOOL is_implicit_file_path(const WCHAR *str) { 278 return (is_unc_path(str) || (is_alpha(str[0]) && str[1] == ':')); 279 } 280 281 /* Checks if the URI is a hierarchical URI. A hierarchical 282 * URI is one that has "//" after the scheme. 283 */ 284 static BOOL check_hierarchical(const WCHAR **ptr) { 285 const WCHAR *start = *ptr; 286 287 if(**ptr != '/') 288 return FALSE; 289 290 ++(*ptr); 291 if(**ptr != '/') { 292 *ptr = start; 293 return FALSE; 294 } 295 296 ++(*ptr); 297 return TRUE; 298 } 299 300 /* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" */ 301 static inline BOOL is_unreserved(WCHAR val) { 302 return (is_alpha(val) || is_num(val) || val == '-' || val == '.' || 303 val == '_' || val == '~'); 304 } 305 306 /* sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 307 * / "*" / "+" / "," / ";" / "=" 308 */ 309 static inline BOOL is_subdelim(WCHAR val) { 310 return (val == '!' || val == '$' || val == '&' || 311 val == '\'' || val == '(' || val == ')' || 312 val == '*' || val == '+' || val == ',' || 313 val == ';' || val == '='); 314 } 315 316 /* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */ 317 static inline BOOL is_gendelim(WCHAR val) { 318 return (val == ':' || val == '/' || val == '?' || 319 val == '#' || val == '[' || val == ']' || 320 val == '@'); 321 } 322 323 /* Characters that delimit the end of the authority 324 * section of a URI. Sometimes a '\\' is considered 325 * an authority delimiter. 326 */ 327 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) { 328 return (val == '#' || val == '/' || val == '?' || 329 val == '\0' || (acceptSlash && val == '\\')); 330 } 331 332 /* reserved = gen-delims / sub-delims */ 333 static inline BOOL is_reserved(WCHAR val) { 334 return (is_subdelim(val) || is_gendelim(val)); 335 } 336 337 static inline BOOL is_hexdigit(WCHAR val) { 338 return ((val >= 'a' && val <= 'f') || 339 (val >= 'A' && val <= 'F') || 340 (val >= '0' && val <= '9')); 341 } 342 343 static inline BOOL is_path_delim(URL_SCHEME scheme, WCHAR val) { 344 return (!val || (val == '#' && scheme != URL_SCHEME_FILE) || val == '?'); 345 } 346 347 static inline BOOL is_slash(WCHAR c) 348 { 349 return c == '/' || c == '\\'; 350 } 351 352 static BOOL is_default_port(URL_SCHEME scheme, DWORD port) { 353 DWORD i; 354 355 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 356 if(default_ports[i].scheme == scheme && default_ports[i].port) 357 return TRUE; 358 } 359 360 return FALSE; 361 } 362 363 /* List of schemes types Windows seems to expect to be hierarchical. 
*/ 364 static inline BOOL is_hierarchical_scheme(URL_SCHEME type) { 365 return(type == URL_SCHEME_HTTP || type == URL_SCHEME_FTP || 366 type == URL_SCHEME_GOPHER || type == URL_SCHEME_NNTP || 367 type == URL_SCHEME_TELNET || type == URL_SCHEME_WAIS || 368 type == URL_SCHEME_FILE || type == URL_SCHEME_HTTPS || 369 type == URL_SCHEME_RES); 370 } 371 372 /* Checks if 'flags' contains an invalid combination of Uri_CREATE flags. */ 373 static inline BOOL has_invalid_flag_combination(DWORD flags) { 374 return((flags & Uri_CREATE_DECODE_EXTRA_INFO && flags & Uri_CREATE_NO_DECODE_EXTRA_INFO) || 375 (flags & Uri_CREATE_CANONICALIZE && flags & Uri_CREATE_NO_CANONICALIZE) || 376 (flags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) || 377 (flags & Uri_CREATE_PRE_PROCESS_HTML_URI && flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) || 378 (flags & Uri_CREATE_IE_SETTINGS && flags & Uri_CREATE_NO_IE_SETTINGS)); 379 } 380 381 /* Applies each default Uri_CREATE flags to 'flags' if it 382 * doesn't cause a flag conflict. 383 */ 384 static void apply_default_flags(DWORD *flags) { 385 if(!(*flags & Uri_CREATE_NO_CANONICALIZE)) 386 *flags |= Uri_CREATE_CANONICALIZE; 387 if(!(*flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) 388 *flags |= Uri_CREATE_DECODE_EXTRA_INFO; 389 if(!(*flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) 390 *flags |= Uri_CREATE_CRACK_UNKNOWN_SCHEMES; 391 if(!(*flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 392 *flags |= Uri_CREATE_PRE_PROCESS_HTML_URI; 393 if(!(*flags & Uri_CREATE_IE_SETTINGS)) 394 *flags |= Uri_CREATE_NO_IE_SETTINGS; 395 } 396 397 /* Determines if the URI is hierarchical using the information already parsed into 398 * data and using the current location of parsing in the URI string. 399 * 400 * Windows considers a URI hierarchical if one of the following is true: 401 * A.) It's a wildcard scheme. 402 * B.) It's an implicit file scheme. 403 * C.) It's a known hierarchical scheme and it has two '\\' after the scheme name. 
404 * (the '\\' will be converted into "//" during canonicalization). 405 * D.) "//" appears after the scheme name (or at the beginning if no scheme is given). 406 */ 407 static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data) { 408 const WCHAR *start = *ptr; 409 410 if(data->scheme_type == URL_SCHEME_WILDCARD) 411 return TRUE; 412 else if(data->scheme_type == URL_SCHEME_FILE && data->has_implicit_scheme) 413 return TRUE; 414 else if(is_hierarchical_scheme(data->scheme_type) && (*ptr)[0] == '\\' && (*ptr)[1] == '\\') { 415 *ptr += 2; 416 return TRUE; 417 } else if(data->scheme_type != URL_SCHEME_MAILTO && check_hierarchical(ptr)) 418 return TRUE; 419 420 *ptr = start; 421 return FALSE; 422 } 423 424 /* Computes the size of the given IPv6 address. 425 * Each h16 component is 16 bits. If there is an IPv4 address, it's 426 * 32 bits. If there's an elision it can be 16 to 128 bits, depending 427 * on the number of other components. 428 * 429 * Modeled after google-url's CheckIPv6ComponentsSize function 430 */ 431 static void compute_ipv6_comps_size(ipv6_address *address) { 432 address->components_size = address->h16_count * 2; 433 434 if(address->ipv4) 435 /* IPv4 address is 4 bytes. */ 436 address->components_size += 4; 437 438 if(address->elision) { 439 /* An elision can be anywhere from 2 bytes up to 16 bytes. 440 * Its size depends on the size of the h16 and IPv4 components. 
441 */ 442 address->elision_size = 16 - address->components_size; 443 if(address->elision_size < 2) 444 address->elision_size = 2; 445 } else 446 address->elision_size = 0; 447 } 448 449 /* Taken from dlls/jscript/lex.c */ 450 static int hex_to_int(WCHAR val) { 451 if(val >= '0' && val <= '9') 452 return val - '0'; 453 else if(val >= 'a' && val <= 'f') 454 return val - 'a' + 10; 455 else if(val >= 'A' && val <= 'F') 456 return val - 'A' + 10; 457 458 return -1; 459 } 460 461 /* Helper function for converting a percent encoded string 462 * representation of a WCHAR value into its actual WCHAR value. If 463 * the two characters following the '%' aren't valid hex values then 464 * this function returns the NULL character. 465 * 466 * E.g. 467 * "%2E" will result in '.' being returned by this function. 468 */ 469 static WCHAR decode_pct_val(const WCHAR *ptr) { 470 WCHAR ret = '\0'; 471 472 if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) { 473 INT a = hex_to_int(*(ptr + 1)); 474 INT b = hex_to_int(*(ptr + 2)); 475 476 ret = a << 4; 477 ret += b; 478 } 479 480 return ret; 481 } 482 483 /* Helper function for percent encoding a given character 484 * and storing the encoded value into a given buffer (dest). 485 * 486 * It's up to the calling function to ensure that there is 487 * at least enough space in 'dest' for the percent encoded 488 * value to be stored (so dest + 3 spaces available). 489 */ 490 static inline void pct_encode_val(WCHAR val, WCHAR *dest) { 491 dest[0] = '%'; 492 dest[1] = hexDigits[(val >> 4) & 0xf]; 493 dest[2] = hexDigits[val & 0xf]; 494 } 495 496 /* Attempts to parse the domain name from the host. 497 * 498 * This function also includes the Top-level Domain (TLD) name 499 * of the host when it tries to find the domain name. If it finds 500 * a valid domain name it will assign 'domain_start' the offset 501 * into 'host' where the domain name starts. 
502 * 503 * It's implied that if there is a domain name its range is: 504 * [host+domain_start, host+host_len). 505 */ 506 void find_domain_name(const WCHAR *host, DWORD host_len, 507 INT *domain_start) { 508 const WCHAR *last_tld, *sec_last_tld, *end; 509 510 end = host+host_len-1; 511 512 *domain_start = -1; 513 514 /* There has to be at least enough room for a '.' followed by a 515 * 3-character TLD for a domain to even exist in the host name. 516 */ 517 if(host_len < 4) 518 return; 519 520 last_tld = memrchrW(host, '.', host_len); 521 if(!last_tld) 522 /* http://hostname -> has no domain name. */ 523 return; 524 525 sec_last_tld = memrchrW(host, '.', last_tld-host); 526 if(!sec_last_tld) { 527 /* If the '.' is at the beginning of the host there 528 * has to be at least 3 characters in the TLD for it 529 * to be valid. 530 * Ex: .com -> .com as the domain name. 531 * .co -> has no domain name. 532 */ 533 if(last_tld-host == 0) { 534 if(end-(last_tld-1) < 3) 535 return; 536 } else if(last_tld-host == 3) { 537 DWORD i; 538 539 /* If there are three characters in front of last_tld and 540 * they are on the list of recognized TLDs, then this 541 * host doesn't have a domain (since the host only contains 542 * a TLD name. 543 * Ex: edu.uk -> has no domain name. 544 * foo.uk -> foo.uk as the domain name. 545 */ 546 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) { 547 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3)) 548 return; 549 } 550 } else if(last_tld-host < 3) 551 /* Anything less than 3 characters is considered part 552 * of the TLD name. 553 * Ex: ak.uk -> Has no domain name. 554 */ 555 return; 556 557 /* Otherwise the domain name is the whole host name. */ 558 *domain_start = 0; 559 } else if(end+1-last_tld > 3) { 560 /* If the last_tld has more than 3 characters, then it's automatically 561 * considered the TLD of the domain name. 562 * Ex: www.winehq.org.uk.test -> uk.test as the domain name. 
563 */ 564 *domain_start = (sec_last_tld+1)-host; 565 } else if(last_tld - (sec_last_tld+1) < 4) { 566 DWORD i; 567 /* If the sec_last_tld is 3 characters long it HAS to be on the list of 568 * recognized to still be considered part of the TLD name, otherwise 569 * it's considered the domain name. 570 * Ex: www.google.com.uk -> google.com.uk as the domain name. 571 * www.google.foo.uk -> foo.uk as the domain name. 572 */ 573 if(last_tld - (sec_last_tld+1) == 3) { 574 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) { 575 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) { 576 const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host); 577 578 if(!domain) 579 *domain_start = 0; 580 else 581 *domain_start = (domain+1) - host; 582 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start, 583 (host+host_len)-(host+*domain_start))); 584 return; 585 } 586 } 587 588 *domain_start = (sec_last_tld+1)-host; 589 } else { 590 /* Since the sec_last_tld is less than 3 characters it's considered 591 * part of the TLD. 592 * Ex: www.google.fo.uk -> google.fo.uk as the domain name. 593 */ 594 const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host); 595 596 if(!domain) 597 *domain_start = 0; 598 else 599 *domain_start = (domain+1) - host; 600 } 601 } else { 602 /* The second to last TLD has more than 3 characters making it 603 * the domain name. 604 * Ex: www.google.test.us -> test.us as the domain name. 605 */ 606 *domain_start = (sec_last_tld+1)-host; 607 } 608 609 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start, 610 (host+host_len)-(host+*domain_start))); 611 } 612 613 /* Removes the dot segments from a hierarchical URIs path component. This 614 * function performs the removal in place. 615 * 616 * This function returns the new length of the path string. 
617 */ 618 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) { 619 WCHAR *out = path; 620 const WCHAR *in = out; 621 const WCHAR *end = out + path_len; 622 DWORD len; 623 624 while(in < end) { 625 /* Move the first path segment in the input buffer to the end of 626 * the output buffer, and any subsequent characters up to, including 627 * the next "/" character (if any) or the end of the input buffer. 628 */ 629 while(in < end && !is_slash(*in)) 630 *out++ = *in++; 631 if(in == end) 632 break; 633 *out++ = *in++; 634 635 while(in < end) { 636 if(*in != '.') 637 break; 638 639 /* Handle ending "/." */ 640 if(in + 1 == end) { 641 ++in; 642 break; 643 } 644 645 /* Handle "/./" */ 646 if(is_slash(in[1])) { 647 in += 2; 648 continue; 649 } 650 651 /* If we don't have "/../" or ending "/.." */ 652 if(in[1] != '.' || (in + 2 != end && !is_slash(in[2]))) 653 break; 654 655 /* Find the slash preceding out pointer and move out pointer to it */ 656 if(out > path+1 && is_slash(*--out)) 657 --out; 658 while(out > path && !is_slash(*(--out))); 659 if(is_slash(*out)) 660 ++out; 661 in += 2; 662 if(in != end) 663 ++in; 664 } 665 } 666 667 len = out - path; 668 TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len, 669 debugstr_wn(path, len), len); 670 return len; 671 } 672 673 /* Attempts to find the file extension in a given path. */ 674 static INT find_file_extension(const WCHAR *path, DWORD path_len) { 675 const WCHAR *end; 676 677 for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) { 678 if(*end == '.') 679 return end-path; 680 } 681 682 return -1; 683 } 684 685 /* Computes the location where the elision should occur in the IPv6 686 * address using the numerical values of each component stored in 687 * 'values'. If the address shouldn't contain an elision then 'index' 688 * is assigned -1 as its value. 
Otherwise 'index' will contain the 689 * starting index (into values) where the elision should be, and 'count' 690 * will contain the number of cells the elision covers. 691 * 692 * NOTES: 693 * Windows will expand an elision if the elision only represents one h16 694 * component of the address. 695 * 696 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 697 * 698 * If the IPv6 address contains an IPv4 address, the IPv4 address is also 699 * considered for being included as part of an elision if all its components 700 * are zeros. 701 * 702 * Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::] 703 */ 704 static void compute_elision_location(const ipv6_address *address, const USHORT values[8], 705 INT *index, DWORD *count) { 706 DWORD i, max_len, cur_len; 707 INT max_index, cur_index; 708 709 max_len = cur_len = 0; 710 max_index = cur_index = -1; 711 for(i = 0; i < 8; ++i) { 712 BOOL check_ipv4 = (address->ipv4 && i == 6); 713 BOOL is_end = (check_ipv4 || i == 7); 714 715 if(check_ipv4) { 716 /* Check if the IPv4 address contains only zeros. */ 717 if(values[i] == 0 && values[i+1] == 0) { 718 if(cur_index == -1) 719 cur_index = i; 720 721 cur_len += 2; 722 ++i; 723 } 724 } else if(values[i] == 0) { 725 if(cur_index == -1) 726 cur_index = i; 727 728 ++cur_len; 729 } 730 731 if(is_end || values[i] != 0) { 732 /* We only consider it for an elision if it's 733 * more than 1 component long. 734 */ 735 if(cur_len > 1 && cur_len > max_len) { 736 /* Found the new elision location. */ 737 max_len = cur_len; 738 max_index = cur_index; 739 } 740 741 /* Reset the current range for the next range of zeros. */ 742 cur_index = -1; 743 cur_len = 0; 744 } 745 } 746 747 *index = max_index; 748 *count = max_len; 749 } 750 751 /* Removes all the leading and trailing white spaces or 752 * control characters from the URI and removes all control 753 * characters inside of the URI string. 
754 */ 755 static BSTR pre_process_uri(LPCWSTR uri) { 756 const WCHAR *start, *end, *ptr; 757 WCHAR *ptr2; 758 DWORD len; 759 BSTR ret; 760 761 start = uri; 762 /* Skip leading controls and whitespace. */ 763 while(*start && (iscntrlW(*start) || isspaceW(*start))) ++start; 764 765 /* URI consisted only of control/whitespace. */ 766 if(!*start) 767 return SysAllocStringLen(NULL, 0); 768 769 end = start + strlenW(start); 770 while(--end > start && (iscntrlW(*end) || isspaceW(*end))); 771 772 len = ++end - start; 773 for(ptr = start; ptr < end; ptr++) { 774 if(iscntrlW(*ptr)) 775 len--; 776 } 777 778 ret = SysAllocStringLen(NULL, len); 779 if(!ret) 780 return NULL; 781 782 for(ptr = start, ptr2=ret; ptr < end; ptr++) { 783 if(!iscntrlW(*ptr)) 784 *ptr2++ = *ptr; 785 } 786 787 return ret; 788 } 789 790 /* Converts the specified IPv4 address into an uint value. 791 * 792 * This function assumes that the IPv4 address has already been validated. 793 */ 794 static UINT ipv4toui(const WCHAR *ip, DWORD len) { 795 UINT ret = 0; 796 DWORD comp_value = 0; 797 const WCHAR *ptr; 798 799 for(ptr = ip; ptr < ip+len; ++ptr) { 800 if(*ptr == '.') { 801 ret <<= 8; 802 ret += comp_value; 803 comp_value = 0; 804 } else 805 comp_value = comp_value*10 + (*ptr-'0'); 806 } 807 808 ret <<= 8; 809 ret += comp_value; 810 811 return ret; 812 } 813 814 /* Converts an IPv4 address in numerical form into its fully qualified 815 * string form. This function returns the number of characters written 816 * to 'dest'. If 'dest' is NULL this function will return the number of 817 * characters that would have been written. 818 * 819 * It's up to the caller to ensure there's enough space in 'dest' for the 820 * address. 
821 */ 822 static DWORD ui2ipv4(WCHAR *dest, UINT address) { 823 static const WCHAR formatW[] = 824 {'%','u','.','%','u','.','%','u','.','%','u',0}; 825 DWORD ret = 0; 826 UCHAR digits[4]; 827 828 digits[0] = (address >> 24) & 0xff; 829 digits[1] = (address >> 16) & 0xff; 830 digits[2] = (address >> 8) & 0xff; 831 digits[3] = address & 0xff; 832 833 if(!dest) { 834 WCHAR tmp[16]; 835 ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]); 836 } else 837 ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]); 838 839 return ret; 840 } 841 842 static DWORD ui2str(WCHAR *dest, UINT value) { 843 static const WCHAR formatW[] = {'%','u',0}; 844 DWORD ret = 0; 845 846 if(!dest) { 847 WCHAR tmp[11]; 848 ret = sprintfW(tmp, formatW, value); 849 } else 850 ret = sprintfW(dest, formatW, value); 851 852 return ret; 853 } 854 855 /* Converts a h16 component (from an IPv6 address) into its 856 * numerical value. 857 * 858 * This function assumes that the h16 component has already been validated. 859 */ 860 static USHORT h16tous(h16 component) { 861 DWORD i; 862 USHORT ret = 0; 863 864 for(i = 0; i < component.len; ++i) { 865 ret <<= 4; 866 ret += hex_to_int(component.str[i]); 867 } 868 869 return ret; 870 } 871 872 /* Converts an IPv6 address into its 128 bits (16 bytes) numerical value. 873 * 874 * This function assumes that the ipv6_address has already been validated. 875 */ 876 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) { 877 DWORD i, cur_component = 0; 878 BOOL already_passed_elision = FALSE; 879 880 for(i = 0; i < address->h16_count; ++i) { 881 if(address->elision) { 882 if(address->components[i].str > address->elision && !already_passed_elision) { 883 /* Means we just passed the elision and need to add its values to 884 * 'number' before we do anything else. 
885 */ 886 INT j; 887 for(j = 0; j < address->elision_size; j+=2) 888 number[cur_component++] = 0; 889 890 already_passed_elision = TRUE; 891 } 892 } 893 894 number[cur_component++] = h16tous(address->components[i]); 895 } 896 897 /* Case when the elision appears after the h16 components. */ 898 if(!already_passed_elision && address->elision) { 899 INT j; 900 for(j = 0; j < address->elision_size; j+=2) 901 number[cur_component++] = 0; 902 } 903 904 if(address->ipv4) { 905 UINT value = ipv4toui(address->ipv4, address->ipv4_len); 906 907 if(cur_component != 6) { 908 ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component); 909 return FALSE; 910 } 911 912 number[cur_component++] = (value >> 16) & 0xffff; 913 number[cur_component] = value & 0xffff; 914 } 915 916 return TRUE; 917 } 918 919 /* Checks if the characters pointed to by 'ptr' are 920 * a percent encoded data octet. 921 * 922 * pct-encoded = "%" HEXDIG HEXDIG 923 */ 924 static BOOL check_pct_encoded(const WCHAR **ptr) { 925 const WCHAR *start = *ptr; 926 927 if(**ptr != '%') 928 return FALSE; 929 930 ++(*ptr); 931 if(!is_hexdigit(**ptr)) { 932 *ptr = start; 933 return FALSE; 934 } 935 936 ++(*ptr); 937 if(!is_hexdigit(**ptr)) { 938 *ptr = start; 939 return FALSE; 940 } 941 942 ++(*ptr); 943 return TRUE; 944 } 945 946 /* dec-octet = DIGIT ; 0-9 947 * / %x31-39 DIGIT ; 10-99 948 * / "1" 2DIGIT ; 100-199 949 * / "2" %x30-34 DIGIT ; 200-249 950 * / "25" %x30-35 ; 250-255 951 */ 952 static BOOL check_dec_octet(const WCHAR **ptr) { 953 const WCHAR *c1, *c2, *c3; 954 955 c1 = *ptr; 956 /* A dec-octet must be at least 1 digit long. */ 957 if(*c1 < '0' || *c1 > '9') 958 return FALSE; 959 960 ++(*ptr); 961 962 c2 = *ptr; 963 /* Since the 1-digit requirement was met, it doesn't 964 * matter if this is a DIGIT value, it's considered a 965 * dec-octet. 966 */ 967 if(*c2 < '0' || *c2 > '9') 968 return TRUE; 969 970 ++(*ptr); 971 972 c3 = *ptr; 973 /* Same explanation as above. 
*/ 974 if(*c3 < '0' || *c3 > '9') 975 return TRUE; 976 977 /* Anything > 255 isn't a valid IP dec-octet. */ 978 if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') { 979 *ptr = c1; 980 return FALSE; 981 } 982 983 ++(*ptr); 984 return TRUE; 985 } 986 987 /* Checks if there is an implicit IPv4 address in the host component of the URI. 988 * The max value of an implicit IPv4 address is UINT_MAX. 989 * 990 * Ex: 991 * "234567" would be considered an implicit IPv4 address. 992 */ 993 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) { 994 const WCHAR *start = *ptr; 995 ULONGLONG ret = 0; 996 *val = 0; 997 998 while(is_num(**ptr)) { 999 ret = ret*10 + (**ptr - '0'); 1000 1001 if(ret > UINT_MAX) { 1002 *ptr = start; 1003 return FALSE; 1004 } 1005 ++(*ptr); 1006 } 1007 1008 if(*ptr == start) 1009 return FALSE; 1010 1011 *val = ret; 1012 return TRUE; 1013 } 1014 1015 /* Checks if the string contains an IPv4 address. 1016 * 1017 * This function has a strict mode or a non-strict mode of operation 1018 * When 'strict' is set to FALSE this function will return TRUE if 1019 * the string contains at least 'dec-octet "." dec-octet' since partial 1020 * IPv4 addresses will be normalized out into full IPv4 addresses. When 1021 * 'strict' is set this function expects there to be a full IPv4 address. 1022 * 1023 * IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet 1024 */ 1025 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) { 1026 const WCHAR *start = *ptr; 1027 1028 if(!check_dec_octet(ptr)) { 1029 *ptr = start; 1030 return FALSE; 1031 } 1032 1033 if(**ptr != '.') { 1034 *ptr = start; 1035 return FALSE; 1036 } 1037 1038 ++(*ptr); 1039 if(!check_dec_octet(ptr)) { 1040 *ptr = start; 1041 return FALSE; 1042 } 1043 1044 if(**ptr != '.') { 1045 if(strict) { 1046 *ptr = start; 1047 return FALSE; 1048 } else 1049 return TRUE; 1050 } 1051 1052 ++(*ptr); 1053 if(!check_dec_octet(ptr)) { 1054 *ptr = start; 1055 return FALSE; 1056 } 1057 1058 if(**ptr != '.') { 1059 if(strict) { 1060 *ptr = start; 1061 return FALSE; 1062 } else 1063 return TRUE; 1064 } 1065 1066 ++(*ptr); 1067 if(!check_dec_octet(ptr)) { 1068 *ptr = start; 1069 return FALSE; 1070 } 1071 1072 /* Found a four digit ip address. */ 1073 return TRUE; 1074 } 1075 /* Tries to parse the scheme name of the URI. 1076 * 1077 * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896. 1078 * NOTE: Windows accepts a number as the first character of a scheme. 1079 */ 1080 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data, DWORD extras) { 1081 const WCHAR *start = *ptr; 1082 1083 data->scheme = NULL; 1084 data->scheme_len = 0; 1085 1086 while(**ptr) { 1087 if(**ptr == '*' && *ptr == start) { 1088 /* Might have found a wildcard scheme. 
If it is the next 1089 * char has to be a ':' for it to be a valid URI 1090 */ 1091 ++(*ptr); 1092 break; 1093 } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' && 1094 **ptr != '-' && **ptr != '.') 1095 break; 1096 1097 (*ptr)++; 1098 } 1099 1100 if(*ptr == start) 1101 return FALSE; 1102 1103 /* Schemes must end with a ':' */ 1104 if(**ptr != ':' && !((extras & ALLOW_NULL_TERM_SCHEME) && !**ptr)) { 1105 *ptr = start; 1106 return FALSE; 1107 } 1108 1109 data->scheme = start; 1110 data->scheme_len = *ptr - start; 1111 1112 ++(*ptr); 1113 return TRUE; 1114 } 1115 1116 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores 1117 * the deduced URL_SCHEME in data->scheme_type. 1118 */ 1119 static BOOL parse_scheme_type(parse_data *data) { 1120 /* If there's scheme data then see if it's a recognized scheme. */ 1121 if(data->scheme && data->scheme_len) { 1122 DWORD i; 1123 1124 for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) { 1125 if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) { 1126 /* Has to be a case insensitive compare. */ 1127 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) { 1128 data->scheme_type = recognized_schemes[i].scheme; 1129 return TRUE; 1130 } 1131 } 1132 } 1133 1134 /* If we get here it means it's not a recognized scheme. */ 1135 data->scheme_type = URL_SCHEME_UNKNOWN; 1136 return TRUE; 1137 } else if(data->is_relative) { 1138 /* Relative URI's have no scheme. */ 1139 data->scheme_type = URL_SCHEME_UNKNOWN; 1140 return TRUE; 1141 } else { 1142 /* Should never reach here! what happened... */ 1143 FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri)); 1144 return FALSE; 1145 } 1146 } 1147 1148 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't 1149 * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type 1150 * using the flags specified in 'flags' (if any). 
Flags that affect how this function 1151 * operates are the Uri_CREATE_ALLOW_* flags. 1152 * 1153 * All parsed/deduced information will be stored in 'data' when the function returns. 1154 * 1155 * Returns TRUE if it was able to successfully parse the information. 1156 */ 1157 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1158 static const WCHAR fileW[] = {'f','i','l','e',0}; 1159 static const WCHAR wildcardW[] = {'*',0}; 1160 1161 /* First check to see if the uri could implicitly be a file path. */ 1162 if(is_implicit_file_path(*ptr)) { 1163 if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) { 1164 data->scheme = fileW; 1165 data->scheme_len = lstrlenW(fileW); 1166 data->has_implicit_scheme = TRUE; 1167 1168 TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags); 1169 } else { 1170 /* Windows does not consider anything that can implicitly be a file 1171 * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set... 1172 */ 1173 TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n", 1174 ptr, data, flags); 1175 return FALSE; 1176 } 1177 } else if(!parse_scheme_name(ptr, data, extras)) { 1178 /* No scheme was found, this means it could be: 1179 * a) an implicit Wildcard scheme 1180 * b) a relative URI 1181 * c) an invalid URI. 1182 */ 1183 if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) { 1184 data->scheme = wildcardW; 1185 data->scheme_len = lstrlenW(wildcardW); 1186 data->has_implicit_scheme = TRUE; 1187 1188 TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags); 1189 } else if (flags & Uri_CREATE_ALLOW_RELATIVE) { 1190 data->is_relative = TRUE; 1191 TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags); 1192 } else { 1193 TRACE("(%p %p %x): Malformed URI found. 
Unable to deduce scheme name.\n", ptr, data, flags); 1194 return FALSE; 1195 } 1196 } 1197 1198 if(!data->is_relative) 1199 TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags, 1200 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 1201 1202 if(!parse_scheme_type(data)) 1203 return FALSE; 1204 1205 TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type); 1206 return TRUE; 1207 } 1208 1209 static BOOL parse_username(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1210 data->username = *ptr; 1211 1212 while(**ptr != ':' && **ptr != '@') { 1213 if(**ptr == '%') { 1214 if(!check_pct_encoded(ptr)) { 1215 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1216 *ptr = data->username; 1217 data->username = NULL; 1218 return FALSE; 1219 } 1220 } else 1221 continue; 1222 } else if(extras & ALLOW_NULL_TERM_USER_NAME && !**ptr) 1223 break; 1224 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1225 *ptr = data->username; 1226 data->username = NULL; 1227 return FALSE; 1228 } 1229 1230 ++(*ptr); 1231 } 1232 1233 data->username_len = *ptr - data->username; 1234 return TRUE; 1235 } 1236 1237 static BOOL parse_password(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1238 data->password = *ptr; 1239 1240 while(**ptr != '@') { 1241 if(**ptr == '%') { 1242 if(!check_pct_encoded(ptr)) { 1243 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1244 *ptr = data->password; 1245 data->password = NULL; 1246 return FALSE; 1247 } 1248 } else 1249 continue; 1250 } else if(extras & ALLOW_NULL_TERM_PASSWORD && !**ptr) 1251 break; 1252 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1253 *ptr = data->password; 1254 data->password = NULL; 1255 return FALSE; 1256 } 1257 1258 ++(*ptr); 1259 } 1260 1261 data->password_len = *ptr - data->password; 1262 return TRUE; 1263 } 1264 1265 /* Parses the userinfo part of the URI (if it exists). 
The userinfo field of 1266 * a URI can consist of "username:password@", or just "username@". 1267 * 1268 * RFC def: 1269 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 1270 * 1271 * NOTES: 1272 * 1) If there is more than one ':' in the userinfo part of the URI Windows 1273 * uses the first occurrence of ':' to delimit the username and password 1274 * components. 1275 * 1276 * ex: 1277 * ftp://user:pass:word@winehq.org 1278 * 1279 * would yield "user" as the username and "pass:word" as the password. 1280 * 1281 * 2) Windows allows any character to appear in the "userinfo" part of 1282 * a URI, as long as it's not an authority delimiter character set. 1283 */ 1284 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) { 1285 const WCHAR *start = *ptr; 1286 1287 if(!parse_username(ptr, data, flags, 0)) { 1288 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1289 return; 1290 } 1291 1292 if(**ptr == ':') { 1293 ++(*ptr); 1294 if(!parse_password(ptr, data, flags, 0)) { 1295 *ptr = start; 1296 data->username = NULL; 1297 data->username_len = 0; 1298 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1299 return; 1300 } 1301 } 1302 1303 if(**ptr != '@') { 1304 *ptr = start; 1305 data->username = NULL; 1306 data->username_len = 0; 1307 data->password = NULL; 1308 data->password_len = 0; 1309 1310 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 1311 return; 1312 } 1313 1314 if(data->username) 1315 TRACE("(%p %p %x): Found username %s len=%d.\n", ptr, data, flags, 1316 debugstr_wn(data->username, data->username_len), data->username_len); 1317 1318 if(data->password) 1319 TRACE("(%p %p %x): Found password %s len=%d.\n", ptr, data, flags, 1320 debugstr_wn(data->password, data->password_len), data->password_len); 1321 1322 ++(*ptr); 1323 } 1324 1325 /* Attempts to parse a port from the URI. 
1326 * 1327 * NOTES: 1328 * Windows seems to have a cap on what the maximum value 1329 * for a port can be. The max value is USHORT_MAX. 1330 * 1331 * port = *DIGIT 1332 */ 1333 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) { 1334 UINT port = 0; 1335 data->port = *ptr; 1336 1337 while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 1338 if(!is_num(**ptr)) { 1339 *ptr = data->port; 1340 data->port = NULL; 1341 return FALSE; 1342 } 1343 1344 port = port*10 + (**ptr-'0'); 1345 1346 if(port > USHRT_MAX) { 1347 *ptr = data->port; 1348 data->port = NULL; 1349 return FALSE; 1350 } 1351 1352 ++(*ptr); 1353 } 1354 1355 data->has_port = TRUE; 1356 data->port_value = port; 1357 data->port_len = *ptr - data->port; 1358 1359 TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags, 1360 debugstr_wn(data->port, data->port_len), data->port_len, data->port_value); 1361 return TRUE; 1362 } 1363 1364 /* Attempts to parse a IPv4 address from the URI. 1365 * 1366 * NOTES: 1367 * Windows normalizes IPv4 addresses, This means there are three 1368 * possibilities for the URI to contain an IPv4 address. 1369 * 1) A well formed address (ex. 192.2.2.2). 1370 * 2) A partially formed address. For example "192.0" would 1371 * normalize to "192.0.0.0" during canonicalization. 1372 * 3) An implicit IPv4 address. For example "256" would 1373 * normalize to "0.0.1.0" during canonicalization. Also 1374 * note that the maximum value for an implicit IP address 1375 * is UINT_MAX, if the value in the URI exceeds this then 1376 * it is not considered an IPv4 address. 
1377 */ 1378 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) { 1379 const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN; 1380 data->host = *ptr; 1381 1382 if(!check_ipv4address(ptr, FALSE)) { 1383 if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) { 1384 TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n", 1385 ptr, data, flags); 1386 *ptr = data->host; 1387 data->host = NULL; 1388 return FALSE; 1389 } else 1390 data->has_implicit_ip = TRUE; 1391 } 1392 1393 data->host_len = *ptr - data->host; 1394 data->host_type = Uri_HOST_IPV4; 1395 1396 /* Check if what we found is the only part of the host name (if it isn't 1397 * we don't have an IPv4 address). 1398 */ 1399 if(**ptr == ':') { 1400 ++(*ptr); 1401 if(!parse_port(ptr, data, flags)) { 1402 *ptr = data->host; 1403 data->host = NULL; 1404 return FALSE; 1405 } 1406 } else if(!is_auth_delim(**ptr, !is_unknown)) { 1407 /* Found more data which belongs to the host, so this isn't an IPv4. */ 1408 *ptr = data->host; 1409 data->host = NULL; 1410 data->has_implicit_ip = FALSE; 1411 return FALSE; 1412 } 1413 1414 TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n", 1415 ptr, data, flags, debugstr_wn(data->host, data->host_len), 1416 data->host_len, data->host_type); 1417 return TRUE; 1418 } 1419 1420 /* Attempts to parse the reg-name from the URI. 1421 * 1422 * Because of the way Windows handles ':' this function also 1423 * handles parsing the port. 1424 * 1425 * reg-name = *( unreserved / pct-encoded / sub-delims ) 1426 * 1427 * NOTE: 1428 * Windows allows everything, but, the characters in "auth_delims" and ':' 1429 * to appear in a reg-name, unless it's an unknown scheme type then ':' is 1430 * allowed to appear (even if a valid port isn't after it). 1431 * 1432 * Windows doesn't like host names which start with '[' and end with ']' 1433 * and don't contain a valid IP literal address in between them. 
1434 * 1435 * On Windows if a '[' is encountered in the host name the ':' no longer 1436 * counts as a delimiter until you reach the next ']' or an "authority delimiter". 1437 * 1438 * A reg-name CAN be empty. 1439 */ 1440 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1441 const BOOL has_start_bracket = **ptr == '['; 1442 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 1443 const BOOL is_res = data->scheme_type == URL_SCHEME_RES; 1444 BOOL inside_brackets = has_start_bracket; 1445 1446 /* res URIs don't have ports. */ 1447 BOOL ignore_col = (extras & IGNORE_PORT_DELIMITER) || is_res; 1448 1449 /* We have to be careful with file schemes. */ 1450 if(data->scheme_type == URL_SCHEME_FILE) { 1451 /* This is because an implicit file scheme could be "C:\\test" and it 1452 * would trick this function into thinking the host is "C", when after 1453 * canonicalization the host would end up being an empty string. A drive 1454 * path can also have a '|' instead of a ':' after the drive letter. 1455 */ 1456 if(is_drive_path(*ptr)) { 1457 /* Regular old drive paths have no host type (or host name). */ 1458 data->host_type = Uri_HOST_UNKNOWN; 1459 data->host = *ptr; 1460 data->host_len = 0; 1461 return TRUE; 1462 } else if(is_unc_path(*ptr)) 1463 /* Skip past the "\\" of a UNC path. */ 1464 *ptr += 2; 1465 } 1466 1467 data->host = *ptr; 1468 1469 /* For res URIs, everything before the first '/' is 1470 * considered the host. 1471 */ 1472 while((!is_res && !is_auth_delim(**ptr, known_scheme)) || 1473 (is_res && **ptr && **ptr != '/')) { 1474 if(**ptr == ':' && !ignore_col) { 1475 /* We can ignore ':' if we are inside brackets.*/ 1476 if(!inside_brackets) { 1477 const WCHAR *tmp = (*ptr)++; 1478 1479 /* Attempt to parse the port. */ 1480 if(!parse_port(ptr, data, flags)) { 1481 /* Windows expects there to be a valid port for known scheme types. 
*/ 1482 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 1483 *ptr = data->host; 1484 data->host = NULL; 1485 TRACE("(%p %p %x %x): Expected valid port\n", ptr, data, flags, extras); 1486 return FALSE; 1487 } else 1488 /* Windows gives up on trying to parse a port when it 1489 * encounters an invalid port. 1490 */ 1491 ignore_col = TRUE; 1492 } else { 1493 data->host_len = tmp - data->host; 1494 break; 1495 } 1496 } 1497 } else if(**ptr == '%' && (known_scheme && !is_res)) { 1498 /* Has to be a legit % encoded value. */ 1499 if(!check_pct_encoded(ptr)) { 1500 *ptr = data->host; 1501 data->host = NULL; 1502 return FALSE; 1503 } else 1504 continue; 1505 } else if(is_res && is_forbidden_dos_path_char(**ptr)) { 1506 *ptr = data->host; 1507 data->host = NULL; 1508 return FALSE; 1509 } else if(**ptr == ']') 1510 inside_brackets = FALSE; 1511 else if(**ptr == '[') 1512 inside_brackets = TRUE; 1513 1514 ++(*ptr); 1515 } 1516 1517 if(has_start_bracket) { 1518 /* Make sure the last character of the host wasn't a ']'. */ 1519 if(*(*ptr-1) == ']') { 1520 TRACE("(%p %p %x %x): Expected an IP literal inside of the host\n", 1521 ptr, data, flags, extras); 1522 *ptr = data->host; 1523 data->host = NULL; 1524 return FALSE; 1525 } 1526 } 1527 1528 /* Don't overwrite our length if we found a port earlier. */ 1529 if(!data->port) 1530 data->host_len = *ptr - data->host; 1531 1532 /* If the host is empty, then it's an unknown host type. */ 1533 if(data->host_len == 0 || is_res) 1534 data->host_type = Uri_HOST_UNKNOWN; 1535 else 1536 data->host_type = Uri_HOST_DNS; 1537 1538 TRACE("(%p %p %x %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags, extras, 1539 debugstr_wn(data->host, data->host_len), data->host_len); 1540 return TRUE; 1541 } 1542 1543 /* Attempts to parse an IPv6 address out of the URI. 
1544 * 1545 * IPv6address = 6( h16 ":" ) ls32 1546 * / "::" 5( h16 ":" ) ls32 1547 * / [ h16 ] "::" 4( h16 ":" ) ls32 1548 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 1549 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 1550 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 1551 * / [ *4( h16 ":" ) h16 ] "::" ls32 1552 * / [ *5( h16 ":" ) h16 ] "::" h16 1553 * / [ *6( h16 ":" ) h16 ] "::" 1554 * 1555 * ls32 = ( h16 ":" h16 ) / IPv4address 1556 * ; least-significant 32 bits of address. 1557 * 1558 * h16 = 1*4HEXDIG 1559 * ; 16 bits of address represented in hexadecimal. 1560 * 1561 * Modeled after google-url's 'DoParseIPv6' function. 1562 */ 1563 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) { 1564 const WCHAR *start, *cur_start; 1565 ipv6_address ip; 1566 1567 start = cur_start = *ptr; 1568 memset(&ip, 0, sizeof(ipv6_address)); 1569 1570 for(;; ++(*ptr)) { 1571 /* Check if we're on the last character of the host. */ 1572 BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN) 1573 || **ptr == ']'); 1574 1575 BOOL is_split = (**ptr == ':'); 1576 BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':'); 1577 1578 /* Check if we're at the end of a component, or 1579 * if we're at the end of the IPv6 address. 1580 */ 1581 if(is_split || is_end) { 1582 DWORD cur_len = 0; 1583 1584 cur_len = *ptr - cur_start; 1585 1586 /* h16 can't have a length > 4. */ 1587 if(cur_len > 4) { 1588 *ptr = start; 1589 1590 TRACE("(%p %p %x): h16 component to long.\n", 1591 ptr, data, flags); 1592 return FALSE; 1593 } 1594 1595 if(cur_len == 0) { 1596 /* An h16 component can't have the length of 0 unless 1597 * the elision is at the beginning of the address, or 1598 * at the end of the address. 
1599 */ 1600 if(!((*ptr == start && is_elision) || 1601 (is_end && (*ptr-2) == ip.elision))) { 1602 *ptr = start; 1603 TRACE("(%p %p %x): IPv6 component cannot have a length of 0.\n", 1604 ptr, data, flags); 1605 return FALSE; 1606 } 1607 } 1608 1609 if(cur_len > 0) { 1610 /* An IPv6 address can have no more than 8 h16 components. */ 1611 if(ip.h16_count >= 8) { 1612 *ptr = start; 1613 TRACE("(%p %p %x): Not a IPv6 address, too many h16 components.\n", 1614 ptr, data, flags); 1615 return FALSE; 1616 } 1617 1618 ip.components[ip.h16_count].str = cur_start; 1619 ip.components[ip.h16_count].len = cur_len; 1620 1621 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n", 1622 ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len, 1623 ip.h16_count); 1624 ++ip.h16_count; 1625 } 1626 } 1627 1628 if(is_end) 1629 break; 1630 1631 if(is_elision) { 1632 /* A IPv6 address can only have 1 elision ('::'). */ 1633 if(ip.elision) { 1634 *ptr = start; 1635 1636 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n", 1637 ptr, data, flags); 1638 return FALSE; 1639 } 1640 1641 ip.elision = *ptr; 1642 ++(*ptr); 1643 } 1644 1645 if(is_split) 1646 cur_start = *ptr+1; 1647 else { 1648 if(!check_ipv4address(ptr, TRUE)) { 1649 if(!is_hexdigit(**ptr)) { 1650 /* Not a valid character for an IPv6 address. */ 1651 *ptr = start; 1652 return FALSE; 1653 } 1654 } else { 1655 /* Found an IPv4 address. */ 1656 ip.ipv4 = cur_start; 1657 ip.ipv4_len = *ptr - cur_start; 1658 1659 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n", 1660 ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len), 1661 ip.ipv4_len); 1662 1663 /* IPv4 addresses can only appear at the end of a IPv6. */ 1664 break; 1665 } 1666 } 1667 } 1668 1669 compute_ipv6_comps_size(&ip); 1670 1671 /* Make sure the IPv6 address adds up to 16 bytes. 
*/ 1672 if(ip.components_size + ip.elision_size != 16) { 1673 *ptr = start; 1674 TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n", 1675 ptr, data, flags); 1676 return FALSE; 1677 } 1678 1679 if(ip.elision_size == 2) { 1680 /* For some reason on Windows if an elision that represents 1681 * only one h16 component is encountered at the very begin or 1682 * end of an IPv6 address, Windows does not consider it a 1683 * valid IPv6 address. 1684 * 1685 * Ex: [::2:3:4:5:6:7] is not valid, even though the sum 1686 * of all the components == 128bits. 1687 */ 1688 if(ip.elision < ip.components[0].str || 1689 ip.elision > ip.components[ip.h16_count-1].str) { 1690 *ptr = start; 1691 TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n", 1692 ptr, data, flags); 1693 return FALSE; 1694 } 1695 } 1696 1697 data->host_type = Uri_HOST_IPV6; 1698 data->has_ipv6 = TRUE; 1699 data->ipv6_address = ip; 1700 1701 TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n", 1702 ptr, data, flags, debugstr_wn(start, *ptr-start), 1703 (int)(*ptr-start)); 1704 return TRUE; 1705 } 1706 1707 /* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */ 1708 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) { 1709 const WCHAR *start = *ptr; 1710 1711 /* IPvFuture has to start with a 'v' or 'V'. */ 1712 if(**ptr != 'v' && **ptr != 'V') 1713 return FALSE; 1714 1715 /* Following the v there must be at least 1 hex digit. */ 1716 ++(*ptr); 1717 if(!is_hexdigit(**ptr)) { 1718 *ptr = start; 1719 return FALSE; 1720 } 1721 1722 ++(*ptr); 1723 while(is_hexdigit(**ptr)) 1724 ++(*ptr); 1725 1726 /* End of the hexdigit sequence must be a '.' 
*/ 1727 if(**ptr != '.') { 1728 *ptr = start; 1729 return FALSE; 1730 } 1731 1732 ++(*ptr); 1733 if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') { 1734 *ptr = start; 1735 return FALSE; 1736 } 1737 1738 ++(*ptr); 1739 while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':') 1740 ++(*ptr); 1741 1742 data->host_type = Uri_HOST_UNKNOWN; 1743 1744 TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags, 1745 debugstr_wn(start, *ptr-start), (int)(*ptr-start)); 1746 1747 return TRUE; 1748 } 1749 1750 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]" */ 1751 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1752 data->host = *ptr; 1753 1754 if(**ptr != '[' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 1755 data->host = NULL; 1756 return FALSE; 1757 } else if(**ptr == '[') 1758 ++(*ptr); 1759 1760 if(!parse_ipv6address(ptr, data, flags)) { 1761 if(extras & SKIP_IP_FUTURE_CHECK || !parse_ipvfuture(ptr, data, flags)) { 1762 *ptr = data->host; 1763 data->host = NULL; 1764 return FALSE; 1765 } 1766 } 1767 1768 if(**ptr != ']' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 1769 *ptr = data->host; 1770 data->host = NULL; 1771 return FALSE; 1772 } else if(!**ptr && extras & ALLOW_BRACKETLESS_IP_LITERAL) { 1773 /* The IP literal didn't contain brackets and was followed by 1774 * a NULL terminator, so no reason to even check the port. 1775 */ 1776 data->host_len = *ptr - data->host; 1777 return TRUE; 1778 } 1779 1780 ++(*ptr); 1781 if(**ptr == ':') { 1782 ++(*ptr); 1783 /* If a valid port is not found, then let it trickle down to 1784 * parse_reg_name. 1785 */ 1786 if(!parse_port(ptr, data, flags)) { 1787 *ptr = data->host; 1788 data->host = NULL; 1789 return FALSE; 1790 } 1791 } else 1792 data->host_len = *ptr - data->host; 1793 1794 return TRUE; 1795 } 1796 1797 /* Parses the host information from the URI. 
1798 * 1799 * host = IP-literal / IPv4address / reg-name 1800 */ 1801 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 1802 if(!parse_ip_literal(ptr, data, flags, extras)) { 1803 if(!parse_ipv4address(ptr, data, flags)) { 1804 if(!parse_reg_name(ptr, data, flags, extras)) { 1805 TRACE("(%p %p %x %x): Malformed URI, Unknown host type.\n", 1806 ptr, data, flags, extras); 1807 return FALSE; 1808 } 1809 } 1810 } 1811 1812 return TRUE; 1813 } 1814 1815 /* Parses the authority information from the URI. 1816 * 1817 * authority = [ userinfo "@" ] host [ ":" port ] 1818 */ 1819 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) { 1820 parse_userinfo(ptr, data, flags); 1821 1822 /* Parsing the port will happen during one of the host parsing 1823 * routines (if the URI has a port). 1824 */ 1825 if(!parse_host(ptr, data, flags, 0)) 1826 return FALSE; 1827 1828 return TRUE; 1829 } 1830 1831 /* Attempts to parse the path information of a hierarchical URI. */ 1832 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) { 1833 const WCHAR *start = *ptr; 1834 static const WCHAR slash[] = {'/',0}; 1835 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 1836 1837 if(is_path_delim(data->scheme_type, **ptr)) { 1838 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->must_have_path) { 1839 data->path = NULL; 1840 data->path_len = 0; 1841 } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 1842 /* If the path component is empty, then a '/' is added. 
*/ 1843 data->path = slash; 1844 data->path_len = 1; 1845 } 1846 } else { 1847 while(!is_path_delim(data->scheme_type, **ptr)) { 1848 if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && !is_file) { 1849 if(!check_pct_encoded(ptr)) { 1850 *ptr = start; 1851 return FALSE; 1852 } else 1853 continue; 1854 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 1855 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 1856 /* File schemes with USE_DOS_PATH set aren't allowed to have 1857 * a '<' or '>' or '\"' appear in them. 1858 */ 1859 *ptr = start; 1860 return FALSE; 1861 } else if(**ptr == '\\') { 1862 /* Not allowed to have a backslash if NO_CANONICALIZE is set 1863 * and the scheme is known type (but not a file scheme). 1864 */ 1865 if(flags & Uri_CREATE_NO_CANONICALIZE) { 1866 if(data->scheme_type != URL_SCHEME_FILE && 1867 data->scheme_type != URL_SCHEME_UNKNOWN) { 1868 *ptr = start; 1869 return FALSE; 1870 } 1871 } 1872 } 1873 1874 ++(*ptr); 1875 } 1876 1877 /* The only time a URI doesn't have a path is when 1878 * the NO_CANONICALIZE flag is set and the raw URI 1879 * didn't contain one. 1880 */ 1881 if(*ptr == start) { 1882 data->path = NULL; 1883 data->path_len = 0; 1884 } else { 1885 data->path = start; 1886 data->path_len = *ptr - start; 1887 } 1888 } 1889 1890 if(data->path) 1891 TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags, 1892 debugstr_wn(data->path, data->path_len), data->path_len); 1893 else 1894 TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags); 1895 1896 return TRUE; 1897 } 1898 1899 /* Parses the path of an opaque URI (much less strict than the parser 1900 * for a hierarchical URI). 1901 * 1902 * NOTE: 1903 * Windows allows invalid % encoded data to appear in opaque URI paths 1904 * for unknown scheme types. 1905 * 1906 * File schemes with USE_DOS_PATH set aren't allowed to have '<', '>', or '\"' 1907 * appear in them. 
1908 */ 1909 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) { 1910 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 1911 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 1912 const BOOL is_mailto = data->scheme_type == URL_SCHEME_MAILTO; 1913 1914 if (is_mailto && (*ptr)[0] == '/' && (*ptr)[1] == '/') 1915 { 1916 if ((*ptr)[2]) data->path = *ptr + 2; 1917 else data->path = NULL; 1918 } 1919 else 1920 data->path = *ptr; 1921 1922 while(!is_path_delim(data->scheme_type, **ptr)) { 1923 if(**ptr == '%' && known_scheme) { 1924 if(!check_pct_encoded(ptr)) { 1925 *ptr = data->path; 1926 data->path = NULL; 1927 return FALSE; 1928 } else 1929 continue; 1930 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 1931 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 1932 *ptr = data->path; 1933 data->path = NULL; 1934 return FALSE; 1935 } 1936 1937 ++(*ptr); 1938 } 1939 1940 if (data->path) data->path_len = *ptr - data->path; 1941 TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags, 1942 debugstr_wn(data->path, data->path_len), data->path_len); 1943 return TRUE; 1944 } 1945 1946 /* Determines how the URI should be parsed after the scheme information. 1947 * 1948 * If the scheme is followed by "//", then it is treated as a hierarchical URI 1949 * which then the authority and path information will be parsed out. Otherwise, the 1950 * URI will be treated as an opaque URI which the authority information is not parsed 1951 * out. 
1952 * 1953 * RFC 3896 definition of hier-part: 1954 * 1955 * hier-part = "//" authority path-abempty 1956 * / path-absolute 1957 * / path-rootless 1958 * / path-empty 1959 * 1960 * MSDN opaque URI definition: 1961 * scheme ":" path [ "#" fragment ] 1962 * 1963 * NOTES: 1964 * If the URI is of an unknown scheme type and has a "//" following the scheme then it 1965 * is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is 1966 * set then it is considered an opaque URI regardless of what follows the scheme information 1967 * (per MSDN documentation). 1968 */ 1969 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { 1970 const WCHAR *start = *ptr; 1971 1972 data->must_have_path = FALSE; 1973 1974 /* For javascript: URIs, simply set everything as a path */ 1975 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) { 1976 data->path = *ptr; 1977 data->path_len = strlenW(*ptr); 1978 data->is_opaque = TRUE; 1979 *ptr += data->path_len; 1980 return TRUE; 1981 } 1982 1983 /* Checks if the authority information needs to be parsed. */ 1984 if(is_hierarchical_uri(ptr, data)) { 1985 /* Only treat it as a hierarchical URI if the scheme_type is known or 1986 * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set. 1987 */ 1988 if(data->scheme_type != URL_SCHEME_UNKNOWN || 1989 !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) { 1990 TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags); 1991 data->is_opaque = FALSE; 1992 1993 if(data->scheme_type == URL_SCHEME_WILDCARD && !data->has_implicit_scheme) { 1994 if(**ptr == '/' && *(*ptr+1) == '/') { 1995 data->must_have_path = TRUE; 1996 *ptr += 2; 1997 } 1998 } 1999 2000 /* TODO: Handle hierarchical URI's, parse authority then parse the path. 
*/ 2001 if(!parse_authority(ptr, data, flags)) 2002 return FALSE; 2003 2004 return parse_path_hierarchical(ptr, data, flags); 2005 } else 2006 /* Reset ptr to its starting position so opaque path parsing 2007 * begins at the correct location. 2008 */ 2009 *ptr = start; 2010 } 2011 2012 /* If it reaches here, then the URI will be treated as an opaque 2013 * URI. 2014 */ 2015 2016 TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags); 2017 2018 data->is_opaque = TRUE; 2019 if(!parse_path_opaque(ptr, data, flags)) 2020 return FALSE; 2021 2022 return TRUE; 2023 } 2024 2025 /* Attempts to parse the query string from the URI. 2026 * 2027 * NOTES: 2028 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 2029 * data is allowed to appear in the query string. For unknown scheme types 2030 * invalid percent encoded data is allowed to appear regardless. 2031 */ 2032 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) { 2033 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2034 2035 if(**ptr != '?') { 2036 TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags); 2037 return TRUE; 2038 } 2039 2040 data->query = *ptr; 2041 2042 ++(*ptr); 2043 while(**ptr && **ptr != '#') { 2044 if(**ptr == '%' && known_scheme && 2045 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 2046 if(!check_pct_encoded(ptr)) { 2047 *ptr = data->query; 2048 data->query = NULL; 2049 return FALSE; 2050 } else 2051 continue; 2052 } 2053 2054 ++(*ptr); 2055 } 2056 2057 data->query_len = *ptr - data->query; 2058 2059 TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags, 2060 debugstr_wn(data->query, data->query_len), data->query_len); 2061 return TRUE; 2062 } 2063 2064 /* Attempts to parse the fragment from the URI. 2065 * 2066 * NOTES: 2067 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 2068 * data is allowed to appear in the query string. 
For unknown scheme types
 *  invalid percent encoded data is allowed to appear regardless.
 *
 * A URI without a fragment is not an error. The recorded fragment
 * includes the leading '#' and runs to the end of the string. On failure
 * data->fragment is NULL and *ptr is rewound to the '#'.
 */
static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) {
    const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN;
    const BOOL validate_pct = known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO);

    if(**ptr != '#') {
        TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags);
        return TRUE;
    }

    data->fragment = *ptr;

    /* Step over the '#' and scan everything that is left. */
    ++(*ptr);
    while(**ptr) {
        if(**ptr == '%' && validate_pct) {
            if(!check_pct_encoded(ptr)) {
                *ptr = data->fragment;
                data->fragment = NULL;
                return FALSE;
            }
            continue;
        }

        ++(*ptr);
    }

    data->fragment_len = *ptr - data->fragment;

    TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags,
        debugstr_wn(data->fragment, data->fragment_len), data->fragment_len);
    return TRUE;
}

/* Parses and validates the components of the URI specified by data->uri
 * and stores the information it parses into 'data'.
 *
 * Returns TRUE if it successfully parsed the URI. False otherwise.
2107 */ 2108 static BOOL parse_uri(parse_data *data, DWORD flags) { 2109 const WCHAR *ptr; 2110 const WCHAR **pptr; 2111 2112 ptr = data->uri; 2113 pptr = &ptr; 2114 2115 TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri)); 2116 2117 if(!parse_scheme(pptr, data, flags, 0)) 2118 return FALSE; 2119 2120 if(!parse_hierpart(pptr, data, flags)) 2121 return FALSE; 2122 2123 if(!parse_query(pptr, data, flags)) 2124 return FALSE; 2125 2126 if(!parse_fragment(pptr, data, flags)) 2127 return FALSE; 2128 2129 TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags); 2130 return TRUE; 2131 } 2132 2133 static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2134 const WCHAR *ptr; 2135 2136 if(!data->username) { 2137 uri->userinfo_start = -1; 2138 return TRUE; 2139 } 2140 2141 uri->userinfo_start = uri->canon_len; 2142 for(ptr = data->username; ptr < data->username+data->username_len; ++ptr) { 2143 if(*ptr == '%') { 2144 /* Only decode % encoded values for known scheme types. */ 2145 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2146 /* See if the value really needs decoding. */ 2147 WCHAR val = decode_pct_val(ptr); 2148 if(is_unreserved(val)) { 2149 if(!computeOnly) 2150 uri->canon_uri[uri->canon_len] = val; 2151 2152 ++uri->canon_len; 2153 2154 /* Move pass the hex characters. */ 2155 ptr += 2; 2156 continue; 2157 } 2158 } 2159 } else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 2160 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 2161 * is NOT set. 2162 */ 2163 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 2164 if(!computeOnly) 2165 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 2166 2167 uri->canon_len += 3; 2168 continue; 2169 } 2170 } 2171 2172 if(!computeOnly) 2173 /* Nothing special, so just copy the character over. 
*/ 2174 uri->canon_uri[uri->canon_len] = *ptr; 2175 ++uri->canon_len; 2176 } 2177 2178 return TRUE; 2179 } 2180 2181 static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2182 const WCHAR *ptr; 2183 2184 if(!data->password) { 2185 uri->userinfo_split = -1; 2186 return TRUE; 2187 } 2188 2189 if(uri->userinfo_start == -1) 2190 /* Has a password, but, doesn't have a username. */ 2191 uri->userinfo_start = uri->canon_len; 2192 2193 uri->userinfo_split = uri->canon_len - uri->userinfo_start; 2194 2195 /* Add the ':' to the userinfo component. */ 2196 if(!computeOnly) 2197 uri->canon_uri[uri->canon_len] = ':'; 2198 ++uri->canon_len; 2199 2200 for(ptr = data->password; ptr < data->password+data->password_len; ++ptr) { 2201 if(*ptr == '%') { 2202 /* Only decode % encoded values for known scheme types. */ 2203 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2204 /* See if the value really needs decoding. */ 2205 WCHAR val = decode_pct_val(ptr); 2206 if(is_unreserved(val)) { 2207 if(!computeOnly) 2208 uri->canon_uri[uri->canon_len] = val; 2209 2210 ++uri->canon_len; 2211 2212 /* Move pass the hex characters. */ 2213 ptr += 2; 2214 continue; 2215 } 2216 } 2217 } else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 2218 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 2219 * is NOT set. 2220 */ 2221 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 2222 if(!computeOnly) 2223 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 2224 2225 uri->canon_len += 3; 2226 continue; 2227 } 2228 } 2229 2230 if(!computeOnly) 2231 /* Nothing special, so just copy the character over. */ 2232 uri->canon_uri[uri->canon_len] = *ptr; 2233 ++uri->canon_len; 2234 } 2235 2236 return TRUE; 2237 } 2238 2239 /* Canonicalizes the userinfo of the URI represented by the parse_data. 2240 * 2241 * Canonicalization of the userinfo is a simple process. 
If there are any percent 2242 * encoded characters that fall in the "unreserved" character set, they are decoded 2243 * to their actual value. If a character is not in the "unreserved" or "reserved" sets 2244 * then it is percent encoded. Other than that the characters are copied over without 2245 * change. 2246 */ 2247 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2248 uri->userinfo_start = uri->userinfo_split = -1; 2249 uri->userinfo_len = 0; 2250 2251 if(!data->username && !data->password) 2252 /* URI doesn't have userinfo, so nothing to do here. */ 2253 return TRUE; 2254 2255 if(!canonicalize_username(data, uri, flags, computeOnly)) 2256 return FALSE; 2257 2258 if(!canonicalize_password(data, uri, flags, computeOnly)) 2259 return FALSE; 2260 2261 uri->userinfo_len = uri->canon_len - uri->userinfo_start; 2262 if(!computeOnly) 2263 TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n", 2264 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len), 2265 uri->userinfo_split, uri->userinfo_len); 2266 2267 /* Now insert the '@' after the userinfo. */ 2268 if(!computeOnly) 2269 uri->canon_uri[uri->canon_len] = '@'; 2270 ++uri->canon_len; 2271 2272 return TRUE; 2273 } 2274 2275 /* Attempts to canonicalize a reg_name. 2276 * 2277 * Things that happen: 2278 * 1) If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is 2279 * lower cased. Unless it's an unknown scheme type, which case it's 2280 * no lower cased regardless. 2281 * 2282 * 2) Unreserved % encoded characters are decoded for known 2283 * scheme types. 2284 * 2285 * 3) Forbidden characters are % encoded as long as 2286 * Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and 2287 * it isn't an unknown scheme type. 2288 * 2289 * 4) If it's a file scheme and the host is "localhost" it's removed. 
2290 * 2291 * 5) If it's a file scheme and Uri_CREATE_FILE_USE_DOS_PATH is set, 2292 * then the UNC path characters are added before the host name. 2293 */ 2294 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri, 2295 DWORD flags, BOOL computeOnly) { 2296 static const WCHAR localhostW[] = 2297 {'l','o','c','a','l','h','o','s','t',0}; 2298 const WCHAR *ptr; 2299 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2300 2301 if(data->scheme_type == URL_SCHEME_FILE && 2302 data->host_len == lstrlenW(localhostW)) { 2303 if(!StrCmpNIW(data->host, localhostW, data->host_len)) { 2304 uri->host_start = -1; 2305 uri->host_len = 0; 2306 uri->host_type = Uri_HOST_UNKNOWN; 2307 return TRUE; 2308 } 2309 } 2310 2311 if(data->scheme_type == URL_SCHEME_FILE && flags & Uri_CREATE_FILE_USE_DOS_PATH) { 2312 if(!computeOnly) { 2313 uri->canon_uri[uri->canon_len] = '\\'; 2314 uri->canon_uri[uri->canon_len+1] = '\\'; 2315 } 2316 uri->canon_len += 2; 2317 uri->authority_start = uri->canon_len; 2318 } 2319 2320 uri->host_start = uri->canon_len; 2321 2322 for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) { 2323 if(*ptr == '%' && known_scheme) { 2324 WCHAR val = decode_pct_val(ptr); 2325 if(is_unreserved(val)) { 2326 /* If NO_CANONICALIZE is not set, then windows lower cases the 2327 * decoded value. 2328 */ 2329 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) { 2330 if(!computeOnly) 2331 uri->canon_uri[uri->canon_len] = tolowerW(val); 2332 } else { 2333 if(!computeOnly) 2334 uri->canon_uri[uri->canon_len] = val; 2335 } 2336 ++uri->canon_len; 2337 2338 /* Skip past the % encoded character. */ 2339 ptr += 2; 2340 continue; 2341 } else { 2342 /* Just copy the % over. */ 2343 if(!computeOnly) 2344 uri->canon_uri[uri->canon_len] = *ptr; 2345 ++uri->canon_len; 2346 } 2347 } else if(*ptr == '\\') { 2348 /* Only unknown scheme types could have made it here with a '\\' in the host name. 
*/ 2349 if(!computeOnly) 2350 uri->canon_uri[uri->canon_len] = *ptr; 2351 ++uri->canon_len; 2352 } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 2353 !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) { 2354 if(!computeOnly) { 2355 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 2356 2357 /* The percent encoded value gets lower cased also. */ 2358 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 2359 uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]); 2360 uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]); 2361 } 2362 } 2363 2364 uri->canon_len += 3; 2365 } else { 2366 if(!computeOnly) { 2367 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme) 2368 uri->canon_uri[uri->canon_len] = tolowerW(*ptr); 2369 else 2370 uri->canon_uri[uri->canon_len] = *ptr; 2371 } 2372 2373 ++uri->canon_len; 2374 } 2375 } 2376 2377 uri->host_len = uri->canon_len - uri->host_start; 2378 2379 if(!computeOnly) 2380 TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags, 2381 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2382 uri->host_len); 2383 2384 if(!computeOnly) 2385 find_domain_name(uri->canon_uri+uri->host_start, uri->host_len, 2386 &(uri->domain_offset)); 2387 2388 return TRUE; 2389 } 2390 2391 /* Attempts to canonicalize an implicit IPv4 address. */ 2392 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2393 uri->host_start = uri->canon_len; 2394 2395 TRACE("%u\n", data->implicit_ipv4); 2396 /* For unknown scheme types Windows doesn't convert 2397 * the value into an IP address, but it still considers 2398 * it an IPv4 address. 
2399 */ 2400 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 2401 if(!computeOnly) 2402 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2403 uri->canon_len += data->host_len; 2404 } else { 2405 if(!computeOnly) 2406 uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4); 2407 else 2408 uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4); 2409 } 2410 2411 uri->host_len = uri->canon_len - uri->host_start; 2412 uri->host_type = Uri_HOST_IPV4; 2413 2414 if(!computeOnly) 2415 TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n", 2416 data, uri, flags, computeOnly, 2417 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2418 uri->host_len); 2419 2420 return TRUE; 2421 } 2422 2423 /* Attempts to canonicalize an IPv4 address. 2424 * 2425 * If the parse_data represents a URI that has an implicit IPv4 address 2426 * (ex. http://256/, this function will convert 256 into 0.0.1.0). If 2427 * the implicit IP address exceeds the value of UINT_MAX (maximum value 2428 * for an IPv4 address) it's canonicalized as if it were a reg-name. 2429 * 2430 * If the parse_data contains a partial or full IPv4 address it normalizes it. 2431 * A partial IPv4 address is something like "192.0" and would be normalized to 2432 * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would 2433 * be normalized to "192.2.1.3". 2434 * 2435 * NOTES: 2436 * Windows ONLY normalizes IPv4 address for known scheme types (one that isn't 2437 * URL_SCHEME_UNKNOWN). For unknown scheme types, it simply copies the data from 2438 * the original URI into the canonicalized URI, but, it still recognizes URI's 2439 * host type as HOST_IPV4. 
2440 */ 2441 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2442 if(data->has_implicit_ip) 2443 return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly); 2444 else { 2445 uri->host_start = uri->canon_len; 2446 2447 /* Windows only normalizes for known scheme types. */ 2448 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 2449 /* parse_data contains a partial or full IPv4 address, so normalize it. */ 2450 DWORD i, octetDigitCount = 0, octetCount = 0; 2451 BOOL octetHasDigit = FALSE; 2452 2453 for(i = 0; i < data->host_len; ++i) { 2454 if(data->host[i] == '0' && !octetHasDigit) { 2455 /* Can ignore leading zeros if: 2456 * 1) It isn't the last digit of the octet. 2457 * 2) i+1 != data->host_len 2458 * 3) i+1 != '.' 2459 */ 2460 if(octetDigitCount == 2 || 2461 i+1 == data->host_len || 2462 data->host[i+1] == '.') { 2463 if(!computeOnly) 2464 uri->canon_uri[uri->canon_len] = data->host[i]; 2465 ++uri->canon_len; 2466 TRACE("Adding zero\n"); 2467 } 2468 } else if(data->host[i] == '.') { 2469 if(!computeOnly) 2470 uri->canon_uri[uri->canon_len] = data->host[i]; 2471 ++uri->canon_len; 2472 2473 octetDigitCount = 0; 2474 octetHasDigit = FALSE; 2475 ++octetCount; 2476 } else { 2477 if(!computeOnly) 2478 uri->canon_uri[uri->canon_len] = data->host[i]; 2479 ++uri->canon_len; 2480 2481 ++octetDigitCount; 2482 octetHasDigit = TRUE; 2483 } 2484 } 2485 2486 /* Make sure the canonicalized IP address has 4 dec-octets. 2487 * If doesn't add "0" ones until there is 4; 2488 */ 2489 for( ; octetCount < 3; ++octetCount) { 2490 if(!computeOnly) { 2491 uri->canon_uri[uri->canon_len] = '.'; 2492 uri->canon_uri[uri->canon_len+1] = '0'; 2493 } 2494 2495 uri->canon_len += 2; 2496 } 2497 } else { 2498 /* Windows doesn't normalize addresses in unknown schemes. 
*/ 2499 if(!computeOnly) 2500 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2501 uri->canon_len += data->host_len; 2502 } 2503 2504 uri->host_len = uri->canon_len - uri->host_start; 2505 if(!computeOnly) 2506 TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n", 2507 data, uri, flags, computeOnly, 2508 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2509 uri->host_len); 2510 } 2511 2512 return TRUE; 2513 } 2514 2515 /* Attempts to canonicalize the IPv6 address of the URI. 2516 * 2517 * Multiple things happen during the canonicalization of an IPv6 address: 2518 * 1) Any leading zero's in a h16 component are removed. 2519 * Ex: [0001:0022::] -> [1:22::] 2520 * 2521 * 2) The longest sequence of zero h16 components are compressed 2522 * into a "::" (elision). If there's a tie, the first is chosen. 2523 * 2524 * Ex: [0:0:0:0:1:6:7:8] -> [::1:6:7:8] 2525 * [0:0:0:0:1:2::] -> [::1:2:0:0] 2526 * [0:0:1:2:0:0:7:8] -> [::1:2:0:0:7:8] 2527 * 2528 * 3) If an IPv4 address is attached to the IPv6 address, it's 2529 * also normalized. 2530 * Ex: [::001.002.022.000] -> [::1.2.22.0] 2531 * 2532 * 4) If an elision is present, but, only represents one h16 component 2533 * it's expanded. 2534 * 2535 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 2536 * 2537 * 5) If the IPv6 address contains an IPv4 address and there exists 2538 * at least 1 non-zero h16 component the IPv4 address is converted 2539 * into two h16 components, otherwise it's normalized and kept as is. 2540 * 2541 * Ex: [::192.200.003.4] -> [::192.200.3.4] 2542 * [ffff::192.200.003.4] -> [ffff::c0c8:3041] 2543 * 2544 * NOTE: 2545 * For unknown scheme types Windows simply copies the address over without any 2546 * changes. 2547 * 2548 * IPv4 address can be included in an elision if all its components are 0's. 
2549 */ 2550 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri, 2551 DWORD flags, BOOL computeOnly) { 2552 uri->host_start = uri->canon_len; 2553 2554 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 2555 if(!computeOnly) 2556 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2557 uri->canon_len += data->host_len; 2558 } else { 2559 USHORT values[8]; 2560 INT elision_start; 2561 DWORD i, elision_len; 2562 2563 if(!ipv6_to_number(&(data->ipv6_address), values)) { 2564 TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n", 2565 data, uri, flags, computeOnly); 2566 return FALSE; 2567 } 2568 2569 if(!computeOnly) 2570 uri->canon_uri[uri->canon_len] = '['; 2571 ++uri->canon_len; 2572 2573 /* Find where the elision should occur (if any). */ 2574 compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len); 2575 2576 TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags, 2577 computeOnly, elision_start, elision_len); 2578 2579 for(i = 0; i < 8; ++i) { 2580 BOOL in_elision = (elision_start > -1 && i >= elision_start && 2581 i < elision_start+elision_len); 2582 BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision && 2583 data->ipv6_address.h16_count == 0); 2584 2585 if(i == elision_start) { 2586 if(!computeOnly) { 2587 uri->canon_uri[uri->canon_len] = ':'; 2588 uri->canon_uri[uri->canon_len+1] = ':'; 2589 } 2590 uri->canon_len += 2; 2591 } 2592 2593 /* We can ignore the current component if we're in the elision. */ 2594 if(in_elision) 2595 continue; 2596 2597 /* We only add a ':' if we're not at i == 0, or when we're at 2598 * the very end of elision range since the ':' colon was handled 2599 * earlier. Otherwise we would end up with ":::" after elision. 
2600 */ 2601 if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) { 2602 if(!computeOnly) 2603 uri->canon_uri[uri->canon_len] = ':'; 2604 ++uri->canon_len; 2605 } 2606 2607 if(do_ipv4) { 2608 UINT val; 2609 DWORD len; 2610 2611 /* Combine the two parts of the IPv4 address values. */ 2612 val = values[i]; 2613 val <<= 16; 2614 val += values[i+1]; 2615 2616 if(!computeOnly) 2617 len = ui2ipv4(uri->canon_uri+uri->canon_len, val); 2618 else 2619 len = ui2ipv4(NULL, val); 2620 2621 uri->canon_len += len; 2622 ++i; 2623 } else { 2624 /* Write a regular h16 component to the URI. */ 2625 2626 /* Short circuit for the trivial case. */ 2627 if(values[i] == 0) { 2628 if(!computeOnly) 2629 uri->canon_uri[uri->canon_len] = '0'; 2630 ++uri->canon_len; 2631 } else { 2632 static const WCHAR formatW[] = {'%','x',0}; 2633 2634 if(!computeOnly) 2635 uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len, 2636 formatW, values[i]); 2637 else { 2638 WCHAR tmp[5]; 2639 uri->canon_len += sprintfW(tmp, formatW, values[i]); 2640 } 2641 } 2642 } 2643 } 2644 2645 /* Add the closing ']'. */ 2646 if(!computeOnly) 2647 uri->canon_uri[uri->canon_len] = ']'; 2648 ++uri->canon_len; 2649 } 2650 2651 uri->host_len = uri->canon_len - uri->host_start; 2652 2653 if(!computeOnly) 2654 TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags, 2655 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 2656 uri->host_len); 2657 2658 return TRUE; 2659 } 2660 2661 /* Attempts to canonicalize the host of the URI (if any). 
*/ 2662 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2663 uri->host_start = -1; 2664 uri->host_len = 0; 2665 uri->domain_offset = -1; 2666 2667 if(data->host) { 2668 switch(data->host_type) { 2669 case Uri_HOST_DNS: 2670 uri->host_type = Uri_HOST_DNS; 2671 if(!canonicalize_reg_name(data, uri, flags, computeOnly)) 2672 return FALSE; 2673 2674 break; 2675 case Uri_HOST_IPV4: 2676 uri->host_type = Uri_HOST_IPV4; 2677 if(!canonicalize_ipv4address(data, uri, flags, computeOnly)) 2678 return FALSE; 2679 2680 break; 2681 case Uri_HOST_IPV6: 2682 if(!canonicalize_ipv6address(data, uri, flags, computeOnly)) 2683 return FALSE; 2684 2685 uri->host_type = Uri_HOST_IPV6; 2686 break; 2687 case Uri_HOST_UNKNOWN: 2688 if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) { 2689 uri->host_start = uri->canon_len; 2690 2691 /* Nothing happens to unknown host types. */ 2692 if(!computeOnly) 2693 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 2694 uri->canon_len += data->host_len; 2695 uri->host_len = data->host_len; 2696 } 2697 2698 uri->host_type = Uri_HOST_UNKNOWN; 2699 break; 2700 default: 2701 FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data, 2702 uri, flags, computeOnly, data->host_type); 2703 return FALSE; 2704 } 2705 } 2706 2707 return TRUE; 2708 } 2709 2710 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2711 BOOL has_default_port = FALSE; 2712 USHORT default_port = 0; 2713 DWORD i; 2714 2715 uri->port_offset = -1; 2716 2717 /* Check if the scheme has a default port. 
*/ 2718 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 2719 if(default_ports[i].scheme == data->scheme_type) { 2720 has_default_port = TRUE; 2721 default_port = default_ports[i].port; 2722 break; 2723 } 2724 } 2725 2726 uri->has_port = data->has_port || has_default_port; 2727 2728 /* Possible cases: 2729 * 1) Has a port which is the default port. 2730 * 2) Has a port (not the default). 2731 * 3) Doesn't have a port, but, scheme has a default port. 2732 * 4) No port. 2733 */ 2734 if(has_default_port && data->has_port && data->port_value == default_port) { 2735 /* If it's the default port and this flag isn't set, don't do anything. */ 2736 if(flags & Uri_CREATE_NO_CANONICALIZE) { 2737 uri->port_offset = uri->canon_len-uri->authority_start; 2738 if(!computeOnly) 2739 uri->canon_uri[uri->canon_len] = ':'; 2740 ++uri->canon_len; 2741 2742 if(data->port) { 2743 /* Copy the original port over. */ 2744 if(!computeOnly) 2745 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 2746 uri->canon_len += data->port_len; 2747 } else { 2748 if(!computeOnly) 2749 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 2750 else 2751 uri->canon_len += ui2str(NULL, data->port_value); 2752 } 2753 } 2754 2755 uri->port = default_port; 2756 } else if(data->has_port) { 2757 uri->port_offset = uri->canon_len-uri->authority_start; 2758 if(!computeOnly) 2759 uri->canon_uri[uri->canon_len] = ':'; 2760 ++uri->canon_len; 2761 2762 if(flags & Uri_CREATE_NO_CANONICALIZE && data->port) { 2763 /* Copy the original over without changes. 
*/ 2764 if(!computeOnly) 2765 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 2766 uri->canon_len += data->port_len; 2767 } else { 2768 if(!computeOnly) 2769 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 2770 else 2771 uri->canon_len += ui2str(NULL, data->port_value); 2772 } 2773 2774 uri->port = data->port_value; 2775 } else if(has_default_port) 2776 uri->port = default_port; 2777 2778 return TRUE; 2779 } 2780 2781 /* Canonicalizes the authority of the URI represented by the parse_data. */ 2782 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2783 uri->authority_start = uri->canon_len; 2784 uri->authority_len = 0; 2785 2786 if(!canonicalize_userinfo(data, uri, flags, computeOnly)) 2787 return FALSE; 2788 2789 if(!canonicalize_host(data, uri, flags, computeOnly)) 2790 return FALSE; 2791 2792 if(!canonicalize_port(data, uri, flags, computeOnly)) 2793 return FALSE; 2794 2795 if(uri->host_start != -1 || (data->is_relative && (data->password || data->username))) 2796 uri->authority_len = uri->canon_len - uri->authority_start; 2797 else 2798 uri->authority_start = -1; 2799 2800 return TRUE; 2801 } 2802 2803 /* Attempts to canonicalize the path of a hierarchical URI. 2804 * 2805 * Things that happen: 2806 * 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN 2807 * flag is set or it's a file URI. Forbidden characters are always encoded 2808 * for file schemes regardless and forbidden characters are never encoded 2809 * for unknown scheme types. 2810 * 2811 * 2). For known scheme types '\\' are changed to '/'. 2812 * 2813 * 3). Percent encoded, unreserved characters are decoded to their actual values. 2814 * Unless the scheme type is unknown. For file schemes any percent encoded 2815 * character in the unreserved or reserved set is decoded. 2816 * 2817 * 4). 
For File schemes if the path is starts with a drive letter and doesn't 2818 * start with a '/' then one is appended. 2819 * Ex: file://c:/test.mp3 -> file:///c:/test.mp3 2820 * 2821 * 5). Dot segments are removed from the path for all scheme types 2822 * unless NO_CANONICALIZE flag is set. Dot segments aren't removed 2823 * for wildcard scheme types. 2824 * 2825 * NOTES: 2826 * file://c:/test%20test -> file:///c:/test%2520test 2827 * file://c:/test%3Etest -> file:///c:/test%253Etest 2828 * if Uri_CREATE_FILE_USE_DOS_PATH is not set: 2829 * file:///c:/test%20test -> file:///c:/test%20test 2830 * file:///c:/test%test -> file:///c:/test%25test 2831 */ 2832 static DWORD canonicalize_path_hierarchical(const WCHAR *path, DWORD path_len, URL_SCHEME scheme_type, BOOL has_host, DWORD flags, 2833 BOOL is_implicit_scheme, WCHAR *ret_path) { 2834 const BOOL known_scheme = scheme_type != URL_SCHEME_UNKNOWN; 2835 const BOOL is_file = scheme_type == URL_SCHEME_FILE; 2836 const BOOL is_res = scheme_type == URL_SCHEME_RES; 2837 const WCHAR *ptr; 2838 BOOL escape_pct = FALSE; 2839 DWORD len = 0; 2840 2841 if(!path) 2842 return 0; 2843 2844 ptr = path; 2845 2846 if(is_file && !has_host) { 2847 /* Check if a '/' needs to be appended for the file scheme. */ 2848 if(path_len > 1 && is_drive_path(ptr) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2849 if(ret_path) 2850 ret_path[len] = '/'; 2851 len++; 2852 escape_pct = TRUE; 2853 } else if(*ptr == '/') { 2854 if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2855 /* Copy the extra '/' over. */ 2856 if(ret_path) 2857 ret_path[len] = '/'; 2858 len++; 2859 } 2860 ++ptr; 2861 } 2862 2863 if(is_drive_path(ptr)) { 2864 if(ret_path) { 2865 ret_path[len] = *ptr; 2866 /* If there's a '|' after the drive letter, convert it to a ':'. */ 2867 ret_path[len+1] = ':'; 2868 } 2869 ptr += 2; 2870 len += 2; 2871 } 2872 } 2873 2874 if(!is_file && *path && *path != '/') { 2875 /* Prepend a '/' to the path if it doesn't have one. 
*/ 2876 if(ret_path) 2877 ret_path[len] = '/'; 2878 len++; 2879 } 2880 2881 for(; ptr < path+path_len; ++ptr) { 2882 BOOL do_default_action = TRUE; 2883 2884 if(*ptr == '%' && !is_res) { 2885 const WCHAR *tmp = ptr; 2886 WCHAR val; 2887 2888 /* Check if the % represents a valid encoded char, or if it needs encoding. */ 2889 BOOL force_encode = !check_pct_encoded(&tmp) && is_file && !(flags&Uri_CREATE_FILE_USE_DOS_PATH); 2890 val = decode_pct_val(ptr); 2891 2892 if(force_encode || escape_pct) { 2893 /* Escape the percent sign in the file URI. */ 2894 if(ret_path) 2895 pct_encode_val(*ptr, ret_path+len); 2896 len += 3; 2897 do_default_action = FALSE; 2898 } else if((is_unreserved(val) && known_scheme) || 2899 (is_file && !is_implicit_scheme && (is_unreserved(val) || is_reserved(val) || 2900 (val && flags&Uri_CREATE_FILE_USE_DOS_PATH && !is_forbidden_dos_path_char(val))))) { 2901 if(ret_path) 2902 ret_path[len] = val; 2903 len++; 2904 2905 ptr += 2; 2906 continue; 2907 } 2908 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2909 /* Convert the '/' back to a '\\'. */ 2910 if(ret_path) 2911 ret_path[len] = '\\'; 2912 len++; 2913 do_default_action = FALSE; 2914 } else if(*ptr == '\\' && known_scheme) { 2915 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 2916 /* Convert '\\' into a '/'. */ 2917 if(ret_path) 2918 ret_path[len] = '/'; 2919 len++; 2920 do_default_action = FALSE; 2921 } 2922 } else if(known_scheme && !is_res && !is_unreserved(*ptr) && !is_reserved(*ptr) && 2923 (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) { 2924 if(!is_file || !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 2925 /* Escape the forbidden character. 
*/ 2926 if(ret_path) 2927 pct_encode_val(*ptr, ret_path+len); 2928 len += 3; 2929 do_default_action = FALSE; 2930 } 2931 } 2932 2933 if(do_default_action) { 2934 if(ret_path) 2935 ret_path[len] = *ptr; 2936 len++; 2937 } 2938 } 2939 2940 /* Removing the dot segments only happens when it's not in 2941 * computeOnly mode and it's not a wildcard scheme. File schemes 2942 * with USE_DOS_PATH set don't get dot segments removed. 2943 */ 2944 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) && 2945 scheme_type != URL_SCHEME_WILDCARD) { 2946 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && ret_path) { 2947 /* Remove the dot segments (if any) and reset everything to the new 2948 * correct length. 2949 */ 2950 len = remove_dot_segments(ret_path, len); 2951 } 2952 } 2953 2954 if(ret_path) 2955 TRACE("Canonicalized path %s len=%d\n", debugstr_wn(ret_path, len), len); 2956 return len; 2957 } 2958 2959 /* Attempts to canonicalize the path for an opaque URI. 2960 * 2961 * For known scheme types: 2962 * 1) forbidden characters are percent encoded if 2963 * NO_ENCODE_FORBIDDEN_CHARACTERS isn't set. 2964 * 2965 * 2) Percent encoded, unreserved characters are decoded 2966 * to their actual values, for known scheme types. 2967 * 2968 * 3) '\\' are changed to '/' for known scheme types 2969 * except for mailto schemes. 2970 * 2971 * 4) For file schemes, if USE_DOS_PATH is set all '/' 2972 * are converted to backslashes. 2973 * 2974 * 5) For file schemes, if USE_DOS_PATH isn't set all '\' 2975 * are converted to forward slashes. 
2976 */ 2977 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 2978 const WCHAR *ptr; 2979 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 2980 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 2981 const BOOL is_mk = data->scheme_type == URL_SCHEME_MK; 2982 2983 if(!data->path) { 2984 uri->path_start = -1; 2985 uri->path_len = 0; 2986 return TRUE; 2987 } 2988 2989 uri->path_start = uri->canon_len; 2990 2991 if(is_mk){ 2992 /* hijack this flag for SCHEME_MK to tell the function when to start 2993 * converting slashes */ 2994 flags |= Uri_CREATE_FILE_USE_DOS_PATH; 2995 } 2996 2997 /* For javascript: URIs, simply copy path part without any canonicalization */ 2998 if(data->scheme_type == URL_SCHEME_JAVASCRIPT) { 2999 if(!computeOnly) 3000 memcpy(uri->canon_uri+uri->canon_len, data->path, data->path_len*sizeof(WCHAR)); 3001 uri->path_len = data->path_len; 3002 uri->canon_len += data->path_len; 3003 return TRUE; 3004 } 3005 3006 /* Windows doesn't allow a "//" to appear after the scheme 3007 * of a URI, if it's an opaque URI. 3008 */ 3009 if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') { 3010 /* So it inserts a "/." before the "//" if it exists. 
*/ 3011 if(!computeOnly) { 3012 uri->canon_uri[uri->canon_len] = '/'; 3013 uri->canon_uri[uri->canon_len+1] = '.'; 3014 } 3015 3016 uri->canon_len += 2; 3017 } 3018 3019 for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) { 3020 BOOL do_default_action = TRUE; 3021 3022 if(*ptr == '%' && known_scheme) { 3023 WCHAR val = decode_pct_val(ptr); 3024 3025 if(is_unreserved(val)) { 3026 if(!computeOnly) 3027 uri->canon_uri[uri->canon_len] = val; 3028 ++uri->canon_len; 3029 3030 ptr += 2; 3031 continue; 3032 } 3033 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 3034 if(!computeOnly) 3035 uri->canon_uri[uri->canon_len] = '\\'; 3036 ++uri->canon_len; 3037 do_default_action = FALSE; 3038 } else if(*ptr == '\\') { 3039 if((data->is_relative || is_mk || is_file) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 3040 /* Convert to a '/'. */ 3041 if(!computeOnly) 3042 uri->canon_uri[uri->canon_len] = '/'; 3043 ++uri->canon_len; 3044 do_default_action = FALSE; 3045 } 3046 } else if(is_mk && *ptr == ':' && ptr + 1 < data->path + data->path_len && *(ptr + 1) == ':') { 3047 flags &= ~Uri_CREATE_FILE_USE_DOS_PATH; 3048 } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) && 3049 !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 3050 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 3051 if(!computeOnly) 3052 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3053 uri->canon_len += 3; 3054 do_default_action = FALSE; 3055 } 3056 } 3057 3058 if(do_default_action) { 3059 if(!computeOnly) 3060 uri->canon_uri[uri->canon_len] = *ptr; 3061 ++uri->canon_len; 3062 } 3063 } 3064 3065 if(is_mk && !computeOnly && !(flags & Uri_CREATE_NO_CANONICALIZE)) { 3066 DWORD new_len = remove_dot_segments(uri->canon_uri + uri->path_start, 3067 uri->canon_len - uri->path_start); 3068 uri->canon_len = uri->path_start + new_len; 3069 } 3070 3071 uri->path_len = uri->canon_len - uri->path_start; 3072 3073 if(!computeOnly) 3074 TRACE("(%p %p %x 
%d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly, 3075 debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len); 3076 return TRUE; 3077 } 3078 3079 /* Determines how the URI represented by the parse_data should be canonicalized. 3080 * 3081 * Essentially, if the parse_data represents an hierarchical URI then it calls 3082 * canonicalize_authority and the canonicalization functions for the path. If the 3083 * URI is opaque it canonicalizes the path of the URI. 3084 */ 3085 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3086 if(!data->is_opaque || (data->is_relative && (data->password || data->username))) { 3087 /* "//" is only added for non-wildcard scheme types. 3088 * 3089 * A "//" is only added to a relative URI if it has a 3090 * host or port component (this only happens if a IUriBuilder 3091 * is generating an IUri). 3092 */ 3093 if((data->is_relative && (data->host || data->has_port)) || 3094 (!data->is_relative && data->scheme_type != URL_SCHEME_WILDCARD)) { 3095 if(data->scheme_type == URL_SCHEME_WILDCARD) 3096 FIXME("Here\n"); 3097 3098 if(!computeOnly) { 3099 INT pos = uri->canon_len; 3100 3101 uri->canon_uri[pos] = '/'; 3102 uri->canon_uri[pos+1] = '/'; 3103 } 3104 uri->canon_len += 2; 3105 } 3106 3107 if(!canonicalize_authority(data, uri, flags, computeOnly)) 3108 return FALSE; 3109 3110 if(data->is_relative && (data->password || data->username)) { 3111 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 3112 return FALSE; 3113 } else { 3114 if(!computeOnly) 3115 uri->path_start = uri->canon_len; 3116 uri->path_len = canonicalize_path_hierarchical(data->path, data->path_len, data->scheme_type, data->host_len != 0, 3117 flags, data->has_implicit_scheme, computeOnly ? 
NULL : uri->canon_uri+uri->canon_len); 3118 uri->canon_len += uri->path_len; 3119 if(!computeOnly && !uri->path_len) 3120 uri->path_start = -1; 3121 } 3122 } else { 3123 /* Opaque URI's don't have an authority. */ 3124 uri->userinfo_start = uri->userinfo_split = -1; 3125 uri->userinfo_len = 0; 3126 uri->host_start = -1; 3127 uri->host_len = 0; 3128 uri->host_type = Uri_HOST_UNKNOWN; 3129 uri->has_port = FALSE; 3130 uri->authority_start = -1; 3131 uri->authority_len = 0; 3132 uri->domain_offset = -1; 3133 uri->port_offset = -1; 3134 3135 if(is_hierarchical_scheme(data->scheme_type)) { 3136 DWORD i; 3137 3138 /* Absolute URIs aren't displayed for known scheme types 3139 * which should be hierarchical URIs. 3140 */ 3141 uri->display_modifiers |= URI_DISPLAY_NO_ABSOLUTE_URI; 3142 3143 /* Windows also sets the port for these (if they have one). */ 3144 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 3145 if(data->scheme_type == default_ports[i].scheme) { 3146 uri->has_port = TRUE; 3147 uri->port = default_ports[i].port; 3148 break; 3149 } 3150 } 3151 } 3152 3153 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 3154 return FALSE; 3155 } 3156 3157 if(uri->path_start > -1 && !computeOnly) 3158 /* Finding file extensions happens for both types of URIs. */ 3159 uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len); 3160 else 3161 uri->extension_offset = -1; 3162 3163 return TRUE; 3164 } 3165 3166 /* Attempts to canonicalize the query string of the URI. 3167 * 3168 * Things that happen: 3169 * 1) For known scheme types forbidden characters 3170 * are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set 3171 * or NO_ENCODE_FORBIDDEN_CHARACTERS is set. 3172 * 3173 * 2) For known scheme types, percent encoded, unreserved characters 3174 * are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set. 
3175 */ 3176 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3177 const WCHAR *ptr, *end; 3178 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 3179 3180 if(!data->query) { 3181 uri->query_start = -1; 3182 uri->query_len = 0; 3183 return TRUE; 3184 } 3185 3186 uri->query_start = uri->canon_len; 3187 3188 end = data->query+data->query_len; 3189 for(ptr = data->query; ptr < end; ++ptr) { 3190 if(*ptr == '%') { 3191 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3192 WCHAR val = decode_pct_val(ptr); 3193 if(is_unreserved(val)) { 3194 if(!computeOnly) 3195 uri->canon_uri[uri->canon_len] = val; 3196 ++uri->canon_len; 3197 3198 ptr += 2; 3199 continue; 3200 } 3201 } 3202 } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 3203 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 3204 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3205 if(!computeOnly) 3206 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3207 uri->canon_len += 3; 3208 continue; 3209 } 3210 } 3211 3212 if(!computeOnly) 3213 uri->canon_uri[uri->canon_len] = *ptr; 3214 ++uri->canon_len; 3215 } 3216 3217 uri->query_len = uri->canon_len - uri->query_start; 3218 3219 if(!computeOnly) 3220 TRACE("(%p %p %x %d): Canonicalized query string %s len=%d\n", data, uri, flags, 3221 computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len), 3222 uri->query_len); 3223 return TRUE; 3224 } 3225 3226 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3227 const WCHAR *ptr, *end; 3228 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 3229 3230 if(!data->fragment) { 3231 uri->fragment_start = -1; 3232 uri->fragment_len = 0; 3233 return TRUE; 3234 } 3235 3236 uri->fragment_start = uri->canon_len; 3237 3238 end = data->fragment + data->fragment_len; 3239 for(ptr = data->fragment; ptr < end; ++ptr) { 3240 if(*ptr == '%') { 3241 
if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3242 WCHAR val = decode_pct_val(ptr); 3243 if(is_unreserved(val)) { 3244 if(!computeOnly) 3245 uri->canon_uri[uri->canon_len] = val; 3246 ++uri->canon_len; 3247 3248 ptr += 2; 3249 continue; 3250 } 3251 } 3252 } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 3253 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 3254 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 3255 if(!computeOnly) 3256 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 3257 uri->canon_len += 3; 3258 continue; 3259 } 3260 } 3261 3262 if(!computeOnly) 3263 uri->canon_uri[uri->canon_len] = *ptr; 3264 ++uri->canon_len; 3265 } 3266 3267 uri->fragment_len = uri->canon_len - uri->fragment_start; 3268 3269 if(!computeOnly) 3270 TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags, 3271 computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len), 3272 uri->fragment_len); 3273 return TRUE; 3274 } 3275 3276 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. */ 3277 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 3278 uri->scheme_start = -1; 3279 uri->scheme_len = 0; 3280 3281 if(!data->scheme) { 3282 /* The only type of URI that doesn't have to have a scheme is a relative 3283 * URI. 3284 */ 3285 if(!data->is_relative) { 3286 FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data, 3287 uri, flags, debugstr_w(data->uri)); 3288 return FALSE; 3289 } 3290 } else { 3291 if(!computeOnly) { 3292 DWORD i; 3293 INT pos = uri->canon_len; 3294 3295 for(i = 0; i < data->scheme_len; ++i) { 3296 /* Scheme name must be lower case after canonicalization. 
*/ 3297 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]); 3298 } 3299 3300 uri->canon_uri[i + pos] = ':'; 3301 uri->scheme_start = pos; 3302 3303 TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags, 3304 debugstr_wn(uri->canon_uri+uri->scheme_start, data->scheme_len), data->scheme_len); 3305 } 3306 3307 /* This happens in both computation modes. */ 3308 uri->canon_len += data->scheme_len + 1; 3309 uri->scheme_len = data->scheme_len; 3310 } 3311 return TRUE; 3312 } 3313 3314 /* Computes what the length of the URI specified by the parse_data will be 3315 * after canonicalization occurs using the specified flags. 3316 * 3317 * This function will return a non-zero value indicating the length of the canonicalized 3318 * URI, or -1 on error. 3319 */ 3320 static int compute_canonicalized_length(const parse_data *data, DWORD flags) { 3321 Uri uri; 3322 3323 memset(&uri, 0, sizeof(Uri)); 3324 3325 TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags, 3326 debugstr_w(data->uri)); 3327 3328 if(!canonicalize_scheme(data, &uri, flags, TRUE)) { 3329 ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags); 3330 return -1; 3331 } 3332 3333 if(!canonicalize_hierpart(data, &uri, flags, TRUE)) { 3334 ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags); 3335 return -1; 3336 } 3337 3338 if(!canonicalize_query(data, &uri, flags, TRUE)) { 3339 ERR("(%p %x): Failed to compute query string length.\n", data, flags); 3340 return -1; 3341 } 3342 3343 if(!canonicalize_fragment(data, &uri, flags, TRUE)) { 3344 ERR("(%p %x): Failed to compute fragment length.\n", data, flags); 3345 return -1; 3346 } 3347 3348 TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len); 3349 3350 return uri.canon_len; 3351 } 3352 3353 /* Canonicalizes the URI data specified in the parse_data, using the given flags. 
If the 3354 * canonicalization succeeds it will store all the canonicalization information 3355 * in the pointer to the Uri. 3356 * 3357 * To canonicalize a URI this function first computes what the length of the URI 3358 * specified by the parse_data will be. Once this is done it will then perform the actual 3359 * canonicalization of the URI. 3360 */ 3361 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { 3362 INT len; 3363 3364 uri->canon_uri = NULL; 3365 uri->canon_size = uri->canon_len = 0; 3366 3367 TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri)); 3368 3369 /* First try to compute the length of the URI. */ 3370 len = compute_canonicalized_length(data, flags); 3371 if(len == -1) { 3372 ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags, 3373 debugstr_w(data->uri)); 3374 return E_INVALIDARG; 3375 } 3376 3377 uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR)); 3378 if(!uri->canon_uri) 3379 return E_OUTOFMEMORY; 3380 3381 uri->canon_size = len; 3382 if(!canonicalize_scheme(data, uri, flags, FALSE)) { 3383 ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags); 3384 return E_INVALIDARG; 3385 } 3386 uri->scheme_type = data->scheme_type; 3387 3388 if(!canonicalize_hierpart(data, uri, flags, FALSE)) { 3389 ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags); 3390 return E_INVALIDARG; 3391 } 3392 3393 if(!canonicalize_query(data, uri, flags, FALSE)) { 3394 ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n", 3395 data, uri, flags); 3396 return E_INVALIDARG; 3397 } 3398 3399 if(!canonicalize_fragment(data, uri, flags, FALSE)) { 3400 ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n", 3401 data, uri, flags); 3402 return E_INVALIDARG; 3403 } 3404 3405 /* There's a possibility we didn't use all the space we allocated 3406 * earlier. 
3407 */ 3408 if(uri->canon_len < uri->canon_size) { 3409 /* This happens if the URI is hierarchical and dot 3410 * segments were removed from its path. 3411 */ 3412 WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR)); 3413 if(!tmp) 3414 return E_OUTOFMEMORY; 3415 3416 uri->canon_uri = tmp; 3417 uri->canon_size = uri->canon_len; 3418 } 3419 3420 uri->canon_uri[uri->canon_len] = '\0'; 3421 TRACE("(%p %p %x): finished canonicalizing the URI. uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri)); 3422 3423 return S_OK; 3424 } 3425 3426 static HRESULT get_builder_component(LPWSTR *component, DWORD *component_len, 3427 LPCWSTR source, DWORD source_len, 3428 LPCWSTR *output, DWORD *output_len) 3429 { 3430 if(!output_len) { 3431 if(output) 3432 *output = NULL; 3433 return E_POINTER; 3434 } 3435 3436 if(!output) { 3437 *output_len = 0; 3438 return E_POINTER; 3439 } 3440 3441 if(!(*component) && source) { 3442 /* Allocate 'component', and copy the contents from 'source' 3443 * into the new allocation. 3444 */ 3445 *component = heap_alloc((source_len+1)*sizeof(WCHAR)); 3446 if(!(*component)) 3447 return E_OUTOFMEMORY; 3448 3449 memcpy(*component, source, source_len*sizeof(WCHAR)); 3450 (*component)[source_len] = '\0'; 3451 *component_len = source_len; 3452 } 3453 3454 *output = *component; 3455 *output_len = *component_len; 3456 return *output ? S_OK : S_FALSE; 3457 } 3458 3459 /* Allocates 'component' and copies the string from 'new_value' into 'component'. 3460 * If 'prefix' is set and 'new_value' isn't NULL, then it checks if 'new_value' 3461 * starts with 'prefix'. If it doesn't then 'prefix' is prepended to 'component'. 3462 * 3463 * If everything is successful, then will set 'success_flag' in 'flags'. 
3464 */ 3465 static HRESULT set_builder_component(LPWSTR *component, DWORD *component_len, LPCWSTR new_value, 3466 WCHAR prefix, DWORD *flags, DWORD success_flag) 3467 { 3468 heap_free(*component); 3469 3470 if(!new_value) { 3471 *component = NULL; 3472 *component_len = 0; 3473 } else { 3474 BOOL add_prefix = FALSE; 3475 DWORD len = lstrlenW(new_value); 3476 DWORD pos = 0; 3477 3478 if(prefix && *new_value != prefix) { 3479 add_prefix = TRUE; 3480 *component = heap_alloc((len+2)*sizeof(WCHAR)); 3481 } else 3482 *component = heap_alloc((len+1)*sizeof(WCHAR)); 3483 3484 if(!(*component)) 3485 return E_OUTOFMEMORY; 3486 3487 if(add_prefix) 3488 (*component)[pos++] = prefix; 3489 3490 memcpy(*component+pos, new_value, (len+1)*sizeof(WCHAR)); 3491 *component_len = len+pos; 3492 } 3493 3494 *flags |= success_flag; 3495 return S_OK; 3496 } 3497 3498 static void reset_builder(UriBuilder *builder) { 3499 if(builder->uri) 3500 IUri_Release(&builder->uri->IUri_iface); 3501 builder->uri = NULL; 3502 3503 heap_free(builder->fragment); 3504 builder->fragment = NULL; 3505 builder->fragment_len = 0; 3506 3507 heap_free(builder->host); 3508 builder->host = NULL; 3509 builder->host_len = 0; 3510 3511 heap_free(builder->password); 3512 builder->password = NULL; 3513 builder->password_len = 0; 3514 3515 heap_free(builder->path); 3516 builder->path = NULL; 3517 builder->path_len = 0; 3518 3519 heap_free(builder->query); 3520 builder->query = NULL; 3521 builder->query_len = 0; 3522 3523 heap_free(builder->scheme); 3524 builder->scheme = NULL; 3525 builder->scheme_len = 0; 3526 3527 heap_free(builder->username); 3528 builder->username = NULL; 3529 builder->username_len = 0; 3530 3531 builder->has_port = FALSE; 3532 builder->port = 0; 3533 builder->modified_props = 0; 3534 } 3535 3536 static HRESULT validate_scheme_name(const UriBuilder *builder, parse_data *data, DWORD flags) { 3537 const WCHAR *component; 3538 const WCHAR *ptr; 3539 const WCHAR **pptr; 3540 DWORD expected_len; 3541 3542 
if(builder->scheme) { 3543 ptr = builder->scheme; 3544 expected_len = builder->scheme_len; 3545 } else if(builder->uri && builder->uri->scheme_start > -1) { 3546 ptr = builder->uri->canon_uri+builder->uri->scheme_start; 3547 expected_len = builder->uri->scheme_len; 3548 } else { 3549 static const WCHAR nullW[] = {0}; 3550 ptr = nullW; 3551 expected_len = 0; 3552 } 3553 3554 component = ptr; 3555 pptr = &ptr; 3556 if(parse_scheme(pptr, data, flags, ALLOW_NULL_TERM_SCHEME) && 3557 data->scheme_len == expected_len) { 3558 if(data->scheme) 3559 TRACE("(%p %p %x): Found valid scheme component %s len=%d.\n", builder, data, flags, 3560 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 3561 } else { 3562 TRACE("(%p %p %x): Invalid scheme component found %s.\n", builder, data, flags, 3563 debugstr_wn(component, expected_len)); 3564 return INET_E_INVALID_URL; 3565 } 3566 3567 return S_OK; 3568 } 3569 3570 static HRESULT validate_username(const UriBuilder *builder, parse_data *data, DWORD flags) { 3571 const WCHAR *ptr; 3572 const WCHAR **pptr; 3573 DWORD expected_len; 3574 3575 if(builder->username) { 3576 ptr = builder->username; 3577 expected_len = builder->username_len; 3578 } else if(!(builder->modified_props & Uri_HAS_USER_NAME) && builder->uri && 3579 builder->uri->userinfo_start > -1 && builder->uri->userinfo_split != 0) { 3580 /* Just use the username from the base Uri. */ 3581 data->username = builder->uri->canon_uri+builder->uri->userinfo_start; 3582 data->username_len = (builder->uri->userinfo_split > -1) ? 
3583 builder->uri->userinfo_split : builder->uri->userinfo_len; 3584 ptr = NULL; 3585 } else { 3586 ptr = NULL; 3587 expected_len = 0; 3588 } 3589 3590 if(ptr) { 3591 const WCHAR *component = ptr; 3592 pptr = &ptr; 3593 if(parse_username(pptr, data, flags, ALLOW_NULL_TERM_USER_NAME) && 3594 data->username_len == expected_len) 3595 TRACE("(%p %p %x): Found valid username component %s len=%d.\n", builder, data, flags, 3596 debugstr_wn(data->username, data->username_len), data->username_len); 3597 else { 3598 TRACE("(%p %p %x): Invalid username component found %s.\n", builder, data, flags, 3599 debugstr_wn(component, expected_len)); 3600 return INET_E_INVALID_URL; 3601 } 3602 } 3603 3604 return S_OK; 3605 } 3606 3607 static HRESULT validate_password(const UriBuilder *builder, parse_data *data, DWORD flags) { 3608 const WCHAR *ptr; 3609 const WCHAR **pptr; 3610 DWORD expected_len; 3611 3612 if(builder->password) { 3613 ptr = builder->password; 3614 expected_len = builder->password_len; 3615 } else if(!(builder->modified_props & Uri_HAS_PASSWORD) && builder->uri && 3616 builder->uri->userinfo_split > -1) { 3617 data->password = builder->uri->canon_uri+builder->uri->userinfo_start+builder->uri->userinfo_split+1; 3618 data->password_len = builder->uri->userinfo_len-builder->uri->userinfo_split-1; 3619 ptr = NULL; 3620 } else { 3621 ptr = NULL; 3622 expected_len = 0; 3623 } 3624 3625 if(ptr) { 3626 const WCHAR *component = ptr; 3627 pptr = &ptr; 3628 if(parse_password(pptr, data, flags, ALLOW_NULL_TERM_PASSWORD) && 3629 data->password_len == expected_len) 3630 TRACE("(%p %p %x): Found valid password component %s len=%d.\n", builder, data, flags, 3631 debugstr_wn(data->password, data->password_len), data->password_len); 3632 else { 3633 TRACE("(%p %p %x): Invalid password component found %s.\n", builder, data, flags, 3634 debugstr_wn(component, expected_len)); 3635 return INET_E_INVALID_URL; 3636 } 3637 } 3638 3639 return S_OK; 3640 } 3641 3642 static HRESULT 
validate_userinfo(const UriBuilder *builder, parse_data *data, DWORD flags) { 3643 HRESULT hr; 3644 3645 hr = validate_username(builder, data, flags); 3646 if(FAILED(hr)) 3647 return hr; 3648 3649 hr = validate_password(builder, data, flags); 3650 if(FAILED(hr)) 3651 return hr; 3652 3653 return S_OK; 3654 } 3655 3656 static HRESULT validate_host(const UriBuilder *builder, parse_data *data, DWORD flags) { 3657 const WCHAR *ptr; 3658 const WCHAR **pptr; 3659 DWORD expected_len; 3660 3661 if(builder->host) { 3662 ptr = builder->host; 3663 expected_len = builder->host_len; 3664 } else if(!(builder->modified_props & Uri_HAS_HOST) && builder->uri && builder->uri->host_start > -1) { 3665 ptr = builder->uri->canon_uri + builder->uri->host_start; 3666 expected_len = builder->uri->host_len; 3667 } else 3668 ptr = NULL; 3669 3670 if(ptr) { 3671 const WCHAR *component = ptr; 3672 DWORD extras = ALLOW_BRACKETLESS_IP_LITERAL|IGNORE_PORT_DELIMITER|SKIP_IP_FUTURE_CHECK; 3673 pptr = &ptr; 3674 3675 if(parse_host(pptr, data, flags, extras) && data->host_len == expected_len) 3676 TRACE("(%p %p %x): Found valid host name %s len=%d type=%d.\n", builder, data, flags, 3677 debugstr_wn(data->host, data->host_len), data->host_len, data->host_type); 3678 else { 3679 TRACE("(%p %p %x): Invalid host name found %s.\n", builder, data, flags, 3680 debugstr_wn(component, expected_len)); 3681 return INET_E_INVALID_URL; 3682 } 3683 } 3684 3685 return S_OK; 3686 } 3687 3688 static void setup_port(const UriBuilder *builder, parse_data *data, DWORD flags) { 3689 if(builder->modified_props & Uri_HAS_PORT) { 3690 if(builder->has_port) { 3691 data->has_port = TRUE; 3692 data->port_value = builder->port; 3693 } 3694 } else if(builder->uri && builder->uri->has_port) { 3695 data->has_port = TRUE; 3696 data->port_value = builder->uri->port; 3697 } 3698 3699 if(data->has_port) 3700 TRACE("(%p %p %x): Using %u as port for IUri.\n", builder, data, flags, data->port_value); 3701 } 3702 3703 static HRESULT 
validate_path(const UriBuilder *builder, parse_data *data, DWORD flags) { 3704 const WCHAR *ptr = NULL; 3705 const WCHAR *component; 3706 const WCHAR **pptr; 3707 DWORD expected_len; 3708 BOOL check_len = TRUE; 3709 BOOL valid = FALSE; 3710 3711 if(builder->path) { 3712 ptr = builder->path; 3713 expected_len = builder->path_len; 3714 } else if(!(builder->modified_props & Uri_HAS_PATH) && 3715 builder->uri && builder->uri->path_start > -1) { 3716 ptr = builder->uri->canon_uri+builder->uri->path_start; 3717 expected_len = builder->uri->path_len; 3718 } else { 3719 static const WCHAR nullW[] = {0}; 3720 ptr = nullW; 3721 check_len = FALSE; 3722 expected_len = -1; 3723 } 3724 3725 component = ptr; 3726 pptr = &ptr; 3727 3728 /* How the path is validated depends on what type of 3729 * URI it is. 3730 */ 3731 valid = data->is_opaque ? 3732 parse_path_opaque(pptr, data, flags) : parse_path_hierarchical(pptr, data, flags); 3733 3734 if(!valid || (check_len && expected_len != data->path_len)) { 3735 TRACE("(%p %p %x): Invalid path component %s.\n", builder, data, flags, 3736 debugstr_wn(component, expected_len) ); 3737 return INET_E_INVALID_URL; 3738 } 3739 3740 TRACE("(%p %p %x): Valid path component %s len=%d.\n", builder, data, flags, 3741 debugstr_wn(data->path, data->path_len), data->path_len); 3742 3743 return S_OK; 3744 } 3745 3746 static HRESULT validate_query(const UriBuilder *builder, parse_data *data, DWORD flags) { 3747 const WCHAR *ptr = NULL; 3748 const WCHAR **pptr; 3749 DWORD expected_len; 3750 3751 if(builder->query) { 3752 ptr = builder->query; 3753 expected_len = builder->query_len; 3754 } else if(!(builder->modified_props & Uri_HAS_QUERY) && builder->uri && 3755 builder->uri->query_start > -1) { 3756 ptr = builder->uri->canon_uri+builder->uri->query_start; 3757 expected_len = builder->uri->query_len; 3758 } 3759 3760 if(ptr) { 3761 const WCHAR *component = ptr; 3762 pptr = &ptr; 3763 3764 if(parse_query(pptr, data, flags) && expected_len == 
data->query_len) 3765 TRACE("(%p %p %x): Valid query component %s len=%d.\n", builder, data, flags, 3766 debugstr_wn(data->query, data->query_len), data->query_len); 3767 else { 3768 TRACE("(%p %p %x): Invalid query component %s.\n", builder, data, flags, 3769 debugstr_wn(component, expected_len)); 3770 return INET_E_INVALID_URL; 3771 } 3772 } 3773 3774 return S_OK; 3775 } 3776 3777 static HRESULT validate_fragment(const UriBuilder *builder, parse_data *data, DWORD flags) { 3778 const WCHAR *ptr = NULL; 3779 const WCHAR **pptr; 3780 DWORD expected_len; 3781 3782 if(builder->fragment) { 3783 ptr = builder->fragment; 3784 expected_len = builder->fragment_len; 3785 } else if(!(builder->modified_props & Uri_HAS_FRAGMENT) && builder->uri && 3786 builder->uri->fragment_start > -1) { 3787 ptr = builder->uri->canon_uri+builder->uri->fragment_start; 3788 expected_len = builder->uri->fragment_len; 3789 } 3790 3791 if(ptr) { 3792 const WCHAR *component = ptr; 3793 pptr = &ptr; 3794 3795 if(parse_fragment(pptr, data, flags) && expected_len == data->fragment_len) 3796 TRACE("(%p %p %x): Valid fragment component %s len=%d.\n", builder, data, flags, 3797 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len); 3798 else { 3799 TRACE("(%p %p %x): Invalid fragment component %s.\n", builder, data, flags, 3800 debugstr_wn(component, expected_len)); 3801 return INET_E_INVALID_URL; 3802 } 3803 } 3804 3805 return S_OK; 3806 } 3807 3808 static HRESULT validate_components(const UriBuilder *builder, parse_data *data, DWORD flags) { 3809 HRESULT hr; 3810 3811 memset(data, 0, sizeof(parse_data)); 3812 3813 TRACE("(%p %p %x): Beginning to validate builder components.\n", builder, data, flags); 3814 3815 hr = validate_scheme_name(builder, data, flags); 3816 if(FAILED(hr)) 3817 return hr; 3818 3819 /* Extra validation for file schemes. 
*/ 3820 if(data->scheme_type == URL_SCHEME_FILE) { 3821 if((builder->password || (builder->uri && builder->uri->userinfo_split > -1)) || 3822 (builder->username || (builder->uri && builder->uri->userinfo_start > -1))) { 3823 TRACE("(%p %p %x): File schemes can't contain a username or password.\n", 3824 builder, data, flags); 3825 return INET_E_INVALID_URL; 3826 } 3827 } 3828 3829 hr = validate_userinfo(builder, data, flags); 3830 if(FAILED(hr)) 3831 return hr; 3832 3833 hr = validate_host(builder, data, flags); 3834 if(FAILED(hr)) 3835 return hr; 3836 3837 setup_port(builder, data, flags); 3838 3839 /* The URI is opaque if it doesn't have an authority component. */ 3840 if(!data->is_relative) 3841 data->is_opaque = !data->username && !data->password && !data->host && !data->has_port 3842 && data->scheme_type != URL_SCHEME_FILE; 3843 else 3844 data->is_opaque = !data->host && !data->has_port; 3845 3846 hr = validate_path(builder, data, flags); 3847 if(FAILED(hr)) 3848 return hr; 3849 3850 hr = validate_query(builder, data, flags); 3851 if(FAILED(hr)) 3852 return hr; 3853 3854 hr = validate_fragment(builder, data, flags); 3855 if(FAILED(hr)) 3856 return hr; 3857 3858 TRACE("(%p %p %x): Finished validating builder components.\n", builder, data, flags); 3859 3860 return S_OK; 3861 } 3862 3863 static HRESULT compare_file_paths(const Uri *a, const Uri *b, BOOL *ret) 3864 { 3865 WCHAR *canon_path_a, *canon_path_b; 3866 DWORD len_a, len_b; 3867 3868 if(!a->path_len) { 3869 *ret = !b->path_len; 3870 return S_OK; 3871 } 3872 3873 if(!b->path_len) { 3874 *ret = FALSE; 3875 return S_OK; 3876 } 3877 3878 /* Fast path */ 3879 if(a->path_len == b->path_len && !memicmpW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) { 3880 *ret = TRUE; 3881 return S_OK; 3882 } 3883 3884 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, NULL); 3885 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, 
b->path_len, b->scheme_type, FALSE, 0, FALSE, NULL); 3886 3887 canon_path_a = heap_alloc(len_a*sizeof(WCHAR)); 3888 if(!canon_path_a) 3889 return E_OUTOFMEMORY; 3890 canon_path_b = heap_alloc(len_b*sizeof(WCHAR)); 3891 if(!canon_path_b) { 3892 heap_free(canon_path_a); 3893 return E_OUTOFMEMORY; 3894 } 3895 3896 len_a = canonicalize_path_hierarchical(a->canon_uri+a->path_start, a->path_len, a->scheme_type, FALSE, 0, FALSE, canon_path_a); 3897 len_b = canonicalize_path_hierarchical(b->canon_uri+b->path_start, b->path_len, b->scheme_type, FALSE, 0, FALSE, canon_path_b); 3898 3899 *ret = len_a == len_b && !memicmpW(canon_path_a, canon_path_b, len_a); 3900 3901 heap_free(canon_path_a); 3902 heap_free(canon_path_b); 3903 return S_OK; 3904 } 3905 3906 /* Checks if the two Uri's are logically equivalent. It's a simple 3907 * comparison, since they are both of type Uri, and it can access 3908 * the properties of each Uri directly without the need to go 3909 * through the "IUri_Get*" interface calls. 3910 */ 3911 static HRESULT compare_uris(const Uri *a, const Uri *b, BOOL *ret) { 3912 const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN; 3913 const BOOL are_hierarchical = a->authority_start > -1 && b->authority_start > -1; 3914 HRESULT hres; 3915 3916 *ret = FALSE; 3917 3918 if(a->scheme_type != b->scheme_type) 3919 return S_OK; 3920 3921 /* Only compare the scheme names (if any) if their unknown scheme types. */ 3922 if(!known_scheme) { 3923 if((a->scheme_start > -1 && b->scheme_start > -1) && 3924 (a->scheme_len == b->scheme_len)) { 3925 /* Make sure the schemes are the same. */ 3926 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len)) 3927 return S_OK; 3928 } else if(a->scheme_len != b->scheme_len) 3929 /* One of the Uri's has a scheme name, while the other doesn't. */ 3930 return S_OK; 3931 } 3932 3933 /* If they have a userinfo component, perform case sensitive compare. 
*/ 3934 if((a->userinfo_start > -1 && b->userinfo_start > -1) && 3935 (a->userinfo_len == b->userinfo_len)) { 3936 if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len)) 3937 return S_OK; 3938 } else if(a->userinfo_len != b->userinfo_len) 3939 /* One of the Uri's had a userinfo, while the other one doesn't. */ 3940 return S_OK; 3941 3942 /* Check if they have a host name. */ 3943 if((a->host_start > -1 && b->host_start > -1) && 3944 (a->host_len == b->host_len)) { 3945 /* Perform a case insensitive compare if they are a known scheme type. */ 3946 if(known_scheme) { 3947 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 3948 return S_OK; 3949 } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 3950 return S_OK; 3951 } else if(a->host_len != b->host_len) 3952 /* One of the Uri's had a host, while the other one didn't. */ 3953 return S_OK; 3954 3955 if(a->has_port && b->has_port) { 3956 if(a->port != b->port) 3957 return S_OK; 3958 } else if(a->has_port || b->has_port) 3959 /* One had a port, while the other one didn't. */ 3960 return S_OK; 3961 3962 /* Windows is weird with how it handles paths. For example 3963 * One URI could be "http://google.com" (after canonicalization) 3964 * and one could be "http://google.com/" and the IsEqual function 3965 * would still evaluate to TRUE, but, only if they are both hierarchical 3966 * URIs. 
3967 */ 3968 if(a->scheme_type == URL_SCHEME_FILE) { 3969 BOOL cmp; 3970 3971 hres = compare_file_paths(a, b, &cmp); 3972 if(FAILED(hres) || !cmp) 3973 return hres; 3974 } else if((a->path_start > -1 && b->path_start > -1) && 3975 (a->path_len == b->path_len)) { 3976 if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) 3977 return S_OK; 3978 } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) { 3979 if(*(a->canon_uri+a->path_start) != '/') 3980 return S_OK; 3981 } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) { 3982 if(*(b->canon_uri+b->path_start) != '/') 3983 return S_OK; 3984 } else if(a->path_len != b->path_len) 3985 return S_OK; 3986 3987 /* Compare the query strings of the two URIs. */ 3988 if((a->query_start > -1 && b->query_start > -1) && 3989 (a->query_len == b->query_len)) { 3990 if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len)) 3991 return S_OK; 3992 } else if(a->query_len != b->query_len) 3993 return S_OK; 3994 3995 if((a->fragment_start > -1 && b->fragment_start > -1) && 3996 (a->fragment_len == b->fragment_len)) { 3997 if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len)) 3998 return S_OK; 3999 } else if(a->fragment_len != b->fragment_len) 4000 return S_OK; 4001 4002 /* If we get here, the two URIs are equivalent. */ 4003 *ret = TRUE; 4004 return S_OK; 4005 } 4006 4007 static void convert_to_dos_path(const WCHAR *path, DWORD path_len, 4008 WCHAR *output, DWORD *output_len) 4009 { 4010 const WCHAR *ptr = path; 4011 4012 if(path_len > 3 && *ptr == '/' && is_drive_path(path+1)) 4013 /* Skip over the leading / before the drive path. 
*/ 4014 ++ptr; 4015 4016 for(; ptr < path+path_len; ++ptr) { 4017 if(*ptr == '/') { 4018 if(output) 4019 *output++ = '\\'; 4020 (*output_len)++; 4021 } else { 4022 if(output) 4023 *output++ = *ptr; 4024 (*output_len)++; 4025 } 4026 } 4027 } 4028 4029 /* Generates a raw uri string using the parse_data. */ 4030 static DWORD generate_raw_uri(const parse_data *data, BSTR uri, DWORD flags) { 4031 DWORD length = 0; 4032 4033 if(data->scheme) { 4034 if(uri) { 4035 memcpy(uri, data->scheme, data->scheme_len*sizeof(WCHAR)); 4036 uri[data->scheme_len] = ':'; 4037 } 4038 length += data->scheme_len+1; 4039 } 4040 4041 if(!data->is_opaque) { 4042 /* For the "//" which appears before the authority component. */ 4043 if(uri) { 4044 uri[length] = '/'; 4045 uri[length+1] = '/'; 4046 } 4047 length += 2; 4048 4049 /* Check if we need to add the "\\" before the host name 4050 * of a UNC server name in a DOS path. 4051 */ 4052 if(flags & RAW_URI_CONVERT_TO_DOS_PATH && 4053 data->scheme_type == URL_SCHEME_FILE && data->host) { 4054 if(uri) { 4055 uri[length] = '\\'; 4056 uri[length+1] = '\\'; 4057 } 4058 length += 2; 4059 } 4060 } 4061 4062 if(data->username) { 4063 if(uri) 4064 memcpy(uri+length, data->username, data->username_len*sizeof(WCHAR)); 4065 length += data->username_len; 4066 } 4067 4068 if(data->password) { 4069 if(uri) { 4070 uri[length] = ':'; 4071 memcpy(uri+length+1, data->password, data->password_len*sizeof(WCHAR)); 4072 } 4073 length += data->password_len+1; 4074 } 4075 4076 if(data->password || data->username) { 4077 if(uri) 4078 uri[length] = '@'; 4079 ++length; 4080 } 4081 4082 if(data->host) { 4083 /* IPv6 addresses get the brackets added around them if they don't already 4084 * have them. 
4085 */ 4086 const BOOL add_brackets = data->host_type == Uri_HOST_IPV6 && *(data->host) != '['; 4087 if(add_brackets) { 4088 if(uri) 4089 uri[length] = '['; 4090 ++length; 4091 } 4092 4093 if(uri) 4094 memcpy(uri+length, data->host, data->host_len*sizeof(WCHAR)); 4095 length += data->host_len; 4096 4097 if(add_brackets) { 4098 if(uri) 4099 uri[length] = ']'; 4100 length++; 4101 } 4102 } 4103 4104 if(data->has_port) { 4105 /* The port isn't included in the raw uri if it's the default 4106 * port for the scheme type. 4107 */ 4108 DWORD i; 4109 BOOL is_default = FALSE; 4110 4111 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 4112 if(data->scheme_type == default_ports[i].scheme && 4113 data->port_value == default_ports[i].port) 4114 is_default = TRUE; 4115 } 4116 4117 if(!is_default || flags & RAW_URI_FORCE_PORT_DISP) { 4118 if(uri) 4119 uri[length] = ':'; 4120 ++length; 4121 4122 if(uri) 4123 length += ui2str(uri+length, data->port_value); 4124 else 4125 length += ui2str(NULL, data->port_value); 4126 } 4127 } 4128 4129 /* Check if a '/' should be added before the path for hierarchical URIs. 
*/ 4130 if(!data->is_opaque && data->path && *(data->path) != '/') { 4131 if(uri) 4132 uri[length] = '/'; 4133 ++length; 4134 } 4135 4136 if(data->path) { 4137 if(!data->is_opaque && data->scheme_type == URL_SCHEME_FILE && 4138 flags & RAW_URI_CONVERT_TO_DOS_PATH) { 4139 DWORD len = 0; 4140 4141 if(uri) 4142 convert_to_dos_path(data->path, data->path_len, uri+length, &len); 4143 else 4144 convert_to_dos_path(data->path, data->path_len, NULL, &len); 4145 4146 length += len; 4147 } else { 4148 if(uri) 4149 memcpy(uri+length, data->path, data->path_len*sizeof(WCHAR)); 4150 length += data->path_len; 4151 } 4152 } 4153 4154 if(data->query) { 4155 if(uri) 4156 memcpy(uri+length, data->query, data->query_len*sizeof(WCHAR)); 4157 length += data->query_len; 4158 } 4159 4160 if(data->fragment) { 4161 if(uri) 4162 memcpy(uri+length, data->fragment, data->fragment_len*sizeof(WCHAR)); 4163 length += data->fragment_len; 4164 } 4165 4166 if(uri) 4167 TRACE("(%p %p): Generated raw uri=%s len=%d\n", data, uri, debugstr_wn(uri, length), length); 4168 else 4169 TRACE("(%p %p): Computed raw uri len=%d\n", data, uri, length); 4170 4171 return length; 4172 } 4173 4174 static HRESULT generate_uri(const UriBuilder *builder, const parse_data *data, Uri *uri, DWORD flags) { 4175 HRESULT hr; 4176 DWORD length = generate_raw_uri(data, NULL, 0); 4177 uri->raw_uri = SysAllocStringLen(NULL, length); 4178 if(!uri->raw_uri) 4179 return E_OUTOFMEMORY; 4180 4181 generate_raw_uri(data, uri->raw_uri, 0); 4182 4183 hr = canonicalize_uri(data, uri, flags); 4184 if(FAILED(hr)) { 4185 if(hr == E_INVALIDARG) 4186 return INET_E_INVALID_URL; 4187 return hr; 4188 } 4189 4190 uri->create_flags = flags; 4191 return S_OK; 4192 } 4193 4194 static inline Uri* impl_from_IUri(IUri *iface) 4195 { 4196 return CONTAINING_RECORD(iface, Uri, IUri_iface); 4197 } 4198 4199 static inline void destroy_uri_obj(Uri *This) 4200 { 4201 SysFreeString(This->raw_uri); 4202 heap_free(This->canon_uri); 4203 heap_free(This); 4204 } 
4205 4206 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv) 4207 { 4208 Uri *This = impl_from_IUri(iface); 4209 4210 if(IsEqualGUID(&IID_IUnknown, riid)) { 4211 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 4212 *ppv = &This->IUri_iface; 4213 }else if(IsEqualGUID(&IID_IUri, riid)) { 4214 TRACE("(%p)->(IID_IUri %p)\n", This, ppv); 4215 *ppv = &This->IUri_iface; 4216 }else if(IsEqualGUID(&IID_IUriBuilderFactory, riid)) { 4217 TRACE("(%p)->(IID_IUriBuilderFactory %p)\n", This, ppv); 4218 *ppv = &This->IUriBuilderFactory_iface; 4219 }else if(IsEqualGUID(&IID_IPersistStream, riid)) { 4220 TRACE("(%p)->(IID_IPersistStream %p)\n", This, ppv); 4221 *ppv = &This->IPersistStream_iface; 4222 }else if(IsEqualGUID(&IID_IMarshal, riid)) { 4223 TRACE("(%p)->(IID_IMarshal %p)\n", This, ppv); 4224 *ppv = &This->IMarshal_iface; 4225 }else if(IsEqualGUID(&IID_IUriObj, riid)) { 4226 TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv); 4227 *ppv = This; 4228 return S_OK; 4229 }else { 4230 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 4231 *ppv = NULL; 4232 return E_NOINTERFACE; 4233 } 4234 4235 IUnknown_AddRef((IUnknown*)*ppv); 4236 return S_OK; 4237 } 4238 4239 static ULONG WINAPI Uri_AddRef(IUri *iface) 4240 { 4241 Uri *This = impl_from_IUri(iface); 4242 LONG ref = InterlockedIncrement(&This->ref); 4243 4244 TRACE("(%p) ref=%d\n", This, ref); 4245 4246 return ref; 4247 } 4248 4249 static ULONG WINAPI Uri_Release(IUri *iface) 4250 { 4251 Uri *This = impl_from_IUri(iface); 4252 LONG ref = InterlockedDecrement(&This->ref); 4253 4254 TRACE("(%p) ref=%d\n", This, ref); 4255 4256 if(!ref) 4257 destroy_uri_obj(This); 4258 4259 return ref; 4260 } 4261 4262 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags) 4263 { 4264 Uri *This = impl_from_IUri(iface); 4265 HRESULT hres; 4266 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pbstrProperty, dwFlags); 4267 4268 
if(!This->create_flags) 4269 return E_UNEXPECTED; 4270 if(!pbstrProperty) 4271 return E_POINTER; 4272 4273 if(uriProp > Uri_PROPERTY_STRING_LAST) { 4274 /* It only returns S_FALSE for the ZONE property... */ 4275 if(uriProp == Uri_PROPERTY_ZONE) { 4276 *pbstrProperty = SysAllocStringLen(NULL, 0); 4277 if(!(*pbstrProperty)) 4278 return E_OUTOFMEMORY; 4279 return S_FALSE; 4280 } 4281 4282 *pbstrProperty = NULL; 4283 return E_INVALIDARG; 4284 } 4285 4286 /* Don't have support for flags yet. */ 4287 if(dwFlags) { 4288 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 4289 return E_NOTIMPL; 4290 } 4291 4292 switch(uriProp) { 4293 case Uri_PROPERTY_ABSOLUTE_URI: 4294 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 4295 *pbstrProperty = SysAllocStringLen(NULL, 0); 4296 hres = S_FALSE; 4297 } else { 4298 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 4299 if(This->userinfo_len == 0) { 4300 /* Don't include the '@' after the userinfo component. */ 4301 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-1); 4302 hres = S_OK; 4303 if(*pbstrProperty) { 4304 /* Copy everything before it. */ 4305 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4306 4307 /* And everything after it. 
*/ 4308 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+1, 4309 (This->canon_len-This->userinfo_start-1)*sizeof(WCHAR)); 4310 } 4311 } else if(This->userinfo_split == 0 && This->userinfo_len == 1) { 4312 /* Don't include the ":@" */ 4313 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-2); 4314 hres = S_OK; 4315 if(*pbstrProperty) { 4316 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4317 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+2, 4318 (This->canon_len-This->userinfo_start-2)*sizeof(WCHAR)); 4319 } 4320 } else { 4321 *pbstrProperty = SysAllocString(This->canon_uri); 4322 hres = S_OK; 4323 } 4324 } else { 4325 *pbstrProperty = SysAllocString(This->canon_uri); 4326 hres = S_OK; 4327 } 4328 } 4329 4330 if(!(*pbstrProperty)) 4331 hres = E_OUTOFMEMORY; 4332 4333 break; 4334 case Uri_PROPERTY_AUTHORITY: 4335 if(This->authority_start > -1) { 4336 if(This->port_offset > -1 && is_default_port(This->scheme_type, This->port) && 4337 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH) 4338 /* Don't include the port in the authority component. */ 4339 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->port_offset); 4340 else 4341 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len); 4342 hres = S_OK; 4343 } else { 4344 *pbstrProperty = SysAllocStringLen(NULL, 0); 4345 hres = S_FALSE; 4346 } 4347 4348 if(!(*pbstrProperty)) 4349 hres = E_OUTOFMEMORY; 4350 4351 break; 4352 case Uri_PROPERTY_DISPLAY_URI: 4353 /* The Display URI contains everything except for the userinfo for known 4354 * scheme types. 4355 */ 4356 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 4357 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len); 4358 4359 if(*pbstrProperty) { 4360 /* Copy everything before the userinfo over. 
*/ 4361 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 4362 /* Copy everything after the userinfo over. */ 4363 memcpy(*pbstrProperty+This->userinfo_start, 4364 This->canon_uri+This->userinfo_start+This->userinfo_len+1, 4365 (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR)); 4366 } 4367 } else 4368 *pbstrProperty = SysAllocString(This->canon_uri); 4369 4370 if(!(*pbstrProperty)) 4371 hres = E_OUTOFMEMORY; 4372 else 4373 hres = S_OK; 4374 4375 break; 4376 case Uri_PROPERTY_DOMAIN: 4377 if(This->domain_offset > -1) { 4378 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset, 4379 This->host_len-This->domain_offset); 4380 hres = S_OK; 4381 } else { 4382 *pbstrProperty = SysAllocStringLen(NULL, 0); 4383 hres = S_FALSE; 4384 } 4385 4386 if(!(*pbstrProperty)) 4387 hres = E_OUTOFMEMORY; 4388 4389 break; 4390 case Uri_PROPERTY_EXTENSION: 4391 if(This->extension_offset > -1) { 4392 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset, 4393 This->path_len-This->extension_offset); 4394 hres = S_OK; 4395 } else { 4396 *pbstrProperty = SysAllocStringLen(NULL, 0); 4397 hres = S_FALSE; 4398 } 4399 4400 if(!(*pbstrProperty)) 4401 hres = E_OUTOFMEMORY; 4402 4403 break; 4404 case Uri_PROPERTY_FRAGMENT: 4405 if(This->fragment_start > -1) { 4406 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len); 4407 hres = S_OK; 4408 } else { 4409 *pbstrProperty = SysAllocStringLen(NULL, 0); 4410 hres = S_FALSE; 4411 } 4412 4413 if(!(*pbstrProperty)) 4414 hres = E_OUTOFMEMORY; 4415 4416 break; 4417 case Uri_PROPERTY_HOST: 4418 if(This->host_start > -1) { 4419 /* The '[' and ']' aren't included for IPv6 addresses. 
*/ 4420 if(This->host_type == Uri_HOST_IPV6) 4421 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2); 4422 else 4423 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len); 4424 4425 hres = S_OK; 4426 } else { 4427 *pbstrProperty = SysAllocStringLen(NULL, 0); 4428 hres = S_FALSE; 4429 } 4430 4431 if(!(*pbstrProperty)) 4432 hres = E_OUTOFMEMORY; 4433 4434 break; 4435 case Uri_PROPERTY_PASSWORD: 4436 if(This->userinfo_split > -1) { 4437 *pbstrProperty = SysAllocStringLen( 4438 This->canon_uri+This->userinfo_start+This->userinfo_split+1, 4439 This->userinfo_len-This->userinfo_split-1); 4440 hres = S_OK; 4441 } else { 4442 *pbstrProperty = SysAllocStringLen(NULL, 0); 4443 hres = S_FALSE; 4444 } 4445 4446 if(!(*pbstrProperty)) 4447 return E_OUTOFMEMORY; 4448 4449 break; 4450 case Uri_PROPERTY_PATH: 4451 if(This->path_start > -1) { 4452 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len); 4453 hres = S_OK; 4454 } else { 4455 *pbstrProperty = SysAllocStringLen(NULL, 0); 4456 hres = S_FALSE; 4457 } 4458 4459 if(!(*pbstrProperty)) 4460 hres = E_OUTOFMEMORY; 4461 4462 break; 4463 case Uri_PROPERTY_PATH_AND_QUERY: 4464 if(This->path_start > -1) { 4465 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len); 4466 hres = S_OK; 4467 } else if(This->query_start > -1) { 4468 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 4469 hres = S_OK; 4470 } else { 4471 *pbstrProperty = SysAllocStringLen(NULL, 0); 4472 hres = S_FALSE; 4473 } 4474 4475 if(!(*pbstrProperty)) 4476 hres = E_OUTOFMEMORY; 4477 4478 break; 4479 case Uri_PROPERTY_QUERY: 4480 if(This->query_start > -1) { 4481 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 4482 hres = S_OK; 4483 } else { 4484 *pbstrProperty = SysAllocStringLen(NULL, 0); 4485 hres = S_FALSE; 4486 } 4487 4488 if(!(*pbstrProperty)) 
4489 hres = E_OUTOFMEMORY; 4490 4491 break; 4492 case Uri_PROPERTY_RAW_URI: 4493 *pbstrProperty = SysAllocString(This->raw_uri); 4494 if(!(*pbstrProperty)) 4495 hres = E_OUTOFMEMORY; 4496 else 4497 hres = S_OK; 4498 break; 4499 case Uri_PROPERTY_SCHEME_NAME: 4500 if(This->scheme_start > -1) { 4501 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len); 4502 hres = S_OK; 4503 } else { 4504 *pbstrProperty = SysAllocStringLen(NULL, 0); 4505 hres = S_FALSE; 4506 } 4507 4508 if(!(*pbstrProperty)) 4509 hres = E_OUTOFMEMORY; 4510 4511 break; 4512 case Uri_PROPERTY_USER_INFO: 4513 if(This->userinfo_start > -1) { 4514 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len); 4515 hres = S_OK; 4516 } else { 4517 *pbstrProperty = SysAllocStringLen(NULL, 0); 4518 hres = S_FALSE; 4519 } 4520 4521 if(!(*pbstrProperty)) 4522 hres = E_OUTOFMEMORY; 4523 4524 break; 4525 case Uri_PROPERTY_USER_NAME: 4526 if(This->userinfo_start > -1 && This->userinfo_split != 0) { 4527 /* If userinfo_split is set, that means a password exists 4528 * so the username is only from userinfo_start to userinfo_split. 
4529 */ 4530 if(This->userinfo_split > -1) { 4531 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split); 4532 hres = S_OK; 4533 } else { 4534 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len); 4535 hres = S_OK; 4536 } 4537 } else { 4538 *pbstrProperty = SysAllocStringLen(NULL, 0); 4539 hres = S_FALSE; 4540 } 4541 4542 if(!(*pbstrProperty)) 4543 return E_OUTOFMEMORY; 4544 4545 break; 4546 default: 4547 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 4548 hres = E_NOTIMPL; 4549 } 4550 4551 return hres; 4552 } 4553 4554 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 4555 { 4556 Uri *This = impl_from_IUri(iface); 4557 HRESULT hres; 4558 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags); 4559 4560 if(!This->create_flags) 4561 return E_UNEXPECTED; 4562 if(!pcchProperty) 4563 return E_INVALIDARG; 4564 4565 /* Can only return a length for a property if it's a string. */ 4566 if(uriProp > Uri_PROPERTY_STRING_LAST) 4567 return E_INVALIDARG; 4568 4569 /* Don't have support for flags yet. */ 4570 if(dwFlags) { 4571 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4572 return E_NOTIMPL; 4573 } 4574 4575 switch(uriProp) { 4576 case Uri_PROPERTY_ABSOLUTE_URI: 4577 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 4578 *pcchProperty = 0; 4579 hres = S_FALSE; 4580 } else { 4581 if(This->scheme_type != URL_SCHEME_UNKNOWN) { 4582 if(This->userinfo_start > -1 && This->userinfo_len == 0) 4583 /* Don't include the '@' in the length. */ 4584 *pcchProperty = This->canon_len-1; 4585 else if(This->userinfo_start > -1 && This->userinfo_len == 1 && 4586 This->userinfo_split == 0) 4587 /* Don't include the ":@" in the length. 
*/ 4588 *pcchProperty = This->canon_len-2; 4589 else 4590 *pcchProperty = This->canon_len; 4591 } else 4592 *pcchProperty = This->canon_len; 4593 4594 hres = S_OK; 4595 } 4596 4597 break; 4598 case Uri_PROPERTY_AUTHORITY: 4599 if(This->port_offset > -1 && 4600 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH && 4601 is_default_port(This->scheme_type, This->port)) 4602 /* Only count up until the port in the authority. */ 4603 *pcchProperty = This->port_offset; 4604 else 4605 *pcchProperty = This->authority_len; 4606 hres = (This->authority_start > -1) ? S_OK : S_FALSE; 4607 break; 4608 case Uri_PROPERTY_DISPLAY_URI: 4609 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) 4610 *pcchProperty = This->canon_len-This->userinfo_len-1; 4611 else 4612 *pcchProperty = This->canon_len; 4613 4614 hres = S_OK; 4615 break; 4616 case Uri_PROPERTY_DOMAIN: 4617 if(This->domain_offset > -1) 4618 *pcchProperty = This->host_len - This->domain_offset; 4619 else 4620 *pcchProperty = 0; 4621 4622 hres = (This->domain_offset > -1) ? S_OK : S_FALSE; 4623 break; 4624 case Uri_PROPERTY_EXTENSION: 4625 if(This->extension_offset > -1) { 4626 *pcchProperty = This->path_len - This->extension_offset; 4627 hres = S_OK; 4628 } else { 4629 *pcchProperty = 0; 4630 hres = S_FALSE; 4631 } 4632 4633 break; 4634 case Uri_PROPERTY_FRAGMENT: 4635 *pcchProperty = This->fragment_len; 4636 hres = (This->fragment_start > -1) ? S_OK : S_FALSE; 4637 break; 4638 case Uri_PROPERTY_HOST: 4639 *pcchProperty = This->host_len; 4640 4641 /* '[' and ']' aren't included in the length. */ 4642 if(This->host_type == Uri_HOST_IPV6) 4643 *pcchProperty -= 2; 4644 4645 hres = (This->host_start > -1) ? S_OK : S_FALSE; 4646 break; 4647 case Uri_PROPERTY_PASSWORD: 4648 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0; 4649 hres = (This->userinfo_split > -1) ? 
S_OK : S_FALSE; 4650 break; 4651 case Uri_PROPERTY_PATH: 4652 *pcchProperty = This->path_len; 4653 hres = (This->path_start > -1) ? S_OK : S_FALSE; 4654 break; 4655 case Uri_PROPERTY_PATH_AND_QUERY: 4656 *pcchProperty = This->path_len+This->query_len; 4657 hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE; 4658 break; 4659 case Uri_PROPERTY_QUERY: 4660 *pcchProperty = This->query_len; 4661 hres = (This->query_start > -1) ? S_OK : S_FALSE; 4662 break; 4663 case Uri_PROPERTY_RAW_URI: 4664 *pcchProperty = SysStringLen(This->raw_uri); 4665 hres = S_OK; 4666 break; 4667 case Uri_PROPERTY_SCHEME_NAME: 4668 *pcchProperty = This->scheme_len; 4669 hres = (This->scheme_start > -1) ? S_OK : S_FALSE; 4670 break; 4671 case Uri_PROPERTY_USER_INFO: 4672 *pcchProperty = This->userinfo_len; 4673 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 4674 break; 4675 case Uri_PROPERTY_USER_NAME: 4676 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len; 4677 if(This->userinfo_split == 0) 4678 hres = S_FALSE; 4679 else 4680 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 4681 break; 4682 default: 4683 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4684 hres = E_NOTIMPL; 4685 } 4686 4687 return hres; 4688 } 4689 4690 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 4691 { 4692 Uri *This = impl_from_IUri(iface); 4693 HRESULT hres; 4694 4695 TRACE("(%p %s)->(%d %p %x)\n", This, debugstr_w(This->canon_uri), uriProp, pcchProperty, dwFlags); 4696 4697 if(!This->create_flags) 4698 return E_UNEXPECTED; 4699 if(!pcchProperty) 4700 return E_INVALIDARG; 4701 4702 /* Microsoft's implementation for the ZONE property of a URI seems to be lacking... 4703 * From what I can tell, instead of checking which URLZONE the URI belongs to it 4704 * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone 4705 * function. 
4706 */ 4707 if(uriProp == Uri_PROPERTY_ZONE) { 4708 *pcchProperty = URLZONE_INVALID; 4709 return E_NOTIMPL; 4710 } 4711 4712 if(uriProp < Uri_PROPERTY_DWORD_START) { 4713 *pcchProperty = 0; 4714 return E_INVALIDARG; 4715 } 4716 4717 switch(uriProp) { 4718 case Uri_PROPERTY_HOST_TYPE: 4719 *pcchProperty = This->host_type; 4720 hres = S_OK; 4721 break; 4722 case Uri_PROPERTY_PORT: 4723 if(!This->has_port) { 4724 *pcchProperty = 0; 4725 hres = S_FALSE; 4726 } else { 4727 *pcchProperty = This->port; 4728 hres = S_OK; 4729 } 4730 4731 break; 4732 case Uri_PROPERTY_SCHEME: 4733 *pcchProperty = This->scheme_type; 4734 hres = S_OK; 4735 break; 4736 default: 4737 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 4738 hres = E_NOTIMPL; 4739 } 4740 4741 return hres; 4742 } 4743 4744 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty) 4745 { 4746 Uri *This = impl_from_IUri(iface); 4747 4748 TRACE("(%p %s)->(%d %p)\n", This, debugstr_w(This->canon_uri), uriProp, pfHasProperty); 4749 4750 if(!pfHasProperty) 4751 return E_INVALIDARG; 4752 4753 switch(uriProp) { 4754 case Uri_PROPERTY_ABSOLUTE_URI: 4755 *pfHasProperty = !(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI); 4756 break; 4757 case Uri_PROPERTY_AUTHORITY: 4758 *pfHasProperty = This->authority_start > -1; 4759 break; 4760 case Uri_PROPERTY_DISPLAY_URI: 4761 *pfHasProperty = TRUE; 4762 break; 4763 case Uri_PROPERTY_DOMAIN: 4764 *pfHasProperty = This->domain_offset > -1; 4765 break; 4766 case Uri_PROPERTY_EXTENSION: 4767 *pfHasProperty = This->extension_offset > -1; 4768 break; 4769 case Uri_PROPERTY_FRAGMENT: 4770 *pfHasProperty = This->fragment_start > -1; 4771 break; 4772 case Uri_PROPERTY_HOST: 4773 *pfHasProperty = This->host_start > -1; 4774 break; 4775 case Uri_PROPERTY_PASSWORD: 4776 *pfHasProperty = This->userinfo_split > -1; 4777 break; 4778 case Uri_PROPERTY_PATH: 4779 *pfHasProperty = This->path_start > -1; 4780 break; 4781 case 
Uri_PROPERTY_PATH_AND_QUERY: 4782 *pfHasProperty = (This->path_start > -1 || This->query_start > -1); 4783 break; 4784 case Uri_PROPERTY_QUERY: 4785 *pfHasProperty = This->query_start > -1; 4786 break; 4787 case Uri_PROPERTY_RAW_URI: 4788 *pfHasProperty = TRUE; 4789 break; 4790 case Uri_PROPERTY_SCHEME_NAME: 4791 *pfHasProperty = This->scheme_start > -1; 4792 break; 4793 case Uri_PROPERTY_USER_INFO: 4794 *pfHasProperty = This->userinfo_start > -1; 4795 break; 4796 case Uri_PROPERTY_USER_NAME: 4797 if(This->userinfo_split == 0) 4798 *pfHasProperty = FALSE; 4799 else 4800 *pfHasProperty = This->userinfo_start > -1; 4801 break; 4802 case Uri_PROPERTY_HOST_TYPE: 4803 *pfHasProperty = TRUE; 4804 break; 4805 case Uri_PROPERTY_PORT: 4806 *pfHasProperty = This->has_port; 4807 break; 4808 case Uri_PROPERTY_SCHEME: 4809 *pfHasProperty = TRUE; 4810 break; 4811 case Uri_PROPERTY_ZONE: 4812 *pfHasProperty = FALSE; 4813 break; 4814 default: 4815 FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty); 4816 return E_NOTIMPL; 4817 } 4818 4819 return S_OK; 4820 } 4821 4822 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri) 4823 { 4824 TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri); 4825 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0); 4826 } 4827 4828 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority) 4829 { 4830 TRACE("(%p)->(%p)\n", iface, pstrAuthority); 4831 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0); 4832 } 4833 4834 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri) 4835 { 4836 TRACE("(%p)->(%p)\n", iface, pstrDisplayUri); 4837 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0); 4838 } 4839 4840 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain) 4841 { 4842 TRACE("(%p)->(%p)\n", iface, pstrDomain); 4843 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, 
pstrDomain, 0); 4844 } 4845 4846 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension) 4847 { 4848 TRACE("(%p)->(%p)\n", iface, pstrExtension); 4849 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0); 4850 } 4851 4852 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment) 4853 { 4854 TRACE("(%p)->(%p)\n", iface, pstrFragment); 4855 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0); 4856 } 4857 4858 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost) 4859 { 4860 TRACE("(%p)->(%p)\n", iface, pstrHost); 4861 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0); 4862 } 4863 4864 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword) 4865 { 4866 TRACE("(%p)->(%p)\n", iface, pstrPassword); 4867 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0); 4868 } 4869 4870 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath) 4871 { 4872 TRACE("(%p)->(%p)\n", iface, pstrPath); 4873 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0); 4874 } 4875 4876 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery) 4877 { 4878 TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery); 4879 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0); 4880 } 4881 4882 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery) 4883 { 4884 TRACE("(%p)->(%p)\n", iface, pstrQuery); 4885 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0); 4886 } 4887 4888 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri) 4889 { 4890 TRACE("(%p)->(%p)\n", iface, pstrRawUri); 4891 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0); 4892 } 4893 4894 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName) 4895 { 4896 TRACE("(%p)->(%p)\n", iface, pstrSchemeName); 4897 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, 
pstrSchemeName, 0); 4898 } 4899 4900 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo) 4901 { 4902 TRACE("(%p)->(%p)\n", iface, pstrUserInfo); 4903 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0); 4904 } 4905 4906 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName) 4907 { 4908 TRACE("(%p)->(%p)\n", iface, pstrUserName); 4909 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0); 4910 } 4911 4912 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType) 4913 { 4914 TRACE("(%p)->(%p)\n", iface, pdwHostType); 4915 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0); 4916 } 4917 4918 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort) 4919 { 4920 TRACE("(%p)->(%p)\n", iface, pdwPort); 4921 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0); 4922 } 4923 4924 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme) 4925 { 4926 TRACE("(%p)->(%p)\n", iface, pdwScheme); 4927 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0); 4928 } 4929 4930 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone) 4931 { 4932 TRACE("(%p)->(%p)\n", iface, pdwZone); 4933 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0); 4934 } 4935 4936 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties) 4937 { 4938 Uri *This = impl_from_IUri(iface); 4939 TRACE("(%p %s)->(%p)\n", This, debugstr_w(This->canon_uri), pdwProperties); 4940 4941 if(!This->create_flags) 4942 return E_UNEXPECTED; 4943 if(!pdwProperties) 4944 return E_INVALIDARG; 4945 4946 /* All URIs have these. 
*/ 4947 *pdwProperties = Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE; 4948 4949 if(!(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI)) 4950 *pdwProperties |= Uri_HAS_ABSOLUTE_URI; 4951 4952 if(This->scheme_start > -1) 4953 *pdwProperties |= Uri_HAS_SCHEME_NAME; 4954 4955 if(This->authority_start > -1) { 4956 *pdwProperties |= Uri_HAS_AUTHORITY; 4957 if(This->userinfo_start > -1) { 4958 *pdwProperties |= Uri_HAS_USER_INFO; 4959 if(This->userinfo_split != 0) 4960 *pdwProperties |= Uri_HAS_USER_NAME; 4961 } 4962 if(This->userinfo_split > -1) 4963 *pdwProperties |= Uri_HAS_PASSWORD; 4964 if(This->host_start > -1) 4965 *pdwProperties |= Uri_HAS_HOST; 4966 if(This->domain_offset > -1) 4967 *pdwProperties |= Uri_HAS_DOMAIN; 4968 } 4969 4970 if(This->has_port) 4971 *pdwProperties |= Uri_HAS_PORT; 4972 if(This->path_start > -1) 4973 *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY; 4974 if(This->query_start > -1) 4975 *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY; 4976 4977 if(This->extension_offset > -1) 4978 *pdwProperties |= Uri_HAS_EXTENSION; 4979 4980 if(This->fragment_start > -1) 4981 *pdwProperties |= Uri_HAS_FRAGMENT; 4982 4983 return S_OK; 4984 } 4985 4986 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual) 4987 { 4988 Uri *This = impl_from_IUri(iface); 4989 Uri *other; 4990 4991 TRACE("(%p %s)->(%p %p)\n", This, debugstr_w(This->canon_uri), pUri, pfEqual); 4992 4993 if(!This->create_flags) 4994 return E_UNEXPECTED; 4995 if(!pfEqual) 4996 return E_POINTER; 4997 4998 if(!pUri) { 4999 *pfEqual = FALSE; 5000 5001 /* For some reason Windows returns S_OK here... */ 5002 return S_OK; 5003 } 5004 5005 /* Try to convert it to a Uri (allows for a more simple comparison). 
*/ 5006 if(!(other = get_uri_obj(pUri))) { 5007 FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual); 5008 return E_NOTIMPL; 5009 } 5010 5011 TRACE("comparing to %s\n", debugstr_w(other->canon_uri)); 5012 return compare_uris(This, other, pfEqual); 5013 } 5014 5015 static const IUriVtbl UriVtbl = { 5016 Uri_QueryInterface, 5017 Uri_AddRef, 5018 Uri_Release, 5019 Uri_GetPropertyBSTR, 5020 Uri_GetPropertyLength, 5021 Uri_GetPropertyDWORD, 5022 Uri_HasProperty, 5023 Uri_GetAbsoluteUri, 5024 Uri_GetAuthority, 5025 Uri_GetDisplayUri, 5026 Uri_GetDomain, 5027 Uri_GetExtension, 5028 Uri_GetFragment, 5029 Uri_GetHost, 5030 Uri_GetPassword, 5031 Uri_GetPath, 5032 Uri_GetPathAndQuery, 5033 Uri_GetQuery, 5034 Uri_GetRawUri, 5035 Uri_GetSchemeName, 5036 Uri_GetUserInfo, 5037 Uri_GetUserName, 5038 Uri_GetHostType, 5039 Uri_GetPort, 5040 Uri_GetScheme, 5041 Uri_GetZone, 5042 Uri_GetProperties, 5043 Uri_IsEqual 5044 }; 5045 5046 static inline Uri* impl_from_IUriBuilderFactory(IUriBuilderFactory *iface) 5047 { 5048 return CONTAINING_RECORD(iface, Uri, IUriBuilderFactory_iface); 5049 } 5050 5051 static HRESULT WINAPI UriBuilderFactory_QueryInterface(IUriBuilderFactory *iface, REFIID riid, void **ppv) 5052 { 5053 Uri *This = impl_from_IUriBuilderFactory(iface); 5054 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5055 } 5056 5057 static ULONG WINAPI UriBuilderFactory_AddRef(IUriBuilderFactory *iface) 5058 { 5059 Uri *This = impl_from_IUriBuilderFactory(iface); 5060 return IUri_AddRef(&This->IUri_iface); 5061 } 5062 5063 static ULONG WINAPI UriBuilderFactory_Release(IUriBuilderFactory *iface) 5064 { 5065 Uri *This = impl_from_IUriBuilderFactory(iface); 5066 return IUri_Release(&This->IUri_iface); 5067 } 5068 5069 static HRESULT WINAPI UriBuilderFactory_CreateIUriBuilder(IUriBuilderFactory *iface, 5070 DWORD dwFlags, 5071 DWORD_PTR dwReserved, 5072 IUriBuilder **ppIUriBuilder) 5073 { 5074 Uri *This = impl_from_IUriBuilderFactory(iface); 5075 
TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 5076 5077 if(!ppIUriBuilder) 5078 return E_POINTER; 5079 5080 if(dwFlags || dwReserved) { 5081 *ppIUriBuilder = NULL; 5082 return E_INVALIDARG; 5083 } 5084 5085 return CreateIUriBuilder(NULL, 0, 0, ppIUriBuilder); 5086 } 5087 5088 static HRESULT WINAPI UriBuilderFactory_CreateInitializedIUriBuilder(IUriBuilderFactory *iface, 5089 DWORD dwFlags, 5090 DWORD_PTR dwReserved, 5091 IUriBuilder **ppIUriBuilder) 5092 { 5093 Uri *This = impl_from_IUriBuilderFactory(iface); 5094 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 5095 5096 if(!ppIUriBuilder) 5097 return E_POINTER; 5098 5099 if(dwFlags || dwReserved) { 5100 *ppIUriBuilder = NULL; 5101 return E_INVALIDARG; 5102 } 5103 5104 return CreateIUriBuilder(&This->IUri_iface, 0, 0, ppIUriBuilder); 5105 } 5106 5107 static const IUriBuilderFactoryVtbl UriBuilderFactoryVtbl = { 5108 UriBuilderFactory_QueryInterface, 5109 UriBuilderFactory_AddRef, 5110 UriBuilderFactory_Release, 5111 UriBuilderFactory_CreateIUriBuilder, 5112 UriBuilderFactory_CreateInitializedIUriBuilder 5113 }; 5114 5115 static inline Uri* impl_from_IPersistStream(IPersistStream *iface) 5116 { 5117 return CONTAINING_RECORD(iface, Uri, IPersistStream_iface); 5118 } 5119 5120 static HRESULT WINAPI PersistStream_QueryInterface(IPersistStream *iface, REFIID riid, void **ppvObject) 5121 { 5122 Uri *This = impl_from_IPersistStream(iface); 5123 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject); 5124 } 5125 5126 static ULONG WINAPI PersistStream_AddRef(IPersistStream *iface) 5127 { 5128 Uri *This = impl_from_IPersistStream(iface); 5129 return IUri_AddRef(&This->IUri_iface); 5130 } 5131 5132 static ULONG WINAPI PersistStream_Release(IPersistStream *iface) 5133 { 5134 Uri *This = impl_from_IPersistStream(iface); 5135 return IUri_Release(&This->IUri_iface); 5136 } 5137 5138 static HRESULT WINAPI PersistStream_GetClassID(IPersistStream *iface, 
CLSID *pClassID) 5139 { 5140 Uri *This = impl_from_IPersistStream(iface); 5141 TRACE("(%p)->(%p)\n", This, pClassID); 5142 5143 if(!pClassID) 5144 return E_INVALIDARG; 5145 5146 *pClassID = CLSID_CUri; 5147 return S_OK; 5148 } 5149 5150 static HRESULT WINAPI PersistStream_IsDirty(IPersistStream *iface) 5151 { 5152 Uri *This = impl_from_IPersistStream(iface); 5153 TRACE("(%p)\n", This); 5154 return S_FALSE; 5155 } 5156 5157 struct persist_uri { 5158 DWORD size; 5159 DWORD unk1[2]; 5160 DWORD create_flags; 5161 DWORD unk2[3]; 5162 DWORD fields_no; 5163 BYTE data[1]; 5164 }; 5165 5166 static HRESULT WINAPI PersistStream_Load(IPersistStream *iface, IStream *pStm) 5167 { 5168 Uri *This = impl_from_IPersistStream(iface); 5169 struct persist_uri *data; 5170 parse_data parse; 5171 DWORD size; 5172 HRESULT hr; 5173 5174 TRACE("(%p)->(%p)\n", This, pStm); 5175 5176 if(This->create_flags) 5177 return E_UNEXPECTED; 5178 if(!pStm) 5179 return E_INVALIDARG; 5180 5181 hr = IStream_Read(pStm, &size, sizeof(DWORD), NULL); 5182 if(FAILED(hr)) 5183 return hr; 5184 data = heap_alloc(size); 5185 if(!data) 5186 return E_OUTOFMEMORY; 5187 hr = IStream_Read(pStm, data->unk1, size-sizeof(DWORD)-2, NULL); 5188 if(FAILED(hr)) { 5189 heap_free(data); 5190 return hr; 5191 } 5192 5193 if(size < sizeof(struct persist_uri)) { 5194 heap_free(data); 5195 return S_OK; 5196 } 5197 5198 if(*(DWORD*)data->data != Uri_PROPERTY_RAW_URI) { 5199 heap_free(data); 5200 ERR("Can't find raw_uri\n"); 5201 return E_UNEXPECTED; 5202 } 5203 5204 This->raw_uri = SysAllocString((WCHAR*)(data->data+sizeof(DWORD)*2)); 5205 if(!This->raw_uri) { 5206 heap_free(data); 5207 return E_OUTOFMEMORY; 5208 } 5209 This->create_flags = data->create_flags; 5210 heap_free(data); 5211 TRACE("%x %s\n", This->create_flags, debugstr_w(This->raw_uri)); 5212 5213 memset(&parse, 0, sizeof(parse_data)); 5214 parse.uri = This->raw_uri; 5215 if(!parse_uri(&parse, This->create_flags)) { 5216 SysFreeString(This->raw_uri); 5217 
This->create_flags = 0; 5218 return E_UNEXPECTED; 5219 } 5220 5221 hr = canonicalize_uri(&parse, This, This->create_flags); 5222 if(FAILED(hr)) { 5223 SysFreeString(This->raw_uri); 5224 This->create_flags = 0; 5225 return hr; 5226 } 5227 5228 return S_OK; 5229 } 5230 5231 static inline BYTE* persist_stream_add_strprop(Uri *This, BYTE *p, DWORD type, DWORD len, WCHAR *data) 5232 { 5233 len *= sizeof(WCHAR); 5234 *(DWORD*)p = type; 5235 p += sizeof(DWORD); 5236 *(DWORD*)p = len+sizeof(WCHAR); 5237 p += sizeof(DWORD); 5238 memcpy(p, data, len); 5239 p += len; 5240 *(WCHAR*)p = 0; 5241 return p+sizeof(WCHAR); 5242 } 5243 5244 static inline void persist_stream_save(Uri *This, IStream *pStm, BOOL marshal, struct persist_uri *data) 5245 { 5246 BYTE *p = NULL; 5247 5248 data->create_flags = This->create_flags; 5249 5250 if(This->create_flags) { 5251 data->fields_no = 1; 5252 p = persist_stream_add_strprop(This, data->data, Uri_PROPERTY_RAW_URI, 5253 SysStringLen(This->raw_uri), This->raw_uri); 5254 } 5255 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS 5256 && This->scheme_type!=URL_SCHEME_FTP) 5257 return; 5258 5259 if(This->fragment_len) { 5260 data->fields_no++; 5261 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_FRAGMENT, 5262 This->fragment_len, This->canon_uri+This->fragment_start); 5263 } 5264 5265 if(This->host_len) { 5266 data->fields_no++; 5267 if(This->host_type == Uri_HOST_IPV6) 5268 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST, 5269 This->host_len-2, This->canon_uri+This->host_start+1); 5270 else 5271 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_HOST, 5272 This->host_len, This->canon_uri+This->host_start); 5273 } 5274 5275 if(This->userinfo_split > -1) { 5276 data->fields_no++; 5277 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PASSWORD, 5278 This->userinfo_len-This->userinfo_split-1, 5279 This->canon_uri+This->userinfo_start+This->userinfo_split+1); 5280 } 5281 5282 if(This->path_len) { 5283 
data->fields_no++; 5284 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 5285 This->path_len, This->canon_uri+This->path_start); 5286 } else if(marshal) { 5287 WCHAR no_path = '/'; 5288 data->fields_no++; 5289 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_PATH, 1, &no_path); 5290 } 5291 5292 if(This->has_port) { 5293 data->fields_no++; 5294 *(DWORD*)p = Uri_PROPERTY_PORT; 5295 p += sizeof(DWORD); 5296 *(DWORD*)p = sizeof(DWORD); 5297 p += sizeof(DWORD); 5298 *(DWORD*)p = This->port; 5299 p += sizeof(DWORD); 5300 } 5301 5302 if(This->query_len) { 5303 data->fields_no++; 5304 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_QUERY, 5305 This->query_len, This->canon_uri+This->query_start); 5306 } 5307 5308 if(This->scheme_len) { 5309 data->fields_no++; 5310 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_SCHEME_NAME, 5311 This->scheme_len, This->canon_uri+This->scheme_start); 5312 } 5313 5314 if(This->userinfo_start>-1 && This->userinfo_split!=0) { 5315 data->fields_no++; 5316 if(This->userinfo_split > -1) 5317 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME, 5318 This->userinfo_split, This->canon_uri+This->userinfo_start); 5319 else 5320 p = persist_stream_add_strprop(This, p, Uri_PROPERTY_USER_NAME, 5321 This->userinfo_len, This->canon_uri+This->userinfo_start); 5322 } 5323 } 5324 5325 static HRESULT WINAPI PersistStream_Save(IPersistStream *iface, IStream *pStm, BOOL fClearDirty) 5326 { 5327 Uri *This = impl_from_IPersistStream(iface); 5328 struct persist_uri *data; 5329 ULARGE_INTEGER size; 5330 HRESULT hres; 5331 5332 TRACE("(%p)->(%p %x)\n", This, pStm, fClearDirty); 5333 5334 if(!pStm) 5335 return E_INVALIDARG; 5336 5337 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size); 5338 if(FAILED(hres)) 5339 return hres; 5340 5341 data = heap_alloc_zero(size.u.LowPart); 5342 if(!data) 5343 return E_OUTOFMEMORY; 5344 data->size = size.u.LowPart; 5345 persist_stream_save(This, pStm, FALSE, data); 5346 5347 hres = 
IStream_Write(pStm, data, data->size-2, NULL); 5348 heap_free(data); 5349 return hres; 5350 } 5351 5352 static HRESULT WINAPI PersistStream_GetSizeMax(IPersistStream *iface, ULARGE_INTEGER *pcbSize) 5353 { 5354 Uri *This = impl_from_IPersistStream(iface); 5355 TRACE("(%p)->(%p)\n", This, pcbSize); 5356 5357 if(!pcbSize) 5358 return E_INVALIDARG; 5359 5360 pcbSize->u.LowPart = 2+sizeof(struct persist_uri); 5361 pcbSize->u.HighPart = 0; 5362 if(This->create_flags) 5363 pcbSize->u.LowPart += (SysStringLen(This->raw_uri)+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5364 else /* there's no place for fields no */ 5365 pcbSize->u.LowPart -= sizeof(DWORD); 5366 if(This->scheme_type!=URL_SCHEME_HTTP && This->scheme_type!=URL_SCHEME_HTTPS 5367 && This->scheme_type!=URL_SCHEME_FTP) 5368 return S_OK; 5369 5370 if(This->fragment_len) 5371 pcbSize->u.LowPart += (This->fragment_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5372 if(This->host_len) { 5373 if(This->host_type == Uri_HOST_IPV6) 5374 pcbSize->u.LowPart += (This->host_len-1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5375 else 5376 pcbSize->u.LowPart += (This->host_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5377 } 5378 if(This->userinfo_split > -1) 5379 pcbSize->u.LowPart += (This->userinfo_len-This->userinfo_split)*sizeof(WCHAR) + 2*sizeof(DWORD); 5380 if(This->path_len) 5381 pcbSize->u.LowPart += (This->path_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5382 if(This->has_port) 5383 pcbSize->u.LowPart += 3*sizeof(DWORD); 5384 if(This->query_len) 5385 pcbSize->u.LowPart += (This->query_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5386 if(This->scheme_len) 5387 pcbSize->u.LowPart += (This->scheme_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5388 if(This->userinfo_start>-1 && This->userinfo_split!=0) { 5389 if(This->userinfo_split > -1) 5390 pcbSize->u.LowPart += (This->userinfo_split+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5391 else 5392 pcbSize->u.LowPart += (This->userinfo_len+1)*sizeof(WCHAR) + 2*sizeof(DWORD); 5393 } 5394 return S_OK; 5395 } 5396 5397 static const 
IPersistStreamVtbl PersistStreamVtbl = { 5398 PersistStream_QueryInterface, 5399 PersistStream_AddRef, 5400 PersistStream_Release, 5401 PersistStream_GetClassID, 5402 PersistStream_IsDirty, 5403 PersistStream_Load, 5404 PersistStream_Save, 5405 PersistStream_GetSizeMax 5406 }; 5407 5408 static inline Uri* impl_from_IMarshal(IMarshal *iface) 5409 { 5410 return CONTAINING_RECORD(iface, Uri, IMarshal_iface); 5411 } 5412 5413 static HRESULT WINAPI Marshal_QueryInterface(IMarshal *iface, REFIID riid, void **ppvObject) 5414 { 5415 Uri *This = impl_from_IMarshal(iface); 5416 return IUri_QueryInterface(&This->IUri_iface, riid, ppvObject); 5417 } 5418 5419 static ULONG WINAPI Marshal_AddRef(IMarshal *iface) 5420 { 5421 Uri *This = impl_from_IMarshal(iface); 5422 return IUri_AddRef(&This->IUri_iface); 5423 } 5424 5425 static ULONG WINAPI Marshal_Release(IMarshal *iface) 5426 { 5427 Uri *This = impl_from_IMarshal(iface); 5428 return IUri_Release(&This->IUri_iface); 5429 } 5430 5431 static HRESULT WINAPI Marshal_GetUnmarshalClass(IMarshal *iface, REFIID riid, void *pv, 5432 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, CLSID *pCid) 5433 { 5434 Uri *This = impl_from_IMarshal(iface); 5435 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv, 5436 dwDestContext, pvDestContext, mshlflags, pCid); 5437 5438 if(!pCid || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM 5439 && dwDestContext!=MSHCTX_INPROC)) 5440 return E_INVALIDARG; 5441 5442 *pCid = CLSID_CUri; 5443 return S_OK; 5444 } 5445 5446 struct inproc_marshal_uri { 5447 DWORD size; 5448 DWORD mshlflags; 5449 DWORD unk[4]; /* process identifier? 
*/ 5450 Uri *uri; 5451 }; 5452 5453 static HRESULT WINAPI Marshal_GetMarshalSizeMax(IMarshal *iface, REFIID riid, void *pv, 5454 DWORD dwDestContext, void *pvDestContext, DWORD mshlflags, DWORD *pSize) 5455 { 5456 Uri *This = impl_from_IMarshal(iface); 5457 ULARGE_INTEGER size; 5458 HRESULT hres; 5459 TRACE("(%p)->(%s %p %x %p %x %p)\n", This, debugstr_guid(riid), pv, 5460 dwDestContext, pvDestContext, mshlflags, pSize); 5461 5462 if(!pSize || (dwDestContext!=MSHCTX_LOCAL && dwDestContext!=MSHCTX_NOSHAREDMEM 5463 && dwDestContext!=MSHCTX_INPROC)) 5464 return E_INVALIDARG; 5465 5466 if(dwDestContext == MSHCTX_INPROC) { 5467 *pSize = sizeof(struct inproc_marshal_uri); 5468 return S_OK; 5469 } 5470 5471 hres = IPersistStream_GetSizeMax(&This->IPersistStream_iface, &size); 5472 if(FAILED(hres)) 5473 return hres; 5474 if(!This->path_len && (This->scheme_type==URL_SCHEME_HTTP 5475 || This->scheme_type==URL_SCHEME_HTTPS 5476 || This->scheme_type==URL_SCHEME_FTP)) 5477 size.u.LowPart += 3*sizeof(DWORD); 5478 *pSize = size.u.LowPart+2*sizeof(DWORD); 5479 return S_OK; 5480 } 5481 5482 static HRESULT WINAPI Marshal_MarshalInterface(IMarshal *iface, IStream *pStm, REFIID riid, 5483 void *pv, DWORD dwDestContext, void *pvDestContext, DWORD mshlflags) 5484 { 5485 Uri *This = impl_from_IMarshal(iface); 5486 DWORD *data; 5487 DWORD size; 5488 HRESULT hres; 5489 5490 TRACE("(%p)->(%p %s %p %x %p %x)\n", This, pStm, debugstr_guid(riid), pv, 5491 dwDestContext, pvDestContext, mshlflags); 5492 5493 if(!pStm || mshlflags!=MSHLFLAGS_NORMAL || (dwDestContext!=MSHCTX_LOCAL 5494 && dwDestContext!=MSHCTX_NOSHAREDMEM && dwDestContext!=MSHCTX_INPROC)) 5495 return E_INVALIDARG; 5496 5497 if(dwDestContext == MSHCTX_INPROC) { 5498 struct inproc_marshal_uri data; 5499 5500 data.size = sizeof(data); 5501 data.mshlflags = MSHCTX_INPROC; 5502 data.unk[0] = 0; 5503 data.unk[1] = 0; 5504 data.unk[2] = 0; 5505 data.unk[3] = 0; 5506 data.uri = This; 5507 5508 hres = IStream_Write(pStm, &data, data.size, 
NULL); 5509 if(FAILED(hres)) 5510 return hres; 5511 5512 IUri_AddRef(&This->IUri_iface); 5513 return S_OK; 5514 } 5515 5516 hres = IMarshal_GetMarshalSizeMax(iface, riid, pv, dwDestContext, 5517 pvDestContext, mshlflags, &size); 5518 if(FAILED(hres)) 5519 return hres; 5520 5521 data = heap_alloc_zero(size); 5522 if(!data) 5523 return E_OUTOFMEMORY; 5524 5525 data[0] = size; 5526 data[1] = dwDestContext; 5527 data[2] = size-2*sizeof(DWORD); 5528 persist_stream_save(This, pStm, TRUE, (struct persist_uri*)(data+2)); 5529 5530 hres = IStream_Write(pStm, data, data[0]-2, NULL); 5531 heap_free(data); 5532 return hres; 5533 } 5534 5535 static HRESULT WINAPI Marshal_UnmarshalInterface(IMarshal *iface, 5536 IStream *pStm, REFIID riid, void **ppv) 5537 { 5538 Uri *This = impl_from_IMarshal(iface); 5539 DWORD header[2]; 5540 HRESULT hres; 5541 5542 TRACE("(%p)->(%p %s %p)\n", This, pStm, debugstr_guid(riid), ppv); 5543 5544 if(This->create_flags) 5545 return E_UNEXPECTED; 5546 if(!pStm || !riid || !ppv) 5547 return E_INVALIDARG; 5548 5549 hres = IStream_Read(pStm, header, sizeof(header), NULL); 5550 if(FAILED(hres)) 5551 return hres; 5552 5553 if(header[1]!=MSHCTX_LOCAL && header[1]!=MSHCTX_NOSHAREDMEM 5554 && header[1]!=MSHCTX_INPROC) 5555 return E_UNEXPECTED; 5556 5557 if(header[1] == MSHCTX_INPROC) { 5558 struct inproc_marshal_uri data; 5559 parse_data parse; 5560 5561 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL); 5562 if(FAILED(hres)) 5563 return hres; 5564 5565 This->raw_uri = SysAllocString(data.uri->raw_uri); 5566 if(!This->raw_uri) { 5567 return E_OUTOFMEMORY; 5568 } 5569 5570 memset(&parse, 0, sizeof(parse_data)); 5571 parse.uri = This->raw_uri; 5572 5573 if(!parse_uri(&parse, data.uri->create_flags)) 5574 return E_INVALIDARG; 5575 5576 hres = canonicalize_uri(&parse, This, data.uri->create_flags); 5577 if(FAILED(hres)) 5578 return hres; 5579 5580 This->create_flags = data.uri->create_flags; 5581 IUri_Release(&data.uri->IUri_iface); 5582 
5583 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5584 } 5585 5586 hres = IPersistStream_Load(&This->IPersistStream_iface, pStm); 5587 if(FAILED(hres)) 5588 return hres; 5589 5590 return IUri_QueryInterface(&This->IUri_iface, riid, ppv); 5591 } 5592 5593 static HRESULT WINAPI Marshal_ReleaseMarshalData(IMarshal *iface, IStream *pStm) 5594 { 5595 Uri *This = impl_from_IMarshal(iface); 5596 LARGE_INTEGER off; 5597 DWORD header[2]; 5598 HRESULT hres; 5599 5600 TRACE("(%p)->(%p)\n", This, pStm); 5601 5602 if(!pStm) 5603 return E_INVALIDARG; 5604 5605 hres = IStream_Read(pStm, header, 2*sizeof(DWORD), NULL); 5606 if(FAILED(hres)) 5607 return hres; 5608 5609 if(header[1] == MSHCTX_INPROC) { 5610 struct inproc_marshal_uri data; 5611 5612 hres = IStream_Read(pStm, data.unk, sizeof(data)-2*sizeof(DWORD), NULL); 5613 if(FAILED(hres)) 5614 return hres; 5615 5616 IUri_Release(&data.uri->IUri_iface); 5617 return S_OK; 5618 } 5619 5620 off.u.LowPart = header[0]-sizeof(header)-2; 5621 off.u.HighPart = 0; 5622 return IStream_Seek(pStm, off, STREAM_SEEK_CUR, NULL); 5623 } 5624 5625 static HRESULT WINAPI Marshal_DisconnectObject(IMarshal *iface, DWORD dwReserved) 5626 { 5627 Uri *This = impl_from_IMarshal(iface); 5628 TRACE("(%p)->(%x)\n", This, dwReserved); 5629 return S_OK; 5630 } 5631 5632 static const IMarshalVtbl MarshalVtbl = { 5633 Marshal_QueryInterface, 5634 Marshal_AddRef, 5635 Marshal_Release, 5636 Marshal_GetUnmarshalClass, 5637 Marshal_GetMarshalSizeMax, 5638 Marshal_MarshalInterface, 5639 Marshal_UnmarshalInterface, 5640 Marshal_ReleaseMarshalData, 5641 Marshal_DisconnectObject 5642 }; 5643 5644 HRESULT Uri_Construct(IUnknown *pUnkOuter, LPVOID *ppobj) 5645 { 5646 Uri *ret = heap_alloc_zero(sizeof(Uri)); 5647 5648 TRACE("(%p %p)\n", pUnkOuter, ppobj); 5649 5650 *ppobj = ret; 5651 if(!ret) 5652 return E_OUTOFMEMORY; 5653 5654 ret->IUri_iface.lpVtbl = &UriVtbl; 5655 ret->IUriBuilderFactory_iface.lpVtbl = &UriBuilderFactoryVtbl; 5656 
ret->IPersistStream_iface.lpVtbl = &PersistStreamVtbl; 5657 ret->IMarshal_iface.lpVtbl = &MarshalVtbl; 5658 ret->ref = 1; 5659 5660 *ppobj = &ret->IUri_iface; 5661 return S_OK; 5662 } 5663 5664 /*********************************************************************** 5665 * CreateUri (urlmon.@) 5666 * 5667 * Creates a new IUri object using the URI represented by pwzURI. This function 5668 * parses and validates the components of pwzURI and then canonicalizes the 5669 * parsed components. 5670 * 5671 * PARAMS 5672 * pwzURI [I] The URI to parse, validate, and canonicalize. 5673 * dwFlags [I] Flags which can affect how the parsing/canonicalization is performed. 5674 * dwReserved [I] Reserved (not used). 5675 * ppURI [O] The resulting IUri after parsing/canonicalization occurs. 5676 * 5677 * RETURNS 5678 * Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri. 5679 * Failure: E_INVALIDARG if there are invalid flag combinations in dwFlags, or an 5680 * invalid parameter, or pwzURI doesn't represent a valid URI. 5681 * E_OUTOFMEMORY if any memory allocation fails. 5682 * 5683 * NOTES 5684 * Default flags: 5685 * Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES, 5686 * Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS. 
5687 */ 5688 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI) 5689 { 5690 const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME| 5691 Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE| 5692 Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES| 5693 Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI| 5694 Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS|Uri_CREATE_FILE_USE_DOS_PATH; 5695 Uri *ret; 5696 HRESULT hr; 5697 parse_data data; 5698 5699 TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI); 5700 5701 if(!ppURI) 5702 return E_INVALIDARG; 5703 5704 if(!pwzURI) { 5705 *ppURI = NULL; 5706 return E_INVALIDARG; 5707 } 5708 5709 /* Check for invalid flags. */ 5710 if(has_invalid_flag_combination(dwFlags)) { 5711 *ppURI = NULL; 5712 return E_INVALIDARG; 5713 } 5714 5715 /* Currently unsupported. */ 5716 if(dwFlags & ~supported_flags) 5717 FIXME("Ignoring unsupported flag(s) %x\n", dwFlags & ~supported_flags); 5718 5719 hr = Uri_Construct(NULL, (void**)&ret); 5720 if(FAILED(hr)) { 5721 *ppURI = NULL; 5722 return hr; 5723 } 5724 5725 /* Explicitly set the default flags if it doesn't cause a flag conflict. */ 5726 apply_default_flags(&dwFlags); 5727 5728 /* Pre process the URI, unless told otherwise. */ 5729 if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 5730 ret->raw_uri = pre_process_uri(pwzURI); 5731 else 5732 ret->raw_uri = SysAllocString(pwzURI); 5733 5734 if(!ret->raw_uri) { 5735 heap_free(ret); 5736 return E_OUTOFMEMORY; 5737 } 5738 5739 memset(&data, 0, sizeof(parse_data)); 5740 data.uri = ret->raw_uri; 5741 5742 /* Validate and parse the URI into its components. 
*/ 5743 if(!parse_uri(&data, dwFlags)) { 5744 /* Encountered an unsupported or invalid URI */ 5745 IUri_Release(&ret->IUri_iface); 5746 *ppURI = NULL; 5747 return E_INVALIDARG; 5748 } 5749 5750 /* Canonicalize the URI. */ 5751 hr = canonicalize_uri(&data, ret, dwFlags); 5752 if(FAILED(hr)) { 5753 IUri_Release(&ret->IUri_iface); 5754 *ppURI = NULL; 5755 return hr; 5756 } 5757 5758 ret->create_flags = dwFlags; 5759 5760 *ppURI = &ret->IUri_iface; 5761 return S_OK; 5762 } 5763 5764 /*********************************************************************** 5765 * CreateUriWithFragment (urlmon.@) 5766 * 5767 * Creates a new IUri object. This is almost the same as CreateUri, expect that 5768 * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI. 5769 * 5770 * PARAMS 5771 * pwzURI [I] The URI to parse and perform canonicalization on. 5772 * pwzFragment [I] The explicit fragment string which should be added to pwzURI. 5773 * dwFlags [I] The flags which will be passed to CreateUri. 5774 * dwReserved [I] Reserved (not used). 5775 * ppURI [O] The resulting IUri after parsing/canonicalization. 5776 * 5777 * RETURNS 5778 * Success: S_OK. ppURI contains the pointer to the newly allocated IUri. 5779 * Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment 5780 * isn't NULL. Will also return E_INVALIDARG for the same reasons as 5781 * CreateUri will. E_OUTOFMEMORY if any allocation fails. 5782 */ 5783 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags, 5784 DWORD_PTR dwReserved, IUri **ppURI) 5785 { 5786 HRESULT hres; 5787 TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI); 5788 5789 if(!ppURI) 5790 return E_INVALIDARG; 5791 5792 if(!pwzURI) { 5793 *ppURI = NULL; 5794 return E_INVALIDARG; 5795 } 5796 5797 /* Check if a fragment should be appended to the URI string. 
*/ 5798 if(pwzFragment) { 5799 WCHAR *uriW; 5800 DWORD uri_len, frag_len; 5801 BOOL add_pound; 5802 5803 /* Check if the original URI already has a fragment component. */ 5804 if(StrChrW(pwzURI, '#')) { 5805 *ppURI = NULL; 5806 return E_INVALIDARG; 5807 } 5808 5809 uri_len = lstrlenW(pwzURI); 5810 frag_len = lstrlenW(pwzFragment); 5811 5812 /* If the fragment doesn't start with a '#', one will be added. */ 5813 add_pound = *pwzFragment != '#'; 5814 5815 if(add_pound) 5816 uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR)); 5817 else 5818 uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR)); 5819 5820 if(!uriW) 5821 return E_OUTOFMEMORY; 5822 5823 memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR)); 5824 if(add_pound) 5825 uriW[uri_len++] = '#'; 5826 memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR)); 5827 5828 hres = CreateUri(uriW, dwFlags, 0, ppURI); 5829 5830 heap_free(uriW); 5831 } else 5832 /* A fragment string wasn't specified, so just forward the call. */ 5833 hres = CreateUri(pwzURI, dwFlags, 0, ppURI); 5834 5835 return hres; 5836 } 5837 5838 static HRESULT build_uri(const UriBuilder *builder, IUri **uri, DWORD create_flags, 5839 DWORD use_orig_flags, DWORD encoding_mask) 5840 { 5841 HRESULT hr; 5842 parse_data data; 5843 Uri *ret; 5844 5845 if(!uri) 5846 return E_POINTER; 5847 5848 if(encoding_mask && (!builder->uri || builder->modified_props)) { 5849 *uri = NULL; 5850 return E_NOTIMPL; 5851 } 5852 5853 /* Decide what flags should be used when creating the Uri. */ 5854 if((use_orig_flags & UriBuilder_USE_ORIGINAL_FLAGS) && builder->uri) 5855 create_flags = builder->uri->create_flags; 5856 else { 5857 if(has_invalid_flag_combination(create_flags)) { 5858 *uri = NULL; 5859 return E_INVALIDARG; 5860 } 5861 5862 /* Set the default flags if they don't cause a conflict. */ 5863 apply_default_flags(&create_flags); 5864 } 5865 5866 /* Return the base IUri if no changes have been made and the create_flags match. 
*/ 5867 if(builder->uri && !builder->modified_props && builder->uri->create_flags == create_flags) { 5868 *uri = &builder->uri->IUri_iface; 5869 IUri_AddRef(*uri); 5870 return S_OK; 5871 } 5872 5873 hr = validate_components(builder, &data, create_flags); 5874 if(FAILED(hr)) { 5875 *uri = NULL; 5876 return hr; 5877 } 5878 5879 hr = Uri_Construct(NULL, (void**)&ret); 5880 if(FAILED(hr)) { 5881 *uri = NULL; 5882 return hr; 5883 } 5884 5885 hr = generate_uri(builder, &data, ret, create_flags); 5886 if(FAILED(hr)) { 5887 IUri_Release(&ret->IUri_iface); 5888 *uri = NULL; 5889 return hr; 5890 } 5891 5892 *uri = &ret->IUri_iface; 5893 return S_OK; 5894 } 5895 5896 static inline UriBuilder* impl_from_IUriBuilder(IUriBuilder *iface) 5897 { 5898 return CONTAINING_RECORD(iface, UriBuilder, IUriBuilder_iface); 5899 } 5900 5901 static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv) 5902 { 5903 UriBuilder *This = impl_from_IUriBuilder(iface); 5904 5905 if(IsEqualGUID(&IID_IUnknown, riid)) { 5906 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 5907 *ppv = &This->IUriBuilder_iface; 5908 }else if(IsEqualGUID(&IID_IUriBuilder, riid)) { 5909 TRACE("(%p)->(IID_IUriBuilder %p)\n", This, ppv); 5910 *ppv = &This->IUriBuilder_iface; 5911 }else { 5912 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 5913 *ppv = NULL; 5914 return E_NOINTERFACE; 5915 } 5916 5917 IUnknown_AddRef((IUnknown*)*ppv); 5918 return S_OK; 5919 } 5920 5921 static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface) 5922 { 5923 UriBuilder *This = impl_from_IUriBuilder(iface); 5924 LONG ref = InterlockedIncrement(&This->ref); 5925 5926 TRACE("(%p) ref=%d\n", This, ref); 5927 5928 return ref; 5929 } 5930 5931 static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface) 5932 { 5933 UriBuilder *This = impl_from_IUriBuilder(iface); 5934 LONG ref = InterlockedDecrement(&This->ref); 5935 5936 TRACE("(%p) ref=%d\n", This, ref); 5937 5938 if(!ref) { 5939 if(This->uri) 
IUri_Release(&This->uri->IUri_iface); 5940 heap_free(This->fragment); 5941 heap_free(This->host); 5942 heap_free(This->password); 5943 heap_free(This->path); 5944 heap_free(This->query); 5945 heap_free(This->scheme); 5946 heap_free(This->username); 5947 heap_free(This); 5948 } 5949 5950 return ref; 5951 } 5952 5953 static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface, 5954 DWORD dwAllowEncodingPropertyMask, 5955 DWORD_PTR dwReserved, 5956 IUri **ppIUri) 5957 { 5958 UriBuilder *This = impl_from_IUriBuilder(iface); 5959 HRESULT hr; 5960 TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5961 5962 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 5963 if(hr == E_NOTIMPL) 5964 FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5965 return hr; 5966 } 5967 5968 static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface, 5969 DWORD dwCreateFlags, 5970 DWORD dwAllowEncodingPropertyMask, 5971 DWORD_PTR dwReserved, 5972 IUri **ppIUri) 5973 { 5974 UriBuilder *This = impl_from_IUriBuilder(iface); 5975 HRESULT hr; 5976 TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5977 5978 if(dwCreateFlags == -1) 5979 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 5980 else 5981 hr = build_uri(This, ppIUri, dwCreateFlags, 0, dwAllowEncodingPropertyMask); 5982 5983 if(hr == E_NOTIMPL) 5984 FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5985 return hr; 5986 } 5987 5988 static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface, 5989 DWORD dwCreateFlags, 5990 DWORD dwUriBuilderFlags, 5991 DWORD dwAllowEncodingPropertyMask, 5992 DWORD_PTR dwReserved, 5993 IUri **ppIUri) 5994 { 5995 UriBuilder *This = impl_from_IUriBuilder(iface); 5996 HRESULT hr; 5997 TRACE("(%p)->(0x%08x 
0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 5998 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 5999 6000 hr = build_uri(This, ppIUri, dwCreateFlags, dwUriBuilderFlags, dwAllowEncodingPropertyMask); 6001 if(hr == E_NOTIMPL) 6002 FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 6003 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 6004 return hr; 6005 } 6006 6007 static HRESULT WINAPI UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri) 6008 { 6009 UriBuilder *This = impl_from_IUriBuilder(iface); 6010 TRACE("(%p)->(%p)\n", This, ppIUri); 6011 6012 if(!ppIUri) 6013 return E_POINTER; 6014 6015 if(This->uri) { 6016 IUri *uri = &This->uri->IUri_iface; 6017 IUri_AddRef(uri); 6018 *ppIUri = uri; 6019 } else 6020 *ppIUri = NULL; 6021 6022 return S_OK; 6023 } 6024 6025 static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri) 6026 { 6027 UriBuilder *This = impl_from_IUriBuilder(iface); 6028 TRACE("(%p)->(%p)\n", This, pIUri); 6029 6030 if(pIUri) { 6031 Uri *uri; 6032 6033 if((uri = get_uri_obj(pIUri))) { 6034 /* Only reset the builder if its Uri isn't the same as 6035 * the Uri passed to the function. 6036 */ 6037 if(This->uri != uri) { 6038 reset_builder(This); 6039 6040 This->uri = uri; 6041 if(uri->has_port) 6042 This->port = uri->port; 6043 6044 IUri_AddRef(pIUri); 6045 } 6046 } else { 6047 FIXME("(%p)->(%p) Unknown IUri types not supported yet.\n", This, pIUri); 6048 return E_NOTIMPL; 6049 } 6050 } else if(This->uri) 6051 /* Only reset the builder if its Uri isn't NULL. 
*/ 6052 reset_builder(This); 6053 6054 return S_OK; 6055 } 6056 6057 static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment) 6058 { 6059 UriBuilder *This = impl_from_IUriBuilder(iface); 6060 TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment); 6061 6062 if(!This->uri || This->uri->fragment_start == -1 || This->modified_props & Uri_HAS_FRAGMENT) 6063 return get_builder_component(&This->fragment, &This->fragment_len, NULL, 0, ppwzFragment, pcchFragment); 6064 else 6065 return get_builder_component(&This->fragment, &This->fragment_len, This->uri->canon_uri+This->uri->fragment_start, 6066 This->uri->fragment_len, ppwzFragment, pcchFragment); 6067 } 6068 6069 static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost) 6070 { 6071 UriBuilder *This = impl_from_IUriBuilder(iface); 6072 TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost); 6073 6074 if(!This->uri || This->uri->host_start == -1 || This->modified_props & Uri_HAS_HOST) 6075 return get_builder_component(&This->host, &This->host_len, NULL, 0, ppwzHost, pcchHost); 6076 else { 6077 if(This->uri->host_type == Uri_HOST_IPV6) 6078 /* Don't include the '[' and ']' around the address. 
*/ 6079 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start+1, 6080 This->uri->host_len-2, ppwzHost, pcchHost); 6081 else 6082 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start, 6083 This->uri->host_len, ppwzHost, pcchHost); 6084 } 6085 } 6086 6087 static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword) 6088 { 6089 UriBuilder *This = impl_from_IUriBuilder(iface); 6090 TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword); 6091 6092 if(!This->uri || This->uri->userinfo_split == -1 || This->modified_props & Uri_HAS_PASSWORD) 6093 return get_builder_component(&This->password, &This->password_len, NULL, 0, ppwzPassword, pcchPassword); 6094 else { 6095 const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start+This->uri->userinfo_split+1; 6096 DWORD len = This->uri->userinfo_len-This->uri->userinfo_split-1; 6097 return get_builder_component(&This->password, &This->password_len, start, len, ppwzPassword, pcchPassword); 6098 } 6099 } 6100 6101 static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath) 6102 { 6103 UriBuilder *This = impl_from_IUriBuilder(iface); 6104 TRACE("(%p)->(%p %p)\n", This, pcchPath, ppwzPath); 6105 6106 if(!This->uri || This->uri->path_start == -1 || This->modified_props & Uri_HAS_PATH) 6107 return get_builder_component(&This->path, &This->path_len, NULL, 0, ppwzPath, pcchPath); 6108 else 6109 return get_builder_component(&This->path, &This->path_len, This->uri->canon_uri+This->uri->path_start, 6110 This->uri->path_len, ppwzPath, pcchPath); 6111 } 6112 6113 static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort) 6114 { 6115 UriBuilder *This = impl_from_IUriBuilder(iface); 6116 TRACE("(%p)->(%p %p)\n", This, pfHasPort, pdwPort); 6117 6118 if(!pfHasPort) { 6119 if(pdwPort) 6120 *pdwPort = 0; 6121 return 
E_POINTER; 6122 } 6123 6124 if(!pdwPort) { 6125 *pfHasPort = FALSE; 6126 return E_POINTER; 6127 } 6128 6129 *pfHasPort = This->has_port; 6130 *pdwPort = This->port; 6131 return S_OK; 6132 } 6133 6134 static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery) 6135 { 6136 UriBuilder *This = impl_from_IUriBuilder(iface); 6137 TRACE("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery); 6138 6139 if(!This->uri || This->uri->query_start == -1 || This->modified_props & Uri_HAS_QUERY) 6140 return get_builder_component(&This->query, &This->query_len, NULL, 0, ppwzQuery, pcchQuery); 6141 else 6142 return get_builder_component(&This->query, &This->query_len, This->uri->canon_uri+This->uri->query_start, 6143 This->uri->query_len, ppwzQuery, pcchQuery); 6144 } 6145 6146 static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName) 6147 { 6148 UriBuilder *This = impl_from_IUriBuilder(iface); 6149 TRACE("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName); 6150 6151 if(!This->uri || This->uri->scheme_start == -1 || This->modified_props & Uri_HAS_SCHEME_NAME) 6152 return get_builder_component(&This->scheme, &This->scheme_len, NULL, 0, ppwzSchemeName, pcchSchemeName); 6153 else 6154 return get_builder_component(&This->scheme, &This->scheme_len, This->uri->canon_uri+This->uri->scheme_start, 6155 This->uri->scheme_len, ppwzSchemeName, pcchSchemeName); 6156 } 6157 6158 static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName) 6159 { 6160 UriBuilder *This = impl_from_IUriBuilder(iface); 6161 TRACE("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName); 6162 6163 if(!This->uri || This->uri->userinfo_start == -1 || This->uri->userinfo_split == 0 || 6164 This->modified_props & Uri_HAS_USER_NAME) 6165 return get_builder_component(&This->username, &This->username_len, NULL, 0, ppwzUserName, pcchUserName); 6166 else { 6167 const WCHAR *start = 
This->uri->canon_uri+This->uri->userinfo_start; 6168 6169 /* Check if there's a password in the userinfo section. */ 6170 if(This->uri->userinfo_split > -1) 6171 /* Don't include the password. */ 6172 return get_builder_component(&This->username, &This->username_len, start, 6173 This->uri->userinfo_split, ppwzUserName, pcchUserName); 6174 else 6175 return get_builder_component(&This->username, &This->username_len, start, 6176 This->uri->userinfo_len, ppwzUserName, pcchUserName); 6177 } 6178 } 6179 6180 static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue) 6181 { 6182 UriBuilder *This = impl_from_IUriBuilder(iface); 6183 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6184 return set_builder_component(&This->fragment, &This->fragment_len, pwzNewValue, '#', 6185 &This->modified_props, Uri_HAS_FRAGMENT); 6186 } 6187 6188 static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue) 6189 { 6190 UriBuilder *This = impl_from_IUriBuilder(iface); 6191 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6192 6193 /* Host name can't be set to NULL. 
*/ 6194 if(!pwzNewValue) 6195 return E_INVALIDARG; 6196 6197 return set_builder_component(&This->host, &This->host_len, pwzNewValue, 0, 6198 &This->modified_props, Uri_HAS_HOST); 6199 } 6200 6201 static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue) 6202 { 6203 UriBuilder *This = impl_from_IUriBuilder(iface); 6204 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6205 return set_builder_component(&This->password, &This->password_len, pwzNewValue, 0, 6206 &This->modified_props, Uri_HAS_PASSWORD); 6207 } 6208 6209 static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue) 6210 { 6211 UriBuilder *This = impl_from_IUriBuilder(iface); 6212 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6213 return set_builder_component(&This->path, &This->path_len, pwzNewValue, 0, 6214 &This->modified_props, Uri_HAS_PATH); 6215 } 6216 6217 static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue) 6218 { 6219 UriBuilder *This = impl_from_IUriBuilder(iface); 6220 TRACE("(%p)->(%d %d)\n", This, fHasPort, dwNewValue); 6221 6222 This->has_port = fHasPort; 6223 This->port = dwNewValue; 6224 This->modified_props |= Uri_HAS_PORT; 6225 return S_OK; 6226 } 6227 6228 static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue) 6229 { 6230 UriBuilder *This = impl_from_IUriBuilder(iface); 6231 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6232 return set_builder_component(&This->query, &This->query_len, pwzNewValue, '?', 6233 &This->modified_props, Uri_HAS_QUERY); 6234 } 6235 6236 static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue) 6237 { 6238 UriBuilder *This = impl_from_IUriBuilder(iface); 6239 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6240 6241 /* Only set the scheme name if it's not NULL or empty. 
*/ 6242 if(!pwzNewValue || !*pwzNewValue) 6243 return E_INVALIDARG; 6244 6245 return set_builder_component(&This->scheme, &This->scheme_len, pwzNewValue, 0, 6246 &This->modified_props, Uri_HAS_SCHEME_NAME); 6247 } 6248 6249 static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue) 6250 { 6251 UriBuilder *This = impl_from_IUriBuilder(iface); 6252 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 6253 return set_builder_component(&This->username, &This->username_len, pwzNewValue, 0, 6254 &This->modified_props, Uri_HAS_USER_NAME); 6255 } 6256 6257 static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask) 6258 { 6259 const DWORD accepted_flags = Uri_HAS_AUTHORITY|Uri_HAS_DOMAIN|Uri_HAS_EXTENSION|Uri_HAS_FRAGMENT|Uri_HAS_HOST| 6260 Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY| 6261 Uri_HAS_USER_INFO|Uri_HAS_USER_NAME; 6262 6263 UriBuilder *This = impl_from_IUriBuilder(iface); 6264 TRACE("(%p)->(0x%08x)\n", This, dwPropertyMask); 6265 6266 if(dwPropertyMask & ~accepted_flags) 6267 return E_INVALIDARG; 6268 6269 if(dwPropertyMask & Uri_HAS_FRAGMENT) 6270 UriBuilder_SetFragment(iface, NULL); 6271 6272 /* Even though you can't set the host name to NULL or an 6273 * empty string, you can still remove it... for some reason. 
6274 */ 6275 if(dwPropertyMask & Uri_HAS_HOST) 6276 set_builder_component(&This->host, &This->host_len, NULL, 0, 6277 &This->modified_props, Uri_HAS_HOST); 6278 6279 if(dwPropertyMask & Uri_HAS_PASSWORD) 6280 UriBuilder_SetPassword(iface, NULL); 6281 6282 if(dwPropertyMask & Uri_HAS_PATH) 6283 UriBuilder_SetPath(iface, NULL); 6284 6285 if(dwPropertyMask & Uri_HAS_PORT) 6286 UriBuilder_SetPort(iface, FALSE, 0); 6287 6288 if(dwPropertyMask & Uri_HAS_QUERY) 6289 UriBuilder_SetQuery(iface, NULL); 6290 6291 if(dwPropertyMask & Uri_HAS_USER_NAME) 6292 UriBuilder_SetUserName(iface, NULL); 6293 6294 return S_OK; 6295 } 6296 6297 static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified) 6298 { 6299 UriBuilder *This = impl_from_IUriBuilder(iface); 6300 TRACE("(%p)->(%p)\n", This, pfModified); 6301 6302 if(!pfModified) 6303 return E_POINTER; 6304 6305 *pfModified = This->modified_props > 0; 6306 return S_OK; 6307 } 6308 6309 static const IUriBuilderVtbl UriBuilderVtbl = { 6310 UriBuilder_QueryInterface, 6311 UriBuilder_AddRef, 6312 UriBuilder_Release, 6313 UriBuilder_CreateUriSimple, 6314 UriBuilder_CreateUri, 6315 UriBuilder_CreateUriWithFlags, 6316 UriBuilder_GetIUri, 6317 UriBuilder_SetIUri, 6318 UriBuilder_GetFragment, 6319 UriBuilder_GetHost, 6320 UriBuilder_GetPassword, 6321 UriBuilder_GetPath, 6322 UriBuilder_GetPort, 6323 UriBuilder_GetQuery, 6324 UriBuilder_GetSchemeName, 6325 UriBuilder_GetUserName, 6326 UriBuilder_SetFragment, 6327 UriBuilder_SetHost, 6328 UriBuilder_SetPassword, 6329 UriBuilder_SetPath, 6330 UriBuilder_SetPort, 6331 UriBuilder_SetQuery, 6332 UriBuilder_SetSchemeName, 6333 UriBuilder_SetUserName, 6334 UriBuilder_RemoveProperties, 6335 UriBuilder_HasBeenModified, 6336 }; 6337 6338 /*********************************************************************** 6339 * CreateIUriBuilder (urlmon.@) 6340 */ 6341 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder) 6342 { 
6343 UriBuilder *ret; 6344 6345 TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 6346 6347 if(!ppIUriBuilder) 6348 return E_POINTER; 6349 6350 ret = heap_alloc_zero(sizeof(UriBuilder)); 6351 if(!ret) 6352 return E_OUTOFMEMORY; 6353 6354 ret->IUriBuilder_iface.lpVtbl = &UriBuilderVtbl; 6355 ret->ref = 1; 6356 6357 if(pIUri) { 6358 Uri *uri; 6359 6360 if((uri = get_uri_obj(pIUri))) { 6361 if(!uri->create_flags) { 6362 heap_free(ret); 6363 return E_UNEXPECTED; 6364 } 6365 IUri_AddRef(pIUri); 6366 ret->uri = uri; 6367 6368 if(uri->has_port) 6369 /* Windows doesn't set 'has_port' to TRUE in this case. */ 6370 ret->port = uri->port; 6371 6372 } else { 6373 heap_free(ret); 6374 *ppIUriBuilder = NULL; 6375 FIXME("(%p %x %x %p): Unknown IUri types not supported yet.\n", pIUri, dwFlags, 6376 (DWORD)dwReserved, ppIUriBuilder); 6377 return E_NOTIMPL; 6378 } 6379 } 6380 6381 *ppIUriBuilder = &ret->IUriBuilder_iface; 6382 return S_OK; 6383 } 6384 6385 /* Merges the base path with the relative path and stores the resulting path 6386 * and path len in 'result' and 'result_len'. 6387 */ 6388 static HRESULT merge_paths(parse_data *data, const WCHAR *base, DWORD base_len, const WCHAR *relative, 6389 DWORD relative_len, WCHAR **result, DWORD *result_len, DWORD flags) 6390 { 6391 const WCHAR *end = NULL; 6392 DWORD base_copy_len = 0; 6393 WCHAR *ptr; 6394 6395 if(base_len) { 6396 if(data->scheme_type == URL_SCHEME_MK && *relative == '/') { 6397 /* Find '::' segment */ 6398 for(end = base; end < base+base_len-1; end++) { 6399 if(end[0] == ':' && end[1] == ':') { 6400 end++; 6401 break; 6402 } 6403 } 6404 6405 /* If not found, try finding the end of @xxx: */ 6406 if(end == base+base_len-1) 6407 end = *base == '@' ? memchr(base, ':', base_len) : NULL; 6408 }else { 6409 /* Find the characters that will be copied over from the base path. 
*/ 6410 end = memrchrW(base, '/', base_len); 6411 if(!end && data->scheme_type == URL_SCHEME_FILE) 6412 /* Try looking for a '\\'. */ 6413 end = memrchrW(base, '\\', base_len); 6414 } 6415 } 6416 6417 if(end) { 6418 base_copy_len = (end+1)-base; 6419 *result = heap_alloc((base_copy_len+relative_len+1)*sizeof(WCHAR)); 6420 } else 6421 *result = heap_alloc((relative_len+1)*sizeof(WCHAR)); 6422 6423 if(!(*result)) { 6424 *result_len = 0; 6425 return E_OUTOFMEMORY; 6426 } 6427 6428 ptr = *result; 6429 if(end) { 6430 memcpy(ptr, base, base_copy_len*sizeof(WCHAR)); 6431 ptr += base_copy_len; 6432 } 6433 6434 memcpy(ptr, relative, relative_len*sizeof(WCHAR)); 6435 ptr += relative_len; 6436 *ptr = '\0'; 6437 6438 *result_len = (ptr-*result); 6439 TRACE("ret %s\n", debugstr_wn(*result, *result_len)); 6440 return S_OK; 6441 } 6442 6443 static HRESULT combine_uri(Uri *base, Uri *relative, DWORD flags, IUri **result, DWORD extras) { 6444 Uri *ret; 6445 HRESULT hr; 6446 parse_data data; 6447 Uri *proc_uri = base; 6448 DWORD create_flags = 0, len = 0; 6449 6450 memset(&data, 0, sizeof(parse_data)); 6451 6452 /* Base case is when the relative Uri has a scheme name, 6453 * if it does, then 'result' will contain the same data 6454 * as the relative Uri. 
6455 */ 6456 if(relative->scheme_start > -1) { 6457 data.uri = SysAllocString(relative->raw_uri); 6458 if(!data.uri) { 6459 *result = NULL; 6460 return E_OUTOFMEMORY; 6461 } 6462 6463 parse_uri(&data, Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME); 6464 6465 hr = Uri_Construct(NULL, (void**)&ret); 6466 if(FAILED(hr)) { 6467 *result = NULL; 6468 return hr; 6469 } 6470 6471 if(extras & COMBINE_URI_FORCE_FLAG_USE) { 6472 if(flags & URL_DONT_SIMPLIFY) 6473 create_flags |= Uri_CREATE_NO_CANONICALIZE; 6474 if(flags & URL_DONT_UNESCAPE_EXTRA_INFO) 6475 create_flags |= Uri_CREATE_NO_DECODE_EXTRA_INFO; 6476 } 6477 6478 ret->raw_uri = data.uri; 6479 hr = canonicalize_uri(&data, ret, create_flags); 6480 if(FAILED(hr)) { 6481 IUri_Release(&ret->IUri_iface); 6482 *result = NULL; 6483 return hr; 6484 } 6485 6486 apply_default_flags(&create_flags); 6487 ret->create_flags = create_flags; 6488 6489 *result = &ret->IUri_iface; 6490 } else { 6491 WCHAR *path = NULL; 6492 DWORD raw_flags = 0; 6493 6494 if(base->scheme_start > -1) { 6495 data.scheme = base->canon_uri+base->scheme_start; 6496 data.scheme_len = base->scheme_len; 6497 data.scheme_type = base->scheme_type; 6498 } else { 6499 data.is_relative = TRUE; 6500 data.scheme_type = URL_SCHEME_UNKNOWN; 6501 create_flags |= Uri_CREATE_ALLOW_RELATIVE; 6502 } 6503 6504 if(relative->authority_start > -1) 6505 proc_uri = relative; 6506 6507 if(proc_uri->authority_start > -1) { 6508 if(proc_uri->userinfo_start > -1 && proc_uri->userinfo_split != 0) { 6509 data.username = proc_uri->canon_uri+proc_uri->userinfo_start; 6510 data.username_len = (proc_uri->userinfo_split > -1) ? 
proc_uri->userinfo_split : proc_uri->userinfo_len; 6511 } 6512 6513 if(proc_uri->userinfo_split > -1) { 6514 data.password = proc_uri->canon_uri+proc_uri->userinfo_start+proc_uri->userinfo_split+1; 6515 data.password_len = proc_uri->userinfo_len-proc_uri->userinfo_split-1; 6516 } 6517 6518 if(proc_uri->host_start > -1) { 6519 data.host = proc_uri->canon_uri+proc_uri->host_start; 6520 data.host_len = proc_uri->host_len; 6521 data.host_type = proc_uri->host_type; 6522 } 6523 6524 if(proc_uri->has_port) { 6525 data.has_port = TRUE; 6526 data.port_value = proc_uri->port; 6527 } 6528 } else if(base->scheme_type != URL_SCHEME_FILE) 6529 data.is_opaque = TRUE; 6530 6531 if(proc_uri == relative || relative->path_start == -1 || !relative->path_len) { 6532 if(proc_uri->path_start > -1) { 6533 data.path = proc_uri->canon_uri+proc_uri->path_start; 6534 data.path_len = proc_uri->path_len; 6535 } else if(!data.is_opaque) { 6536 /* Just set the path as a '/' if the base didn't have 6537 * one and if it's a hierarchical URI. 6538 */ 6539 static const WCHAR slashW[] = {'/',0}; 6540 data.path = slashW; 6541 data.path_len = 1; 6542 } 6543 6544 if(relative->query_start > -1) 6545 proc_uri = relative; 6546 6547 if(proc_uri->query_start > -1) { 6548 data.query = proc_uri->canon_uri+proc_uri->query_start; 6549 data.query_len = proc_uri->query_len; 6550 } 6551 } else { 6552 const WCHAR *ptr, **pptr; 6553 DWORD path_offset = 0, path_len = 0; 6554 6555 /* There's two possibilities on what will happen to the path component 6556 * of the result IUri. First, if the relative path begins with a '/' 6557 * then the resulting path will just be the relative path. Second, if 6558 * relative path doesn't begin with a '/' then the base path and relative 6559 * path are merged together. 
6560 */ 6561 if(relative->path_len && *(relative->canon_uri+relative->path_start) == '/' && data.scheme_type != URL_SCHEME_MK) { 6562 WCHAR *tmp = NULL; 6563 BOOL copy_drive_path = FALSE; 6564 6565 /* If the relative IUri's path starts with a '/', then we 6566 * don't use the base IUri's path. Unless the base IUri 6567 * is a file URI, in which case it uses the drive path of 6568 * the base IUri (if it has any) in the new path. 6569 */ 6570 if(base->scheme_type == URL_SCHEME_FILE) { 6571 if(base->path_len > 3 && *(base->canon_uri+base->path_start) == '/' && 6572 is_drive_path(base->canon_uri+base->path_start+1)) { 6573 path_len += 3; 6574 copy_drive_path = TRUE; 6575 } 6576 } 6577 6578 path_len += relative->path_len; 6579 6580 path = heap_alloc((path_len+1)*sizeof(WCHAR)); 6581 if(!path) { 6582 *result = NULL; 6583 return E_OUTOFMEMORY; 6584 } 6585 6586 tmp = path; 6587 6588 /* Copy the base paths, drive path over. */ 6589 if(copy_drive_path) { 6590 memcpy(tmp, base->canon_uri+base->path_start, 3*sizeof(WCHAR)); 6591 tmp += 3; 6592 } 6593 6594 memcpy(tmp, relative->canon_uri+relative->path_start, relative->path_len*sizeof(WCHAR)); 6595 path[path_len] = '\0'; 6596 } else { 6597 /* Merge the base path with the relative path. */ 6598 hr = merge_paths(&data, base->canon_uri+base->path_start, base->path_len, 6599 relative->canon_uri+relative->path_start, relative->path_len, 6600 &path, &path_len, flags); 6601 if(FAILED(hr)) { 6602 *result = NULL; 6603 return hr; 6604 } 6605 6606 /* If the resulting IUri is a file URI, the drive path isn't 6607 * reduced out when the dot segments are removed. 6608 */ 6609 if(path_len >= 3 && data.scheme_type == URL_SCHEME_FILE && !data.host) { 6610 if(*path == '/' && is_drive_path(path+1)) 6611 path_offset = 2; 6612 else if(is_drive_path(path)) 6613 path_offset = 1; 6614 } 6615 } 6616 6617 /* Check if the dot segments need to be removed from the path. 
*/ 6618 if(!(flags & URL_DONT_SIMPLIFY) && !data.is_opaque) { 6619 DWORD offset = (path_offset > 0) ? path_offset+1 : 0; 6620 DWORD new_len = remove_dot_segments(path+offset,path_len-offset); 6621 6622 if(new_len != path_len) { 6623 WCHAR *tmp = heap_realloc(path, (offset+new_len+1)*sizeof(WCHAR)); 6624 if(!tmp) { 6625 heap_free(path); 6626 *result = NULL; 6627 return E_OUTOFMEMORY; 6628 } 6629 6630 tmp[new_len+offset] = '\0'; 6631 path = tmp; 6632 path_len = new_len+offset; 6633 } 6634 } 6635 6636 if(relative->query_start > -1) { 6637 data.query = relative->canon_uri+relative->query_start; 6638 data.query_len = relative->query_len; 6639 } 6640 6641 /* Make sure the path component is valid. */ 6642 ptr = path; 6643 pptr = &ptr; 6644 if((data.is_opaque && !parse_path_opaque(pptr, &data, 0)) || 6645 (!data.is_opaque && !parse_path_hierarchical(pptr, &data, 0))) { 6646 heap_free(path); 6647 *result = NULL; 6648 return E_INVALIDARG; 6649 } 6650 } 6651 6652 if(relative->fragment_start > -1) { 6653 data.fragment = relative->canon_uri+relative->fragment_start; 6654 data.fragment_len = relative->fragment_len; 6655 } 6656 6657 if(flags & URL_DONT_SIMPLIFY) 6658 raw_flags |= RAW_URI_FORCE_PORT_DISP; 6659 if(flags & URL_FILE_USE_PATHURL) 6660 raw_flags |= RAW_URI_CONVERT_TO_DOS_PATH; 6661 6662 len = generate_raw_uri(&data, data.uri, raw_flags); 6663 data.uri = SysAllocStringLen(NULL, len); 6664 if(!data.uri) { 6665 heap_free(path); 6666 *result = NULL; 6667 return E_OUTOFMEMORY; 6668 } 6669 6670 generate_raw_uri(&data, data.uri, raw_flags); 6671 6672 hr = Uri_Construct(NULL, (void**)&ret); 6673 if(FAILED(hr)) { 6674 SysFreeString(data.uri); 6675 heap_free(path); 6676 *result = NULL; 6677 return hr; 6678 } 6679 6680 if(flags & URL_DONT_SIMPLIFY) 6681 create_flags |= Uri_CREATE_NO_CANONICALIZE; 6682 if(flags & URL_FILE_USE_PATHURL) 6683 create_flags |= Uri_CREATE_FILE_USE_DOS_PATH; 6684 6685 ret->raw_uri = data.uri; 6686 hr = canonicalize_uri(&data, ret, create_flags); 6687 
if(FAILED(hr)) { 6688 IUri_Release(&ret->IUri_iface); 6689 *result = NULL; 6690 return hr; 6691 } 6692 6693 if(flags & URL_DONT_SIMPLIFY) 6694 ret->display_modifiers |= URI_DISPLAY_NO_DEFAULT_PORT_AUTH; 6695 6696 apply_default_flags(&create_flags); 6697 ret->create_flags = create_flags; 6698 *result = &ret->IUri_iface; 6699 6700 heap_free(path); 6701 } 6702 6703 return S_OK; 6704 } 6705 6706 /*********************************************************************** 6707 * CoInternetCombineIUri (urlmon.@) 6708 */ 6709 HRESULT WINAPI CoInternetCombineIUri(IUri *pBaseUri, IUri *pRelativeUri, DWORD dwCombineFlags, 6710 IUri **ppCombinedUri, DWORD_PTR dwReserved) 6711 { 6712 HRESULT hr; 6713 IInternetProtocolInfo *info; 6714 Uri *relative, *base; 6715 TRACE("(%p %p %x %p %x)\n", pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6716 6717 if(!ppCombinedUri) 6718 return E_INVALIDARG; 6719 6720 if(!pBaseUri || !pRelativeUri) { 6721 *ppCombinedUri = NULL; 6722 return E_INVALIDARG; 6723 } 6724 6725 relative = get_uri_obj(pRelativeUri); 6726 base = get_uri_obj(pBaseUri); 6727 if(!relative || !base) { 6728 *ppCombinedUri = NULL; 6729 FIXME("(%p %p %x %p %x) Unknown IUri types not supported yet.\n", 6730 pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6731 return E_NOTIMPL; 6732 } 6733 6734 info = get_protocol_info(base->canon_uri); 6735 if(info) { 6736 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 6737 DWORD result_len = 0; 6738 6739 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, relative->canon_uri, dwCombineFlags, 6740 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 6741 IInternetProtocolInfo_Release(info); 6742 if(SUCCEEDED(hr)) { 6743 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 6744 if(SUCCEEDED(hr)) 6745 return hr; 6746 } 6747 } 6748 6749 return combine_uri(base, relative, dwCombineFlags, ppCombinedUri, 0); 6750 } 6751 6752 
/*********************************************************************** 6753 * CoInternetCombineUrlEx (urlmon.@) 6754 */ 6755 HRESULT WINAPI CoInternetCombineUrlEx(IUri *pBaseUri, LPCWSTR pwzRelativeUrl, DWORD dwCombineFlags, 6756 IUri **ppCombinedUri, DWORD_PTR dwReserved) 6757 { 6758 IUri *relative; 6759 Uri *base; 6760 HRESULT hr; 6761 IInternetProtocolInfo *info; 6762 6763 TRACE("(%p %s %x %p %x)\n", pBaseUri, debugstr_w(pwzRelativeUrl), dwCombineFlags, 6764 ppCombinedUri, (DWORD)dwReserved); 6765 6766 if(!ppCombinedUri) 6767 return E_POINTER; 6768 6769 if(!pwzRelativeUrl) { 6770 *ppCombinedUri = NULL; 6771 return E_UNEXPECTED; 6772 } 6773 6774 if(!pBaseUri) { 6775 *ppCombinedUri = NULL; 6776 return E_INVALIDARG; 6777 } 6778 6779 base = get_uri_obj(pBaseUri); 6780 if(!base) { 6781 *ppCombinedUri = NULL; 6782 FIXME("(%p %s %x %p %x) Unknown IUri's not supported yet.\n", pBaseUri, debugstr_w(pwzRelativeUrl), 6783 dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 6784 return E_NOTIMPL; 6785 } 6786 6787 info = get_protocol_info(base->canon_uri); 6788 if(info) { 6789 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 6790 DWORD result_len = 0; 6791 6792 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, pwzRelativeUrl, dwCombineFlags, 6793 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 6794 IInternetProtocolInfo_Release(info); 6795 if(SUCCEEDED(hr)) { 6796 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 6797 if(SUCCEEDED(hr)) 6798 return hr; 6799 } 6800 } 6801 6802 hr = CreateUri(pwzRelativeUrl, Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME, 0, &relative); 6803 if(FAILED(hr)) { 6804 *ppCombinedUri = NULL; 6805 return hr; 6806 } 6807 6808 hr = combine_uri(base, get_uri_obj(relative), dwCombineFlags, ppCombinedUri, COMBINE_URI_FORCE_FLAG_USE); 6809 6810 IUri_Release(relative); 6811 return hr; 6812 } 6813 6814 static HRESULT parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, 6815 DWORD output_len, DWORD 
*result_len) 6816 { 6817 const WCHAR *ptr = NULL; 6818 WCHAR *path = NULL; 6819 const WCHAR **pptr; 6820 DWORD len = 0; 6821 BOOL reduce_path; 6822 6823 /* URL_UNESCAPE only has effect if none of the URL_ESCAPE flags are set. */ 6824 const BOOL allow_unescape = !(flags & URL_ESCAPE_UNSAFE) && 6825 !(flags & URL_ESCAPE_SPACES_ONLY) && 6826 !(flags & URL_ESCAPE_PERCENT); 6827 6828 6829 /* Check if the dot segments need to be removed from the 6830 * path component. 6831 */ 6832 if(uri->scheme_start > -1 && uri->path_start > -1) { 6833 ptr = uri->canon_uri+uri->scheme_start+uri->scheme_len+1; 6834 pptr = &ptr; 6835 } 6836 reduce_path = !(flags & URL_DONT_SIMPLIFY) && 6837 ptr && check_hierarchical(pptr); 6838 6839 for(ptr = uri->canon_uri; ptr < uri->canon_uri+uri->canon_len; ++ptr) { 6840 BOOL do_default_action = TRUE; 6841 6842 /* Keep track of the path if we need to remove dot segments from 6843 * it later. 6844 */ 6845 if(reduce_path && !path && ptr == uri->canon_uri+uri->path_start) 6846 path = output+len; 6847 6848 /* Check if it's time to reduce the path. */ 6849 if(reduce_path && ptr == uri->canon_uri+uri->path_start+uri->path_len) { 6850 DWORD current_path_len = (output+len) - path; 6851 DWORD new_path_len = remove_dot_segments(path, current_path_len); 6852 6853 /* Update the current length. */ 6854 len -= (current_path_len-new_path_len); 6855 reduce_path = FALSE; 6856 } 6857 6858 if(*ptr == '%') { 6859 const WCHAR decoded = decode_pct_val(ptr); 6860 if(decoded) { 6861 if(allow_unescape && (flags & URL_UNESCAPE)) { 6862 if(len < output_len) 6863 output[len] = decoded; 6864 len++; 6865 ptr += 2; 6866 do_default_action = FALSE; 6867 } 6868 } 6869 6870 /* See if %'s needed to encoded. 
*/ 6871 if(do_default_action && (flags & URL_ESCAPE_PERCENT)) { 6872 if(len + 3 < output_len) 6873 pct_encode_val(*ptr, output+len); 6874 len += 3; 6875 do_default_action = FALSE; 6876 } 6877 } else if(*ptr == ' ') { 6878 if((flags & URL_ESCAPE_SPACES_ONLY) && 6879 !(flags & URL_ESCAPE_UNSAFE)) { 6880 if(len + 3 < output_len) 6881 pct_encode_val(*ptr, output+len); 6882 len += 3; 6883 do_default_action = FALSE; 6884 } 6885 } else if(!is_reserved(*ptr) && !is_unreserved(*ptr)) { 6886 if(flags & URL_ESCAPE_UNSAFE) { 6887 if(len + 3 < output_len) 6888 pct_encode_val(*ptr, output+len); 6889 len += 3; 6890 do_default_action = FALSE; 6891 } 6892 } 6893 6894 if(do_default_action) { 6895 if(len < output_len) 6896 output[len] = *ptr; 6897 len++; 6898 } 6899 } 6900 6901 /* Sometimes the path is the very last component of the IUri, so 6902 * see if the dot segments need to be reduced now. 6903 */ 6904 if(reduce_path && path) { 6905 DWORD current_path_len = (output+len) - path; 6906 DWORD new_path_len = remove_dot_segments(path, current_path_len); 6907 6908 /* Update the current length. */ 6909 len -= (current_path_len-new_path_len); 6910 } 6911 6912 if(len < output_len) 6913 output[len] = 0; 6914 else 6915 output[output_len-1] = 0; 6916 6917 /* The null terminator isn't included in the length. 
*/ 6918 *result_len = len; 6919 if(len >= output_len) 6920 return STRSAFE_E_INSUFFICIENT_BUFFER; 6921 6922 return S_OK; 6923 } 6924 6925 static HRESULT parse_friendly(IUri *uri, LPWSTR output, DWORD output_len, 6926 DWORD *result_len) 6927 { 6928 HRESULT hr; 6929 DWORD display_len; 6930 BSTR display; 6931 6932 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DISPLAY_URI, &display_len, 0); 6933 if(FAILED(hr)) { 6934 *result_len = 0; 6935 return hr; 6936 } 6937 6938 *result_len = display_len; 6939 if(display_len+1 > output_len) 6940 return STRSAFE_E_INSUFFICIENT_BUFFER; 6941 6942 hr = IUri_GetDisplayUri(uri, &display); 6943 if(FAILED(hr)) { 6944 *result_len = 0; 6945 return hr; 6946 } 6947 6948 memcpy(output, display, (display_len+1)*sizeof(WCHAR)); 6949 SysFreeString(display); 6950 return S_OK; 6951 } 6952 6953 static HRESULT parse_rootdocument(const Uri *uri, LPWSTR output, DWORD output_len, 6954 DWORD *result_len) 6955 { 6956 static const WCHAR colon_slashesW[] = {':','/','/'}; 6957 6958 WCHAR *ptr; 6959 DWORD len = 0; 6960 6961 /* Windows only returns the root document if the URI has an authority 6962 * and it's not an unknown scheme type or a file scheme type. 6963 */ 6964 if(uri->authority_start == -1 || 6965 uri->scheme_type == URL_SCHEME_UNKNOWN || 6966 uri->scheme_type == URL_SCHEME_FILE) { 6967 *result_len = 0; 6968 if(!output_len) 6969 return STRSAFE_E_INSUFFICIENT_BUFFER; 6970 6971 output[0] = 0; 6972 return S_OK; 6973 } 6974 6975 len = uri->scheme_len+uri->authority_len; 6976 /* For the "://" and '/' which will be added. */ 6977 len += 4; 6978 6979 if(len+1 > output_len) { 6980 *result_len = len; 6981 return STRSAFE_E_INSUFFICIENT_BUFFER; 6982 } 6983 6984 ptr = output; 6985 memcpy(ptr, uri->canon_uri+uri->scheme_start, uri->scheme_len*sizeof(WCHAR)); 6986 6987 /* Add the "://". */ 6988 ptr += uri->scheme_len; 6989 memcpy(ptr, colon_slashesW, sizeof(colon_slashesW)); 6990 6991 /* Add the authority. 
*/ 6992 ptr += sizeof(colon_slashesW)/sizeof(WCHAR); 6993 memcpy(ptr, uri->canon_uri+uri->authority_start, uri->authority_len*sizeof(WCHAR)); 6994 6995 /* Add the '/' after the authority. */ 6996 ptr += uri->authority_len; 6997 *ptr = '/'; 6998 ptr[1] = 0; 6999 7000 *result_len = len; 7001 return S_OK; 7002 } 7003 7004 static HRESULT parse_document(const Uri *uri, LPWSTR output, DWORD output_len, 7005 DWORD *result_len) 7006 { 7007 DWORD len = 0; 7008 7009 /* It has to be a known scheme type, but, it can't be a file 7010 * scheme. It also has to hierarchical. 7011 */ 7012 if(uri->scheme_type == URL_SCHEME_UNKNOWN || 7013 uri->scheme_type == URL_SCHEME_FILE || 7014 uri->authority_start == -1) { 7015 *result_len = 0; 7016 if(output_len < 1) 7017 return STRSAFE_E_INSUFFICIENT_BUFFER; 7018 7019 output[0] = 0; 7020 return S_OK; 7021 } 7022 7023 if(uri->fragment_start > -1) 7024 len = uri->fragment_start; 7025 else 7026 len = uri->canon_len; 7027 7028 *result_len = len; 7029 if(len+1 > output_len) 7030 return STRSAFE_E_INSUFFICIENT_BUFFER; 7031 7032 memcpy(output, uri->canon_uri, len*sizeof(WCHAR)); 7033 output[len] = 0; 7034 return S_OK; 7035 } 7036 7037 static HRESULT parse_path_from_url(const Uri *uri, LPWSTR output, DWORD output_len, 7038 DWORD *result_len) 7039 { 7040 const WCHAR *path_ptr; 7041 WCHAR buffer[INTERNET_MAX_URL_LENGTH+1]; 7042 WCHAR *ptr; 7043 7044 if(uri->scheme_type != URL_SCHEME_FILE) { 7045 *result_len = 0; 7046 if(output_len > 0) 7047 output[0] = 0; 7048 return E_INVALIDARG; 7049 } 7050 7051 ptr = buffer; 7052 if(uri->host_start > -1) { 7053 static const WCHAR slash_slashW[] = {'\\','\\'}; 7054 7055 memcpy(ptr, slash_slashW, sizeof(slash_slashW)); 7056 ptr += sizeof(slash_slashW)/sizeof(WCHAR); 7057 memcpy(ptr, uri->canon_uri+uri->host_start, uri->host_len*sizeof(WCHAR)); 7058 ptr += uri->host_len; 7059 } 7060 7061 path_ptr = uri->canon_uri+uri->path_start; 7062 if(uri->path_len > 3 && *path_ptr == '/' && is_drive_path(path_ptr+1)) 7063 /* Skip 
past the '/' in front of the drive path. */ 7064 ++path_ptr; 7065 7066 for(; path_ptr < uri->canon_uri+uri->path_start+uri->path_len; ++path_ptr, ++ptr) { 7067 BOOL do_default_action = TRUE; 7068 7069 if(*path_ptr == '%') { 7070 const WCHAR decoded = decode_pct_val(path_ptr); 7071 if(decoded) { 7072 *ptr = decoded; 7073 path_ptr += 2; 7074 do_default_action = FALSE; 7075 } 7076 } else if(*path_ptr == '/') { 7077 *ptr = '\\'; 7078 do_default_action = FALSE; 7079 } 7080 7081 if(do_default_action) 7082 *ptr = *path_ptr; 7083 } 7084 7085 *ptr = 0; 7086 7087 *result_len = ptr-buffer; 7088 if(*result_len+1 > output_len) 7089 return STRSAFE_E_INSUFFICIENT_BUFFER; 7090 7091 memcpy(output, buffer, (*result_len+1)*sizeof(WCHAR)); 7092 return S_OK; 7093 } 7094 7095 static HRESULT parse_url_from_path(IUri *uri, LPWSTR output, DWORD output_len, 7096 DWORD *result_len) 7097 { 7098 HRESULT hr; 7099 BSTR received; 7100 DWORD len = 0; 7101 7102 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_ABSOLUTE_URI, &len, 0); 7103 if(FAILED(hr)) { 7104 *result_len = 0; 7105 return hr; 7106 } 7107 7108 *result_len = len; 7109 if(len+1 > output_len) 7110 return STRSAFE_E_INSUFFICIENT_BUFFER; 7111 7112 hr = IUri_GetAbsoluteUri(uri, &received); 7113 if(FAILED(hr)) { 7114 *result_len = 0; 7115 return hr; 7116 } 7117 7118 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7119 SysFreeString(received); 7120 7121 return S_OK; 7122 } 7123 7124 static HRESULT parse_schema(IUri *uri, LPWSTR output, DWORD output_len, 7125 DWORD *result_len) 7126 { 7127 HRESULT hr; 7128 DWORD len; 7129 BSTR received; 7130 7131 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_SCHEME_NAME, &len, 0); 7132 if(FAILED(hr)) { 7133 *result_len = 0; 7134 return hr; 7135 } 7136 7137 *result_len = len; 7138 if(len+1 > output_len) 7139 return STRSAFE_E_INSUFFICIENT_BUFFER; 7140 7141 hr = IUri_GetSchemeName(uri, &received); 7142 if(FAILED(hr)) { 7143 *result_len = 0; 7144 return hr; 7145 } 7146 7147 memcpy(output, received, 
(len+1)*sizeof(WCHAR)); 7148 SysFreeString(received); 7149 7150 return S_OK; 7151 } 7152 7153 static HRESULT parse_site(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7154 { 7155 HRESULT hr; 7156 DWORD len; 7157 BSTR received; 7158 7159 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_HOST, &len, 0); 7160 if(FAILED(hr)) { 7161 *result_len = 0; 7162 return hr; 7163 } 7164 7165 *result_len = len; 7166 if(len+1 > output_len) 7167 return STRSAFE_E_INSUFFICIENT_BUFFER; 7168 7169 hr = IUri_GetHost(uri, &received); 7170 if(FAILED(hr)) { 7171 *result_len = 0; 7172 return hr; 7173 } 7174 7175 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7176 SysFreeString(received); 7177 7178 return S_OK; 7179 } 7180 7181 static HRESULT parse_domain(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7182 { 7183 HRESULT hr; 7184 DWORD len; 7185 BSTR received; 7186 7187 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DOMAIN, &len, 0); 7188 if(FAILED(hr)) { 7189 *result_len = 0; 7190 return hr; 7191 } 7192 7193 *result_len = len; 7194 if(len+1 > output_len) 7195 return STRSAFE_E_INSUFFICIENT_BUFFER; 7196 7197 hr = IUri_GetDomain(uri, &received); 7198 if(FAILED(hr)) { 7199 *result_len = 0; 7200 return hr; 7201 } 7202 7203 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7204 SysFreeString(received); 7205 7206 return S_OK; 7207 } 7208 7209 static HRESULT parse_anchor(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 7210 { 7211 HRESULT hr; 7212 DWORD len; 7213 BSTR received; 7214 7215 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_FRAGMENT, &len, 0); 7216 if(FAILED(hr)) { 7217 *result_len = 0; 7218 return hr; 7219 } 7220 7221 *result_len = len; 7222 if(len+1 > output_len) 7223 return STRSAFE_E_INSUFFICIENT_BUFFER; 7224 7225 hr = IUri_GetFragment(uri, &received); 7226 if(FAILED(hr)) { 7227 *result_len = 0; 7228 return hr; 7229 } 7230 7231 memcpy(output, received, (len+1)*sizeof(WCHAR)); 7232 SysFreeString(received); 7233 7234 return S_OK; 7235 } 7236 7237 
/*********************************************************************** 7238 * CoInternetParseIUri (urlmon.@) 7239 */ 7240 HRESULT WINAPI CoInternetParseIUri(IUri *pIUri, PARSEACTION ParseAction, DWORD dwFlags, 7241 LPWSTR pwzResult, DWORD cchResult, DWORD *pcchResult, 7242 DWORD_PTR dwReserved) 7243 { 7244 HRESULT hr; 7245 Uri *uri; 7246 IInternetProtocolInfo *info; 7247 7248 TRACE("(%p %d %x %p %d %p %x)\n", pIUri, ParseAction, dwFlags, pwzResult, 7249 cchResult, pcchResult, (DWORD)dwReserved); 7250 7251 if(!pcchResult) 7252 return E_POINTER; 7253 7254 if(!pwzResult || !pIUri) { 7255 *pcchResult = 0; 7256 return E_INVALIDARG; 7257 } 7258 7259 if(!(uri = get_uri_obj(pIUri))) { 7260 *pcchResult = 0; 7261 FIXME("(%p %d %x %p %d %p %x) Unknown IUri's not supported for this action.\n", 7262 pIUri, ParseAction, dwFlags, pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 7263 return E_NOTIMPL; 7264 } 7265 7266 info = get_protocol_info(uri->canon_uri); 7267 if(info) { 7268 hr = IInternetProtocolInfo_ParseUrl(info, uri->canon_uri, ParseAction, dwFlags, 7269 pwzResult, cchResult, pcchResult, 0); 7270 IInternetProtocolInfo_Release(info); 7271 if(SUCCEEDED(hr)) return hr; 7272 } 7273 7274 switch(ParseAction) { 7275 case PARSE_CANONICALIZE: 7276 hr = parse_canonicalize(uri, dwFlags, pwzResult, cchResult, pcchResult); 7277 break; 7278 case PARSE_FRIENDLY: 7279 hr = parse_friendly(pIUri, pwzResult, cchResult, pcchResult); 7280 break; 7281 case PARSE_ROOTDOCUMENT: 7282 hr = parse_rootdocument(uri, pwzResult, cchResult, pcchResult); 7283 break; 7284 case PARSE_DOCUMENT: 7285 hr = parse_document(uri, pwzResult, cchResult, pcchResult); 7286 break; 7287 case PARSE_PATH_FROM_URL: 7288 hr = parse_path_from_url(uri, pwzResult, cchResult, pcchResult); 7289 break; 7290 case PARSE_URL_FROM_PATH: 7291 hr = parse_url_from_path(pIUri, pwzResult, cchResult, pcchResult); 7292 break; 7293 case PARSE_SCHEMA: 7294 hr = parse_schema(pIUri, pwzResult, cchResult, pcchResult); 7295 break; 7296 
case PARSE_SITE: 7297 hr = parse_site(pIUri, pwzResult, cchResult, pcchResult); 7298 break; 7299 case PARSE_DOMAIN: 7300 hr = parse_domain(pIUri, pwzResult, cchResult, pcchResult); 7301 break; 7302 case PARSE_LOCATION: 7303 case PARSE_ANCHOR: 7304 hr = parse_anchor(pIUri, pwzResult, cchResult, pcchResult); 7305 break; 7306 case PARSE_SECURITY_URL: 7307 case PARSE_MIME: 7308 case PARSE_SERVER: 7309 case PARSE_SECURITY_DOMAIN: 7310 *pcchResult = 0; 7311 hr = E_FAIL; 7312 break; 7313 default: 7314 *pcchResult = 0; 7315 hr = E_NOTIMPL; 7316 FIXME("(%p %d %x %p %d %p %x) Partial stub.\n", pIUri, ParseAction, dwFlags, 7317 pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 7318 } 7319 7320 return hr; 7321 } 7322