1// Copyright 2009 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package urlesc 6 7import ( 8 "net/url" 9 "testing" 10) 11 12type URLTest struct { 13 in string 14 out *url.URL 15 roundtrip string // expected result of reserializing the URL; empty means same as "in". 16} 17 18var urltests = []URLTest{ 19 // no path 20 { 21 "http://www.google.com", 22 &url.URL{ 23 Scheme: "http", 24 Host: "www.google.com", 25 }, 26 "", 27 }, 28 // path 29 { 30 "http://www.google.com/", 31 &url.URL{ 32 Scheme: "http", 33 Host: "www.google.com", 34 Path: "/", 35 }, 36 "", 37 }, 38 // path with hex escaping 39 { 40 "http://www.google.com/file%20one%26two", 41 &url.URL{ 42 Scheme: "http", 43 Host: "www.google.com", 44 Path: "/file one&two", 45 }, 46 "http://www.google.com/file%20one&two", 47 }, 48 // user 49 { 50 "ftp://webmaster@www.google.com/", 51 &url.URL{ 52 Scheme: "ftp", 53 User: url.User("webmaster"), 54 Host: "www.google.com", 55 Path: "/", 56 }, 57 "", 58 }, 59 // escape sequence in username 60 { 61 "ftp://john%20doe@www.google.com/", 62 &url.URL{ 63 Scheme: "ftp", 64 User: url.User("john doe"), 65 Host: "www.google.com", 66 Path: "/", 67 }, 68 "ftp://john%20doe@www.google.com/", 69 }, 70 // query 71 { 72 "http://www.google.com/?q=go+language", 73 &url.URL{ 74 Scheme: "http", 75 Host: "www.google.com", 76 Path: "/", 77 RawQuery: "q=go+language", 78 }, 79 "", 80 }, 81 // query with hex escaping: NOT parsed 82 { 83 "http://www.google.com/?q=go%20language", 84 &url.URL{ 85 Scheme: "http", 86 Host: "www.google.com", 87 Path: "/", 88 RawQuery: "q=go%20language", 89 }, 90 "", 91 }, 92 // %20 outside query 93 { 94 "http://www.google.com/a%20b?q=c+d", 95 &url.URL{ 96 Scheme: "http", 97 Host: "www.google.com", 98 Path: "/a b", 99 RawQuery: "q=c+d", 100 }, 101 "", 102 }, 103 // path without leading /, so no parsing 104 { 105 "http:www.google.com/?q=go+language", 106 &url.URL{ 107 Scheme: "http", 108 Opaque: "www.google.com/", 109 RawQuery: "q=go+language", 110 }, 111 "http:www.google.com/?q=go+language", 112 }, 113 // path without leading /, so no parsing 114 { 115 "http:%2f%2fwww.google.com/?q=go+language", 116 &url.URL{ 117 Scheme: "http", 118 Opaque: "%2f%2fwww.google.com/", 119 RawQuery: "q=go+language", 120 }, 121 "http:%2f%2fwww.google.com/?q=go+language", 122 }, 123 // non-authority with path 124 { 125 "mailto:/webmaster@golang.org", 126 &url.URL{ 127 Scheme: "mailto", 128 Path: "/webmaster@golang.org", 129 }, 130 "mailto:///webmaster@golang.org", // unfortunate compromise 131 }, 132 // non-authority 133 { 134 "mailto:webmaster@golang.org", 135 &url.URL{ 136 Scheme: "mailto", 137 Opaque: "webmaster@golang.org", 138 }, 139 "", 140 }, 141 // unescaped :// in query should not create a scheme 142 { 143 "/foo?query=http://bad", 144 &url.URL{ 145 Path: "/foo", 146 RawQuery: "query=http://bad", 147 }, 148 "", 149 }, 150 // leading // without scheme should create an authority 151 { 152 "//foo", 153 &url.URL{ 154 Host: "foo", 155 }, 156 "", 157 }, 158 // leading // without scheme, with userinfo, path, and query 159 { 160 "//user@foo/path?a=b", 161 &url.URL{ 162 User: url.User("user"), 163 Host: "foo", 164 Path: "/path", 165 RawQuery: "a=b", 166 }, 167 "", 168 }, 169 // Three leading slashes isn't an authority, but doesn't return an error. 170 // (We can't return an error, as this code is also used via 171 // ServeHTTP -> ReadRequest -> Parse, which is arguably a 172 // different URL parsing context, but currently shares the 173 // same codepath) 174 { 175 "///threeslashes", 176 &url.URL{ 177 Path: "///threeslashes", 178 }, 179 "", 180 }, 181 { 182 "http://user:password@google.com", 183 &url.URL{ 184 Scheme: "http", 185 User: url.UserPassword("user", "password"), 186 Host: "google.com", 187 }, 188 "http://user:password@google.com", 189 }, 190 // unescaped @ in username should not confuse host 191 { 192 "http://j@ne:password@google.com", 193 &url.URL{ 194 Scheme: "http", 195 User: url.UserPassword("j@ne", "password"), 196 Host: "google.com", 197 }, 198 "http://j%40ne:password@google.com", 199 }, 200 // unescaped @ in password should not confuse host 201 { 202 "http://jane:p@ssword@google.com", 203 &url.URL{ 204 Scheme: "http", 205 User: url.UserPassword("jane", "p@ssword"), 206 Host: "google.com", 207 }, 208 "http://jane:p%40ssword@google.com", 209 }, 210 { 211 "http://j@ne:password@google.com/p@th?q=@go", 212 &url.URL{ 213 Scheme: "http", 214 User: url.UserPassword("j@ne", "password"), 215 Host: "google.com", 216 Path: "/p@th", 217 RawQuery: "q=@go", 218 }, 219 "http://j%40ne:password@google.com/p@th?q=@go", 220 }, 221 { 222 "http://www.google.com/?q=go+language#foo", 223 &url.URL{ 224 Scheme: "http", 225 Host: "www.google.com", 226 Path: "/", 227 RawQuery: "q=go+language", 228 Fragment: "foo", 229 }, 230 "", 231 }, 232 { 233 "http://www.google.com/?q=go+language#foo%26bar", 234 &url.URL{ 235 Scheme: "http", 236 Host: "www.google.com", 237 Path: "/", 238 RawQuery: "q=go+language", 239 Fragment: "foo&bar", 240 }, 241 "http://www.google.com/?q=go+language#foo&bar", 242 }, 243 { 244 "file:///home/adg/rabbits", 245 &url.URL{ 246 Scheme: "file", 247 Host: "", 248 Path: "/home/adg/rabbits", 249 }, 250 "file:///home/adg/rabbits", 251 }, 252 // "Windows" paths are no exception to the rule. 253 // See golang.org/issue/6027, especially comment #9. 254 { 255 "file:///C:/FooBar/Baz.txt", 256 &url.URL{ 257 Scheme: "file", 258 Host: "", 259 Path: "/C:/FooBar/Baz.txt", 260 }, 261 "file:///C:/FooBar/Baz.txt", 262 }, 263 // case-insensitive scheme 264 { 265 "MaIlTo:webmaster@golang.org", 266 &url.URL{ 267 Scheme: "mailto", 268 Opaque: "webmaster@golang.org", 269 }, 270 "mailto:webmaster@golang.org", 271 }, 272 // Relative path 273 { 274 "a/b/c", 275 &url.URL{ 276 Path: "a/b/c", 277 }, 278 "a/b/c", 279 }, 280 // escaped '?' in username and password 281 { 282 "http://%3Fam:pa%3Fsword@google.com", 283 &url.URL{ 284 Scheme: "http", 285 User: url.UserPassword("?am", "pa?sword"), 286 Host: "google.com", 287 }, 288 "", 289 }, 290 // escaped '?' and '#' in path 291 { 292 "http://example.com/%3F%23", 293 &url.URL{ 294 Scheme: "http", 295 Host: "example.com", 296 Path: "?#", 297 }, 298 "", 299 }, 300 // unescaped [ ] ! ' ( ) * in path 301 { 302 "http://example.com/[]!'()*", 303 &url.URL{ 304 Scheme: "http", 305 Host: "example.com", 306 Path: "[]!'()*", 307 }, 308 "http://example.com/[]!'()*", 309 }, 310 // escaped : / ? # [ ] @ in username and password 311 { 312 "http://%3A%2F%3F:%23%5B%5D%40@example.com", 313 &url.URL{ 314 Scheme: "http", 315 User: url.UserPassword(":/?", "#[]@"), 316 Host: "example.com", 317 }, 318 "", 319 }, 320 // unescaped ! $ & ' ( ) * + , ; = in username and password 321 { 322 "http://!$&'():*+,;=@example.com", 323 &url.URL{ 324 Scheme: "http", 325 User: url.UserPassword("!$&'()", "*+,;="), 326 Host: "example.com", 327 }, 328 "", 329 }, 330 // unescaped = : / . ? = in query component 331 { 332 "http://example.com/?q=http://google.com/?q=", 333 &url.URL{ 334 Scheme: "http", 335 Host: "example.com", 336 Path: "/", 337 RawQuery: "q=http://google.com/?q=", 338 }, 339 "", 340 }, 341 // unescaped : / ? [ ] @ ! $ & ' ( ) * + , ; = in fragment 342 { 343 "http://example.com/#:/?%23[]@!$&'()*+,;=", 344 &url.URL{ 345 Scheme: "http", 346 Host: "example.com", 347 Path: "/", 348 Fragment: ":/?#[]@!$&'()*+,;=", 349 }, 350 "", 351 }, 352} 353 354func DoTestString(t *testing.T, parse func(string) (*url.URL, error), name string, tests []URLTest) { 355 for _, tt := range tests { 356 u, err := parse(tt.in) 357 if err != nil { 358 t.Errorf("%s(%q) returned error %s", name, tt.in, err) 359 continue 360 } 361 expected := tt.in 362 if len(tt.roundtrip) > 0 { 363 expected = tt.roundtrip 364 } 365 s := Escape(u) 366 if s != expected { 367 t.Errorf("Escape(%s(%q)) == %q (expected %q)", name, tt.in, s, expected) 368 } 369 } 370} 371 372func TestURLString(t *testing.T) { 373 DoTestString(t, url.Parse, "Parse", urltests) 374 375 // no leading slash on path should prepend 376 // slash on String() call 377 noslash := URLTest{ 378 "http://www.google.com/search", 379 &url.URL{ 380 Scheme: "http", 381 Host: "www.google.com", 382 Path: "search", 383 }, 384 "", 385 } 386 s := Escape(noslash.out) 387 if s != noslash.in { 388 t.Errorf("Expected %s; go %s", noslash.in, s) 389 } 390} 391 392type EscapeTest struct { 393 in string 394 out string 395 err error 396} 397 398var escapeTests = []EscapeTest{ 399 { 400 "", 401 "", 402 nil, 403 }, 404 { 405 "abc", 406 "abc", 407 nil, 408 }, 409 { 410 "one two", 411 "one+two", 412 nil, 413 }, 414 { 415 "10%", 416 "10%25", 417 nil, 418 }, 419 { 420 " ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;", 421 "+?%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A/%40%24%27%28%29%2A%2C%3B", 422 nil, 423 }, 424} 425 426func TestEscape(t *testing.T) { 427 for _, tt := range escapeTests { 428 actual := QueryEscape(tt.in) 429 if tt.out != actual { 430 t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out) 431 } 432 433 // for bonus points, verify that escape:unescape is an identity. 434 roundtrip, err := url.QueryUnescape(actual) 435 if roundtrip != tt.in || err != nil { 436 t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]") 437 } 438 } 439} 440 441var resolveReferenceTests = []struct { 442 base, rel, expected string 443}{ 444 // Absolute URL references 445 {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"}, 446 {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"}, 447 {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"}, 448 449 // Path-absolute references 450 {"http://foo.com/bar", "/baz", "http://foo.com/baz"}, 451 {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"}, 452 {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"}, 453 454 // Scheme-relative 455 {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"}, 456 457 // Path-relative references: 458 459 // ... current directory 460 {"http://foo.com", ".", "http://foo.com/"}, 461 {"http://foo.com/bar", ".", "http://foo.com/"}, 462 {"http://foo.com/bar/", ".", "http://foo.com/bar/"}, 463 464 // ... going down 465 {"http://foo.com", "bar", "http://foo.com/bar"}, 466 {"http://foo.com/", "bar", "http://foo.com/bar"}, 467 {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"}, 468 469 // ... going up 470 {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"}, 471 {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"}, 472 {"http://foo.com/bar", "..", "http://foo.com/"}, 473 {"http://foo.com/bar/baz", "./..", "http://foo.com/"}, 474 // ".." in the middle (issue 3560) 475 {"http://foo.com/bar/baz", "quux/dotdot/../tail", "http://foo.com/bar/quux/tail"}, 476 {"http://foo.com/bar/baz", "quux/./dotdot/../tail", "http://foo.com/bar/quux/tail"}, 477 {"http://foo.com/bar/baz", "quux/./dotdot/.././tail", "http://foo.com/bar/quux/tail"}, 478 {"http://foo.com/bar/baz", "quux/./dotdot/./../tail", "http://foo.com/bar/quux/tail"}, 479 {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/././../../tail", "http://foo.com/bar/quux/tail"}, 480 {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/./.././../tail", "http://foo.com/bar/quux/tail"}, 481 {"http://foo.com/bar/baz", "quux/./dotdot/dotdot/dotdot/./../../.././././tail", "http://foo.com/bar/quux/tail"}, 482 {"http://foo.com/bar/baz", "quux/./dotdot/../dotdot/../dot/./tail/..", "http://foo.com/bar/quux/dot/"}, 483 484 // Remove any dot-segments prior to forming the target URI. 485 // http://tools.ietf.org/html/rfc3986#section-5.2.4 486 {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/baz"}, 487 488 // Triple dot isn't special 489 {"http://foo.com/bar", "...", "http://foo.com/..."}, 490 491 // Fragment 492 {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"}, 493 494 // RFC 3986: Normal Examples 495 // http://tools.ietf.org/html/rfc3986#section-5.4.1 496 {"http://a/b/c/d;p?q", "g:h", "g:h"}, 497 {"http://a/b/c/d;p?q", "g", "http://a/b/c/g"}, 498 {"http://a/b/c/d;p?q", "./g", "http://a/b/c/g"}, 499 {"http://a/b/c/d;p?q", "g/", "http://a/b/c/g/"}, 500 {"http://a/b/c/d;p?q", "/g", "http://a/g"}, 501 {"http://a/b/c/d;p?q", "//g", "http://g"}, 502 {"http://a/b/c/d;p?q", "?y", "http://a/b/c/d;p?y"}, 503 {"http://a/b/c/d;p?q", "g?y", "http://a/b/c/g?y"}, 504 {"http://a/b/c/d;p?q", "#s", "http://a/b/c/d;p?q#s"}, 505 {"http://a/b/c/d;p?q", "g#s", "http://a/b/c/g#s"}, 506 {"http://a/b/c/d;p?q", "g?y#s", "http://a/b/c/g?y#s"}, 507 {"http://a/b/c/d;p?q", ";x", "http://a/b/c/;x"}, 508 {"http://a/b/c/d;p?q", "g;x", "http://a/b/c/g;x"}, 509 {"http://a/b/c/d;p?q", "g;x?y#s", "http://a/b/c/g;x?y#s"}, 510 {"http://a/b/c/d;p?q", "", "http://a/b/c/d;p?q"}, 511 {"http://a/b/c/d;p?q", ".", "http://a/b/c/"}, 512 {"http://a/b/c/d;p?q", "./", "http://a/b/c/"}, 513 {"http://a/b/c/d;p?q", "..", "http://a/b/"}, 514 {"http://a/b/c/d;p?q", "../", "http://a/b/"}, 515 {"http://a/b/c/d;p?q", "../g", "http://a/b/g"}, 516 {"http://a/b/c/d;p?q", "../..", "http://a/"}, 517 {"http://a/b/c/d;p?q", "../../", "http://a/"}, 518 {"http://a/b/c/d;p?q", "../../g", "http://a/g"}, 519 520 // RFC 3986: Abnormal Examples 521 // http://tools.ietf.org/html/rfc3986#section-5.4.2 522 {"http://a/b/c/d;p?q", "../../../g", "http://a/g"}, 523 {"http://a/b/c/d;p?q", "../../../../g", "http://a/g"}, 524 {"http://a/b/c/d;p?q", "/./g", "http://a/g"}, 525 {"http://a/b/c/d;p?q", "/../g", "http://a/g"}, 526 {"http://a/b/c/d;p?q", "g.", "http://a/b/c/g."}, 527 {"http://a/b/c/d;p?q", ".g", "http://a/b/c/.g"}, 528 {"http://a/b/c/d;p?q", "g..", "http://a/b/c/g.."}, 529 {"http://a/b/c/d;p?q", "..g", "http://a/b/c/..g"}, 530 {"http://a/b/c/d;p?q", "./../g", "http://a/b/g"}, 531 {"http://a/b/c/d;p?q", "./g/.", "http://a/b/c/g/"}, 532 {"http://a/b/c/d;p?q", "g/./h", "http://a/b/c/g/h"}, 533 {"http://a/b/c/d;p?q", "g/../h", "http://a/b/c/h"}, 534 {"http://a/b/c/d;p?q", "g;x=1/./y", "http://a/b/c/g;x=1/y"}, 535 {"http://a/b/c/d;p?q", "g;x=1/../y", "http://a/b/c/y"}, 536 {"http://a/b/c/d;p?q", "g?y/./x", "http://a/b/c/g?y/./x"}, 537 {"http://a/b/c/d;p?q", "g?y/../x", "http://a/b/c/g?y/../x"}, 538 {"http://a/b/c/d;p?q", "g#s/./x", "http://a/b/c/g#s/./x"}, 539 {"http://a/b/c/d;p?q", "g#s/../x", "http://a/b/c/g#s/../x"}, 540 541 // Extras. 542 {"https://a/b/c/d;p?q", "//g?q", "https://g?q"}, 543 {"https://a/b/c/d;p?q", "//g#s", "https://g#s"}, 544 {"https://a/b/c/d;p?q", "//g/d/e/f?y#s", "https://g/d/e/f?y#s"}, 545 {"https://a/b/c/d;p#s", "?y", "https://a/b/c/d;p?y"}, 546 {"https://a/b/c/d;p?q#s", "?y", "https://a/b/c/d;p?y"}, 547} 548 549func TestResolveReference(t *testing.T) { 550 mustParse := func(url_ string) *url.URL { 551 u, err := url.Parse(url_) 552 if err != nil { 553 t.Fatalf("Expected URL to parse: %q, got error: %v", url_, err) 554 } 555 return u 556 } 557 opaque := &url.URL{Scheme: "scheme", Opaque: "opaque"} 558 for _, test := range resolveReferenceTests { 559 base := mustParse(test.base) 560 rel := mustParse(test.rel) 561 url := base.ResolveReference(rel) 562 if Escape(url) != test.expected { 563 t.Errorf("URL(%q).ResolveReference(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url)) 564 } 565 // Ensure that new instances are returned. 566 if base == url { 567 t.Errorf("Expected URL.ResolveReference to return new URL instance.") 568 } 569 // Test the convenience wrapper too. 570 url, err := base.Parse(test.rel) 571 if err != nil { 572 t.Errorf("URL(%q).Parse(%q) failed: %v", test.base, test.rel, err) 573 } else if Escape(url) != test.expected { 574 t.Errorf("URL(%q).Parse(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url)) 575 } else if base == url { 576 // Ensure that new instances are returned for the wrapper too. 577 t.Errorf("Expected URL.Parse to return new URL instance.") 578 } 579 // Ensure Opaque resets the URL. 580 url = base.ResolveReference(opaque) 581 if *url != *opaque { 582 t.Errorf("ResolveReference failed to resolve opaque URL: want %#v, got %#v", url, opaque) 583 } 584 // Test the convenience wrapper with an opaque URL too. 585 url, err = base.Parse("scheme:opaque") 586 if err != nil { 587 t.Errorf(`URL(%q).Parse("scheme:opaque") failed: %v`, test.base, err) 588 } else if *url != *opaque { 589 t.Errorf("Parse failed to resolve opaque URL: want %#v, got %#v", url, opaque) 590 } else if base == url { 591 // Ensure that new instances are returned, again. 592 t.Errorf("Expected URL.Parse to return new URL instance.") 593 } 594 } 595} 596 597type shouldEscapeTest struct { 598 in byte 599 mode encoding 600 escape bool 601} 602 603var shouldEscapeTests = []shouldEscapeTest{ 604 // Unreserved characters (§2.3) 605 {'a', encodePath, false}, 606 {'a', encodeUserPassword, false}, 607 {'a', encodeQueryComponent, false}, 608 {'a', encodeFragment, false}, 609 {'z', encodePath, false}, 610 {'A', encodePath, false}, 611 {'Z', encodePath, false}, 612 {'0', encodePath, false}, 613 {'9', encodePath, false}, 614 {'-', encodePath, false}, 615 {'-', encodeUserPassword, false}, 616 {'-', encodeQueryComponent, false}, 617 {'-', encodeFragment, false}, 618 {'.', encodePath, false}, 619 {'_', encodePath, false}, 620 {'~', encodePath, false}, 621 622 // User information (§3.2.1) 623 {':', encodeUserPassword, true}, 624 {'/', encodeUserPassword, true}, 625 {'?', encodeUserPassword, true}, 626 {'@', encodeUserPassword, true}, 627 {'$', encodeUserPassword, false}, 628 {'&', encodeUserPassword, false}, 629 {'+', encodeUserPassword, false}, 630 {',', encodeUserPassword, false}, 631 {';', encodeUserPassword, false}, 632 {'=', encodeUserPassword, false}, 633} 634 635func TestShouldEscape(t *testing.T) { 636 for _, tt := range shouldEscapeTests { 637 if shouldEscape(tt.in, tt.mode) != tt.escape { 638 t.Errorf("shouldEscape(%q, %v) returned %v; expected %v", tt.in, tt.mode, !tt.escape, tt.escape) 639 } 640 } 641} 642