1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package urlesc
6
7import (
8	"net/url"
9	"testing"
10)
11
// URLTest is one URL round-trip case: an input string, a url.URL value
// associated with it, and the string expected after re-serialization.
type URLTest struct {
	// in is the raw URL text handed to the parse function under test.
	in        string
	// out is the url.URL value associated with in. DoTestString does not
	// read it; TestURLString passes it directly to Escape.
	out       *url.URL
	roundtrip string // expected result of reserializing the URL; empty means same as "in".
}
17
18var urltests = []URLTest{
19	// no path
20	{
21		"http://www.google.com",
22		&url.URL{
23			Scheme: "http",
24			Host:   "www.google.com",
25		},
26		"",
27	},
28	// path
29	{
30		"http://www.google.com/",
31		&url.URL{
32			Scheme: "http",
33			Host:   "www.google.com",
34			Path:   "/",
35		},
36		"",
37	},
38	// path with hex escaping
39	{
40		"http://www.google.com/file%20one%26two",
41		&url.URL{
42			Scheme: "http",
43			Host:   "www.google.com",
44			Path:   "/file one&two",
45		},
46		"http://www.google.com/file%20one&two",
47	},
48	// user
49	{
50		"ftp://webmaster@www.google.com/",
51		&url.URL{
52			Scheme: "ftp",
53			User:   url.User("webmaster"),
54			Host:   "www.google.com",
55			Path:   "/",
56		},
57		"",
58	},
59	// escape sequence in username
60	{
61		"ftp://john%20doe@www.google.com/",
62		&url.URL{
63			Scheme: "ftp",
64			User:   url.User("john doe"),
65			Host:   "www.google.com",
66			Path:   "/",
67		},
68		"ftp://john%20doe@www.google.com/",
69	},
70	// query
71	{
72		"http://www.google.com/?q=go+language",
73		&url.URL{
74			Scheme:   "http",
75			Host:     "www.google.com",
76			Path:     "/",
77			RawQuery: "q=go+language",
78		},
79		"",
80	},
81	// query with hex escaping: NOT parsed
82	{
83		"http://www.google.com/?q=go%20language",
84		&url.URL{
85			Scheme:   "http",
86			Host:     "www.google.com",
87			Path:     "/",
88			RawQuery: "q=go%20language",
89		},
90		"",
91	},
92	// %20 outside query
93	{
94		"http://www.google.com/a%20b?q=c+d",
95		&url.URL{
96			Scheme:   "http",
97			Host:     "www.google.com",
98			Path:     "/a b",
99			RawQuery: "q=c+d",
100		},
101		"",
102	},
103	// path without leading /, so no parsing
104	{
105		"http:www.google.com/?q=go+language",
106		&url.URL{
107			Scheme:   "http",
108			Opaque:   "www.google.com/",
109			RawQuery: "q=go+language",
110		},
111		"http:www.google.com/?q=go+language",
112	},
113	// path without leading /, so no parsing
114	{
115		"http:%2f%2fwww.google.com/?q=go+language",
116		&url.URL{
117			Scheme:   "http",
118			Opaque:   "%2f%2fwww.google.com/",
119			RawQuery: "q=go+language",
120		},
121		"http:%2f%2fwww.google.com/?q=go+language",
122	},
123	// non-authority with path
124	{
125		"mailto:/webmaster@golang.org",
126		&url.URL{
127			Scheme: "mailto",
128			Path:   "/webmaster@golang.org",
129		},
130		"mailto:///webmaster@golang.org", // unfortunate compromise
131	},
132	// non-authority
133	{
134		"mailto:webmaster@golang.org",
135		&url.URL{
136			Scheme: "mailto",
137			Opaque: "webmaster@golang.org",
138		},
139		"",
140	},
141	// unescaped :// in query should not create a scheme
142	{
143		"/foo?query=http://bad",
144		&url.URL{
145			Path:     "/foo",
146			RawQuery: "query=http://bad",
147		},
148		"",
149	},
150	// leading // without scheme should create an authority
151	{
152		"//foo",
153		&url.URL{
154			Host: "foo",
155		},
156		"",
157	},
158	// leading // without scheme, with userinfo, path, and query
159	{
160		"//user@foo/path?a=b",
161		&url.URL{
162			User:     url.User("user"),
163			Host:     "foo",
164			Path:     "/path",
165			RawQuery: "a=b",
166		},
167		"",
168	},
169	// Three leading slashes isn't an authority, but doesn't return an error.
170	// (We can't return an error, as this code is also used via
171	// ServeHTTP -> ReadRequest -> Parse, which is arguably a
172	// different URL parsing context, but currently shares the
173	// same codepath)
174	{
175		"///threeslashes",
176		&url.URL{
177			Path: "///threeslashes",
178		},
179		"",
180	},
181	{
182		"http://user:password@google.com",
183		&url.URL{
184			Scheme: "http",
185			User:   url.UserPassword("user", "password"),
186			Host:   "google.com",
187		},
188		"http://user:password@google.com",
189	},
190	// unescaped @ in username should not confuse host
191	{
192		"http://j@ne:password@google.com",
193		&url.URL{
194			Scheme: "http",
195			User:   url.UserPassword("j@ne", "password"),
196			Host:   "google.com",
197		},
198		"http://j%40ne:password@google.com",
199	},
200	// unescaped @ in password should not confuse host
201	{
202		"http://jane:p@ssword@google.com",
203		&url.URL{
204			Scheme: "http",
205			User:   url.UserPassword("jane", "p@ssword"),
206			Host:   "google.com",
207		},
208		"http://jane:p%40ssword@google.com",
209	},
210	{
211		"http://j@ne:password@google.com/p@th?q=@go",
212		&url.URL{
213			Scheme:   "http",
214			User:     url.UserPassword("j@ne", "password"),
215			Host:     "google.com",
216			Path:     "/p@th",
217			RawQuery: "q=@go",
218		},
219		"http://j%40ne:password@google.com/p@th?q=@go",
220	},
221	{
222		"http://www.google.com/?q=go+language#foo",
223		&url.URL{
224			Scheme:   "http",
225			Host:     "www.google.com",
226			Path:     "/",
227			RawQuery: "q=go+language",
228			Fragment: "foo",
229		},
230		"",
231	},
232	{
233		"http://www.google.com/?q=go+language#foo%26bar",
234		&url.URL{
235			Scheme:   "http",
236			Host:     "www.google.com",
237			Path:     "/",
238			RawQuery: "q=go+language",
239			Fragment: "foo&bar",
240		},
241		"http://www.google.com/?q=go+language#foo&bar",
242	},
243	{
244		"file:///home/adg/rabbits",
245		&url.URL{
246			Scheme: "file",
247			Host:   "",
248			Path:   "/home/adg/rabbits",
249		},
250		"file:///home/adg/rabbits",
251	},
252	// "Windows" paths are no exception to the rule.
253	// See golang.org/issue/6027, especially comment #9.
254	{
255		"file:///C:/FooBar/Baz.txt",
256		&url.URL{
257			Scheme: "file",
258			Host:   "",
259			Path:   "/C:/FooBar/Baz.txt",
260		},
261		"file:///C:/FooBar/Baz.txt",
262	},
263	// case-insensitive scheme
264	{
265		"MaIlTo:webmaster@golang.org",
266		&url.URL{
267			Scheme: "mailto",
268			Opaque: "webmaster@golang.org",
269		},
270		"mailto:webmaster@golang.org",
271	},
272	// Relative path
273	{
274		"a/b/c",
275		&url.URL{
276			Path: "a/b/c",
277		},
278		"a/b/c",
279	},
280	// escaped '?' in username and password
281	{
282		"http://%3Fam:pa%3Fsword@google.com",
283		&url.URL{
284			Scheme: "http",
285			User:   url.UserPassword("?am", "pa?sword"),
286			Host:   "google.com",
287		},
288		"",
289	},
290	// escaped '?' and '#' in path
291	{
292		"http://example.com/%3F%23",
293		&url.URL{
294			Scheme: "http",
295			Host:   "example.com",
296			Path:   "?#",
297		},
298		"",
299	},
300	// unescaped [ ] ! ' ( ) * in path
301	{
302		"http://example.com/[]!'()*",
303		&url.URL{
304			Scheme: "http",
305			Host:   "example.com",
306			Path:   "[]!'()*",
307		},
308		"http://example.com/[]!'()*",
309	},
310	// escaped : / ? # [ ] @ in username and password
311	{
312		"http://%3A%2F%3F:%23%5B%5D%40@example.com",
313		&url.URL{
314			Scheme: "http",
315			User:   url.UserPassword(":/?", "#[]@"),
316			Host:   "example.com",
317		},
318		"",
319	},
320	// unescaped ! $ & ' ( ) * + , ; = in username and password
321	{
322		"http://!$&'():*+,;=@example.com",
323		&url.URL{
324			Scheme: "http",
325			User:   url.UserPassword("!$&'()", "*+,;="),
326			Host:   "example.com",
327		},
328		"",
329	},
330	// unescaped = : / . ? = in query component
331	{
332		"http://example.com/?q=http://google.com/?q=",
333		&url.URL{
334			Scheme:   "http",
335			Host:     "example.com",
336			Path:     "/",
337			RawQuery: "q=http://google.com/?q=",
338		},
339		"",
340	},
341	// unescaped : / ? [ ] @ ! $ & ' ( ) * + , ; = in fragment
342	{
343		"http://example.com/#:/?%23[]@!$&'()*+,;=",
344		&url.URL{
345			Scheme:   "http",
346			Host:     "example.com",
347			Path:     "/",
348			Fragment: ":/?#[]@!$&'()*+,;=",
349		},
350		"",
351	},
352}
353
354func DoTestString(t *testing.T, parse func(string) (*url.URL, error), name string, tests []URLTest) {
355	for _, tt := range tests {
356		u, err := parse(tt.in)
357		if err != nil {
358			t.Errorf("%s(%q) returned error %s", name, tt.in, err)
359			continue
360		}
361		expected := tt.in
362		if len(tt.roundtrip) > 0 {
363			expected = tt.roundtrip
364		}
365		s := Escape(u)
366		if s != expected {
367			t.Errorf("Escape(%s(%q)) == %q (expected %q)", name, tt.in, s, expected)
368		}
369	}
370}
371
372func TestURLString(t *testing.T) {
373	DoTestString(t, url.Parse, "Parse", urltests)
374
375	// no leading slash on path should prepend
376	// slash on String() call
377	noslash := URLTest{
378		"http://www.google.com/search",
379		&url.URL{
380			Scheme: "http",
381			Host:   "www.google.com",
382			Path:   "search",
383		},
384		"",
385	}
386	s := Escape(noslash.out)
387	if s != noslash.in {
388		t.Errorf("Expected %s; go %s", noslash.in, s)
389	}
390}
391
// EscapeTest pairs a raw string with the escaped form expected for it.
type EscapeTest struct {
	// in is the raw string passed to QueryEscape by TestEscape.
	in  string
	// out is the escaped string TestEscape expects back.
	out string
	// err is not read by the tests visible in this file; every entry in
	// escapeTests sets it to nil.
	err error
}
397
398var escapeTests = []EscapeTest{
399	{
400		"",
401		"",
402		nil,
403	},
404	{
405		"abc",
406		"abc",
407		nil,
408	},
409	{
410		"one two",
411		"one+two",
412		nil,
413	},
414	{
415		"10%",
416		"10%25",
417		nil,
418	},
419	{
420		" ?&=#+%!<>#\"{}|\\^[]`☺\t:/@$'()*,;",
421		"+?%26%3D%23%2B%25%21%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09%3A/%40%24%27%28%29%2A%2C%3B",
422		nil,
423	},
424}
425
426func TestEscape(t *testing.T) {
427	for _, tt := range escapeTests {
428		actual := QueryEscape(tt.in)
429		if tt.out != actual {
430			t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out)
431		}
432
433		// for bonus points, verify that escape:unescape is an identity.
434		roundtrip, err := url.QueryUnescape(actual)
435		if roundtrip != tt.in || err != nil {
436			t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]")
437		}
438	}
439}
440
// resolveReferenceTests lists reference-resolution cases: resolving rel
// against base is expected to serialize to expected.
var resolveReferenceTests = []struct {
	base, rel, expected string
}{
	// References that are themselves absolute URLs.
	{base: "http://foo.com?a=b", rel: "https://bar.com/", expected: "https://bar.com/"},
	{base: "http://foo.com/", rel: "https://bar.com/?a=b", expected: "https://bar.com/?a=b"},
	{base: "http://foo.com/bar", rel: "mailto:foo@example.com", expected: "mailto:foo@example.com"},

	// References with an absolute path.
	{base: "http://foo.com/bar", rel: "/baz", expected: "http://foo.com/baz"},
	{base: "http://foo.com/bar?a=b#f", rel: "/baz", expected: "http://foo.com/baz"},
	{base: "http://foo.com/bar?a=b", rel: "/baz?c=d", expected: "http://foo.com/baz?c=d"},

	// Scheme-relative reference.
	{base: "https://foo.com/bar?a=b", rel: "//bar.com/quux", expected: "https://bar.com/quux"},

	// References with a relative path follow.

	// Relative path: current directory.
	{base: "http://foo.com", rel: ".", expected: "http://foo.com/"},
	{base: "http://foo.com/bar", rel: ".", expected: "http://foo.com/"},
	{base: "http://foo.com/bar/", rel: ".", expected: "http://foo.com/bar/"},

	// Relative path: descending.
	{base: "http://foo.com", rel: "bar", expected: "http://foo.com/bar"},
	{base: "http://foo.com/", rel: "bar", expected: "http://foo.com/bar"},
	{base: "http://foo.com/bar/baz", rel: "quux", expected: "http://foo.com/bar/quux"},

	// Relative path: ascending.
	{base: "http://foo.com/bar/baz", rel: "../quux", expected: "http://foo.com/quux"},
	{base: "http://foo.com/bar/baz", rel: "../../../../../quux", expected: "http://foo.com/quux"},
	{base: "http://foo.com/bar", rel: "..", expected: "http://foo.com/"},
	{base: "http://foo.com/bar/baz", rel: "./..", expected: "http://foo.com/"},
	// Relative path: ".." in the middle (issue 3560).
	{base: "http://foo.com/bar/baz", rel: "quux/dotdot/../tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/../tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/.././tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/./../tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/dotdot/././../../tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/dotdot/./.././../tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/dotdot/dotdot/./../../.././././tail", expected: "http://foo.com/bar/quux/tail"},
	{base: "http://foo.com/bar/baz", rel: "quux/./dotdot/../dotdot/../dot/./tail/..", expected: "http://foo.com/bar/quux/dot/"},

	// Dot-segments are removed before the target URI is formed.
	// http://tools.ietf.org/html/rfc3986#section-5.2.4
	{base: "http://foo.com/dot/./dotdot/../foo/bar", rel: "../baz", expected: "http://foo.com/dot/baz"},

	// A triple dot is an ordinary path segment.
	{base: "http://foo.com/bar", rel: "...", expected: "http://foo.com/..."},

	// Reference carrying a fragment.
	{base: "http://foo.com/bar", rel: ".#frag", expected: "http://foo.com/#frag"},

	// RFC 3986, normal examples.
	// http://tools.ietf.org/html/rfc3986#section-5.4.1
	{base: "http://a/b/c/d;p?q", rel: "g:h", expected: "g:h"},
	{base: "http://a/b/c/d;p?q", rel: "g", expected: "http://a/b/c/g"},
	{base: "http://a/b/c/d;p?q", rel: "./g", expected: "http://a/b/c/g"},
	{base: "http://a/b/c/d;p?q", rel: "g/", expected: "http://a/b/c/g/"},
	{base: "http://a/b/c/d;p?q", rel: "/g", expected: "http://a/g"},
	{base: "http://a/b/c/d;p?q", rel: "//g", expected: "http://g"},
	{base: "http://a/b/c/d;p?q", rel: "?y", expected: "http://a/b/c/d;p?y"},
	{base: "http://a/b/c/d;p?q", rel: "g?y", expected: "http://a/b/c/g?y"},
	{base: "http://a/b/c/d;p?q", rel: "#s", expected: "http://a/b/c/d;p?q#s"},
	{base: "http://a/b/c/d;p?q", rel: "g#s", expected: "http://a/b/c/g#s"},
	{base: "http://a/b/c/d;p?q", rel: "g?y#s", expected: "http://a/b/c/g?y#s"},
	{base: "http://a/b/c/d;p?q", rel: ";x", expected: "http://a/b/c/;x"},
	{base: "http://a/b/c/d;p?q", rel: "g;x", expected: "http://a/b/c/g;x"},
	{base: "http://a/b/c/d;p?q", rel: "g;x?y#s", expected: "http://a/b/c/g;x?y#s"},
	{base: "http://a/b/c/d;p?q", rel: "", expected: "http://a/b/c/d;p?q"},
	{base: "http://a/b/c/d;p?q", rel: ".", expected: "http://a/b/c/"},
	{base: "http://a/b/c/d;p?q", rel: "./", expected: "http://a/b/c/"},
	{base: "http://a/b/c/d;p?q", rel: "..", expected: "http://a/b/"},
	{base: "http://a/b/c/d;p?q", rel: "../", expected: "http://a/b/"},
	{base: "http://a/b/c/d;p?q", rel: "../g", expected: "http://a/b/g"},
	{base: "http://a/b/c/d;p?q", rel: "../..", expected: "http://a/"},
	{base: "http://a/b/c/d;p?q", rel: "../../", expected: "http://a/"},
	{base: "http://a/b/c/d;p?q", rel: "../../g", expected: "http://a/g"},

	// RFC 3986, abnormal examples.
	// http://tools.ietf.org/html/rfc3986#section-5.4.2
	{base: "http://a/b/c/d;p?q", rel: "../../../g", expected: "http://a/g"},
	{base: "http://a/b/c/d;p?q", rel: "../../../../g", expected: "http://a/g"},
	{base: "http://a/b/c/d;p?q", rel: "/./g", expected: "http://a/g"},
	{base: "http://a/b/c/d;p?q", rel: "/../g", expected: "http://a/g"},
	{base: "http://a/b/c/d;p?q", rel: "g.", expected: "http://a/b/c/g."},
	{base: "http://a/b/c/d;p?q", rel: ".g", expected: "http://a/b/c/.g"},
	{base: "http://a/b/c/d;p?q", rel: "g..", expected: "http://a/b/c/g.."},
	{base: "http://a/b/c/d;p?q", rel: "..g", expected: "http://a/b/c/..g"},
	{base: "http://a/b/c/d;p?q", rel: "./../g", expected: "http://a/b/g"},
	{base: "http://a/b/c/d;p?q", rel: "./g/.", expected: "http://a/b/c/g/"},
	{base: "http://a/b/c/d;p?q", rel: "g/./h", expected: "http://a/b/c/g/h"},
	{base: "http://a/b/c/d;p?q", rel: "g/../h", expected: "http://a/b/c/h"},
	{base: "http://a/b/c/d;p?q", rel: "g;x=1/./y", expected: "http://a/b/c/g;x=1/y"},
	{base: "http://a/b/c/d;p?q", rel: "g;x=1/../y", expected: "http://a/b/c/y"},
	{base: "http://a/b/c/d;p?q", rel: "g?y/./x", expected: "http://a/b/c/g?y/./x"},
	{base: "http://a/b/c/d;p?q", rel: "g?y/../x", expected: "http://a/b/c/g?y/../x"},
	{base: "http://a/b/c/d;p?q", rel: "g#s/./x", expected: "http://a/b/c/g#s/./x"},
	{base: "http://a/b/c/d;p?q", rel: "g#s/../x", expected: "http://a/b/c/g#s/../x"},

	// Additional cases beyond the RFC examples.
	{base: "https://a/b/c/d;p?q", rel: "//g?q", expected: "https://g?q"},
	{base: "https://a/b/c/d;p?q", rel: "//g#s", expected: "https://g#s"},
	{base: "https://a/b/c/d;p?q", rel: "//g/d/e/f?y#s", expected: "https://g/d/e/f?y#s"},
	{base: "https://a/b/c/d;p#s", rel: "?y", expected: "https://a/b/c/d;p?y"},
	{base: "https://a/b/c/d;p?q#s", rel: "?y", expected: "https://a/b/c/d;p?y"},
}
548
549func TestResolveReference(t *testing.T) {
550	mustParse := func(url_ string) *url.URL {
551		u, err := url.Parse(url_)
552		if err != nil {
553			t.Fatalf("Expected URL to parse: %q, got error: %v", url_, err)
554		}
555		return u
556	}
557	opaque := &url.URL{Scheme: "scheme", Opaque: "opaque"}
558	for _, test := range resolveReferenceTests {
559		base := mustParse(test.base)
560		rel := mustParse(test.rel)
561		url := base.ResolveReference(rel)
562		if Escape(url) != test.expected {
563			t.Errorf("URL(%q).ResolveReference(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url))
564		}
565		// Ensure that new instances are returned.
566		if base == url {
567			t.Errorf("Expected URL.ResolveReference to return new URL instance.")
568		}
569		// Test the convenience wrapper too.
570		url, err := base.Parse(test.rel)
571		if err != nil {
572			t.Errorf("URL(%q).Parse(%q) failed: %v", test.base, test.rel, err)
573		} else if Escape(url) != test.expected {
574			t.Errorf("URL(%q).Parse(%q) == %q, got %q", test.base, test.rel, test.expected, Escape(url))
575		} else if base == url {
576			// Ensure that new instances are returned for the wrapper too.
577			t.Errorf("Expected URL.Parse to return new URL instance.")
578		}
579		// Ensure Opaque resets the URL.
580		url = base.ResolveReference(opaque)
581		if *url != *opaque {
582			t.Errorf("ResolveReference failed to resolve opaque URL: want %#v, got %#v", url, opaque)
583		}
584		// Test the convenience wrapper with an opaque URL too.
585		url, err = base.Parse("scheme:opaque")
586		if err != nil {
587			t.Errorf(`URL(%q).Parse("scheme:opaque") failed: %v`, test.base, err)
588		} else if *url != *opaque {
589			t.Errorf("Parse failed to resolve opaque URL: want %#v, got %#v", url, opaque)
590		} else if base == url {
591			// Ensure that new instances are returned, again.
592			t.Errorf("Expected URL.Parse to return new URL instance.")
593		}
594	}
595}
596
// shouldEscapeTest is one expectation for shouldEscape: a byte, the encoding
// mode it is checked under, and the expected result.
type shouldEscapeTest struct {
	// in is the byte passed to shouldEscape.
	in     byte
	// mode is the encoding mode passed to shouldEscape.
	mode   encoding
	// escape is the value shouldEscape is expected to return.
	escape bool
}
602
603var shouldEscapeTests = []shouldEscapeTest{
604	// Unreserved characters (§2.3)
605	{'a', encodePath, false},
606	{'a', encodeUserPassword, false},
607	{'a', encodeQueryComponent, false},
608	{'a', encodeFragment, false},
609	{'z', encodePath, false},
610	{'A', encodePath, false},
611	{'Z', encodePath, false},
612	{'0', encodePath, false},
613	{'9', encodePath, false},
614	{'-', encodePath, false},
615	{'-', encodeUserPassword, false},
616	{'-', encodeQueryComponent, false},
617	{'-', encodeFragment, false},
618	{'.', encodePath, false},
619	{'_', encodePath, false},
620	{'~', encodePath, false},
621
622	// User information (§3.2.1)
623	{':', encodeUserPassword, true},
624	{'/', encodeUserPassword, true},
625	{'?', encodeUserPassword, true},
626	{'@', encodeUserPassword, true},
627	{'$', encodeUserPassword, false},
628	{'&', encodeUserPassword, false},
629	{'+', encodeUserPassword, false},
630	{',', encodeUserPassword, false},
631	{';', encodeUserPassword, false},
632	{'=', encodeUserPassword, false},
633}
634
635func TestShouldEscape(t *testing.T) {
636	for _, tt := range shouldEscapeTests {
637		if shouldEscape(tt.in, tt.mode) != tt.escape {
638			t.Errorf("shouldEscape(%q, %v) returned %v; expected %v", tt.in, tt.mode, !tt.escape, tt.escape)
639		}
640	}
641}
642