1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build !go1.10
6
7package idna
8
9import "testing"
10
11// TestLabelErrors tests strings returned in case of error. All results should
12// be identical to the reference implementation and can be verified at
13// http://unicode.org/cldr/utility/idna.jsp. The reference implementation,
14// however, seems to not display Bidi and ContextJ errors.
15//
16// In some cases the behavior of browsers is added as a comment. In all cases,
17// whenever a resolve search returns an error here, Chrome will treat the input
18// string as a search string (including those for Bidi and Context J errors),
19// unless noted otherwise.
20func TestLabelErrors(t *testing.T) {
21	encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
22	type kind struct {
23		name string
24		f    func(string) (string, error)
25	}
26	punyA := kind{"PunycodeA", punycode.ToASCII}
27	resolve := kind{"ResolveA", Lookup.ToASCII}
28	display := kind{"ToUnicode", Display.ToUnicode}
29	p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
30	lengthU := kind{"CheckLengthU", p.ToUnicode}
31	lengthA := kind{"CheckLengthA", p.ToASCII}
32	p = New(MapForLookup(), StrictDomainName(false))
33	std3 := kind{"STD3", p.ToASCII}
34
35	testCases := []struct {
36		kind
37		input   string
38		want    string
39		wantErr string
40	}{
41		{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
42		{lengthA, "", "", "A4"},
43
44		{lengthU, "xn--", "", "A4"},
45		{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
46		{lengthU, "xn--.foo", ".foo", "A4"},
47		{lengthU, "foo.xn--.bar", "foo..bar", "A4"},
48
49		{display, "xn--", "", ""},
50		{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
51		{display, "xn--.foo", ".foo", ""},
52		{display, "foo.xn--.bar", "foo..bar", ""},
53
54		{lengthA, "a..b", "a..b", "A4"},
55		{punyA, ".b", ".b", ""},
56		// For backwards compatibility, the Punycode profile does not map runes.
57		{punyA, "\u3002b", "xn--b-83t", ""},
58		{punyA, "..b", "..b", ""},
59		// Only strip leading empty labels for certain profiles. Stripping
60		// leading empty labels here but not for "empty" punycode above seems
61		// inconsistent, but seems to be applied by both the conformance test
62		// and Chrome. So we turn it off by default, support it as an option,
63		// and enable it in profiles where it seems commonplace.
64		{lengthA, ".b", "b", ""},
65		{lengthA, "\u3002b", "b", ""},
66		{lengthA, "..b", "b", ""},
67		{lengthA, "b..", "b..", ""},
68
69		{resolve, "a..b", "a..b", ""},
70		{resolve, ".b", "b", ""},
71		{resolve, "\u3002b", "b", ""},
72		{resolve, "..b", "b", ""},
73		{resolve, "b..", "b..", ""},
74
75		// Raw punycode
76		{punyA, "", "", ""},
77		{punyA, "*.foo.com", "*.foo.com", ""},
78		{punyA, "Foo.com", "Foo.com", ""},
79
80		// STD3 rules
81		{display, "*.foo.com", "*.foo.com", "P1"},
82		{std3, "*.foo.com", "*.foo.com", ""},
83
84		// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
85		// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
86		// lab9.be.
87		{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
88		{display, "lab⒐be", "lab⒐be", "P1"},
89
90		{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
91		{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
92
93		// Chrome 54.0 recognizes the error and treats this input verbatim as a
94		// search string.
95		// Safari 10.0 (non-conform spec) decomposes "⒈" and computes the
96		// punycode on the result using transitional mapping.
97		// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
98		// seems to be nested punycode encodings.
99		{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
100		{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
101
102		{resolve, "a\u200Cb", "ab", ""},
103		{display, "a\u200Cb", "a\u200Cb", "C"},
104
105		{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
106		{display, "a\u200Cb", "a\u200Cb", "C"},
107
108		{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
109		{
110			// Notice how the string gets transformed, even with an error.
111			// Chrome will use the original string if it finds an error, so not
112			// the transformed one.
113			display,
114			"gr\ufecb\ufeae\ufe91\ufef2.de",
115			"gr\u0639\u0631\u0628\u064a.de",
116			"B",
117		},
118
119		{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
120		{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
121
122		// normalize input
123		{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
124		{display, "a\u0323\u0322", "\u1ea1\u0322", ""},
125
126		// Non-normalized strings are not normalized when they originate from
127		// punycode. Despite the error, Chrome, Safari and Firefox will attempt
128		// to look up the input punycode.
129		{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
130		{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
131	}
132
133	for _, tc := range testCases {
134		doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
135	}
136}
137