1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package publicsuffix
6
7import (
8	"sort"
9	"strings"
10	"testing"
11)
12
13func TestNodeLabel(t *testing.T) {
14	for i, want := range nodeLabels {
15		got := nodeLabel(uint32(i))
16		if got != want {
17			t.Errorf("%d: got %q, want %q", i, got, want)
18		}
19	}
20}
21
22func TestFind(t *testing.T) {
23	testCases := []string{
24		"",
25		"a",
26		"a0",
27		"aaaa",
28		"ao",
29		"ap",
30		"ar",
31		"aro",
32		"arp",
33		"arpa",
34		"arpaa",
35		"arpb",
36		"az",
37		"b",
38		"b0",
39		"ba",
40		"z",
41		"zu",
42		"zv",
43		"zw",
44		"zx",
45		"zy",
46		"zz",
47		"zzzz",
48	}
49	for _, tc := range testCases {
50		got := find(tc, 0, numTLD)
51		want := notFound
52		for i := uint32(0); i < numTLD; i++ {
53			if tc == nodeLabel(i) {
54				want = i
55				break
56			}
57		}
58		if got != want {
59			t.Errorf("%q: got %d, want %d", tc, got, want)
60		}
61	}
62}
63
64func TestICANN(t *testing.T) {
65	testCases := map[string]bool{
66		"foo.org":            true,
67		"foo.co.uk":          true,
68		"foo.dyndns.org":     false,
69		"foo.go.dyndns.org":  false,
70		"foo.blogspot.co.uk": false,
71		"foo.intranet":       false,
72	}
73	for domain, want := range testCases {
74		_, got := PublicSuffix(domain)
75		if got != want {
76			t.Errorf("%q: got %v, want %v", domain, got, want)
77		}
78	}
79}
80
// publicSuffixTestCases holds the golden results shared by TestPublicSuffix,
// TestSlowPublicSuffix and BenchmarkPublicSuffix. For each input domain,
// wantPS is the expected public suffix and wantICANN is the expected value of
// the boolean returned alongside it.
var publicSuffixTestCases = []struct {
	domain    string
	wantPS    string
	wantICANN bool
}{
	// Empty string.
	{"", "", false},

	// The .ao rules are:
	// ao
	// ed.ao
	// gv.ao
	// og.ao
	// co.ao
	// pb.ao
	// it.ao
	{"ao", "ao", true},
	{"www.ao", "ao", true},
	{"pb.ao", "pb.ao", true},
	{"www.pb.ao", "pb.ao", true},
	{"www.xxx.yyy.zzz.pb.ao", "pb.ao", true},

	// The .ar rules are:
	// ar
	// com.ar
	// edu.ar
	// gob.ar
	// gov.ar
	// int.ar
	// mil.ar
	// net.ar
	// org.ar
	// tur.ar
	// blogspot.com.ar (in the PRIVATE DOMAIN section).
	{"ar", "ar", true},
	{"www.ar", "ar", true},
	{"nic.ar", "ar", true},
	{"www.nic.ar", "ar", true},
	{"com.ar", "com.ar", true},
	{"www.com.ar", "com.ar", true},
	{"blogspot.com.ar", "blogspot.com.ar", false},                 // PRIVATE DOMAIN.
	{"www.blogspot.com.ar", "blogspot.com.ar", false},             // PRIVATE DOMAIN.
	{"www.xxx.yyy.zzz.blogspot.com.ar", "blogspot.com.ar", false}, // PRIVATE DOMAIN.
	{"logspot.com.ar", "com.ar", true},
	{"zlogspot.com.ar", "com.ar", true},
	{"zblogspot.com.ar", "com.ar", true},

	// The .arpa rules are:
	// arpa
	// e164.arpa
	// in-addr.arpa
	// ip6.arpa
	// iris.arpa
	// uri.arpa
	// urn.arpa
	{"arpa", "arpa", true},
	{"www.arpa", "arpa", true},
	{"urn.arpa", "urn.arpa", true},
	{"www.urn.arpa", "urn.arpa", true},
	{"www.xxx.yyy.zzz.urn.arpa", "urn.arpa", true},

	// The relevant {kobe,kyoto}.jp rules are:
	// jp
	// *.kobe.jp
	// !city.kobe.jp
	// kyoto.jp
	// ide.kyoto.jp
	{"jp", "jp", true},
	{"kobe.jp", "jp", true},
	{"c.kobe.jp", "c.kobe.jp", true},
	{"b.c.kobe.jp", "c.kobe.jp", true},
	{"a.b.c.kobe.jp", "c.kobe.jp", true},
	{"city.kobe.jp", "kobe.jp", true},
	{"www.city.kobe.jp", "kobe.jp", true},
	{"kyoto.jp", "kyoto.jp", true},
	{"test.kyoto.jp", "kyoto.jp", true},
	{"ide.kyoto.jp", "ide.kyoto.jp", true},
	{"b.ide.kyoto.jp", "ide.kyoto.jp", true},
	{"a.b.ide.kyoto.jp", "ide.kyoto.jp", true},

	// The .tw rules are:
	// tw
	// edu.tw
	// gov.tw
	// mil.tw
	// com.tw
	// net.tw
	// org.tw
	// idv.tw
	// game.tw
	// ebiz.tw
	// club.tw
	// 網路.tw (xn--zf0ao64a.tw)
	// 組織.tw (xn--uc0atv.tw)
	// 商業.tw (xn--czrw28b.tw)
	// blogspot.tw
	{"tw", "tw", true},
	{"aaa.tw", "tw", true},
	{"www.aaa.tw", "tw", true},
	{"xn--czrw28b.aaa.tw", "tw", true},
	{"edu.tw", "edu.tw", true},
	{"www.edu.tw", "edu.tw", true},
	{"xn--czrw28b.edu.tw", "edu.tw", true},
	{"xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"www.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw", true},
	{"xn--kpry57d.tw", "tw", true},

	// The .uk rules are:
	// uk
	// ac.uk
	// co.uk
	// gov.uk
	// ltd.uk
	// me.uk
	// net.uk
	// nhs.uk
	// org.uk
	// plc.uk
	// police.uk
	// *.sch.uk
	// blogspot.co.uk (in the PRIVATE DOMAIN section).
	{"uk", "uk", true},
	{"aaa.uk", "uk", true},
	{"www.aaa.uk", "uk", true},
	{"mod.uk", "uk", true},
	{"www.mod.uk", "uk", true},
	{"sch.uk", "uk", true},
	{"mod.sch.uk", "mod.sch.uk", true},
	{"www.sch.uk", "www.sch.uk", true},
	{"co.uk", "co.uk", true},
	{"www.co.uk", "co.uk", true},
	{"blogspot.co.uk", "blogspot.co.uk", false}, // PRIVATE DOMAIN.
	{"blogspot.nic.uk", "uk", true},
	{"blogspot.sch.uk", "blogspot.sch.uk", true},

	// The .рф rules are:
	// рф (xn--p1ai)
	{"xn--p1ai", "xn--p1ai", true},
	{"aaa.xn--p1ai", "xn--p1ai", true},
	{"www.xxx.yyy.xn--p1ai", "xn--p1ai", true},

	// The .bd rules are:
	// *.bd
	{"bd", "bd", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.bd", "www.bd", true},
	{"xxx.www.bd", "www.bd", true},
	{"zzz.bd", "zzz.bd", true},
	{"www.zzz.bd", "zzz.bd", true},
	{"www.xxx.yyy.zzz.bd", "zzz.bd", true},

	// The .ck rules are:
	// *.ck
	// !www.ck
	{"ck", "ck", false}, // The catch-all "*" rule is not in the ICANN DOMAIN section. See footnote (†).
	{"www.ck", "ck", true},
	{"xxx.www.ck", "ck", true},
	{"zzz.ck", "zzz.ck", true},
	{"www.zzz.ck", "zzz.ck", true},
	{"www.xxx.yyy.zzz.ck", "zzz.ck", true},

	// The .myjino.ru rules (in the PRIVATE DOMAIN section) are:
	// myjino.ru
	// *.hosting.myjino.ru
	// *.landing.myjino.ru
	// *.spectrum.myjino.ru
	// *.vps.myjino.ru
	{"myjino.ru", "myjino.ru", false},
	{"aaa.myjino.ru", "myjino.ru", false},
	{"bbb.ccc.myjino.ru", "myjino.ru", false},
	{"hosting.ddd.myjino.ru", "myjino.ru", false},
	{"landing.myjino.ru", "myjino.ru", false},
	{"www.landing.myjino.ru", "www.landing.myjino.ru", false},
	{"spectrum.vps.myjino.ru", "spectrum.vps.myjino.ru", false},

	// The .uberspace.de rules (in the PRIVATE DOMAIN section) are:
	// *.uberspace.de
	{"uberspace.de", "de", true}, // "de" is in the ICANN DOMAIN section. See footnote (†).
	{"aaa.uberspace.de", "aaa.uberspace.de", false},
	{"bbb.ccc.uberspace.de", "ccc.uberspace.de", false},

	// There are no .nosuchtld rules.
	{"nosuchtld", "nosuchtld", false},
	{"foo.nosuchtld", "nosuchtld", false},
	{"bar.foo.nosuchtld", "nosuchtld", false},

	// (†) There is some disagreement on how wildcards behave: what should the
	// public suffix of "platform.sh" be when both "*.platform.sh" and "sh" are
	// in the PSL, but "platform.sh" is not? Two possible answers are
	// "platform.sh" and "sh", there are valid arguments for either behavior,
	// and different browsers have implemented different behaviors.
	//
	// This implementation, Go's golang.org/x/net/publicsuffix, returns "sh",
	// the same as a literal interpretation of the "Formal Algorithm" section
	// of https://publicsuffix.org/list/
	//
	// Together, the TestPublicSuffix and TestSlowPublicSuffix tests check that
	// the Go implementation (func PublicSuffix in list.go) and the literal
	// interpretation (func slowPublicSuffix in list_test.go) produce the same
	// (golden) results on every test case in this publicSuffixTestCases slice,
	// including some "platform.sh" style cases.
	//
	// More discussion of "the platform.sh problem" is at:
	//  - https://github.com/publicsuffix/list/issues/694
	//  - https://bugzilla.mozilla.org/show_bug.cgi?id=1124625#c6
	//  - https://wiki.mozilla.org/Public_Suffix_List/platform.sh_Problem
}
288
289func BenchmarkPublicSuffix(b *testing.B) {
290	for i := 0; i < b.N; i++ {
291		for _, tc := range publicSuffixTestCases {
292			List.PublicSuffix(tc.domain)
293		}
294	}
295}
296
297func TestPublicSuffix(t *testing.T) {
298	for _, tc := range publicSuffixTestCases {
299		gotPS, gotICANN := PublicSuffix(tc.domain)
300		if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
301			t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
302		}
303	}
304}
305
306func TestSlowPublicSuffix(t *testing.T) {
307	for _, tc := range publicSuffixTestCases {
308		gotPS, gotICANN := slowPublicSuffix(tc.domain)
309		if gotPS != tc.wantPS || gotICANN != tc.wantICANN {
310			t.Errorf("%q: got (%q, %t), want (%q, %t)", tc.domain, gotPS, gotICANN, tc.wantPS, tc.wantICANN)
311		}
312	}
313}
314
315func TestNumICANNRules(t *testing.T) {
316	if numICANNRules <= 0 {
317		t.Fatal("no ICANN rules")
318	}
319	if numICANNRules >= len(rules) {
320		t.Fatal("no Private rules")
321	}
322	// Check the last ICANN and first Private rules. If the underlying public
323	// suffix list changes, we may need to update these hard-coded checks.
324	if got, want := rules[numICANNRules-1], "zuerich"; got != want {
325		t.Errorf("last ICANN rule: got %q, wawnt %q", got, want)
326	}
327	if got, want := rules[numICANNRules], "cc.ua"; got != want {
328		t.Errorf("first Private rule: got %q, wawnt %q", got, want)
329	}
330}
331
// slowPublicSuffixRule is a rule that slowPublicSuffix found to match a
// domain. ruleParts holds the rule's labels, obtained by splitting the rule
// on "."; the first label may begin with '*' (wildcard) or '!' (exception).
// icann records whether the rule's index in rules is below numICANNRules; it
// is false for the implicit "*" rule used when nothing else matches.
type slowPublicSuffixRule struct {
	ruleParts []string
	icann     bool
}
336
337// slowPublicSuffix implements the canonical (but O(number of rules)) public
338// suffix algorithm described at http://publicsuffix.org/list/.
339//
340// 1. Match domain against all rules and take note of the matching ones.
341// 2. If no rules match, the prevailing rule is "*".
342// 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
343// 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
344// 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
345// 6. The public suffix is the set of labels from the domain which directly match the labels of the prevailing rule (joined by dots).
346// 7. The registered or registrable domain is the public suffix plus one additional label.
347//
348// This function returns the public suffix, not the registrable domain, and so
349// it stops after step 6.
350func slowPublicSuffix(domain string) (string, bool) {
351	match := func(rulePart, domainPart string) bool {
352		switch rulePart[0] {
353		case '*':
354			return true
355		case '!':
356			return rulePart[1:] == domainPart
357		}
358		return rulePart == domainPart
359	}
360
361	domainParts := strings.Split(domain, ".")
362	var matchingRules []slowPublicSuffixRule
363
364loop:
365	for i, rule := range rules {
366		ruleParts := strings.Split(rule, ".")
367		if len(domainParts) < len(ruleParts) {
368			continue
369		}
370		for i := range ruleParts {
371			rulePart := ruleParts[len(ruleParts)-1-i]
372			domainPart := domainParts[len(domainParts)-1-i]
373			if !match(rulePart, domainPart) {
374				continue loop
375			}
376		}
377		matchingRules = append(matchingRules, slowPublicSuffixRule{
378			ruleParts: ruleParts,
379			icann:     i < numICANNRules,
380		})
381	}
382	if len(matchingRules) == 0 {
383		matchingRules = append(matchingRules, slowPublicSuffixRule{
384			ruleParts: []string{"*"},
385			icann:     false,
386		})
387	} else {
388		sort.Sort(byPriority(matchingRules))
389	}
390
391	prevailing := matchingRules[0]
392	if prevailing.ruleParts[0][0] == '!' {
393		prevailing.ruleParts = prevailing.ruleParts[1:]
394	}
395	if prevailing.ruleParts[0][0] == '*' {
396		replaced := domainParts[len(domainParts)-len(prevailing.ruleParts)]
397		prevailing.ruleParts = append([]string{replaced}, prevailing.ruleParts[1:]...)
398	}
399	return strings.Join(prevailing.ruleParts, "."), prevailing.icann
400}
401
402type byPriority []slowPublicSuffixRule
403
404func (b byPriority) Len() int      { return len(b) }
405func (b byPriority) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
406func (b byPriority) Less(i, j int) bool {
407	if b[i].ruleParts[0][0] == '!' {
408		return true
409	}
410	if b[j].ruleParts[0][0] == '!' {
411		return false
412	}
413	return len(b[i].ruleParts) > len(b[j].ruleParts)
414}
415
// eTLDPlusOneTestCases come from
// https://github.com/publicsuffix/list/blob/master/tests/test_psl.txt
//
// domain is the input passed to EffectiveTLDPlusOne and want is the string it
// is expected to return. TestEffectiveTLDPlusOne discards the returned error
// and compares only the string, so an empty want covers the inputs for which
// no result is expected.
var eTLDPlusOneTestCases = []struct {
	domain, want string
}{
	// Empty input.
	{"", ""},
	// Unlisted TLD.
	{"example", ""},
	{"example.example", "example.example"},
	{"b.example.example", "example.example"},
	{"a.b.example.example", "example.example"},
	// TLD with only 1 rule.
	{"biz", ""},
	{"domain.biz", "domain.biz"},
	{"b.domain.biz", "domain.biz"},
	{"a.b.domain.biz", "domain.biz"},
	// TLD with some 2-level rules.
	{"com", ""},
	{"example.com", "example.com"},
	{"b.example.com", "example.com"},
	{"a.b.example.com", "example.com"},
	{"uk.com", ""},
	{"example.uk.com", "example.uk.com"},
	{"b.example.uk.com", "example.uk.com"},
	{"a.b.example.uk.com", "example.uk.com"},
	{"test.ac", "test.ac"},
	// TLD with only 1 (wildcard) rule.
	{"mm", ""},
	{"c.mm", ""},
	{"b.c.mm", "b.c.mm"},
	{"a.b.c.mm", "b.c.mm"},
	// More complex TLD.
	{"jp", ""},
	{"test.jp", "test.jp"},
	{"www.test.jp", "test.jp"},
	{"ac.jp", ""},
	{"test.ac.jp", "test.ac.jp"},
	{"www.test.ac.jp", "test.ac.jp"},
	{"kyoto.jp", ""},
	{"test.kyoto.jp", "test.kyoto.jp"},
	{"ide.kyoto.jp", ""},
	{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
	{"c.kobe.jp", ""},
	{"b.c.kobe.jp", "b.c.kobe.jp"},
	{"a.b.c.kobe.jp", "b.c.kobe.jp"},
	{"city.kobe.jp", "city.kobe.jp"},
	{"www.city.kobe.jp", "city.kobe.jp"},
	// TLD with a wildcard rule and exceptions.
	{"ck", ""},
	{"test.ck", ""},
	{"b.test.ck", "b.test.ck"},
	{"a.b.test.ck", "b.test.ck"},
	{"www.ck", "www.ck"},
	{"www.www.ck", "www.ck"},
	// US K12.
	{"us", ""},
	{"test.us", "test.us"},
	{"www.test.us", "test.us"},
	{"ak.us", ""},
	{"test.ak.us", "test.ak.us"},
	{"www.test.ak.us", "test.ak.us"},
	{"k12.ak.us", ""},
	{"test.k12.ak.us", "test.k12.ak.us"},
	{"www.test.k12.ak.us", "test.k12.ak.us"},
	// Punycoded IDN labels
	{"xn--85x722f.com.cn", "xn--85x722f.com.cn"},
	{"xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
	{"www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn"},
	{"shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn"},
	{"xn--55qx5d.cn", ""},
	{"xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
	{"www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s"},
	{"shishi.xn--fiqs8s", "shishi.xn--fiqs8s"},
	{"xn--fiqs8s", ""},

	// Invalid input
	{".", ""},
	{"de.", ""},
	{".de", ""},
	{".com.au", ""},
	{"com.au.", ""},
	{"com..au", ""},
}
501
502func TestEffectiveTLDPlusOne(t *testing.T) {
503	for _, tc := range eTLDPlusOneTestCases {
504		got, _ := EffectiveTLDPlusOne(tc.domain)
505		if got != tc.want {
506			t.Errorf("%q: got %q, want %q", tc.domain, got, tc.want)
507		}
508	}
509}
510