1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go
6
7// Package publicsuffix provides a public suffix list based on data from
8// https://publicsuffix.org/
9//
10// A public suffix is one under which Internet users can directly register
11// names. It is related to, but different from, a TLD (top level domain).
12//
13// "com" is a TLD (top level domain). Top level means it has no dots.
14//
15// "com" is also a public suffix. Amazon and Google have registered different
16// siblings under that domain: "amazon.com" and "google.com".
17//
// "au" is another TLD, again because it has no dots. But Amazon's Australian
// domain is not "amazon.au". Instead, it's "amazon.com.au".
20//
21// "com.au" isn't an actual TLD, because it's not at the top level (it has
22// dots). But it is an eTLD (effective TLD), because that's the branching point
23// for domain name registrars.
24//
25// Another name for "an eTLD" is "a public suffix". Often, what's more of
26// interest is the eTLD+1, or one more label than the public suffix. For
27// example, browsers partition read/write access to HTTP cookies according to
28// the eTLD+1. Web pages served from "amazon.com.au" can't read cookies from
29// "google.com.au", but web pages served from "maps.google.com" can share
30// cookies from "www.google.com", so you don't have to sign into Google Maps
31// separately from signing into Google Web Search. Note that all four of those
32// domains have 3 labels and 2 dots. The first two domains are each an eTLD+1,
33// the last two are not (but share the same eTLD+1: "google.com").
34//
35// All of these domains have the same eTLD+1:
36//  - "www.books.amazon.co.uk"
37//  - "books.amazon.co.uk"
38//  - "amazon.co.uk"
39// Specifically, the eTLD+1 is "amazon.co.uk", because the eTLD is "co.uk".
40//
41// There is no closed form algorithm to calculate the eTLD of a domain.
42// Instead, the calculation is data driven. This package provides a
43// pre-compiled snapshot of Mozilla's PSL (Public Suffix List) data at
44// https://publicsuffix.org/
45package publicsuffix // import "golang.org/x/net/publicsuffix"
46
47// TODO: specify case sensitivity and leading/trailing dot behavior for
48// func PublicSuffix and func EffectiveTLDPlusOne.
49
50import (
51	"fmt"
52	"net/http/cookiejar"
53	"strings"
54)
55
// List implements the cookiejar.PublicSuffixList interface by calling the
// PublicSuffix function.
var List cookiejar.PublicSuffixList = list{}

// list is the empty, unexported type whose value is exposed as List.
type list struct{}
61
62func (list) PublicSuffix(domain string) string {
63	ps, _ := PublicSuffix(domain)
64	return ps
65}
66
// String returns version. Together with PublicSuffix it makes list satisfy
// cookiejar.PublicSuffixList, as the declaration of List requires.
func (list) String() string {
	return version
}
70
71// PublicSuffix returns the public suffix of the domain using a copy of the
72// publicsuffix.org database compiled into the library.
73//
74// icann is whether the public suffix is managed by the Internet Corporation
75// for Assigned Names and Numbers. If not, the public suffix is either a
76// privately managed domain (and in practice, not a top level domain) or an
77// unmanaged top level domain (and not explicitly mentioned in the
78// publicsuffix.org list). For example, "foo.org" and "foo.co.uk" are ICANN
79// domains, "foo.dyndns.org" and "foo.blogspot.co.uk" are private domains and
80// "cromulent" is an unmanaged top level domain.
81//
82// Use cases for distinguishing ICANN domains like "foo.com" from private
83// domains like "foo.appspot.com" can be found at
84// https://wiki.mozilla.org/Public_Suffix_List/Use_Cases
85func PublicSuffix(domain string) (publicSuffix string, icann bool) {
86	lo, hi := uint32(0), uint32(numTLD)
87	s, suffix, icannNode, wildcard := domain, len(domain), false, false
88loop:
89	for {
90		dot := strings.LastIndex(s, ".")
91		if wildcard {
92			icann = icannNode
93			suffix = 1 + dot
94		}
95		if lo == hi {
96			break
97		}
98		f := find(s[1+dot:], lo, hi)
99		if f == notFound {
100			break
101		}
102
103		u := nodes[f] >> (nodesBitsTextOffset + nodesBitsTextLength)
104		icannNode = u&(1<<nodesBitsICANN-1) != 0
105		u >>= nodesBitsICANN
106		u = children[u&(1<<nodesBitsChildren-1)]
107		lo = u & (1<<childrenBitsLo - 1)
108		u >>= childrenBitsLo
109		hi = u & (1<<childrenBitsHi - 1)
110		u >>= childrenBitsHi
111		switch u & (1<<childrenBitsNodeType - 1) {
112		case nodeTypeNormal:
113			suffix = 1 + dot
114		case nodeTypeException:
115			suffix = 1 + len(s)
116			break loop
117		}
118		u >>= childrenBitsNodeType
119		wildcard = u&(1<<childrenBitsWildcard-1) != 0
120		if !wildcard {
121			icann = icannNode
122		}
123
124		if dot == -1 {
125			break
126		}
127		s = s[:dot]
128	}
129	if suffix == len(domain) {
130		// If no rules match, the prevailing rule is "*".
131		return domain[1+strings.LastIndex(domain, "."):], icann
132	}
133	return domain[suffix:], icann
134}
135
// notFound is the sentinel index that find returns when no node matches. It
// is the largest uint32 value (all 32 bits set).
const notFound = ^uint32(0)
137
138// find returns the index of the node in the range [lo, hi) whose label equals
139// label, or notFound if there is no such node. The range is assumed to be in
140// strictly increasing node label order.
141func find(label string, lo, hi uint32) uint32 {
142	for lo < hi {
143		mid := lo + (hi-lo)/2
144		s := nodeLabel(mid)
145		if s < label {
146			lo = mid + 1
147		} else if s == label {
148			return mid
149		} else {
150			hi = mid
151		}
152	}
153	return notFound
154}
155
156// nodeLabel returns the label for the i'th node.
157func nodeLabel(i uint32) string {
158	x := nodes[i]
159	length := x & (1<<nodesBitsTextLength - 1)
160	x >>= nodesBitsTextLength
161	offset := x & (1<<nodesBitsTextOffset - 1)
162	return text[offset : offset+length]
163}
164
165// EffectiveTLDPlusOne returns the effective top level domain plus one more
166// label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
167func EffectiveTLDPlusOne(domain string) (string, error) {
168	if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") {
169		return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain)
170	}
171
172	suffix, _ := PublicSuffix(domain)
173	if len(domain) <= len(suffix) {
174		return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain)
175	}
176	i := len(domain) - len(suffix) - 1
177	if domain[i] != '.' {
178		return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
179	}
180	return domain[1+strings.LastIndex(domain[:i], "."):], nil
181}
182