// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go

// Package publicsuffix provides a public suffix list based on data from
// https://publicsuffix.org/
//
// A public suffix is one under which Internet users can directly register
// names. It is related to, but different from, a TLD (top level domain).
//
// "com" is a TLD (top level domain). Top level means it has no dots.
//
// "com" is also a public suffix. Amazon and Google have registered different
// siblings under that domain: "amazon.com" and "google.com".
//
// "au" is another TLD, again because it has no dots. But it's not "amazon.au".
// Instead, it's "amazon.com.au".
//
// "com.au" isn't an actual TLD, because it's not at the top level (it has
// dots). But it is an eTLD (effective TLD), because that's the branching point
// for domain name registrars.
//
// Another name for "an eTLD" is "a public suffix". Often, what's more of
// interest is the eTLD+1, or one more label than the public suffix. For
// example, browsers partition read/write access to HTTP cookies according to
// the eTLD+1. Web pages served from "amazon.com.au" can't read cookies from
// "google.com.au", but web pages served from "maps.google.com" can share
// cookies from "www.google.com", so you don't have to sign into Google Maps
// separately from signing into Google Web Search. Note that all four of those
// domains have 3 labels and 2 dots. The first two domains are each an eTLD+1,
// the last two are not (but share the same eTLD+1: "google.com").
//
// All of these domains have the same eTLD+1:
//   - "www.books.amazon.co.uk"
//   - "books.amazon.co.uk"
//   - "amazon.co.uk"
// Specifically, the eTLD+1 is "amazon.co.uk", because the eTLD is "co.uk".
//
// There is no closed form algorithm to calculate the eTLD of a domain.
42// Instead, the calculation is data driven. This package provides a 43// pre-compiled snapshot of Mozilla's PSL (Public Suffix List) data at 44// https://publicsuffix.org/ 45package publicsuffix // import "golang.org/x/net/publicsuffix" 46 47// TODO: specify case sensitivity and leading/trailing dot behavior for 48// func PublicSuffix and func EffectiveTLDPlusOne. 49 50import ( 51 "fmt" 52 "net/http/cookiejar" 53 "strings" 54) 55 56// List implements the cookiejar.PublicSuffixList interface by calling the 57// PublicSuffix function. 58var List cookiejar.PublicSuffixList = list{} 59 60type list struct{} 61 62func (list) PublicSuffix(domain string) string { 63 ps, _ := PublicSuffix(domain) 64 return ps 65} 66 67func (list) String() string { 68 return version 69} 70 71// PublicSuffix returns the public suffix of the domain using a copy of the 72// publicsuffix.org database compiled into the library. 73// 74// icann is whether the public suffix is managed by the Internet Corporation 75// for Assigned Names and Numbers. If not, the public suffix is either a 76// privately managed domain (and in practice, not a top level domain) or an 77// unmanaged top level domain (and not explicitly mentioned in the 78// publicsuffix.org list). For example, "foo.org" and "foo.co.uk" are ICANN 79// domains, "foo.dyndns.org" and "foo.blogspot.co.uk" are private domains and 80// "cromulent" is an unmanaged top level domain. 
81// 82// Use cases for distinguishing ICANN domains like "foo.com" from private 83// domains like "foo.appspot.com" can be found at 84// https://wiki.mozilla.org/Public_Suffix_List/Use_Cases 85func PublicSuffix(domain string) (publicSuffix string, icann bool) { 86 lo, hi := uint32(0), uint32(numTLD) 87 s, suffix, icannNode, wildcard := domain, len(domain), false, false 88loop: 89 for { 90 dot := strings.LastIndex(s, ".") 91 if wildcard { 92 icann = icannNode 93 suffix = 1 + dot 94 } 95 if lo == hi { 96 break 97 } 98 f := find(s[1+dot:], lo, hi) 99 if f == notFound { 100 break 101 } 102 103 u := nodes[f] >> (nodesBitsTextOffset + nodesBitsTextLength) 104 icannNode = u&(1<<nodesBitsICANN-1) != 0 105 u >>= nodesBitsICANN 106 u = children[u&(1<<nodesBitsChildren-1)] 107 lo = u & (1<<childrenBitsLo - 1) 108 u >>= childrenBitsLo 109 hi = u & (1<<childrenBitsHi - 1) 110 u >>= childrenBitsHi 111 switch u & (1<<childrenBitsNodeType - 1) { 112 case nodeTypeNormal: 113 suffix = 1 + dot 114 case nodeTypeException: 115 suffix = 1 + len(s) 116 break loop 117 } 118 u >>= childrenBitsNodeType 119 wildcard = u&(1<<childrenBitsWildcard-1) != 0 120 if !wildcard { 121 icann = icannNode 122 } 123 124 if dot == -1 { 125 break 126 } 127 s = s[:dot] 128 } 129 if suffix == len(domain) { 130 // If no rules match, the prevailing rule is "*". 131 return domain[1+strings.LastIndex(domain, "."):], icann 132 } 133 return domain[suffix:], icann 134} 135 136const notFound uint32 = 1<<32 - 1 137 138// find returns the index of the node in the range [lo, hi) whose label equals 139// label, or notFound if there is no such node. The range is assumed to be in 140// strictly increasing node label order. 
141func find(label string, lo, hi uint32) uint32 { 142 for lo < hi { 143 mid := lo + (hi-lo)/2 144 s := nodeLabel(mid) 145 if s < label { 146 lo = mid + 1 147 } else if s == label { 148 return mid 149 } else { 150 hi = mid 151 } 152 } 153 return notFound 154} 155 156// nodeLabel returns the label for the i'th node. 157func nodeLabel(i uint32) string { 158 x := nodes[i] 159 length := x & (1<<nodesBitsTextLength - 1) 160 x >>= nodesBitsTextLength 161 offset := x & (1<<nodesBitsTextOffset - 1) 162 return text[offset : offset+length] 163} 164 165// EffectiveTLDPlusOne returns the effective top level domain plus one more 166// label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org". 167func EffectiveTLDPlusOne(domain string) (string, error) { 168 if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") { 169 return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain) 170 } 171 172 suffix, _ := PublicSuffix(domain) 173 if len(domain) <= len(suffix) { 174 return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain) 175 } 176 i := len(domain) - len(suffix) - 1 177 if domain[i] != '.' { 178 return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain) 179 } 180 return domain[1+strings.LastIndex(domain[:i], "."):], nil 181} 182