1// Copyright (C) 2019 Storj Labs, Inc. 2// See LICENSE for copying information. 3 4package trust 5 6import ( 7 "bufio" 8 "context" 9 "io" 10 "net" 11 "net/http" 12 "net/url" 13 "strings" 14 15 "github.com/zeebo/errs" 16) 17 18var ( 19 // ErrHTTPSource is an error class for HTTP source errors. 20 ErrHTTPSource = errs.Class("HTTP source") 21) 22 23// HTTPSource represents a trust source at a http:// or https:// URL. 24type HTTPSource struct { 25 url *url.URL 26} 27 28// NewHTTPSource constructs a new HTTPSource from a URL. The URL must be 29// an http:// or https:// URL. The fragment cannot be set. 30func NewHTTPSource(httpURL string) (*HTTPSource, error) { 31 u, err := url.Parse(httpURL) 32 if err != nil { 33 return nil, ErrHTTPSource.New("%q: not a URL: %w", httpURL, err) 34 } 35 if u.Scheme != "http" && u.Scheme != "https" { 36 return nil, ErrHTTPSource.New("%q: scheme is not supported", httpURL) 37 } 38 if u.Host == "" { 39 return nil, ErrHTTPSource.New(`%q: host is missing`, httpURL) 40 } 41 if u.Fragment != "" { 42 return nil, ErrHTTPSource.New("%q: fragment is not allowed", httpURL) 43 } 44 return &HTTPSource{url: u}, nil 45} 46 47// String implements the Source interface and returns the URL. 48func (source *HTTPSource) String() string { 49 return source.url.String() 50} 51 52// Static implements the Source interface. It returns false for this source. 53func (source *HTTPSource) Static() bool { return false } 54 55// FetchEntries implements the Source interface and returns entries parsed from 56// the list retrieved over HTTP(S). The entries returned are only authoritative 57// if the entry URL has a host that matches or is a subdomain of the source URL. 58func (source *HTTPSource) FetchEntries(ctx context.Context) (_ []Entry, err error) { 59 defer mon.Task()(&ctx)(&err) 60 61 req, err := http.NewRequestWithContext(ctx, "GET", source.url.String(), nil) 62 if err != nil { 63 return nil, ErrHTTPSource.Wrap(err) 64 } 65 66 resp, err := http.DefaultClient.Do(req) 67 if err != nil { 68 return nil, ErrHTTPSource.Wrap(err) 69 } 70 defer func() { 71 // Errors closing the response body can be ignored since they don't 72 // impact the correctness of the function. 73 _ = resp.Body.Close() 74 }() 75 if resp.StatusCode != http.StatusOK { 76 return nil, ErrHTTPSource.New("%q: unexpected status code %d: %q", source.url, resp.StatusCode, tryReadLine(resp.Body)) 77 } 78 79 urls, err := ParseSatelliteURLList(ctx, resp.Body) 80 if err != nil { 81 return nil, ErrHTTPSource.New("cannot parse list at %q: %w", source.url, err) 82 } 83 84 var entries []Entry 85 for _, url := range urls { 86 authoritative := URLMatchesHTTPSourceHost(url.Host, source.url.Hostname()) 87 88 entries = append(entries, Entry{ 89 SatelliteURL: url, 90 Authoritative: authoritative, 91 }) 92 } 93 return entries, nil 94} 95 96// URLMatchesHTTPSourceHost takes the Satellite URL host and the host of the 97// HTTPSource URL and determines if the SatelliteURL matches or is in the 98// same domain as the HTTPSource URL. 99func URLMatchesHTTPSourceHost(urlHost, sourceHost string) bool { 100 urlIP := net.ParseIP(urlHost) 101 sourceIP := net.ParseIP(sourceHost) 102 103 // If one is an IP and the other isn't, then this isn't a match. 104 // TODO: should we resolve the non-IP host and see if it then matches? 105 if (urlIP != nil) != (sourceIP != nil) { 106 return false 107 } 108 109 // Both are IP addresses. Check for equality. 110 if urlIP != nil && sourceIP != nil { 111 return urlIP.Equal(sourceIP) 112 } 113 114 // Both are domain names. Check if the URL host matches or is a subdomain of 115 // the source host. 116 urlHost = normalizeHost(urlHost) 117 sourceHost = normalizeHost(sourceHost) 118 if urlHost == sourceHost { 119 return true 120 } 121 return strings.HasSuffix(urlHost, "."+sourceHost) 122} 123 124func normalizeHost(host string) string { 125 return strings.ToLower(strings.Trim(host, ".")) 126} 127 128func tryReadLine(r io.Reader) string { 129 scanner := bufio.NewScanner(r) 130 scanner.Scan() 131 return scanner.Text() 132} 133