1// Copyright (C) 2019 Storj Labs, Inc.
2// See LICENSE for copying information.
3
4package trust
5
6import (
7	"bufio"
8	"context"
9	"io"
10	"net"
11	"net/http"
12	"net/url"
13	"strings"
14
15	"github.com/zeebo/errs"
16)
17
18var (
19	// ErrHTTPSource is an error class for HTTP source errors.
20	ErrHTTPSource = errs.Class("HTTP source")
21)
22
// HTTPSource represents a trust source at a http:// or https:// URL.
type HTTPSource struct {
	// url is the parsed location of the source. NewHTTPSource only stores it
	// after its validation checks have passed.
	url *url.URL
}
27
28// NewHTTPSource constructs a new HTTPSource from a URL. The URL must be
29// an http:// or https:// URL. The fragment cannot be set.
30func NewHTTPSource(httpURL string) (*HTTPSource, error) {
31	u, err := url.Parse(httpURL)
32	if err != nil {
33		return nil, ErrHTTPSource.New("%q: not a URL: %w", httpURL, err)
34	}
35	if u.Scheme != "http" && u.Scheme != "https" {
36		return nil, ErrHTTPSource.New("%q: scheme is not supported", httpURL)
37	}
38	if u.Host == "" {
39		return nil, ErrHTTPSource.New(`%q: host is missing`, httpURL)
40	}
41	if u.Fragment != "" {
42		return nil, ErrHTTPSource.New("%q: fragment is not allowed", httpURL)
43	}
44	return &HTTPSource{url: u}, nil
45}
46
47// String implements the Source interface and returns the URL.
48func (source *HTTPSource) String() string {
49	return source.url.String()
50}
51
52// Static implements the Source interface. It returns false for this source.
53func (source *HTTPSource) Static() bool { return false }
54
55// FetchEntries implements the Source interface and returns entries parsed from
56// the list retrieved over HTTP(S). The entries returned are only authoritative
57// if the entry URL has a host that matches or is a subdomain of the source URL.
58func (source *HTTPSource) FetchEntries(ctx context.Context) (_ []Entry, err error) {
59	defer mon.Task()(&ctx)(&err)
60
61	req, err := http.NewRequestWithContext(ctx, "GET", source.url.String(), nil)
62	if err != nil {
63		return nil, ErrHTTPSource.Wrap(err)
64	}
65
66	resp, err := http.DefaultClient.Do(req)
67	if err != nil {
68		return nil, ErrHTTPSource.Wrap(err)
69	}
70	defer func() {
71		// Errors closing the response body can be ignored since they don't
72		// impact the correctness of the function.
73		_ = resp.Body.Close()
74	}()
75	if resp.StatusCode != http.StatusOK {
76		return nil, ErrHTTPSource.New("%q: unexpected status code %d: %q", source.url, resp.StatusCode, tryReadLine(resp.Body))
77	}
78
79	urls, err := ParseSatelliteURLList(ctx, resp.Body)
80	if err != nil {
81		return nil, ErrHTTPSource.New("cannot parse list at %q: %w", source.url, err)
82	}
83
84	var entries []Entry
85	for _, url := range urls {
86		authoritative := URLMatchesHTTPSourceHost(url.Host, source.url.Hostname())
87
88		entries = append(entries, Entry{
89			SatelliteURL:  url,
90			Authoritative: authoritative,
91		})
92	}
93	return entries, nil
94}
95
// URLMatchesHTTPSourceHost reports whether the host of a Satellite URL
// (urlHost) is covered by the host of an HTTPSource URL (sourceHost).
//
// Two IP addresses match only when they are equal. Two domain names match
// when, after normalization, urlHost equals sourceHost or is a subdomain of
// it. An IP address never matches a domain name.
func URLMatchesHTTPSourceHost(urlHost, sourceHost string) bool {
	satelliteIP, trustedIP := net.ParseIP(urlHost), net.ParseIP(sourceHost)

	switch {
	case satelliteIP != nil && trustedIP != nil:
		// Both hosts are IP addresses; they have to be the same address.
		return satelliteIP.Equal(trustedIP)
	case satelliteIP != nil || trustedIP != nil:
		// Exactly one host is an IP address, which is never a match.
		// TODO: should we resolve the non-IP host and see if it then matches?
		return false
	}

	// Both hosts are domain names. Accept an exact match or a subdomain of
	// the source host; the "." prefix keeps "badexample.com" from matching
	// "example.com".
	satelliteName, trustedName := normalizeHost(urlHost), normalizeHost(sourceHost)
	return satelliteName == trustedName || strings.HasSuffix(satelliteName, "."+trustedName)
}

// normalizeHost strips leading and trailing dots from host and lowercases the
// remainder so that host names can be compared directly.
func normalizeHost(host string) string {
	withoutDots := strings.Trim(host, ".")
	return strings.ToLower(withoutDots)
}
127
// tryReadLine makes a best-effort attempt to read the first line from r and
// returns it without its line terminator. It returns the empty string when
// nothing could be read.
func tryReadLine(r io.Reader) string {
	firstLine := bufio.NewScanner(r)
	// The result of Scan is intentionally discarded: on failure Text simply
	// yields the empty string, which is an acceptable fallback here.
	_ = firstLine.Scan()
	return firstLine.Text()
}
133