1// Package sessionresolver contains the resolver used by the session. This
2// resolver will try to figure out which is the best service for running
3// domain name resolutions and will consistently use it.
4//
5// Occasionally this code will also swap the best resolver with other
6// ~good resolvers to give them a chance to perform.
7//
// The penalty/reward mechanism gives most of the weight to the latest
// result, so the code should adapt quickly to changing network
// conditions. Occasionally, we will have longer resolutions when
// trying out other resolvers.
11//
12// At the beginning we randomize the known resolvers so that we do not
13// have any preferential ordering. The initial resolutions may be slower
14// if there are many issues with resolvers.
15//
16// The system resolver is given the lowest priority at the beginning
17// but it will of course be the most popular resolver if anything else
18// is failing us. (We will still occasionally probe for other working
19// resolvers and increase their score on success.)
20//
21// We also support a socks5 proxy. When such a proxy is configured,
22// the code WILL skip http3 resolvers AS WELL AS the system
23// resolver, in an attempt to avoid leaking your queries.
24package sessionresolver
25
26import (
27	"context"
28	"encoding/json"
29	"errors"
30	"fmt"
31	"math/rand"
32	"net/url"
33	"sync"
34	"time"
35
36	"github.com/ooni/probe-cli/v3/internal/engine/internal/multierror"
37	"github.com/ooni/probe-cli/v3/internal/engine/netx/bytecounter"
38	"github.com/ooni/probe-cli/v3/internal/engine/runtimex"
39)
40
41// Resolver is the session resolver. Resolver will try to use
42// a bunch of DoT/DoH resolvers before falling back to the
43// system resolver. The relative priorities of the resolver
44// are stored onto the KVStore such that we can remember them
45// and therefore we can generally give preference to underlying
46// DoT/DoH resolvers that work better.
47//
48// You MUST NOT modify public fields of this structure once it
49// has been created, because that MAY lead to data races.
50//
51// You should create an instance of this structure and use
52// it in internal/engine/session.go.
53type Resolver struct {
54	// ByteCounter is the optional byte counter. It will count
55	// the bytes used by any child resolver except for the
56	// system resolver, whose bytes ARE NOT counted. If this
57	// field is not set, then we won't count the bytes.
58	ByteCounter *bytecounter.Counter
59
60	// KVStore is the optional key-value store where you
61	// want us to write statistics about which resolver is
62	// working better in your network. If this field is
63	// not set, then we'll use a in-memory store.
64	KVStore KVStore
65
66	// Logger is the optional logger you want us to use
67	// to emit log messages.
68	Logger Logger
69
70	// ProxyURL is the optional URL of the socks5 proxy
71	// we should be using. If not set, then we WON'T use
72	// any proxy. If set, then we WON'T use any http3
73	// based resolvers and we WON'T use the system resolver.
74	ProxyURL *url.URL
75
76	// codec is the optional codec to use. If not set, we
77	// will construct a default codec.
78	codec codec
79
80	// dnsClientMaker is the optional dnsclientmaker to
81	// use. If not set, we will use the default.
82	dnsClientMaker dnsclientmaker
83
84	// mu provides synchronisation of internal fields.
85	mu sync.Mutex
86
87	// once ensures that CloseIdleConnection is
88	// run just once.
89	once sync.Once
90
91	// res maps a URL to a child resolver. We will
92	// construct child resolvers just once and we
93	// will track them into this field.
94	res map[string]childResolver
95}
96
97// CloseIdleConnections closes the idle connections, if any. This
98// function is guaranteed to be idempotent.
99func (r *Resolver) CloseIdleConnections() {
100	r.once.Do(r.closeall)
101}
102
103// Stats returns stats about the session resolver.
104func (r *Resolver) Stats() string {
105	data, err := json.Marshal(r.readstatedefault())
106	runtimex.PanicOnError(err, "json.Marshal should not fail here")
107	return fmt.Sprintf("sessionresolver: %s", string(data))
108}
109
var (
	// ErrLookupHost is the error indicating that LookupHost failed.
	ErrLookupHost = errors.New("sessionresolver: LookupHost failed")
)
112
113// LookupHost implements Resolver.LookupHost. This function returns a
114// multierror.Union error on failure, so you can see individual errors
115// and get a better picture of what's been going wrong.
116func (r *Resolver) LookupHost(ctx context.Context, hostname string) ([]string, error) {
117	state := r.readstatedefault()
118	r.maybeConfusion(state, time.Now().UnixNano())
119	defer r.writestate(state)
120	me := multierror.New(ErrLookupHost)
121	for _, e := range state {
122		if r.ProxyURL != nil && r.shouldSkipWithProxy(e) {
123			r.logger().Infof("sessionresolver: skipping with proxy: %+v", e)
124			continue // we cannot proxy this URL so ignore it
125		}
126		addrs, err := r.lookupHost(ctx, e, hostname)
127		if err == nil {
128			return addrs, nil
129		}
130		me.Add(&errwrapper{error: err, URL: e.URL})
131	}
132	return nil, me
133}
134
135func (r *Resolver) shouldSkipWithProxy(e *resolverinfo) bool {
136	URL, err := url.Parse(e.URL)
137	if err != nil {
138		return true // please skip
139	}
140	switch URL.Scheme {
141	case "https", "dot", "tcp":
142		return false // we can handle this
143	default:
144		return true // please skip
145	}
146}
147
148func (r *Resolver) lookupHost(ctx context.Context, ri *resolverinfo, hostname string) ([]string, error) {
149	const ewma = 0.9 // the last sample is very important
150	re, err := r.getresolver(ri.URL)
151	if err != nil {
152		r.logger().Warnf("sessionresolver: getresolver: %s", err.Error())
153		ri.Score = 0 // this is a hard error
154		return nil, err
155	}
156	addrs, err := r.timeLimitedLookup(ctx, re, hostname)
157	if err == nil {
158		r.logger().Infof("sessionresolver: %s... %v", ri.URL, nil)
159		ri.Score = ewma*1.0 + (1-ewma)*ri.Score // increase score
160		return addrs, nil
161	}
162	r.logger().Warnf("sessionresolver: %s... %s", ri.URL, err.Error())
163	ri.Score = ewma*0.0 + (1-ewma)*ri.Score // decrease score
164	return nil, err
165}
166
167// maybeConfusion will rearrange the  first elements of the vector
168// with low probability, so giving other resolvers a chance
169// to run and show that they are also viable. We do not fully
170// reorder the vector because that could lead to long runtimes.
171//
172// The return value is only meaningful for testing.
173func (r *Resolver) maybeConfusion(state []*resolverinfo, seed int64) int {
174	rng := rand.New(rand.NewSource(seed))
175	const confusion = 0.3
176	if rng.Float64() >= confusion {
177		return -1
178	}
179	switch len(state) {
180	case 0, 1: // nothing to do
181		return 0
182	case 2:
183		state[0], state[1] = state[1], state[0]
184		return 2
185	default:
186		state[0], state[2] = state[2], state[0]
187		return 3
188	}
189}
190
191// Network implements Resolver.Network.
192func (r *Resolver) Network() string {
193	return "sessionresolver"
194}
195
196// Address implements Resolver.Address.
197func (r *Resolver) Address() string {
198	return ""
199}
200