1/*
2 * Copyright (c) 2016, Psiphon Inc.
3 * All rights reserved.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20package server
21
22import (
23	"crypto/hmac"
24	"crypto/sha256"
25	"fmt"
26	"io"
27	"net"
28	"os"
29	"path/filepath"
30	"strconv"
31	"strings"
32	"time"
33
34	"github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
35	"github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
36	maxminddb "github.com/ooni/psiphon/oopsi/github.com/oschwald/maxminddb-golang"
37	cache "github.com/ooni/psiphon/oopsi/github.com/patrickmn/go-cache"
38)
39
const (
	// GEOIP_SESSION_CACHE_TTL is the default expiration configured for the
	// session cache created in NewGeoIPService. It applies to entries that
	// MarkSessionCacheToExpire re-sets with the cache's default expiration.
	GEOIP_SESSION_CACHE_TTL = 60 * time.Minute
	// GEOIP_UNKNOWN_VALUE is the placeholder assigned by NewGeoIPData to
	// every GeoIPData string field; LookupIP leaves it in place for any
	// field that the lookup does not populate.
	GEOIP_UNKNOWN_VALUE     = "None"
)
44
// GeoIPData is GeoIP data for a client session. Individual client
// IP addresses are neither logged nor explicitly referenced during a session.
// The GeoIP country, city, and ISP corresponding to a client IP address are
// resolved and then logged along with usage stats. The DiscoveryValue is
// a special value derived from the client IP that's used to compartmentalize
// discoverable servers (see calculateDiscoveryValue for details).
type GeoIPData struct {
	// Country is the ISO code of the country record, or GEOIP_UNKNOWN_VALUE.
	Country           string
	// City is the English ("en") name of the city record, or
	// GEOIP_UNKNOWN_VALUE.
	City              string
	// ISP is the ISP name, or GEOIP_UNKNOWN_VALUE.
	ISP               string
	// ASN is the autonomous system number formatted as a decimal string, or
	// GEOIP_UNKNOWN_VALUE.
	ASN               string
	// ASO is the autonomous system organization, or GEOIP_UNKNOWN_VALUE.
	ASO               string
	// HasDiscoveryValue is true when DiscoveryValue was calculated; LookupIP
	// sets it only when called with addDiscoveryValue.
	HasDiscoveryValue bool
	// DiscoveryValue is the output of calculateDiscoveryValue, which is a
	// single byte of an HMAC digest and therefore in the range 0-255.
	DiscoveryValue    int
}
60
61// NewGeoIPData returns a GeoIPData initialized with the expected
62// GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails.
63func NewGeoIPData() GeoIPData {
64	return GeoIPData{
65		Country: GEOIP_UNKNOWN_VALUE,
66		City:    GEOIP_UNKNOWN_VALUE,
67		ISP:     GEOIP_UNKNOWN_VALUE,
68		ASN:     GEOIP_UNKNOWN_VALUE,
69		ASO:     GEOIP_UNKNOWN_VALUE,
70	}
71}
72
// SetLogFields adds the GeoIPData fields to LogFields, following Psiphon
// metric field name and format conventions. It is equivalent to calling
// SetLogFieldsWithPrefix with an empty prefix, so the field names written
// are client_region, client_city, client_isp, client_asn and client_aso.
func (g GeoIPData) SetLogFields(logFields LogFields) {
	g.SetLogFieldsWithPrefix("", logFields)
}
78
79func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) {
80
81	// In psi_web, the space replacement was done to accommodate space
82	// delimited logging, which is no longer required; we retain the
83	// transformation so that stats aggregation isn't impacted.
84	logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1)
85	logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1)
86	logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1)
87	logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1)
88	logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1)
89}
90
// GeoIPService implements GeoIP lookup and session/GeoIP caching.
// Lookup is via one or more MaxMind databases; the Reloaders function
// exposes those databases so that MaxMind data can be hot reloaded
// while the server is running.
type GeoIPService struct {
	// databases holds one entry per database filename passed to
	// NewGeoIPService; LookupIP consults them in order.
	databases             []*geoIPDatabase
	// sessionCache maps session IDs to GeoIPData values.
	sessionCache          *cache.Cache
	// discoveryValueHMACKey is the key passed to calculateDiscoveryValue.
	discoveryValueHMACKey string
}
100
// geoIPDatabase is a single MaxMind database together with its hot reload
// state. The reader is opened on a temporary copy of the database file, not
// on the original file; see the reload action in NewGeoIPService.
type geoIPDatabase struct {
	common.ReloadableFile
	// filename is the path of the original database file.
	filename       string
	// tempFilename is the path of the temporary copy that maxMindReader
	// currently has open.
	tempFilename   string
	// tempFileSuffix is the numeric suffix used in tempFilename; each
	// successful reload increments it by one.
	tempFileSuffix int64
	// maxMindReader is the reader for the current temporary copy. It is
	// replaced on each successful reload.
	maxMindReader  *maxminddb.Reader
}
108
109// NewGeoIPService initializes a new GeoIPService.
110func NewGeoIPService(
111	databaseFilenames []string,
112	discoveryValueHMACKey string) (*GeoIPService, error) {
113
114	geoIP := &GeoIPService{
115		databases:             make([]*geoIPDatabase, len(databaseFilenames)),
116		sessionCache:          cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute),
117		discoveryValueHMACKey: discoveryValueHMACKey,
118	}
119
120	for i, filename := range databaseFilenames {
121
122		database := &geoIPDatabase{
123			filename: filename,
124		}
125
126		database.ReloadableFile = common.NewReloadableFile(
127			filename,
128			false,
129			func(_ []byte, _ time.Time) error {
130
131				// In order to safely mmap the database file, a temporary copy
132				// is made and that copy is mmapped. The original file may be
133				// repaved without affecting the mmap; upon hot reload, a new
134				// temporary copy is made and once it is successful, the old
135				// mmap is closed and previous temporary file deleted.
136				//
137				// On any reload error, database state remains the same.
138
139				src, err := os.Open(database.filename)
140				if err != nil {
141					return errors.Trace(err)
142				}
143
144				tempFileSuffix := database.tempFileSuffix + 1
145
146				tempFilename := fmt.Sprintf(
147					"%s.%d",
148					filepath.Join(os.TempDir(), filepath.Base(database.filename)),
149					tempFileSuffix)
150
151				dst, err := os.Create(tempFilename)
152				if err != nil {
153					src.Close()
154					return errors.Trace(err)
155				}
156
157				_, err = io.Copy(dst, src)
158				src.Close()
159				dst.Close()
160				if err != nil {
161					_ = os.Remove(tempFilename)
162					return errors.Trace(err)
163				}
164
165				maxMindReader, err := maxminddb.Open(tempFilename)
166				if err != nil {
167					_ = os.Remove(tempFilename)
168					return errors.Trace(err)
169				}
170
171				if database.maxMindReader != nil {
172					database.maxMindReader.Close()
173					_ = os.Remove(database.tempFilename)
174				}
175
176				database.maxMindReader = maxMindReader
177				database.tempFilename = tempFilename
178				database.tempFileSuffix = tempFileSuffix
179
180				return nil
181			})
182
183		_, err := database.Reload()
184		if err != nil {
185			return nil, errors.Trace(err)
186		}
187
188		geoIP.databases[i] = database
189	}
190
191	return geoIP, nil
192}
193
194// Reloaders gets the list of reloadable databases in use
195// by the GeoIPService. This list is used to hot reload
196// these databases.
197func (geoIP *GeoIPService) Reloaders() []common.Reloader {
198	reloaders := make([]common.Reloader, len(geoIP.databases))
199	for i, database := range geoIP.databases {
200		reloaders[i] = database
201	}
202	return reloaders
203}
204
205// Lookup determines a GeoIPData for a given string client IP address.
206//
207// When addDiscoveryValue is true, GeoIPData.DiscoveryValue is calculated and
208// GeoIPData.HasDiscoveryValue is true.
209func (geoIP *GeoIPService) Lookup(
210	strIP string, addDiscoveryValue bool) GeoIPData {
211
212	return geoIP.LookupIP(net.ParseIP(strIP), addDiscoveryValue)
213}
214
215// LookupIP determines a GeoIPData for a given client IP address.
216//
217// When addDiscoveryValue is true, GeoIPData.DiscoveryValue is calculated and
218// GeoIPData.HasDiscoveryValue is true.
219func (geoIP *GeoIPService) LookupIP(
220	IP net.IP, addDiscoveryValue bool) GeoIPData {
221
222	result := NewGeoIPData()
223
224	if IP == nil {
225		return result
226	}
227
228	// Populate GeoIP fields.
229
230	var geoIPFields struct {
231		Country struct {
232			ISOCode string `maxminddb:"iso_code"`
233		} `maxminddb:"country"`
234		City struct {
235			Names map[string]string `maxminddb:"names"`
236		} `maxminddb:"city"`
237		ISP string `maxminddb:"isp"`
238		ASN int    `maxminddb:"autonomous_system_number"`
239		ASO string `maxminddb:"autonomous_system_organization"`
240	}
241
242	geoIPFields.ASN = -1
243
244	// Each database will populate geoIPFields with the values it contains. In the
245	// current MaxMind deployment, the City database populates Country and City and
246	// the separate ISP database populates ISP.
247	for _, database := range geoIP.databases {
248		database.ReloadableFile.RLock()
249		err := database.maxMindReader.Lookup(IP, &geoIPFields)
250		database.ReloadableFile.RUnlock()
251		if err != nil {
252			log.WithTraceFields(LogFields{"error": err}).Warning("GeoIP lookup failed")
253		}
254	}
255
256	if geoIPFields.Country.ISOCode != "" {
257		result.Country = geoIPFields.Country.ISOCode
258	}
259
260	name, ok := geoIPFields.City.Names["en"]
261	if ok && name != "" {
262		result.City = name
263	}
264
265	if geoIPFields.ISP != "" {
266		result.ISP = geoIPFields.ISP
267	}
268
269	if geoIPFields.ASN != -1 {
270		result.ASN = strconv.Itoa(geoIPFields.ASN)
271	}
272
273	if geoIPFields.ASO != "" {
274		result.ASO = geoIPFields.ASO
275	}
276
277	// Populate DiscoveryValue fields (even when there's no GeoIP database).
278
279	if addDiscoveryValue {
280		result.HasDiscoveryValue = true
281		result.DiscoveryValue = calculateDiscoveryValue(
282			geoIP.discoveryValueHMACKey, IP)
283	}
284
285	return result
286}
287
// SetSessionCache adds the sessionID/geoIPData pair to the
// session cache. This value will not expire; the caller must
// call MarkSessionCacheToExpire to initiate expiry.
// Calling SetSessionCache for an existing sessionID will
// replace the previous value and reset any expiry.
func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) {
	// cache.NoExpiration is what keeps the entry from expiring until
	// MarkSessionCacheToExpire is called for this session ID.
	geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration)
}
296
297// MarkSessionCacheToExpire initiates expiry for an existing
298// session cache entry, if the session ID is found in the cache.
299// Concurrency note: SetSessionCache and MarkSessionCacheToExpire
300// should not be called concurrently for a single session ID.
301func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) {
302	geoIPData, found := geoIP.sessionCache.Get(sessionID)
303	// Note: potential race condition between Get and Set. In practice,
304	// the tunnel server won't clobber a SetSessionCache value by calling
305	// MarkSessionCacheToExpire concurrently.
306	if found {
307		geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration)
308	}
309}
310
311// GetSessionCache returns the cached GeoIPData for the
312// specified session ID; a blank GeoIPData is returned
313// if the session ID is not found in the cache.
314func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData {
315	geoIPData, found := geoIP.sessionCache.Get(sessionID)
316	if !found {
317		return NewGeoIPData()
318	}
319	return geoIPData.(GeoIPData)
320}
321
// InSessionCache returns whether the session ID is present
// in the session cache.
func (geoIP *GeoIPService) InSessionCache(sessionID string) bool {
	// Only presence matters here; the cached value itself is discarded.
	_, found := geoIP.sessionCache.Get(sessionID)
	return found
}
328
// calculateDiscoveryValue derives a value from the client IP address to be
// used as input in the server discovery algorithm. Since we do not explicitly
// store the client IP address, we must derive the value here and store it for
// later use by the discovery algorithm.
//
// The value is the first byte of the HMAC-SHA256, keyed with
// discoveryValueHMACKey, of the string form of ipAddress; it is therefore in
// the range 0-255.
//
// See https://github.com/Psiphon-Inc/psiphon-automation/tree/master/Automation/psi_ops_discovery.py
// for full details.
func calculateDiscoveryValue(discoveryValueHMACKey string, ipAddress net.IP) int {
	// From: psi_ops_discovery.calculate_ip_address_strategy_value:
	//     # Mix bits from all octets of the client IP address to determine the
	//     # bucket. An HMAC is used to prevent pre-calculation of buckets for IPs.
	//     return ord(hmac.new(HMAC_KEY, ip_address, hashlib.sha256).digest()[0])
	// TODO: use 3-octet algorithm?
	mac := hmac.New(sha256.New, []byte(discoveryValueHMACKey))
	mac.Write([]byte(ipAddress.String()))
	digest := mac.Sum(nil)
	return int(digest[0])
}
345