1/*
2 * Copyright (c) 2016, Psiphon Inc.
3 * All rights reserved.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20package server
21
22import (
23	"crypto/hmac"
24	"crypto/sha256"
25	"fmt"
26	"io"
27	"net"
28	"os"
29	"path/filepath"
30	"strconv"
31	"strings"
32	"time"
33
34	"github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
35	"github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
36	maxminddb "github.com/ooni/psiphon/oopsi/github.com/oschwald/maxminddb-golang"
37	cache "github.com/ooni/psiphon/oopsi/github.com/patrickmn/go-cache"
38)
39
const (
	// GEOIP_SESSION_CACHE_TTL is the default expiration passed to the
	// session cache when it is created in NewGeoIPService.
	GEOIP_SESSION_CACHE_TTL = time.Hour

	// GEOIP_UNKNOWN_VALUE is the placeholder stored in GeoIPData string
	// fields for which no GeoIP value is available.
	GEOIP_UNKNOWN_VALUE = "None"
)
44
// GeoIPData holds the GeoIP attributes resolved for a client session.
//
// Client IP addresses themselves are neither logged nor explicitly
// referenced during a session. Instead, the country, city, and ISP that
// correspond to the client IP are resolved once and logged along with usage
// stats. DiscoveryValue is a special value derived from the client IP that
// is used to compartmentalize discoverable servers (see
// calculateDiscoveryValue for details).
type GeoIPData struct {
	// Country is the ISO country code reported by the GeoIP database.
	Country string

	// City is the English-language city name reported by the GeoIP database.
	City string

	// ISP is the ISP name reported by the GeoIP database.
	ISP string

	// ASN is the autonomous system number, formatted as a decimal string.
	ASN string

	// ASO is the autonomous system organization name.
	ASO string

	// DiscoveryValue is the value derived from the client IP address by
	// calculateDiscoveryValue.
	DiscoveryValue int
}
59
60// NewGeoIPData returns a GeoIPData initialized with the expected
61// GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails.
62func NewGeoIPData() GeoIPData {
63	return GeoIPData{
64		Country: GEOIP_UNKNOWN_VALUE,
65		City:    GEOIP_UNKNOWN_VALUE,
66		ISP:     GEOIP_UNKNOWN_VALUE,
67		ASN:     GEOIP_UNKNOWN_VALUE,
68		ASO:     GEOIP_UNKNOWN_VALUE,
69	}
70}
71
72// SetLogFields adds the GeoIPData fields to LogFields, following Psiphon
73// metric field name and format conventions.
74func (g GeoIPData) SetLogFields(logFields LogFields) {
75	g.SetLogFieldsWithPrefix("", logFields)
76}
77
78func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) {
79
80	// In psi_web, the space replacement was done to accommodate space
81	// delimited logging, which is no longer required; we retain the
82	// transformation so that stats aggregation isn't impacted.
83	logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1)
84	logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1)
85	logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1)
86	logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1)
87	logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1)
88}
89
90// GeoIPService implements GeoIP lookup and session/GeoIP caching.
91// Lookup is via a MaxMind database; the ReloadDatabase function
92// supports hot reloading of MaxMind data while the server is
93// running.
94type GeoIPService struct {
95	databases             []*geoIPDatabase
96	sessionCache          *cache.Cache
97	discoveryValueHMACKey string
98}
99
100type geoIPDatabase struct {
101	common.ReloadableFile
102	filename       string
103	tempFilename   string
104	tempFileSuffix int64
105	maxMindReader  *maxminddb.Reader
106}
107
108// NewGeoIPService initializes a new GeoIPService.
109func NewGeoIPService(
110	databaseFilenames []string,
111	discoveryValueHMACKey string) (*GeoIPService, error) {
112
113	geoIP := &GeoIPService{
114		databases:             make([]*geoIPDatabase, len(databaseFilenames)),
115		sessionCache:          cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute),
116		discoveryValueHMACKey: discoveryValueHMACKey,
117	}
118
119	for i, filename := range databaseFilenames {
120
121		database := &geoIPDatabase{
122			filename: filename,
123		}
124
125		database.ReloadableFile = common.NewReloadableFile(
126			filename,
127			false,
128			func(_ []byte, _ time.Time) error {
129
130				// In order to safely mmap the database file, a temporary copy
131				// is made and that copy is mmapped. The original file may be
132				// repaved without affecting the mmap; upon hot reload, a new
133				// temporary copy is made and once it is successful, the old
134				// mmap is closed and previous temporary file deleted.
135				//
136				// On any reload error, database state remains the same.
137
138				src, err := os.Open(database.filename)
139				if err != nil {
140					return errors.Trace(err)
141				}
142
143				tempFileSuffix := database.tempFileSuffix + 1
144
145				tempFilename := fmt.Sprintf(
146					"%s.%d",
147					filepath.Join(os.TempDir(), filepath.Base(database.filename)),
148					tempFileSuffix)
149
150				dst, err := os.Create(tempFilename)
151				if err != nil {
152					src.Close()
153					return errors.Trace(err)
154				}
155
156				_, err = io.Copy(dst, src)
157				src.Close()
158				dst.Close()
159				if err != nil {
160					_ = os.Remove(tempFilename)
161					return errors.Trace(err)
162				}
163
164				maxMindReader, err := maxminddb.Open(tempFilename)
165				if err != nil {
166					_ = os.Remove(tempFilename)
167					return errors.Trace(err)
168				}
169
170				if database.maxMindReader != nil {
171					database.maxMindReader.Close()
172					_ = os.Remove(database.tempFilename)
173				}
174
175				database.maxMindReader = maxMindReader
176				database.tempFilename = tempFilename
177				database.tempFileSuffix = tempFileSuffix
178
179				return nil
180			})
181
182		_, err := database.Reload()
183		if err != nil {
184			return nil, errors.Trace(err)
185		}
186
187		geoIP.databases[i] = database
188	}
189
190	return geoIP, nil
191}
192
193// Reloaders gets the list of reloadable databases in use
194// by the GeoIPService. This list is used to hot reload
195// these databases.
196func (geoIP *GeoIPService) Reloaders() []common.Reloader {
197	reloaders := make([]common.Reloader, len(geoIP.databases))
198	for i, database := range geoIP.databases {
199		reloaders[i] = database
200	}
201	return reloaders
202}
203
204// Lookup determines a GeoIPData for a given client IP address.
205func (geoIP *GeoIPService) Lookup(ipAddress string) GeoIPData {
206	result := NewGeoIPData()
207
208	ip := net.ParseIP(ipAddress)
209
210	if ip == nil || len(geoIP.databases) == 0 {
211		return result
212	}
213
214	var geoIPFields struct {
215		Country struct {
216			ISOCode string `maxminddb:"iso_code"`
217		} `maxminddb:"country"`
218		City struct {
219			Names map[string]string `maxminddb:"names"`
220		} `maxminddb:"city"`
221		ISP string `maxminddb:"isp"`
222		ASN int    `maxminddb:"autonomous_system_number"`
223		ASO string `maxminddb:"autonomous_system_organization"`
224	}
225
226	geoIPFields.ASN = -1
227
228	// Each database will populate geoIPFields with the values it contains. In the
229	// current MaxMind deployment, the City database populates Country and City and
230	// the separate ISP database populates ISP.
231	for _, database := range geoIP.databases {
232		database.ReloadableFile.RLock()
233		err := database.maxMindReader.Lookup(ip, &geoIPFields)
234		database.ReloadableFile.RUnlock()
235		if err != nil {
236			log.WithTraceFields(LogFields{"error": err}).Warning("GeoIP lookup failed")
237		}
238	}
239
240	if geoIPFields.Country.ISOCode != "" {
241		result.Country = geoIPFields.Country.ISOCode
242	}
243
244	name, ok := geoIPFields.City.Names["en"]
245	if ok && name != "" {
246		result.City = name
247	}
248
249	if geoIPFields.ISP != "" {
250		result.ISP = geoIPFields.ISP
251	}
252
253	if geoIPFields.ASN != -1 {
254		result.ASN = strconv.Itoa(geoIPFields.ASN)
255	}
256
257	if geoIPFields.ASO != "" {
258		result.ASO = geoIPFields.ASO
259	}
260
261	result.DiscoveryValue = calculateDiscoveryValue(
262		geoIP.discoveryValueHMACKey, ipAddress)
263
264	return result
265}
266
267// SetSessionCache adds the sessionID/geoIPData pair to the
268// session cache. This value will not expire; the caller must
269// call MarkSessionCacheToExpire to initiate expiry.
270// Calling SetSessionCache for an existing sessionID will
271// replace the previous value and reset any expiry.
272func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) {
273	geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration)
274}
275
276// MarkSessionCacheToExpire initiates expiry for an existing
277// session cache entry, if the session ID is found in the cache.
278// Concurrency note: SetSessionCache and MarkSessionCacheToExpire
279// should not be called concurrently for a single session ID.
280func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) {
281	geoIPData, found := geoIP.sessionCache.Get(sessionID)
282	// Note: potential race condition between Get and Set. In practice,
283	// the tunnel server won't clobber a SetSessionCache value by calling
284	// MarkSessionCacheToExpire concurrently.
285	if found {
286		geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration)
287	}
288}
289
290// GetSessionCache returns the cached GeoIPData for the
291// specified session ID; a blank GeoIPData is returned
292// if the session ID is not found in the cache.
293func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData {
294	geoIPData, found := geoIP.sessionCache.Get(sessionID)
295	if !found {
296		return NewGeoIPData()
297	}
298	return geoIPData.(GeoIPData)
299}
300
301// InSessionCache returns whether the session ID is present
302// in the session cache.
303func (geoIP *GeoIPService) InSessionCache(sessionID string) bool {
304	_, found := geoIP.sessionCache.Get(sessionID)
305	return found
306}
307
// calculateDiscoveryValue derives, from the client IP address, the value
// used as input to the server discovery algorithm. The client IP address is
// not explicitly stored, so the value is derived here and kept for later use
// by the discovery algorithm.
//
// The result is the first byte of the HMAC-SHA256 of ipAddress keyed with
// discoveryValueHMACKey, and so lies in the range 0-255.
//
// See https://bitbucket.org/psiphon/psiphon-circumvention-system/src/tip/Automation/psi_ops_discovery.py
// for full details.
func calculateDiscoveryValue(discoveryValueHMACKey, ipAddress string) int {
	// This mirrors psi_ops_discovery.calculate_ip_address_strategy_value:
	//     # Mix bits from all octets of the client IP address to determine the
	//     # bucket. An HMAC is used to prevent pre-calculation of buckets for IPs.
	//     return ord(hmac.new(HMAC_KEY, ip_address, hashlib.sha256).digest()[0])
	// TODO: use 3-octet algorithm?
	mac := hmac.New(sha256.New, []byte(discoveryValueHMACKey))
	mac.Write([]byte(ipAddress))
	digest := mac.Sum(nil)
	return int(digest[0])
}
324