1/* 2 * Copyright (c) 2016, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20package server 21 22import ( 23 "crypto/hmac" 24 "crypto/sha256" 25 "fmt" 26 "io" 27 "net" 28 "os" 29 "path/filepath" 30 "strconv" 31 "strings" 32 "time" 33 34 "github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common" 35 "github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors" 36 maxminddb "github.com/ooni/psiphon/oopsi/github.com/oschwald/maxminddb-golang" 37 cache "github.com/ooni/psiphon/oopsi/github.com/patrickmn/go-cache" 38) 39 40const ( 41 GEOIP_SESSION_CACHE_TTL = 60 * time.Minute 42 GEOIP_UNKNOWN_VALUE = "None" 43) 44 45// GeoIPData is GeoIP data for a client session. Individual client 46// IP addresses are neither logged nor explicitly referenced during a session. 47// The GeoIP country, city, and ISP corresponding to a client IP address are 48// resolved and then logged along with usage stats. The DiscoveryValue is 49// a special value derived from the client IP that's used to compartmentalize 50// discoverable servers (see calculateDiscoveryValue for details). 51type GeoIPData struct { 52 Country string 53 City string 54 ISP string 55 ASN string 56 ASO string 57 HasDiscoveryValue bool 58 DiscoveryValue int 59} 60 61// NewGeoIPData returns a GeoIPData initialized with the expected 62// GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails. 63func NewGeoIPData() GeoIPData { 64 return GeoIPData{ 65 Country: GEOIP_UNKNOWN_VALUE, 66 City: GEOIP_UNKNOWN_VALUE, 67 ISP: GEOIP_UNKNOWN_VALUE, 68 ASN: GEOIP_UNKNOWN_VALUE, 69 ASO: GEOIP_UNKNOWN_VALUE, 70 } 71} 72 73// SetLogFields adds the GeoIPData fields to LogFields, following Psiphon 74// metric field name and format conventions. 75func (g GeoIPData) SetLogFields(logFields LogFields) { 76 g.SetLogFieldsWithPrefix("", logFields) 77} 78 79func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) { 80 81 // In psi_web, the space replacement was done to accommodate space 82 // delimited logging, which is no longer required; we retain the 83 // transformation so that stats aggregation isn't impacted. 84 logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1) 85 logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1) 86 logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1) 87 logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1) 88 logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1) 89} 90 91// GeoIPService implements GeoIP lookup and session/GeoIP caching. 92// Lookup is via a MaxMind database; the ReloadDatabase function 93// supports hot reloading of MaxMind data while the server is 94// running. 95type GeoIPService struct { 96 databases []*geoIPDatabase 97 sessionCache *cache.Cache 98 discoveryValueHMACKey string 99} 100 101type geoIPDatabase struct { 102 common.ReloadableFile 103 filename string 104 tempFilename string 105 tempFileSuffix int64 106 maxMindReader *maxminddb.Reader 107} 108 109// NewGeoIPService initializes a new GeoIPService. 110func NewGeoIPService( 111 databaseFilenames []string, 112 discoveryValueHMACKey string) (*GeoIPService, error) { 113 114 geoIP := &GeoIPService{ 115 databases: make([]*geoIPDatabase, len(databaseFilenames)), 116 sessionCache: cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute), 117 discoveryValueHMACKey: discoveryValueHMACKey, 118 } 119 120 for i, filename := range databaseFilenames { 121 122 database := &geoIPDatabase{ 123 filename: filename, 124 } 125 126 database.ReloadableFile = common.NewReloadableFile( 127 filename, 128 false, 129 func(_ []byte, _ time.Time) error { 130 131 // In order to safely mmap the database file, a temporary copy 132 // is made and that copy is mmapped. The original file may be 133 // repaved without affecting the mmap; upon hot reload, a new 134 // temporary copy is made and once it is successful, the old 135 // mmap is closed and previous temporary file deleted. 136 // 137 // On any reload error, database state remains the same. 138 139 src, err := os.Open(database.filename) 140 if err != nil { 141 return errors.Trace(err) 142 } 143 144 tempFileSuffix := database.tempFileSuffix + 1 145 146 tempFilename := fmt.Sprintf( 147 "%s.%d", 148 filepath.Join(os.TempDir(), filepath.Base(database.filename)), 149 tempFileSuffix) 150 151 dst, err := os.Create(tempFilename) 152 if err != nil { 153 src.Close() 154 return errors.Trace(err) 155 } 156 157 _, err = io.Copy(dst, src) 158 src.Close() 159 dst.Close() 160 if err != nil { 161 _ = os.Remove(tempFilename) 162 return errors.Trace(err) 163 } 164 165 maxMindReader, err := maxminddb.Open(tempFilename) 166 if err != nil { 167 _ = os.Remove(tempFilename) 168 return errors.Trace(err) 169 } 170 171 if database.maxMindReader != nil { 172 database.maxMindReader.Close() 173 _ = os.Remove(database.tempFilename) 174 } 175 176 database.maxMindReader = maxMindReader 177 database.tempFilename = tempFilename 178 database.tempFileSuffix = tempFileSuffix 179 180 return nil 181 }) 182 183 _, err := database.Reload() 184 if err != nil { 185 return nil, errors.Trace(err) 186 } 187 188 geoIP.databases[i] = database 189 } 190 191 return geoIP, nil 192} 193 194// Reloaders gets the list of reloadable databases in use 195// by the GeoIPService. This list is used to hot reload 196// these databases. 197func (geoIP *GeoIPService) Reloaders() []common.Reloader { 198 reloaders := make([]common.Reloader, len(geoIP.databases)) 199 for i, database := range geoIP.databases { 200 reloaders[i] = database 201 } 202 return reloaders 203} 204 205// Lookup determines a GeoIPData for a given string client IP address. 206// 207// When addDiscoveryValue is true, GeoIPData.DiscoveryValue is calculated and 208// GeoIPData.HasDiscoveryValue is true. 209func (geoIP *GeoIPService) Lookup( 210 strIP string, addDiscoveryValue bool) GeoIPData { 211 212 return geoIP.LookupIP(net.ParseIP(strIP), addDiscoveryValue) 213} 214 215// LookupIP determines a GeoIPData for a given client IP address. 216// 217// When addDiscoveryValue is true, GeoIPData.DiscoveryValue is calculated and 218// GeoIPData.HasDiscoveryValue is true. 219func (geoIP *GeoIPService) LookupIP( 220 IP net.IP, addDiscoveryValue bool) GeoIPData { 221 222 result := NewGeoIPData() 223 224 if IP == nil { 225 return result 226 } 227 228 // Populate GeoIP fields. 229 230 var geoIPFields struct { 231 Country struct { 232 ISOCode string `maxminddb:"iso_code"` 233 } `maxminddb:"country"` 234 City struct { 235 Names map[string]string `maxminddb:"names"` 236 } `maxminddb:"city"` 237 ISP string `maxminddb:"isp"` 238 ASN int `maxminddb:"autonomous_system_number"` 239 ASO string `maxminddb:"autonomous_system_organization"` 240 } 241 242 geoIPFields.ASN = -1 243 244 // Each database will populate geoIPFields with the values it contains. In the 245 // current MaxMind deployment, the City database populates Country and City and 246 // the separate ISP database populates ISP. 247 for _, database := range geoIP.databases { 248 database.ReloadableFile.RLock() 249 err := database.maxMindReader.Lookup(IP, &geoIPFields) 250 database.ReloadableFile.RUnlock() 251 if err != nil { 252 log.WithTraceFields(LogFields{"error": err}).Warning("GeoIP lookup failed") 253 } 254 } 255 256 if geoIPFields.Country.ISOCode != "" { 257 result.Country = geoIPFields.Country.ISOCode 258 } 259 260 name, ok := geoIPFields.City.Names["en"] 261 if ok && name != "" { 262 result.City = name 263 } 264 265 if geoIPFields.ISP != "" { 266 result.ISP = geoIPFields.ISP 267 } 268 269 if geoIPFields.ASN != -1 { 270 result.ASN = strconv.Itoa(geoIPFields.ASN) 271 } 272 273 if geoIPFields.ASO != "" { 274 result.ASO = geoIPFields.ASO 275 } 276 277 // Populate DiscoveryValue fields (even when there's no GeoIP database). 278 279 if addDiscoveryValue { 280 result.HasDiscoveryValue = true 281 result.DiscoveryValue = calculateDiscoveryValue( 282 geoIP.discoveryValueHMACKey, IP) 283 } 284 285 return result 286} 287 288// SetSessionCache adds the sessionID/geoIPData pair to the 289// session cache. This value will not expire; the caller must 290// call MarkSessionCacheToExpire to initiate expiry. 291// Calling SetSessionCache for an existing sessionID will 292// replace the previous value and reset any expiry. 293func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) { 294 geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration) 295} 296 297// MarkSessionCacheToExpire initiates expiry for an existing 298// session cache entry, if the session ID is found in the cache. 299// Concurrency note: SetSessionCache and MarkSessionCacheToExpire 300// should not be called concurrently for a single session ID. 301func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) { 302 geoIPData, found := geoIP.sessionCache.Get(sessionID) 303 // Note: potential race condition between Get and Set. In practice, 304 // the tunnel server won't clobber a SetSessionCache value by calling 305 // MarkSessionCacheToExpire concurrently. 306 if found { 307 geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration) 308 } 309} 310 311// GetSessionCache returns the cached GeoIPData for the 312// specified session ID; a blank GeoIPData is returned 313// if the session ID is not found in the cache. 314func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData { 315 geoIPData, found := geoIP.sessionCache.Get(sessionID) 316 if !found { 317 return NewGeoIPData() 318 } 319 return geoIPData.(GeoIPData) 320} 321 322// InSessionCache returns whether the session ID is present 323// in the session cache. 324func (geoIP *GeoIPService) InSessionCache(sessionID string) bool { 325 _, found := geoIP.sessionCache.Get(sessionID) 326 return found 327} 328 329// calculateDiscoveryValue derives a value from the client IP address to be 330// used as input in the server discovery algorithm. Since we do not explicitly 331// store the client IP address, we must derive the value here and store it for 332// later use by the discovery algorithm. 333// See https://github.com/ooni/psiphon/oopsi/github.com/Psiphon-Inc/psiphon-automation/tree/master/Automation/psi_ops_discovery.py 334// for full details. 335func calculateDiscoveryValue(discoveryValueHMACKey string, ipAddress net.IP) int { 336 // From: psi_ops_discovery.calculate_ip_address_strategy_value: 337 // # Mix bits from all octets of the client IP address to determine the 338 // # bucket. An HMAC is used to prevent pre-calculation of buckets for IPs. 339 // return ord(hmac.new(HMAC_KEY, ip_address, hashlib.sha256).digest()[0]) 340 // TODO: use 3-octet algorithm? 341 hash := hmac.New(sha256.New, []byte(discoveryValueHMACKey)) 342 hash.Write([]byte(ipAddress.String())) 343 return int(hash.Sum(nil)[0]) 344} 345