1/* 2 * Copyright (c) 2016, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20package server 21 22import ( 23 "crypto/hmac" 24 "crypto/sha256" 25 "fmt" 26 "io" 27 "net" 28 "os" 29 "path/filepath" 30 "strconv" 31 "strings" 32 "time" 33 34 "github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common" 35 "github.com/ooni/psiphon/oopsi/github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors" 36 maxminddb "github.com/ooni/psiphon/oopsi/github.com/oschwald/maxminddb-golang" 37 cache "github.com/ooni/psiphon/oopsi/github.com/patrickmn/go-cache" 38) 39 40const ( 41 GEOIP_SESSION_CACHE_TTL = 60 * time.Minute 42 GEOIP_UNKNOWN_VALUE = "None" 43) 44 45// GeoIPData is GeoIP data for a client session. Individual client 46// IP addresses are neither logged nor explicitly referenced during a session. 47// The GeoIP country, city, and ISP corresponding to a client IP address are 48// resolved and then logged along with usage stats. The DiscoveryValue is 49// a special value derived from the client IP that's used to compartmentalize 50// discoverable servers (see calculateDiscoveryValue for details). 51type GeoIPData struct { 52 Country string 53 City string 54 ISP string 55 ASN string 56 ASO string 57 DiscoveryValue int 58} 59 60// NewGeoIPData returns a GeoIPData initialized with the expected 61// GEOIP_UNKNOWN_VALUE values to be used when GeoIP lookup fails. 62func NewGeoIPData() GeoIPData { 63 return GeoIPData{ 64 Country: GEOIP_UNKNOWN_VALUE, 65 City: GEOIP_UNKNOWN_VALUE, 66 ISP: GEOIP_UNKNOWN_VALUE, 67 ASN: GEOIP_UNKNOWN_VALUE, 68 ASO: GEOIP_UNKNOWN_VALUE, 69 } 70} 71 72// SetLogFields adds the GeoIPData fields to LogFields, following Psiphon 73// metric field name and format conventions. 74func (g GeoIPData) SetLogFields(logFields LogFields) { 75 g.SetLogFieldsWithPrefix("", logFields) 76} 77 78func (g GeoIPData) SetLogFieldsWithPrefix(prefix string, logFields LogFields) { 79 80 // In psi_web, the space replacement was done to accommodate space 81 // delimited logging, which is no longer required; we retain the 82 // transformation so that stats aggregation isn't impacted. 83 logFields[prefix+"client_region"] = strings.Replace(g.Country, " ", "_", -1) 84 logFields[prefix+"client_city"] = strings.Replace(g.City, " ", "_", -1) 85 logFields[prefix+"client_isp"] = strings.Replace(g.ISP, " ", "_", -1) 86 logFields[prefix+"client_asn"] = strings.Replace(g.ASN, " ", "_", -1) 87 logFields[prefix+"client_aso"] = strings.Replace(g.ASO, " ", "_", -1) 88} 89 90// GeoIPService implements GeoIP lookup and session/GeoIP caching. 91// Lookup is via a MaxMind database; the ReloadDatabase function 92// supports hot reloading of MaxMind data while the server is 93// running. 94type GeoIPService struct { 95 databases []*geoIPDatabase 96 sessionCache *cache.Cache 97 discoveryValueHMACKey string 98} 99 100type geoIPDatabase struct { 101 common.ReloadableFile 102 filename string 103 tempFilename string 104 tempFileSuffix int64 105 maxMindReader *maxminddb.Reader 106} 107 108// NewGeoIPService initializes a new GeoIPService. 109func NewGeoIPService( 110 databaseFilenames []string, 111 discoveryValueHMACKey string) (*GeoIPService, error) { 112 113 geoIP := &GeoIPService{ 114 databases: make([]*geoIPDatabase, len(databaseFilenames)), 115 sessionCache: cache.New(GEOIP_SESSION_CACHE_TTL, 1*time.Minute), 116 discoveryValueHMACKey: discoveryValueHMACKey, 117 } 118 119 for i, filename := range databaseFilenames { 120 121 database := &geoIPDatabase{ 122 filename: filename, 123 } 124 125 database.ReloadableFile = common.NewReloadableFile( 126 filename, 127 false, 128 func(_ []byte, _ time.Time) error { 129 130 // In order to safely mmap the database file, a temporary copy 131 // is made and that copy is mmapped. The original file may be 132 // repaved without affecting the mmap; upon hot reload, a new 133 // temporary copy is made and once it is successful, the old 134 // mmap is closed and previous temporary file deleted. 135 // 136 // On any reload error, database state remains the same. 137 138 src, err := os.Open(database.filename) 139 if err != nil { 140 return errors.Trace(err) 141 } 142 143 tempFileSuffix := database.tempFileSuffix + 1 144 145 tempFilename := fmt.Sprintf( 146 "%s.%d", 147 filepath.Join(os.TempDir(), filepath.Base(database.filename)), 148 tempFileSuffix) 149 150 dst, err := os.Create(tempFilename) 151 if err != nil { 152 src.Close() 153 return errors.Trace(err) 154 } 155 156 _, err = io.Copy(dst, src) 157 src.Close() 158 dst.Close() 159 if err != nil { 160 _ = os.Remove(tempFilename) 161 return errors.Trace(err) 162 } 163 164 maxMindReader, err := maxminddb.Open(tempFilename) 165 if err != nil { 166 _ = os.Remove(tempFilename) 167 return errors.Trace(err) 168 } 169 170 if database.maxMindReader != nil { 171 database.maxMindReader.Close() 172 _ = os.Remove(database.tempFilename) 173 } 174 175 database.maxMindReader = maxMindReader 176 database.tempFilename = tempFilename 177 database.tempFileSuffix = tempFileSuffix 178 179 return nil 180 }) 181 182 _, err := database.Reload() 183 if err != nil { 184 return nil, errors.Trace(err) 185 } 186 187 geoIP.databases[i] = database 188 } 189 190 return geoIP, nil 191} 192 193// Reloaders gets the list of reloadable databases in use 194// by the GeoIPService. This list is used to hot reload 195// these databases. 196func (geoIP *GeoIPService) Reloaders() []common.Reloader { 197 reloaders := make([]common.Reloader, len(geoIP.databases)) 198 for i, database := range geoIP.databases { 199 reloaders[i] = database 200 } 201 return reloaders 202} 203 204// Lookup determines a GeoIPData for a given client IP address. 205func (geoIP *GeoIPService) Lookup(ipAddress string) GeoIPData { 206 result := NewGeoIPData() 207 208 ip := net.ParseIP(ipAddress) 209 210 if ip == nil || len(geoIP.databases) == 0 { 211 return result 212 } 213 214 var geoIPFields struct { 215 Country struct { 216 ISOCode string `maxminddb:"iso_code"` 217 } `maxminddb:"country"` 218 City struct { 219 Names map[string]string `maxminddb:"names"` 220 } `maxminddb:"city"` 221 ISP string `maxminddb:"isp"` 222 ASN int `maxminddb:"autonomous_system_number"` 223 ASO string `maxminddb:"autonomous_system_organization"` 224 } 225 226 geoIPFields.ASN = -1 227 228 // Each database will populate geoIPFields with the values it contains. In the 229 // current MaxMind deployment, the City database populates Country and City and 230 // the separate ISP database populates ISP. 231 for _, database := range geoIP.databases { 232 database.ReloadableFile.RLock() 233 err := database.maxMindReader.Lookup(ip, &geoIPFields) 234 database.ReloadableFile.RUnlock() 235 if err != nil { 236 log.WithTraceFields(LogFields{"error": err}).Warning("GeoIP lookup failed") 237 } 238 } 239 240 if geoIPFields.Country.ISOCode != "" { 241 result.Country = geoIPFields.Country.ISOCode 242 } 243 244 name, ok := geoIPFields.City.Names["en"] 245 if ok && name != "" { 246 result.City = name 247 } 248 249 if geoIPFields.ISP != "" { 250 result.ISP = geoIPFields.ISP 251 } 252 253 if geoIPFields.ASN != -1 { 254 result.ASN = strconv.Itoa(geoIPFields.ASN) 255 } 256 257 if geoIPFields.ASO != "" { 258 result.ASO = geoIPFields.ASO 259 } 260 261 result.DiscoveryValue = calculateDiscoveryValue( 262 geoIP.discoveryValueHMACKey, ipAddress) 263 264 return result 265} 266 267// SetSessionCache adds the sessionID/geoIPData pair to the 268// session cache. This value will not expire; the caller must 269// call MarkSessionCacheToExpire to initiate expiry. 270// Calling SetSessionCache for an existing sessionID will 271// replace the previous value and reset any expiry. 272func (geoIP *GeoIPService) SetSessionCache(sessionID string, geoIPData GeoIPData) { 273 geoIP.sessionCache.Set(sessionID, geoIPData, cache.NoExpiration) 274} 275 276// MarkSessionCacheToExpire initiates expiry for an existing 277// session cache entry, if the session ID is found in the cache. 278// Concurrency note: SetSessionCache and MarkSessionCacheToExpire 279// should not be called concurrently for a single session ID. 280func (geoIP *GeoIPService) MarkSessionCacheToExpire(sessionID string) { 281 geoIPData, found := geoIP.sessionCache.Get(sessionID) 282 // Note: potential race condition between Get and Set. In practice, 283 // the tunnel server won't clobber a SetSessionCache value by calling 284 // MarkSessionCacheToExpire concurrently. 285 if found { 286 geoIP.sessionCache.Set(sessionID, geoIPData, cache.DefaultExpiration) 287 } 288} 289 290// GetSessionCache returns the cached GeoIPData for the 291// specified session ID; a blank GeoIPData is returned 292// if the session ID is not found in the cache. 293func (geoIP *GeoIPService) GetSessionCache(sessionID string) GeoIPData { 294 geoIPData, found := geoIP.sessionCache.Get(sessionID) 295 if !found { 296 return NewGeoIPData() 297 } 298 return geoIPData.(GeoIPData) 299} 300 301// InSessionCache returns whether the session ID is present 302// in the session cache. 303func (geoIP *GeoIPService) InSessionCache(sessionID string) bool { 304 _, found := geoIP.sessionCache.Get(sessionID) 305 return found 306} 307 308// calculateDiscoveryValue derives a value from the client IP address to be 309// used as input in the server discovery algorithm. Since we do not explicitly 310// store the client IP address, we must derive the value here and store it for 311// later use by the discovery algorithm. 312// See https://bitbucket.org/psiphon/psiphon-circumvention-system/src/tip/Automation/psi_ops_discovery.py 313// for full details. 314func calculateDiscoveryValue(discoveryValueHMACKey, ipAddress string) int { 315 // From: psi_ops_discovery.calculate_ip_address_strategy_value: 316 // # Mix bits from all octets of the client IP address to determine the 317 // # bucket. An HMAC is used to prevent pre-calculation of buckets for IPs. 318 // return ord(hmac.new(HMAC_KEY, ip_address, hashlib.sha256).digest()[0]) 319 // TODO: use 3-octet algorithm? 320 hash := hmac.New(sha256.New, []byte(discoveryValueHMACKey)) 321 hash.Write([]byte(ipAddress)) 322 return int(hash.Sum(nil)[0]) 323} 324