from __future__ import print_function, unicode_literals

import contextlib
import datetime
import json
import locale
import logging
import os
import sqlite3
import threading

import six
from six.moves.urllib.request import urlopen

from rbtools.api.errors import CacheError
from rbtools.utils.appdirs import user_cache_dir


MINIMUM_VERSION = '2.0.14'  # Minimum server version to enable the API cache.

_locale_lock = threading.Lock()  # Lock for getting / setting locale.


class CacheEntry(object):
    """An entry in the API Cache.

    Each entry holds one cached response body together with the metadata
    needed to decide whether it may be reused (``max_age``, ``local_date``),
    how to revalidate it (``etag``, ``last_modified``) and which request it
    belongs to (``url``, ``vary_headers``).
    """

    DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'  # ISO Date format

    def __init__(self, url, vary_headers, max_age, etag, local_date,
                 last_modified, mime_type, item_mime_type, response_body):
        """Create a new cache entry.

        ``vary_headers`` is a dictionary mapping header names to the values
        they had in the request that produced the entry. ``local_date`` is
        the ``datetime`` at which the entry was stored or last revalidated.
        ``max_age`` is a lifetime in seconds, or ``None`` for no limit.
        """
        self.url = url
        self.vary_headers = vary_headers
        self.max_age = max_age
        self.etag = etag
        self.local_date = local_date
        self.last_modified = last_modified
        self.mime_type = mime_type
        self.item_mime_type = item_mime_type
        self.response_body = response_body

    def matches_request(self, request):
        """Determine if the cache entry matches the given request.

        This is done by comparing every header recorded in
        ``vary_headers`` against the header of the same name in the
        request. An entry without any recorded headers matches every
        request.
        """
        if self.vary_headers:
            for header, value in six.iteritems(self.vary_headers):
                if request.headers.get(header) != value:
                    return False

        return True

    def up_to_date(self):
        """Determine if the cache entry is up to date.

        An entry with a ``max_age`` of ``None`` never expires. Otherwise it
        is up to date while ``local_date + max_age`` lies in the future.
        """
        if self.max_age is not None:
            max_age = datetime.timedelta(seconds=self.max_age)
            return self.local_date + max_age > datetime.datetime.now()

        return True


class HTTPResponse(object):
    """An uncached HTTP response that can be read() more than once.

    This is intended to be API-compatible with a urllib2 response object.
    The headers, body and status code are copied out of the wrapped
    response when the object is created, which allows the response to be
    read more than once.
    """

    def __init__(self, response):
        """Extract the data from a urllib2 HTTP response."""
        self.headers = response.info()
        self.content = response.read()
        self.code = response.getcode()

    def info(self):
        """Get the headers associated with the response."""
        return self.headers

    def read(self):
        """Get the content associated with the response."""
        return self.content

    def getcode(self):
        """Get the associated HTTP response code."""
        return self.code


class CachedHTTPResponse(object):
    """A response returned from the APICache.

    This is intended to be API-compatible with a urllib2 response object.
    """

    def __init__(self, cache_entry):
        """Create a new CachedResponse from the given CacheEntry."""
        # Only the headers describing the cached content are stored, so
        # those are the only ones that can be reported back.
        self.headers = {
            'Content-Type': cache_entry.mime_type,
            'Item-Content-Type': cache_entry.item_mime_type,
        }

        self.content = cache_entry.response_body

    def info(self):
        """Get the headers associated with the response."""
        return self.headers

    def read(self):
        """Get the content associated with the response."""
        return self.content

    def getcode(self):
        """Get the associated HTTP response code, which is always 200.

        This method returns 200 because it is pretending that it made a
        successful HTTP request.
        """
        return 200


class APICache(object):
    """An API cache backed by a SQLite database."""

    # The format for the Expires: header. Requires an English locale.
    EXPIRES_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'

    DEFAULT_CACHE_DIR = user_cache_dir('rbtools')
    DEFAULT_CACHE_PATH = os.path.join(DEFAULT_CACHE_DIR, 'apicache.db')

    # The API Cache's schema version. If the schema is updated, update this
    # value.
    SCHEMA_VERSION = 2

    def __init__(self, create_db_in_memory=False, db_location=None,
                 urlopen=urlopen):
        """Create a new instance of the APICache.

        If ``create_db_in_memory`` is set, the cache lives in an in-memory
        SQLite database. Otherwise, if ``db_location`` is provided, it will
        be used as the path to the SQLite database; failing that, the
        default cache path (``DEFAULT_CACHE_PATH``) will be used. The
        ``urlopen`` parameter determines the method that is used to open
        URLs.

        If the on-disk cache cannot be created or opened, a warning is
        logged and ``self.db`` is left as ``None``; requests are then
        passed straight through to ``urlopen``. A ``CacheError`` is raised
        if the schema version cannot be read or the schema cannot be
        created.
        """
        self.urlopen = urlopen

        # Make sure the attribute exists even when opening the database
        # fails part way through; make_request() relies on it being None in
        # that case.
        self.db = None

        if create_db_in_memory:
            logging.debug('Creating API cache in memory.')

            self.db = sqlite3.connect(':memory:')
            self.cache_path = None
            self._create_schema()
        else:
            self.cache_path = db_location or self.DEFAULT_CACHE_PATH

            try:
                cache_exists = os.path.exists(self.cache_path)
                create_schema = True

                if not cache_exists:
                    cache_dir = os.path.dirname(self.cache_path)

                    if not os.path.exists(cache_dir):
                        logging.debug('Cache directory "%s" does not exist; '
                                      'creating.',
                                      cache_dir)
                        os.makedirs(cache_dir)

                    logging.debug('API cache "%s" does not exist; creating.',
                                  self.cache_path)

                self.db = sqlite3.connect(self.cache_path)

                if cache_exists:
                    # Only keep the existing tables when they were written
                    # with the current schema version.
                    try:
                        with contextlib.closing(self.db.cursor()) as c:
                            c.execute('SELECT version FROM cache_info')
                            row = c.fetchone()

                            if row and row[0] == self.SCHEMA_VERSION:
                                create_schema = False
                    except sqlite3.Error as e:
                        self._die(
                            'Could not get the HTTP cache schema version', e)

                if create_schema:
                    self._create_schema()
            except (OSError, sqlite3.Error):
                # OSError will be thrown if we cannot create the directory or
                # file for the API cache. sqlite3.Error will be thrown if
                # connect fails. In either case, HTTP requests can still be
                # made, they will just be passed through to the URL opener
                # without attempting to interact with the API cache.
                logging.warning('Could not create or access API cache "%s". '
                                'Try running "rbt clear-cache" to clear the '
                                'HTTP cache for the API.',
                                self.cache_path)

        if self.db is not None:
            # Installed only after the schema check above, so that check
            # sees plain tuples rather than CacheEntry objects.
            self.db.row_factory = APICache._row_factory

    def make_request(self, request):
        """Perform the specified request.

        If there is an up-to-date cached entry in our store, a
        CachedHTTPResponse will be returned. Otherwise, the urlopen method
        will be used to execute the request and a CachedHTTPResponse (if
        our entry is still up to date) or an HTTPResponse (if it is not)
        will be returned.

        Requests that are not HTTP GET, and all requests when the cache
        database is unavailable, are passed to urlopen and its result is
        returned unchanged.
        """
        if self.db is None or request.method != 'GET':
            # We can only cache HTTP GET requests and only if we were able to
            # access the API cache database.
            return self.urlopen(request)

        entry = self._get_entry(request)

        if entry:
            if entry.up_to_date():
                logging.debug('Cached response for HTTP GET %s up to date',
                              request.get_full_url())
                response = CachedHTTPResponse(entry)
            else:
                # The entry has expired: ask the server whether it changed,
                # using whichever validators we stored.
                if entry.etag:
                    request.add_header('If-none-match', entry.etag)

                if entry.last_modified:
                    request.add_header('If-modified-since',
                                       entry.last_modified)

                response = HTTPResponse(self.urlopen(request))

                if response.getcode() == 304:
                    logging.debug('Cached response for HTTP GET %s expired '
                                  'and was not modified',
                                  request.get_full_url())
                    entry.local_date = datetime.datetime.now()
                    self._save_entry(entry)
                    response = CachedHTTPResponse(entry)
                elif 200 <= response.getcode() < 300:
                    logging.debug('Cached response for HTTP GET %s expired '
                                  'and was modified',
                                  request.get_full_url())
                    response_headers = response.info()
                    cache_info = self._get_caching_info(request.headers,
                                                        response_headers)

                    if cache_info:
                        entry.max_age = cache_info['max_age']
                        entry.etag = cache_info['etag']
                        entry.local_date = datetime.datetime.now()
                        entry.last_modified = cache_info['last_modified']

                        # Use .get() like the first-fetch path below so a
                        # response without a Content-Type header does not
                        # raise KeyError.
                        entry.mime_type = response_headers.get('Content-Type')
                        entry.item_mime_type = \
                            response_headers.get('Item-Content-Type')
                        entry.response_body = response.read()

                        if entry.vary_headers != cache_info['vary_headers']:
                            # The Vary: header has changed since the last time
                            # we retrieved the resource so we need to remove
                            # the old cache entry and save the new one.
                            self._delete_entry(entry)
                            entry.vary_headers = cache_info['vary_headers']

                        self._save_entry(entry)
                    else:
                        # This resource is no longer cache-able so we should
                        # delete our cached version.
                        logging.debug('Cached response for HTTP GET request '
                                      'to %s is no longer cacheable',
                                      request.get_full_url())
                        self._delete_entry(entry)
        else:
            response = HTTPResponse(self.urlopen(request))
            response_headers = response.info()

            cache_info = self._get_caching_info(request.headers,
                                                response_headers)

            if cache_info:
                self._save_entry(CacheEntry(
                    request.get_full_url(),
                    cache_info['vary_headers'],
                    cache_info['max_age'],
                    cache_info['etag'],
                    datetime.datetime.now(),
                    cache_info['last_modified'],
                    response_headers.get('Content-Type'),
                    response_headers.get('Item-Content-Type'),
                    response.read()))

                logging.debug('Added cache entry for HTTP GET request to %s',
                              request.get_full_url())

            else:
                logging.debug('HTTP GET request to %s cannot be cached',
                              request.get_full_url())

        return response

    def _get_caching_info(self, request_headers, response_headers):
        """Get the caching info for the response to the given request.

        A dictionary with the keys ``max_age``, ``etag``, ``last_modified``
        and ``vary_headers`` is returned, or None if the response cannot be
        cached.
        """
        max_age = None
        no_cache = False

        expires = response_headers.get('Expires')

        if expires:
            # We switch to the C locale to parse the 'Expires' header because
            # the formatting specifiers are locale specific and the header
            # *must* be provided in English. After parsing the header, we
            # restore the locale to the user's previous locale.
            #
            # We also note that changing the locale is not thread-safe so we
            # use a lock around this.
            with _locale_lock:
                old_locale = locale.setlocale(locale.LC_TIME)

                try:
                    # 'setlocale' requires the second parameter to be a 'str'
                    # in both Python 2.x and Python 3+.
                    locale.setlocale(locale.LC_TIME, str('C'))
                    expires_date = datetime.datetime.strptime(
                        expires, self.EXPIRES_FORMAT)

                    # We assign to max_age because the value of max-age in the
                    # Cache-Control header overrides the behaviour of the
                    # 'Expires' header.
                    #
                    # HTTP dates are expressed in GMT, so the parsed (naive)
                    # value has to be compared against the current UTC time
                    # rather than the local time.
                    now = datetime.datetime.utcnow()

                    if expires_date < now:
                        max_age = 0
                    else:
                        # total_seconds() includes whole days, whereas the
                        # 'seconds' attribute only holds the remainder.
                        max_age = int((expires_date - now).total_seconds())
                except ValueError:
                    logging.error('The format of the "Expires" header (value '
                                  '%s) does not match the expected format.',
                                  expires)
                except locale.Error:
                    logging.error('The C locale is unavailable on this '
                                  'system. The "Expires" header cannot be '
                                  'parsed.')
                finally:
                    locale.setlocale(locale.LC_TIME, old_locale)

        # The value of the Cache-Control header is a list of comma separated
        # values. We only care about some of them, notably max-age, no-cache,
        # no-store, and must-revalidate. The other values are only applicable
        # to intermediaries.
        for kvp in self._split_csv(response_headers.get('Cache-Control', '')):
            if kvp.startswith('max-age'):
                try:
                    max_age = int(kvp.split('=')[1].strip())
                except (IndexError, ValueError):
                    # A directive without a usable integer value is ignored
                    # instead of aborting the whole request.
                    logging.warning('Ignoring malformed Cache-Control '
                                    'directive "%s".',
                                    kvp)
            elif kvp.startswith('no-cache'):
                # The no-cache specifier optionally has an associated header
                # that we shouldn't cache. However, the *only* headers we are
                # caching are headers that describe the cached content:
                # Content-Type, and Item-Content-Type.
                no_cache = True
            elif kvp == 'no-store':
                # If no-store is specified, we cannot cache anything about this
                # resource.
                return None
            elif kvp == 'must-revalidate':
                # We treat must-revalidate identical to no-cache because we are
                # not an intermediary.
                no_cache = True

        # The Pragma: header is an obsolete header that may contain the value
        # no-cache, which is equivalent to Cache-Control: no-cache. We check
        # for it for posterity's sake.
        if 'no-cache' in response_headers.get('Pragma', ''):
            no_cache = True

        etag = response_headers.get('ETag')
        last_modified = response_headers.get('Last-Modified')
        vary_headers = response_headers.get('Vary')

        # The Vary header specifies a list of headers that *may* alter the
        # returned response. The cached response can only be used when these
        # headers have the same value as those provided in the request.
        if vary_headers:
            vary_headers = dict(
                (header, request_headers.get(header))
                for header in self._split_csv(vary_headers)
            )
        else:
            vary_headers = {}

        if no_cache:
            # If no-cache is specified, the resource must always be requested,
            # so we will treat this as if the max_age is zero.
            max_age = 0

        if no_cache and not etag and not last_modified:
            # We have no information with which to provide the server to check
            # if our content is up to date. Therefore, the information cannot
            # be cached.
            return None

        return {
            'max_age': max_age,
            'etag': etag,
            'last_modified': last_modified,
            'vary_headers': vary_headers
        }

    def _create_schema(self):
        """Create the schema for the API cache database.

        Any existing ``api_cache`` and ``cache_info`` tables are dropped
        first, so all previously cached entries are discarded. A
        ``CacheError`` is raised if SQLite reports an error.
        """
        try:
            with contextlib.closing(self.db.cursor()) as c:
                c.execute('DROP TABLE IF EXISTS api_cache')
                c.execute('DROP TABLE IF EXISTS cache_info')

                c.execute('''CREATE TABLE api_cache(
                                 url            TEXT,
                                 vary_headers   TEXT,
                                 max_age        INTEGER,
                                 etag           TEXT,
                                 local_date     TEXT,
                                 last_modified  TEXT,
                                 mime_type      TEXT,
                                 item_mime_type TEXT,
                                 response_body  BLOB,
                                 PRIMARY KEY(url, vary_headers)
                             )''')

                c.execute('CREATE TABLE cache_info(version INTEGER)')

                c.execute('INSERT INTO cache_info(version) VALUES(?)',
                          (self.SCHEMA_VERSION,))

            self._write_db()
        except sqlite3.Error as e:
            self._die('Could not create database schema for the HTTP cache', e)

    def _get_entry(self, request):
        """Find an entry in the API cache store that matches the request.

        The rows for the request's URL are turned into CacheEntry objects
        by the connection's row factory; the first one whose recorded
        headers match the request is returned. If no such cache entry
        exists, this returns None.
        """
        url = request.get_full_url()

        try:
            with contextlib.closing(self.db.cursor()) as c:
                for row in c.execute('SELECT * FROM api_cache WHERE url=?',
                                     (url,)):
                    if row.matches_request(request):
                        return row
        except sqlite3.Error as e:
            self._die('Could not retrieve an entry from the HTTP cache', e)

        return None

    def _save_entry(self, entry):
        """Save the entry into the store.

        An INSERT is attempted first; if an entry with the same URL and
        vary headers already exists, an UPDATE is done instead. The change
        is then committed to the database. A ``CacheError`` is raised if
        SQLite reports any other error.
        """
        vary_headers = json.dumps(entry.vary_headers)
        local_date = entry.local_date.strftime(entry.DATE_FORMAT)

        try:
            with contextlib.closing(self.db.cursor()) as c:
                try:
                    c.execute('''INSERT INTO api_cache (url,
                                                        vary_headers,
                                                        max_age,
                                                        etag,
                                                        local_date,
                                                        last_modified,
                                                        mime_type,
                                                        item_mime_type,
                                                        response_body)
                                 VALUES(?,?,?,?,?,?,?,?,?)''',
                              (entry.url, vary_headers, entry.max_age,
                               entry.etag, local_date, entry.last_modified,
                               entry.mime_type, entry.item_mime_type,
                               sqlite3.Binary(entry.response_body)))
                except sqlite3.IntegrityError:
                    # The (url, vary_headers) primary key already exists.
                    c.execute('''UPDATE api_cache
                                 SET max_age=?,
                                     etag=?,
                                     local_date=?,
                                     last_modified=?,
                                     mime_type=?,
                                     item_mime_type=?,
                                     response_body=?
                                 WHERE url=? AND vary_headers=?''',
                              (entry.max_age, entry.etag, local_date,
                               entry.last_modified, entry.mime_type,
                               entry.item_mime_type,
                               sqlite3.Binary(entry.response_body), entry.url,
                               vary_headers))

            self._write_db()
        except sqlite3.Error as e:
            self._die('Could not write entry to the HTTP cache for the API', e)

    def _delete_entry(self, entry):
        """Remove the entry from the store and commit the change."""
        try:
            with contextlib.closing(self.db.cursor()) as c:
                c.execute(
                    'DELETE FROM api_cache WHERE URL=? AND vary_headers=?',
                    (entry.url, json.dumps(entry.vary_headers)))

            self._write_db()
        except sqlite3.Error as e:
            self._die('Could not delete entry from the HTTP cache for the API',
                      e)

    @staticmethod
    def _row_factory(cursor, row):
        """A factory for creating individual Cache Entries from db rows.

        The column order must match the ``api_cache`` table definition.
        """
        return CacheEntry(
            url=row[0],
            vary_headers=json.loads(row[1]),
            max_age=row[2],
            etag=row[3],
            local_date=datetime.datetime.strptime(row[4],
                                                  CacheEntry.DATE_FORMAT),
            last_modified=row[5],
            mime_type=row[6],
            item_mime_type=row[7],
            response_body=six.binary_type(row[8]),
        )

    def _write_db(self):
        """Commit pending changes to the database.

        A ``CacheError`` is raised if the commit fails.
        """
        if self.db:
            try:
                self.db.commit()
            except sqlite3.Error as e:
                self._die('Could not write database to disk', e)

    def _die(self, message, inner_exception):
        """Build an appropriate CacheError and raise it.

        When the cache lives on disk, the message is extended with the
        ``rbt clear-cache`` command the user can run to reset it.
        """
        message = '%s: %s.' % (message, inner_exception)

        if self.cache_path:
            if self.cache_path == APICache.DEFAULT_CACHE_PATH:
                cache_args = ''
            else:
                cache_args = ' --cache-location %s' % self.cache_path

            message += (' Try running "rbt clear-cache%s" to manually clear '
                        'the HTTP Cache for the API.'
                        % cache_args)

        raise CacheError(message)

    def _split_csv(self, csvline):
        """Split a line of comma-separated values into a list.

        Surrounding whitespace is stripped from every value.
        """
        return [
            s.strip()
            for s in csvline.split(',')
        ]


def clear_cache(cache_path=APICache.DEFAULT_CACHE_PATH):
    """Delete the HTTP cache used for the API.

    Failures are logged rather than raised.
    """
    try:
        os.unlink(cache_path)
        print('Cleared cache in "%s"' % cache_path)
    except Exception as e:
        logging.error('Could not clear cache in "%s": %s. Try manually '
                      'removing it if it exists.',
                      cache_path, e)