1from __future__ import print_function, unicode_literals
2
3import contextlib
4import datetime
5import json
6import locale
7import logging
8import os
9import sqlite3
10import threading
11
12import six
13from six.moves.urllib.request import urlopen
14
15from rbtools.api.errors import CacheError
16from rbtools.utils.appdirs import user_cache_dir
17
18
19MINIMUM_VERSION = '2.0.14'  # Minimum server version to enable the API cache.
20
21_locale_lock = threading.Lock()  # Lock for getting / setting locale.
22
23
class CacheEntry(object):
    """A single stored API response together with its caching metadata."""

    # strftime()/strptime() pattern (ISO 8601 style, no time zone).
    DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'

    def __init__(self, url, vary_headers, max_age, etag, local_date,
                 last_modified, mime_type, item_mime_type, response_body):
        """Store every argument, unchanged, on the new entry."""
        # What was requested and which request headers it depended on.
        self.url = url
        self.vary_headers = vary_headers

        # The payload and the content types that describe it.
        self.response_body = response_body
        self.mime_type = mime_type
        self.item_mime_type = item_mime_type

        # Freshness and revalidation metadata. ``max_age`` is a number of
        # seconds (or None) counted from the datetime ``local_date``.
        self.max_age = max_age
        self.local_date = local_date
        self.etag = etag
        self.last_modified = last_modified

    def matches_request(self, request):
        """Return whether this entry may be used to answer the request.

        Every header recorded in ``vary_headers`` must have exactly the same
        value in ``request.headers``. An entry without any recorded vary
        headers matches every request.
        """
        if not self.vary_headers:
            return True

        return all(
            request.headers.get(name) == expected
            for name, expected in six.iteritems(self.vary_headers)
        )

    def up_to_date(self):
        """Return whether the entry can be used without revalidation.

        An entry with no ``max_age`` is always considered up to date.
        Otherwise it is up to date until ``max_age`` seconds have elapsed
        since ``local_date``.
        """
        if self.max_age is None:
            return True

        expires_at = self.local_date + datetime.timedelta(
            seconds=self.max_age)

        return expires_at > datetime.datetime.now()
62
63
class HTTPResponse(object):
    """A snapshot of a live HTTP response whose body can be re-read.

    The headers, body and status code are captured once, when the object is
    created. The accessors mirror those of a urllib2 response object, but
    read() may be called any number of times.
    """

    def __init__(self, response):
        """Capture the headers, body and status code of ``response``."""
        # The right-hand side is evaluated left to right, so the wrapped
        # response is queried in the order info(), read(), getcode().
        self.headers, self.content, self.code = (
            response.info(),
            response.read(),
            response.getcode(),
        )

    def info(self):
        """Return the captured response headers."""
        return self.headers

    def read(self):
        """Return the captured response body."""
        return self.content

    def getcode(self):
        """Return the captured HTTP status code."""
        return self.code
87
88
class CachedHTTPResponse(object):
    """A response served from the APICache instead of the network.

    The accessors mirror those of a urllib2 response object.
    """

    def __init__(self, cache_entry):
        """Build the response from the given CacheEntry."""
        self.content = cache_entry.response_body

        # Only the headers that describe the cached content are exposed.
        headers = {}
        headers['Content-Type'] = cache_entry.mime_type
        headers['Item-Content-Type'] = cache_entry.item_mime_type
        self.headers = headers

    def info(self):
        """Return the headers of the cached response."""
        return self.headers

    def read(self):
        """Return the body of the cached response."""
        return self.content

    def getcode(self):
        """Return the HTTP status code, which is always 200.

        A cached response stands in for a request that succeeded, so it
        always reports success.
        """
        return 200
118
119
120class APICache(object):
121    """An API cache backed by a SQLite database."""
122
123    # The format for the Expires: header. Requires an English locale.
124    EXPIRES_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'
125
126    DEFAULT_CACHE_DIR = user_cache_dir('rbtools')
127    DEFAULT_CACHE_PATH = os.path.join(DEFAULT_CACHE_DIR, 'apicache.db')
128
129    # The API Cache's schema version. If the schema is updated, update this
130    # value.
131    SCHEMA_VERSION = 2
132
133    def __init__(self, create_db_in_memory=False, db_location=None,
134                 urlopen=urlopen):
135        """Create a new instance of the APICache
136
137        If the db_path is provided, it will be used as the path to the SQLite
138        database; otherwise, the default cache (in the CACHE_DIR) will be used.
139        The urlopen parameter determines the method that is used to open URLs.
140        """
141        self.urlopen = urlopen
142
143        if create_db_in_memory:
144            logging.debug('Creating API cache in memory.')
145
146            self.db = sqlite3.connect(':memory:')
147            self.cache_path = None
148            self._create_schema()
149        else:
150            self.cache_path = db_location or self.DEFAULT_CACHE_PATH
151
152            try:
153                cache_exists = os.path.exists(self.cache_path)
154                create_schema = True
155
156                if not cache_exists:
157                    cache_dir = os.path.dirname(self.cache_path)
158
159                    if not os.path.exists(cache_dir):
160                        logging.debug('Cache directory "%s" does not exist; '
161                                      'creating.',
162                                      cache_dir)
163                        os.makedirs(cache_dir)
164
165                    logging.debug('API cache "%s" does not exist; creating.',
166                                  self.cache_path)
167
168                self.db = sqlite3.connect(self.cache_path)
169
170                if cache_exists:
171                    try:
172                        with contextlib.closing(self.db.cursor()) as c:
173                            c.execute('SELECT version FROM cache_info')
174                            row = c.fetchone()
175
176                            if row and row[0] == self.SCHEMA_VERSION:
177                                create_schema = False
178                    except sqlite3.Error as e:
179                        self._die(
180                            'Could not get the HTTP cache schema version', e)
181
182                if create_schema:
183                    self._create_schema()
184            except (OSError, sqlite3.Error):
185                # OSError will be thrown if we cannot create the directory or
186                # file for the API cache. sqlite3.Error will be thrown if
187                # connect fails. In either case, HTTP requests can still be
188                # made, they will just passed through to the URL opener without
189                # attempting to interact with the API cache.
190                logging.warn('Could not create or access API cache "%s". Try '
191                             'running "rbt clear-cache" to clear the HTTP '
192                             'cache for the API.',
193                             self.cache_path)
194
195        if self.db is not None:
196            self.db.row_factory = APICache._row_factory
197
198    def make_request(self, request):
199        """Perform the specified request.
200
201        If there is an up-to-date cached entry in our store, a CachedResponse
202        will be returned. Otherwise, The urlopen method will be used to
203        execute the request and a CachedResponse (if our entry is still up to
204        date) or a Response (if it is not) will be returned.
205        """
206        if self.db is None or request.method != 'GET':
207            # We can only cache HTTP GET requests and only if we were able to
208            # access the API cache database.
209            return self.urlopen(request)
210
211        entry = self._get_entry(request)
212
213        if entry:
214            if entry.up_to_date():
215                logging.debug('Cached response for HTTP GET %s up to date',
216                              request.get_full_url())
217                response = CachedHTTPResponse(entry)
218            else:
219                if entry.etag:
220                    request.add_header('If-none-match', entry.etag)
221
222                if entry.last_modified:
223                    request.add_header('If-modified-since',
224                                       entry.last_modified)
225
226                response = HTTPResponse(self.urlopen(request))
227
228                if response.getcode() == 304:
229                    logging.debug('Cached response for HTTP GET %s expired '
230                                  'and was not modified',
231                                  request.get_full_url())
232                    entry.local_date = datetime.datetime.now()
233                    self._save_entry(entry)
234                    response = CachedHTTPResponse(entry)
235                elif 200 <= response.getcode() < 300:
236                    logging.debug('Cached response for HTTP GET %s expired '
237                                  'and was modified',
238                                  request.get_full_url())
239                    response_headers = response.info()
240                    cache_info = self._get_caching_info(request.headers,
241                                                        response_headers)
242
243                    if cache_info:
244                        entry.max_age = cache_info['max_age']
245                        entry.etag = cache_info['etag']
246                        entry.local_date = datetime.datetime.now()
247                        entry.last_modified = cache_info['last_modified']
248
249                        entry.mime_type = response_headers['Content-Type']
250                        entry.item_mime_type = \
251                            response_headers.get('Item-Content-Type')
252                        entry.response_body = response.read()
253
254                        if entry.vary_headers != cache_info['vary_headers']:
255                            # The Vary: header has changed since the last time
256                            # we retrieved the resource so we need to remove
257                            # the old cache entry and save the new one.
258                            self._delete_entry(entry)
259                            entry.vary_headers = cache_info['vary_headers']
260
261                        self._save_entry(entry)
262                    else:
263                        # This resource is no longer cache-able so we should
264                        # delete our cached version.
265                        logging.debug('Cached response for HTTP GET request '
266                                      'to %s is no longer cacheable',
267                                      request.get_full_url())
268                        self._delete_entry(entry)
269        else:
270            response = HTTPResponse(self.urlopen(request))
271            response_headers = response.info()
272
273            cache_info = self._get_caching_info(request.headers,
274                                                response_headers)
275
276            if cache_info:
277                self._save_entry(CacheEntry(
278                    request.get_full_url(),
279                    cache_info['vary_headers'],
280                    cache_info['max_age'],
281                    cache_info['etag'],
282                    datetime.datetime.now(),
283                    cache_info['last_modified'],
284                    response_headers.get('Content-Type'),
285                    response_headers.get('Item-Content-Type'),
286                    response.read()))
287
288                logging.debug('Added cache entry for HTTP GET request to %s',
289                              request.get_full_url())
290
291            else:
292                logging.debug('HTTP GET request to %s cannot be cached',
293                              request.get_full_url())
294
295        return response
296
297    def _get_caching_info(self, request_headers, response_headers):
298        """Get the caching info for the response to the given request.
299
300        A dictionary with caching information is returned, or None if the
301        response cannot be cached.
302        """
303        max_age = None
304        no_cache = False
305
306        expires = response_headers.get('Expires')
307
308        if expires:
309            # We switch to the C locale to parse the 'Expires' header because
310            # the formatting specifiers are locale specific and the header
311            # *must* be provided in English. After parsing the header, we
312            # restore the locale to the user's previous locale.
313            #
314            # We also note that changing the locale is not thread-safe so we
315            # use a lock around this.
316            with _locale_lock:
317                old_locale = locale.setlocale(locale.LC_TIME)
318
319                try:
320                    # 'setlocale' requires the second parameter to be a 'str'
321                    # in both Python 2.x and Python 3+.
322                    locale.setlocale(locale.LC_TIME, str('C'))
323                    expires = datetime.datetime.strptime(expires,
324                                                         self.EXPIRES_FORMAT)
325
326                    # We assign to max_age because the value of max-age in the
327                    # Cache-Control header overrides the behaviour of the
328                    # 'Expires' header.
329                    now = datetime.datetime.now()
330
331                    if expires < now:
332                        max_age = 0
333                    else:
334                        max_age = (expires - now).seconds
335                except ValueError:
336                    logging.error('The format of the "Expires" header (value '
337                                  '%s) does not match the expected format.',
338                                  expires)
339                except locale.Error:
340                    logging.error('The C locale is unavailable on this '
341                                  'system. The "Expires" header cannot be '
342                                  'parsed.')
343                finally:
344                    locale.setlocale(locale.LC_TIME, old_locale)
345
346        # The value of the Cache-Control header is a list of comma separated
347        # values. We only care about some of them, notably max-age, no-cache,
348        # no-store, and must-revalidate. The other values are only applicable
349        # to intermediaries.
350        for kvp in self._split_csv(response_headers.get('Cache-Control', '')):
351            if kvp.startswith('max-age'):
352                max_age = int(kvp.split('=')[1].strip())
353            elif kvp.startswith('no-cache'):
354                # The no-cache specifier optionally has an associated header
355                # that we shouldn't cache. However, the *only* headers we are
356                # caching are headers that describe the the cached content:
357                # Content-Type, and Item-Content-Type.
358                no_cache = True
359            elif kvp == 'no-store':
360                # If no-store is specified, we cannot cache anything about this
361                # resource.
362                return None
363            elif kvp == 'must-revalidate':
364                # We treat must-revalidate identical to no-cache because we are
365                # not an intermediary.
366                no_cache = True
367
368        # The Pragma: header is an obsolete header that may contain the value
369        # no-cache, which is equivalent to Cache-Control: no-cache. We check
370        # for it for posterity's sake.
371        if 'no-cache' in response_headers.get('Pragma', ''):
372            no_cache = True
373
374        etag = response_headers.get('ETag')
375        last_modified = response_headers.get('Last-Modified')
376        vary_headers = response_headers.get('Vary')
377
378        # The Vary header specifies a list of headers that *may* alter the
379        # returned response. The cached response can only be used when these
380        # headers have the same value as those provided in the request.
381        if vary_headers:
382            vary_headers = dict(
383                (header, request_headers.get(header))
384                for header in self._split_csv(vary_headers)
385            )
386        else:
387            vary_headers = {}
388
389        if no_cache:
390            # If no-cache is specified, the resource must always be requested,
391            # so we will treat this as if the max_age is zero.
392            max_age = 0
393
394        if no_cache and not etag and not last_modified:
395            # We have no information with which to provide the server to check
396            # if our content is up to date. Therefore, the information cannot
397            # be cached.
398            return None
399
400        return {
401            'max_age': max_age,
402            'etag': etag,
403            'last_modified': last_modified,
404            'vary_headers': vary_headers
405        }
406
407    def _create_schema(self):
408        """Create the schema for the API cache database."""
409        try:
410            with contextlib.closing(self.db.cursor()) as c:
411                c.execute('DROP TABLE IF EXISTS api_cache')
412                c.execute('DROP TABLE IF EXISTS cache_info')
413
414                c.execute('''CREATE TABLE api_cache(
415                                 url            TEXT,
416                                 vary_headers   TEXT,
417                                 max_age        INTEGER,
418                                 etag           TEXT,
419                                 local_date     TEXT,
420                                 last_modified  TEXT,
421                                 mime_type      TEXT,
422                                 item_mime_type TEXT,
423                                 response_body  BLOB,
424                                 PRIMARY KEY(url, vary_headers)
425                             )''')
426
427                c.execute('CREATE TABLE cache_info(version INTEGER)')
428
429                c.execute('INSERT INTO cache_info(version) VALUES(?)',
430                          (self.SCHEMA_VERSION,))
431
432            self._write_db()
433        except sqlite3.Error as e:
434            self._die('Could not create database schema for the HTTP cache', e)
435
436    def _get_entry(self, request):
437        """Find an entry in the API cache store that matches the request.
438
439        If no such cache entry exists, this returns None.
440        """
441        url = request.get_full_url()
442
443        try:
444            with contextlib.closing(self.db.cursor()) as c:
445                for row in c.execute('SELECT * FROM api_cache WHERE url=?',
446                                     (url,)):
447                    if row.matches_request(request):
448                        return row
449        except sqlite3.Error as e:
450            self._die('Could not retrieve an entry from the HTTP cache', e)
451
452        return None
453
454    def _save_entry(self, entry):
455        """Save the entry into the store.
456
457        If the entry already exists in the store, do an UPDATE; otherwise do an
458        INSERT. This does not commit to the database.
459        """
460        vary_headers = json.dumps(entry.vary_headers)
461        local_date = entry.local_date.strftime(entry.DATE_FORMAT)
462
463        try:
464            with contextlib.closing(self.db.cursor()) as c:
465                try:
466                    c.execute('''INSERT INTO api_cache (url,
467                                                        vary_headers,
468                                                        max_age,
469                                                        etag,
470                                                        local_date,
471                                                        last_modified,
472                                                        mime_type,
473                                                        item_mime_type,
474                                                        response_body)
475                                 VALUES(?,?,?,?,?,?,?,?,?)''',
476                              (entry.url, vary_headers, entry.max_age,
477                               entry.etag, local_date, entry.last_modified,
478                               entry.mime_type, entry.item_mime_type,
479                               sqlite3.Binary(entry.response_body)))
480                except sqlite3.IntegrityError:
481                    c.execute('''UPDATE api_cache
482                                 SET max_age=?,
483                                     etag=?,
484                                     local_date=?,
485                                     last_modified=?,
486                                     mime_type=?,
487                                     item_mime_type=?,
488                                     response_body=?
489                                 WHERE url=? AND vary_headers=?''',
490                              (entry.max_age, entry.etag, local_date,
491                               entry.last_modified, entry.mime_type,
492                               entry.item_mime_type,
493                               sqlite3.Binary(entry.response_body), entry.url,
494                               vary_headers))
495
496            self._write_db()
497        except sqlite3.Error as e:
498            self._die('Could not write entry to the HTTP cache for the API', e)
499
500    def _delete_entry(self, entry):
501        """Remove the entry from the store."""
502        try:
503            with contextlib.closing(self.db.cursor()) as c:
504                c.execute(
505                    'DELETE FROM api_cache WHERE URL=? AND vary_headers=?',
506                    (entry.url, json.dumps(entry.vary_headers)))
507
508            self._write_db()
509        except sqlite3.Error as e:
510            self._die('Could not delete entry from the HTTP cache for the API',
511                      e)
512
513    @staticmethod
514    def _row_factory(cursor, row):
515        """A factory for creating individual Cache Entries from db rows."""
516        return CacheEntry(
517            url=row[0],
518            vary_headers=json.loads(row[1]),
519            max_age=row[2],
520            etag=row[3],
521            local_date=datetime.datetime.strptime(row[4],
522                                                  CacheEntry.DATE_FORMAT),
523            last_modified=row[5],
524            mime_type=row[6],
525            item_mime_type=row[7],
526            response_body=six.binary_type(row[8]),
527        )
528
529    def _write_db(self):
530        """Flush the contents of the DB to the disk."""
531        if self.db:
532            try:
533                self.db.commit()
534            except sqlite3.Error as e:
535                self._die('Could not write database to disk', e)
536
537    def _die(self, message, inner_exception):
538        """Build an appropriate CacheError and raise it."""
539        message = '%s: %s.' % (message, inner_exception)
540
541        if self.cache_path:
542            if self.cache_path == APICache.DEFAULT_CACHE_PATH:
543                cache_args = ''
544            else:
545                cache_args = ' --cache-location %s' % self.cache_path
546
547            message += (' Try running "rbt clear-cache%s" to manually clear '
548                        'the HTTP Cache for the API.'
549                        % cache_args)
550
551        raise CacheError(message)
552
553    def _split_csv(self, csvline):
554        """Split a line of comma-separated values into a list."""
555        return [
556            s.strip()
557            for s in csvline.split(',')
558        ]
559
560
def clear_cache(cache_path=APICache.DEFAULT_CACHE_PATH):
    """Delete the HTTP cache database file used for the API.

    A confirmation is printed on success. Any failure is logged as an error
    instead of being raised.
    """
    try:
        # os.remove() is the same operation as os.unlink().
        os.remove(cache_path)
        print('Cleared cache in "%s"' % cache_path)
    except Exception as error:
        logging.error('Could not clear cache in "%s": %s. Try manually '
                      'removing it if it exists.',
                      cache_path, error)
570