# Copyright 2014-2017 Insight Software Consortium.
# Copyright 2004-2009 Roman Yakovenko.
# Distributed under the Boost Software License, Version 1.0.
# See http://www.boost.org/LICENSE_1_0.txt
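"""
Caches for declarations produced by parsing C++ source files.

Provides a pickle-backed file cache (:class:`file_cache_t`) and a no-op cache
(:class:`dummy_cache_t`).  Cache entries are keyed by SHA1 signatures of the
parsed source file and of the parser configuration.
"""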

import os
import timeit
import hashlib
try:
    import cPickle as pickle
except ImportError:
    import pickle
from pygccxml import utils
from . import config as cxx_parsers_cfg


def file_signature(filename):
    """
    Return a SHA1 signature for the contents of a file.

    Returns None if the file does not exist or is not a regular file.

    """

    if not os.path.isfile(filename):
        return None

    # Duplicate auto-generated files can be recognized with the sha1 hash.
    sig = hashlib.sha1()
    with open(filename, "rb") as f:
        sig.update(f.read())

    return sig.hexdigest()
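
# Illustrative sketch (not part of the original module): because the signature
# is a digest of the file's bytes, two files with identical contents map to
# the same value, which is how duplicate auto-generated headers are detected.
# The file names below are hypothetical.
#
#   sig_a = file_signature("generated/a.hpp")
#   sig_b = file_signature("generated/copy_of_a.hpp")
#   sig_a == sig_b  # True when both files are byte-identical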


def configuration_signature(config):
    """
    Return a signature for a configuration (xml_generator_configuration_t)
    object.

    The signature can then be used as a key in the cache. It must take into
    account anything about the configuration that could cause the generated
    declarations to differ between runs.

    """

    sig = hashlib.sha1()
    if isinstance(config, cxx_parsers_cfg.xml_generator_configuration_t):
        sig.update(str(config.xml_generator_path).encode('utf-8'))
    sig.update(str(config.working_directory).encode('utf-8'))
    if isinstance(config, cxx_parsers_cfg.xml_generator_configuration_t):
        sig.update(str(config.cflags).encode('utf-8'))
    for p in config.include_paths:
        sig.update(str(p).encode('utf-8'))
    for s in config.define_symbols:
        sig.update(str(s).encode('utf-8'))
    for u in config.undefine_symbols:
        sig.update(str(u).encode('utf-8'))
    return sig.hexdigest()
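
# Illustrative sketch (not part of the original module): anything that can
# change the generated declarations, e.g. an extra define, yields a different
# signature and therefore a different cache key.  ``config`` is assumed to be
# an xml_generator_configuration_t instance with a list-valued
# ``define_symbols`` attribute.
#
#   old_sig = configuration_signature(config)
#   config.define_symbols.append("MY_DEFINE")
#   configuration_signature(config) != old_sig  # True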


class cache_base_t(object):

    """ Interface for a cache of parsed declarations. """

    logger = utils.loggers.declarations_cache

    def __init__(self):
        object.__init__(self)

    def flush(self):
        """ Flush (write out) the cache to disk if needed. """

        raise NotImplementedError()

    def update(self, source_file, configuration, declarations, included_files):
        """
        Update the cache entry for a source file and configuration.

        :param source_file: path to the C++ source file being parsed
        :param configuration: configuration used for parsing
               (:class:`xml_generator_configuration_t`)
        :param declarations: declaration tree found when parsing
        :param included_files: files included during parsing
        """

        raise NotImplementedError()

    def cached_value(self, source_file, configuration):
        """
        Return the declarations we have cached for the source_file and the
        given configuration.

        :param source_file: path to the C++ source file being parsed.
        :param configuration: configuration that was used for parsing.

        """

        raise NotImplementedError()
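
# A minimal sketch (illustrative only, not part of the original module) of a
# custom cache implementing the interface above: a purely in-memory cache that
# never persists anything and skips the included-file signature checks done by
# file_cache_t below.  The class name is hypothetical.
#
#   class memory_cache_t(cache_base_t):
#
#       def __init__(self):
#           cache_base_t.__init__(self)
#           self.__cache = {}  # maps record_t.create_key(...) to declarations
#
#       def flush(self):
#           pass  # nothing to write out
#
#       def update(self, source_file, configuration, declarations,
#                  included_files):
#           key = record_t.create_key(source_file, configuration)
#           self.__cache[key] = declarations
#
#       def cached_value(self, source_file, configuration):
#           key = record_t.create_key(source_file, configuration)
#           return self.__cache.get(key)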


class record_t(object):

    """ A single cache entry: the signatures, included files and declarations
        cached for one source file / configuration pair.
    """

    def __init__(
            self,
            source_signature,
            config_signature,
            included_files,
            included_files_signature,
            declarations):
        self.__source_signature = source_signature
        self.__config_signature = config_signature
        self.__included_files = included_files
        self.__included_files_signature = included_files_signature
        self.__declarations = declarations
        self.__was_hit = True  # Track if there was a cache hit

    @property
    def was_hit(self):
        return self.__was_hit

    @was_hit.setter
    def was_hit(self, was_hit):
        self.__was_hit = was_hit

    def key(self):
        return self.__source_signature, self.__config_signature

    @staticmethod
    def create_key(source_file, configuration):
        return (
            file_signature(source_file),
            configuration_signature(configuration))

    @property
    def source_signature(self):
        return self.__source_signature

    @property
    def config_signature(self):
        return self.__config_signature

    @property
    def included_files(self):
        return self.__included_files

    @property
    def included_files_signature(self):
        return self.__included_files_signature

    @property
    def declarations(self):
        return self.__declarations


class file_cache_t(cache_base_t):

    """ Cache implementation that stores its data in pickled form in a file.
        The cache keeps track of which entries were 'hit' during a run; any
        entry that was not hit is deleted at flush() time.  This keeps the
        cache from growing when files change and are not used again.
    """

    def __init__(self, name):
        """
        :param name: name of the cache file.
        """

        cache_base_t.__init__(self)
        self.__name = name  # Name of the cache file
        # Maps record keys to record_t instances
        self.__cache = self.__load(self.__name)
        # If the cache is empty then we need to flush it
        self.__needs_flushed = not bool(self.__cache)
        for entry in self.__cache.values():  # Clear hit flags
            entry.was_hit = False
    @staticmethod
    def __load(file_name):
        """ Load pickled cache from file and return the object. """

        if os.path.exists(file_name) and not os.path.isfile(file_name):
            raise RuntimeError(
                'Cache should be initialized with valid full file name')
        if not os.path.exists(file_name):
            open(file_name, 'w+b').close()
            return {}
        cache_file_obj = open(file_name, 'rb')
        try:
            file_cache_t.logger.info('Loading cache file "%s".', file_name)
            start_time = timeit.default_timer()
            cache = pickle.load(cache_file_obj)
            file_cache_t.logger.debug(
                "Cache file has been loaded in %.1f secs",
                (timeit.default_timer() - start_time))
            file_cache_t.logger.debug(
                "Found cache in file: [%s]  entries: %s",
                file_name, len(list(cache.keys())))
        except (pickle.UnpicklingError, AttributeError, EOFError,
                ImportError, IndexError) as error:
            file_cache_t.logger.exception(
                "Error occurred while reading cache file: %s",
                error)
            cache_file_obj.close()
            file_cache_t.logger.info(
                "Invalid cache file: [%s]  Regenerating.",
                file_name)
            open(file_name, 'w+b').close()   # Create empty file
            cache = {}                       # Empty cache
        finally:
            cache_file_obj.close()
        return cache

    def flush(self):
        # If the cache is not marked as needing a flush, return immediately
        if not self.__needs_flushed:
            self.logger.debug("Cache did not change, ignoring flush.")
            return

        # Remove entries that did not get a cache hit
        num_removed = 0
        for key in list(self.__cache.keys()):
            if not self.__cache[key].was_hit:
                num_removed += 1
                del self.__cache[key]
        if num_removed > 0:
            self.logger.debug(
                "Removed %s stale entries from the cache.",
                num_removed)
        # Save out the cache to disk
        with open(self.__name, "w+b") as cache_file:
            pickle.dump(self.__cache, cache_file, pickle.HIGHEST_PROTOCOL)

    def update(self, source_file, configuration, declarations, included_files):
        """ Update a cached record with the current key and value contents. """

        record = record_t(
            source_signature=file_signature(source_file),
            config_signature=configuration_signature(configuration),
            included_files=included_files,
            included_files_signature=list(
                map(
                    file_signature,
                    included_files)),
            declarations=declarations)
        # The full record is held in the cache so that cached_value() does
        # not have to recreate records on every lookup.
        record.was_hit = True
        self.__cache[record.key()] = record
        self.__needs_flushed = True

    def cached_value(self, source_file, configuration):
        """
        Attempt to look up the cached declarations for the given file and
        configuration.

        Returns None if no declarations were found or the signature check
        fails.

        """

        key = record_t.create_key(source_file, configuration)
        if key not in self.__cache:
            return None
        record = self.__cache[key]
        if self.__is_valid_signature(record):
            record.was_hit = True  # Record cache hit
            return record.declarations

        # Some included file has changed; drop the stale record
        del self.__cache[key]
        return None

    @staticmethod
    def __is_valid_signature(record):
        for index, included_file in enumerate(record.included_files):
            if file_signature(included_file) != \
                    record.included_files_signature[index]:
                return False
        return True


class dummy_cache_t(cache_base_t):

    """
    An empty cache object: it stores nothing and always reports a cache miss.

    By default no caching is enabled in pygccxml.

    """

    def __init__(self):
        cache_base_t.__init__(self)

    def flush(self):
        pass

    def update(self, source_file, configuration, declarations, included_files):
        pass

    def cached_value(self, source_file, configuration):
        return None