1# lru_cache.py -- Simple LRU cache for dulwich
2# Copyright (C) 2006, 2008 Canonical Ltd
3#
4# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
5# General Public License as public by the Free Software Foundation; version 2.0
6# or (at your option) any later version. You can redistribute it and/or
7# modify it under the terms of either of these two licenses.
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15# You should have received a copy of the licenses; if not, see
16# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
17# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
18# License, Version 2.0.
19#
20
21"""A simple least-recently-used (LRU) cache."""
22
23_null_key = object()
24
25
26class _LRUNode(object):
27    """This maintains the linked-list which is the lru internals."""
28
29    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
30
31    def __init__(self, key, value, cleanup=None):
32        self.prev = None
33        self.next_key = _null_key
34        self.key = key
35        self.value = value
36        self.cleanup = cleanup
37        # TODO: We could compute this 'on-the-fly' like we used to, and remove
38        #       one pointer from this object, we just need to decide if it
39        #       actually costs us much of anything in normal usage
40        self.size = None
41
42    def __repr__(self):
43        if self.prev is None:
44            prev_key = None
45        else:
46            prev_key = self.prev.key
47        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
48                                     self.next_key, prev_key)
49
50    def run_cleanup(self):
51        if self.cleanup is not None:
52            self.cleanup(self.key, self.value)
53        self.cleanup = None
54        # Just make sure to break any refcycles, etc
55        self.value = None
56
57
class LRUCache(object):
    """A class which manages a cache of entries, removing unused ones.

    Entries live in a dict for O(1) lookup and are additionally threaded
    onto a doubly-linked list of _LRUNode objects, ordered from most- to
    least-recently used, so eviction of the oldest entry is also O(1).
    """

    def __init__(self, max_cache=100, after_cleanup_count=None):
        """Create a new LRUCache.

        Args:
          max_cache: Maximum number of entries to hold before cleanup()
            is triggered.
          after_cleanup_count: After a cleanup, shrink the cache to this
            many entries. Defaults to 80% of max_cache.
        """
        self._cache = {}
        # The "HEAD" of the lru linked list
        self._most_recently_used = None
        # The "TAIL" of the lru linked list
        self._least_recently_used = None
        self._update_max_cache(max_cache, after_cleanup_count)

    def __contains__(self, key):
        return key in self._cache

    def __getitem__(self, key):
        """Return the value for key, marking it most recently used.

        Raises:
          KeyError: If key is not present in the cache.
        """
        cache = self._cache
        node = cache[key]
        # Inlined from _record_access to decrease the overhead of __getitem__
        # We also have more knowledge about structure if __getitem__ is
        # succeeding, then we know that self._most_recently_used must not be
        # None, etc.
        mru = self._most_recently_used
        if node is mru:
            # Nothing to do, this node is already at the head of the queue
            return node.value
        # Remove this node from the old location
        node_prev = node.prev
        next_key = node.next_key
        # benchmarking shows that the lookup of _null_key in globals is faster
        # than the attribute lookup for (node is self._least_recently_used)
        if next_key is _null_key:
            # 'node' is the _least_recently_used, because it doesn't have a
            # 'next' item. So move the current lru to the previous node.
            self._least_recently_used = node_prev
        else:
            node_next = cache[next_key]
            node_next.prev = node_prev
        node_prev.next_key = next_key
        # Insert this node at the front of the list
        node.next_key = mru.key
        mru.prev = node
        self._most_recently_used = node
        node.prev = None
        return node.value

    def __len__(self):
        return len(self._cache)

    def _walk_lru(self):
        """Walk the LRU list, only meant to be used in tests."""
        node = self._most_recently_used
        if node is not None:
            if node.prev is not None:
                raise AssertionError('the _most_recently_used entry is not'
                                     ' supposed to have a previous entry'
                                     ' %s' % (node,))
        while node is not None:
            if node.next_key is _null_key:
                if node is not self._least_recently_used:
                    raise AssertionError('only the last node should have'
                                         ' no next value: %s' % (node,))
                node_next = None
            else:
                node_next = self._cache[node.next_key]
                if node_next.prev is not node:
                    raise AssertionError('inconsistency found, node.next.prev'
                                         ' != node: %s' % (node,))
            if node.prev is None:
                if node is not self._most_recently_used:
                    raise AssertionError('only the _most_recently_used should'
                                         ' not have a previous node: %s'
                                         % (node,))
            else:
                if node.prev.next_key != node.key:
                    raise AssertionError('inconsistency found, node.prev.next'
                                         ' != node: %s' % (node,))
            yield node
            node = node_next

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        if key in self._cache:
            # Replacing an existing entry: clean up the old value first.
            node = self._cache[key]
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        else:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        self._record_access(node)

        if len(self._cache) > self._max_cache:
            # Trigger the cleanup
            self.cleanup()

    def cache_size(self):
        """Get the number of entries we will cache."""
        return self._max_cache

    def get(self, key, default=None):
        """Return the value for key (marking it recently used), or default."""
        node = self._cache.get(key, None)
        if node is None:
            return default
        self._record_access(node)
        return node.value

    def keys(self):
        """Get the list of keys currently cached.

        Note that values returned here may not be available by the time you
        request them later. This is simply meant as a peek into the current
        state.

        Returns: An unordered list of keys that are currently cached.
        """
        return self._cache.keys()

    def items(self):
        """Get the key:value pairs as a dict."""
        return dict((k, n.value) for k, n in self._cache.items())

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_count.
        """
        # Make sure the cache is shrunk to the correct size
        while len(self._cache) > self._after_cleanup_count:
            self._remove_lru()

    def __setitem__(self, key, value):
        """Add a value to the cache, there will be no cleanup function."""
        self.add(key, value, cleanup=None)

    def _record_access(self, node):
        """Record that key was accessed."""
        # Move 'node' to the front of the queue
        if self._most_recently_used is None:
            self._most_recently_used = node
            self._least_recently_used = node
            return
        elif node is self._most_recently_used:
            # Nothing to do, this node is already at the head of the queue
            return
        # We've taken care of the tail pointer, remove the node, and insert it
        # at the front
        # REMOVE
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # INSERT
        node.next_key = self._most_recently_used.key
        self._most_recently_used.prev = node
        self._most_recently_used = node
        node.prev = None

    def _remove_node(self, node):
        """Remove node from the cache dict and the linked list."""
        if node is self._least_recently_used:
            self._least_recently_used = node.prev
        self._cache.pop(node.key)
        # If we have removed all entries, remove the head pointer as well
        if self._least_recently_used is None:
            self._most_recently_used = None
        node.run_cleanup()
        # Now remove this node from the linked list
        if node.prev is not None:
            node.prev.next_key = node.next_key
        if node.next_key is not _null_key:
            node_next = self._cache[node.next_key]
            node_next.prev = node.prev
        # And remove this node's pointers
        node.prev = None
        node.next_key = _null_key

    def _remove_lru(self):
        """Remove one entry from the lru, and handle consequences.

        If there are no more references to the lru, then this entry should be
        removed from the cache.
        """
        self._remove_node(self._least_recently_used)

    def clear(self):
        """Clear out all of the cache."""
        # Clean up in LRU order
        while self._cache:
            self._remove_lru()

    def resize(self, max_cache, after_cleanup_count=None):
        """Change the number of entries that will be cached."""
        self._update_max_cache(max_cache,
                               after_cleanup_count=after_cleanup_count)

    def _update_max_cache(self, max_cache, after_cleanup_count=None):
        self._max_cache = max_cache
        if after_cleanup_count is None:
            # Default to shrinking to 80% of max_cache. Use floor
            # division so the threshold stays an int under Python 3's
            # true division (previously `* 8 / 10` produced a float),
            # matching LRUSizeCache._update_max_size.
            self._after_cleanup_count = self._max_cache * 8 // 10
        else:
            self._after_cleanup_count = min(after_cleanup_count,
                                            self._max_cache)
        self.cleanup()
276
277
class LRUSizeCache(LRUCache):
    """An LRUCache that removes things based on the size of the values.

    This differs in that it doesn't care how many actual items there are,
    it just restricts the cache to be cleaned up after so much data is stored.

    The size of items added will be computed using compute_size(value), which
    defaults to len() if not supplied.
    """

    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
                 compute_size=None):
        """Create a new LRUSizeCache.

        Args:
          max_size: The max number of bytes to store before we start
            clearing out entries.
          after_cleanup_size: After cleaning up, shrink everything to this
            size.
          compute_size: A function to compute the size of the values. We
            use a function here, so that you can pass 'len' if you are just
            using simple strings, or a more complex function if you are using
            something like a list of strings, or even a custom object.
            The function should take the form "compute_size(value) => integer".
            If not supplied, it defaults to 'len()'
        """
        # Running total of the size of all cached values.
        self._value_size = 0
        self._compute_size = compute_size
        if compute_size is None:
            self._compute_size = len
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        # Floor division (not int(max_size/512)) avoids float rounding
        # for very large max_size values; assume ~512 bytes per entry.
        LRUCache.__init__(self, max_cache=max(max_size // 512, 1))

    def add(self, key, value, cleanup=None):
        """Add a new value to the cache.

        Also, if the entry is ever removed from the cache, call
        cleanup(key, value).

        Args:
          key: The key to store it under
          value: The object to store
          cleanup: None or a function taking (key, value) to indicate
                        'value' should be cleaned up.
        """
        if key is _null_key:
            raise ValueError('cannot use _null_key as a key')
        node = self._cache.get(key, None)
        value_len = self._compute_size(value)
        if value_len >= self._after_cleanup_size:
            # The new value is 'too big to fit', as it would fill up/overflow
            # the cache all by itself
            if node is not None:
                # We won't be replacing the old node, so just remove it
                self._remove_node(node)
            if cleanup is not None:
                cleanup(key, value)
            return
        if node is None:
            node = _LRUNode(key, value, cleanup=cleanup)
            self._cache[key] = node
        else:
            # Replacing an existing entry: clean up the old value, store
            # the new one, and adjust the size accounting. (Previously
            # the node's old value was left in place, so lookups kept
            # returning stale data.)
            self._value_size -= node.size
            node.run_cleanup()
            node.value = value
            node.cleanup = cleanup
        node.size = value_len
        self._value_size += value_len
        self._record_access(node)

        if self._value_size > self._max_size:
            # Time to cleanup
            self.cleanup()

    def cleanup(self):
        """Clear the cache until it shrinks to the requested size.

        This does not completely wipe the cache, just makes sure it is under
        the after_cleanup_size.
        """
        # Make sure the cache is shrunk to the correct size
        while self._value_size > self._after_cleanup_size:
            self._remove_lru()

    def _remove_node(self, node):
        """Remove node, deducting its size from the running total."""
        self._value_size -= node.size
        LRUCache._remove_node(self, node)

    def resize(self, max_size, after_cleanup_size=None):
        """Change the number of bytes that will be cached."""
        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
        max_cache = max(max_size // 512, 1)
        self._update_max_cache(max_cache)

    def _update_max_size(self, max_size, after_cleanup_size=None):
        self._max_size = max_size
        if after_cleanup_size is None:
            # Default to shrinking to 80% of max_size after a cleanup.
            self._after_cleanup_size = self._max_size * 8 // 10
        else:
            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
375