1# lru_cache.py -- Simple LRU cache for dulwich 2# Copyright (C) 2006, 2008 Canonical Ltd 3# 4# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 5# General Public License as public by the Free Software Foundation; version 2.0 6# or (at your option) any later version. You can redistribute it and/or 7# modify it under the terms of either of these two licenses. 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# 15# You should have received a copy of the licenses; if not, see 16# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 17# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 18# License, Version 2.0. 19# 20 21"""A simple least-recently-used (LRU) cache.""" 22 23_null_key = object() 24 25 26class _LRUNode(object): 27 """This maintains the linked-list which is the lru internals.""" 28 29 __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size') 30 31 def __init__(self, key, value, cleanup=None): 32 self.prev = None 33 self.next_key = _null_key 34 self.key = key 35 self.value = value 36 self.cleanup = cleanup 37 # TODO: We could compute this 'on-the-fly' like we used to, and remove 38 # one pointer from this object, we just need to decide if it 39 # actually costs us much of anything in normal usage 40 self.size = None 41 42 def __repr__(self): 43 if self.prev is None: 44 prev_key = None 45 else: 46 prev_key = self.prev.key 47 return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key, 48 self.next_key, prev_key) 49 50 def run_cleanup(self): 51 if self.cleanup is not None: 52 self.cleanup(self.key, self.value) 53 self.cleanup = None 54 # Just make sure to break any refcycles, etc 55 self.value = None 56 57 58class LRUCache(object): 59 """A class which manages a cache of entries, removing unused ones.""" 60 61 def __init__(self, max_cache=100, after_cleanup_count=None): 62 self._cache = {} 63 # The "HEAD" of the lru linked list 64 self._most_recently_used = None 65 # The "TAIL" of the lru linked list 66 self._least_recently_used = None 67 self._update_max_cache(max_cache, after_cleanup_count) 68 69 def __contains__(self, key): 70 return key in self._cache 71 72 def __getitem__(self, key): 73 cache = self._cache 74 node = cache[key] 75 # Inlined from _record_access to decrease the overhead of __getitem__ 76 # We also have more knowledge about structure if __getitem__ is 77 # succeeding, then we know that self._most_recently_used must not be 78 # None, etc. 79 mru = self._most_recently_used 80 if node is mru: 81 # Nothing to do, this node is already at the head of the queue 82 return node.value 83 # Remove this node from the old location 84 node_prev = node.prev 85 next_key = node.next_key 86 # benchmarking shows that the lookup of _null_key in globals is faster 87 # than the attribute lookup for (node is self._least_recently_used) 88 if next_key is _null_key: 89 # 'node' is the _least_recently_used, because it doesn't have a 90 # 'next' item. So move the current lru to the previous node. 91 self._least_recently_used = node_prev 92 else: 93 node_next = cache[next_key] 94 node_next.prev = node_prev 95 node_prev.next_key = next_key 96 # Insert this node at the front of the list 97 node.next_key = mru.key 98 mru.prev = node 99 self._most_recently_used = node 100 node.prev = None 101 return node.value 102 103 def __len__(self): 104 return len(self._cache) 105 106 def _walk_lru(self): 107 """Walk the LRU list, only meant to be used in tests.""" 108 node = self._most_recently_used 109 if node is not None: 110 if node.prev is not None: 111 raise AssertionError('the _most_recently_used entry is not' 112 ' supposed to have a previous entry' 113 ' %s' % (node,)) 114 while node is not None: 115 if node.next_key is _null_key: 116 if node is not self._least_recently_used: 117 raise AssertionError('only the last node should have' 118 ' no next value: %s' % (node,)) 119 node_next = None 120 else: 121 node_next = self._cache[node.next_key] 122 if node_next.prev is not node: 123 raise AssertionError('inconsistency found, node.next.prev' 124 ' != node: %s' % (node,)) 125 if node.prev is None: 126 if node is not self._most_recently_used: 127 raise AssertionError('only the _most_recently_used should' 128 ' not have a previous node: %s' 129 % (node,)) 130 else: 131 if node.prev.next_key != node.key: 132 raise AssertionError('inconsistency found, node.prev.next' 133 ' != node: %s' % (node,)) 134 yield node 135 node = node_next 136 137 def add(self, key, value, cleanup=None): 138 """Add a new value to the cache. 139 140 Also, if the entry is ever removed from the cache, call 141 cleanup(key, value). 142 143 Args: 144 key: The key to store it under 145 value: The object to store 146 cleanup: None or a function taking (key, value) to indicate 147 'value' should be cleaned up. 148 """ 149 if key is _null_key: 150 raise ValueError('cannot use _null_key as a key') 151 if key in self._cache: 152 node = self._cache[key] 153 node.run_cleanup() 154 node.value = value 155 node.cleanup = cleanup 156 else: 157 node = _LRUNode(key, value, cleanup=cleanup) 158 self._cache[key] = node 159 self._record_access(node) 160 161 if len(self._cache) > self._max_cache: 162 # Trigger the cleanup 163 self.cleanup() 164 165 def cache_size(self): 166 """Get the number of entries we will cache.""" 167 return self._max_cache 168 169 def get(self, key, default=None): 170 node = self._cache.get(key, None) 171 if node is None: 172 return default 173 self._record_access(node) 174 return node.value 175 176 def keys(self): 177 """Get the list of keys currently cached. 178 179 Note that values returned here may not be available by the time you 180 request them later. This is simply meant as a peak into the current 181 state. 182 183 Returns: An unordered list of keys that are currently cached. 184 """ 185 return self._cache.keys() 186 187 def items(self): 188 """Get the key:value pairs as a dict.""" 189 return dict((k, n.value) for k, n in self._cache.items()) 190 191 def cleanup(self): 192 """Clear the cache until it shrinks to the requested size. 193 194 This does not completely wipe the cache, just makes sure it is under 195 the after_cleanup_count. 196 """ 197 # Make sure the cache is shrunk to the correct size 198 while len(self._cache) > self._after_cleanup_count: 199 self._remove_lru() 200 201 def __setitem__(self, key, value): 202 """Add a value to the cache, there will be no cleanup function.""" 203 self.add(key, value, cleanup=None) 204 205 def _record_access(self, node): 206 """Record that key was accessed.""" 207 # Move 'node' to the front of the queue 208 if self._most_recently_used is None: 209 self._most_recently_used = node 210 self._least_recently_used = node 211 return 212 elif node is self._most_recently_used: 213 # Nothing to do, this node is already at the head of the queue 214 return 215 # We've taken care of the tail pointer, remove the node, and insert it 216 # at the front 217 # REMOVE 218 if node is self._least_recently_used: 219 self._least_recently_used = node.prev 220 if node.prev is not None: 221 node.prev.next_key = node.next_key 222 if node.next_key is not _null_key: 223 node_next = self._cache[node.next_key] 224 node_next.prev = node.prev 225 # INSERT 226 node.next_key = self._most_recently_used.key 227 self._most_recently_used.prev = node 228 self._most_recently_used = node 229 node.prev = None 230 231 def _remove_node(self, node): 232 if node is self._least_recently_used: 233 self._least_recently_used = node.prev 234 self._cache.pop(node.key) 235 # If we have removed all entries, remove the head pointer as well 236 if self._least_recently_used is None: 237 self._most_recently_used = None 238 node.run_cleanup() 239 # Now remove this node from the linked list 240 if node.prev is not None: 241 node.prev.next_key = node.next_key 242 if node.next_key is not _null_key: 243 node_next = self._cache[node.next_key] 244 node_next.prev = node.prev 245 # And remove this node's pointers 246 node.prev = None 247 node.next_key = _null_key 248 249 def _remove_lru(self): 250 """Remove one entry from the lru, and handle consequences. 251 252 If there are no more references to the lru, then this entry should be 253 removed from the cache. 254 """ 255 self._remove_node(self._least_recently_used) 256 257 def clear(self): 258 """Clear out all of the cache.""" 259 # Clean up in LRU order 260 while self._cache: 261 self._remove_lru() 262 263 def resize(self, max_cache, after_cleanup_count=None): 264 """Change the number of entries that will be cached.""" 265 self._update_max_cache(max_cache, 266 after_cleanup_count=after_cleanup_count) 267 268 def _update_max_cache(self, max_cache, after_cleanup_count=None): 269 self._max_cache = max_cache 270 if after_cleanup_count is None: 271 self._after_cleanup_count = self._max_cache * 8 / 10 272 else: 273 self._after_cleanup_count = min(after_cleanup_count, 274 self._max_cache) 275 self.cleanup() 276 277 278class LRUSizeCache(LRUCache): 279 """An LRUCache that removes things based on the size of the values. 280 281 This differs in that it doesn't care how many actual items there are, 282 it just restricts the cache to be cleaned up after so much data is stored. 283 284 The size of items added will be computed using compute_size(value), which 285 defaults to len() if not supplied. 286 """ 287 288 def __init__(self, max_size=1024*1024, after_cleanup_size=None, 289 compute_size=None): 290 """Create a new LRUSizeCache. 291 292 Args: 293 max_size: The max number of bytes to store before we start 294 clearing out entries. 295 after_cleanup_size: After cleaning up, shrink everything to this 296 size. 297 compute_size: A function to compute the size of the values. We 298 use a function here, so that you can pass 'len' if you are just 299 using simple strings, or a more complex function if you are using 300 something like a list of strings, or even a custom object. 301 The function should take the form "compute_size(value) => integer". 302 If not supplied, it defaults to 'len()' 303 """ 304 self._value_size = 0 305 self._compute_size = compute_size 306 if compute_size is None: 307 self._compute_size = len 308 self._update_max_size(max_size, after_cleanup_size=after_cleanup_size) 309 LRUCache.__init__(self, max_cache=max(int(max_size/512), 1)) 310 311 def add(self, key, value, cleanup=None): 312 """Add a new value to the cache. 313 314 Also, if the entry is ever removed from the cache, call 315 cleanup(key, value). 316 317 Args: 318 key: The key to store it under 319 value: The object to store 320 cleanup: None or a function taking (key, value) to indicate 321 'value' should be cleaned up. 322 """ 323 if key is _null_key: 324 raise ValueError('cannot use _null_key as a key') 325 node = self._cache.get(key, None) 326 value_len = self._compute_size(value) 327 if value_len >= self._after_cleanup_size: 328 # The new value is 'too big to fit', as it would fill up/overflow 329 # the cache all by itself 330 if node is not None: 331 # We won't be replacing the old node, so just remove it 332 self._remove_node(node) 333 if cleanup is not None: 334 cleanup(key, value) 335 return 336 if node is None: 337 node = _LRUNode(key, value, cleanup=cleanup) 338 self._cache[key] = node 339 else: 340 self._value_size -= node.size 341 node.size = value_len 342 self._value_size += value_len 343 self._record_access(node) 344 345 if self._value_size > self._max_size: 346 # Time to cleanup 347 self.cleanup() 348 349 def cleanup(self): 350 """Clear the cache until it shrinks to the requested size. 351 352 This does not completely wipe the cache, just makes sure it is under 353 the after_cleanup_size. 354 """ 355 # Make sure the cache is shrunk to the correct size 356 while self._value_size > self._after_cleanup_size: 357 self._remove_lru() 358 359 def _remove_node(self, node): 360 self._value_size -= node.size 361 LRUCache._remove_node(self, node) 362 363 def resize(self, max_size, after_cleanup_size=None): 364 """Change the number of bytes that will be cached.""" 365 self._update_max_size(max_size, after_cleanup_size=after_cleanup_size) 366 max_cache = max(int(max_size/512), 1) 367 self._update_max_cache(max_cache) 368 369 def _update_max_size(self, max_size, after_cleanup_size=None): 370 self._max_size = max_size 371 if after_cleanup_size is None: 372 self._after_cleanup_size = self._max_size * 8 // 10 373 else: 374 self._after_cleanup_size = min(after_cleanup_size, self._max_size) 375