1 2""" 3``grizzled.collections.dict`` contains some useful dictionary classes 4that extend the behavior of the built-in Python ``dict`` type. 5""" 6__docformat__ = "restructuredtext en" 7 8# --------------------------------------------------------------------------- 9# Imports 10# --------------------------------------------------------------------------- 11 12import sys 13 14# --------------------------------------------------------------------------- 15# Exports 16# --------------------------------------------------------------------------- 17 18__all__ = ['OrderedDict', 'LRUDict'] 19 20# --------------------------------------------------------------------------- 21# Public Classes 22# --------------------------------------------------------------------------- 23 24 ############################## 25 # OrderedDict implementation # 26 ############################## 27 28class OrderedDict(dict): 29 """ 30 ``OrderedDict`` is a simple ordered dictionary. The ``keys()``, 31 ``items()``, ``__iter__()``, and other methods all return the keys in the 32 order they were added to the dictionary. Note that re-adding a key (i.e., 33 replacing a key with a new value) does not change the original order. 34 35 An ``OrderedDict`` object is instantiated with exactly the same parameters 36 as a ``dict`` object. The methods it provides are identical to those in 37 the ``dict`` type and are not documented here. 38 """ 39 def __init__(self, *args, **kw): 40 dict.__init__(self, *args, **kw) 41 self.__orderedKeys = [] 42 self.__keyPositions = {} 43 44 def __setitem__(self, key, value): 45 try: 46 index = self.__keyPositions[key] 47 except KeyError: 48 index = len(self.__orderedKeys) 49 self.__orderedKeys += [key] 50 self.__keyPositions[key] = index 51 52 dict.__setitem__(self, key, value) 53 54 def __delitem__(self, key): 55 index = self.__keyPositions[key] 56 del self.__orderedKeys[index] 57 del self.__keyPositions[key] 58 dict.__delitem__(self, key) 59 60 def __iter__(self): 61 for key in self.__orderedKeys: 62 yield key 63 64 def __str__(self): 65 s = '{' 66 sep = '' 67 for k, v in self.iteritems(): 68 s += sep 69 if type(k) == str: 70 s += "'%s'" % k 71 else: 72 s += str(k) 73 74 s += ': ' 75 if type(v) == str: 76 s += "'%s'" % v 77 else: 78 s += str(v) 79 sep = ', ' 80 s += '}' 81 return s 82 83 def keys(self): 84 return self.__orderedKeys 85 86 def items(self): 87 return [(key, self[key]) for key in self.__orderedKeys] 88 89 def values(self): 90 return [self[key] for key in self.__orderedKeys] 91 92 def iteritems(self): 93 for key in self.__orderedKeys: 94 yield (key, self[key]) 95 96 def iterkeys(self): 97 for key in self.__orderedKeys: 98 yield key 99 100 def update(self, d): 101 for key, value in d.iteritems(): 102 self[key] = value 103 104 def pop(self, key, default=None): 105 try: 106 result = self[key] 107 del self[key] 108 109 except KeyError: 110 if not default: 111 raise 112 113 result = default 114 115 return result 116 117 def popitem(self): 118 key, value = dict.popitem(self) 119 del self[key] 120 return (key, value) 121 122 ########################## 123 # LRUDict Implementation # 124 ########################## 125 126# Implementation note: 127# 128# Each entry in the LRUDict dictionary is an LRUListEntry. Basically, 129# we maintain two structures: 130# 131# 1. A linked list of dictionary entries, in order of recency. 132# 2. A dictionary of those linked list items. 133# 134# When accessing or updating an entry in the dictionary, the logic 135# is more or less like the following "get" scenario: 136# 137# - Using the key, get the LRUListEntry from the dictionary. 138# - Extract the value from the LRUListEntry, to return to the caller. 139# - Move the LRUListEntry to the front of the recency queue. 140 141class LRUListEntry(object): 142 143 def __init__(self, key, value): 144 self.key = key 145 self.value = value 146 self.next = None 147 self.prev = None 148 149 def __hash__(self): 150 return self.key.__hash__() 151 152 def __str__(self): 153 return '(%s, %s)' % (self.key, self.value) 154 155 def __repr__(self): 156 return str(self) 157 158class LRUList(object): 159 160 def __init__(self): 161 self.head = self.tail = None 162 self.size = 0 163 164 def __del__(self): 165 self.clear() 166 167 def __str__(self): 168 return '[' + ', '.join([str(tup) for tup in self.items()]) + ']' 169 170 def __repr__(self): 171 return self.__class__.__name__ + ':' + str(self) 172 173 def __len__(self): 174 return self.size 175 176 def __iter__(self): 177 entry = self.head 178 while entry: 179 yield entry.key 180 entry = entry.next 181 182 def keys(self): 183 return [k for k in self] 184 185 def items(self): 186 result = [] 187 for key, value in self.iteritems(): 188 result.append((key, value)) 189 return result 190 191 def values(self): 192 result = [] 193 for key, value in self.iteritems(): 194 result.append(value) 195 return result 196 197 def iteritems(self): 198 entry = self.head 199 while entry: 200 yield (entry.key, entry.value) 201 entry = entry.next 202 203 def iterkeys(self): 204 self.__iter__() 205 206 def itervalues(self): 207 entry = self.head 208 while entry: 209 yield entry.value 210 entry = entry.next 211 212 def clear(self): 213 while self.head: 214 cur = self.head 215 next = self.head.next 216 cur.next = cur.previous = cur.key = cur.value = None 217 self.head = next 218 219 self.tail = None 220 self.size = 0 221 222 def remove(self, entry): 223 if entry.next: 224 entry.next.previous = entry.previous 225 226 if entry.previous: 227 entry.previous.next = entry.next 228 229 if entry == self.head: 230 self.head = entry.next 231 232 if entry == self.tail: 233 self.tail = entry.previous 234 235 entry.next = entry.previous = None 236 self.size -= 1 237 assert self.size >= 0 238 239 def remove_tail(self): 240 result = self.tail 241 242 if result: 243 self.remove(result) 244 245 return result 246 247 def add_to_head(self, entry): 248 if type(entry) == tuple: 249 key, value = entry 250 entry = LRUListEntry(key, value) 251 else: 252 entry.next = entry.previous = None 253 254 if self.head: 255 assert self.tail 256 entry.next = self.head 257 self.head.previous = entry 258 self.head = entry 259 260 else: 261 assert not self.tail 262 self.head = self.tail = entry 263 264 self.size += 1 265 266 def move_to_head(self, entry): 267 self.remove(entry) 268 self.add_to_head(entry) 269 270class LRUDict(dict): 271 """ 272 ``LRUDict`` is a dictionary of a fixed maximum size that enforces a least 273 recently used discard policy. When the dictionary is full (i.e., contains 274 the maximum number of entries), any attempt to insert a new entry causes 275 one of the least recently used entries to be discarded. 276 277 **Note**: 278 279 - Setting or updating a key in the dictionary refreshes the corresponding 280 value, making it "new" again, even if it replaces the existing value with 281 itself. 282 - Retrieving a value from the dictionary also refreshes the entry. 283 - Iterating over the contents of the dictionary (via ``in`` or ``items()`` 284 or any other similar method) does *not* affect the recency of the 285 dictionary's contents. 286 - This implementation is *not* thread-safe. 287 288 An ``LRUDict`` also supports the concept of *removal listeners*. Removal 289 listeners are functions that are notified when objects are removed from 290 the dictionary. Removal listeners can be: 291 292 - *eject only* listeners, meaning they're only notified when objects are 293 ejected from the cache to make room for new objects, or 294 - *removal* listeners, meaning they're notified whenever an object is 295 removed for *any* reason, including via ``del``. 296 297 This implementation is based on a Java ``LRUMap`` class in the 298 ``org.clapper.util`` library. See http://www.clapper.org/software/java/util/ 299 for details. 300 """ 301 def __init__(self, *args, **kw): 302 """ 303 Initialize an ``LRUDict`` that will hold, at most, ``max_capacity`` 304 items. Attempts to insert more than ``max_capacity`` items in the 305 dictionary will cause the least-recently used entries to drop out of 306 the dictionary. 307 308 :Keywords: 309 max_capacity : int 310 The maximum size of the dictionary 311 """ 312 if kw.has_key('max_capacity'): 313 self.__max_capacity = kw['max_capacity'] 314 del kw['max_capacity'] 315 else: 316 self.__max_capacity = sys.maxint 317 318 dict.__init__(self) 319 self.__removal_listeners = {} 320 self.__lru_queue = LRUList() 321 322 def __del__(self): 323 self.clear() 324 325 def get_max_capacity(self): 326 """ 327 Get the maximum capacity of the dictionary. 328 329 :rtype: int 330 :return: the maximum capacity 331 """ 332 return self.__max_capacity 333 334 def set_max_capacity(self, new_capacity): 335 """ 336 Set or change the maximum capacity of the dictionary. Reducing 337 the size of a dictionary with items already in it might result 338 in items being evicted. 339 340 :Parameters: 341 new_capacity : int 342 the new maximum capacity 343 """ 344 self.__max_capacity = new_capacity 345 if len(self) > new_capacity: 346 self.__clear_to(new_capacity) 347 348 max_capacity = property(get_max_capacity, set_max_capacity, 349 doc='The maximum capacity. Can be reset at will.') 350 351 def add_ejection_listener(self, listener, *args): 352 """ 353 Add an ejection listener to the dictionary. The listener function 354 should take at least two parameters: the key and value being removed. 355 It can also take additional parameters, which are passed through 356 unmodified. 357 358 An ejection listener is only notified when objects are ejected from 359 the cache to make room for new objects; more to the point, an ejection 360 listener is never notified when an object is removed from the cache 361 manually, via use of the ``del`` operator. 362 363 :Parameters: 364 listener : function 365 Function to invoke 366 367 args : iterable 368 Any additional parameters to pass to the function 369 """ 370 self.__removal_listeners[listener] = (True, args) 371 372 def add_removal_listener(self, listener, *args): 373 """ 374 Add a removal listener to the dictionary. The listener function should 375 take at least two parameters: the key and value being removed. It can 376 also take additional parameters, which are passed through unmodified. 377 378 A removal listener is notified when objects are ejected from the cache 379 to make room for new objects *and* when objects are manually deleted 380 from the cache. 381 382 :Parameters: 383 listener : function 384 Function to invoke 385 386 args : iterable 387 Any additional parameters to pass to the function 388 """ 389 self.__removal_listeners[listener] = (False, args) 390 391 def remove_listener(self, listener): 392 """ 393 Remove the specified removal or ejection listener from the list of 394 listeners. 395 396 :Parameters: 397 listener : function 398 Function object to remove 399 400 :rtype: bool 401 :return: ``True`` if the listener was found and removed; ``False`` 402 otherwise 403 """ 404 try: 405 del self.__removal_listeners[listener] 406 return True 407 except KeyError: 408 return False 409 410 def clear_listeners(self): 411 """ 412 Clear all removal and ejection listeners from the list of listeners. 413 """ 414 for key in self.__removal_listeners.keys(): 415 del self.__removal_listeners[key] 416 417 def __setitem__(self, key, value): 418 self.__put(key, value) 419 420 def __getitem__(self, key): 421 lru_entry = dict.__getitem__(self, key) 422 self.__lru_queue.move_to_head(lru_entry) 423 return lru_entry.value 424 425 def __delitem__(self, key): 426 lru_entry = dict.__getitem__(self, key) 427 self.__lru_queue.remove(lru_entry) 428 dict.__delitem__(self, key) 429 self.__notify_listeners(False, [(lru_entry.key, lru_entry.value)]) 430 431 def __str__(self): 432 s = '{' 433 sep = '' 434 for k, v in self.iteritems(): 435 s += sep 436 if type(k) == str: 437 s += "'%s'" % k 438 else: 439 s += str(k) 440 441 s += ': ' 442 if type(v) == str: 443 s += "'%s'" % v 444 else: 445 s += str(v) 446 sep = ', ' 447 s += '}' 448 return s 449 450 def __iter__(self): 451 return self.__lru_queue.__iter__() 452 453 def clear(self): 454 self.__clear_to(0) 455 456 def get(self, key, default=None): 457 try: 458 lru_entry = self.__getitem__(key) 459 value = lru_entry.value 460 except KeyError: 461 value = default 462 return value 463 464 def keys(self): 465 return self.__lru_queue.keys() 466 467 def items(self): 468 return self.__lru_queue.items() 469 470 def values(self): 471 return self.__lru_queue.values() 472 473 def iteritems(self): 474 return self.__lru_queue.iteritems() 475 476 def iterkeys(self): 477 return self.__lru_queue.iterkeys() 478 479 def itervalues(self): 480 return self.__lru_queue.itervalues() 481 482 def update(self, d): 483 for key, value in d.iteritems(): 484 self[key] = value 485 486 def pop(self, key, default=None): 487 try: 488 result = self[key] 489 del self[key] 490 491 except KeyError: 492 if not default: 493 raise 494 495 result = default 496 497 return result 498 499 def popitem(self): 500 """ 501 Pops the least recently used recent key/value pair from the 502 dictionary. 503 504 :rtype: tuple 505 :return: the least recent key/value pair, as a tuple 506 507 :raise KeyError: empty dictionary 508 """ 509 if len(self) == 0: 510 raise KeyError, 'Attempted popitem() on empty dictionary' 511 512 lru_entry = self.__lru_queue.remove_tail() 513 dict.__delitem__(self, lru_entry.key) 514 return lru_entry.key, lru_entry.value 515 516 def __put(self, key, value): 517 try: 518 lru_entry = dict.__getitem__(self, key) 519 520 # Replacing an existing value with a new one. Move the entry 521 # to the head of the list. 522 523 lru_entry.value = value 524 self.__lru_queue.move_to_head(lru_entry) 525 526 except KeyError: 527 # Not there. Have to add a new one. Clear out the cruft first. 528 # Preserve one of the entries we're clearing, to avoid 529 # reallocation. 530 531 lru_entry = self.__clear_to(self.max_capacity - 1) 532 if lru_entry: 533 lru_entry.key, lru_entry.value = key, value 534 else: 535 lru_entry = LRUListEntry(key, value) 536 self.__lru_queue.add_to_head(lru_entry) 537 538 dict.__setitem__(self, key, lru_entry) 539 540 def __clear_to(self, size): 541 old_tail = None 542 while len(self.__lru_queue) > size: 543 old_tail = self.__lru_queue.remove_tail() 544 assert old_tail 545 key = old_tail.key 546 value = dict.__delitem__(self, key) 547 self.__notify_listeners(True, [(key, value)]) 548 549 assert len(self.__lru_queue) <= size 550 assert len(self) == len(self.__lru_queue) 551 return old_tail 552 553 def __notify_listeners(self, ejecting, key_value_pairs): 554 if self.__removal_listeners: 555 for key, value in key_value_pairs: 556 for func, func_data in self.__removal_listeners.items(): 557 on_eject_only, args = func_data 558 if (not on_eject_only) or ejecting: 559 func(key, value, *args) 560