1
2"""
3``grizzled.collections.dict`` contains some useful dictionary classes
4that extend the behavior of the built-in Python ``dict`` type.
5"""
6__docformat__ = "restructuredtext en"
7
8# ---------------------------------------------------------------------------
9# Imports
10# ---------------------------------------------------------------------------
11
12import sys
13
14# ---------------------------------------------------------------------------
15# Exports
16# ---------------------------------------------------------------------------
17
18__all__ = ['OrderedDict', 'LRUDict']
19
20# ---------------------------------------------------------------------------
21# Public Classes
22# ---------------------------------------------------------------------------
23
24                         ##############################
25                         # OrderedDict implementation #
26                         ##############################
27
28class OrderedDict(dict):
29    """
30    ``OrderedDict`` is a simple ordered dictionary. The ``keys()``,
31    ``items()``, ``__iter__()``, and other methods all return the keys in the
32    order they were added to the dictionary. Note that re-adding a key (i.e.,
33    replacing a key with a new value) does not change the original order.
34
35    An ``OrderedDict`` object is instantiated with exactly the same parameters
36    as a ``dict`` object. The methods it provides are identical to those in
37    the ``dict`` type and are not documented here.
38    """
39    def __init__(self, *args, **kw):
40        dict.__init__(self, *args, **kw)
41        self.__orderedKeys = []
42        self.__keyPositions = {}
43
44    def __setitem__(self, key, value):
45        try:
46            index = self.__keyPositions[key]
47        except KeyError:
48            index = len(self.__orderedKeys)
49            self.__orderedKeys += [key]
50            self.__keyPositions[key] = index
51
52        dict.__setitem__(self, key, value)
53
54    def __delitem__(self, key):
55        index = self.__keyPositions[key]
56        del self.__orderedKeys[index]
57        del self.__keyPositions[key]
58        dict.__delitem__(self, key)
59
60    def __iter__(self):
61        for key in self.__orderedKeys:
62            yield key
63
64    def __str__(self):
65        s = '{'
66        sep = ''
67        for k, v in self.iteritems():
68            s += sep
69            if type(k) == str:
70                s += "'%s'" % k
71            else:
72                s += str(k)
73
74            s += ': '
75            if type(v) == str:
76                s += "'%s'" % v
77            else:
78                s += str(v)
79            sep = ', '
80        s += '}'
81        return s
82
83    def keys(self):
84        return self.__orderedKeys
85
86    def items(self):
87        return [(key, self[key]) for key in self.__orderedKeys]
88
89    def values(self):
90        return [self[key] for key in self.__orderedKeys]
91
92    def iteritems(self):
93        for key in self.__orderedKeys:
94            yield (key, self[key])
95
96    def iterkeys(self):
97        for key in self.__orderedKeys:
98            yield key
99
100    def update(self, d):
101        for key, value in d.iteritems():
102            self[key] = value
103
104    def pop(self, key, default=None):
105        try:
106            result = self[key]
107            del self[key]
108
109        except KeyError:
110            if not default:
111                raise
112
113            result = default
114
115        return result
116
117    def popitem(self):
118        key, value = dict.popitem(self)
119        del self[key]
120        return (key, value)
121
122                           ##########################
123                           # LRUDict Implementation #
124                           ##########################
125
126# Implementation note:
127#
128# Each entry in the LRUDict dictionary is an LRUListEntry. Basically,
129# we maintain two structures:
130#
131# 1. A linked list of dictionary entries, in order of recency.
132# 2. A dictionary of those linked list items.
133#
134# When accessing or updating an entry in the dictionary, the logic
135# is more or less like the following "get" scenario:
136#
137# - Using the key, get the LRUListEntry from the dictionary.
138# - Extract the value from the LRUListEntry, to return to the caller.
139# - Move the LRUListEntry to the front of the recency queue.
140
141class LRUListEntry(object):
142
143    def __init__(self, key, value):
144        self.key = key
145        self.value = value
146        self.next = None
147        self.prev = None
148
149    def __hash__(self):
150        return self.key.__hash__()
151
152    def __str__(self):
153        return '(%s, %s)' % (self.key, self.value)
154
155    def __repr__(self):
156        return str(self)
157
158class LRUList(object):
159
160    def __init__(self):
161        self.head = self.tail = None
162        self.size = 0
163
164    def __del__(self):
165        self.clear()
166
167    def __str__(self):
168        return '[' + ', '.join([str(tup) for tup in self.items()]) + ']'
169
170    def __repr__(self):
171        return self.__class__.__name__ + ':' + str(self)
172
173    def __len__(self):
174        return self.size
175
176    def __iter__(self):
177        entry = self.head
178        while entry:
179            yield entry.key
180            entry = entry.next
181
182    def keys(self):
183        return [k for k in self]
184
185    def items(self):
186        result = []
187        for key, value in self.iteritems():
188            result.append((key, value))
189        return result
190
191    def values(self):
192        result = []
193        for key, value in self.iteritems():
194            result.append(value)
195        return result
196
197    def iteritems(self):
198        entry = self.head
199        while entry:
200            yield (entry.key, entry.value)
201            entry = entry.next
202
203    def iterkeys(self):
204        self.__iter__()
205
206    def itervalues(self):
207        entry = self.head
208        while entry:
209            yield entry.value
210            entry = entry.next
211
212    def clear(self):
213        while self.head:
214            cur = self.head
215            next = self.head.next
216            cur.next = cur.previous = cur.key = cur.value = None
217            self.head = next
218
219        self.tail = None
220        self.size = 0
221
222    def remove(self, entry):
223        if entry.next:
224            entry.next.previous = entry.previous
225
226        if entry.previous:
227            entry.previous.next = entry.next
228
229        if entry == self.head:
230            self.head = entry.next
231
232        if entry == self.tail:
233            self.tail = entry.previous
234
235        entry.next = entry.previous = None
236        self.size -= 1
237        assert self.size >= 0
238
239    def remove_tail(self):
240        result = self.tail
241
242        if result:
243            self.remove(result)
244
245        return result
246
247    def add_to_head(self, entry):
248        if type(entry) == tuple:
249            key, value = entry
250            entry = LRUListEntry(key, value)
251        else:
252            entry.next = entry.previous = None
253
254        if self.head:
255            assert self.tail
256            entry.next = self.head
257            self.head.previous = entry
258            self.head = entry
259
260        else:
261            assert not self.tail
262            self.head = self.tail = entry
263
264        self.size += 1
265
266    def move_to_head(self, entry):
267        self.remove(entry)
268        self.add_to_head(entry)
269
270class LRUDict(dict):
271    """
272    ``LRUDict`` is a dictionary of a fixed maximum size that enforces a least
273    recently used discard policy. When the dictionary is full (i.e., contains
274    the maximum number of entries), any attempt to insert a new entry causes
275    one of the least recently used entries to be discarded.
276
277    **Note**:
278
279    - Setting or updating a key in the dictionary refreshes the corresponding
280      value, making it "new" again, even if it replaces the existing value with
281      itself.
282    - Retrieving a value from the dictionary also refreshes the entry.
283    - Iterating over the contents of the dictionary (via ``in`` or ``items()``
284      or any other similar method) does *not* affect the recency of the
285      dictionary's contents.
286    - This implementation is *not* thread-safe.
287
288    An ``LRUDict`` also supports the concept of *removal listeners*. Removal
289    listeners are functions that are notified when objects are removed from
290    the dictionary. Removal listeners can be:
291
292    - *eject only* listeners, meaning they're only notified when objects are
293      ejected from the cache to make room for new objects, or
294    - *removal* listeners, meaning they're notified whenever an object is
295      removed for *any* reason, including via ``del``.
296
297    This implementation is based on a Java ``LRUMap`` class in the
298    ``org.clapper.util`` library. See http://www.clapper.org/software/java/util/
299    for details.
300    """
301    def __init__(self, *args, **kw):
302        """
303        Initialize an ``LRUDict`` that will hold, at most, ``max_capacity``
304        items. Attempts to insert more than ``max_capacity`` items in the
305        dictionary will cause the least-recently used entries to drop out of
306        the dictionary.
307
308        :Keywords:
309            max_capacity : int
310                The maximum size of the dictionary
311        """
312        if kw.has_key('max_capacity'):
313            self.__max_capacity = kw['max_capacity']
314            del kw['max_capacity']
315        else:
316            self.__max_capacity = sys.maxint
317
318        dict.__init__(self)
319        self.__removal_listeners = {}
320        self.__lru_queue = LRUList()
321
322    def __del__(self):
323        self.clear()
324
325    def get_max_capacity(self):
326        """
327        Get the maximum capacity of the dictionary.
328
329        :rtype: int
330        :return: the maximum capacity
331        """
332        return self.__max_capacity
333
334    def set_max_capacity(self, new_capacity):
335        """
336        Set or change the maximum capacity of the dictionary. Reducing
337        the size of a dictionary with items already in it might result
338        in items being evicted.
339
340        :Parameters:
341            new_capacity : int
342                the new maximum capacity
343        """
344        self.__max_capacity = new_capacity
345        if len(self) > new_capacity:
346            self.__clear_to(new_capacity)
347
348    max_capacity = property(get_max_capacity, set_max_capacity,
349                            doc='The maximum capacity. Can be reset at will.')
350
351    def add_ejection_listener(self, listener, *args):
352        """
353        Add an ejection listener to the dictionary. The listener function
354        should take at least two parameters: the key and value being removed.
355        It can also take additional parameters, which are passed through
356        unmodified.
357
358        An ejection listener is only notified when objects are ejected from
359        the cache to make room for new objects; more to the point, an ejection
360        listener is never notified when an object is removed from the cache
361        manually, via use of the ``del`` operator.
362
363        :Parameters:
364            listener : function
365                Function to invoke
366
367            args : iterable
368                Any additional parameters to pass to the function
369        """
370        self.__removal_listeners[listener] = (True, args)
371
372    def add_removal_listener(self, listener, *args):
373        """
374        Add a removal listener to the dictionary. The listener function should
375        take at least two parameters: the key and value being removed. It can
376        also take additional parameters, which are passed through unmodified.
377
378        A removal listener is notified when objects are ejected from the cache
379        to make room for new objects *and* when objects are manually deleted
380        from the cache.
381
382        :Parameters:
383            listener : function
384                Function to invoke
385
386            args : iterable
387                Any additional parameters to pass to the function
388        """
389        self.__removal_listeners[listener] = (False, args)
390
391    def remove_listener(self, listener):
392        """
393        Remove the specified removal or ejection listener from the list of
394        listeners.
395
396        :Parameters:
397            listener : function
398                Function object to remove
399
400        :rtype: bool
401        :return: ``True`` if the listener was found and removed; ``False``
402                 otherwise
403        """
404        try:
405            del self.__removal_listeners[listener]
406            return True
407        except KeyError:
408            return False
409
410    def clear_listeners(self):
411        """
412        Clear all removal and ejection listeners from the list of listeners.
413        """
414        for key in self.__removal_listeners.keys():
415            del self.__removal_listeners[key]
416
417    def __setitem__(self, key, value):
418        self.__put(key, value)
419
420    def __getitem__(self, key):
421        lru_entry = dict.__getitem__(self, key)
422        self.__lru_queue.move_to_head(lru_entry)
423        return lru_entry.value
424
425    def __delitem__(self, key):
426        lru_entry = dict.__getitem__(self, key)
427        self.__lru_queue.remove(lru_entry)
428        dict.__delitem__(self, key)
429        self.__notify_listeners(False, [(lru_entry.key, lru_entry.value)])
430
431    def __str__(self):
432        s = '{'
433        sep = ''
434        for k, v in self.iteritems():
435            s += sep
436            if type(k) == str:
437                s += "'%s'" % k
438            else:
439                s += str(k)
440
441            s += ': '
442            if type(v) == str:
443                s += "'%s'" % v
444            else:
445                s += str(v)
446            sep = ', '
447        s += '}'
448        return s
449
450    def __iter__(self):
451        return self.__lru_queue.__iter__()
452
453    def clear(self):
454        self.__clear_to(0)
455
456    def get(self, key, default=None):
457        try:
458            lru_entry = self.__getitem__(key)
459            value = lru_entry.value
460        except KeyError:
461            value = default
462        return value
463
464    def keys(self):
465        return self.__lru_queue.keys()
466
467    def items(self):
468        return self.__lru_queue.items()
469
470    def values(self):
471        return self.__lru_queue.values()
472
473    def iteritems(self):
474        return self.__lru_queue.iteritems()
475
476    def iterkeys(self):
477        return self.__lru_queue.iterkeys()
478
479    def itervalues(self):
480        return self.__lru_queue.itervalues()
481
482    def update(self, d):
483        for key, value in d.iteritems():
484            self[key] = value
485
486    def pop(self, key, default=None):
487        try:
488            result = self[key]
489            del self[key]
490
491        except KeyError:
492            if not default:
493                raise
494
495            result = default
496
497        return result
498
499    def popitem(self):
500        """
501        Pops the least recently used recent key/value pair from the
502        dictionary.
503
504        :rtype: tuple
505        :return: the least recent key/value pair, as a tuple
506
507        :raise KeyError: empty dictionary
508        """
509        if len(self) == 0:
510            raise KeyError, 'Attempted popitem() on empty dictionary'
511
512        lru_entry = self.__lru_queue.remove_tail()
513        dict.__delitem__(self, lru_entry.key)
514        return lru_entry.key, lru_entry.value
515
516    def __put(self, key, value):
517        try:
518            lru_entry = dict.__getitem__(self, key)
519
520            # Replacing an existing value with a new one. Move the entry
521            # to the head of the list.
522
523            lru_entry.value = value
524            self.__lru_queue.move_to_head(lru_entry)
525
526        except KeyError:
527            # Not there. Have to add a new one. Clear out the cruft first.
528            # Preserve one of the entries we're clearing, to avoid
529            # reallocation.
530
531            lru_entry = self.__clear_to(self.max_capacity - 1)
532            if lru_entry:
533                lru_entry.key, lru_entry.value = key, value
534            else:
535                lru_entry = LRUListEntry(key, value)
536            self.__lru_queue.add_to_head(lru_entry)
537
538        dict.__setitem__(self, key, lru_entry)
539
540    def __clear_to(self, size):
541        old_tail = None
542        while len(self.__lru_queue) > size:
543            old_tail = self.__lru_queue.remove_tail()
544            assert old_tail
545            key = old_tail.key
546            value = dict.__delitem__(self, key)
547            self.__notify_listeners(True, [(key, value)])
548
549        assert len(self.__lru_queue) <= size
550        assert len(self) == len(self.__lru_queue)
551        return old_tail
552
553    def __notify_listeners(self, ejecting, key_value_pairs):
554        if self.__removal_listeners:
555            for key, value in key_value_pairs:
556                for func, func_data in self.__removal_listeners.items():
557                    on_eject_only, args = func_data
558                    if (not on_eject_only) or ejecting:
559                        func(key, value, *args)
560