1##############################################################################
2#
3# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
4# All Rights Reserved.
5#
6# This software is subject to the provisions of the Zope Public License,
7# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
8# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
9# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
10# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
11# FOR A PARTICULAR PURPOSE.
12#
13##############################################################################
14"""Implement an OID to File-position (long integer) mapping."""
15
16# To save space, we do two things:
17#
18#     1. We split the keys (OIDS) into 6-byte prefixes and 2-byte suffixes.
19#        We use the prefixes as keys in a mapping from prefix to mappings
20#        of suffix to data:
21#
22#           data is  {prefix -> {suffix -> data}}
23#
24#     2. We limit the data size to 48 bits. This should allow databases
25#        as large as 256 terabytes.
26#
27# Most of the space is consumed by items in the mappings from 2-byte
28# suffix to 6-byte data. This should reduce the overall memory usage to
29# 8-16 bytes per OID.
30#
31# Because
32#  - the mapping from suffix to data contains at most 65535 entries,
33#  - this is an in-memory data structure
34#  - new keys are inserted sequentially,
35# we use a BTree bucket instead of a full BTree to store the results.
36#
37# We use p64 to convert integers to 8-byte strings and lop off the two
38# high-order bytes when saving. On loading data, we add the leading
39# bytes back before using u64 to convert the data back to (long)
40# integers.
41import struct
42
43from BTrees.fsBTree import fsBucket
44from BTrees.OOBTree import OOBTree
45import six
46
47from ZODB._compat import INT_TYPES
48from ZODB._compat import Pickler
49from ZODB._compat import Unpickler
50from ZODB._compat import _protocol
51
52
53# convert between numbers and six-byte strings
54
55def num2str(n):
56    return struct.pack(">Q", n)[2:]
57
58def str2num(s):
59    return struct.unpack(">Q", b"\000\000" + s)[0]
60
61def prefix_plus_one(s):
62    num = str2num(s)
63    return num2str(num + 1)
64
65def prefix_minus_one(s):
66    num = str2num(s)
67    return num2str(num - 1)
68
69def ensure_bytes(s):
70    # on Python 3 we might pickle bytes and unpickle unicode strings
71    return s.encode('ascii') if not isinstance(s, bytes) else s
72
73
74class fsIndex(object):
75
76    def __init__(self, data=None):
77        self._data = OOBTree()
78        if data:
79            self.update(data)
80
81    def __getstate__(self):
82        return dict(
83            state_version = 1,
84            _data = [(k, v.toString())
85                     for (k, v) in six.iteritems(self._data)
86                     ]
87            )
88
89    def __setstate__(self, state):
90        version = state.pop('state_version', 0)
91        getattr(self, '_setstate_%s' % version)(state)
92
93    def _setstate_0(self, state):
94        self.__dict__.clear()
95        self.__dict__.update(state)
96        self._data = OOBTree([
97            (ensure_bytes(k), v)
98            for (k, v) in self._data.items()
99            ])
100
101    def _setstate_1(self, state):
102        self._data = OOBTree([
103            (ensure_bytes(k), fsBucket().fromString(ensure_bytes(v)))
104            for (k, v) in state['_data']
105            ])
106
107    def __getitem__(self, key):
108        assert isinstance(key, bytes)
109        return str2num(self._data[key[:6]][key[6:]])
110
111    def save(self, pos, fname):
112        with open(fname, 'wb') as f:
113            pickler = Pickler(f, _protocol)
114            pickler.fast = True
115            pickler.dump(pos)
116            for k, v in six.iteritems(self._data):
117                pickler.dump((k, v.toString()))
118            pickler.dump(None)
119
120    @classmethod
121    def load(class_, fname):
122        with open(fname, 'rb') as f:
123            unpickler = Unpickler(f)
124            pos = unpickler.load()
125            if not isinstance(pos, INT_TYPES):
126                # NB: this might contain OIDs that got unpickled
127                # into Unicode strings on Python 3; hope the caller
128                # will pipe the result to fsIndex().update() to normalize
129                # the keys
130                return pos                  # Old format
131            index = class_()
132            data = index._data
133            while 1:
134                v = unpickler.load()
135                if not v:
136                    break
137                k, v = v
138                data[ensure_bytes(k)] = fsBucket().fromString(ensure_bytes(v))
139            return dict(pos=pos, index=index)
140
141    def get(self, key, default=None):
142        assert isinstance(key, bytes)
143        tree = self._data.get(key[:6], default)
144        if tree is default:
145            return default
146        v = tree.get(key[6:], default)
147        if v is default:
148            return default
149        return str2num(v)
150
151    def __setitem__(self, key, value):
152        assert isinstance(key, bytes)
153        value = num2str(value)
154        treekey = key[:6]
155        tree = self._data.get(treekey)
156        if tree is None:
157            tree = fsBucket()
158            self._data[treekey] = tree
159        tree[key[6:]] = value
160
161    def __delitem__(self, key):
162        assert isinstance(key, bytes)
163        treekey = key[:6]
164        tree = self._data.get(treekey)
165        if tree is None:
166            raise KeyError(key)
167        del tree[key[6:]]
168        if not tree:
169            del self._data[treekey]
170
171    def __len__(self):
172        r = 0
173        for tree in six.itervalues(self._data):
174            r += len(tree)
175        return r
176
177    def update(self, mapping):
178        for k, v in mapping.items():
179            self[ensure_bytes(k)] = v
180
181    def has_key(self, key):
182        v = self.get(key, self)
183        return v is not self
184
185    def __contains__(self, key):
186        assert isinstance(key, bytes)
187        tree = self._data.get(key[:6])
188        if tree is None:
189            return False
190        v = tree.get(key[6:], None)
191        if v is None:
192            return False
193        return True
194
195    def clear(self):
196        self._data.clear()
197
198    def __iter__(self):
199        for prefix, tree in six.iteritems(self._data):
200            for suffix in tree:
201                yield prefix + suffix
202
203    iterkeys = __iter__
204
205    def keys(self):
206        return list(self.iterkeys())
207
208    def iteritems(self):
209        for prefix, tree in six.iteritems(self._data):
210            for suffix, value in six.iteritems(tree):
211                yield (prefix + suffix, str2num(value))
212
213    def items(self):
214        return list(self.iteritems())
215
216    def itervalues(self):
217        for tree in six.itervalues(self._data):
218            for value in six.itervalues(tree):
219                yield str2num(value)
220
221    def values(self):
222        return list(self.itervalues())
223
224    # Comment below applies for the following minKey and maxKey methods
225    #
226    # Obscure:  what if `tree` is actually empty?  We're relying here on
227    # that this class doesn't implement __delitem__:  once a key gets
228    # into an fsIndex, the only way it can go away is by invoking
229    # clear().  Therefore nothing in _data.values() is ever empty.
230    #
231    # Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are
232    # very efficient.
233
234    def minKey(self, key=None):
235        if key is None:
236            smallest_prefix = self._data.minKey()
237        else:
238            smallest_prefix = self._data.minKey(key[:6])
239
240        tree = self._data[smallest_prefix]
241
242        assert tree
243
244        if key is None:
245            smallest_suffix = tree.minKey()
246        else:
247            try:
248                smallest_suffix = tree.minKey(key[6:])
249            except ValueError: # 'empty tree' (no suffix >= arg)
250                next_prefix = prefix_plus_one(smallest_prefix)
251                smallest_prefix = self._data.minKey(next_prefix)
252                tree = self._data[smallest_prefix]
253                assert tree
254                smallest_suffix = tree.minKey()
255
256        return smallest_prefix + smallest_suffix
257
258    def maxKey(self, key=None):
259        if key is None:
260            biggest_prefix = self._data.maxKey()
261        else:
262            biggest_prefix = self._data.maxKey(key[:6])
263
264        tree = self._data[biggest_prefix]
265
266        assert tree
267
268        if key is None:
269            biggest_suffix = tree.maxKey()
270        else:
271            try:
272                biggest_suffix = tree.maxKey(key[6:])
273            except ValueError: # 'empty tree' (no suffix <= arg)
274                next_prefix = prefix_minus_one(biggest_prefix)
275                biggest_prefix = self._data.maxKey(next_prefix)
276                tree = self._data[biggest_prefix]
277                assert tree
278                biggest_suffix = tree.maxKey()
279
280        return biggest_prefix + biggest_suffix
281