1############################################################################## 2# 3# Copyright (c) 2001, 2002 Zope Foundation and Contributors. 4# All Rights Reserved. 5# 6# This software is subject to the provisions of the Zope Public License, 7# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. 8# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED 9# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 10# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS 11# FOR A PARTICULAR PURPOSE. 12# 13############################################################################## 14"""Implement an OID to File-position (long integer) mapping.""" 15 16# To save space, we do two things: 17# 18# 1. We split the keys (OIDS) into 6-byte prefixes and 2-byte suffixes. 19# We use the prefixes as keys in a mapping from prefix to mappings 20# of suffix to data: 21# 22# data is {prefix -> {suffix -> data}} 23# 24# 2. We limit the data size to 48 bits. This should allow databases 25# as large as 256 terabytes. 26# 27# Most of the space is consumed by items in the mappings from 2-byte 28# suffix to 6-byte data. This should reduce the overall memory usage to 29# 8-16 bytes per OID. 30# 31# Because 32# - the mapping from suffix to data contains at most 65535 entries, 33# - this is an in-memory data structure 34# - new keys are inserted sequentially, 35# we use a BTree bucket instead of a full BTree to store the results. 36# 37# We use p64 to convert integers to 8-byte strings and lop off the two 38# high-order bytes when saving. On loading data, we add the leading 39# bytes back before using u64 to convert the data back to (long) 40# integers. 41import struct 42 43from BTrees.fsBTree import fsBucket 44from BTrees.OOBTree import OOBTree 45import six 46 47from ZODB._compat import INT_TYPES 48from ZODB._compat import Pickler 49from ZODB._compat import Unpickler 50from ZODB._compat import _protocol 51 52 53# convert between numbers and six-byte strings 54 55def num2str(n): 56 return struct.pack(">Q", n)[2:] 57 58def str2num(s): 59 return struct.unpack(">Q", b"\000\000" + s)[0] 60 61def prefix_plus_one(s): 62 num = str2num(s) 63 return num2str(num + 1) 64 65def prefix_minus_one(s): 66 num = str2num(s) 67 return num2str(num - 1) 68 69def ensure_bytes(s): 70 # on Python 3 we might pickle bytes and unpickle unicode strings 71 return s.encode('ascii') if not isinstance(s, bytes) else s 72 73 74class fsIndex(object): 75 76 def __init__(self, data=None): 77 self._data = OOBTree() 78 if data: 79 self.update(data) 80 81 def __getstate__(self): 82 return dict( 83 state_version = 1, 84 _data = [(k, v.toString()) 85 for (k, v) in six.iteritems(self._data) 86 ] 87 ) 88 89 def __setstate__(self, state): 90 version = state.pop('state_version', 0) 91 getattr(self, '_setstate_%s' % version)(state) 92 93 def _setstate_0(self, state): 94 self.__dict__.clear() 95 self.__dict__.update(state) 96 self._data = OOBTree([ 97 (ensure_bytes(k), v) 98 for (k, v) in self._data.items() 99 ]) 100 101 def _setstate_1(self, state): 102 self._data = OOBTree([ 103 (ensure_bytes(k), fsBucket().fromString(ensure_bytes(v))) 104 for (k, v) in state['_data'] 105 ]) 106 107 def __getitem__(self, key): 108 assert isinstance(key, bytes) 109 return str2num(self._data[key[:6]][key[6:]]) 110 111 def save(self, pos, fname): 112 with open(fname, 'wb') as f: 113 pickler = Pickler(f, _protocol) 114 pickler.fast = True 115 pickler.dump(pos) 116 for k, v in six.iteritems(self._data): 117 pickler.dump((k, v.toString())) 118 pickler.dump(None) 119 120 @classmethod 121 def load(class_, fname): 122 with open(fname, 'rb') as f: 123 unpickler = Unpickler(f) 124 pos = unpickler.load() 125 if not isinstance(pos, INT_TYPES): 126 # NB: this might contain OIDs that got unpickled 127 # into Unicode strings on Python 3; hope the caller 128 # will pipe the result to fsIndex().update() to normalize 129 # the keys 130 return pos # Old format 131 index = class_() 132 data = index._data 133 while 1: 134 v = unpickler.load() 135 if not v: 136 break 137 k, v = v 138 data[ensure_bytes(k)] = fsBucket().fromString(ensure_bytes(v)) 139 return dict(pos=pos, index=index) 140 141 def get(self, key, default=None): 142 assert isinstance(key, bytes) 143 tree = self._data.get(key[:6], default) 144 if tree is default: 145 return default 146 v = tree.get(key[6:], default) 147 if v is default: 148 return default 149 return str2num(v) 150 151 def __setitem__(self, key, value): 152 assert isinstance(key, bytes) 153 value = num2str(value) 154 treekey = key[:6] 155 tree = self._data.get(treekey) 156 if tree is None: 157 tree = fsBucket() 158 self._data[treekey] = tree 159 tree[key[6:]] = value 160 161 def __delitem__(self, key): 162 assert isinstance(key, bytes) 163 treekey = key[:6] 164 tree = self._data.get(treekey) 165 if tree is None: 166 raise KeyError(key) 167 del tree[key[6:]] 168 if not tree: 169 del self._data[treekey] 170 171 def __len__(self): 172 r = 0 173 for tree in six.itervalues(self._data): 174 r += len(tree) 175 return r 176 177 def update(self, mapping): 178 for k, v in mapping.items(): 179 self[ensure_bytes(k)] = v 180 181 def has_key(self, key): 182 v = self.get(key, self) 183 return v is not self 184 185 def __contains__(self, key): 186 assert isinstance(key, bytes) 187 tree = self._data.get(key[:6]) 188 if tree is None: 189 return False 190 v = tree.get(key[6:], None) 191 if v is None: 192 return False 193 return True 194 195 def clear(self): 196 self._data.clear() 197 198 def __iter__(self): 199 for prefix, tree in six.iteritems(self._data): 200 for suffix in tree: 201 yield prefix + suffix 202 203 iterkeys = __iter__ 204 205 def keys(self): 206 return list(self.iterkeys()) 207 208 def iteritems(self): 209 for prefix, tree in six.iteritems(self._data): 210 for suffix, value in six.iteritems(tree): 211 yield (prefix + suffix, str2num(value)) 212 213 def items(self): 214 return list(self.iteritems()) 215 216 def itervalues(self): 217 for tree in six.itervalues(self._data): 218 for value in six.itervalues(tree): 219 yield str2num(value) 220 221 def values(self): 222 return list(self.itervalues()) 223 224 # Comment below applies for the following minKey and maxKey methods 225 # 226 # Obscure: what if `tree` is actually empty? We're relying here on 227 # that this class doesn't implement __delitem__: once a key gets 228 # into an fsIndex, the only way it can go away is by invoking 229 # clear(). Therefore nothing in _data.values() is ever empty. 230 # 231 # Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are 232 # very efficient. 233 234 def minKey(self, key=None): 235 if key is None: 236 smallest_prefix = self._data.minKey() 237 else: 238 smallest_prefix = self._data.minKey(key[:6]) 239 240 tree = self._data[smallest_prefix] 241 242 assert tree 243 244 if key is None: 245 smallest_suffix = tree.minKey() 246 else: 247 try: 248 smallest_suffix = tree.minKey(key[6:]) 249 except ValueError: # 'empty tree' (no suffix >= arg) 250 next_prefix = prefix_plus_one(smallest_prefix) 251 smallest_prefix = self._data.minKey(next_prefix) 252 tree = self._data[smallest_prefix] 253 assert tree 254 smallest_suffix = tree.minKey() 255 256 return smallest_prefix + smallest_suffix 257 258 def maxKey(self, key=None): 259 if key is None: 260 biggest_prefix = self._data.maxKey() 261 else: 262 biggest_prefix = self._data.maxKey(key[:6]) 263 264 tree = self._data[biggest_prefix] 265 266 assert tree 267 268 if key is None: 269 biggest_suffix = tree.maxKey() 270 else: 271 try: 272 biggest_suffix = tree.maxKey(key[6:]) 273 except ValueError: # 'empty tree' (no suffix <= arg) 274 next_prefix = prefix_minus_one(biggest_prefix) 275 biggest_prefix = self._data.maxKey(next_prefix) 276 tree = self._data[biggest_prefix] 277 assert tree 278 biggest_suffix = tree.maxKey() 279 280 return biggest_prefix + biggest_suffix 281