1# 2# vector.py 3# 4# This source file is part of the FoundationDB open source project 5# 6# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors 7# 8# Licensed under the Apache License, Version 2.0 (the "License"); 9# you may not use this file except in compliance with the License. 10# You may obtain a copy of the License at 11# 12# http://www.apache.org/licenses/LICENSE-2.0 13# 14# Unless required by applicable law or agreed to in writing, software 15# distributed under the License is distributed on an "AS IS" BASIS, 16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17# See the License for the specific language governing permissions and 18# limitations under the License. 19# 20 21"""FoundationDB Vector Layer. 22 23Provides the Vector() class for storing and manipulating arrays 24in FoundationDB. 25""" 26 27import fdb 28import fdb.tuple 29 30import threading 31 32fdb.api_version(22) 33 34 35################################### 36# This defines a Subspace of keys # 37################################### 38 39 40class Subspace (object): 41 def __init__(self, prefixTuple, rawPrefix=""): 42 self.rawPrefix = rawPrefix + fdb.tuple.pack(prefixTuple) 43 44 def __getitem__(self, name): 45 return Subspace((name,), self.rawPrefix) 46 47 def key(self): 48 return self.rawPrefix 49 50 def pack(self, tuple): 51 return self.rawPrefix + fdb.tuple.pack(tuple) 52 53 def unpack(self, key): 54 assert key.startswith(self.rawPrefix) 55 return fdb.tuple.unpack(key[len(self.rawPrefix):]) 56 57 def range(self, tuple=()): 58 p = fdb.tuple.range(tuple) 59 return slice(self.rawPrefix + p.start, self.rawPrefix + p.stop) 60 61 62######################## 63# _ImplicitTransaction # 64######################## 65 66# A local class which is used to allow vector operations to be performed without 67# explicitly passing a transaction. It is created by vector.use_transaction 68# and is used as follows: 69# 70# with vector.use_transaction(tr): 71# vector[0] = 1 72# vector.push(1) 73# ... 74 75 76class _ImplicitTransaction: 77 def __init__(self, vector, tr): 78 self.vector = vector 79 self.tr = tr 80 self.initialValue = self.vector.local.tr 81 82 def __enter__(self): 83 if self.initialValue is not None and self.vector.local.tr != self.tr: 84 raise Exception('use_transaction cannot be nested') 85 86 self.vector.local.tr = self.tr 87 88 def __exit__(self, type, value, traceback): 89 self.vector.local.tr = self.initialValue 90 91 92########## 93# Vector # 94########## 95 96# Vector stores each of its values using its index as the key. 97# The size of a vector is equal to the index of its last key + 1. 98## 99# For indexes smaller than the vector's size that have no associated key 100# in the database, the value will be the specified defaultValue. 101## 102# If the last value in the vector has the default value, its key will 103# always be set so that size can be determined. 104## 105# By creating Vector with a Subspace, all kv pairs modified by the 106# layer will have keys that start within that Subspace. 107 108 109class Vector: 110 """Represents a potentially sparse array in FoundationDB.""" 111 112 # Public functions 113 114 def __init__(self, subspace, defaultValue=''): 115 self.subspace = subspace 116 self.defaultValue = defaultValue 117 self.local = threading.local() 118 self.local.tr = None 119 120 def use_transaction(self, tr): 121 """ 122 Get an object that can be used in a with statement to perform operations 123 on this vector without supplying a transaction as an argument to each operation. 124 125 For example: 126 127 with vector.use_transaction(tr): 128 vector[0] = 1 129 vector.push(1) 130 ... 131 """ 132 return _ImplicitTransaction(self, tr) 133 134 def size(self, tr=None): 135 """Get the number of items in the Vector. This number includes the sparsely represented items.""" 136 return self._size(self._to_transaction(tr)) 137 138 def push(self, val, tr=None): 139 """Push a single item onto the end of the Vector.""" 140 self._push(val, self._to_transaction(tr)) 141 142 def back(self, tr=None): 143 """Get the value of the last item in the Vector.""" 144 return self._back(self._to_transaction(tr)) 145 146 def front(self, tr=None): 147 """Get the value of the first item in the Vector.""" 148 return self._get(0, self._to_transaction(tr)) 149 150 def pop(self, tr=None): 151 """Get and pops the last item off the Vector.""" 152 return self._pop(self._to_transaction(tr)) 153 154 def swap(self, i1, i2, tr=None): 155 """Swap the items at positions i1 and i2.""" 156 self._swap(i1, i2, self._to_transaction(tr)) 157 158 def get(self, index, tr=None): 159 """Get the item at the specified index.""" 160 return self._get(index, self._to_transaction(tr)) 161 162 def get_range(self, startIndex=None, endIndex=None, step=None, tr=None): 163 """Get a range of items in the Vector, returned as a generator.""" 164 return self._get_range(startIndex, endIndex, step, self._to_transaction(tr)) 165 166 def set(self, index, val, tr=None): 167 """Set the value at a particular index in the Vector.""" 168 self._set(index, val, self._to_transaction(tr)) 169 170 def empty(self, tr=None): 171 """Test whether the Vector is empty.""" 172 return self._size(self._to_transaction(tr)) == 0 173 174 def resize(self, length, tr=None): 175 """Grow or shrink the size of the Vector.""" 176 self._resize(length, self._to_transaction(tr)) 177 178 def clear(self, tr=None): 179 """Remove all items from the Vector.""" 180 self._clear(self._to_transaction(tr)) 181 182 # Vector supports array notation when combined with use_transaction 183 def __setitem__(self, index, val): 184 """Set the item at the specified index. Can only be used with use_transaction.""" 185 self.set(index, val) 186 187 def __getitem__(self, index): 188 """ 189 Get the item(s) at the specified index or range. Ranges are returned as a generator. Can only 190 be used with use_transaction() 191 """ 192 if isinstance(index, slice): 193 return self.get_range(index.start, index.stop, index.step) 194 else: 195 return self.get(index) 196 197 # Private functions 198 199 @fdb.transactional 200 def _push(self, val, tr): 201 tr[self._key_at(self.size())] = fdb.tuple.pack((val,)) 202 203 @fdb.transactional 204 def _back(self, tr): 205 keyRange = self.subspace.range() 206 last = tr.get_range(keyRange.start, keyRange.stop, 1, True) 207 for k, v in last: 208 return fdb.tuple.unpack(v)[0] 209 return None 210 211 @fdb.transactional 212 def _pop(self, tr): 213 keyRange = self.subspace.range() 214 215 # Read the last two entries so we can check if the second to last item 216 # is being represented sparsely. If so, we will be required to set it 217 # to the default value 218 lastTwo = list(tr.get_range(keyRange.start, keyRange.stop, 2, True)) 219 indices = [self.subspace.unpack(kv.key)[0] for kv in lastTwo] 220 221 # Vector was empty 222 if len(lastTwo) == 0: 223 return None 224 225 # Vector has size one: 226 elif indices[0] == 0: 227 pass 228 229 # Second to last item is being represented sparsely 230 elif len(lastTwo) == 1 or indices[0] > indices[1] + 1: 231 tr[self._key_at(indices[0] - 1)] = fdb.tuple.pack((self.defaultValue,)) 232 233 del tr[lastTwo[0].key] 234 return fdb.tuple.unpack(lastTwo[0].value)[0] 235 236 @fdb.transactional 237 def _swap(self, i1, i2, tr): 238 k1 = self._key_at(i1) 239 k2 = self._key_at(i2) 240 241 currentSize = self._size(tr) 242 v1 = tr[k1] 243 v2 = tr[k2] 244 245 if i1 > currentSize or i2 > currentSize or i1 < 0 or i2 < 0: 246 raise IndexError('vector.swap: indices (%d, %d) out of range' % (i1, i2)) 247 248 if v2.present(): 249 tr[k1] = v2 250 elif v1.present() and i1 < currentSize - 1: 251 del tr[k1] 252 253 if v1.present(): 254 tr[k2] = v1 255 elif v2.present() and i2 < currentSize - 1: 256 del tr[k2] 257 258 @fdb.transactional 259 def _get(self, index, tr): 260 if index < 0: 261 raise IndexError('vector.get: index \'%d\' out of range' % index) 262 263 start = self._key_at(index) 264 end = self.subspace.range().stop 265 266 output = tr.get_range(start, end, 1) 267 for k, v in output: 268 # The requested index had an associated key 269 if(start == k): 270 return fdb.tuple.unpack(v)[0] 271 272 # The requested index is sparsely represented 273 return self.defaultValue 274 275 # We requested a value past the end of the vector 276 raise IndexError('vector.get: index \'%d\' out of range' % index) 277 278 def _get_range(self, startIndex, endIndex, step, tr): 279 size = self._size(tr) 280 if startIndex is not None and startIndex < 0: 281 startIndex = max(0, size + startIndex) 282 if endIndex is not None and endIndex < 0: 283 endIndex = max(0, size + endIndex) 284 285 if step is None: 286 if startIndex is None or endIndex is None or startIndex <= endIndex: 287 step = 1 288 else: 289 step = -1 290 elif step == 0: 291 raise ValueError('vector.get_range: step cannot be zero') 292 293 if startIndex is None: 294 if step > 0: 295 start = self.subspace.range().start 296 else: 297 end = self.subspace.range().stop 298 else: 299 if step > 0: 300 start = self._key_at(startIndex) 301 else: 302 end = self._key_at(startIndex + 1) 303 304 if endIndex is None: 305 if step > 0: 306 end = self.subspace.range().stop 307 else: 308 start = self.subspace.range().start 309 310 else: 311 if step > 0: 312 end = self._key_at(endIndex) 313 else: 314 start = self._key_at(endIndex + 1) 315 316 result = tr.get_range(start, end, 0, step < 0) 317 318 currentIndex = startIndex 319 if currentIndex is None: 320 if step > 0: 321 currentIndex = 0 322 else: 323 currentIndex = size - 1 324 elif currentIndex >= size: 325 currentIndex = size - 1 326 327 for k, v in result: 328 keyIndex = self.subspace.unpack(k)[0] 329 while (step > 0 and currentIndex < keyIndex) or (step < 0 and currentIndex > keyIndex): 330 currentIndex = currentIndex + step 331 yield self.defaultValue 332 333 if currentIndex == keyIndex: 334 currentIndex = currentIndex + step 335 yield fdb.tuple.unpack(v)[0] 336 337 @fdb.transactional 338 def _size(self, tr): 339 keyRange = self.subspace.range() 340 lastKey = tr.get_key(fdb.KeySelector.last_less_or_equal(keyRange.stop)) 341 if lastKey < keyRange.start: 342 return 0 343 344 return self.subspace.unpack(lastKey)[0] + 1 345 346 @fdb.transactional 347 def _set(self, index, val, tr): 348 tr[self._key_at(index)] = fdb.tuple.pack((val,)) 349 350 @fdb.transactional 351 def _resize(self, length, tr): 352 currentSize = self.size() 353 if(length == currentSize): 354 return 355 if(length < currentSize): 356 self._shrink(tr, length, currentSize) 357 else: 358 self._expand(tr, length, currentSize) 359 360 @fdb.transactional 361 def _shrink(self, tr, length, currentSize): 362 tr.clear_range(self._key_at(length), self.subspace.range().stop) 363 364 # Check if the new end of the vector was being sparsely represented 365 if self._size(tr) < length: 366 tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,)) 367 368 @fdb.transactional 369 def _expand(self, tr, length, currentSize): 370 tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,)) 371 372 @fdb.transactional 373 def _clear(self, tr): 374 del tr[self.subspace.range()] 375 376 def _to_transaction(self, tr): 377 if tr is None: 378 if self.local.tr is None: 379 raise Exception('No transaction specified and use_transaction has not been called') 380 else: 381 return self.local.tr 382 else: 383 return tr 384 385 def _key_at(self, index): 386 return self.subspace.pack((index,)) 387 388 389################## 390# internal tests # 391################## 392 393 394# caution: modifies the database! 395@fdb.transactional 396def vector_test(tr): 397 vector = Vector(Subspace(('test_vector',)), 0) 398 399 with vector.use_transaction(tr): 400 print 'Clearing any previous values in the vector' 401 vector.clear() 402 403 print '\nMODIFIERS' 404 405 # Set + Push 406 vector[0] = 1 407 vector[1] = 2 408 vector.push(3) 409 _print_vector(vector, tr) 410 411 # Swap 412 vector.swap(0, 2) 413 _print_vector(vector, tr) 414 415 # Pop 416 print 'Popped:', vector.pop() 417 _print_vector(vector, tr) 418 419 # Clear 420 vector.clear() 421 422 print 'Pop empty:', vector.pop() 423 _print_vector(vector, tr) 424 425 vector.push('Foo') 426 print 'Pop size 1:', vector.pop() 427 _print_vector(vector, tr) 428 429 print '\nCAPACITY OPERATIONS' 430 431 # Capacity 432 print 'Size:', vector.size() 433 print 'Empty:', vector.empty() 434 435 print 'Resizing to length 5' 436 vector.resize(5) 437 _print_vector(vector, tr) 438 print 'Size:', vector.size() 439 440 print 'Setting values' 441 vector[0] = 'The' 442 vector[1] = 'Quick' 443 vector[2] = 'Brown' 444 vector[3] = 'Fox' 445 vector[4] = 'Jumps' 446 vector[5] = 'Over' 447 _print_vector(vector, tr) 448 449 print '\nFRONT' 450 print vector.front() 451 452 print '\nBACK' 453 print vector.back() 454 455 print '\nELEMENT ACCESS' 456 print 'Index 0:', vector[0] 457 print 'Index 5:', vector[5] 458 459 _print_vector(vector, tr) 460 print 'Size:', vector.size() 461 462 print '\nRESIZING' 463 print 'Resizing to 3' 464 vector.resize(3) 465 _print_vector(vector, tr) 466 print 'Size:', vector.size() 467 468 print 'Resizing to 3 again' 469 vector.resize(3) 470 _print_vector(vector, tr) 471 print 'Size:', vector.size() 472 473 print 'Resizing to 6' 474 vector.resize(6) 475 _print_vector(vector, tr) 476 print 'Size:', vector.size() 477 478 print '\nSPARSE TESTS' 479 print 'Popping sparse vector' 480 vector.pop() 481 482 _print_vector(vector, tr) 483 print 'Size:', vector.size() 484 485 print 'Resizing to 4' 486 vector.resize(4) 487 488 _print_vector(vector, tr) 489 print 'Size:', vector.size() 490 491 print 'Adding "word" to index 10, resize to 25' 492 vector[10] = 'word' 493 vector.resize(25) 494 495 _print_vector(vector, tr) 496 print 'Size:', vector.size() 497 498 print 'Popping sparse vector' 499 vector.pop() 500 501 _print_vector(vector, tr) 502 print 'Size:', vector.size() 503 504 print 'Swapping with sparse element' 505 vector.swap(10, 15) 506 507 _print_vector(vector, tr) 508 print 'Size:', vector.size() 509 510 print 'Swapping sparse elements' 511 vector.swap(12, 13) 512 513 _print_vector(vector, tr) 514 print 'Size:', vector.size() 515 516 517############################## 518# Vector sample usage # 519############################## 520 521 522import sys 523 524 525# caution: modifies the database! 526@fdb.transactional 527def vector_example(tr): 528 vector = Vector(Subspace(('my_vector',)), 0) 529 530 with vector.use_transaction(tr): 531 vector.clear() 532 533 for i in range(0, 5): 534 vector.push(i) 535 536 _print_vector(vector, tr) 537 538 print 'Pop last item: %d' % vector.pop() 539 _print_vector(vector, tr) 540 541 vector[1] = 10 542 vector.resize(11) 543 _print_vector(vector, tr) 544 545 vector.swap(1, 10) 546 _print_vector(vector, tr) 547 548 549def _print_vector(vector, tr): 550 first = True 551 with vector.use_transaction(tr): 552 for v in vector: 553 if not first: 554 sys.stdout.write(',') 555 556 first = False 557 sys.stdout.write(repr(v)) 558 559 print 560 561 562# caution: modifies the database! 563if __name__ == '__main__': 564 db = fdb.open() 565 vector_example(db) 566 # vector_test(db) 567