1#
2# vector.py
3#
4# This source file is part of the FoundationDB open source project
5#
6# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
7#
8# Licensed under the Apache License, Version 2.0 (the "License");
9# you may not use this file except in compliance with the License.
10# You may obtain a copy of the License at
11#
12#     http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing, software
15# distributed under the License is distributed on an "AS IS" BASIS,
16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17# See the License for the specific language governing permissions and
18# limitations under the License.
19#
20
21"""FoundationDB Vector Layer.
22
23Provides the Vector() class for storing and manipulating arrays
24in FoundationDB.
25"""
26
27import fdb
28import fdb.tuple
29
30import threading
31
32fdb.api_version(22)
33
34
35###################################
36# This defines a Subspace of keys #
37###################################
38
39
40class Subspace (object):
41    def __init__(self, prefixTuple, rawPrefix=""):
42        self.rawPrefix = rawPrefix + fdb.tuple.pack(prefixTuple)
43
44    def __getitem__(self, name):
45        return Subspace((name,), self.rawPrefix)
46
47    def key(self):
48        return self.rawPrefix
49
50    def pack(self, tuple):
51        return self.rawPrefix + fdb.tuple.pack(tuple)
52
53    def unpack(self, key):
54        assert key.startswith(self.rawPrefix)
55        return fdb.tuple.unpack(key[len(self.rawPrefix):])
56
57    def range(self, tuple=()):
58        p = fdb.tuple.range(tuple)
59        return slice(self.rawPrefix + p.start, self.rawPrefix + p.stop)
60
61
62########################
63# _ImplicitTransaction #
64########################
65
66# A local class which is used to allow vector operations to be performed without
67# explicitly passing a transaction. It is created by vector.use_transaction
68# and is used as follows:
69#
70# with vector.use_transaction(tr):
71#     vector[0] = 1
72#     vector.push(1)
73#     ...
74
75
76class _ImplicitTransaction:
77    def __init__(self, vector, tr):
78        self.vector = vector
79        self.tr = tr
80        self.initialValue = self.vector.local.tr
81
82    def __enter__(self):
83        if self.initialValue is not None and self.vector.local.tr != self.tr:
84            raise Exception('use_transaction cannot be nested')
85
86        self.vector.local.tr = self.tr
87
88    def __exit__(self, type, value, traceback):
89        self.vector.local.tr = self.initialValue
90
91
92##########
93# Vector #
94##########
95
96# Vector stores each of its values using its index as the key.
97# The size of a vector is equal to the index of its last key + 1.
98##
99# For indexes smaller than the vector's size that have no associated key
100# in the database, the value will be the specified defaultValue.
101##
102# If the last value in the vector has the default value, its key will
103# always be set so that size can be determined.
104##
105# By creating Vector with a Subspace, all kv pairs modified by the
106# layer will have keys that start within that Subspace.
107
108
109class Vector:
110    """Represents a potentially sparse array in FoundationDB."""
111
112    # Public functions
113
114    def __init__(self, subspace, defaultValue=''):
115        self.subspace = subspace
116        self.defaultValue = defaultValue
117        self.local = threading.local()
118        self.local.tr = None
119
120    def use_transaction(self, tr):
121        """
122        Get an object that can be used in a with statement to perform operations
123        on this vector without supplying a transaction as an argument to each operation.
124
125        For example:
126
127        with vector.use_transaction(tr):
128            vector[0] = 1
129            vector.push(1)
130            ...
131        """
132        return _ImplicitTransaction(self, tr)
133
134    def size(self, tr=None):
135        """Get the number of items in the Vector. This number includes the sparsely represented items."""
136        return self._size(self._to_transaction(tr))
137
138    def push(self, val, tr=None):
139        """Push a single item onto the end of the Vector."""
140        self._push(val, self._to_transaction(tr))
141
142    def back(self, tr=None):
143        """Get the value of the last item in the Vector."""
144        return self._back(self._to_transaction(tr))
145
146    def front(self, tr=None):
147        """Get the value of the first item in the Vector."""
148        return self._get(0, self._to_transaction(tr))
149
150    def pop(self, tr=None):
151        """Get and pops the last item off the Vector."""
152        return self._pop(self._to_transaction(tr))
153
154    def swap(self, i1, i2, tr=None):
155        """Swap the items at positions i1 and i2."""
156        self._swap(i1, i2, self._to_transaction(tr))
157
158    def get(self, index, tr=None):
159        """Get the item at the specified index."""
160        return self._get(index, self._to_transaction(tr))
161
162    def get_range(self, startIndex=None, endIndex=None, step=None, tr=None):
163        """Get a range of items in the Vector, returned as a generator."""
164        return self._get_range(startIndex, endIndex, step, self._to_transaction(tr))
165
166    def set(self, index, val, tr=None):
167        """Set the value at a particular index in the Vector."""
168        self._set(index, val, self._to_transaction(tr))
169
170    def empty(self, tr=None):
171        """Test whether the Vector is empty."""
172        return self._size(self._to_transaction(tr)) == 0
173
174    def resize(self, length, tr=None):
175        """Grow or shrink the size of the Vector."""
176        self._resize(length, self._to_transaction(tr))
177
178    def clear(self, tr=None):
179        """Remove all items from the Vector."""
180        self._clear(self._to_transaction(tr))
181
182    # Vector supports array notation when combined with use_transaction
183    def __setitem__(self, index, val):
184        """Set the item at the specified index. Can only be used with use_transaction."""
185        self.set(index, val)
186
187    def __getitem__(self, index):
188        """
189        Get the item(s) at the specified index or range. Ranges are returned as a generator. Can only
190        be used with use_transaction()
191        """
192        if isinstance(index, slice):
193            return self.get_range(index.start, index.stop, index.step)
194        else:
195            return self.get(index)
196
197    # Private functions
198
199    @fdb.transactional
200    def _push(self, val, tr):
201        tr[self._key_at(self.size())] = fdb.tuple.pack((val,))
202
203    @fdb.transactional
204    def _back(self, tr):
205        keyRange = self.subspace.range()
206        last = tr.get_range(keyRange.start, keyRange.stop, 1, True)
207        for k, v in last:
208            return fdb.tuple.unpack(v)[0]
209        return None
210
211    @fdb.transactional
212    def _pop(self, tr):
213        keyRange = self.subspace.range()
214
215        # Read the last two entries so we can check if the second to last item
216        # is being represented sparsely. If so, we will be required to set it
217        # to the default value
218        lastTwo = list(tr.get_range(keyRange.start, keyRange.stop, 2, True))
219        indices = [self.subspace.unpack(kv.key)[0] for kv in lastTwo]
220
221        # Vector was empty
222        if len(lastTwo) == 0:
223            return None
224
225        # Vector has size one:
226        elif indices[0] == 0:
227            pass
228
229        # Second to last item is being represented sparsely
230        elif len(lastTwo) == 1 or indices[0] > indices[1] + 1:
231            tr[self._key_at(indices[0] - 1)] = fdb.tuple.pack((self.defaultValue,))
232
233        del tr[lastTwo[0].key]
234        return fdb.tuple.unpack(lastTwo[0].value)[0]
235
236    @fdb.transactional
237    def _swap(self, i1, i2, tr):
238        k1 = self._key_at(i1)
239        k2 = self._key_at(i2)
240
241        currentSize = self._size(tr)
242        v1 = tr[k1]
243        v2 = tr[k2]
244
245        if i1 > currentSize or i2 > currentSize or i1 < 0 or i2 < 0:
246            raise IndexError('vector.swap: indices (%d, %d) out of range' % (i1, i2))
247
248        if v2.present():
249            tr[k1] = v2
250        elif v1.present() and i1 < currentSize - 1:
251            del tr[k1]
252
253        if v1.present():
254            tr[k2] = v1
255        elif v2.present() and i2 < currentSize - 1:
256            del tr[k2]
257
258    @fdb.transactional
259    def _get(self, index, tr):
260        if index < 0:
261            raise IndexError('vector.get: index \'%d\' out of range' % index)
262
263        start = self._key_at(index)
264        end = self.subspace.range().stop
265
266        output = tr.get_range(start, end, 1)
267        for k, v in output:
268            # The requested index had an associated key
269            if(start == k):
270                return fdb.tuple.unpack(v)[0]
271
272            # The requested index is sparsely represented
273            return self.defaultValue
274
275        # We requested a value past the end of the vector
276        raise IndexError('vector.get: index \'%d\' out of range' % index)
277
278    def _get_range(self, startIndex, endIndex, step, tr):
279        size = self._size(tr)
280        if startIndex is not None and startIndex < 0:
281            startIndex = max(0, size + startIndex)
282        if endIndex is not None and endIndex < 0:
283            endIndex = max(0, size + endIndex)
284
285        if step is None:
286            if startIndex is None or endIndex is None or startIndex <= endIndex:
287                step = 1
288            else:
289                step = -1
290        elif step == 0:
291            raise ValueError('vector.get_range: step cannot be zero')
292
293        if startIndex is None:
294            if step > 0:
295                start = self.subspace.range().start
296            else:
297                end = self.subspace.range().stop
298        else:
299            if step > 0:
300                start = self._key_at(startIndex)
301            else:
302                end = self._key_at(startIndex + 1)
303
304        if endIndex is None:
305            if step > 0:
306                end = self.subspace.range().stop
307            else:
308                start = self.subspace.range().start
309
310        else:
311            if step > 0:
312                end = self._key_at(endIndex)
313            else:
314                start = self._key_at(endIndex + 1)
315
316        result = tr.get_range(start, end, 0, step < 0)
317
318        currentIndex = startIndex
319        if currentIndex is None:
320            if step > 0:
321                currentIndex = 0
322            else:
323                currentIndex = size - 1
324        elif currentIndex >= size:
325            currentIndex = size - 1
326
327        for k, v in result:
328            keyIndex = self.subspace.unpack(k)[0]
329            while (step > 0 and currentIndex < keyIndex) or (step < 0 and currentIndex > keyIndex):
330                currentIndex = currentIndex + step
331                yield self.defaultValue
332
333            if currentIndex == keyIndex:
334                currentIndex = currentIndex + step
335                yield fdb.tuple.unpack(v)[0]
336
337    @fdb.transactional
338    def _size(self, tr):
339        keyRange = self.subspace.range()
340        lastKey = tr.get_key(fdb.KeySelector.last_less_or_equal(keyRange.stop))
341        if lastKey < keyRange.start:
342            return 0
343
344        return self.subspace.unpack(lastKey)[0] + 1
345
346    @fdb.transactional
347    def _set(self, index, val, tr):
348        tr[self._key_at(index)] = fdb.tuple.pack((val,))
349
350    @fdb.transactional
351    def _resize(self, length, tr):
352        currentSize = self.size()
353        if(length == currentSize):
354            return
355        if(length < currentSize):
356            self._shrink(tr, length, currentSize)
357        else:
358            self._expand(tr, length, currentSize)
359
360    @fdb.transactional
361    def _shrink(self, tr, length, currentSize):
362        tr.clear_range(self._key_at(length), self.subspace.range().stop)
363
364        # Check if the new end of the vector was being sparsely represented
365        if self._size(tr) < length:
366            tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,))
367
368    @fdb.transactional
369    def _expand(self, tr, length, currentSize):
370        tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,))
371
372    @fdb.transactional
373    def _clear(self, tr):
374        del tr[self.subspace.range()]
375
376    def _to_transaction(self, tr):
377        if tr is None:
378            if self.local.tr is None:
379                raise Exception('No transaction specified and use_transaction has not been called')
380            else:
381                return self.local.tr
382        else:
383            return tr
384
385    def _key_at(self, index):
386        return self.subspace.pack((index,))
387
388
389##################
390# internal tests #
391##################
392
393
394# caution: modifies the database!
395@fdb.transactional
396def vector_test(tr):
397    vector = Vector(Subspace(('test_vector',)), 0)
398
399    with vector.use_transaction(tr):
400        print 'Clearing any previous values in the vector'
401        vector.clear()
402
403        print '\nMODIFIERS'
404
405        # Set + Push
406        vector[0] = 1
407        vector[1] = 2
408        vector.push(3)
409        _print_vector(vector, tr)
410
411        # Swap
412        vector.swap(0, 2)
413        _print_vector(vector, tr)
414
415        # Pop
416        print 'Popped:', vector.pop()
417        _print_vector(vector, tr)
418
419        # Clear
420        vector.clear()
421
422        print 'Pop empty:', vector.pop()
423        _print_vector(vector, tr)
424
425        vector.push('Foo')
426        print 'Pop size 1:', vector.pop()
427        _print_vector(vector, tr)
428
429        print '\nCAPACITY OPERATIONS'
430
431        # Capacity
432        print 'Size:', vector.size()
433        print 'Empty:', vector.empty()
434
435        print 'Resizing to length 5'
436        vector.resize(5)
437        _print_vector(vector, tr)
438        print 'Size:', vector.size()
439
440        print 'Setting values'
441        vector[0] = 'The'
442        vector[1] = 'Quick'
443        vector[2] = 'Brown'
444        vector[3] = 'Fox'
445        vector[4] = 'Jumps'
446        vector[5] = 'Over'
447        _print_vector(vector, tr)
448
449        print '\nFRONT'
450        print vector.front()
451
452        print '\nBACK'
453        print vector.back()
454
455        print '\nELEMENT ACCESS'
456        print 'Index 0:', vector[0]
457        print 'Index 5:', vector[5]
458
459        _print_vector(vector, tr)
460        print 'Size:', vector.size()
461
462        print '\nRESIZING'
463        print 'Resizing to 3'
464        vector.resize(3)
465        _print_vector(vector, tr)
466        print 'Size:', vector.size()
467
468        print 'Resizing to 3 again'
469        vector.resize(3)
470        _print_vector(vector, tr)
471        print 'Size:', vector.size()
472
473        print 'Resizing to 6'
474        vector.resize(6)
475        _print_vector(vector, tr)
476        print 'Size:', vector.size()
477
478        print '\nSPARSE TESTS'
479        print 'Popping sparse vector'
480        vector.pop()
481
482        _print_vector(vector, tr)
483        print 'Size:', vector.size()
484
485        print 'Resizing to 4'
486        vector.resize(4)
487
488        _print_vector(vector, tr)
489        print 'Size:', vector.size()
490
491        print 'Adding "word" to index 10, resize to 25'
492        vector[10] = 'word'
493        vector.resize(25)
494
495        _print_vector(vector, tr)
496        print 'Size:', vector.size()
497
498        print 'Popping sparse vector'
499        vector.pop()
500
501        _print_vector(vector, tr)
502        print 'Size:', vector.size()
503
504        print 'Swapping with sparse element'
505        vector.swap(10, 15)
506
507        _print_vector(vector, tr)
508        print 'Size:', vector.size()
509
510        print 'Swapping sparse elements'
511        vector.swap(12, 13)
512
513        _print_vector(vector, tr)
514        print 'Size:', vector.size()
515
516
517##############################
518# Vector sample usage #
519##############################
520
521
522import sys
523
524
525# caution: modifies the database!
526@fdb.transactional
527def vector_example(tr):
528    vector = Vector(Subspace(('my_vector',)), 0)
529
530    with vector.use_transaction(tr):
531        vector.clear()
532
533        for i in range(0, 5):
534            vector.push(i)
535
536        _print_vector(vector, tr)
537
538        print 'Pop last item: %d' % vector.pop()
539        _print_vector(vector, tr)
540
541        vector[1] = 10
542        vector.resize(11)
543        _print_vector(vector, tr)
544
545        vector.swap(1, 10)
546        _print_vector(vector, tr)
547
548
549def _print_vector(vector, tr):
550    first = True
551    with vector.use_transaction(tr):
552        for v in vector:
553            if not first:
554                sys.stdout.write(',')
555
556            first = False
557            sys.stdout.write(repr(v))
558
559    print
560
561
562# caution: modifies the database!
563if __name__ == '__main__':
564    db = fdb.open()
565    vector_example(db)
566    # vector_test(db)
567