1# Copyright 2009 Matt Chaput. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are met:
5#
6#    1. Redistributions of source code must retain the above copyright notice,
7#       this list of conditions and the following disclaimer.
8#
9#    2. Redistributions in binary form must reproduce the above copyright
10#       notice, this list of conditions and the following disclaimer in the
11#       documentation and/or other materials provided with the distribution.
12#
13# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
14# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
15# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
16# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
17# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
18# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
19# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
20# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
21# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
22# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23#
24# The views and conclusions contained in the software and documentation are
25# those of the authors and should not be interpreted as representing official
26# policies, either expressed or implied, of Matt Chaput.
27
28"""This module defines writer and reader classes for a fast, immutable
29on-disk key-value database format. The current format is based heavily on
30D. J. Bernstein's CDB format (http://cr.yp.to/cdb.html).
31"""
32
33import os, struct
34from binascii import crc32
35from bisect import bisect_left
36from hashlib import md5  # @UnresolvedImport
37
38from whoosh.compat import b, bytes_type
39from whoosh.compat import xrange
40from whoosh.util.numlists import GrowableArray
41from whoosh.system import _INT_SIZE, emptybytes
42
43
44# Exceptions
45
class FileFormatError(Exception):
    """Raised when a file does not begin with the expected format tag bytes."""
    pass
48
49
50# Hash functions
51
def cdb_hash(key):
    """Computes D. J. Bernstein's CDB hash of the given key, masked to 32
    bits.

    :param key: the bytes (or string) to hash. On Python 3, iterating a
        ``bytes`` object yields ints; on Python 2 (or for text) it yields
        one-character strings. Both element types are handled.
    """

    h = 5381
    for c in key:
        # Normalize the element to an int code point/byte value; py3 bytes
        # iteration already yields ints, so ord() would raise TypeError there
        if not isinstance(c, int):
            c = ord(c)
        h = (h + (h << 5)) & 0xffffffff ^ c
    return h
57
58
def md5_hash(key):
    # Hash the key bytes with MD5 and keep only the low 32 bits of the digest
    digest = md5(key).hexdigest()
    return int(digest, 16) & 0xffffffff
61
62
def crc_hash(key):
    # crc32() can return a signed value depending on the Python version, so
    # mask the result to get a consistent unsigned 32-bit hash
    return 0xffffffff & crc32(key)
65
66
# Order matters: a file's hash type byte indexes into this tuple
# (0 = MD5, 1 = CRC32, 2 = CDB hash)
_hash_functions = (md5_hash, crc_hash, cdb_hash)


# Structs

# Two signed ints before the key/value pair giving the length of the key and
# the length of the value
_lengths = struct.Struct("!ii")
# A pointer in a hash table, giving the hash value and the key position
_pointer = struct.Struct("!Iq")
# A pointer in the hash table directory, giving the position and number of slots
_dir_entry = struct.Struct("!qi")

# Total byte size of the directory of the 256 hash tables
_directory_size = 256 * _dir_entry.size
80
81
82# Basic hash file
83
class HashWriter(object):
    """Implements a fast on-disk key-value store. This hash uses a two-level
    hashing scheme, where a key is hashed, the low eight bits of the hash value
    are used to index into one of 256 hash tables. This is basically the CDB
    algorithm, but unlike CDB this object writes all data serially (it doesn't
    seek backwards to overwrite information at the end).

    Also unlike CDB, this format uses 64-bit file pointers, so the file length
    is essentially unlimited. However, each key and value must be less than
    2 GB in length.
    """

    def __init__(self, dbfile, magic=b("HSH3"), hashtype=0):
        """
        :param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
            to write to.
        :param magic: the format tag bytes to write at the start of the file.
        :param hashtype: an integer indicating which hashing algorithm to use.
            Possible values are 0 (MD5), 1 (CRC32), or 2 (CDB hash).
        """

        self.dbfile = dbfile
        self.hashtype = hashtype
        # Resolve the hash type number to the actual hashing function
        self.hashfn = _hash_functions[self.hashtype]
        # A place for subclasses to put extra metadata
        self.extras = {}

        self.startoffset = dbfile.tell()
        # Write format tag
        dbfile.write(magic)
        # Write hash type
        dbfile.write_byte(self.hashtype)
        # Unused future expansion bits
        dbfile.write_int(0)
        dbfile.write_int(0)

        # 256 lists of hashed keys and positions
        self.buckets = [[] for _ in xrange(256)]
        # List to remember the positions of the hash tables
        self.directory = []

    def tell(self):
        # Returns the current write position in the underlying file
        return self.dbfile.tell()

    def add(self, key, value):
        """Adds a key/value pair to the file. Note that keys DO NOT need to be
        unique. You can store multiple values under the same key and retrieve
        them using :meth:`HashReader.all`.

        :param key: the key, as a bytes object.
        :param value: the value, as a bytes object.
        """

        assert isinstance(key, bytes_type)
        assert isinstance(value, bytes_type)

        dbfile = self.dbfile
        pos = dbfile.tell()
        # Write the key and value lengths, then the key and value bytes
        dbfile.write(_lengths.pack(len(key), len(value)))
        dbfile.write(key)
        dbfile.write(value)

        # Get hash value for the key
        h = self.hashfn(key)
        # Add hash and on-disk position to appropriate bucket; the low eight
        # bits of the hash select which of the 256 buckets to use
        self.buckets[h & 255].append((h, pos))

    def add_all(self, items):
        """Convenience method to add a sequence of ``(key, value)`` pairs. This
        is the same as calling :meth:`HashWriter.add` on each pair in the
        sequence.
        """

        add = self.add
        for key, value in items:
            add(key, value)

    def _write_hashes(self):
        # Writes 256 hash tables containing pointers to the key/value pairs

        dbfile = self.dbfile
        # Represent an empty slot in the hash table using 0,0 (no key can
        # start at position 0 because of the header)
        null = (0, 0)

        for entries in self.buckets:
            # Start position of this bucket's hash table
            pos = dbfile.tell()
            # Remember the start position and the number of slots; allocating
            # twice as many slots as entries keeps the table at most half full
            numslots = 2 * len(entries)
            self.directory.append((pos, numslots))

            # Create the empty hash table
            hashtable = [null] * numslots
            # For each (hash value, key position) tuple in the bucket
            for hashval, position in entries:
                # Bitshift and wrap to get the slot for this entry
                slot = (hashval >> 8) % numslots
                # If the slot is taken, keep going until we find an empty slot
                # (open addressing with linear probing)
                while hashtable[slot] != null:
                    slot = (slot + 1) % numslots
                # Insert the entry into the hashtable
                hashtable[slot] = (hashval, position)

            # Write the hash table for this bucket to disk
            for hashval, position in hashtable:
                dbfile.write(_pointer.pack(hashval, position))

    def _write_directory(self):
        # Writes a directory of pointers to the 256 hash tables

        dbfile = self.dbfile
        for position, numslots in self.directory:
            dbfile.write(_dir_entry.pack(position, numslots))

    def _write_extras(self):
        # Pickles the extras dictionary to the file; subclasses override this
        # to write additional trailing data (e.g. index arrays)
        self.dbfile.write_pickle(self.extras)

    def close(self):
        """Finishes the file by writing the hash tables, the directory of
        table positions, and the extras, then closes the underlying file.
        Returns the end position of the written data.
        """

        dbfile = self.dbfile

        # Write hash tables
        self._write_hashes()
        # Write directory of pointers to hash tables
        self._write_directory()

        expos = dbfile.tell()
        # Write extra information
        self._write_extras()
        # Write length of pickle so a reader can find the start of the extras
        # by seeking backwards from the end of the file
        dbfile.write_int(dbfile.tell() - expos)

        endpos = dbfile.tell()
        dbfile.close()
        return endpos
216
217
class HashReader(object):
    """Reader for the fast on-disk key-value files created by
    :class:`HashWriter`.
    """

    def __init__(self, dbfile, length=None, magic=b("HSH3"), startoffset=0):
        """
        :param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
            to read from.
        :param length: the length of the file data. This is necessary since the
            hashing information is written at the end of the file.
        :param magic: the format tag bytes to look for at the start of the
            file. If the file's format tag does not match these bytes, the
            object raises a :class:`FileFormatError` exception.
        :param startoffset: the starting point of the file data.
        """

        self.dbfile = dbfile
        self.startoffset = startoffset
        self.is_closed = False

        if length is None:
            # No length given, so deduce it from the end of the file
            dbfile.seek(0, os.SEEK_END)
            length = dbfile.tell() - startoffset

        dbfile.seek(startoffset)
        # Check format tag
        filemagic = dbfile.read(4)
        if filemagic != magic:
            raise FileFormatError("Unknown file header %r" % filemagic)
        # Read hash type and resolve it to the corresponding hash function
        self.hashtype = dbfile.read_byte()
        self.hashfn = _hash_functions[self.hashtype]
        # Skip unused future expansion bits
        dbfile.read_int()
        dbfile.read_int()
        # The key/value pairs begin immediately after the header
        self.startofdata = dbfile.tell()

        exptr = startoffset + length - _INT_SIZE
        # Get the length of extras from the end of the file
        exlen = dbfile.get_int(exptr)
        # Read the extras
        expos = exptr - exlen
        dbfile.seek(expos)
        self._read_extras()

        # Calculate the directory base from the beginning of the extras
        dbfile.seek(expos - _directory_size)
        # Read directory of hash tables
        self.tables = []
        entrysize = _dir_entry.size
        unpackentry = _dir_entry.unpack
        for _ in xrange(256):
            # position, numslots
            self.tables.append(unpackentry(dbfile.read(entrysize)))
        # The position of the first hash table is the end of the key/value pairs
        self.endofdata = self.tables[0][0]

    @classmethod
    def open(cls, storage, name):
        """Convenience method to open a hash file given a
        :class:`whoosh.filedb.filestore.Storage` object and a name. This takes
        care of opening the file and passing its length to the initializer.
        """

        length = storage.file_length(name)
        dbfile = storage.open_file(name)
        return cls(dbfile, length)

    def file(self):
        # Returns the underlying file object
        return self.dbfile

    def _read_extras(self):
        # Unpickles the extras dict written by HashWriter._write_extras;
        # subclasses override this to read additional trailing data
        try:
            self.extras = self.dbfile.read_pickle()
        except EOFError:
            self.extras = {}

    def close(self):
        """Closes the underlying file. Raises an exception if the reader has
        already been closed.
        """

        if self.is_closed:
            raise Exception("Tried to close %r twice" % self)
        self.dbfile.close()
        self.is_closed = True

    def key_at(self, pos):
        # Returns the key bytes at the given position

        dbfile = self.dbfile
        # Read the key length from the first of the two length ints, then the
        # key bytes that follow the length struct
        keylen = dbfile.get_uint(pos)
        return dbfile.get(pos + _lengths.size, keylen)

    def key_and_range_at(self, pos):
        # Returns a (keybytes, datapos, datalen) tuple for the key at the given
        # position, or None if the position is past the end of the data
        dbfile = self.dbfile
        lenssize = _lengths.size

        if pos >= self.endofdata:
            return None

        keylen, datalen = _lengths.unpack(dbfile.get(pos, lenssize))
        keybytes = dbfile.get(pos + lenssize, keylen)
        datapos = pos + lenssize + keylen
        return keybytes, datapos, datalen

    def _ranges(self, pos=None, eod=None):
        # Yields a series of (keypos, keylength, datapos, datalength) tuples
        # for the key/value pairs in the file
        dbfile = self.dbfile
        # Default to scanning all key/value pairs (header positions are never
        # 0, so falsy arguments mean "use the default")
        pos = pos or self.startofdata
        eod = eod or self.endofdata
        lenssize = _lengths.size
        unpacklens = _lengths.unpack

        while pos < eod:
            keylen, datalen = unpacklens(dbfile.get(pos, lenssize))
            keypos = pos + lenssize
            datapos = keypos + keylen
            yield (keypos, keylen, datapos, datalen)
            pos = datapos + datalen

    def __getitem__(self, key):
        # Returns the first value stored under the given key; raises KeyError
        # if the key is not in the file
        for value in self.all(key):
            return value
        raise KeyError(key)

    def __iter__(self):
        # Yields (key, value) pairs in file order
        dbfile = self.dbfile
        for keypos, keylen, datapos, datalen in self._ranges():
            key = dbfile.get(keypos, keylen)
            value = dbfile.get(datapos, datalen)
            yield (key, value)

    def __contains__(self, key):
        for _ in self.ranges_for_key(key):
            return True
        return False

    def keys(self):
        # Yields the keys in file order (duplicates included)
        dbfile = self.dbfile
        for keypos, keylen, _, _ in self._ranges():
            yield dbfile.get(keypos, keylen)

    def values(self):
        # Yields the values in file order
        dbfile = self.dbfile
        for _, _, datapos, datalen in self._ranges():
            yield dbfile.get(datapos, datalen)

    def items(self):
        # Yields (key, value) pairs in file order
        dbfile = self.dbfile
        for keypos, keylen, datapos, datalen in self._ranges():
            yield (dbfile.get(keypos, keylen), dbfile.get(datapos, datalen))

    def get(self, key, default=None):
        # Returns the first value stored under the given key, or the default
        # if the key is not in the file
        for value in self.all(key):
            return value
        return default

    def all(self, key):
        """Yields a sequence of values associated with the given key.
        """

        dbfile = self.dbfile
        for datapos, datalen in self.ranges_for_key(key):
            yield dbfile.get(datapos, datalen)

    def ranges_for_key(self, key):
        """Yields a sequence of ``(datapos, datalength)`` tuples associated
        with the given key.

        :raises TypeError: if the key is not a bytes object.
        """

        if not isinstance(key, bytes_type):
            raise TypeError("Key %r should be bytes" % key)
        dbfile = self.dbfile

        # Hash the key
        keyhash = self.hashfn(key)
        # Get the position and number of slots for the hash table in which the
        # key may be found
        tablestart, numslots = self.tables[keyhash & 255]
        # If the hash table is empty, we know the key doesn't exists
        if not numslots:
            return

        ptrsize = _pointer.size
        unpackptr = _pointer.unpack
        lenssize = _lengths.size
        unpacklens = _lengths.unpack

        # Calculate where the key's slot should be
        slotpos = tablestart + (((keyhash >> 8) % numslots) * ptrsize)
        # Read slots looking for our key's hash value (linear probing, the
        # same scheme used by HashWriter._write_hashes)
        for _ in xrange(numslots):
            slothash, itempos = unpackptr(dbfile.get(slotpos, ptrsize))
            # If this slot is empty, we're done
            if not itempos:
                return

            # If the key hash in this slot matches our key's hash, we might have
            # a match, so read the actual key and see if it's our key
            if slothash == keyhash:
                # Read the key and value lengths
                keylen, datalen = unpacklens(dbfile.get(itempos, lenssize))
                # Only bother reading the actual key if the lengths match
                if keylen == len(key):
                    keystart = itempos + lenssize
                    if key == dbfile.get(keystart, keylen):
                        # The keys match, so yield (datapos, datalen)
                        yield (keystart + keylen, datalen)

            slotpos += ptrsize
            # If we reach the end of the hashtable, wrap around
            if slotpos == tablestart + (numslots * ptrsize):
                slotpos = tablestart

    def range_for_key(self, key):
        # Returns the first (datapos, datalen) tuple for the given key; raises
        # KeyError if the key is not in the file
        for item in self.ranges_for_key(key):
            return item
        raise KeyError(key)
437
438
439# Ordered hash file
440
class OrderedHashWriter(HashWriter):
    """Implements an on-disk hash, but requires that keys be added in order.
    An :class:`OrderedHashReader` can then look up "nearest keys" based on
    the ordering.
    """

    def __init__(self, dbfile):
        HashWriter.__init__(self, dbfile)
        # Growable array remembering the file position of every key, in
        # insertion (= key) order
        self.index = GrowableArray("H")
        # The most recently added key, used to enforce ascending order
        self.lastkey = emptybytes

    def add(self, key, value):
        """Adds a key/value pair. The key must sort after every previously
        added key; otherwise a ValueError is raised.
        """

        last = self.lastkey
        if key <= last:
            raise ValueError("Keys must increase: %r..%r"
                             % (last, key))
        # Record where this key/value pair starts before writing it
        self.index.append(self.dbfile.tell())
        HashWriter.add(self, key, value)
        self.lastkey = key

    def _write_extras(self):
        dbfile = self.dbfile
        positions = self.index

        # Record the array's typecode and length so a reader can decode it
        self.extras["indextype"] = positions.typecode
        self.extras["indexlen"] = len(positions)
        # Pickle the extras dict
        HashWriter._write_extras(self)
        # Append the index array immediately after the pickled extras
        positions.to_file(dbfile)
473
474
class OrderedHashReader(HashReader):
    """Reader for key-value files written by :class:`OrderedHashWriter`.
    Uses the key-position index stored in the extras to also support
    "closest key" lookups and ordered iteration from a given key.
    """

    def closest_key(self, key):
        """Returns the closest key equal to or greater than the given key. If
        there is no key in the file equal to or greater than the given key,
        returns None.
        """

        pos = self.closest_key_pos(key)
        if pos is None:
            return None
        return self.key_at(pos)

    def ranges_from(self, key):
        """Yields a series of ``(keypos, keylen, datapos, datalen)`` tuples
        for the ordered series of keys equal or greater than the given key.
        """

        pos = self.closest_key_pos(key)
        if pos is None:
            return

        for item in self._ranges(pos=pos):
            yield item

    def keys_from(self, key):
        """Yields an ordered series of keys equal to or greater than the given
        key.
        """

        dbfile = self.dbfile
        for keypos, keylen, _, _ in self.ranges_from(key):
            yield dbfile.get(keypos, keylen)

    def items_from(self, key):
        """Yields an ordered series of ``(key, value)`` tuples for keys equal
        to or greater than the given key.
        """

        dbfile = self.dbfile
        for keypos, keylen, datapos, datalen in self.ranges_from(key):
            yield (dbfile.get(keypos, keylen), dbfile.get(datapos, datalen))

    def _read_extras(self):
        dbfile = self.dbfile

        # Read the extras
        HashReader._read_extras(self)

        # Set up for reading the index array, which OrderedHashWriter writes
        # immediately after the pickled extras
        indextype = self.extras["indextype"]
        self.indexbase = dbfile.tell()
        self.indexlen = self.extras["indexlen"]
        # Size in bytes of one entry in the index array
        self.indexsize = struct.calcsize(indextype)
        # Set up the function to read values from the index array
        if indextype == "B":
            self._get_pos = dbfile.get_byte
        elif indextype == "H":
            self._get_pos = dbfile.get_ushort
        elif indextype == "i":
            self._get_pos = dbfile.get_int
        elif indextype == "I":
            self._get_pos = dbfile.get_uint
        elif indextype == "q":
            self._get_pos = dbfile.get_long
        else:
            raise Exception("Unknown index type %r" % indextype)

    def closest_key_pos(self, key):
        # Given a key, return the position of that key OR the next highest key
        # if the given key does not exist
        if not isinstance(key, bytes_type):
            raise TypeError("Key %r should be bytes" % key)

        indexbase = self.indexbase
        indexsize = self.indexsize
        key_at = self.key_at
        _get_pos = self._get_pos

        # Do a binary search of the positions in the index array (the index
        # entries are key positions, in ascending key order, so this is
        # bisect_left over the keys they point at)
        lo = 0
        hi = self.indexlen
        while lo < hi:
            mid = (lo + hi) // 2
            midkey = key_at(_get_pos(indexbase + mid * indexsize))
            if midkey < key:
                lo = mid + 1
            else:
                hi = mid

        # If we went off the end, return None
        if lo == self.indexlen:
            return None
        # Return the closest key
        return _get_pos(indexbase + lo * indexsize)
569
570
571# Fielded Ordered hash file
572
class FieldedOrderedHashWriter(HashWriter):
    """Implements an on-disk hash, but writes separate position indexes for
    each field.
    """

    def __init__(self, dbfile):
        HashWriter.__init__(self, dbfile)
        # Map field names to (startpos, indexpos, length, typecode)
        self.fieldmap = self.extras["fieldmap"] = {}

        # The most recently added key, used to enforce ascending order
        self.lastkey = emptybytes

    def start_field(self, fieldname):
        """Begins a new field: remembers where its data starts and resets the
        per-field key position index.
        """

        self.fieldstart = self.dbfile.tell()
        self.fieldname = fieldname
        # Positions of this field's keys, relative to fieldstart
        self.poses = GrowableArray("H")
        self.lastkey = emptybytes

    def add(self, key, value):
        """Adds a key/value pair to the current field. The key must sort after
        every key previously added to this field; otherwise a ValueError is
        raised.
        """

        last = self.lastkey
        if key <= last:
            raise ValueError("Keys must increase: %r..%r"
                             % (last, key))
        # Record this key's offset within the field before writing the pair
        self.poses.append(self.dbfile.tell() - self.fieldstart)
        HashWriter.add(self, key, value)
        self.lastkey = key

    def end_field(self):
        """Ends the current field: records its metadata in the field map and
        writes its position index to disk.
        """

        dbfile = self.dbfile
        positions = self.poses
        self.fieldmap[self.fieldname] = (self.fieldstart, dbfile.tell(),
                                         len(positions), positions.typecode)
        positions.to_file(dbfile)
608
609
class FieldedOrderedHashReader(HashReader):
    """Reader for the fielded on-disk hash files written by
    :class:`FieldedOrderedHashWriter`. Adds per-field term lookups and
    ordered per-field iteration on top of the :class:`HashReader` interface.
    """

    def __init__(self, *args, **kwargs):
        HashReader.__init__(self, *args, **kwargs)
        # Map of field names to (startpos, indexpos, length, typecode), as
        # recorded by FieldedOrderedHashWriter.end_field
        self.fieldmap = self.extras["fieldmap"]
        # Make a sorted list of the field names with their start and end ranges
        self.fieldlist = []
        for fieldname in sorted(self.fieldmap.keys()):
            startpos, ixpos, ixsize, ixtype = self.fieldmap[fieldname]
            self.fieldlist.append((fieldname, startpos, ixpos))

    def field_start(self, fieldname):
        """Returns the file position where the given field's data starts."""

        return self.fieldmap[fieldname][0]

    def fielded_ranges(self, pos=None, eod=None):
        # Yields (fieldname, keypos, keylen, datapos, datalen) tuples for the
        # key/value pairs in the file, tagging each pair with the field whose
        # range it falls in
        flist = self.fieldlist
        fpos = 0
        fieldname, start, end = flist[fpos]
        for keypos, keylen, datapos, datalen in self._ranges(pos, eod):
            if keypos >= end:
                # Passed the end of the current field; advance to the next
                fpos += 1
                fieldname, start, end = flist[fpos]
            yield fieldname, keypos, keylen, datapos, datalen

    def iter_terms(self):
        """Yields ``(fieldname, keybytes)`` pairs for all keys in the file."""

        get = self.dbfile.get
        for fieldname, keypos, keylen, _, _ in self.fielded_ranges():
            yield fieldname, get(keypos, keylen)

    def iter_term_items(self):
        """Yields ``(fieldname, keybytes, valuebytes)`` tuples for all
        key/value pairs in the file.
        """

        get = self.dbfile.get
        for item in self.fielded_ranges():
            fieldname, keypos, keylen, datapos, datalen = item
            yield fieldname, get(keypos, keylen), get(datapos, datalen)

    def contains_term(self, fieldname, btext):
        """Returns True if the given field contains the given key bytes."""

        try:
            self.range_for_term(fieldname, btext)
            return True
        except KeyError:
            return False

    def range_for_term(self, fieldname, btext):
        """Returns a ``(datapos, datalen)`` tuple for the value stored under
        the given field name and key bytes.

        :raises KeyError: if the field does not contain the key.
        """

        start, ixpos, ixsize, code = self.fieldmap[fieldname]
        for datapos, datalen in self.ranges_for_key(btext):
            # The same key bytes may appear in several fields; only accept a
            # hit whose data falls inside this field's range
            if start < datapos < ixpos:
                return datapos, datalen
        raise KeyError((fieldname, btext))

    def term_data(self, fieldname, btext):
        """Returns the value bytes stored under the given field name and key
        bytes, raising KeyError if not found.
        """

        datapos, datalen = self.range_for_term(fieldname, btext)
        return self.dbfile.get(datapos, datalen)

    def term_get(self, fieldname, btext, default=None):
        """Returns the value bytes stored under the given field name and key
        bytes, or the default if not found.
        """

        try:
            return self.term_data(fieldname, btext)
        except KeyError:
            return default

    def closest_term_pos(self, fieldname, key):
        # Given a key, return the position of that key OR the next highest key
        # if the given key does not exist
        if not isinstance(key, bytes_type):
            raise TypeError("Key %r should be bytes" % key)

        dbfile = self.dbfile
        key_at = self.key_at
        startpos, ixpos, ixsize, ixtype = self.fieldmap[fieldname]

        # Choose the function for reading entries from the index array based
        # on its typecode
        if ixtype == "B":
            get_pos = dbfile.get_byte
        elif ixtype == "H":
            get_pos = dbfile.get_ushort
        elif ixtype == "i":
            get_pos = dbfile.get_int
        elif ixtype == "I":
            get_pos = dbfile.get_uint
        elif ixtype == "q":
            get_pos = dbfile.get_long
        else:
            raise Exception("Unknown index type %r" % ixtype)

        # ixsize is the number of entries in the index array, NOT the entry
        # size; the byte offset of entry i is ixpos + i * itemsize, mirroring
        # OrderedHashReader.closest_key_pos (previously this code strided by
        # ixsize, which read past the entries)
        itemsize = struct.calcsize(ixtype)

        # Do a binary search of the positions in the index array
        lo = 0
        hi = ixsize
        while lo < hi:
            mid = (lo + hi) // 2
            midkey = key_at(startpos + get_pos(ixpos + mid * itemsize))
            if midkey < key:
                lo = mid + 1
            else:
                hi = mid

        # If we went off the end, return None
        if lo == ixsize:
            return None
        # Return the closest key
        return startpos + get_pos(ixpos + lo * itemsize)

    def closest_term(self, fieldname, btext):
        """Returns the closest key in the given field equal to or greater than
        the given key bytes, or None if there is no such key.
        """

        pos = self.closest_term_pos(fieldname, btext)
        if pos is None:
            return None
        return self.key_at(pos)

    def term_ranges_from(self, fieldname, btext):
        """Yields ``(keypos, keylen, datapos, datalen)`` tuples for the keys
        in the given field equal to or greater than the given key bytes.
        """

        pos = self.closest_term_pos(fieldname, btext)
        if pos is None:
            return

        startpos, ixpos, ixsize, ixtype = self.fieldmap[fieldname]
        # The field's key/value data ends where its index array begins
        for item in self._ranges(pos, ixpos):
            yield item

    def terms_from(self, fieldname, btext):
        """Yields the ordered series of keys in the given field equal to or
        greater than the given key bytes.
        """

        dbfile = self.dbfile
        for keypos, keylen, _, _ in self.term_ranges_from(fieldname, btext):
            yield dbfile.get(keypos, keylen)

    def term_items_from(self, fieldname, btext):
        """Yields ``(key, value)`` tuples for the keys in the given field
        equal to or greater than the given key bytes, in order.
        """

        dbfile = self.dbfile
        for item in self.term_ranges_from(fieldname, btext):
            keypos, keylen, datapos, datalen = item
            yield (dbfile.get(keypos, keylen), dbfile.get(datapos, datalen))
733
734
735
736