# Copyright 2009 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#    1. Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#
#    2. Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.

"""This module defines writer and reader classes for a fast, immutable
on-disk key-value database format. The current format is based heavily on
D. J. Bernstein's CDB format (http://cr.yp.to/cdb.html).
"""

import os, struct
from binascii import crc32
from bisect import bisect_left
from hashlib import md5  # @UnresolvedImport

from whoosh.compat import b, bytes_type
from whoosh.compat import xrange
from whoosh.util.numlists import GrowableArray
from whoosh.system import _INT_SIZE, emptybytes


# Exceptions

class FileFormatError(Exception):
    """Raised when a hash file does not start with the expected magic tag."""

    pass


# Hash functions

def cdb_hash(key):
    """Computes D. J. Bernstein's CDB hash of the given key bytes, masked to
    32 bits.
    """

    h = 5381
    for c in key:
        # Iterating bytes yields ints on Python 3 but 1-char strings on
        # Python 2; normalize so both produce identical hash values
        h = (h + (h << 5)) & 0xffffffff ^ (c if isinstance(c, int) else ord(c))
    return h


def md5_hash(key):
    """Returns the low 32 bits of the MD5 digest of the given key bytes."""

    return int(md5(key).hexdigest(), 16) & 0xffffffff


def crc_hash(key):
    """Returns the CRC32 of the given key bytes, masked to an unsigned 32-bit
    value (crc32 may return negative numbers on Python 2).
    """

    return crc32(key) & 0xffffffff


# Hash type codes 0, 1, and 2 index into this tuple; the code is written into
# the file header so readers use the same function as the writer
_hash_functions = (md5_hash, crc_hash, cdb_hash)


# Structs

# Two ints before the key/value pair giving the length of the key and value
_lengths = struct.Struct("!ii")
# A pointer in a hash table, giving the hash value and the key position
_pointer = struct.Struct("!Iq")
# A pointer in the hash table directory, giving the position and number of
# slots
_dir_entry = struct.Struct("!qi")

# Total on-disk size of the directory of 256 hash tables
_directory_size = 256 * _dir_entry.size


# Basic hash file

class HashWriter(object):
    """Implements a fast on-disk key-value store. This hash uses a two-level
    hashing scheme, where a key is hashed, the low eight bits of the hash value
    are used to index into one of 256 hash tables. This is basically the CDB
    algorithm, but unlike CDB this object writes all data serially (it doesn't
    seek backwards to overwrite information at the end).

    Also unlike CDB, this format uses 64-bit file pointers, so the file length
    is essentially unlimited. However, each key and value must be less than
    2 GB in length.
    """

    def __init__(self, dbfile, magic=b("HSH3"), hashtype=0):
        """
        :param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
            to write to.
        :param magic: the format tag bytes to write at the start of the file.
        :param hashtype: an integer indicating which hashing algorithm to use.
            Possible values are 0 (MD5), 1 (CRC32), or 2 (CDB hash).
        """

        self.dbfile = dbfile
        self.hashtype = hashtype
        self.hashfn = _hash_functions[self.hashtype]
        # A place for subclasses to put extra metadata
        self.extras = {}

        self.startoffset = dbfile.tell()
        # Write format tag
        dbfile.write(magic)
        # Write hash type
        dbfile.write_byte(self.hashtype)
        # Unused future expansion bits
        dbfile.write_int(0)
        dbfile.write_int(0)

        # 256 lists of hashed keys and positions
        self.buckets = [[] for _ in xrange(256)]
        # List to remember the positions of the hash tables
        self.directory = []

    def tell(self):
        return self.dbfile.tell()

    def add(self, key, value):
        """Adds a key/value pair to the file. Note that keys DO NOT need to be
        unique. You can store multiple values under the same key and retrieve
        them using :meth:`HashReader.all`.
        """

        assert isinstance(key, bytes_type)
        assert isinstance(value, bytes_type)

        dbfile = self.dbfile
        pos = dbfile.tell()
        dbfile.write(_lengths.pack(len(key), len(value)))
        dbfile.write(key)
        dbfile.write(value)

        # Get hash value for the key
        h = self.hashfn(key)
        # Add hash and on-disk position to appropriate bucket
        self.buckets[h & 255].append((h, pos))

    def add_all(self, items):
        """Convenience method to add a sequence of ``(key, value)`` pairs. This
        is the same as calling :meth:`HashWriter.add` on each pair in the
        sequence.
        """

        add = self.add
        for key, value in items:
            add(key, value)

    def _write_hashes(self):
        # Writes 256 hash tables containing pointers to the key/value pairs

        dbfile = self.dbfile
        # Represent an empty slot in the hash table using 0,0 (no key can
        # start at position 0 because of the header)
        null = (0, 0)

        for entries in self.buckets:
            # Start position of this bucket's hash table
            pos = dbfile.tell()
            # Remember the start position and the number of slots
            numslots = 2 * len(entries)
            self.directory.append((pos, numslots))

            # Create the empty hash table
            hashtable = [null] * numslots
            # For each (hash value, key position) tuple in the bucket
            for hashval, position in entries:
                # Bitshift and wrap to get the slot for this entry
                slot = (hashval >> 8) % numslots
                # If the slot is taken, keep going until we find an empty slot
                while hashtable[slot] != null:
                    slot = (slot + 1) % numslots
                # Insert the entry into the hashtable
                hashtable[slot] = (hashval, position)

            # Write the hash table for this bucket to disk
            for hashval, position in hashtable:
                dbfile.write(_pointer.pack(hashval, position))

    def _write_directory(self):
        # Writes a directory of pointers to the 256 hash tables

        dbfile = self.dbfile
        for position, numslots in self.directory:
            dbfile.write(_dir_entry.pack(position, numslots))

    def _write_extras(self):
        self.dbfile.write_pickle(self.extras)

    def close(self):
        dbfile = self.dbfile

        # Write hash tables
        self._write_hashes()
        # Write directory of pointers to hash tables
        self._write_directory()

        expos = dbfile.tell()
        # Write extra information
        self._write_extras()
        # Write length of pickle
        dbfile.write_int(dbfile.tell() - expos)

        endpos = dbfile.tell()
        dbfile.close()
        return endpos


class HashReader(object):
    """Reader for the fast on-disk key-value files created by
    :class:`HashWriter`.
    """

    def __init__(self, dbfile, length=None, magic=b("HSH3"), startoffset=0):
        """
        :param dbfile: a :class:`~whoosh.filedb.structfile.StructFile` object
            to read from.
        :param length: the length of the file data. This is necessary since the
            hashing information is written at the end of the file.
        :param magic: the format tag bytes to look for at the start of the
            file. If the file's format tag does not match these bytes, the
            object raises a :class:`FileFormatError` exception.
        :param startoffset: the starting point of the file data.
        """

        self.dbfile = dbfile
        self.startoffset = startoffset
        self.is_closed = False

        if length is None:
            dbfile.seek(0, os.SEEK_END)
            length = dbfile.tell() - startoffset

        dbfile.seek(startoffset)
        # Check format tag
        filemagic = dbfile.read(4)
        if filemagic != magic:
            raise FileFormatError("Unknown file header %r" % filemagic)
        # Read hash type
        self.hashtype = dbfile.read_byte()
        self.hashfn = _hash_functions[self.hashtype]
        # Skip unused future expansion bits
        dbfile.read_int()
        dbfile.read_int()
        self.startofdata = dbfile.tell()

        exptr = startoffset + length - _INT_SIZE
        # Get the length of extras from the end of the file
        exlen = dbfile.get_int(exptr)
        # Read the extras
        expos = exptr - exlen
        dbfile.seek(expos)
        self._read_extras()

        # Calculate the directory base from the beginning of the extras
        dbfile.seek(expos - _directory_size)
        # Read directory of hash tables
        self.tables = []
        entrysize = _dir_entry.size
        unpackentry = _dir_entry.unpack
        for _ in xrange(256):
            # position, numslots
            self.tables.append(unpackentry(dbfile.read(entrysize)))
        # The position of the first hash table is the end of the key/value
        # pairs
        self.endofdata = self.tables[0][0]

    @classmethod
    def open(cls, storage, name):
        """Convenience method to open a hash file given a
        :class:`whoosh.filedb.filestore.Storage` object and a name. This takes
        care of opening the file and passing its length to the initializer.
        """

        length = storage.file_length(name)
        dbfile = storage.open_file(name)
        return cls(dbfile, length)

    def file(self):
        return self.dbfile

    def _read_extras(self):
        try:
            self.extras = self.dbfile.read_pickle()
        except EOFError:
            self.extras = {}

    def close(self):
        if self.is_closed:
            raise Exception("Tried to close %r twice" % self)
        self.dbfile.close()
        self.is_closed = True

    def key_at(self, pos):
        # Returns the key bytes at the given position

        dbfile = self.dbfile
        keylen = dbfile.get_uint(pos)
        return dbfile.get(pos + _lengths.size, keylen)

    def key_and_range_at(self, pos):
        # Returns a (keybytes, datapos, datalen) tuple for the key at the
        # given position
        dbfile = self.dbfile
        lenssize = _lengths.size

        if pos >= self.endofdata:
            return None

        keylen, datalen = _lengths.unpack(dbfile.get(pos, lenssize))
        keybytes = dbfile.get(pos + lenssize, keylen)
        datapos = pos + lenssize + keylen
        return keybytes, datapos, datalen

    def _ranges(self, pos=None, eod=None):
        # Yields a series of (keypos, keylength, datapos, datalength) tuples
        # for the key/value pairs in the file
        dbfile = self.dbfile
        pos = pos or self.startofdata
        eod = eod or self.endofdata
        lenssize = _lengths.size
        unpacklens = _lengths.unpack

        while pos < eod:
            keylen, datalen = unpacklens(dbfile.get(pos, lenssize))
            keypos = pos + lenssize
            datapos = keypos + keylen
            yield (keypos, keylen, datapos, datalen)
            pos = datapos + datalen

    def __getitem__(self, key):
        for value in self.all(key):
            return value
        raise KeyError(key)

    def __iter__(self):
        dbfile = self.dbfile
        for keypos, keylen, datapos, datalen in self._ranges():
            key = dbfile.get(keypos, keylen)
            value = dbfile.get(datapos, datalen)
            yield (key, value)

    def __contains__(self, key):
        for _ in self.ranges_for_key(key):
            return True
        return False

    def keys(self):
        dbfile = self.dbfile
        for keypos, keylen, _, _ in self._ranges():
            yield dbfile.get(keypos, keylen)

    def values(self):
        dbfile = self.dbfile
        for _, _, datapos, datalen in self._ranges():
            yield dbfile.get(datapos, datalen)

    def items(self):
        dbfile = self.dbfile
        for keypos, keylen, datapos, datalen in self._ranges():
            yield (dbfile.get(keypos, keylen), dbfile.get(datapos, datalen))

    def get(self, key, default=None):
        for value in self.all(key):
            return value
        return default

    def all(self, key):
        """Yields a sequence of values associated with the given key.
        """

        dbfile = self.dbfile
        for datapos, datalen in self.ranges_for_key(key):
            yield dbfile.get(datapos, datalen)

    def ranges_for_key(self, key):
        """Yields a sequence of ``(datapos, datalength)`` tuples associated
        with the given key.
        """

        if not isinstance(key, bytes_type):
            raise TypeError("Key %r should be bytes" % key)
        dbfile = self.dbfile

        # Hash the key
        keyhash = self.hashfn(key)
        # Get the position and number of slots for the hash table in which the
        # key may be found
        tablestart, numslots = self.tables[keyhash & 255]
        # If the hash table is empty, we know the key doesn't exist
        if not numslots:
            return

        ptrsize = _pointer.size
        unpackptr = _pointer.unpack
        lenssize = _lengths.size
        unpacklens = _lengths.unpack

        # Calculate where the key's slot should be
        slotpos = tablestart + (((keyhash >> 8) % numslots) * ptrsize)
        # Read slots looking for our key's hash value
        for _ in xrange(numslots):
            slothash, itempos = unpackptr(dbfile.get(slotpos, ptrsize))
            # If this slot is empty, we're done
            if not itempos:
                return

            # If the key hash in this slot matches our key's hash, we might
            # have a match, so read the actual key and see if it's our key
            if slothash == keyhash:
                # Read the key and value lengths
                keylen, datalen = unpacklens(dbfile.get(itempos, lenssize))
                # Only bother reading the actual key if the lengths match
                if keylen == len(key):
                    keystart = itempos + lenssize
                    if key == dbfile.get(keystart, keylen):
                        # The keys match, so yield (datapos, datalen)
                        yield (keystart + keylen, datalen)

            slotpos += ptrsize
            # If we reach the end of the hashtable, wrap around
            if slotpos == tablestart + (numslots * ptrsize):
                slotpos = tablestart

    def range_for_key(self, key):
        for item in self.ranges_for_key(key):
            return item
        raise KeyError(key)


# Ordered hash file

class OrderedHashWriter(HashWriter):
    """Implements an on-disk hash, but requires that keys be added in order.
    An :class:`OrderedHashReader` can then look up "nearest keys" based on
    the ordering.
    """

    def __init__(self, dbfile):
        HashWriter.__init__(self, dbfile)
        # Keep an array of the positions of all keys
        self.index = GrowableArray("H")
        # Keep track of the last key added
        self.lastkey = emptybytes

    def add(self, key, value):
        if key <= self.lastkey:
            raise ValueError("Keys must increase: %r..%r"
                             % (self.lastkey, key))
        self.index.append(self.dbfile.tell())
        HashWriter.add(self, key, value)
        self.lastkey = key

    def _write_extras(self):
        dbfile = self.dbfile
        index = self.index

        # Store metadata about the index array
        self.extras["indextype"] = index.typecode
        self.extras["indexlen"] = len(index)
        # Write the extras
        HashWriter._write_extras(self)
        # Write the index array
        index.to_file(dbfile)


class OrderedHashReader(HashReader):
    def closest_key(self, key):
        """Returns the closest key equal to or greater than the given key. If
        there is no key in the file equal to or greater than the given key,
        returns None.
        """

        pos = self.closest_key_pos(key)
        if pos is None:
            return None
        return self.key_at(pos)

    def ranges_from(self, key):
        """Yields a series of ``(keypos, keylen, datapos, datalen)`` tuples
        for the ordered series of keys equal or greater than the given key.
        """

        pos = self.closest_key_pos(key)
        if pos is None:
            return

        for item in self._ranges(pos=pos):
            yield item

    def keys_from(self, key):
        """Yields an ordered series of keys equal to or greater than the given
        key.
        """

        dbfile = self.dbfile
        for keypos, keylen, _, _ in self.ranges_from(key):
            yield dbfile.get(keypos, keylen)

    def items_from(self, key):
        """Yields an ordered series of ``(key, value)`` tuples for keys equal
        to or greater than the given key.
        """

        dbfile = self.dbfile
        for keypos, keylen, datapos, datalen in self.ranges_from(key):
            yield (dbfile.get(keypos, keylen), dbfile.get(datapos, datalen))

    def _read_extras(self):
        dbfile = self.dbfile

        # Read the extras
        HashReader._read_extras(self)

        # Set up for reading the index array
        indextype = self.extras["indextype"]
        self.indexbase = dbfile.tell()
        self.indexlen = self.extras["indexlen"]
        # Byte size of one element in the index array
        self.indexsize = struct.calcsize(indextype)
        # Set up the function to read values from the index array
        if indextype == "B":
            self._get_pos = dbfile.get_byte
        elif indextype == "H":
            self._get_pos = dbfile.get_ushort
        elif indextype == "i":
            self._get_pos = dbfile.get_int
        elif indextype == "I":
            self._get_pos = dbfile.get_uint
        elif indextype == "q":
            self._get_pos = dbfile.get_long
        else:
            raise Exception("Unknown index type %r" % indextype)

    def closest_key_pos(self, key):
        # Given a key, return the position of that key OR the next highest key
        # if the given key does not exist
        if not isinstance(key, bytes_type):
            raise TypeError("Key %r should be bytes" % key)

        indexbase = self.indexbase
        indexsize = self.indexsize
        key_at = self.key_at
        _get_pos = self._get_pos

        # Do a binary search of the positions in the index array
        lo = 0
        hi = self.indexlen
        while lo < hi:
            mid = (lo + hi) // 2
            midkey = key_at(_get_pos(indexbase + mid * indexsize))
            if midkey < key:
                lo = mid + 1
            else:
                hi = mid

        # If we went off the end, return None
        if lo == self.indexlen:
            return None
        # Return the closest key
        return _get_pos(indexbase + lo * indexsize)


# Fielded Ordered hash file

class FieldedOrderedHashWriter(HashWriter):
    """Implements an on-disk hash, but writes separate position indexes for
    each field.
    """

    def __init__(self, dbfile):
        HashWriter.__init__(self, dbfile)
        # Map field names to (startpos, indexpos, length, typecode)
        self.fieldmap = self.extras["fieldmap"] = {}

        # Keep track of the last key added
        self.lastkey = emptybytes

    def start_field(self, fieldname):
        self.fieldstart = self.dbfile.tell()
        self.fieldname = fieldname
        # Keep an array of the positions of all keys
        self.poses = GrowableArray("H")
        self.lastkey = emptybytes

    def add(self, key, value):
        if key <= self.lastkey:
            raise ValueError("Keys must increase: %r..%r"
                             % (self.lastkey, key))
        self.poses.append(self.dbfile.tell() - self.fieldstart)
        HashWriter.add(self, key, value)
        self.lastkey = key

    def end_field(self):
        dbfile = self.dbfile
        fieldname = self.fieldname
        poses = self.poses
        self.fieldmap[fieldname] = (self.fieldstart, dbfile.tell(), len(poses),
                                    poses.typecode)
        poses.to_file(dbfile)


class FieldedOrderedHashReader(HashReader):
    def __init__(self, *args, **kwargs):
        HashReader.__init__(self, *args, **kwargs)
        self.fieldmap = self.extras["fieldmap"]
        # Make a sorted list of the field names with their start and end ranges
        self.fieldlist = []
        for fieldname in sorted(self.fieldmap.keys()):
            startpos, ixpos, ixsize, ixtype = self.fieldmap[fieldname]
            self.fieldlist.append((fieldname, startpos, ixpos))

    def field_start(self, fieldname):
        return self.fieldmap[fieldname][0]

    def fielded_ranges(self, pos=None, eod=None):
        flist = self.fieldlist
        fpos = 0
        fieldname, start, end = flist[fpos]
        for keypos, keylen, datapos, datalen in self._ranges(pos, eod):
            if keypos >= end:
                fpos += 1
                fieldname, start, end = flist[fpos]
            yield fieldname, keypos, keylen, datapos, datalen

    def iter_terms(self):
        get = self.dbfile.get
        for fieldname, keypos, keylen, _, _ in self.fielded_ranges():
            yield fieldname, get(keypos, keylen)

    def iter_term_items(self):
        get = self.dbfile.get
        for item in self.fielded_ranges():
            fieldname, keypos, keylen, datapos, datalen = item
            yield fieldname, get(keypos, keylen), get(datapos, datalen)

    def contains_term(self, fieldname, btext):
        try:
            self.range_for_term(fieldname, btext)
            return True
        except KeyError:
            return False

    def range_for_term(self, fieldname, btext):
        start, ixpos, ixsize, code = self.fieldmap[fieldname]
        for datapos, datalen in self.ranges_for_key(btext):
            if start < datapos < ixpos:
                return datapos, datalen
        raise KeyError((fieldname, btext))

    def term_data(self, fieldname, btext):
        datapos, datalen = self.range_for_term(fieldname, btext)
        return self.dbfile.get(datapos, datalen)

    def term_get(self, fieldname, btext, default=None):
        try:
            return self.term_data(fieldname, btext)
        except KeyError:
            return default

    def closest_term_pos(self, fieldname, key):
        # Given a key, return the position of that key OR the next highest key
        # if the given key does not exist
        if not isinstance(key, bytes_type):
            raise TypeError("Key %r should be bytes" % key)

        dbfile = self.dbfile
        key_at = self.key_at
        startpos, ixpos, ixsize, ixtype = self.fieldmap[fieldname]
        # NOTE: ixsize is the NUMBER of entries in the index array (see
        # FieldedOrderedHashWriter.end_field); the byte stride between
        # entries is the size of one element of the array's typecode
        itemsize = struct.calcsize(ixtype)

        if ixtype == "B":
            get_pos = dbfile.get_byte
        elif ixtype == "H":
            get_pos = dbfile.get_ushort
        elif ixtype == "i":
            get_pos = dbfile.get_int
        elif ixtype == "I":
            get_pos = dbfile.get_uint
        elif ixtype == "q":
            get_pos = dbfile.get_long
        else:
            raise Exception("Unknown index type %r" % ixtype)

        # Do a binary search of the positions in the index array
        lo = 0
        hi = ixsize
        while lo < hi:
            mid = (lo + hi) // 2
            midkey = key_at(startpos + get_pos(ixpos + mid * itemsize))
            if midkey < key:
                lo = mid + 1
            else:
                hi = mid

        # If we went off the end, return None
        if lo == ixsize:
            return None
        # Return the closest key
        return startpos + get_pos(ixpos + lo * itemsize)

    def closest_term(self, fieldname, btext):
        pos = self.closest_term_pos(fieldname, btext)
        if pos is None:
            return None
        return self.key_at(pos)

    def term_ranges_from(self, fieldname, btext):
        pos = self.closest_term_pos(fieldname, btext)
        if pos is None:
            return

        startpos, ixpos, ixsize, ixtype = self.fieldmap[fieldname]
        for item in self._ranges(pos, ixpos):
            yield item

    def terms_from(self, fieldname, btext):
        dbfile = self.dbfile
        for keypos, keylen, _, _ in self.term_ranges_from(fieldname, btext):
            yield dbfile.get(keypos, keylen)

    def term_items_from(self, fieldname, btext):
        dbfile = self.dbfile
        for item in self.term_ranges_from(fieldname, btext):
            keypos, keylen, datapos, datalen = item
            yield (dbfile.get(keypos, keylen), dbfile.get(datapos, datalen))