1# -*- coding: utf-8 -*- 2 3######################################################################## 4# 5# License: BSD 6# Created: June 02, 2004 7# Author: Francesc Alted - faltet@pytables.com 8# 9# $Source: /cvsroot/pytables/pytables/tables/indexes.py $ 10# $Id$ 11# 12######################################################################## 13 14"""Here is defined the IndexArray class.""" 15 16from bisect import bisect_left, bisect_right 17 18from .node import NotLoggedMixin 19from .carray import CArray 20from .earray import EArray 21from . import indexesextension 22 23 24# Declarations for inheriting 25 26 27class CacheArray(indexesextension.CacheArray, NotLoggedMixin, EArray): 28 """Container for keeping index caches of 1st and 2nd level.""" 29 30 # Class identifier. 31 _c_classid = 'CACHEARRAY' 32 33 34class LastRowArray(indexesextension.LastRowArray, NotLoggedMixin, CArray): 35 """Container for keeping sorted and indices values of last row of an 36 index.""" 37 38 # Class identifier. 39 _c_classid = 'LASTROWARRAY' 40 41 42class IndexArray(indexesextension.IndexArray, NotLoggedMixin, EArray): 43 """Represent the index (sorted or reverse index) dataset in HDF5 file. 44 45 All NumPy typecodes are supported except for complex datatypes. 46 47 Parameters 48 ---------- 49 parentnode 50 The Index class from which this object will hang off. 51 52 .. versionchanged:: 3.0 53 Renamed from *parentNode* to *parentnode*. 54 55 name : str 56 The name of this node in its parent group. 57 atom 58 An Atom object representing the shape and type of the atomic objects to 59 be saved. Only scalar atoms are supported. 60 title 61 Sets a TITLE attribute on the array entity. 62 filters : Filters 63 An instance of the Filters class that provides information about the 64 desired I/O filters to be applied during the life of this object. 65 byteorder 66 The byteroder of the data on-disk. 67 68 """ 69 70 # Class identifier. 71 _c_classid = 'INDEXARRAY' 72 73 # Properties 74 # ~~~~~~~~~~ 75 @property 76 def chunksize(self): 77 """The chunksize for this object.""" 78 return self.chunkshape[1] 79 80 @property 81 def slicesize(self): 82 """The slicesize for this object.""" 83 return self.shape[1] 84 85 # Other methods 86 # ~~~~~~~~~~~~~ 87 def __init__(self, parentnode, name, 88 atom=None, title="", 89 filters=None, byteorder=None): 90 """Create an IndexArray instance.""" 91 92 self._v_pathname = parentnode._g_join(name) 93 if atom is not None: 94 # The shape and chunkshape needs to be fixed here 95 if name == "sorted": 96 reduction = parentnode.reduction 97 shape = (0, parentnode.slicesize // reduction) 98 chunkshape = (1, parentnode.chunksize // reduction) 99 else: 100 shape = (0, parentnode.slicesize) 101 chunkshape = (1, parentnode.chunksize) 102 else: 103 # The shape and chunkshape will be read from disk later on 104 shape = None 105 chunkshape = None 106 107 super(IndexArray, self).__init__( 108 parentnode, name, atom, shape, title, filters, 109 chunkshape=chunkshape, byteorder=byteorder) 110 111 # This version of searchBin uses both ranges (1st level) and 112 # bounds (2nd level) caches. It uses a cache for boundary rows, 113 # but not for 'sorted' rows (this is only supported for the 114 # 'optimized' types). 115 def _search_bin(self, nrow, item): 116 item1, item2 = item 117 result1 = -1 118 result2 = -1 119 hi = self.shape[1] 120 ranges = self._v_parent.rvcache 121 boundscache = self.boundscache 122 # First, look at the beginning of the slice 123 begin = ranges[nrow, 0] 124 # Look for items at the beginning of sorted slices 125 if item1 <= begin: 126 result1 = 0 127 if item2 < begin: 128 result2 = 0 129 if result1 >= 0 and result2 >= 0: 130 return (result1, result2) 131 # Then, look for items at the end of the sorted slice 132 end = ranges[nrow, 1] 133 if result1 < 0: 134 if item1 > end: 135 result1 = hi 136 if result2 < 0: 137 if item2 >= end: 138 result2 = hi 139 if result1 >= 0 and result2 >= 0: 140 return (result1, result2) 141 # Finally, do a lookup for item1 and item2 if they were not found 142 # Lookup in the middle of slice for item1 143 chunksize = self.chunksize # Number of elements/chunksize 144 nchunk = -1 145 # Try to get the bounds row from the LRU cache 146 nslot = boundscache.getslot(nrow) 147 if nslot >= 0: 148 # Cache hit. Use the row kept there. 149 bounds = boundscache.getitem(nslot) 150 else: 151 # No luck with cached data. Read the row and put it in the cache. 152 bounds = self._v_parent.bounds[nrow] 153 size = bounds.size * bounds.itemsize 154 boundscache.setitem(nrow, bounds, size) 155 if result1 < 0: 156 # Search the appropriate chunk in bounds cache 157 nchunk = bisect_left(bounds, item1) 158 chunk = self._read_sorted_slice(nrow, chunksize * nchunk, 159 chunksize * (nchunk + 1)) 160 result1 = indexesextension._bisect_left(chunk, item1, chunksize) 161 result1 += chunksize * nchunk 162 # Lookup in the middle of slice for item2 163 if result2 < 0: 164 # Search the appropriate chunk in bounds cache 165 nchunk2 = bisect_right(bounds, item2) 166 if nchunk2 != nchunk: 167 chunk = self._read_sorted_slice(nrow, chunksize * nchunk2, 168 chunksize * (nchunk2 + 1)) 169 result2 = indexesextension._bisect_right(chunk, item2, chunksize) 170 result2 += chunksize * nchunk2 171 return (result1, result2) 172 173 def __str__(self): 174 "A compact representation of this class" 175 return "IndexArray(path=%s)" % self._v_pathname 176 177 def __repr__(self): 178 """A verbose representation of this class.""" 179 180 return """%s 181 atom = %r 182 shape = %s 183 nrows = %s 184 chunksize = %s 185 slicesize = %s 186 byteorder = %r""" % (self, self.atom, self.shape, self.nrows, 187 self.chunksize, self.slicesize, self.byteorder) 188 189 190## Local Variables: 191## mode: python 192## py-indent-offset: 4 193## tab-width: 4 194## fill-column: 72 195## End: 196