1# -*- coding: utf-8 -*-
2
3########################################################################
4#
5# License: BSD
6# Created: June 02, 2004
7# Author:  Francesc Alted - faltet@pytables.com
8#
9# $Source: /cvsroot/pytables/pytables/tables/indexes.py $
10# $Id$
11#
12########################################################################
13
14"""Here is defined the IndexArray class."""
15
16from bisect import bisect_left, bisect_right
17
18from .node import NotLoggedMixin
19from .carray import CArray
20from .earray import EArray
21from . import indexesextension
22
23
24# Declarations for inheriting
25
26
27class CacheArray(indexesextension.CacheArray, NotLoggedMixin, EArray):
28    """Container for keeping index caches of 1st and 2nd level."""
29
30    # Class identifier.
31    _c_classid = 'CACHEARRAY'
32
33
34class LastRowArray(indexesextension.LastRowArray, NotLoggedMixin, CArray):
35    """Container for keeping sorted and indices values of last row of an
36    index."""
37
38    # Class identifier.
39    _c_classid = 'LASTROWARRAY'
40
41
42class IndexArray(indexesextension.IndexArray, NotLoggedMixin, EArray):
43    """Represent the index (sorted or reverse index) dataset in HDF5 file.
44
45    All NumPy typecodes are supported except for complex datatypes.
46
47    Parameters
48    ----------
49    parentnode
50        The Index class from which this object will hang off.
51
52        .. versionchanged:: 3.0
53           Renamed from *parentNode* to *parentnode*.
54
55    name : str
56        The name of this node in its parent group.
57    atom
58        An Atom object representing the shape and type of the atomic objects to
59        be saved. Only scalar atoms are supported.
60    title
61        Sets a TITLE attribute on the array entity.
62    filters : Filters
63        An instance of the Filters class that provides information about the
64        desired I/O filters to be applied during the life of this object.
65    byteorder
66        The byteroder of the data on-disk.
67
68    """
69
70    # Class identifier.
71    _c_classid = 'INDEXARRAY'
72
73    # Properties
74    # ~~~~~~~~~~
75    @property
76    def chunksize(self):
77        """The chunksize for this object."""
78        return self.chunkshape[1]
79
80    @property
81    def slicesize(self):
82        """The slicesize for this object."""
83        return self.shape[1]
84
85    # Other methods
86    # ~~~~~~~~~~~~~
87    def __init__(self, parentnode, name,
88                 atom=None, title="",
89                 filters=None, byteorder=None):
90        """Create an IndexArray instance."""
91
92        self._v_pathname = parentnode._g_join(name)
93        if atom is not None:
94            # The shape and chunkshape needs to be fixed here
95            if name == "sorted":
96                reduction = parentnode.reduction
97                shape = (0, parentnode.slicesize // reduction)
98                chunkshape = (1, parentnode.chunksize // reduction)
99            else:
100                shape = (0, parentnode.slicesize)
101                chunkshape = (1, parentnode.chunksize)
102        else:
103            # The shape and chunkshape will be read from disk later on
104            shape = None
105            chunkshape = None
106
107        super(IndexArray, self).__init__(
108            parentnode, name, atom, shape, title, filters,
109            chunkshape=chunkshape, byteorder=byteorder)
110
111    # This version of searchBin uses both ranges (1st level) and
112    # bounds (2nd level) caches. It uses a cache for boundary rows,
113    # but not for 'sorted' rows (this is only supported for the
114    # 'optimized' types).
115    def _search_bin(self, nrow, item):
116        item1, item2 = item
117        result1 = -1
118        result2 = -1
119        hi = self.shape[1]
120        ranges = self._v_parent.rvcache
121        boundscache = self.boundscache
122        # First, look at the beginning of the slice
123        begin = ranges[nrow, 0]
124        # Look for items at the beginning of sorted slices
125        if item1 <= begin:
126            result1 = 0
127        if item2 < begin:
128            result2 = 0
129        if result1 >= 0 and result2 >= 0:
130            return (result1, result2)
131        # Then, look for items at the end of the sorted slice
132        end = ranges[nrow, 1]
133        if result1 < 0:
134            if item1 > end:
135                result1 = hi
136        if result2 < 0:
137            if item2 >= end:
138                result2 = hi
139        if result1 >= 0 and result2 >= 0:
140            return (result1, result2)
141        # Finally, do a lookup for item1 and item2 if they were not found
142        # Lookup in the middle of slice for item1
143        chunksize = self.chunksize  # Number of elements/chunksize
144        nchunk = -1
145        # Try to get the bounds row from the LRU cache
146        nslot = boundscache.getslot(nrow)
147        if nslot >= 0:
148            # Cache hit. Use the row kept there.
149            bounds = boundscache.getitem(nslot)
150        else:
151            # No luck with cached data. Read the row and put it in the cache.
152            bounds = self._v_parent.bounds[nrow]
153            size = bounds.size * bounds.itemsize
154            boundscache.setitem(nrow, bounds, size)
155        if result1 < 0:
156            # Search the appropriate chunk in bounds cache
157            nchunk = bisect_left(bounds, item1)
158            chunk = self._read_sorted_slice(nrow, chunksize * nchunk,
159                                            chunksize * (nchunk + 1))
160            result1 = indexesextension._bisect_left(chunk, item1, chunksize)
161            result1 += chunksize * nchunk
162        # Lookup in the middle of slice for item2
163        if result2 < 0:
164            # Search the appropriate chunk in bounds cache
165            nchunk2 = bisect_right(bounds, item2)
166            if nchunk2 != nchunk:
167                chunk = self._read_sorted_slice(nrow, chunksize * nchunk2,
168                                                chunksize * (nchunk2 + 1))
169            result2 = indexesextension._bisect_right(chunk, item2, chunksize)
170            result2 += chunksize * nchunk2
171        return (result1, result2)
172
173    def __str__(self):
174        "A compact representation of this class"
175        return "IndexArray(path=%s)" % self._v_pathname
176
177    def __repr__(self):
178        """A verbose representation of this class."""
179
180        return """%s
181  atom = %r
182  shape = %s
183  nrows = %s
184  chunksize = %s
185  slicesize = %s
186  byteorder = %r""" % (self, self.atom, self.shape, self.nrows,
187                       self.chunksize, self.slicesize, self.byteorder)
188
189
190## Local Variables:
191## mode: python
192## py-indent-offset: 4
193## tab-width: 4
194## fill-column: 72
195## End:
196