1# -*- coding: utf-8 -*- 2 3######################################################################## 4# 5# License: BSD 6# Created: May 18, 2006 7# Author: Francesc Alted - faltet@pytables.com 8# 9# $Id$ 10# 11######################################################################## 12 13"""cython interface for keeping indexes classes. 14 15Classes (type extensions): 16 17 IndexArray 18 CacheArray 19 LastRowArray 20 21Functions: 22 23 keysort 24 25Misc variables: 26 27""" 28 29import cython 30import numpy 31cimport numpy as cnp 32 33from .exceptions import HDF5ExtError 34from hdf5extension cimport Array 35 36 37# Types, constants, functions, classes & other objects from everywhere 38from numpy cimport import_array, ndarray, \ 39 npy_int8, npy_int16, npy_int32, npy_int64, \ 40 npy_uint8, npy_uint16, npy_uint32, npy_uint64, \ 41 npy_float32, npy_float64, \ 42 npy_float, npy_double, npy_longdouble 43 44# These two types are defined in npy_common.h but not in cython's numpy.pxd 45ctypedef unsigned char npy_bool 46ctypedef npy_uint16 npy_float16 47 48from libc.stdlib cimport malloc, free 49from libc.string cimport memcpy, strncmp 50 51from definitions cimport hid_t, herr_t, hsize_t, H5Screate_simple, H5Sclose 52from lrucacheextension cimport NumCache 53 54 55 56#------------------------------------------------------------------- 57 58# External C functions 59 60# Functions for optimized operations with ARRAY for indexing purposes 61cdef extern from "H5ARRAY-opt.h" nogil: 62 herr_t H5ARRAYOinit_readSlice( 63 hid_t dataset_id, hid_t *mem_space_id, hsize_t count) 64 herr_t H5ARRAYOread_readSlice( 65 hid_t dataset_id, hid_t type_id, 66 hsize_t irow, hsize_t start, hsize_t stop, void *data) 67 herr_t H5ARRAYOread_readSortedSlice( 68 hid_t dataset_id, hid_t mem_space_id, hid_t type_id, 69 hsize_t irow, hsize_t start, hsize_t stop, void *data) 70 herr_t H5ARRAYOread_readBoundsSlice( 71 hid_t dataset_id, hid_t mem_space_id, hid_t type_id, 72 hsize_t irow, hsize_t start, hsize_t stop, void *data) 73 herr_t H5ARRAYOreadSliceLR( 74 hid_t dataset_id, hid_t type_id, hsize_t start, hsize_t stop, void *data) 75 76 77# Functions for optimized operations for dealing with indexes 78cdef extern from "idx-opt.h" nogil: 79 int bisect_left_b(npy_int8 *a, long x, int hi, int offset) 80 int bisect_left_ub(npy_uint8 *a, long x, int hi, int offset) 81 int bisect_right_b(npy_int8 *a, long x, int hi, int offset) 82 int bisect_right_ub(npy_uint8 *a, long x, int hi, int offset) 83 int bisect_left_s(npy_int16 *a, long x, int hi, int offset) 84 int bisect_left_us(npy_uint16 *a, long x, int hi, int offset) 85 int bisect_right_s(npy_int16 *a, long x, int hi, int offset) 86 int bisect_right_us(npy_uint16 *a, long x, int hi, int offset) 87 int bisect_left_i(npy_int32 *a, long x, int hi, int offset) 88 int bisect_left_ui(npy_uint32 *a, npy_uint32 x, int hi, int offset) 89 int bisect_right_i(npy_int32 *a, long x, int hi, int offset) 90 int bisect_right_ui(npy_uint32 *a, npy_uint32 x, int hi, int offset) 91 int bisect_left_ll(npy_int64 *a, npy_int64 x, int hi, int offset) 92 int bisect_left_ull(npy_uint64 *a, npy_uint64 x, int hi, int offset) 93 int bisect_right_ll(npy_int64 *a, npy_int64 x, int hi, int offset) 94 int bisect_right_ull(npy_uint64 *a, npy_uint64 x, int hi, int offset) 95 int bisect_left_e(npy_float16 *a, npy_float64 x, int hi, int offset) 96 int bisect_right_e(npy_float16 *a, npy_float64 x, int hi, int offset) 97 int bisect_left_f(npy_float32 *a, npy_float64 x, int hi, int offset) 98 int bisect_right_f(npy_float32 *a, npy_float64 x, int hi, int offset) 99 int bisect_left_d(npy_float64 *a, npy_float64 x, int hi, int offset) 100 int bisect_right_d(npy_float64 *a, npy_float64 x, int hi, int offset) 101 int bisect_left_g(npy_longdouble *a, npy_longdouble x, int hi, int offset) 102 int bisect_right_g(npy_longdouble *a, npy_longdouble x, int hi, int offset) 103 104 105#---------------------------------------------------------------------------- 106 107# Initialization code 108 109# The numpy API requires this function to be called before 110# using any numpy facilities in an extension module. 111import_array() 112 113#--------------------------------------------------------------------------- 114 115ctypedef fused floating_type: 116 npy_float32 117 npy_float64 118 npy_longdouble 119 120 121ctypedef fused number_type: 122 npy_int8 123 npy_int16 124 npy_int32 125 npy_int64 126 127 npy_uint8 128 npy_uint16 129 npy_uint32 130 npy_uint64 131 132 npy_float32 133 npy_float64 134 npy_longdouble 135 136#=========================================================================== 137# Functions 138#=========================================================================== 139 140#--------------------------------------------------------------------------- 141# keysort 142#--------------------------------------------------------------------------- 143 144DEF PYA_QS_STACK = 100 145DEF SMALL_QUICKSORT = 15 146 147def keysort(ndarray array1, ndarray array2): 148 """Sort array1 in-place. array2 is also sorted following the array1 order. 149 150 array1 can be of any type, except complex or string. array2 may be made of 151 elements on any size. 152 153 """ 154 cdef size_t size = cnp.PyArray_SIZE(array1) 155 cdef size_t elsize1 = cnp.PyArray_ITEMSIZE(array1) 156 cdef size_t elsize2 = cnp.PyArray_ITEMSIZE(array2) 157 cdef int type_num = cnp.PyArray_TYPE(array1) 158 159 # floating types 160 if type_num == cnp.NPY_FLOAT16: 161 _keysort[npy_float16](<npy_float16*>array1.data, array2.data, elsize2, size) 162 elif type_num == cnp.NPY_FLOAT32: 163 _keysort[npy_float32](<npy_float32*>array1.data, array2.data, elsize2, size) 164 elif type_num == cnp.NPY_FLOAT64: 165 _keysort[npy_float64](<npy_float64*>array1.data, array2.data, elsize2, size) 166 elif type_num == cnp.NPY_LONGDOUBLE: 167 _keysort[npy_longdouble](<npy_longdouble*>array1.data, array2.data, elsize2, size) 168 # signed integer types 169 elif type_num == cnp.NPY_INT8: 170 _keysort[npy_int8](<npy_int8*>array1.data, array2.data, elsize2, size) 171 elif type_num == cnp.NPY_INT16: 172 _keysort[npy_int16](<npy_int16*>array1.data, array2.data, elsize2, size) 173 elif type_num == cnp.NPY_INT32: 174 _keysort[npy_int32](<npy_int32*>array1.data, array2.data, elsize2, size) 175 elif type_num == cnp.NPY_INT64: 176 _keysort[npy_int64](<npy_int64*>array1.data, array2.data, elsize2, size) 177 # unsigned integer types 178 elif type_num == cnp.NPY_UINT8: 179 _keysort[npy_uint8](<npy_uint8*>array1.data, array2.data, elsize2, size) 180 elif type_num == cnp.NPY_UINT16: 181 _keysort[npy_uint16](<npy_uint16*>array1.data, array2.data, elsize2, size) 182 elif type_num == cnp.NPY_UINT32: 183 _keysort[npy_uint32](<npy_uint32*>array1.data, array2.data, elsize2, size) 184 elif type_num == cnp.NPY_UINT64: 185 _keysort[npy_uint64](<npy_uint64*>array1.data, array2.data, elsize2, size) 186 # other 187 elif type_num == cnp.NPY_BOOL: 188 _keysort[npy_bool](<npy_bool*>array1.data, array2.data, elsize2, size) 189 elif type_num == cnp.NPY_STRING: 190 _keysort_string(array1.data, elsize1, array2.data, elsize2, size) 191 else: 192 raise ValueError("Unknown array datatype") 193 194 195cdef inline void swap_bytes(char *x, char *y, size_t n) nogil: 196 if n == 8: 197 (<npy_int64*>x)[0], (<npy_int64*>y)[0] = (<npy_int64*>y)[0], (<npy_int64*>x)[0] 198 elif n == 4: 199 (<npy_int32*>x)[0], (<npy_int32*>y)[0] = (<npy_int32*>y)[0], (<npy_int32*>x)[0] 200 elif n == 2: 201 (<npy_int16*>x)[0], (<npy_int16*>y)[0] = (<npy_int16*>y)[0], (<npy_int16*>x)[0] 202 else: 203 for i in range(n): 204 x[i], y[i] = y[i], x[i] 205 206 207cdef inline int less_than(number_type* a, number_type* b) nogil: 208 if number_type in floating_type: 209 return a[0] < b[0] or (b[0] != b[0] and a[0] == a[0]) 210 else: 211 return a[0] < b[0] 212 213 214@cython.cdivision(True) 215cdef void _keysort(number_type* start1, char* start2, size_t elsize2, size_t n) nogil: 216 cdef number_type *pl = start1 217 cdef number_type *pr = start1 + (n - 1) 218 219 cdef char *ipl = start2 220 cdef char *ipr = start2 + (n - 1) * elsize2 221 222 cdef number_type vp 223 cdef char *ivp = <char *> malloc(elsize2) 224 225 cdef number_type *stack[PYA_QS_STACK] 226 cdef number_type **sptr = stack 227 228 cdef char *istack[PYA_QS_STACK] 229 cdef char **isptr = istack 230 231 cdef size_t stack_index = 0 232 233 cdef number_type *pm 234 cdef number_type *pi 235 cdef number_type *pj 236 cdef number_type *pt 237 cdef char *ipm 238 cdef char *ipi 239 cdef char *ipj 240 cdef char *ipt 241 242 while True: 243 while pr - pl > SMALL_QUICKSORT: 244 pm = pl + ((pr - pl) >> 1) 245 ipm = ipl + ((ipr - ipl)/elsize2 >> 1)*elsize2 246 247 if less_than(pm, pl): 248 pm[0], pl[0] = pl[0], pm[0] 249 swap_bytes(ipm, ipl, elsize2) 250 251 if less_than(pr, pm): 252 pr[0], pm[0] = pm[0], pr[0] 253 swap_bytes(ipr, ipm, elsize2) 254 255 if less_than(pm, pl): 256 pm[0], pl[0] = pl[0], pm[0] 257 swap_bytes(ipm, ipl, elsize2) 258 259 vp = pm[0] 260 261 pi = pl 262 ipi = ipl 263 264 pj = pr - 1 265 ipj = ipr - elsize2 266 267 pm[0], pj[0] = pj[0], pm[0] 268 swap_bytes(ipm, ipj, elsize2) 269 270 while True: 271 pi += 1 272 ipi += elsize2 273 while less_than(pi, &vp): 274 pi += 1 275 ipi += elsize2 276 277 pj -= 1 278 ipj -= elsize2 279 while less_than(&vp, pj): 280 pj -= 1 281 ipj -= elsize2 282 283 if pi >= pj: 284 break 285 286 pi[0], pj[0] = pj[0], pi[0] 287 swap_bytes(ipi, ipj, elsize2) 288 289 pi[0], (pr-1)[0] = (pr-1)[0], pi[0] 290 swap_bytes(ipi, ipr-elsize2, elsize2) 291 292 # push largest partition on stack and proceed with the other 293 if (pi - pl) < (pr - pi): 294 sptr[0] = pi + 1 295 sptr[1] = pr 296 sptr += 2 297 298 isptr[0] = ipi + elsize2 299 isptr[1] = ipr 300 isptr += 2 301 302 pr = pi - 1 303 ipr = ipi - elsize2 304 else: 305 sptr[0] = pl 306 sptr[1] = pi - 1 307 sptr += 2 308 309 isptr[0] = ipl 310 isptr[1] = ipi - elsize2 311 isptr += 2 312 313 pl = pi + 1 314 ipl = ipi + elsize2 315 316 pi = pl + 1 317 ipi = ipl + elsize2 318 while pi <= pr: 319 vp = pi[0] 320 memcpy(ivp, ipi, elsize2) 321 322 pj = pi 323 pt = pi - 1 324 325 ipj = ipi 326 ipt = ipi - elsize2 327 328 while pj > pl and less_than(&vp, pt): 329 pj[0] = pt[0] 330 pj -= 1 331 pt -= 1 332 333 memcpy(ipj, ipt, elsize2) 334 ipj -= elsize2 335 ipt -= elsize2 336 337 pj[0] = vp 338 memcpy(ipj, ivp, elsize2) 339 340 pi += 1 341 ipi += elsize2 342 343 if sptr == stack: 344 break 345 346 sptr -= 2 347 pl = sptr[0] 348 pr = sptr[1] 349 350 isptr -= 2 351 ipl = isptr[0] 352 ipr = isptr[1] 353 354 free(ivp) 355 356 357@cython.cdivision(True) 358cdef void _keysort_string(char* start1, size_t ss, char* start2, size_t ts, size_t n) nogil: 359 cdef char *pl = start1 360 cdef char *pr = start1 + (n - 1) * ss 361 362 cdef char *ipl = start2 363 cdef char *ipr = start2 + (n - 1) * ts 364 365 cdef char *vp = <char *>malloc(ss) 366 cdef char *ivp = <char *>malloc(ts) 367 368 cdef char *stack[PYA_QS_STACK] 369 cdef char **sptr = stack 370 371 cdef char *istack[PYA_QS_STACK] 372 cdef char **isptr = istack 373 374 cdef size_t stack_index = 0 375 376 cdef char *pm 377 cdef char *pi 378 cdef char *pj 379 cdef char *pt 380 381 cdef char *ipm 382 cdef char *ipi 383 cdef char *ipj 384 cdef char *ipt 385 386 while True: 387 while pr - pl > SMALL_QUICKSORT * ss: 388 pm = pl + ((pr - pl)/ss >> 1)*ss 389 ipm = ipl + ((ipr - ipl)/ts >> 1)*ts 390 391 if strncmp(pm, pl, ss) < 0: 392 swap_bytes(pm, pl, ss) 393 swap_bytes(ipm, ipl, ts) 394 395 if strncmp(pr, pm, ss) < 0: 396 swap_bytes(pr, pm, ss) 397 swap_bytes(ipr, ipm, ts) 398 399 if strncmp(pm, pl, ss) < 0: 400 swap_bytes(pm, pl, ss) 401 swap_bytes(ipm, ipl, ts) 402 403 memcpy(vp, pm, ss) 404 405 pi = pl 406 ipi = ipl 407 408 pj = pr - ss 409 ipj = ipr - ts 410 411 swap_bytes(pm, pj, ss) 412 swap_bytes(ipm, ipj, ts) 413 414 while True: 415 pi += ss 416 ipi += ts 417 while strncmp(pi, vp, ss) < 0: 418 pi += ss 419 ipi += ts 420 421 pj -= ss 422 ipj -= ts 423 while strncmp(vp, pj, ss) < 0: 424 pj -= ss 425 ipj -= ts 426 427 if pi >= pj: 428 break 429 430 swap_bytes(pi, pj, ss) 431 swap_bytes(ipi, ipj, ts) 432 433 swap_bytes(pi, pr-ss, ss) 434 swap_bytes(ipi, ipr-ts, ts) 435 436 # push largest partition on stack and proceed with the other 437 if (pi - pl) < (pr - pi): 438 sptr[0] = pi + ss 439 sptr[1] = pr 440 sptr += 2 441 442 isptr[0] = ipi + ts 443 isptr[1] = ipr 444 isptr += 2 445 446 pr = pi - ss 447 ipr = ipi - ts 448 else: 449 sptr[0] = pl 450 sptr[1] = pi - ss 451 sptr += 2 452 453 isptr[0] = ipl 454 isptr[1] = ipi - ts 455 isptr += 2 456 457 pl = pi + ss 458 ipl = ipi + ts 459 460 pi = pl + ss 461 ipi = ipl + ts 462 463 while pi <= pr: 464 memcpy(vp, pi, ss) 465 memcpy(ivp, ipi, ts) 466 467 pj = pi 468 pt = pi - ss 469 470 ipj = ipi 471 ipt = ipi - ts 472 473 while pj > pl and strncmp(vp, pt, ss) < 0: 474 memcpy(pj, pt, ss) 475 pj -= ss 476 pt -= ss 477 478 memcpy(ipj, ipt, ts) 479 ipj -= ts 480 ipt -= ts 481 482 memcpy(pj, vp, ss) 483 memcpy(ipj, ivp, ts) 484 485 pi += ss 486 ipi += ts 487 488 if sptr == stack: 489 break 490 491 sptr -= 2 492 pl = sptr[0] 493 pr = sptr[1] 494 495 isptr -= 2 496 ipl = isptr[0] 497 ipr = isptr[1] 498 499 free(vp) 500 free(ivp) 501 502#--------------------------------------------------------------------------- 503# bisect 504#--------------------------------------------------------------------------- 505 506# This has been copied from the standard module bisect. 507# Checks for the values out of limits has been added at the beginning 508# because I forsee that this should be a very common case. 509# 2004-05-20 510def _bisect_left(a, x, int hi): 511 """Return the index where to insert item x in list a, assuming a is sorted. 512 513 The return value i is such that all e in a[:i] have e < x, and all e in 514 a[i:] have e >= x. So if x already appears in the list, i points just 515 before the leftmost x already there. 516 517 """ 518 519 cdef int lo, mid 520 521 lo = 0 522 if x <= a[0]: return 0 523 if a[-1] < x: return hi 524 while lo < hi: 525 mid = (lo+hi)/2 526 if a[mid] < x: lo = mid+1 527 else: hi = mid 528 return lo 529 530 531def _bisect_right(a, x, int hi): 532 """Return the index where to insert item x in list a, assuming a is sorted. 533 534 The return value i is such that all e in a[:i] have e <= x, and all e in 535 a[i:] have e > x. So if x already appears in the list, i points just 536 beyond the rightmost x already there. 537 538 """ 539 540 cdef int lo, mid 541 542 lo = 0 543 if x < a[0]: return 0 544 if a[-1] <= x: return hi 545 while lo < hi: 546 mid = (lo+hi)/2 547 if x < a[mid]: hi = mid 548 else: lo = mid+1 549 return lo 550 551 552#=========================================================================== 553# Classes 554#=========================================================================== 555 556 557 558cdef class Index: 559 pass 560 561 562cdef class CacheArray(Array): 563 """Container for keeping index caches of 1st and 2nd level.""" 564 565 cdef hid_t mem_space_id 566 567 cdef initread(self, int nbounds): 568 # "Actions to accelerate the reads afterwards." 569 570 # Precompute the mem_space_id 571 if (H5ARRAYOinit_readSlice(self.dataset_id, &self.mem_space_id, 572 nbounds) < 0): 573 raise HDF5ExtError("Problems initializing the bounds array data.") 574 return 575 576 cdef read_slice(self, hsize_t nrow, hsize_t start, hsize_t stop, void *rbuf): 577 # "Read an slice of bounds." 578 579 if (H5ARRAYOread_readBoundsSlice( 580 self.dataset_id, self.mem_space_id, self.type_id, 581 nrow, start, stop, rbuf) < 0): 582 raise HDF5ExtError("Problems reading the bounds array data.") 583 return 584 585 def _g_close(self): 586 super(Array, self)._g_close() 587 # Release specific resources of this class 588 if self.mem_space_id > 0: 589 H5Sclose(self.mem_space_id) 590 591 592cdef class IndexArray(Array): 593 """Container for keeping sorted and indices values.""" 594 595 cdef void *rbufst 596 cdef void *rbufln 597 cdef void *rbufrv 598 cdef void *rbufbc 599 cdef void *rbuflb 600 cdef hid_t mem_space_id 601 cdef int l_chunksize, l_slicesize, nbounds, indsize 602 cdef CacheArray bounds_ext 603 cdef NumCache boundscache, sortedcache 604 cdef ndarray bufferbc, bufferlb 605 606 def _read_index_slice(self, hsize_t irow, hsize_t start, hsize_t stop, 607 ndarray idx): 608 cdef herr_t ret 609 610 # Do the physical read 611 with nogil: 612 ret = H5ARRAYOread_readSlice(self.dataset_id, self.type_id, 613 irow, start, stop, idx.data) 614 615 if ret < 0: 616 raise HDF5ExtError("Problems reading the index indices.") 617 618 619 def _init_sorted_slice(self, index): 620 """Initialize the structures for doing a binary search.""" 621 622 cdef long ndims 623 cdef int rank, buflen, cachesize 624 cdef char *bname 625 cdef hsize_t count[2] 626 cdef ndarray starts, lengths, rvcache 627 cdef object maxslots, rowsize 628 629 dtype = self.atom.dtype 630 # Create the buffer for reading sorted data chunks if not created yet 631 if <object>self.bufferlb is None: 632 # Internal buffers 633 self.bufferlb = numpy.empty(dtype=dtype, shape=self.chunksize) 634 # Get the pointers to the different buffer data areas 635 self.rbuflb = self.bufferlb.data 636 # Init structures for accelerating sorted array reads 637 rank = 2 638 count[0] = 1 639 count[1] = self.chunksize 640 self.mem_space_id = H5Screate_simple(rank, count, NULL) 641 # Cache some counters in local extension variables 642 self.l_chunksize = self.chunksize 643 self.l_slicesize = self.slicesize 644 645 # Get the addresses of buffer data 646 starts = index.starts 647 lengths = index.lengths 648 self.rbufst = starts.data 649 self.rbufln = lengths.data 650 # The 1st cache is loaded completely in memory and needs to be reloaded 651 rvcache = index.ranges[:] 652 self.rbufrv = rvcache.data 653 index.rvcache = <object>rvcache 654 # Init the bounds array for reading 655 self.nbounds = index.bounds.shape[1] 656 self.bounds_ext = <CacheArray>index.bounds 657 self.bounds_ext.initread(self.nbounds) 658 if str(dtype) in self._v_parent.opt_search_types: 659 # The next caches should be defined only for optimized search types. 660 # The 2nd level cache will replace the already existing ObjectCache and 661 # already bound to the boundscache attribute. This way, the cache will 662 # not be duplicated (I know, this smells badly, but anyway). 663 params = self._v_file.params 664 rowsize = (self.bounds_ext._v_chunkshape[1] * dtype.itemsize) 665 maxslots = params['BOUNDS_MAX_SIZE'] / rowsize 666 self.boundscache = <NumCache>NumCache( 667 (maxslots, self.nbounds), dtype, 'non-opt types bounds') 668 self.bufferbc = numpy.empty(dtype=dtype, shape=self.nbounds) 669 # Get the pointer for the internal buffer for 2nd level cache 670 self.rbufbc = self.bufferbc.data 671 # Another NumCache for the sorted values 672 rowsize = (self.chunksize*dtype.itemsize) 673 maxslots = params['SORTED_MAX_SIZE'] / (self.chunksize*dtype.itemsize) 674 self.sortedcache = <NumCache>NumCache( 675 (maxslots, self.chunksize), dtype, 'sorted') 676 677 678 679 cdef void *_g_read_sorted_slice(self, hsize_t irow, hsize_t start, 680 hsize_t stop): 681 """Read the sorted part of an index.""" 682 683 with nogil: 684 ret = H5ARRAYOread_readSortedSlice( 685 self.dataset_id, self.mem_space_id, self.type_id, 686 irow, start, stop, self.rbuflb) 687 688 if ret < 0: 689 raise HDF5ExtError("Problems reading the array data.") 690 691 return self.rbuflb 692 693 # can't time machine since this function is cdef'd 694 #_g_read_sorted_slice = prveious_api(_g_read_sorted_slice) 695 696 # This is callable from python 697 def _read_sorted_slice(self, hsize_t irow, hsize_t start, hsize_t stop): 698 """Read the sorted part of an index.""" 699 700 self._g_read_sorted_slice(irow, start, stop) 701 return self.bufferlb 702 703 704 cdef void *get_lru_bounds(self, int nrow, int nbounds): 705 """Get the bounds from the cache, or read them.""" 706 707 cdef void *vpointer 708 cdef long nslot 709 710 nslot = self.boundscache.getslot_(nrow) 711 if nslot >= 0: 712 vpointer = self.boundscache.getitem1_(nslot) 713 else: 714 # Bounds row is not in cache. Read it and put it in the LRU cache. 715 self.bounds_ext.read_slice(nrow, 0, nbounds, self.rbufbc) 716 self.boundscache.setitem_(nrow, self.rbufbc, 0) 717 vpointer = self.rbufbc 718 return vpointer 719 720 # can't time machine since get_lru_bounds() function is cdef'd 721 722 cdef void *get_lru_sorted(self, int nrow, int ncs, int nchunk, int cs): 723 """Get the sorted row from the cache or read it.""" 724 725 cdef void *vpointer 726 cdef npy_int64 nckey 727 cdef long nslot 728 cdef hsize_t start, stop 729 730 # Compute the number of chunk read and use it as the key for the cache. 731 nckey = nrow*ncs+nchunk 732 nslot = self.sortedcache.getslot_(nckey) 733 if nslot >= 0: 734 vpointer = self.sortedcache.getitem1_(nslot) 735 else: 736 # The sorted chunk is not in cache. Read it and put it in the LRU cache. 737 start = cs*nchunk 738 stop = cs*(nchunk+1) 739 vpointer = self._g_read_sorted_slice(nrow, start, stop) 740 self.sortedcache.setitem_(nckey, vpointer, 0) 741 return vpointer 742 743 # can't time machine since get_lru_sorted() function is cdef'd 744 745 # Optimized version for int8 746 def _search_bin_na_b(self, long item1, long item2): 747 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 748 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 749 cdef int *rbufst 750 cdef int *rbufln 751 752 # Variables with specific type 753 cdef npy_int8 *rbufrv 754 cdef npy_int8 *rbufbc = NULL 755 cdef npy_int8 *rbuflb = NULL 756 757 cs = self.l_chunksize 758 ss = self.l_slicesize 759 ncs = ss / cs 760 nbounds = self.nbounds 761 nrows = self.nrows 762 rbufst = <int *>self.rbufst 763 rbufln = <int *>self.rbufln 764 rbufrv = <npy_int8 *>self.rbufrv 765 tlength = 0 766 for nrow from 0 <= nrow < nrows: 767 rvrow = nrow*2 768 bread = 0 769 nchunk = -1 770 771 # Look if item1 is in this row 772 if item1 > rbufrv[rvrow]: 773 if item1 <= rbufrv[rvrow+1]: 774 # Get the bounds row from the LRU cache or read them. 775 rbufbc = <npy_int8 *>self.get_lru_bounds(nrow, nbounds) 776 bread = 1 777 nchunk = bisect_left_b(rbufbc, item1, nbounds, 0) 778 # Get the sorted row from the LRU cache or read it. 779 rbuflb = <npy_int8 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 780 start = bisect_left_b(rbuflb, item1, cs, 0) + cs*nchunk 781 else: 782 start = ss 783 else: 784 start = 0 785 # Now, for item2 786 if item2 >= rbufrv[rvrow]: 787 if item2 < rbufrv[rvrow+1]: 788 if not bread: 789 # Get the bounds row from the LRU cache or read them. 790 rbufbc = <npy_int8 *>self.get_lru_bounds(nrow, nbounds) 791 nchunk2 = bisect_right_b(rbufbc, item2, nbounds, 0) 792 if nchunk2 <> nchunk: 793 # Get the sorted row from the LRU cache or read it. 794 rbuflb = <npy_int8 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 795 stop = bisect_right_b(rbuflb, item2, cs, 0) + cs*nchunk2 796 else: 797 stop = ss 798 else: 799 stop = 0 800 length = stop - start 801 tlength = tlength + length 802 rbufst[nrow] = start 803 rbufln[nrow] = length 804 return tlength 805 806 807 # Optimized version for uint8 808 def _search_bin_na_ub(self, long item1, long item2): 809 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 810 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 811 cdef int *rbufst 812 cdef int *rbufln 813 814 # Variables with specific type 815 cdef npy_uint8 *rbufrv 816 cdef npy_uint8 *rbufbc = NULL 817 cdef npy_uint8 *rbuflb = NULL 818 819 cs = self.l_chunksize 820 ss = self.l_slicesize 821 ncs = ss / cs 822 nbounds = self.nbounds 823 nrows = self.nrows 824 rbufst = <int *>self.rbufst 825 rbufln = <int *>self.rbufln 826 rbufrv = <npy_uint8 *>self.rbufrv 827 tlength = 0 828 for nrow from 0 <= nrow < nrows: 829 rvrow = nrow*2 830 bread = 0 831 nchunk = -1 832 833 # Look if item1 is in this row 834 if item1 > rbufrv[rvrow]: 835 if item1 <= rbufrv[rvrow+1]: 836 # Get the bounds row from the LRU cache or read them. 837 rbufbc = <npy_uint8 *>self.get_lru_bounds(nrow, nbounds) 838 bread = 1 839 nchunk = bisect_left_ub(rbufbc, item1, nbounds, 0) 840 # Get the sorted row from the LRU cache or read it. 841 rbuflb = <npy_uint8 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 842 start = bisect_left_ub(rbuflb, item1, cs, 0) + cs*nchunk 843 else: 844 start = ss 845 else: 846 start = 0 847 # Now, for item2 848 if item2 >= rbufrv[rvrow]: 849 if item2 < rbufrv[rvrow+1]: 850 if not bread: 851 # Get the bounds row from the LRU cache or read them. 852 rbufbc = <npy_uint8 *>self.get_lru_bounds(nrow, nbounds) 853 nchunk2 = bisect_right_ub(rbufbc, item2, nbounds, 0) 854 if nchunk2 <> nchunk: 855 # Get the sorted row from the LRU cache or read it. 856 rbuflb = <npy_uint8 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 857 stop = bisect_right_ub(rbuflb, item2, cs, 0) + cs*nchunk2 858 else: 859 stop = ss 860 else: 861 stop = 0 862 length = stop - start 863 tlength = tlength + length 864 rbufst[nrow] = start 865 rbufln[nrow] = length 866 return tlength 867 868 869 # Optimized version for int16 870 def _search_bin_na_s(self, long item1, long item2): 871 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 872 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 873 cdef int *rbufst 874 cdef int *rbufln 875 876 # Variables with specific type 877 cdef npy_int16 *rbufrv 878 cdef npy_int16 *rbufbc = NULL 879 cdef npy_int16 *rbuflb = NULL 880 881 cs = self.l_chunksize 882 ss = self.l_slicesize 883 ncs = ss / cs 884 nbounds = self.nbounds 885 nrows = self.nrows 886 rbufst = <int *>self.rbufst 887 rbufln = <int *>self.rbufln 888 rbufrv = <npy_int16 *>self.rbufrv 889 tlength = 0 890 for nrow from 0 <= nrow < nrows: 891 rvrow = nrow*2 892 bread = 0 893 nchunk = -1 894 # Look if item1 is in this row 895 if item1 > rbufrv[rvrow]: 896 if item1 <= rbufrv[rvrow+1]: 897 # Get the bounds row from the LRU cache or read them. 898 rbufbc = <npy_int16 *>self.get_lru_bounds(nrow, nbounds) 899 bread = 1 900 nchunk = bisect_left_s(rbufbc, item1, nbounds, 0) 901 # Get the sorted row from the LRU cache or read it. 902 rbuflb = <npy_int16 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 903 start = bisect_left_s(rbuflb, item1, cs, 0) + cs*nchunk 904 else: 905 start = ss 906 else: 907 start = 0 908 # Now, for item2 909 if item2 >= rbufrv[rvrow]: 910 if item2 < rbufrv[rvrow+1]: 911 if not bread: 912 # Get the bounds row from the LRU cache or read them. 913 rbufbc = <npy_int16 *>self.get_lru_bounds(nrow, nbounds) 914 nchunk2 = bisect_right_s(rbufbc, item2, nbounds, 0) 915 if nchunk2 <> nchunk: 916 # Get the sorted row from the LRU cache or read it. 917 rbuflb = <npy_int16 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 918 stop = bisect_right_s(rbuflb, item2, cs, 0) + cs*nchunk2 919 else: 920 stop = ss 921 else: 922 stop = 0 923 length = stop - start 924 tlength = tlength + length 925 rbufst[nrow] = start 926 rbufln[nrow] = length 927 return tlength 928 929 930 # Optimized version for uint16 931 def _search_bin_na_us(self, long item1, long item2): 932 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 933 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 934 cdef int *rbufst 935 cdef int *rbufln 936 937 # Variables with specific type 938 cdef npy_uint16 *rbufrv 939 cdef npy_uint16 *rbufbc = NULL 940 cdef npy_uint16 *rbuflb = NULL 941 942 cs = self.l_chunksize 943 ss = self.l_slicesize 944 ncs = ss / cs 945 nbounds = self.nbounds 946 nrows = self.nrows 947 rbufst = <int *>self.rbufst 948 rbufln = <int *>self.rbufln 949 rbufrv = <npy_uint16 *>self.rbufrv 950 tlength = 0 951 for nrow from 0 <= nrow < nrows: 952 rvrow = nrow*2 953 bread = 0 954 nchunk = -1 955 # Look if item1 is in this row 956 if item1 > rbufrv[rvrow]: 957 if item1 <= rbufrv[rvrow+1]: 958 # Get the bounds row from the LRU cache or read them. 959 rbufbc = <npy_uint16 *>self.get_lru_bounds(nrow, nbounds) 960 bread = 1 961 nchunk = bisect_left_us(rbufbc, item1, nbounds, 0) 962 # Get the sorted row from the LRU cache or read it. 963 rbuflb = <npy_uint16 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 964 start = bisect_left_us(rbuflb, item1, cs, 0) + cs*nchunk 965 else: 966 start = ss 967 else: 968 start = 0 969 # Now, for item2 970 if item2 >= rbufrv[rvrow]: 971 if item2 < rbufrv[rvrow+1]: 972 if not bread: 973 # Get the bounds row from the LRU cache or read them. 974 rbufbc = <npy_uint16 *>self.get_lru_bounds(nrow, nbounds) 975 nchunk2 = bisect_right_us(rbufbc, item2, nbounds, 0) 976 if nchunk2 <> nchunk: 977 # Get the sorted row from the LRU cache or read it. 978 rbuflb = <npy_uint16 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 979 stop = bisect_right_us(rbuflb, item2, cs, 0) + cs*nchunk2 980 else: 981 stop = ss 982 else: 983 stop = 0 984 length = stop - start 985 tlength = tlength + length 986 rbufst[nrow] = start 987 rbufln[nrow] = length 988 return tlength 989 990 991 # Optimized version for int32 992 def _search_bin_na_i(self, long item1, long item2): 993 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 994 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 995 cdef int *rbufst 996 cdef int *rbufln 997 998 # Variables with specific type 999 cdef npy_int32 *rbufrv 1000 cdef npy_int32 *rbufbc = NULL 1001 cdef npy_int32 *rbuflb = NULL 1002 1003 cs = self.l_chunksize 1004 ss = self.l_slicesize 1005 ncs = ss / cs 1006 nbounds = self.nbounds 1007 nrows = self.nrows 1008 rbufst = <int *>self.rbufst 1009 rbufln = <int *>self.rbufln 1010 rbufrv = <npy_int32 *>self.rbufrv 1011 tlength = 0 1012 for nrow from 0 <= nrow < nrows: 1013 rvrow = nrow*2 1014 bread = 0 1015 nchunk = -1 1016 # Look if item1 is in this row 1017 if item1 > rbufrv[rvrow]: 1018 if item1 <= rbufrv[rvrow+1]: 1019 # Get the bounds row from the LRU cache or read them. 1020 rbufbc = <npy_int32 *>self.get_lru_bounds(nrow, nbounds) 1021 bread = 1 1022 nchunk = bisect_left_i(rbufbc, item1, nbounds, 0) 1023 # Get the sorted row from the LRU cache or read it. 1024 rbuflb = <npy_int32 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1025 start = bisect_left_i(rbuflb, item1, cs, 0) + cs*nchunk 1026 else: 1027 start = ss 1028 else: 1029 start = 0 1030 # Now, for item2 1031 if item2 >= rbufrv[rvrow]: 1032 if item2 < rbufrv[rvrow+1]: 1033 if not bread: 1034 # Get the bounds row from the LRU cache or read them. 1035 rbufbc = <npy_int32 *>self.get_lru_bounds(nrow, nbounds) 1036 nchunk2 = bisect_right_i(rbufbc, item2, nbounds, 0) 1037 if nchunk2 <> nchunk: 1038 # Get the sorted row from the LRU cache or read it. 1039 rbuflb = <npy_int32 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1040 stop = bisect_right_i(rbuflb, item2, cs, 0) + cs*nchunk2 1041 else: 1042 stop = ss 1043 else: 1044 stop = 0 1045 length = stop - start 1046 tlength = tlength + length 1047 rbufst[nrow] = start 1048 rbufln[nrow] = length 1049 return tlength 1050 1051 1052 # Optimized version for uint32 1053 def _search_bin_na_ui(self, npy_uint32 item1, npy_uint32 item2): 1054 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 1055 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1056 cdef int *rbufst 1057 cdef int *rbufln 1058 1059 # Variables with specific type 1060 cdef npy_uint32 *rbufrv 1061 cdef npy_uint32 *rbufbc = NULL 1062 cdef npy_uint32 *rbuflb = NULL 1063 1064 cs = self.l_chunksize 1065 ss = self.l_slicesize 1066 ncs = ss / cs 1067 nbounds = self.nbounds 1068 nrows = self.nrows 1069 rbufst = <int *>self.rbufst 1070 rbufln = <int *>self.rbufln 1071 rbufrv = <npy_uint32 *>self.rbufrv 1072 tlength = 0 1073 for nrow from 0 <= nrow < nrows: 1074 rvrow = nrow*2 1075 bread = 0 1076 nchunk = -1 1077 # Look if item1 is in this row 1078 if item1 > rbufrv[rvrow]: 1079 if item1 <= rbufrv[rvrow+1]: 1080 # Get the bounds row from the LRU cache or read them. 1081 rbufbc = <npy_uint32 *>self.get_lru_bounds(nrow, nbounds) 1082 bread = 1 1083 nchunk = bisect_left_ui(rbufbc, item1, nbounds, 0) 1084 # Get the sorted row from the LRU cache or read it. 1085 rbuflb = <npy_uint32 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1086 start = bisect_left_ui(rbuflb, item1, cs, 0) + cs*nchunk 1087 else: 1088 start = ss 1089 else: 1090 start = 0 1091 # Now, for item2 1092 if item2 >= rbufrv[rvrow]: 1093 if item2 < rbufrv[rvrow+1]: 1094 if not bread: 1095 # Get the bounds row from the LRU cache or read them. 1096 rbufbc = <npy_uint32 *>self.get_lru_bounds(nrow, nbounds) 1097 nchunk2 = bisect_right_ui(rbufbc, item2, nbounds, 0) 1098 if nchunk2 <> nchunk: 1099 # Get the sorted row from the LRU cache or read it. 1100 rbuflb = <npy_uint32 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1101 stop = bisect_right_ui(rbuflb, item2, cs, 0) + cs*nchunk2 1102 else: 1103 stop = ss 1104 else: 1105 stop = 0 1106 length = stop - start 1107 tlength = tlength + length 1108 rbufst[nrow] = start 1109 rbufln[nrow] = length 1110 return tlength 1111 1112 1113 # Optimized version for int64 1114 def _search_bin_na_ll(self, npy_int64 item1, npy_int64 item2): 1115 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 1116 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1117 cdef int *rbufst 1118 cdef int *rbufln 1119 1120 # Variables with specific type 1121 cdef npy_int64 *rbufrv 1122 cdef npy_int64 *rbufbc = NULL 1123 cdef npy_int64 *rbuflb = NULL 1124 1125 cs = self.l_chunksize 1126 ss = self.l_slicesize 1127 ncs = ss / cs 1128 nbounds = self.nbounds 1129 nrows = self.nrows 1130 rbufst = <int *>self.rbufst 1131 rbufln = <int *>self.rbufln 1132 rbufrv = <npy_int64 *>self.rbufrv 1133 tlength = 0 1134 for nrow from 0 <= nrow < nrows: 1135 rvrow = nrow*2 1136 bread = 0 1137 nchunk = -1 1138 # Look if item1 is in this row 1139 if item1 > rbufrv[rvrow]: 1140 if item1 <= rbufrv[rvrow+1]: 1141 # Get the bounds row from the LRU cache or read them. 1142 rbufbc = <npy_int64 *>self.get_lru_bounds(nrow, nbounds) 1143 bread = 1 1144 nchunk = bisect_left_ll(rbufbc, item1, nbounds, 0) 1145 # Get the sorted row from the LRU cache or read it. 1146 rbuflb = <npy_int64 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1147 start = bisect_left_ll(rbuflb, item1, cs, 0) + cs*nchunk 1148 else: 1149 start = ss 1150 else: 1151 start = 0 1152 # Now, for item2 1153 if item2 >= rbufrv[rvrow]: 1154 if item2 < rbufrv[rvrow+1]: 1155 if not bread: 1156 # Get the bounds row from the LRU cache or read them. 1157 rbufbc = <npy_int64 *>self.get_lru_bounds(nrow, nbounds) 1158 nchunk2 = bisect_right_ll(rbufbc, item2, nbounds, 0) 1159 if nchunk2 <> nchunk: 1160 # Get the sorted row from the LRU cache or read it. 1161 rbuflb = <npy_int64 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1162 stop = bisect_right_ll(rbuflb, item2, cs, 0) + cs*nchunk2 1163 else: 1164 stop = ss 1165 else: 1166 stop = 0 1167 length = stop - start 1168 tlength = tlength + length 1169 rbufst[nrow] = start 1170 rbufln[nrow] = length 1171 return tlength 1172 1173 1174 # Optimized version for uint64 1175 def _search_bin_na_ull(self, npy_uint64 item1, npy_uint64 item2): 1176 cdef int cs, ss, ncs, nrow, nrows, nbounds, rvrow 1177 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1178 cdef int *rbufst 1179 cdef int *rbufln 1180 1181 # Variables with specific type 1182 cdef npy_uint64 *rbufrv 1183 cdef npy_uint64 *rbufbc = NULL 1184 cdef npy_uint64 *rbuflb = NULL 1185 1186 cs = self.l_chunksize 1187 ss = self.l_slicesize 1188 ncs = ss / cs 1189 nbounds = self.nbounds 1190 nrows = self.nrows 1191 rbufst = <int *>self.rbufst 1192 rbufln = <int *>self.rbufln 1193 rbufrv = <npy_uint64 *>self.rbufrv 1194 tlength = 0 1195 for nrow from 0 <= nrow < nrows: 1196 rvrow = nrow*2 1197 bread = 0 1198 nchunk = -1 1199 # Look if item1 is in this row 1200 if item1 > rbufrv[rvrow]: 1201 if item1 <= rbufrv[rvrow+1]: 1202 # Get the bounds row from the LRU cache or read them. 1203 rbufbc = <npy_uint64 *>self.get_lru_bounds(nrow, nbounds) 1204 bread = 1 1205 nchunk = bisect_left_ull(rbufbc, item1, nbounds, 0) 1206 # Get the sorted row from the LRU cache or read it. 1207 rbuflb = <npy_uint64 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1208 start = bisect_left_ull(rbuflb, item1, cs, 0) + cs*nchunk 1209 else: 1210 start = ss 1211 else: 1212 start = 0 1213 # Now, for item2 1214 if item2 >= rbufrv[rvrow]: 1215 if item2 < rbufrv[rvrow+1]: 1216 if not bread: 1217 # Get the bounds row from the LRU cache or read them. 1218 rbufbc = <npy_uint64 *>self.get_lru_bounds(nrow, nbounds) 1219 nchunk2 = bisect_right_ull(rbufbc, item2, nbounds, 0) 1220 if nchunk2 <> nchunk: 1221 # Get the sorted row from the LRU cache or read it. 1222 rbuflb = <npy_uint64 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1223 stop = bisect_right_ull(rbuflb, item2, cs, 0) + cs*nchunk2 1224 else: 1225 stop = ss 1226 else: 1227 stop = 0 1228 length = stop - start 1229 tlength = tlength + length 1230 rbufst[nrow] = start 1231 rbufln[nrow] = length 1232 return tlength 1233 1234 1235 # Optimized version for float16 1236 def _search_bin_na_e(self, npy_float64 item1, npy_float64 item2): 1237 cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow 1238 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1239 cdef int *rbufst 1240 cdef int *rbufln 1241 1242 # Variables with specific type 1243 cdef npy_float16 *rbufrv 1244 cdef npy_float16 *rbufbc = NULL 1245 cdef npy_float16 *rbuflb = NULL 1246 1247 cs = self.l_chunksize 1248 ss = self.l_slicesize 1249 ncs = ss / cs 1250 nbounds = self.nbounds 1251 nrows = self.nrows 1252 tlength = 0 1253 rbufst = <int *>self.rbufst 1254 rbufln = <int *>self.rbufln 1255 # Limits not in cache, do a lookup 1256 rbufrv = <npy_float16 *>self.rbufrv 1257 for nrow from 0 <= nrow < nrows: 1258 rvrow = nrow*2 1259 bread = 0 1260 nchunk = -1 1261 1262 # Look if item1 is in this row 1263 if item1 > rbufrv[rvrow]: 1264 if item1 <= rbufrv[rvrow+1]: 1265 # Get the bounds row from the LRU cache or read them. 1266 rbufbc = <npy_float16 *>self.get_lru_bounds(nrow, nbounds) 1267 bread = 1 1268 nchunk = bisect_left_e(rbufbc, item1, nbounds, 0) 1269 # Get the sorted row from the LRU cache or read it. 1270 rbuflb = <npy_float16 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1271 start = bisect_left_e(rbuflb, item1, cs, 0) + cs*nchunk 1272 else: 1273 start = ss 1274 else: 1275 start = 0 1276 # Now, for item2 1277 if item2 >= rbufrv[rvrow]: 1278 if item2 < rbufrv[rvrow+1]: 1279 if not bread: 1280 # Get the bounds row from the LRU cache or read them. 1281 rbufbc = <npy_float16 *>self.get_lru_bounds(nrow, nbounds) 1282 nchunk2 = bisect_right_e(rbufbc, item2, nbounds, 0) 1283 if nchunk2 <> nchunk: 1284 # Get the sorted row from the LRU cache or read it. 1285 rbuflb = <npy_float16 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1286 stop = bisect_right_e(rbuflb, item2, cs, 0) + cs*nchunk2 1287 else: 1288 stop = ss 1289 else: 1290 stop = 0 1291 length = stop - start 1292 tlength = tlength + length 1293 rbufst[nrow] = start 1294 rbufln[nrow] = length 1295 return tlength 1296 1297 1298 # Optimized version for float32 1299 def _search_bin_na_f(self, npy_float64 item1, npy_float64 item2): 1300 cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow 1301 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1302 cdef int *rbufst 1303 cdef int *rbufln 1304 # Variables with specific type 1305 cdef npy_float32 *rbufrv 1306 cdef npy_float32 *rbufbc = NULL 1307 cdef npy_float32 *rbuflb = NULL 1308 1309 cs = self.l_chunksize 1310 ss = self.l_slicesize 1311 ncs = ss / cs 1312 nbounds = self.nbounds 1313 nrows = self.nrows 1314 tlength = 0 1315 rbufst = <int *>self.rbufst 1316 rbufln = <int *>self.rbufln 1317 1318 # Limits not in cache, do a lookup 1319 rbufrv = <npy_float32 *>self.rbufrv 1320 for nrow from 0 <= nrow < nrows: 1321 rvrow = nrow*2 1322 bread = 0 1323 nchunk = -1 1324 # Look if item1 is in this row 1325 if item1 > rbufrv[rvrow]: 1326 if item1 <= rbufrv[rvrow+1]: 1327 # Get the bounds row from the LRU cache or read them. 1328 rbufbc = <npy_float32 *>self.get_lru_bounds(nrow, nbounds) 1329 bread = 1 1330 nchunk = bisect_left_f(rbufbc, item1, nbounds, 0) 1331 # Get the sorted row from the LRU cache or read it. 1332 rbuflb = <npy_float32 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1333 start = bisect_left_f(rbuflb, item1, cs, 0) + cs*nchunk 1334 else: 1335 start = ss 1336 else: 1337 start = 0 1338 # Now, for item2 1339 if item2 >= rbufrv[rvrow]: 1340 if item2 < rbufrv[rvrow+1]: 1341 if not bread: 1342 # Get the bounds row from the LRU cache or read them. 1343 rbufbc = <npy_float32 *>self.get_lru_bounds(nrow, nbounds) 1344 nchunk2 = bisect_right_f(rbufbc, item2, nbounds, 0) 1345 if nchunk2 <> nchunk: 1346 # Get the sorted row from the LRU cache or read it. 1347 rbuflb = <npy_float32 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1348 stop = bisect_right_f(rbuflb, item2, cs, 0) + cs*nchunk2 1349 else: 1350 stop = ss 1351 else: 1352 stop = 0 1353 length = stop - start 1354 tlength = tlength + length 1355 rbufst[nrow] = start 1356 rbufln[nrow] = length 1357 return tlength 1358 1359 1360 # Optimized version for float64 1361 def _search_bin_na_d(self, npy_float64 item1, npy_float64 item2): 1362 cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow 1363 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1364 cdef int *rbufst 1365 cdef int *rbufln 1366 1367 # Variables with specific type 1368 cdef npy_float64 *rbufrv 1369 cdef npy_float64 *rbufbc = NULL 1370 cdef npy_float64 *rbuflb = NULL 1371 1372 cs = self.l_chunksize 1373 ss = self.l_slicesize 1374 ncs = ss / cs 1375 nbounds = self.nbounds 1376 nrows = self.nrows 1377 tlength = 0 1378 rbufst = <int *>self.rbufst 1379 rbufln = <int *>self.rbufln 1380 1381 # Limits not in cache, do a lookup 1382 rbufrv = <npy_float64 *>self.rbufrv 1383 for nrow from 0 <= nrow < nrows: 1384 rvrow = nrow*2 1385 bread = 0 1386 nchunk = -1 1387 1388 # Look if item1 is in this row 1389 if item1 > rbufrv[rvrow]: 1390 if item1 <= rbufrv[rvrow+1]: 1391 # Get the bounds row from the LRU cache or read them. 1392 rbufbc = <npy_float64 *>self.get_lru_bounds(nrow, nbounds) 1393 bread = 1 1394 nchunk = bisect_left_d(rbufbc, item1, nbounds, 0) 1395 # Get the sorted row from the LRU cache or read it. 1396 rbuflb = <npy_float64 *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1397 start = bisect_left_d(rbuflb, item1, cs, 0) + cs*nchunk 1398 else: 1399 start = ss 1400 else: 1401 start = 0 1402 # Now, for item2 1403 if item2 >= rbufrv[rvrow]: 1404 if item2 < rbufrv[rvrow+1]: 1405 if not bread: 1406 # Get the bounds row from the LRU cache or read them. 1407 rbufbc = <npy_float64 *>self.get_lru_bounds(nrow, nbounds) 1408 nchunk2 = bisect_right_d(rbufbc, item2, nbounds, 0) 1409 if nchunk2 <> nchunk: 1410 # Get the sorted row from the LRU cache or read it. 1411 rbuflb = <npy_float64 *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1412 stop = bisect_right_d(rbuflb, item2, cs, 0) + cs*nchunk2 1413 else: 1414 stop = ss 1415 else: 1416 stop = 0 1417 length = stop - start 1418 tlength = tlength + length 1419 rbufst[nrow] = start 1420 rbufln[nrow] = length 1421 return tlength 1422 1423 1424 # Optimized version for npy_longdouble/float96/float128 1425 def _search_bin_na_g(self, npy_longdouble item1, npy_longdouble item2): 1426 cdef int cs, ss, ncs, nrow, nrows, nrow2, nbounds, rvrow 1427 cdef int start, stop, tlength, length, bread, nchunk, nchunk2 1428 cdef int *rbufst 1429 cdef int *rbufln 1430 1431 # Variables with specific type 1432 cdef npy_longdouble *rbufrv 1433 cdef npy_longdouble *rbufbc = NULL 1434 cdef npy_longdouble *rbuflb = NULL 1435 1436 cs = self.l_chunksize 1437 ss = self.l_slicesize 1438 ncs = ss / cs 1439 nbounds = self.nbounds 1440 nrows = self.nrows 1441 tlength = 0 1442 rbufst = <int *>self.rbufst 1443 rbufln = <int *>self.rbufln 1444 1445 # Limits not in cache, do a lookup 1446 rbufrv = <npy_longdouble *>self.rbufrv 1447 for nrow from 0 <= nrow < nrows: 1448 rvrow = nrow*2 1449 bread = 0 1450 nchunk = -1 1451 1452 # Look if item1 is in this row 1453 if item1 > rbufrv[rvrow]: 1454 if item1 <= rbufrv[rvrow+1]: 1455 # Get the bounds row from the LRU cache or read them. 1456 rbufbc = <npy_longdouble *>self.get_lru_bounds(nrow, nbounds) 1457 bread = 1 1458 nchunk = bisect_left_g(rbufbc, item1, nbounds, 0) 1459 # Get the sorted row from the LRU cache or read it. 1460 rbuflb = <npy_longdouble *>self.get_lru_sorted(nrow, ncs, nchunk, cs) 1461 start = bisect_left_g(rbuflb, item1, cs, 0) + cs*nchunk 1462 else: 1463 start = ss 1464 else: 1465 start = 0 1466 # Now, for item2 1467 if item2 >= rbufrv[rvrow]: 1468 if item2 < rbufrv[rvrow+1]: 1469 if not bread: 1470 # Get the bounds row from the LRU cache or read them. 1471 rbufbc = <npy_longdouble *>self.get_lru_bounds(nrow, nbounds) 1472 nchunk2 = bisect_right_g(rbufbc, item2, nbounds, 0) 1473 if nchunk2 <> nchunk: 1474 # Get the sorted row from the LRU cache or read it. 1475 rbuflb = <npy_longdouble *>self.get_lru_sorted(nrow, ncs, nchunk2, cs) 1476 stop = bisect_right_g(rbuflb, item2, cs, 0) + cs*nchunk2 1477 else: 1478 stop = ss 1479 else: 1480 stop = 0 1481 length = stop - start 1482 tlength = tlength + length 1483 rbufst[nrow] = start 1484 rbufln[nrow] = length 1485 return tlength 1486 1487 1488 def _g_close(self): 1489 super(Array, self)._g_close() 1490 # Release specific resources of this class 1491 if self.mem_space_id > 0: 1492 H5Sclose(self.mem_space_id) 1493 1494 1495cdef class LastRowArray(Array): 1496 """ 1497 Container for keeping sorted and indices values of last rows of an index. 1498 """ 1499 1500 def _read_index_slice(self, hsize_t start, hsize_t stop, ndarray idx): 1501 """Read the reverse index part of an LR index.""" 1502 1503 with nogil: 1504 ret = H5ARRAYOreadSliceLR(self.dataset_id, self.type_id, 1505 start, stop, idx.data) 1506 1507 if ret < 0: 1508 raise HDF5ExtError("Problems reading the index data in Last Row.") 1509 1510 1511 def _read_sorted_slice(self, IndexArray sorted, hsize_t start, hsize_t stop): 1512 """Read the sorted part of an LR index.""" 1513 1514 cdef void *rbuflb 1515 1516 rbuflb = sorted.rbuflb # direct access to rbuflb: very fast. 1517 with nogil: 1518 ret = H5ARRAYOreadSliceLR(self.dataset_id, self.type_id, 1519 start, stop, rbuflb) 1520 1521 if ret < 0: 1522 raise HDF5ExtError("Problems reading the index data.") 1523 return sorted.bufferlb[:stop-start] 1524 1525 1526 1527## Local Variables: 1528## mode: python 1529## py-indent-offset: 2 1530## tab-width: 2 1531## fill-column: 78 1532## End: 1533