# -*- coding: utf-8 -*-

########################################################################
#
# License: BSD
# Created: October 10, 2002
# Author: Francesc Alted - faltet@pytables.com
#
# $Id$
#
########################################################################

"""Here is defined the Array class."""

import operator
import sys
import numpy

from . import hdf5extension
from .filters import Filters
from .flavor import flavor_of, array_as_internal, internal_to_flavor
from .leaf import Leaf
from .utils import (is_idx, convert_to_np_atom2, SizeType, lazyattr,
                    byteorders, quantize)


# default version for ARRAY objects
# obversion = "1.0"    # initial version
# obversion = "2.0"    # Added an optional EXTDIM attribute
# obversion = "2.1"    # Added support for complex datatypes
# obversion = "2.2"    # This adds support for time datatypes.
# obversion = "2.3"    # This adds support for enumerated datatypes.
obversion = "2.4"    # Numeric and numarray flavors are gone.


class Array(hdf5extension.Array, Leaf):
    """This class represents homogeneous datasets in an HDF5 file.

    This class provides methods to write or read data to or from array objects
    in the file. This class allows you neither to enlarge nor to compress
    the datasets on disk; use the EArray class (see :ref:`EArrayClassDescr`) if
    you want enlargeable dataset support or compression features, or CArray
    (see :ref:`CArrayClassDescr`) if you just want compression.

    An interesting property of the Array class is that it remembers the
    *flavor* of the object that has been saved so that if you saved, for
    example, a list, you will get a list during readings afterwards; if you
    saved a NumPy array, you will get a NumPy object, and so forth.

    Note that this class inherits all the public attributes and methods that
    Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array
    instances have no internal I/O buffers, it is not necessary to use the
    flush() method they inherit from Leaf in order to save their internal state
    to disk.  When a writing method call returns, all the data is already on
    disk.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*

    name : str
        The name of this node in its parent group.
    obj
        The array or scalar to be saved.  Accepted types are NumPy
        arrays and scalars as well as native Python sequences and
        scalars, provided that values are regular (i.e. they are not
        like ``[[1,2],2]``) and homogeneous (i.e. all the elements are
        of the same type).

        .. versionchanged:: 3.0
           Renamed from *object* into *obj*.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute on
        disk).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the given `obj`.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True.  Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure.  As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    """

    # Class identifier.
    _c_classid = 'ARRAY'

    # Lazy read-only attributes
    # `````````````````````````
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""

        return self.atom.dtype

    # Properties
    # ~~~~~~~~~~

    @property
    def nrows(self):
        "The number of rows in the array."
        if self.shape == ():
            return SizeType(1)  # scalar case
        else:
            return self.shape[self.maindim]

    @property
    def rowsize(self):
        "The size of the rows in bytes in dimensions orthogonal to *maindim*."
        maindim = self.maindim
        rowsize = self.atom.size
        for i, dim in enumerate(self.shape):
            if i != maindim:
                rowsize *= dim
        return rowsize

    @property
    def size_in_memory(self):
        """The size of this array's data in bytes when it is fully loaded into
        memory."""
        return self.nrows * self.rowsize

    # Other methods
    # ~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 obj=None, title="",
                 byteorder=None, _log=True, _atom=None,
                 track_times=True):

        self._v_version = None
        """The object version of this array."""
        self._v_new = new = obj is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._obj = obj
        """The object to be stored in the array.  It can be any of numpy,
        list, tuple, string, integer or floating point types, provided
        that they are regular (i.e. they are not like ``[[1, 2], 2]``).

        .. versionchanged:: 3.0
           Renamed from *_object* into *_obj*.

        """

        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = _atom
        """An Atom (see :ref:`AtomClassDescr`) instance representing the *type*
        and *shape* of the atomic objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.nrow = None
        """On iterators, this is the index of the current row."""
        self.extdim = -1   # ordinary arrays are not enlargeable
        """The index of the enlargeable dimension."""

        # Ordinary arrays have no filters: leaf is created with default ones.
        super(Array, self).__init__(parentnode, name, new, Filters(),
                                    byteorder, _log, track_times)

    def _g_create(self):
        """Save a new array in file."""

        self._v_version = obversion
        try:
            # `Leaf._g_post_init_hook()` should be setting the flavor on disk.
            self._flavor = flavor = flavor_of(self._obj)
            nparr = array_as_internal(self._obj, flavor)
        except BaseException:  # XXX
            # Problems converting data. Close the node and re-raise exception.
            self.close(flush=0)
            raise

        # Raise an error in case of unsupported object
        if nparr.dtype.kind in ['V', 'U', 'O']:  # in void, unicode, object
            raise TypeError("Array objects cannot currently deal with void, "
                            "unicode or object arrays")

        # Decrease the number of references to the object
        self._obj = None

        # Fix the byteorder of data
        nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder)

        # Create the array on-disk
        try:
            # ``self._v_objectid`` needs to be set because would be
            # needed for setting attributes in some descendants later
            # on
            (self._v_objectid, self.shape, self.atom) = self._create_array(
                nparr, self._v_new_title, self.atom)
        except BaseException:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        # Compute the optimal buffer size
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Arrays don't have chunkshapes (so, set it to None)
        self._v_chunkshape = None

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        (oid, self.atom, self.shape, self._v_chunkshape) = self._open_array()

        self.nrowsinbuf = self._calc_nrowsinbuf()

        return oid

    def get_enum(self):
        """Get the enumerated type associated with this array.

        If this array is of an enumerated type, the corresponding Enum instance
        (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated
        type, a TypeError is raised.

        """

        if self.atom.kind != 'enum':
            raise TypeError("array ``%s`` is not of an enumerated type"
                            % self._v_pathname)

        return self.atom.enum

    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over the rows of the array.

        This method returns an iterator yielding an object of the current
        flavor for each selected row in the array.  The returned rows are taken
        from the *main dimension*.

        If a range is not supplied, *all the rows* in the array are iterated
        upon - you can also use the :meth:`Array.__iter__` special method for
        that purpose.  If you only want to iterate over a given *range of rows*
        in the array, you may use the start, stop and step parameters.

        Examples
        --------

        ::

            result = [row for row in arrayInstance.iterrows(step=4)]

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then the
           array is iterated from *start* to the last line.
           In PyTables < 3.0 only one element was returned.

        """

        try:
            (self._start, self._stop, self._step) = self._process_range(
                start, stop, step)
        except IndexError:
            # If problems with indexes, silently return the null tuple
            return ()
        self._init_loop()
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`Array.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------

        ::

            result = [row[2] for row in array]

        Which is equivalent to::

            result = [row[2] for row in array.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self

    def _init_loop(self):
        """Initialization for the __iter__ iterator."""

        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1   # Sentinel
        self._init = True  # Sentinel
        self.nrow = SizeType(self._start - self._step)    # row number

    def __next__(self):
        """Get the next element of the array during an iteration.

        The element is returned as an object of the current flavor.

        """

        # this could probably be sped up for long iterations by reusing the
        # listarr buffer
        if self._nrowsread >= self._stop:
            self._init = False
            self.listarr = None        # fixes issue #308
            raise StopIteration        # end of iteration
        else:
            # Read a chunk of rows
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                # Protection for reading more elements than needed
                if self._stopb > self._stop:
                    self._stopb = self._stop
                listarr = self._read(self._startb, self._stopb, self._step)
                # Swap the axes to ease the return of elements
                if self.extdim > 0:
                    listarr = listarr.swapaxes(self.extdim, 0)
                self.listarr = internal_to_flavor(listarr, self.flavor)
                self._row = -1
                self._startb = self._stopb
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            # Fixes bug #968132
            # if self.listarr.shape:
            if self.shape:
                return self.listarr[self._row]
            else:
                return self.listarr    # Scalar case

    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__"""

        maxlen = len(self.shape)
        shape = (maxlen,)
        startl = numpy.empty(shape=shape, dtype=SizeType)
        stopl = numpy.empty(shape=shape, dtype=SizeType)
        stepl = numpy.empty(shape=shape, dtype=SizeType)
        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # Here is some problem when dealing with [...,...] params
        # but this is a bit weird way to pass parameters anyway
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, type(Ellipsis)):
                ellipsis = 1
                for diml in range(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'" %
                                 self._v_pathname)
            elif is_idx(key):
                key = operator.index(key)

                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                start, stop, step = self._process_range(
                    key, key + 1, 1, dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._process_range(
                    key.start, key.stop, key.step, dim=dim)
            else:
                raise TypeError("Non-valid index or slice: %s" % key)
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in range(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly. Fixes #1288792
        shape = []
        for dim in range(len(self.shape)):
            # The negative division operates differently with python scalars
            # and numpy scalars (which are similar to C conventions). See:
            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
            # and
            # http://www.peterbe.com/Integer-division-in-programming-languages
            # for more info on this issue.
            # I've finally decided to rely on the len(xrange) function.
            # F. Alted 2006-09-25
            # Switch to `lrange` to allow long ranges (see #99).
            # use xrange, since it supports large integers as of Python 2.6
            # see github #181
            new_dim = len(range(startl[dim], stopl[dim], stepl[dim]))
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape

    def _fancy_selection(self, args):
        """Performs a NumPy-style fancy selection in `self`.

        Implements advanced NumPy-style selection operations in
        addition to the standard slice-and-int behavior.

        Indexing arguments may be ints, slices or lists of indices.

        Returns a ``(selection, reorder, mshape)`` tuple where `selection`
        is a list of ``(start, count, step, axis, operator)`` hyperslab
        descriptions, `reorder` is either None or an ``(axis, order)``
        pair for the single unordered list, and `mshape` is the shape of
        the selected data (scalar-indexed axes dropped).

        Note: This is a backport from the h5py project.

        """

        # Internal functions

        def validate_number(num, length):
            """Validate a list member for the given axis length.

            `num` is expected to be already normalised (negative values
            wrapped around), so anything outside ``[0, length)`` is an
            error.

            """

            try:
                num = int(num)
            except TypeError:
                raise TypeError("Illegal index: %r" % num)
            # A value that is still negative after wrap-around was below
            # ``-length``; letting it through would build a hyperslab with
            # a negative start.
            if num < 0 or num > length - 1:
                raise IndexError("Index out of bounds: %d" % num)

        def expand_ellipsis(args, rank):
            """Expand ellipsis objects and fill in missing axes."""

            n_el = sum(1 for arg in args if arg is Ellipsis)
            if n_el > 1:
                raise IndexError("Only one ellipsis may be used.")
            elif n_el == 0 and len(args) != rank:
                args = args + (Ellipsis,)

            final_args = []
            n_args = len(args)
            for arg in args:
                if arg is Ellipsis:
                    final_args.extend((slice(None),) * (rank - n_args + 1))
                else:
                    final_args.append(arg)

            if len(final_args) > rank:
                raise IndexError("Too many indices.")

            return final_args

        def translate_slice(exp, length):
            """Given a slice object, return a 3-tuple (start, count, step)

            This is for use with the hyperslab selection routines.

            Raises IndexError for non-positive steps, empty or
            reverse-order selections and out-of-range bounds.

            """

            start, stop, step = exp.start, exp.stop, exp.step
            if start is None:
                start = 0
            else:
                start = int(start)
            if stop is None:
                stop = length
            else:
                stop = int(stop)
            if step is None:
                step = 1
            else:
                step = int(step)

            if step < 1:
                raise IndexError("Step must be >= 1 (got %d)" % step)

            # Resolve end-relative (negative) bounds *before* comparing
            # them: otherwise a valid slice such as ``2:-1`` is rejected as
            # reverse-ordered, while a reversed one such as ``-1:2`` slips
            # through and yields a negative count.
            if start < 0:
                start = length + start
            if stop < 0:
                stop = length + stop

            if stop == start:
                raise IndexError("Zero-length selections are not allowed")
            if stop < start:
                raise IndexError("Reverse-order selections are not allowed")

            if not 0 <= start <= (length - 1):
                raise IndexError(
                    "Start index %s out of range (0-%d)" % (start, length - 1))
            if not 1 <= stop <= length:
                raise IndexError(
                    "Stop index %s out of range (1-%d)" % (stop, length))

            # Ceiling division: number of elements visited by the stride
            count = (stop - start) // step
            if (stop - start) % step != 0:
                count += 1

            if start + count > length:
                raise IndexError(
                    "Selection out of bounds (%d; axis has %d)" %
                    (start + count, length))

            return start, count, step

        # Main code for _fancy_selection
        mshape = []
        selection = []

        if not isinstance(args, tuple):
            args = (args,)

        args = expand_ellipsis(args, len(self.shape))

        list_seen = False
        reorder = None
        for idx, (exp, length) in enumerate(zip(args, self.shape)):
            if isinstance(exp, slice):
                start, count, step = translate_slice(exp, length)
                selection.append((start, count, step, idx, "AND"))
                mshape.append(count)
            else:
                try:
                    exp = list(exp)
                except TypeError:
                    exp = [exp]  # Handle scalar index as a list of length 1
                    mshape.append(0)  # Keep track of scalar index for NumPy
                else:
                    mshape.append(len(exp))
                if len(exp) == 0:
                    raise IndexError(
                        "Empty selections are not allowed (axis %d)" % idx)
                elif len(exp) > 1:
                    if list_seen:
                        raise IndexError("Only one selection list is allowed")
                    else:
                        list_seen = True
                else:
                    # Anything that is not a Python or NumPy integer
                    # (including any ndarray) is rejected here.
                    if not isinstance(exp[0], (int, numpy.integer)):
                        raise TypeError("Only integer coordinates allowed.")

                nexp = numpy.asarray(exp, dtype="i8")
                # Convert negative values
                nexp = numpy.where(nexp < 0, length + nexp, nexp)
                # Check whether the list is ordered or not
                # (only one unordered list is allowed)
                if not len(nexp) == len(numpy.unique(nexp)):
                    raise IndexError(
                        "Selection lists cannot have repeated values")
                neworder = nexp.argsort()
                if (neworder.shape != (len(exp),) or
                        numpy.sum(
                            numpy.abs(
                                neworder - numpy.arange(len(exp)))) != 0):
                    if reorder is not None:
                        raise IndexError(
                            "Only one selection list can be unordered")
                    corrected_idx = sum(1 for x in mshape if x != 0) - 1
                    reorder = (corrected_idx, neworder)
                    nexp = nexp[neworder]
                for select_idx in range(len(nexp) + 1):
                    # This crazy piece of code performs a list selection
                    # using HDF5 hyperslabs.
                    # For each index, perform a "NOTB" selection on every
                    # portion of *this axis* which falls *outside* the list
                    # selection.  For this to work, the input array MUST be
                    # monotonically increasing.
                    if select_idx < len(nexp):
                        validate_number(nexp[select_idx], length)
                    if select_idx == 0:
                        start = 0
                        count = nexp[0]
                    elif select_idx == len(nexp):
                        start = nexp[-1] + 1
                        count = length - start
                    else:
                        start = nexp[select_idx - 1] + 1
                        count = nexp[select_idx] - start
                    if count > 0:
                        selection.append((start, count, 1, idx, "NOTB"))

        mshape = tuple(x for x in mshape if x != 0)
        return selection, reorder, mshape

    def __getitem__(self, key):
        """Get a row, a range of rows or a slice from the array.

        The set of tokens allowed for the key is the same as that for extended
        slicing in Python (including the Ellipsis or ... token).  The result is
        an object of the current flavor; its shape depends on the kind of slice
        used as key and the shape of the array itself.

        Furthermore, NumPy-style fancy indexing, where a list of indices in a
        certain axis is specified, is also supported.  Note that only one list
        per selection is supported right now.  Finally, NumPy-style point and
        boolean selections are supported as well.

        Examples
        --------

        ::

            array1 = array[4]                       # simple selection
            array2 = array[4:1000:2]                # slice selection
            array3 = array[1, ..., ::2, 1:4, 4:]    # general slice selection
            array4 = array[1, [1,5,10], ..., -1]    # fancy selection
            array5 = array[np.where(array[:] > 4)]  # point selection
            array6 = array[array[:] > 4]            # boolean selection

        """

        self._g_check_open()

        try:
            # First, try with a regular selection
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            arr = self._read_slice(startl, stopl, stepl, shape)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                arr = self._read_coords(coords)
            except TypeError:
                # Finally, try with a fancy selection
                selection, reorder, shape = self._fancy_selection(key)
                arr = self._read_selection(selection, reorder, shape)

        if self.flavor == "numpy" or not self._v_convert:
            return arr

        return internal_to_flavor(arr, self.flavor)

    def __setitem__(self, key, value):
        """Set a row, a range of rows or a slice in the array.

        It takes different actions depending on the type of the key parameter:
        if it is an integer, the corresponding array row is set to value (the
        value is broadcast when needed).  If key is a slice, the row slice
        determined by it is set to value (as usual, if the slice to be updated
        exceeds the actual shape of the array, only the values in the existing
        range are updated).

        If value is a multidimensional object, then its shape must be
        compatible with the shape determined by key, otherwise, a ValueError
        will be raised.

        Furthermore, NumPy-style fancy indexing, where a list of indices in a
        certain axis is specified, is also supported.  Note that only one list
        per selection is supported right now.  Finally, NumPy-style point and
        boolean selections are supported as well.

        Examples
        --------

        ::

            a1[0] = 333        # assign an integer to a Integer Array row
            a2[0] = 'b'        # assign a string to a string Array row
            a3[1:4] = 5        # broadcast 5 to slice 1:4
            a4[1:4:2] = 'xXx'  # broadcast 'xXx' to slice 1:4:2

            # General slice update (a5.shape = (4,3,2,8,5,10).
            a5[1, ..., ::2, 1:4, 4:] = numpy.arange(1728, shape=(4,3,2,4,3,6))
            a6[1, [1,5,10], ..., -1] = arr    # fancy selection
            a7[np.where(a6[:] > 4)] = 4       # point selection + broadcast
            a8[arr > 4] = arr2                # boolean selection

        """

        self._g_check_open()

        # Create an array compliant with the specified slice
        nparr = convert_to_np_atom2(value, self.atom)
        if nparr.size == 0:
            return

        # truncate data if least_significant_digit filter is set
        # TODO: add the least_significant_digit attribute to the array on disk
        if (self.filters.least_significant_digit is not None and
                not numpy.issubdtype(nparr.dtype, numpy.signedinteger)):
            nparr = quantize(nparr, self.filters.least_significant_digit)

        try:
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            self._write_slice(startl, stopl, stepl, shape, nparr)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                self._write_coords(coords, nparr)
            except TypeError:
                selection, reorder, shape = self._fancy_selection(key)
                self._write_selection(selection, reorder, shape, nparr)

    def _check_shape(self, nparr, slice_shape):
        """Test that nparr shape is consistent with underlying object.

        If not, try creating a new nparr object, using broadcasting if
        necessary.

        """

        if nparr.shape != (slice_shape + self.atom.dtype.shape):
            # Create an array compliant with the specified shape
            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)

            # Assign the value to it. It will raise a ValueError exception
            # if the objects cannot be broadcast to a single shape.
            narr[...] = nparr
            return narr
        else:
            return nparr

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    def _read_coords(self, coords):
        """Read a set of points defined by `coords`."""

        nparr = numpy.empty(dtype=self.atom.dtype, shape=len(coords))
        if len(coords) > 0:
            self._g_read_coords(coords, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    def _read_selection(self, selection, reorder, shape):
        """Read a `selection`.

        Reorder if necessary.

        """

        # Create the container for the slice
        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        # Arrays that have non-zero dimensionality
        self._g_read_selection(selection, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        elif reorder is not None:
            # We need to reorder the array
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder.argsort()
            # Apparently, a copy is not needed here, but doing it
            # for symmetry with the `_write_selection()` method.
            nparr = nparr[tuple(k)].copy()
        return nparr

    def _write_slice(self, startl, stopl, stepl, shape, nparr):
        """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`."""

        nparr = self._check_shape(nparr, tuple(shape))
        countl = ((stopl - startl - 1) // stepl) + 1
        self._g_write_slice(startl, stepl, countl, nparr)

    def _write_coords(self, coords, nparr):
        """Write `nparr` values in points defined by `coords` coordinates."""

        if len(coords) > 0:
            nparr = self._check_shape(nparr, (len(coords),))
            self._g_write_coords(coords, nparr)

    def _write_selection(self, selection, reorder, shape, nparr):
        """Write `nparr` in `selection`.

        Reorder if necessary.

        """

        nparr = self._check_shape(nparr, tuple(shape))
        # Check whether we should reorder the array
        if reorder is not None:
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder
            # For a reason I don't understand well, we need a copy of
            # the reordered array
            nparr = nparr[tuple(k)].copy()
        self._g_write_selection(selection, nparr)

    def _read(self, start, stop, step, out=None):
        """Read the array from disk without slice or flavor processing."""

        nrowstoread = len(range(start, stop, step))
        shape = list(self.shape)
        if shape:
            shape[self.maindim] = nrowstoread
        if out is None:
            arr = numpy.empty(dtype=self.atom.dtype, shape=shape)
        else:
            bytes_required = self.rowsize * nrowstoread
            # if buffer is too small, it will segfault
            if bytes_required != out.nbytes:
                raise ValueError(('output array size invalid, got {0} bytes, '
                                  'need {1} bytes').format(out.nbytes,
                                                           bytes_required))
            if not out.flags['C_CONTIGUOUS']:
                raise ValueError('output array not C contiguous')
            arr = out
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._read_array(start, stop, step, arr)
        # data is always read in the system byteorder
        # if the out array's byteorder is different, do a byteswap
        if (out is not None and
                byteorders[arr.dtype.byteorder] != sys.byteorder):
            arr.byteswap(True)
        return arr

    def read(self, start=None, stop=None, step=None, out=None):
        """Get data in the array as an object of the current flavor.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet. Moreover, if only start is specified,
        then stop will be set to start + 1. If you specify neither
        start nor stop, then *all the rows* in the array are selected.

        The out parameter may be used to specify a NumPy array to receive
        the output data.  Note that the array must have the same size as
        the data selected with the other parameters.  Note that the array's
        datatype is not checked and no type casting is performed, so if it
        does not match the datatype on disk, the output will not be correct.
        Also, this parameter is only valid when the array's flavor is set
        to 'numpy'.  Otherwise, a TypeError will be raised.

        When data is read from disk in NumPy format, the output will be
        in the current system's byteorder, regardless of how it is stored
        on disk.
        The exception is when an output buffer is supplied, in which case
        the output will be in the byteorder of that output buffer.

        .. versionchanged:: 3.0
           Added the *out* parameter.

        """

        self._g_check_open()
        if out is not None and self.flavor != 'numpy':
            msg = ("Optional 'out' argument may only be supplied if array "
                   "flavor is 'numpy', currently is {0}").format(self.flavor)
            raise TypeError(msg)
        (start, stop, step) = self._process_range_read(start, stop, step)
        arr = self._read(start, stop, step, out)
        return internal_to_flavor(arr, self.flavor)

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        # Compute the correct indices.
        (start, stop, step) = self._process_range_read(start, stop, step)
        # Get the slice of the array
        # (non-buffered version)
        if self.shape:
            arr = self[start:stop:step]
        else:
            arr = self[()]
        # Build the new Array object.  Use the _atom reserved keyword
        # just in case the array is being copied from a native HDF5
        # with atomic types different from scalars.
        # For details, see #275 of trac.
        object_ = Array(group, name, arr, title=title, _log=_log,
                        _atom=self.atom)
        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size

        return (object_, nbytes)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return """%s
  atom := %r
  maindim := %r
  flavor := %r
  byteorder := %r
  chunkshape := %r""" % (self, self.atom, self.maindim,
                         self.flavor, self.byteorder,
                         self.chunkshape)


class ImageArray(Array):
    """Array containing an image.

    This class has no additional behaviour or functionality compared to
    that of an ordinary array.  It simply enables the user to open an
    ``IMAGE`` HDF5 node as a normal `Array` node in PyTables.

    """

    # Class identifier.
    _c_classid = 'IMAGE'