1# -*- coding: utf-8 -*-
2
3########################################################################
4#
5# License: BSD
6# Created: October 10, 2002
7# Author: Francesc Alted - faltet@pytables.com
8#
9# $Id$
10#
11########################################################################
12
13"""Here is defined the Array class."""
14
15import operator
16import sys
17import numpy
18
19from . import hdf5extension
20from .filters import Filters
21from .flavor import flavor_of, array_as_internal, internal_to_flavor
22from .leaf import Leaf
23from .utils import (is_idx, convert_to_np_atom2, SizeType, lazyattr,
24                    byteorders, quantize)
25
26
27
# Default object version for ARRAY objects; it is stored in ``_v_version``
# when a new array is created.  The commented-out assignments below record
# the history of earlier versions.
# obversion = "1.0"    # initial version
# obversion = "2.0"    # Added an optional EXTDIM attribute
# obversion = "2.1"    # Added support for complex datatypes
# obversion = "2.2"    # This adds support for time datatypes.
# obversion = "2.3"    # This adds support for enumerated datatypes.
obversion = "2.4"    # Numeric and numarray flavors are gone.
35
36
37class Array(hdf5extension.Array, Leaf):
38    """This class represents homogeneous datasets in an HDF5 file.
39
    This class provides methods to write or read data to or from array objects
    in the file. This class allows you neither to enlarge nor to compress
    the datasets on disk; use the EArray class (see :ref:`EArrayClassDescr`) if
    you want enlargeable dataset support or compression features, or CArray
    (see :ref:`CArrayClassDescr`) if you just want compression.
45
46    An interesting property of the Array class is that it remembers the
47    *flavor* of the object that has been saved so that if you saved, for
48    example, a list, you will get a list during readings afterwards; if you
49    saved a NumPy array, you will get a NumPy object, and so forth.
50
51    Note that this class inherits all the public attributes and methods that
52    Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array
53    instances have no internal I/O buffers, it is not necessary to use the
54    flush() method they inherit from Leaf in order to save their internal state
55    to disk.  When a writing method call returns, all the data is already on
56    disk.
57
58    Parameters
59    ----------
60    parentnode
61        The parent :class:`Group` object.
62
63        .. versionchanged:: 3.0
64           Renamed from *parentNode* to *parentnode*
65
66    name : str
67        The name of this node in its parent group.
68    obj
69        The array or scalar to be saved.  Accepted types are NumPy
70        arrays and scalars as well as native Python sequences and
71        scalars, provided that values are regular (i.e. they are not
72        like ``[[1,2],2]``) and homogeneous (i.e. all the elements are
73        of the same type).
74
75        .. versionchanged:: 3.0
           Renamed from *object* to *obj*.
77    title
78        A description for this node (it sets the ``TITLE`` HDF5 attribute on
79        disk).
80    byteorder
81        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the given `obj`.
83    track_times
84        Whether time data associated with the leaf are recorded (object
85        access time, raw data modification time, metadata change time, object
86        birth time); default True.  Semantics of these times depend on their
87        implementation in the HDF5 library: refer to documentation of the
88        H5O_info_t data structure.  As of HDF5 1.8.15, only ctime (metadata
89        change time) is implemented.
90
91        .. versionadded:: 3.4.3
92
93    """
94
95    # Class identifier.
96    _c_classid = 'ARRAY'
97
98    # Lazy read-only attributes
99    # `````````````````````````
100    @lazyattr
101    def dtype(self):
102        """The NumPy ``dtype`` that most closely matches this array."""
103
104        return self.atom.dtype
105
106    # Properties
107    # ~~~~~~~~~~
108
109    @property
110    def nrows(self):
111        "The number of rows in the array."
112        if self.shape == ():
113            return SizeType(1)  # scalar case
114        else:
115            return self.shape[self.maindim]
116
117    @property
118    def rowsize(self):
119        "The size of the rows in bytes in dimensions orthogonal to *maindim*."
120        maindim = self.maindim
121        rowsize = self.atom.size
122        for i, dim in enumerate(self.shape):
123            if i != maindim:
124                rowsize *= dim
125        return rowsize
126
127    @property
128    def size_in_memory(self):
129        """The size of this array's data in bytes when it is fully loaded into
130        memory."""
131        return self.nrows * self.rowsize
132
133    # Other methods
134    # ~~~~~~~~~~~~~
135    def __init__(self, parentnode, name,
136                 obj=None, title="",
137                 byteorder=None, _log=True, _atom=None,
138                 track_times=True):
139
140        self._v_version = None
141        """The object version of this array."""
142        self._v_new = new = obj is not None
143        """Is this the first time the node has been created?"""
144        self._v_new_title = title
145        """New title for this node."""
146        self._obj = obj
147        """The object to be stored in the array.  It can be any of numpy,
148        list, tuple, string, integer of floating point types, provided
149        that they are regular (i.e. they are not like ``[[1, 2], 2]``).
150
151        .. versionchanged:: 3.0
152           Renamed form *_object* into *_obj*.
153
154        """
155
156        self._v_convert = True
157        """Whether the ``Array`` object must be converted or not."""
158
159        # Miscellaneous iteration rubbish.
160        self._start = None
161        """Starting row for the current iteration."""
162        self._stop = None
163        """Stopping row for the current iteration."""
164        self._step = None
165        """Step size for the current iteration."""
166        self._nrowsread = None
167        """Number of rows read up to the current state of iteration."""
168        self._startb = None
169        """Starting row for current buffer."""
170        self._stopb = None
171        """Stopping row for current buffer. """
172        self._row = None
173        """Current row in iterators (sentinel)."""
174        self._init = False
175        """Whether we are in the middle of an iteration or not (sentinel)."""
176        self.listarr = None
177        """Current buffer in iterators."""
178
179        # Documented (*public*) attributes.
180        self.atom = _atom
181        """An Atom (see :ref:`AtomClassDescr`) instance representing the *type*
182        and *shape* of the atomic objects to be saved.
183        """
184        self.shape = None
185        """The shape of the stored array."""
186        self.nrow = None
187        """On iterators, this is the index of the current row."""
188        self.extdim = -1   # ordinary arrays are not enlargeable
189        """The index of the enlargeable dimension."""
190
191        # Ordinary arrays have no filters: leaf is created with default ones.
192        super(Array, self).__init__(parentnode, name, new, Filters(),
193                                    byteorder, _log, track_times)
194
195    def _g_create(self):
196        """Save a new array in file."""
197
198        self._v_version = obversion
199        try:
200            # `Leaf._g_post_init_hook()` should be setting the flavor on disk.
201            self._flavor = flavor = flavor_of(self._obj)
202            nparr = array_as_internal(self._obj, flavor)
203        except:  # XXX
204            # Problems converting data. Close the node and re-raise exception.
205            self.close(flush=0)
206            raise
207
208        # Raise an error in case of unsupported object
209        if nparr.dtype.kind in ['V', 'U', 'O']:  # in void, unicode, object
210            raise TypeError("Array objects cannot currently deal with void, "
211                            "unicode or object arrays")
212
213        # Decrease the number of references to the object
214        self._obj = None
215
216        # Fix the byteorder of data
217        nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder)
218
219        # Create the array on-disk
220        try:
221            # ``self._v_objectid`` needs to be set because would be
222            # needed for setting attributes in some descendants later
223            # on
224            (self._v_objectid, self.shape, self.atom) = self._create_array(
225                nparr, self._v_new_title, self.atom)
226        except:  # XXX
227            # Problems creating the Array on disk. Close node and re-raise.
228            self.close(flush=0)
229            raise
230
231        # Compute the optimal buffer size
232        self.nrowsinbuf = self._calc_nrowsinbuf()
233        # Arrays don't have chunkshapes (so, set it to None)
234        self._v_chunkshape = None
235
236        return self._v_objectid
237
238    def _g_open(self):
239        """Get the metadata info for an array in file."""
240
241        (oid, self.atom, self.shape, self._v_chunkshape) = self._open_array()
242
243        self.nrowsinbuf = self._calc_nrowsinbuf()
244
245        return oid
246
247    def get_enum(self):
248        """Get the enumerated type associated with this array.
249
250        If this array is of an enumerated type, the corresponding Enum instance
251        (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated
252        type, a TypeError is raised.
253
254        """
255
256        if self.atom.kind != 'enum':
257            raise TypeError("array ``%s`` is not of an enumerated type"
258                            % self._v_pathname)
259
260        return self.atom.enum
261
262    def iterrows(self, start=None, stop=None, step=None):
263        """Iterate over the rows of the array.
264
265        This method returns an iterator yielding an object of the current
266        flavor for each selected row in the array.  The returned rows are taken
267        from the *main dimension*.
268
269        If a range is not supplied, *all the rows* in the array are iterated
270        upon - you can also use the :meth:`Array.__iter__` special method for
271        that purpose.  If you only want to iterate over a given *range of rows*
272        in the array, you may use the start, stop and step parameters.
273
274        Examples
275        --------
276
277        ::
278
279            result = [row for row in arrayInstance.iterrows(step=4)]
280
281        .. versionchanged:: 3.0
282           If the *start* parameter is provided and *stop* is None then the
283           array is iterated from *start* to the last line.
284           In PyTables < 3.0 only one element was returned.
285
286        """
287
288        try:
289            (self._start, self._stop, self._step) = self._process_range(
290                start, stop, step)
291        except IndexError:
292            # If problems with indexes, silently return the null tuple
293            return ()
294        self._init_loop()
295        return self
296
297    def __iter__(self):
298        """Iterate over the rows of the array.
299
300        This is equivalent to calling :meth:`Array.iterrows` with default
301        arguments, i.e. it iterates over *all the rows* in the array.
302
303        Examples
304        --------
305
306        ::
307
308            result = [row[2] for row in array]
309
310        Which is equivalent to::
311
312            result = [row[2] for row in array.iterrows()]
313
314        """
315
316        if not self._init:
317            # If the iterator is called directly, assign default variables
318            self._start = 0
319            self._stop = self.nrows
320            self._step = 1
321            # and initialize the loop
322            self._init_loop()
323        return self
324
325    def _init_loop(self):
326        """Initialization for the __iter__ iterator."""
327
328        self._nrowsread = self._start
329        self._startb = self._start
330        self._row = -1   # Sentinel
331        self._init = True  # Sentinel
332        self.nrow = SizeType(self._start - self._step)    # row number
333
334    def __next__(self):
335        """Get the next element of the array during an iteration.
336
337        The element is returned as an object of the current flavor.
338
339        """
340
341        # this could probably be sped up for long iterations by reusing the
342        # listarr buffer
343        if self._nrowsread >= self._stop:
344            self._init = False
345            self.listarr = None        # fixes issue #308
346            raise StopIteration        # end of iteration
347        else:
348            # Read a chunk of rows
349            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
350                self._stopb = self._startb + self._step * self.nrowsinbuf
351                # Protection for reading more elements than needed
352                if self._stopb > self._stop:
353                    self._stopb = self._stop
354                listarr = self._read(self._startb, self._stopb, self._step)
355                # Swap the axes to easy the return of elements
356                if self.extdim > 0:
357                    listarr = listarr.swapaxes(self.extdim, 0)
358                self.listarr = internal_to_flavor(listarr, self.flavor)
359                self._row = -1
360                self._startb = self._stopb
361            self._row += 1
362            self.nrow += self._step
363            self._nrowsread += self._step
364            # Fixes bug #968132
365            # if self.listarr.shape:
366            if self.shape:
367                return self.listarr[self._row]
368            else:
369                return self.listarr    # Scalar case
370
371    def _interpret_indexing(self, keys):
372        """Internal routine used by __getitem__ and __setitem__"""
373
374        maxlen = len(self.shape)
375        shape = (maxlen,)
376        startl = numpy.empty(shape=shape, dtype=SizeType)
377        stopl = numpy.empty(shape=shape, dtype=SizeType)
378        stepl = numpy.empty(shape=shape, dtype=SizeType)
379        stop_None = numpy.zeros(shape=shape, dtype=SizeType)
380        if not isinstance(keys, tuple):
381            keys = (keys,)
382        nkeys = len(keys)
383        dim = 0
384        # Here is some problem when dealing with [...,...] params
385        # but this is a bit weird way to pass parameters anyway
386        for key in keys:
387            ellipsis = 0  # Sentinel
388            if isinstance(key, type(Ellipsis)):
389                ellipsis = 1
390                for diml in range(dim, len(self.shape) - (nkeys - dim) + 1):
391                    startl[dim] = 0
392                    stopl[dim] = self.shape[diml]
393                    stepl[dim] = 1
394                    dim += 1
395            elif dim >= maxlen:
396                raise IndexError("Too many indices for object '%s'" %
397                                 self._v_pathname)
398            elif is_idx(key):
399                key = operator.index(key)
400
401                # Protection for index out of range
402                if key >= self.shape[dim]:
403                    raise IndexError("Index out of range")
404                if key < 0:
405                    # To support negative values (Fixes bug #968149)
406                    key += self.shape[dim]
407                start, stop, step = self._process_range(
408                    key, key + 1, 1, dim=dim)
409                stop_None[dim] = 1
410            elif isinstance(key, slice):
411                start, stop, step = self._process_range(
412                    key.start, key.stop, key.step, dim=dim)
413            else:
414                raise TypeError("Non-valid index or slice: %s" % key)
415            if not ellipsis:
416                startl[dim] = start
417                stopl[dim] = stop
418                stepl[dim] = step
419                dim += 1
420
421        # Complete the other dimensions, if needed
422        if dim < len(self.shape):
423            for diml in range(dim, len(self.shape)):
424                startl[dim] = 0
425                stopl[dim] = self.shape[diml]
426                stepl[dim] = 1
427                dim += 1
428
429        # Compute the shape for the container properly. Fixes #1288792
430        shape = []
431        for dim in range(len(self.shape)):
432            # The negative division operates differently with python scalars
433            # and numpy scalars (which are similar to C conventions). See:
434            # http://www.python.org/doc/faq/programming.html#why-does-22-10-return-3
435            # and
436            # http://www.peterbe.com/Integer-division-in-programming-languages
437            # for more info on this issue.
438            # I've finally decided to rely on the len(xrange) function.
439            # F. Alted 2006-09-25
440            # Switch to `lrange` to allow long ranges (see #99).
441            # use xrange, since it supports large integers as of Python 2.6
442            # see github #181
443            new_dim = len(range(startl[dim], stopl[dim], stepl[dim]))
444            if not (new_dim == 1 and stop_None[dim]):
445                shape.append(new_dim)
446
447        return startl, stopl, stepl, shape
448
449    def _fancy_selection(self, args):
450        """Performs a NumPy-style fancy selection in `self`.
451
452        Implements advanced NumPy-style selection operations in
453        addition to the standard slice-and-int behavior.
454
455        Indexing arguments may be ints, slices or lists of indices.
456
457        Note: This is a backport from the h5py project.
458
459        """
460
461        # Internal functions
462
463        def validate_number(num, length):
464            """Validate a list member for the given axis length."""
465
466            try:
467                num = int(num)
468            except TypeError:
469                raise TypeError("Illegal index: %r" % num)
470            if num > length - 1:
471                raise IndexError("Index out of bounds: %d" % num)
472
473        def expand_ellipsis(args, rank):
474            """Expand ellipsis objects and fill in missing axes."""
475
476            n_el = sum(1 for arg in args if arg is Ellipsis)
477            if n_el > 1:
478                raise IndexError("Only one ellipsis may be used.")
479            elif n_el == 0 and len(args) != rank:
480                args = args + (Ellipsis,)
481
482            final_args = []
483            n_args = len(args)
484            for idx, arg in enumerate(args):
485                if arg is Ellipsis:
486                    final_args.extend((slice(None),) * (rank - n_args + 1))
487                else:
488                    final_args.append(arg)
489
490            if len(final_args) > rank:
491                raise IndexError("Too many indices.")
492
493            return final_args
494
495        def translate_slice(exp, length):
496            """Given a slice object, return a 3-tuple (start, count, step)
497
498            This is for for use with the hyperslab selection routines.
499
500            """
501
502            start, stop, step = exp.start, exp.stop, exp.step
503            if start is None:
504                start = 0
505            else:
506                start = int(start)
507            if stop is None:
508                stop = length
509            else:
510                stop = int(stop)
511            if step is None:
512                step = 1
513            else:
514                step = int(step)
515
516            if step < 1:
517                raise IndexError("Step must be >= 1 (got %d)" % step)
518            if stop == start:
519                raise IndexError("Zero-length selections are not allowed")
520            if stop < start:
521                raise IndexError("Reverse-order selections are not allowed")
522            if start < 0:
523                start = length + start
524            if stop < 0:
525                stop = length + stop
526
527            if not 0 <= start <= (length - 1):
528                raise IndexError(
529                    "Start index %s out of range (0-%d)" % (start, length - 1))
530            if not 1 <= stop <= length:
531                raise IndexError(
532                    "Stop index %s out of range (1-%d)" % (stop, length))
533
534            count = (stop - start) // step
535            if (stop - start) % step != 0:
536                count += 1
537
538            if start + count > length:
539                raise IndexError(
540                    "Selection out of bounds (%d; axis has %d)" %
541                    (start + count, length))
542
543            return start, count, step
544
545        # Main code for _fancy_selection
546        mshape = []
547        selection = []
548
549        if not isinstance(args, tuple):
550            args = (args,)
551
552        args = expand_ellipsis(args, len(self.shape))
553
554        list_seen = False
555        reorder = None
556        for idx, (exp, length) in enumerate(zip(args, self.shape)):
557            if isinstance(exp, slice):
558                start, count, step = translate_slice(exp, length)
559                selection.append((start, count, step, idx, "AND"))
560                mshape.append(count)
561            else:
562                try:
563                    exp = list(exp)
564                except TypeError:
565                    exp = [exp]  # Handle scalar index as a list of length 1
566                    mshape.append(0)  # Keep track of scalar index for NumPy
567                else:
568                    mshape.append(len(exp))
569                if len(exp) == 0:
570                    raise IndexError(
571                        "Empty selections are not allowed (axis %d)" % idx)
572                elif len(exp) > 1:
573                    if list_seen:
574                        raise IndexError("Only one selection list is allowed")
575                    else:
576                        list_seen = True
577                else:
578                    if (not isinstance(exp[0], (int, numpy.integer)) or
579                        (isinstance(exp[0], numpy.ndarray) and not
580                            numpy.issubdtype(exp[0].dtype, numpy.integer))):
581                        raise TypeError("Only integer coordinates allowed.")
582
583                nexp = numpy.asarray(exp, dtype="i8")
584                # Convert negative values
585                nexp = numpy.where(nexp < 0, length + nexp, nexp)
586                # Check whether the list is ordered or not
587                # (only one unordered list is allowed)
588                if not len(nexp) == len(numpy.unique(nexp)):
589                    raise IndexError(
590                        "Selection lists cannot have repeated values")
591                neworder = nexp.argsort()
592                if (neworder.shape != (len(exp),) or
593                        numpy.sum(
594                            numpy.abs(
595                                neworder - numpy.arange(len(exp)))) != 0):
596                    if reorder is not None:
597                        raise IndexError(
598                            "Only one selection list can be unordered")
599                    corrected_idx = sum(1 for x in mshape if x != 0) - 1
600                    reorder = (corrected_idx, neworder)
601                    nexp = nexp[neworder]
602                for select_idx in range(len(nexp) + 1):
603                    # This crazy piece of code performs a list selection
604                    # using HDF5 hyperslabs.
605                    # For each index, perform a "NOTB" selection on every
606                    # portion of *this axis* which falls *outside* the list
607                    # selection.  For this to work, the input array MUST be
608                    # monotonically increasing.
609                    if select_idx < len(nexp):
610                        validate_number(nexp[select_idx], length)
611                    if select_idx == 0:
612                        start = 0
613                        count = nexp[0]
614                    elif select_idx == len(nexp):
615                        start = nexp[-1] + 1
616                        count = length - start
617                    else:
618                        start = nexp[select_idx - 1] + 1
619                        count = nexp[select_idx] - start
620                    if count > 0:
621                        selection.append((start, count, 1, idx, "NOTB"))
622
623        mshape = tuple(x for x in mshape if x != 0)
624        return selection, reorder, mshape
625
626    def __getitem__(self, key):
627        """Get a row, a range of rows or a slice from the array.
628
629        The set of tokens allowed for the key is the same as that for extended
630        slicing in Python (including the Ellipsis or ... token).  The result is
631        an object of the current flavor; its shape depends on the kind of slice
632        used as key and the shape of the array itself.
633
634        Furthermore, NumPy-style fancy indexing, where a list of indices in a
635        certain axis is specified, is also supported.  Note that only one list
636        per selection is supported right now.  Finally, NumPy-style point and
637        boolean selections are supported as well.
638
639        Examples
640        --------
641
642        ::
643
644            array1 = array[4]                       # simple selection
645            array2 = array[4:1000:2]                # slice selection
646            array3 = array[1, ..., ::2, 1:4, 4:]    # general slice selection
647            array4 = array[1, [1,5,10], ..., -1]    # fancy selection
648            array5 = array[np.where(array[:] > 4)]  # point selection
649            array6 = array[array[:] > 4]            # boolean selection
650
651        """
652
653        self._g_check_open()
654
655        try:
656            # First, try with a regular selection
657            startl, stopl, stepl, shape = self._interpret_indexing(key)
658            arr = self._read_slice(startl, stopl, stepl, shape)
659        except TypeError:
660            # Then, try with a point-wise selection
661            try:
662                coords = self._point_selection(key)
663                arr = self._read_coords(coords)
664            except TypeError:
665                # Finally, try with a fancy selection
666                selection, reorder, shape = self._fancy_selection(key)
667                arr = self._read_selection(selection, reorder, shape)
668
669        if self.flavor == "numpy" or not self._v_convert:
670            return arr
671
672        return internal_to_flavor(arr, self.flavor)
673
674    def __setitem__(self, key, value):
675        """Set a row, a range of rows or a slice in the array.
676
677        It takes different actions depending on the type of the key parameter:
678        if it is an integer, the corresponding array row is set to value (the
679        value is broadcast when needed).  If key is a slice, the row slice
680        determined by it is set to value (as usual, if the slice to be updated
681        exceeds the actual shape of the array, only the values in the existing
682        range are updated).
683
684        If value is a multidimensional object, then its shape must be
685        compatible with the shape determined by key, otherwise, a ValueError
686        will be raised.
687
688        Furthermore, NumPy-style fancy indexing, where a list of indices in a
689        certain axis is specified, is also supported.  Note that only one list
690        per selection is supported right now.  Finally, NumPy-style point and
691        boolean selections are supported as well.
692
693        Examples
694        --------
695
696        ::
697
698            a1[0] = 333        # assign an integer to a Integer Array row
699            a2[0] = 'b'        # assign a string to a string Array row
700            a3[1:4] = 5        # broadcast 5 to slice 1:4
701            a4[1:4:2] = 'xXx'  # broadcast 'xXx' to slice 1:4:2
702
703            # General slice update (a5.shape = (4,3,2,8,5,10).
704            a5[1, ..., ::2, 1:4, 4:] = numpy.arange(1728, shape=(4,3,2,4,3,6))
705            a6[1, [1,5,10], ..., -1] = arr    # fancy selection
706            a7[np.where(a6[:] > 4)] = 4       # point selection + broadcast
707            a8[arr > 4] = arr2                # boolean selection
708
709        """
710
711        self._g_check_open()
712
713        # Create an array compliant with the specified slice
714        nparr = convert_to_np_atom2(value, self.atom)
715        if nparr.size == 0:
716            return
717
718        # truncate data if least_significant_digit filter is set
719        # TODO: add the least_significant_digit attribute to the array on disk
720        if (self.filters.least_significant_digit is not None and
721                not numpy.issubdtype(nparr.dtype, numpy.signedinteger)):
722            nparr = quantize(nparr, self.filters.least_significant_digit)
723
724        try:
725            startl, stopl, stepl, shape = self._interpret_indexing(key)
726            self._write_slice(startl, stopl, stepl, shape, nparr)
727        except TypeError:
728            # Then, try with a point-wise selection
729            try:
730                coords = self._point_selection(key)
731                self._write_coords(coords, nparr)
732            except TypeError:
733                selection, reorder, shape = self._fancy_selection(key)
734                self._write_selection(selection, reorder, shape, nparr)
735
736    def _check_shape(self, nparr, slice_shape):
737        """Test that nparr shape is consistent with underlying object.
738
739        If not, try creating a new nparr object, using broadcasting if
740        necessary.
741
742        """
743
744        if nparr.shape != (slice_shape + self.atom.dtype.shape):
745            # Create an array compliant with the specified shape
746            narr = numpy.empty(shape=slice_shape, dtype=self.atom.dtype)
747
748            # Assign the value to it. It will raise a ValueError exception
749            # if the objects cannot be broadcast to a single shape.
750            narr[...] = nparr
751            return narr
752        else:
753            return nparr
754
755    def _read_slice(self, startl, stopl, stepl, shape):
756        """Read a slice based on `startl`, `stopl` and `stepl`."""
757
758        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
759        # Protection against reading empty arrays
760        if 0 not in shape:
761            # Arrays that have non-zero dimensionality
762            self._g_read_slice(startl, stopl, stepl, nparr)
763        # For zero-shaped arrays, return the scalar
764        if nparr.shape == ():
765            nparr = nparr[()]
766        return nparr
767
768    def _read_coords(self, coords):
769        """Read a set of points defined by `coords`."""
770
771        nparr = numpy.empty(dtype=self.atom.dtype, shape=len(coords))
772        if len(coords) > 0:
773            self._g_read_coords(coords, nparr)
774        # For zero-shaped arrays, return the scalar
775        if nparr.shape == ():
776            nparr = nparr[()]
777        return nparr
778
779    def _read_selection(self, selection, reorder, shape):
780        """Read a `selection`.
781
782        Reorder if necessary.
783
784        """
785
786        # Create the container for the slice
787        nparr = numpy.empty(dtype=self.atom.dtype, shape=shape)
788        # Arrays that have non-zero dimensionality
789        self._g_read_selection(selection, nparr)
790        # For zero-shaped arrays, return the scalar
791        if nparr.shape == ():
792            nparr = nparr[()]
793        elif reorder is not None:
794            # We need to reorder the array
795            idx, neworder = reorder
796            k = [slice(None)] * len(shape)
797            k[idx] = neworder.argsort()
798            # Apparently, a copy is not needed here, but doing it
799            # for symmetry with the `_write_selection()` method.
800            nparr = nparr[tuple(k)].copy()
801        return nparr
802
803    def _write_slice(self, startl, stopl, stepl, shape, nparr):
804        """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`."""
805
806        nparr = self._check_shape(nparr, tuple(shape))
807        countl = ((stopl - startl - 1) // stepl) + 1
808        self._g_write_slice(startl, stepl, countl, nparr)
809
810    def _write_coords(self, coords, nparr):
811        """Write `nparr` values in points defined by `coords` coordinates."""
812
813        if len(coords) > 0:
814            nparr = self._check_shape(nparr, (len(coords),))
815            self._g_write_coords(coords, nparr)
816
817    def _write_selection(self, selection, reorder, shape, nparr):
818        """Write `nparr` in `selection`.
819
820        Reorder if necessary.
821
822        """
823
824        nparr = self._check_shape(nparr, tuple(shape))
825        # Check whether we should reorder the array
826        if reorder is not None:
827            idx, neworder = reorder
828            k = [slice(None)] * len(shape)
829            k[idx] = neworder
830            # For a reason a don't understand well, we need a copy of
831            # the reordered array
832            nparr = nparr[tuple(k)].copy()
833        self._g_write_selection(selection, nparr)
834
835    def _read(self, start, stop, step, out=None):
836        """Read the array from disk without slice or flavor processing."""
837
838        nrowstoread = len(range(start, stop, step))
839        shape = list(self.shape)
840        if shape:
841            shape[self.maindim] = nrowstoread
842        if out is None:
843            arr = numpy.empty(dtype=self.atom.dtype, shape=shape)
844        else:
845            bytes_required = self.rowsize * nrowstoread
846            # if buffer is too small, it will segfault
847            if bytes_required != out.nbytes:
848                raise ValueError(('output array size invalid, got {0} bytes, '
849                                  'need {1} bytes').format(out.nbytes,
850                                                           bytes_required))
851            if not out.flags['C_CONTIGUOUS']:
852                raise ValueError('output array not C contiguous')
853            arr = out
854        # Protection against reading empty arrays
855        if 0 not in shape:
856            # Arrays that have non-zero dimensionality
857            self._read_array(start, stop, step, arr)
858        # data is always read in the system byteorder
859        # if the out array's byteorder is different, do a byteswap
860        if (out is not None and
861                byteorders[arr.dtype.byteorder] != sys.byteorder):
862            arr.byteswap(True)
863        return arr
864
865    def read(self, start=None, stop=None, step=None, out=None):
866        """Get data in the array as an object of the current flavor.
867
868        The start, stop and step parameters can be used to select only a
869        *range of rows* in the array.  Their meanings are the same as in
870        the built-in range() Python function, except that negative values
871        of step are not allowed yet. Moreover, if only start is specified,
872        then stop will be set to start + 1. If you do not specify neither
873        start nor stop, then *all the rows* in the array are selected.
874
875        The out parameter may be used to specify a NumPy array to receive
876        the output data.  Note that the array must have the same size as
877        the data selected with the other parameters.  Note that the array's
878        datatype is not checked and no type casting is performed, so if it
879        does not match the datatype on disk, the output will not be correct.
880        Also, this parameter is only valid when the array's flavor is set
881        to 'numpy'.  Otherwise, a TypeError will be raised.
882
883        When data is read from disk in NumPy format, the output will be
884        in the current system's byteorder, regardless of how it is stored
885        on disk.
886        The exception is when an output buffer is supplied, in which case
887        the output will be in the byteorder of that output buffer.
888
889        .. versionchanged:: 3.0
890           Added the *out* parameter.
891
892        """
893
894        self._g_check_open()
895        if out is not None and self.flavor != 'numpy':
896            msg = ("Optional 'out' argument may only be supplied if array "
897                   "flavor is 'numpy', currently is {0}").format(self.flavor)
898            raise TypeError(msg)
899        (start, stop, step) = self._process_range_read(start, stop, step)
900        arr = self._read(start, stop, step, out)
901        return internal_to_flavor(arr, self.flavor)
902
903    def _g_copy_with_stats(self, group, name, start, stop, step,
904                           title, filters, chunkshape, _log, **kwargs):
905        """Private part of Leaf.copy() for each kind of leaf."""
906
907        # Compute the correct indices.
908        (start, stop, step) = self._process_range_read(start, stop, step)
909        # Get the slice of the array
910        # (non-buffered version)
911        if self.shape:
912            arr = self[start:stop:step]
913        else:
914            arr = self[()]
915        # Build the new Array object.  Use the _atom reserved keyword
916        # just in case the array is being copied from a native HDF5
917        # with atomic types different from scalars.
918        # For details, see #275 of trac.
919        object_ = Array(group, name, arr, title=title, _log=_log,
920                        _atom=self.atom)
921        nbytes = numpy.prod(self.shape, dtype=SizeType) * self.atom.size
922
923        return (object_, nbytes)
924
925    def __repr__(self):
926        """This provides more metainfo in addition to standard __str__"""
927
928        return """%s
929  atom := %r
930  maindim := %r
931  flavor := %r
932  byteorder := %r
933  chunkshape := %r""" % (self, self.atom, self.maindim,
934                         self.flavor, self.byteorder,
935                         self.chunkshape)
936
937
class ImageArray(Array):
    """Array node holding an image.

    No behaviour or functionality is added on top of an ordinary array.
    The class only exists so that an ``IMAGE`` HDF5 node can be opened as
    a normal `Array` node in PyTables.

    """

    # Identifier of the kind of node handled by this class.
    _c_classid = 'IMAGE'
949