# -*- coding: utf-8 -*-

########################################################################
#
# License: BSD
# Created: September 4, 2002
# Author: Francesc Alted - faltet@pytables.com
#
# $Id$
#
########################################################################

"""Here is defined the Table class."""

import math
import operator
import os.path
import sys
import warnings

from functools import reduce as _reduce
from time import time

import numpy
import numexpr

from . import tableextension
from .lrucacheextension import ObjectCache, NumCache
from .atom import Atom
from .conditions import compile_condition
from numexpr.necompiler import getType as numexpr_getType, double
from numexpr.expressions import functions as numexpr_functions
from .flavor import flavor_of, array_as_internal, internal_to_flavor
from .utils import is_idx, lazyattr, SizeType, NailedDict as CacheDict
from .leaf import Leaf
from .description import (IsDescription, Description, Col, descr_from_dtype)
from .exceptions import (
    NodeError, HDF5ExtError, PerformanceWarning, OldIndexWarning,
    NoSuchNodeError)
from .utilsextension import get_nested_field

from .path import join_path, split_path
from .index import (
    OldIndex, default_index_filters, default_auto_index, Index, IndexesDescG,
    IndexesTableG)


profile = False
# profile = True  # Uncomment for profiling
if profile:
    from .utils import show_stats


# 2.2: Added support for complex types. Introduced in version 0.9.
# 2.2.1: Added support for time types.
# 2.3: Changed the indexes naming schema.
# 2.4: Changed indexes naming schema (again).
# 2.5: Added the FIELD_%d_FILL attributes.
# 2.6: Added the FLAVOR attribute (optional).
# 2.7: Numeric and numarray flavors are gone.
obversion = "2.7"  # The Table VERSION number


try:
    # int_, long_ are only available in numexpr >= 2.1
    from numexpr.necompiler import int_, long_
except ImportError:
    int_ = int
    long_ = int

# Maps NumPy types to the types used by Numexpr.
_nxtype_from_nptype = {
    numpy.bool_: bool,
    numpy.int8: int_,
    numpy.int16: int_,
    numpy.int32: int_,
    numpy.int64: long_,
    numpy.uint8: int_,
    numpy.uint16: int_,
    numpy.uint32: long_,
    numpy.uint64: long_,
    numpy.float32: float,
    numpy.float64: double,
    numpy.complex64: complex,
    numpy.complex128: complex,
    numpy.bytes_: bytes,
}

_nxtype_from_nptype[numpy.str_] = str

if hasattr(numpy, 'float16'):
    _nxtype_from_nptype[numpy.float16] = float    # XXX: check
if hasattr(numpy, 'float96'):
    _nxtype_from_nptype[numpy.float96] = double   # XXX: check
if hasattr(numpy, 'float128'):
    _nxtype_from_nptype[numpy.float128] = double  # XXX: check
if hasattr(numpy, 'complex192'):
    _nxtype_from_nptype[numpy.complex192] = complex  # XXX: check
if hasattr(numpy, 'complex256'):
    _nxtype_from_nptype[numpy.complex256] = complex  # XXX: check


# The NumPy scalar type corresponding to `SizeType`.
_npsizetype = numpy.array(SizeType(0)).dtype.type


def _index_name_of(node):
    return '_i_%s' % node._v_name


def _index_pathname_of(node):
    nodeParentPath = split_path(node._v_pathname)[0]
    return join_path(nodeParentPath, _index_name_of(node))


def _index_pathname_of_column(table, colpathname):
    return join_path(_index_pathname_of(table), colpathname)


# The next are versions that work with just paths (i.e. we don't need
# a node instance for using them, which can be critical in certain
# situations)


def _index_name_of_(nodeName):
    return '_i_%s' % nodeName


def _index_pathname_of_(nodePath):
    nodeParentPath, nodeName = split_path(nodePath)
    return join_path(nodeParentPath, _index_name_of_(nodeName))


def _index_pathname_of_column_(tablePath, colpathname):
    return join_path(_index_pathname_of_(tablePath), colpathname)
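
# For example (a sketch): the hidden index group for a table '/t' is
# '/_i_t', so _index_pathname_of_column_('/t', 'x/y') returns
# '/_i_t/x/y'.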


def restorecache(self):
    # Define a cache for sparse table reads
    params = self._v_file.params
    chunksize = self._v_chunkshape[0]
    # Use integer division: the number of slots must be an integer.
    nslots = params['TABLE_MAX_SIZE'] // (chunksize * self._v_dtype.itemsize)
    self._chunkcache = NumCache((nslots, chunksize), self._v_dtype,
                                'table chunk cache')
    self._seqcache = ObjectCache(params['ITERSEQ_MAX_SLOTS'],
                                 params['ITERSEQ_MAX_SIZE'],
                                 'Iter sequence cache')
    self._dirtycache = False


def _table__where_indexed(self, compiled, condition, condvars,
                          start, stop, step):
    if profile:
        tref = time()
    if profile:
        show_stats("Entering table_whereIndexed", tref)
    self._use_index = True
    # Clean the table caches for indexed queries if needed
    if self._dirtycache:
        restorecache(self)

    # Get the values in expression that are not columns
    values = []
    for key, value in condvars.items():
        if isinstance(value, numpy.ndarray):
            values.append((key, value.item()))
    # Build a key for the sequence cache
    seqkey = (condition, tuple(values), (start, stop, step))
    # Do a lookup in sequential cache for this query
    nslot = self._seqcache.getslot(seqkey)
    if nslot >= 0:
        # Get the row sequence from the cache
        seq = self._seqcache.getitem(nslot)
        if len(seq) == 0:
            return iter([])
        # seq is a list.
        seq = numpy.array(seq, dtype='int64')
        # Correct the ranges in cached sequence
        if (start, stop, step) != (0, self.nrows, 1):
            seq = seq[(seq >= start) & (
                seq < stop) & ((seq - start) % step == 0)]
        return self.itersequence(seq)
    else:
        # No luck.  self._seqcache will be populated
        # in the iterator if possible. (Row._finish_riterator)
        self._seqcache_key = seqkey

    # Compute the chunkmap for every index in indexed expression
    idxexprs = compiled.index_expressions
    strexpr = compiled.string_expression
    cmvars = {}
    tcoords = 0
    for i, idxexpr in enumerate(idxexprs):
        var, ops, lims = idxexpr
        col = condvars[var]
        index = col.index
        assert index is not None, "the chosen column is not indexed"
        assert not index.dirty, "the chosen column has a dirty index"

        # Get the number of rows that the indexed condition yields.
        range_ = index.get_lookup_range(ops, lims)
        ncoords = index.search(range_)
        tcoords += ncoords
        if index.reduction == 1 and ncoords == 0:
            # No values from index condition, thus the chunkmap should be
            # empty
            nrowsinchunk = self.chunkshape[0]
            nchunks = int(math.ceil(float(self.nrows) / nrowsinchunk))
            chunkmap = numpy.zeros(shape=nchunks, dtype="bool")
        else:
            # Get the chunkmap from the index
            chunkmap = index.get_chunkmap()
        # Assign the chunkmap to the cmvars dictionary
        cmvars["e%d" % i] = chunkmap

    if index.reduction == 1 and tcoords == 0:
        # No candidates found in any indexed expression component, so
        # leave now
        self._seqcache.setitem(seqkey, [], 1)
        return iter([])

    # Compute the final chunkmap
    chunkmap = numexpr.evaluate(strexpr, cmvars)
    if not chunkmap.any():
        # The chunkmap is all False, so the result is empty
        self._seqcache.setitem(seqkey, [], 1)
        return iter([])

    if profile:
        show_stats("Exiting table_whereIndexed", tref)
    return chunkmap


def create_indexes_table(table):
    itgroup = IndexesTableG(
        table._v_parent, _index_name_of(table),
        "Indexes container for table " + table._v_pathname, new=True)
    return itgroup


def create_indexes_descr(igroup, dname, iname, filters):
    idgroup = IndexesDescG(
        igroup, iname,
        "Indexes container for sub-description " + dname,
        filters=filters, new=True)
    return idgroup


def _column__create_index(self, optlevel, kind, filters, tmp_dir,
                          blocksizes, verbose):
    name = self.name
    table = self.table
    dtype = self.dtype
    descr = self.descr
    index = self.index
    get_node = table._v_file._get_node

    # Fail if the index already exists
    if index:
        raise ValueError("%s for column '%s' already exists. If you want to "
                         "re-create it, please use the reindex() method "
                         "instead" % (str(index), str(self.pathname)))

    # Check that the datatype is indexable.
    if dtype.str[1:] == 'u8':
        raise NotImplementedError(
            "indexing 64-bit unsigned integer columns "
            "is not supported yet, sorry")
    if dtype.kind == 'c':
        raise TypeError("complex columns can not be indexed")
    if dtype.shape != ():
        raise TypeError("multidimensional columns can not be indexed")

    # Get the indexes group for table, and if it does not exist, create it
    try:
        itgroup = get_node(_index_pathname_of(table))
    except NoSuchNodeError:
        itgroup = create_indexes_table(table)

    # Create the necessary intermediate groups for descriptors
    idgroup = itgroup
    dname = ""
    pathname = descr._v_pathname
    if pathname != '':
        inames = pathname.split('/')
        for iname in inames:
            if dname == '':
                dname = iname
            else:
                dname += '/' + iname
            try:
                idgroup = get_node('%s/%s' % (itgroup._v_pathname, dname))
            except NoSuchNodeError:
                idgroup = create_indexes_descr(idgroup, dname, iname, filters)

    # Create the atom
    assert dtype.shape == ()
    atom = Atom.from_dtype(numpy.dtype((dtype, (0,))))

    # Protection on tables larger than the expected rows (perhaps the
    # user forgot to pass this parameter to the Table constructor?)
    expectedrows = table._v_expectedrows
    if table.nrows > expectedrows:
        expectedrows = table.nrows

    # Create the index itself
    index = Index(
        idgroup, name, atom=atom,
        title="Index for %s column" % name,
        kind=kind,
        optlevel=optlevel,
        filters=filters,
        tmp_dir=tmp_dir,
        expectedrows=expectedrows,
        byteorder=table.byteorder,
        blocksizes=blocksizes)

    table._set_column_indexing(self.pathname, True)

    # Feed the index with values

    # Add rows to the index if necessary
    if table.nrows > 0:
        indexedrows = table._add_rows_to_index(
            self.pathname, 0, table.nrows, lastrow=True, update=False)
    else:
        indexedrows = 0
    index.dirty = False
    table._indexedrows = indexedrows
    table._unsaved_indexedrows = table.nrows - indexedrows

    # Optimize the index that has been already filled-up
    index.optimize(verbose=verbose)

    # We cannot do a flush here because when reindexing during a
    # flush, the indexes are created anew, and that creates a nested
    # call to flush().
    # table.flush()

    return indexedrows


class _ColIndexes(dict):
    """Provides a nice representation of column indexes."""

    def __repr__(self):
        """Give a detailed representation of the column indexes."""

        rep = ['  \"%s\": %s' % (k, self[k]) for k in self.keys()]
        return '{\n  %s}' % (',\n  '.join(rep))


class Table(tableextension.Table, Leaf):
    """This class represents heterogeneous datasets in an HDF5 file.

    Tables are leaves (see the Leaf class in :ref:`LeafClassDescr`) whose data
    consists of a unidimensional sequence of *rows*, where each row contains
    one or more *fields*.  Fields have an associated unique *name* and
    *position*, with the first field having position 0.  All rows have the
    same fields, which are arranged in *columns*.

    Fields can have any type supported by the Col class (see
    :ref:`ColClassDescr`) and its descendants, which support multidimensional
    data.  Moreover, a field can be *nested* (to an arbitrary depth), meaning
    that it includes further fields inside.  A field named x inside a nested
    field a in a table can be accessed as the field a/x (its *path name*) from
    the table.

    The structure of a table is declared by its description, which is made
    available in the Table.description attribute (see :class:`Table`).

    This class provides new methods to read, write and search table data
    efficiently.  It also provides special Python methods to allow accessing
    the table as a normal sequence or array (with extended slicing supported).

    PyTables supports *in-kernel* searches working simultaneously on several
    columns using complex conditions.  These are faster than selections using
    Python expressions.  See the :meth:`Table.where` method for more
    information on in-kernel searches.

    Non-nested columns can be *indexed*.  Searching an indexed column can be
    several times faster than searching a non-indexed one.  Search methods
    automatically take advantage of indexing where available.

    When iterating a table, an object from the Row (see :ref:`RowClassDescr`)
    class is used.  This object allows you to read and write data one row at
    a time, as well as to perform queries which are not supported by the
    in-kernel syntax (at a much lower speed, of course).

    Objects of this class support access to individual columns via *natural
    naming* through the :attr:`Table.cols` accessor.  Nested columns are
    mapped to Cols instances, and non-nested ones to Column instances.
    See the Column class in :ref:`ColumnClassDescr` for examples of this
    feature.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    description
        An IsDescription subclass or a dictionary where the keys are the field
        names, and the values the type definitions.  In addition, a pure NumPy
        dtype is accepted.  If None, the table metadata is read from disk;
        otherwise, it is taken from the previous parameters.
    title
        Sets a TITLE attribute on the HDF5 table entity.
    filters : Filters
        An instance of the Filters class that provides information about the
        desired I/O filters to be applied during the life of this object.
    expectedrows
        A user estimate of the number of rows that will be in the table.  If
        not provided, the default value is ``EXPECTED_ROWS_TABLE`` (see
        ``tables/parameters.py``).  If you plan to save bigger tables, try
        providing a guess; this will optimize the HDF5 B-Tree creation and
        management process time and the amount of memory used.
    chunkshape
        The shape of the data chunk to be read or written as a single HDF5 I/O
        operation.  The filters are applied to those chunks of data.  Its rank
        for tables has to be 1.  If ``None``, a sensible value is calculated
        based on the `expectedrows` parameter (which is recommended).
    byteorder
        The byteorder of the data *on-disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the platform,
        unless you passed a recarray as the `description`, in which case the
        recarray byteorder will be chosen.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True.  Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure.  As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Notes
    -----
    The instance variables below are provided in addition to those in
    Leaf (see :ref:`LeafClassDescr`).  Please note that there are several
    col* dictionaries to ease retrieving information about a column
    directly by its path name, avoiding the need to walk through
    Table.description or Table.cols.


    .. rubric:: Table attributes

    .. attribute:: coldescrs

        Maps the name of a column to its Col description (see
        :ref:`ColClassDescr`).

    .. attribute:: coldflts

        Maps the name of a column to its default value.

    .. attribute:: coldtypes

        Maps the name of a column to its NumPy data type.

    .. attribute:: colindexed

        Maps the name of a column to whether it is indexed.

    .. attribute:: colinstances

        Maps the name of a column to its Column (see
        :ref:`ColumnClassDescr`) or Cols (see :ref:`ColsClassDescr`)
        instance.

    .. attribute:: colnames

        A list containing the names of *top-level* columns in the table.

    .. attribute:: colpathnames

        A list containing the pathnames of *bottom-level* columns in
        the table.

        These are the leaf columns obtained when walking the table
        description left-to-right, bottom-first.  Columns inside a
        nested column have slashes (/) separating name components in
        their pathname.

    .. attribute:: cols

        A Cols instance that provides *natural naming* access to
        non-nested (Column, see :ref:`ColumnClassDescr`) and nested
        (Cols, see :ref:`ColsClassDescr`) columns.

    .. attribute:: coltypes

        Maps the name of a column to its PyTables data type.

    .. attribute:: description

        A Description instance (see :ref:`DescriptionClassDescr`)
        reflecting the structure of the table.

    .. attribute:: extdim

        The index of the enlargeable dimension (always 0 for tables).

    .. attribute:: indexed

        Does this table have any indexed columns?

    .. attribute:: nrows

        The current number of rows in the table.

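
    .. rubric:: Example

    A minimal usage sketch (assuming a writable :class:`File` handle
    ``h5file``; the description and the column names below are purely
    illustrative)::

        from tables import IsDescription, Int32Col, Float64Col

        class Particle(IsDescription):
            identity = Int32Col()   # illustrative column
            energy = Float64Col()   # illustrative column

        table = h5file.create_table('/', 'particles', Particle)
        row = table.row
        for i in range(10):
            row['identity'] = i
            row['energy'] = float(i) ** 2
            row.append()
        table.flush()

        # In-kernel selection; see :meth:`Table.where`.
        high = [r['energy'] for r in table.where('energy > 4')]
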
515    """
516
517    # Class identifier.
518    _c_classid = 'TABLE'
519
520    # Properties
521    # ~~~~~~~~~~
522    @lazyattr
523    def row(self):
524        """The associated Row instance (see :ref:`RowClassDescr`)."""
525
526        return tableextension.Row(self)
527
528    @lazyattr
529    def dtype(self):
530        """The NumPy ``dtype`` that most closely matches this table."""
531
532        return self.description._v_dtype
533
534    # Read-only shorthands
535    # ````````````````````
536
537    @property
538    def shape(self):
539        """The shape of this table."""
540        return (self.nrows,)
541
542    @property
543    def rowsize(self):
544        """The size in bytes of each row in the table."""
545        return self.description._v_dtype.itemsize
546
547    @property
548    def size_in_memory(self):
549        """The size of this table's data in bytes when it is fully loaded into
550        memory.  This may be used in combination with size_on_disk to calculate
551        the compression ratio of the data."""
552        return self.nrows * self.rowsize

    # Lazy attributes
    # ```````````````
    @lazyattr
    def _v_iobuf(self):
        """A buffer for doing I/O."""

        return self._get_container(self.nrowsinbuf)

    @lazyattr
    def _v_wdflts(self):
        """The defaults for writing in recarray format."""

        # First, do a check to see whether we need to set default values
        # different from 0 or not.
        for coldflt in self.coldflts.values():
            if isinstance(coldflt, numpy.ndarray) or coldflt:
                break
        else:
            # No default different from 0 found.  Returning None.
            return None
        wdflts = self._get_container(1)
        for colname, coldflt in self.coldflts.items():
            ra = get_nested_field(wdflts, colname)
            ra[:] = coldflt
        return wdflts

    @lazyattr
    def _colunaligned(self):
        """The pathnames of unaligned, *unidimensional* columns."""
        colunaligned, rarr = [], self._get_container(0)
        for colpathname in self.colpathnames:
            carr = get_nested_field(rarr, colpathname)
            if not carr.flags.aligned and carr.ndim == 1:
                colunaligned.append(colpathname)
        return frozenset(colunaligned)

    # Index-related properties
    # ````````````````````````

    # **************** WARNING! ***********************
    # This function can be called during the destruction time of a table
    # so measures have been taken so that it doesn't have to revive
    # another node (which can fool the LRU cache). The solution devised
    # has been to add a cache for autoindex (Table._autoindex), populate
    # it in creation time of the cache (which is a safe period) and then
    # update the cache whenever it changes.
    # This solves the error when running test_indexes.py ManyNodesTestCase.
    # F. Alted 2007-04-20
    # **************************************************

    @property
    def autoindex(self):
        """Automatically keep column indexes up to date?

        Setting this value states whether existing indexes should be
        automatically updated after an append operation or recomputed
        after an index-invalidating operation (i.e. removal and
        modification of rows).  The default is true.

        This value takes effect whenever a column is altered.  If you
        don't have automatic indexing activated and you want to do an
        immediate update, use `Table.flush_rows_to_index()`; for an
        immediate reindexing of invalidated indexes, use
        `Table.reindex_dirty()`.

        This value is persistent.

        .. versionchanged:: 3.0
           The *autoIndex* property has been renamed into *autoindex*.
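
        A sketch of manually maintaining indexes with automatic indexing
        disabled (``rows`` stands for an illustrative sequence of rows)::

            table.autoindex = False
            table.append(rows)
            table.flush_rows_to_index()  # update the indexes explicitly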
622        """
623
624        if self._autoindex is None:
625            try:
626                indexgroup = self._v_file._get_node(_index_pathname_of(self))
627            except NoSuchNodeError:
628                self._autoindex = default_auto_index  # update cache
629                return self._autoindex
630            else:
631                self._autoindex = indexgroup.auto   # update cache
632                return self._autoindex
633        else:
634            # The value is in cache, return it
635            return self._autoindex
636
637    @autoindex.setter
638    def autoindex(self, auto):
639        auto = bool(auto)
640        try:
641            indexgroup = self._v_file._get_node(_index_pathname_of(self))
642        except NoSuchNodeError:
643            indexgroup = create_indexes_table(self)
644        indexgroup.auto = auto
645        # Update the cache in table instance as well
646        self._autoindex = auto
647
648    @property
649    def indexedcolpathnames(self):
650        """List of pathnames of indexed columns in the table."""
651        return [_colpname for _colpname in self.colpathnames if self.colindexed[_colpname]]
652
653    @property
654    def colindexes(self):
655        """A dictionary with the indexes of the indexed columns."""
656        return _ColIndexes(
657            ((_colpname, self.cols._f_col(_colpname).index)
658                for _colpname in self.colpathnames
659                if self.colindexed[_colpname]))
660
661    @property
662    def _dirtyindexes(self):
663        """Whether some index in table is dirty."""
664        return self._condition_cache._nailcount > 0
665
666    # Other methods
667    # ~~~~~~~~~~~~~
    def __init__(self, parentnode, name,
                 description=None, title="", filters=None,
                 expectedrows=None, chunkshape=None,
                 byteorder=None, _log=True, track_times=True):

        self._v_new = new = description is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_new_filters = filters
        """New filter properties for this node."""
        self.extdim = 0   # Tables only have one dimension currently
        """The index of the enlargeable dimension (always 0 for tables)."""
        self._v_recarray = None
        """A structured array to be stored in the table."""
        self._rabyteorder = None
        """The computed byteorder of the self._v_recarray."""
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_TABLE']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the table."""
        self.nrows = SizeType(0)
        """The current number of rows in the table."""
        self.description = None
        """A Description instance (see :ref:`DescriptionClassDescr`)
        reflecting the structure of the table."""
        self._time64colnames = []
        """The names of ``Time64`` columns."""
        self._strcolnames = []
        """The names of ``String`` columns."""
        self._colenums = {}
        """Maps the name of an enumerated column to its ``Enum`` instance."""
        self._v_chunkshape = None
        """Private storage for the `chunkshape` property of the leaf."""

        self.indexed = False
        """Does this table have any indexed columns?"""
        self._indexedrows = 0
        """Number of rows indexed on disk."""
        self._unsaved_indexedrows = 0
        """Number of rows indexed in memory but not yet on disk."""
        self._listoldindexes = []
        """The list of columns with old indexes."""
        self._autoindex = None
        """Private variable that caches the value for autoindex."""

        self.colnames = []
        """A list containing the names of *top-level* columns in the table."""
        self.colpathnames = []
        """A list containing the pathnames of *bottom-level* columns in the
        table.

        These are the leaf columns obtained when walking the
        table description left-to-right, bottom-first.  Columns inside a
        nested column have slashes (/) separating name components in
        their pathname.
        """
        self.colinstances = {}
        """Maps the name of a column to its Column (see
        :ref:`ColumnClassDescr`) or Cols (see :ref:`ColsClassDescr`)
        instance."""
        self.coldescrs = {}
        """Maps the name of a column to its Col description (see
        :ref:`ColClassDescr`)."""
        self.coltypes = {}
        """Maps the name of a column to its PyTables data type."""
        self.coldtypes = {}
        """Maps the name of a column to its NumPy data type."""
        self.coldflts = {}
        """Maps the name of a column to its default value."""
        self.colindexed = {}
        """Maps the name of a column to whether it is indexed."""

        self._use_index = False
        """Whether an index can be used or not in a search.  Boolean."""
        self._where_condition = None
        """Condition function and argument list for selection of values."""
        self._seqcache_key = None
        """The key under which to save a query's results (list of row
        indexes) or None to not save."""
        max_slots = parentnode._v_file.params['COND_CACHE_SLOTS']
        self._condition_cache = CacheDict(max_slots)
        """Cache of already compiled conditions."""
        self._exprvars_cache = {}
        """Cache of variables participating in numexpr expressions."""
        self._enabled_indexing_in_queries = True
        """Is indexing enabled in queries?  *Use only for testing.*"""
        self._empty_array_cache = {}
        """Cache of empty arrays."""

        self._v_dtype = None
        """The NumPy datatype for this table."""
        self.cols = None
        """
        A Cols instance that provides *natural naming* access to non-nested
        (Column, see :ref:`ColumnClassDescr`) and nested (Cols, see
        :ref:`ColsClassDescr`) columns.
        """
        self._dirtycache = True
        """Whether the data caches are dirty or not. Initially set to yes."""
        self._descflavor = None
        """Temporarily keeps the flavor of a description with data."""

        # Initialize this object in case it is a new Table

        # Try purely descriptive description objects.
        if new and isinstance(description, dict):
            # Dictionary case
            self.description = Description(
                description, ptparams=parentnode._v_file.params)
        elif new and (type(description) == type(IsDescription)
                      and issubclass(description, IsDescription)):
            # IsDescription subclass case
            descr = description()
            self.description = Description(
                descr.columns, ptparams=parentnode._v_file.params)
        elif new and isinstance(description, Description):
            # It is a Description instance already
            self.description = description

        # No description yet?
        if new and self.description is None:
            # Try NumPy dtype instances
            if isinstance(description, numpy.dtype):
                self.description, self._rabyteorder = \
                    descr_from_dtype(description,
                                     ptparams=parentnode._v_file.params)

        # No description yet?
        if new and self.description is None:
            # Try structured array description objects.
            try:
                self._descflavor = flavor = flavor_of(description)
            except TypeError:  # probably not an array
                pass
            else:
                if flavor == 'python':
                    nparray = numpy.rec.array(description)
                else:
                    nparray = array_as_internal(description, flavor)
                self.nrows = nrows = SizeType(nparray.size)
                # If `self._v_recarray` is set, it will be used as the
                # initial buffer.
                if nrows > 0:
                    self._v_recarray = nparray
                self.description, self._rabyteorder = \
                    descr_from_dtype(nparray.dtype,
                                     ptparams=parentnode._v_file.params)

        # No description yet?
        if new and self.description is None:
            raise TypeError(
                "the ``description`` argument is not of a supported type: "
                "``IsDescription`` subclass, ``Description`` instance, "
                "dictionary, or structured array")

        # Check the chunkshape parameter
        if new and chunkshape is not None:
            if isinstance(chunkshape, (int, numpy.integer)):
                chunkshape = (chunkshape,)
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r"
                                 % (chunkshape,))
            self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        super(Table, self).__init__(parentnode, name, new, filters,
                                    byteorder, _log, track_times)

    def _g_post_init_hook(self):
        # We are putting here the index-related issues
        # as well as filling general info for table
        # This is needed because we need first the index objects created

        # First, get back the flavor of input data (if any) for
        # `Leaf._g_post_init_hook()`.
        self._flavor, self._descflavor = self._descflavor, None
        super(Table, self)._g_post_init_hook()

        # Create a cols accessor.
        self.cols = Cols(self, self.description)

        # Place the `Cols` and `Column` objects into `self.colinstances`.
        colinstances, cols = self.colinstances, self.cols
        for colpathname in self.description._v_pathnames:
            colinstances[colpathname] = cols._g_col(colpathname)

        if self._v_new:
            # Columns are never indexed on creation.
            self.colindexed = dict((cpn, False) for cpn in self.colpathnames)
            return

        # The following code is only for opened tables.

        # Does the indexes group exist?
        indexesgrouppath = _index_pathname_of(self)
        igroup = indexesgrouppath in self._v_file
        oldindexes = False
        for colobj in self.description._f_walk(type="Col"):
            colname = colobj._v_pathname
            # Is this column indexed?
            if igroup:
                indexname = _index_pathname_of_column(self, colname)
                indexed = indexname in self._v_file
                self.colindexed[colname] = indexed
                if indexed:
                    column = self.cols._g_col(colname)
                    indexobj = column.index
                    if isinstance(indexobj, OldIndex):
                        indexed = False  # Not a valid index
                        oldindexes = True
                        self._listoldindexes.append(colname)
                    else:
                        # Tell the condition cache about columns with dirty
                        # indexes.
                        if indexobj.dirty:
                            self._condition_cache.nail()
            else:
                indexed = False
                self.colindexed[colname] = False
            if indexed:
                self.indexed = True

        if oldindexes:  # this should only appear under 2.x Pro
            warnings.warn(
                "table ``%s`` has column indexes with PyTables 1.x format. "
                "Unfortunately, this format is not supported in "
                "PyTables 2.x series. Note that you can use the "
                "``ptrepack`` utility in order to recreate the indexes. "
                "The 1.x indexed columns found are: %s" %
                (self._v_pathname, self._listoldindexes),
                OldIndexWarning)

        # It does not matter to which column 'indexobj' belongs,
        # since their respective index objects share
        # the same number of elements.
        if self.indexed:
            self._indexedrows = indexobj.nelements
            self._unsaved_indexedrows = self.nrows - self._indexedrows
            # Put the autoindex value in a cache variable
            self._autoindex = self.autoindex

    def _calc_nrowsinbuf(self):
        """Calculate the number of rows that fit in a PyTables buffer."""

        params = self._v_file.params
        # Compute the nrowsinbuf
        rowsize = self.rowsize
        buffersize = params['IO_BUFFER_SIZE']
        if rowsize != 0:
            nrowsinbuf = buffersize // rowsize
            # The number of rows in buffer needs to be an exact multiple of
            # chunkshape[0] for queries using indexed columns.
            # Fixes #319 and probably #409 too.
            nrowsinbuf -= nrowsinbuf % self.chunkshape[0]
        else:
            nrowsinbuf = 1

        # tableextension.pyx performs an assertion
        # to make sure nrowsinbuf is greater than or
        # equal to the chunksize.
        # See gh-206 and gh-238
        if self.chunkshape is not None:
            if nrowsinbuf < self.chunkshape[0]:
                nrowsinbuf = self.chunkshape[0]

        # Safeguard against row sizes being extremely large
        if nrowsinbuf == 0:
            nrowsinbuf = 1
            # If rowsize is too large, issue a Performance warning
            maxrowsize = params['BUFFER_TIMES'] * buffersize
            if rowsize > maxrowsize:
                warnings.warn("""\
The Table ``%s`` is exceeding the maximum recommended rowsize (%d bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leaf.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it."""
                              % (self._v_pathname, maxrowsize),
                              PerformanceWarning)
        return nrowsinbuf

    def _getemptyarray(self, dtype):
        # Acts as a cache for empty arrays
        key = dtype
        if key in self._empty_array_cache:
            return self._empty_array_cache[key]
        else:
            self._empty_array_cache[
                key] = arr = numpy.empty(shape=0, dtype=key)
            return arr

    def _get_container(self, shape):
        "Get the appropriate buffer for data depending on table nestedness."

        # This is *much* faster than the numpy.rec.array counterpart
        return numpy.empty(shape=shape, dtype=self._v_dtype)

    def _get_type_col_names(self, type_):
        """Returns a list containing 'type_' column names."""

        return [colobj._v_pathname
                for colobj in self.description._f_walk('Col')
                if colobj.type == type_]

    def _get_enum_map(self):
        """Return mapping from enumerated column names to `Enum` instances."""

        enumMap = {}
        for colobj in self.description._f_walk('Col'):
            if colobj.kind == 'enum':
                enumMap[colobj._v_pathname] = colobj.enum
        return enumMap

    def _g_create(self):
        """Create a new table on disk."""

        # Warn against declaring too many columns...
        # F. Alted 2005-06-05
        maxColumns = self._v_file.params['MAX_COLUMNS']
        if (len(self.description._v_names) > maxColumns):
            warnings.warn(
                "table ``%s`` is exceeding the recommended "
                "maximum number of columns (%d); "
                "be ready to see PyTables asking for *lots* of memory "
                "and possibly slow I/O" % (self._v_pathname, maxColumns),
                PerformanceWarning)

        # 1. Create the HDF5 table (some parameters need to be computed).

        # Fix the byteorder of the recarray and update the number of
        # expected rows if necessary
        if self._v_recarray is not None:
            self._v_recarray = self._g_fix_byteorder_data(self._v_recarray,
                                                          self._rabyteorder)
            if len(self._v_recarray) > self._v_expectedrows:
                self._v_expectedrows = len(self._v_recarray)
        # Compute a sensible chunkshape
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(
                self._v_expectedrows, self.rowsize, self.rowsize)
        # Correct the byteorder, if still needed
        if self.byteorder is None:
            self.byteorder = sys.byteorder

        # Cache some data which is already in the description.
        # This needs to happen before creation time in order
        # to be able to populate the self._v_wdflts
        self._cache_description_data()

        # After creating the table, ``self._v_objectid`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectid = self._create_table(
            self._v_new_title, self.filters.complib or '', obversion)
        self._v_recarray = None  # not useful anymore
        self._rabyteorder = None  # not useful anymore

        # 2. Compute or get chunk shape and buffer size parameters.
        self.nrowsinbuf = self._calc_nrowsinbuf()

        # 3. Get field fill attributes from the table description and
        #    set them on disk.
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            set_attr = self._v_attrs._g__setattr
            for i, colobj in enumerate(self.description._f_walk(type="Col")):
                fieldname = "FIELD_%d_FILL" % i
                set_attr(fieldname, colobj.dflt)

        return self._v_objectid

    def _g_open(self):
        """Opens a table from disk and reads the metadata on it.

        Creates a user description on the fly to ease access to the
        actual data.

        """

        # 1. Open the HDF5 table and get some data from it.
        self._v_objectid, description, chunksize = self._get_info()
        self._v_expectedrows = self.nrows  # the actual number of rows

        # 2. Create an instance description to host the record fields.
        validate = not self._v_file._isPTFile  # only for non-PyTables files
        self.description = Description(description, validate=validate,
                                       ptparams=self._v_file.params)

        # 3. Compute or get chunk shape and buffer size parameters.
        if chunksize == 0:
            self._v_chunkshape = self._calc_chunkshape(
                self._v_expectedrows, self.rowsize, self.rowsize)
        else:
            self._v_chunkshape = (chunksize,)
        self.nrowsinbuf = self._calc_nrowsinbuf()

        # 4. If there are field fill attributes, get them from disk and
        #    set them in the table description.
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            if "FIELD_0_FILL" in self._v_attrs._f_list("sys"):
                i = 0
                get_attr = self._v_attrs.__getattr__
                for objcol in self.description._f_walk(type="Col"):
                    colname = objcol._v_pathname
                    # Get the default values for each column
                    fieldname = "FIELD_%d_FILL" % i
                    defval = get_attr(fieldname)
                    if defval is not None:
                        objcol.dflt = defval
                    else:
                        warnings.warn("could not load default value "
                                      "for the ``%s`` column of table ``%s``; "
                                      "using ``%r`` instead"
                                      % (colname, self._v_pathname,
                                         objcol.dflt))
                        defval = objcol.dflt
                    i += 1

                # Set also the correct value in the desc._v_dflts dictionary
                for descr in self.description._f_walk(type="Description"):
                    for name in descr._v_names:
                        objcol = descr._v_colobjects[name]
                        if isinstance(objcol, Col):
                            descr._v_dflts[objcol._v_name] = objcol.dflt

        # 5. Cache some data which is already in the description.
        self._cache_description_data()

        return self._v_objectid

    def _cache_description_data(self):
        """Cache some data which is already in the description.

        Some information is extracted from `self.description` to build
        some useful (but redundant) structures:

        * `self.colnames`
        * `self.colpathnames`
        * `self.coldescrs`
        * `self.coltypes`
        * `self.coldtypes`
        * `self.coldflts`
        * `self._v_dtype`
        * `self._time64colnames`
        * `self._strcolnames`
        * `self._colenums`

        """

        self.colnames = list(self.description._v_names)
        self.colpathnames = [
            col._v_pathname for col in self.description._f_walk()
            if not hasattr(col, '_v_names')]  # bottom-level

        # Find ``time64`` column names.
        self._time64colnames = self._get_type_col_names('time64')
        # Find ``string`` column names.
        self._strcolnames = self._get_type_col_names('string')
        # Get a mapping of enumerated columns to their `Enum` instances.
        self._colenums = self._get_enum_map()

        # Get info about columns
        for colobj in self.description._f_walk(type="Col"):
            colname = colobj._v_pathname
            # Get the column descriptions, types and defaults
            self.coldescrs[colname] = colobj
            self.coltypes[colname] = colobj.type
            self.coldtypes[colname] = colobj.dtype
            self.coldflts[colname] = colobj.dflt

        # Assign _v_dtype for this table
        self._v_dtype = self.description._v_dtype

    def _get_column_instance(self, colpathname):
        """Get the instance of the column with the given `colpathname`.

        If the column does not exist in the table, a `KeyError` is
        raised.

        """

        try:
            return _reduce(getattr, colpathname.split('/'), self.description)
        except AttributeError:
            raise KeyError("table ``%s`` does not have a column named ``%s``"
                           % (self._v_pathname, colpathname))

    _check_column = _get_column_instance

    def _disable_indexing_in_queries(self):
        """Force queries not to use indexing.

        *Use only for testing.*

        """

        if not self._enabled_indexing_in_queries:
            return  # already disabled
        # The nail avoids setting/getting compiled conditions in/from
        # the cache where indexing is used.
        self._condition_cache.nail()
        self._enabled_indexing_in_queries = False

    def _enable_indexing_in_queries(self):
        """Allow queries to use indexing.

        *Use only for testing.*

        """

        if self._enabled_indexing_in_queries:
            return  # already enabled
        self._condition_cache.unnail()
        self._enabled_indexing_in_queries = True

    def _required_expr_vars(self, expression, uservars, depth=1):
        """Get the variables required by the `expression`.

        A new dictionary defining the variables used in the `expression`
        is returned.  Required variables are first looked up in the
        `uservars` mapping, then in the set of top-level columns of the
        table.  Unknown variables cause a `NameError` to be raised.

        When `uservars` is `None`, the local and global namespaces where
        the API callable which uses this method is called are sought
        instead.  This mechanism will not work as expected if this
        method is not used *directly* from an API callable.  To disable
        this mechanism, just specify a mapping as `uservars`.

        Nested columns and columns from other tables are not allowed
        (`TypeError` and `ValueError` are raised, respectively).  Also,
        non-column variable values are converted to NumPy arrays.

        `depth` specifies the depth of the frame in order to reach local
        or global variables.

        """

        # Get the names of variables used in the expression.
        exprvarscache = self._exprvars_cache
        if expression not in exprvarscache:
            # Protection against growing the cache too much
            if len(exprvarscache) > 256:
                # Remove 10 (arbitrary) elements from the cache
                for k in list(exprvarscache.keys())[:10]:
                    del exprvarscache[k]
            cexpr = compile(expression, '<string>', 'eval')
            exprvars = [var for var in cexpr.co_names
                        if var not in ['None', 'False', 'True']
                        and var not in numexpr_functions]
            exprvarscache[expression] = exprvars
        else:
            exprvars = exprvarscache[expression]

        # Get the local and global variable mappings of the user frame
        # if no mapping has been explicitly given for user variables.
        user_locals, user_globals = {}, {}
        if uservars is None:
            # We use specified depth to get the frame where the API
            # callable using this method is called.  For instance:
            #
            # * ``table._required_expr_vars()`` (depth 0) is called by
            # * ``table._where()`` (depth 1) is called by
            # * ``table.where()`` (depth 2) is called by
            # * user-space functions (depth 3)
            user_frame = sys._getframe(depth)
            user_locals = user_frame.f_locals
            user_globals = user_frame.f_globals

        colinstances = self.colinstances
        tblfile, tblpath = self._v_file, self._v_pathname
        # Look for the required variables first among the ones
        # explicitly provided by the user, then among implicit columns,
        # then among external variables (only if no explicit variables).
        reqvars = {}
        for var in exprvars:
            # Get the value.
            if uservars is not None and var in uservars:
                val = uservars[var]
            elif var in colinstances:
                val = colinstances[var]
            elif uservars is None and var in user_locals:
                val = user_locals[var]
            elif uservars is None and var in user_globals:
                val = user_globals[var]
            else:
                raise NameError("name ``%s`` is not defined" % var)

            # Check the value.
            if hasattr(val, 'pathname'):  # non-nested column
                if val.shape[1:] != ():
                    raise NotImplementedError(
                        "variable ``%s`` refers to "
                        "a multidimensional column, "
                        "not yet supported in conditions, sorry" % var)
                if (val._table_file is not tblfile or
                        val._table_path != tblpath):
                    raise ValueError("variable ``%s`` refers to a column "
                                     "which is not part of table ``%s``"
                                     % (var, tblpath))
                if val.dtype.str[1:] == 'u8':
                    raise NotImplementedError(
                        "variable ``%s`` refers to "
                        "a 64-bit unsigned integer column, "
                        "not yet supported in conditions, sorry; "
                        "please use regular Python selections" % var)
            elif hasattr(val, '_v_colpathnames'):  # nested column
                raise TypeError(
                    "variable ``%s`` refers to a nested column, "
                    "not allowed in conditions" % var)
            else:  # only non-column values are converted to arrays
                # XXX: not 100% sure about this
                if isinstance(val, str):
                    val = numpy.asarray(val.encode('ascii'))
                else:
                    val = numpy.asarray(val)
            reqvars[var] = val
        return reqvars

    def _get_condition_key(self, condition, condvars):
        """Get the condition cache key for `condition` with `condvars`.

        Currently, the key is a tuple of `condition`, column variable
        names, normal variable names, column paths and variable types
        (all are tuples).

        """

        # Variable names for column and normal variables.
        colnames, varnames = [], []
        # Column paths and types for each of the previous variables.
        colpaths, vartypes = [], []
        for (var, val) in condvars.items():
            if hasattr(val, 'pathname'):  # column
                colnames.append(var)
                colpaths.append(val.pathname)
            else:  # array
                try:
                    varnames.append(var)
                    vartypes.append(numexpr_getType(val))  # expensive
                except ValueError:
                    # This is clearer than the error given by Numexpr.
                    raise TypeError("variable ``%s`` has data type ``%s``, "
                                    "not allowed in conditions"
                                    % (var, val.dtype.name))
        colnames, varnames = tuple(colnames), tuple(varnames)
        colpaths, vartypes = tuple(colpaths), tuple(vartypes)
        condkey = (condition, colnames, varnames, colpaths, vartypes)
        return condkey

    def _compile_condition(self, condition, condvars):
        """Compile the `condition` and extract usable index conditions.

        This method returns an instance of ``CompiledCondition``.  See
        the ``compile_condition()`` function in the ``conditions``
        module for more information about the compilation process.

        This method makes use of the condition cache when possible.

        """

        # Look up the condition in the condition cache.
        condcache = self._condition_cache
        condkey = self._get_condition_key(condition, condvars)
        compiled = condcache.get(condkey)
        if compiled:
            return compiled.with_replaced_vars(condvars)  # bingo!

        # Bad luck, the condition must be parsed and compiled.
        # Fortunately, the key provides some valuable information. ;)
        (condition, colnames, varnames, colpaths, vartypes) = condkey

        # Extract more information from referenced columns.

        # start with normal variables
        typemap = dict(list(zip(varnames, vartypes)))
        indexedcols = []
        for colname in colnames:
            col = condvars[colname]

            # Extract types from *all* the given variables.
            coltype = col.dtype.type
            typemap[colname] = _nxtype_from_nptype[coltype]

            # Get the set of columns with usable indexes.
            if (self._enabled_indexing_in_queries  # no in-kernel searches
                    and self.colindexed[col.pathname] and not col.index.dirty):
                indexedcols.append(colname)

        indexedcols = frozenset(indexedcols)
        # Now let ``compile_condition()`` do the Numexpr-related job.
        compiled = compile_condition(condition, typemap, indexedcols)

        # Check that there actually are columns in the condition.
        if not set(compiled.parameters).intersection(set(colnames)):
            raise ValueError("there are no columns taking part "
                             "in condition ``%s``" % (condition,))

        # Store the compiled condition in the cache and return it.
        condcache[condkey] = compiled
        return compiled.with_replaced_vars(condvars)
1369
1370    def will_query_use_indexing(self, condition, condvars=None):
1371        """Will a query for the condition use indexing?
1372
        The meaning of the *condition* and *condvars* arguments is the same as
        in the :meth:`Table.where` method. If the condition can use indexing,
        this method returns a frozenset with the path names of the columns
        whose index is usable. Otherwise, it returns an empty frozenset.
1377
1378        This method is mainly intended for testing. Keep in mind that changing
1379        the set of indexed columns or their dirtiness may make this method
1380        return different values for the same arguments at different times.
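
        Examples
        --------

        A minimal sketch (``col1`` and ``col2`` are hypothetical column
        names, with only ``col1`` assumed to be indexed)::

            table.will_query_use_indexing('(col1 > 0) & (col2 < 3)')
            # --> frozenset({'col1'}) when the index of ``col1`` is usable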
1381
1382        """
1383
1384        # Compile the condition and extract usable index conditions.
1385        condvars = self._required_expr_vars(condition, condvars, depth=2)
1386        compiled = self._compile_condition(condition, condvars)
1387        # Return the columns in indexed expressions
1388        idxcols = [condvars[var].pathname for var in compiled.index_variables]
1389        return frozenset(idxcols)
1390
1391    def where(self, condition, condvars=None,
1392              start=None, stop=None, step=None):
1393        """Iterate over values fulfilling a condition.
1394
1395        This method returns a Row iterator (see :ref:`RowClassDescr`) which
1396        only selects rows in the table that satisfy the given condition (an
1397        expression-like string).
1398
1399        The condvars mapping may be used to define the variable names appearing
1400        in the condition. condvars should consist of identifier-like strings
1401        pointing to Column (see :ref:`ColumnClassDescr`) instances *of this
1402        table*, or to other values (which will be converted to arrays). A
1403        default set of condition variables is provided where each top-level,
1404        non-nested column with an identifier-like name appears. Variables in
1405        condvars override the default ones.
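
        For instance (a hedged sketch where ``lim`` maps to an ordinary
        Python variable, while ``col2`` is assumed to come from the default
        set of column variables)::

            limit = 20
            for row in table.where('col2 <= lim', condvars={'lim': limit}):
                pass  # process the selected row here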
1406
        When condvars is not provided or None, the current local and global
        namespaces are consulted instead. This mechanism is mostly intended
        for interactive usage. To disable it, just specify a (maybe empty)
        mapping as condvars.
1411
1412        If a range is supplied (by setting some of the start, stop or step
1413        parameters), only the rows in that range and fulfilling the condition
1414        are used. The meaning of the start, stop and step parameters is the
1415        same as for Python slices.
1416
        When possible, indexed columns participating in the condition will be
        used to speed up the search. It is recommended that you place the
        indexed columns as far to the left of the condition as possible. In
        any case, this method always performs better than regular Python
        selections on the table.
1422
1423        You can mix this method with regular Python selections in order to
1424        support even more complex queries. It is strongly recommended that you
1425        pass the most restrictive condition as the parameter to this method if
1426        you want to achieve maximum performance.
1427
1428        .. warning::
1429
1430            When in the middle of a table row iterator, you should not
1431            use methods that can change the number of rows in the table
1432            (like :meth:`Table.append` or :meth:`Table.remove_rows`) or
1433            unexpected errors will happen.
1434
1435        Examples
1436        --------
1437
1438        ::
1439
1440            >>> passvalues = [ row['col3'] for row in
1441            ...                table.where('(col1 > 0) & (col2 <= 20)', step=5)
1442            ...                if your_function(row['col2']) ]
1443            >>> print("Values that pass the cuts:", passvalues)
1444
1445        .. note::
1446
            Special care should be taken when the query condition includes
            string literals.  Python 2 string literals are strings of bytes,
            while Python 3 strings are unicode objects.
1450
1451            Let's assume that the table ``table`` has the following
1452            structure::
1453
1454                class Record(IsDescription):
1455                    col1 = StringCol(4)  # 4-character String of bytes
1456                    col2 = IntCol()
1457                    col3 = FloatCol()
1458
            The type of "col1" does not change depending on the Python version
            used (of course); it always corresponds to strings of bytes.
1461
1462            Any condition involving "col1" should be written using the
1463            appropriate type for string literals in order to avoid
1464            :exc:`TypeError`\ s.
1465
1466            The code below will work fine in Python 2 but will fail with a
1467            :exc:`TypeError` in Python 3::
1468
1469                condition = 'col1 == "AAAA"'
1470                for record in table.where(condition):  # TypeError in Python3
                    pass  # do something with "record"
1472
1473            The reason is that in Python 3 "condition" implies a comparison
            between a string of bytes ("col1" contents) and a unicode literal
1475            ("AAAA").
1476
1477            The correct way to write the condition is::
1478
1479                condition = 'col1 == b"AAAA"'
1480
1481        .. versionchanged:: 3.0
1482           The start, stop and step parameters now behave like in slice.
1483
1484        """
1485
1486        return self._where(condition, condvars, start, stop, step)
1487
1488    def _where(self, condition, condvars, start=None, stop=None, step=None):
1489        """Low-level counterpart of `self.where()`."""
1490
        if profile:
            tref = time()
            show_stats("Entering table._where", tref)
1495        # Adjust the slice to be used.
1496        (start, stop, step) = self._process_range_read(start, stop, step)
1497        if start >= stop:  # empty range, reset conditions
1498            self._use_index = False
1499            self._where_condition = None
1500            return iter([])
1501
1502        # Compile the condition and extract usable index conditions.
1503        condvars = self._required_expr_vars(condition, condvars, depth=3)
1504        compiled = self._compile_condition(condition, condvars)
1505
1506        # Can we use indexes?
1507        if compiled.index_expressions:
1508            chunkmap = _table__where_indexed(
1509                self, compiled, condition, condvars, start, stop, step)
1510            if not isinstance(chunkmap, numpy.ndarray):
1511                # If it is not a NumPy array it should be an iterator
1512                # Reset conditions
1513                self._use_index = False
1514                self._where_condition = None
1515                # ...and return the iterator
1516                return chunkmap
1517        else:
1518            chunkmap = None  # default to an in-kernel query
1519
1520        args = [condvars[param] for param in compiled.parameters]
1521        self._where_condition = (compiled.function, args, compiled.kwargs)
1522        row = tableextension.Row(self)
1523        if profile:
1524            show_stats("Exiting table._where", tref)
1525        return row._iter(start, stop, step, chunkmap=chunkmap)
1526
1527    def read_where(self, condition, condvars=None, field=None,
1528                   start=None, stop=None, step=None):
1529        """Read table data fulfilling the given *condition*.
1530
        This method is similar to :meth:`Table.read`: their common arguments
        and return values have the same meanings. However, only the rows
        fulfilling the *condition* are included in the result.
1534
1535        The meaning of the other arguments is the same as in the
1536        :meth:`Table.where` method.
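
        Examples
        --------

        A minimal sketch (the column names below are hypothetical)::

            values = table.read_where('(col1 > 0) & (col2 <= 20)',
                                      field='col3')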
1537
1538        """
1539
1540        self._g_check_open()
1541        coords = [p.nrow for p in
1542                  self._where(condition, condvars, start, stop, step)]
1543        self._where_condition = None  # reset the conditions
1544        if len(coords) > 1:
1545            cstart, cstop = coords[0], coords[-1] + 1
1546            if cstop - cstart == len(coords):
                # The coordinates may form a contiguous, monotonically
                # increasing sequence; check it before reading in one shot.
                inc_seq = numpy.all(
                    numpy.arange(cstart, cstop) == numpy.array(coords))
1550                if inc_seq:
1551                    return self.read(cstart, cstop, field=field)
1552        return self.read_coordinates(coords, field)
1553
1554    def append_where(self, dstTable, condition=None, condvars=None,
1555                     start=None, stop=None, step=None):
1556        """Append rows fulfilling the condition to the dstTable table.
1557
1558        dstTable must be capable of taking the rows resulting from the query,
1559        i.e. it must have columns with the expected names and compatible
1560        types. The meaning of the other arguments is the same as in the
1561        :meth:`Table.where` method.
1562
1563        The number of rows appended to dstTable is returned as a result.
1564
1565        .. versionchanged:: 3.0
1566           The *whereAppend* method has been renamed into *append_where*.
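
        Examples
        --------

        A minimal sketch, assuming ``dst`` is a table whose description is
        compatible with the query results and ``col1`` is a hypothetical
        column name::

            nappended = table.append_where(dst, 'col1 > 0')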
1567
1568        """
1569
1570        self._g_check_open()
1571
1572        # Check that the destination file is not in read-only mode.
1573        dstTable._v_file._check_writable()
1574
1575        # Row objects do not support nested columns, so we must iterate
1576        # over the flat column paths.  When rows support nesting,
1577        # ``self.colnames`` can be directly iterated upon.
        colNames = list(self.colpathnames)
1579        dstRow = dstTable.row
1580        nrows = 0
1581        if condition is not None:
1582            srcRows = self._where(condition, condvars, start, stop, step)
1583        else:
1584            srcRows = self.iterrows(start, stop, step)
1585        for srcRow in srcRows:
1586            for colName in colNames:
1587                dstRow[colName] = srcRow[colName]
1588            dstRow.append()
1589            nrows += 1
1590        dstTable.flush()
1591        return nrows
1592
1593    def get_where_list(self, condition, condvars=None, sort=False,
1594                       start=None, stop=None, step=None):
1595        """Get the row coordinates fulfilling the given condition.
1596
        The coordinates are returned as a list of the current flavor.  If sort
        is true, the coordinates are returned in ascending order.  The default
        is not to sort them.
1600
1601        The meaning of the other arguments is the same as in the
1602        :meth:`Table.where` method.
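
        Examples
        --------

        A minimal sketch (``col1`` is a hypothetical column name)::

            coords = table.get_where_list('col1 > 0', sort=True)
            rows = table.read_coordinates(coords)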
1603
1604        """
1605
1606        self._g_check_open()
1607
1608        coords = [p.nrow for p in
1609                  self._where(condition, condvars, start, stop, step)]
1610        coords = numpy.array(coords, dtype=SizeType)
1611        # Reset the conditions
1612        self._where_condition = None
1613        if sort:
1614            coords = numpy.sort(coords)
1615        return internal_to_flavor(coords, self.flavor)
1616
1617    def itersequence(self, sequence):
        """Iterate over a sequence of row coordinates.
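
        A minimal sketch (``col1`` is a hypothetical column name)::

            values = [row['col1'] for row in table.itersequence([2, 5, 7])]

        """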
1619
1620        if not hasattr(sequence, '__getitem__'):
            raise TypeError(("Wrong 'sequence' parameter type. Only sequences "
                             "are supported."))
1623        # start, stop and step are necessary for the new iterator for
1624        # coordinates, and perhaps it would be useful to add them as
1625        # parameters in the future (not now, because I've just removed
1626        # the `sort` argument for 2.1).
1627        #
1628        # *Important note*: Negative values for step are not supported
1629        # for the general case, but only for the itersorted() and
1630        # read_sorted() purposes!  The self._process_range_read will raise
        # an appropriate error.
1632        # F. Alted 2008-09-18
1633        # A.V. 20130513: _process_range_read --> _process_range
1634        (start, stop, step) = self._process_range(None, None, None)
1635        if (start > stop) or (len(sequence) == 0):
1636            return iter([])
1637        row = tableextension.Row(self)
1638        return row._iter(start, stop, step, coords=sequence)
1639
1640    def _check_sortby_csi(self, sortby, checkCSI):
1641        if isinstance(sortby, Column):
1642            icol = sortby
1643        elif isinstance(sortby, str):
1644            icol = self.cols._f_col(sortby)
1645        else:
1646            raise TypeError(
1647                "`sortby` can only be a `Column` or string object, "
1648                "but you passed an object of type: %s" % type(sortby))
1649        if icol.is_indexed and icol.index.kind == "full":
1650            if checkCSI and not icol.index.is_csi:
1651                # The index exists, but it is not a CSI one.
1652                raise ValueError(
                    "Field `%s` must have a CSI index associated with it "
                    "in table `%s`, but the existing one is not CSI. "
                    % (sortby, self))
1656            return icol.index
1657        else:
1658            raise ValueError(
                "Field `%s` must have a 'full' index associated with it "
                "in table `%s`." % (sortby, self))
1661
1662    def itersorted(self, sortby, checkCSI=False,
1663                   start=None, stop=None, step=None):
        """Iterate over table data following the order of the index of the
        sortby column.
1666
        The sortby column must have a full index associated with it.  If you
        want to ensure a fully sorted order, the index must be a CSI one.  You
        may want to use the checkCSI argument in order to explicitly check for
        the existence of a CSI index.
1671
1672        The meaning of the start, stop and step arguments is the same as in
1673        :meth:`Table.read`.
1674
1675        .. versionchanged:: 3.0
1676           If the *start* parameter is provided and *stop* is None then the
1677           table is iterated from *start* to the last line.
1678           In PyTables < 3.0 only one element was returned.
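
        Examples
        --------

        A minimal sketch, assuming ``col1`` is a hypothetical column with a
        full index::

            for row in table.itersorted('col1'):
                pass  # rows arrive ordered by the ``col1`` index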
1679
1680        """
1681
1682        index = self._check_sortby_csi(sortby, checkCSI)
1683        # Adjust the slice to be used.
1684        (start, stop, step) = self._process_range(start, stop, step,
1685                                                  warn_negstep=False)
1686        if (start > stop and 0 < step) or (start < stop and 0 > step):
1687            # Fall-back action is to return an empty iterator
1688            return iter([])
1689        row = tableextension.Row(self)
1690        return row._iter(start, stop, step, coords=index)
1691
1692    def read_sorted(self, sortby, checkCSI=False, field=None,
1693                    start=None, stop=None, step=None):
        """Read table data following the order of the index of the sortby
        column.
1695
        The sortby column must have a full index associated with it.  If you
        want to ensure a fully sorted order, the index must be a CSI one.  You
        may want to use the checkCSI argument in order to explicitly check for
        the existence of a CSI index.
1700
1701        If field is supplied only the named column will be selected.  If the
1702        column is not nested, an *array* of the current flavor will be
1703        returned; if it is, a *structured array* will be used instead.  If no
1704        field is specified, all the columns will be returned in a structured
1705        array of the current flavor.
1706
1707        The meaning of the start, stop and step arguments is the same as in
1708        :meth:`Table.read`.
1709
1710        .. versionchanged:: 3.0
1711           The start, stop and step parameters now behave like in slice.
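
        Examples
        --------

        A minimal sketch, assuming ``col1`` is a hypothetical column with a
        full index and ``col3`` is another column of the table::

            sorted_col3 = table.read_sorted('col1', field='col3')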
1712
1713        """
1714
1715        self._g_check_open()
1716        index = self._check_sortby_csi(sortby, checkCSI)
1717        coords = index[start:stop:step]
1718        return self.read_coordinates(coords, field)
1719
1720    def iterrows(self, start=None, stop=None, step=None):
1721        """Iterate over the table using a Row instance.
1722
1723        If a range is not supplied, *all the rows* in the table are iterated
1724        upon - you can also use the :meth:`Table.__iter__` special method for
1725        that purpose. If you want to iterate over a given *range of rows* in
1726        the table, you may use the start, stop and step parameters.
1727
1728        .. warning::
1729
1730            When in the middle of a table row iterator, you should not
1731            use methods that can change the number of rows in the table
1732            (like :meth:`Table.append` or :meth:`Table.remove_rows`) or
1733            unexpected errors will happen.
1734
1735        See Also
1736        --------
1737        tableextension.Row : the table row iterator and field accessor
1738
1739        Examples
1740        --------
1741
1742        ::
1743
1744            result = [ row['var2'] for row in table.iterrows(step=5)
1745                                                    if row['var1'] <= 20 ]
1746
1747        .. versionchanged:: 3.0
1748           If the *start* parameter is provided and *stop* is None then the
1749           table is iterated from *start* to the last line.
1750           In PyTables < 3.0 only one element was returned.
1751
1752        """
1753        (start, stop, step) = self._process_range(start, stop, step,
1754                                                  warn_negstep=False)
1755        if (start > stop and 0 < step) or (start < stop and 0 > step):
1756            # Fall-back action is to return an empty iterator
1757            return iter([])
1758        row = tableextension.Row(self)
1759        return row._iter(start, stop, step)
1760
1761    def __iter__(self):
1762        """Iterate over the table using a Row instance.
1763
1764        This is equivalent to calling :meth:`Table.iterrows` with default
1765        arguments, i.e. it iterates over *all the rows* in the table.
1766
1767        See Also
1768        --------
1769        tableextension.Row : the table row iterator and field accessor
1770
1771        Examples
1772        --------
1773
1774        ::
1775
1776            result = [ row['var2'] for row in table if row['var1'] <= 20 ]
1777
1778        Which is equivalent to::
1779
1780            result = [ row['var2'] for row in table.iterrows()
1781                                                    if row['var1'] <= 20 ]
1782
1783        """
1784
1785        return self.iterrows()
1786
1787    def _read(self, start, stop, step, field=None, out=None):
1788        """Read a range of rows and return an in-memory object."""
1789
1790        select_field = None
1791        if field:
1792            if field not in self.coldtypes:
1793                if field in self.description._v_names:
1794                    # Remember to select this field
1795                    select_field = field
1796                    field = None
1797                else:
1798                    raise KeyError(("Field {0} not found in table "
1799                                    "{1}").format(field, self))
1800            else:
1801                # The column hangs directly from the top
1802                dtype_field = self.coldtypes[field]
1803
        # Return an empty array if the slice selects no rows
1805        if (start >= stop and 0 < step) or (start <= stop and 0 > step):
1806            if field is None:
1807                nra = self._get_container(0)
1808                return nra
1809            return numpy.empty(shape=0, dtype=dtype_field)
1810
1811        nrows = len(range(start, stop, step))
1812
1813        if out is None:
1814            # Compute the shape of the resulting column object
1815            if field:
1816                # Create a container for the results
1817                result = numpy.empty(shape=nrows, dtype=dtype_field)
1818            else:
1819                # Recarray case
1820                result = self._get_container(nrows)
1821        else:
1822            # there is no fast way to byteswap, since different columns may
1823            # have different byteorders
1824            if not out.dtype.isnative:
1825                raise ValueError(("output array must be in system's byteorder "
1826                                  "or results will be incorrect"))
1827            if field:
1828                bytes_required = dtype_field.itemsize * nrows
1829            else:
1830                bytes_required = self.rowsize * nrows
1831            if bytes_required != out.nbytes:
1832                raise ValueError(('output array size invalid, got {0} bytes, '
1833                                  'need {1} bytes').format(out.nbytes,
1834                                                           bytes_required))
1835            if not out.flags['C_CONTIGUOUS']:
1836                raise ValueError('output array not C contiguous')
1837            result = out
1838
1839        # Call the routine to fill-up the resulting array
1840        if step == 1 and not field:
1841            # This optimization works three times faster than
1842            # the row._fill_col method (up to 170 MB/s on a pentium IV @ 2GHz)
1843            self._read_records(start, stop - start, result)
        # Warning!: _read_field_name should not be used until
        # H5TBread_fields_name in tableextension is finished
        # F. Alted 2005/05/26
        # XXX: Shall we implement this for PyTables 2.0??
        elif field and step > 15 and 0:  # the ``and 0`` disables this branch
1849            # For step>15, this seems to work always faster than row._fill_col.
1850            self._read_field_name(result, start, stop, step, field)
1851        else:
1852            self.row._fill_col(result, start, stop, step, field)
1853
1854        if select_field:
1855            return result[select_field]
1856        else:
1857            return result
1858
1859    def read(self, start=None, stop=None, step=None, field=None, out=None):
1860        """Get data in the table as a (record) array.
1861
1862        The start, stop and step parameters can be used to select only
1863        a *range of rows* in the table. Their meanings are the same as
1864        in the built-in Python slices.
1865
1866        If field is supplied only the named column will be selected.
1867        If the column is not nested, an *array* of the current flavor
1868        will be returned; if it is, a *structured array* will be used
1869        instead.  If no field is specified, all the columns will be
1870        returned in a structured array of the current flavor.
1871
1872        Columns under a nested column can be specified in the field
1873        parameter by using a slash character (/) as a separator (e.g.
1874        'position/x').
1875
1876        The out parameter may be used to specify a NumPy array to
1877        receive the output data.  Note that the array must have the
1878        same size as the data selected with the other parameters.
1879        Note that the array's datatype is not checked and no type
1880        casting is performed, so if it does not match the datatype on
1881        disk, the output will not be correct.
1882
1883        When specifying a single nested column with the field parameter,
1884        and supplying an output buffer with the out parameter, the
1885        output buffer must contain all columns in the table.
1886        The data in all columns will be read into the output buffer.
1887        However, only the specified nested column will be returned from
1888        the method call.
1889
1890        When data is read from disk in NumPy format, the output will be
1891        in the current system's byteorder, regardless of how it is
1892        stored on disk. If the out parameter is specified, the output
1893        array also must be in the current system's byteorder.
1894
1895        .. versionchanged:: 3.0
1896           Added the *out* parameter.  Also the start, stop and step
1897           parameters now behave like in slice.
1898
1899        Examples
1900        --------
1901
1902        Reading the entire table::
1903
1904            t.read()
1905
        Reading record no. 6::
1907
1908            t.read(6, 7)
1909
        Reading from record no. 6 to the end of the table::
1911
1912            t.read(6)
1913
1914        """
1915
1916        self._g_check_open()
1917
1918        if field:
1919            self._check_column(field)
1920
1921        if out is not None and self.flavor != 'numpy':
1922            msg = ("Optional 'out' argument may only be supplied if array "
1923                   "flavor is 'numpy', currently is {0}").format(self.flavor)
1924            raise TypeError(msg)
1925
1926        start, stop, step = self._process_range(start, stop, step,
1927                                                warn_negstep=False)
1928
1929        arr = self._read(start, stop, step, field, out)
1930        return internal_to_flavor(arr, self.flavor)
1931
1932    def _read_coordinates(self, coords, field=None):
1933        """Private part of `read_coordinates()` with no flavor conversion."""
1934
1935        coords = self._point_selection(coords)
1936
1937        ncoords = len(coords)
1938        # Create a read buffer only if needed
1939        if field is None or ncoords > 0:
1940            # Doing a copy is faster when ncoords is small (<1000)
1941            if ncoords < min(1000, self.nrowsinbuf):
1942                result = self._v_iobuf[:ncoords].copy()
1943            else:
1944                result = self._get_container(ncoords)
1945
1946        # Do the real read
1947        if ncoords > 0:
1948            # Turn coords into an array of coordinate indexes, if necessary
1949            if not (isinstance(coords, numpy.ndarray) and
1950                    coords.dtype.type is _npsizetype and
1951                    coords.flags.contiguous and
1952                    coords.flags.aligned):
1953                # Get a contiguous and aligned coordinate array
1954                coords = numpy.array(coords, dtype=SizeType)
1955            self._read_elements(coords, result)
1956
1957        # Do the final conversions, if needed
1958        if field:
1959            if ncoords > 0:
1960                result = get_nested_field(result, field)
1961            else:
1962                # Get an empty array from the cache
1963                result = self._getemptyarray(self.coldtypes[field])
1964        return result
1965
1966    def read_coordinates(self, coords, field=None):
1967        """Get a set of rows given their indexes as a (record) array.
1968
1969        This method works much like the :meth:`Table.read` method, but it uses
1970        a sequence (coords) of row indexes to select the wanted columns,
1971        instead of a column range.
1972
1973        The selected rows are returned in an array or structured array of the
1974        current flavor.
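
        Examples
        --------

        A minimal sketch::

            recarray = table.read_coordinates([2, 7, 11])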
1975
1976        """
1977
1978        self._g_check_open()
1979        result = self._read_coordinates(coords, field)
1980        return internal_to_flavor(result, self.flavor)
1981
1982    def get_enum(self, colname):
1983        """Get the enumerated type associated with the named column.
1984
1985        If the column named colname (a string) exists and is of an enumerated
1986        type, the corresponding Enum instance (see :ref:`EnumClassDescr`) is
1987        returned. If it is not of an enumerated type, a TypeError is raised. If
1988        the column does not exist, a KeyError is raised.
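
        Examples
        --------

        A minimal sketch, assuming ``color`` is a hypothetical column of an
        enumerated type::

            colors = table.get_enum('color')
            red_value = colors['red']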
1989
1990        """
1991
1992        self._check_column(colname)
1993
1994        try:
1995            return self._colenums[colname]
1996        except KeyError:
1997            raise TypeError(
1998                "column ``%s`` of table ``%s`` is not of an enumerated type"
1999                % (colname, self._v_pathname))
2000
2001    def col(self, name):
2002        """Get a column from the table.
2003
2004        If a column called name exists in the table, it is read and returned as
2005        a NumPy object. If it does not exist, a KeyError is raised.
2006
2007        Examples
2008        --------
2009
2010        ::
2011
2012            narray = table.col('var2')
2013
2014        That statement is equivalent to::
2015
2016            narray = table.read(field='var2')
2017
2018        Here you can see how this method can be used as a shorthand for the
2019        :meth:`Table.read` method.
2020
2021        """
2022
2023        return self.read(field=name)
2024
2025    def __getitem__(self, key):
2026        """Get a row or a range of rows from the table.
2027
2028        If key argument is an integer, the corresponding table row is returned
2029        as a record of the current flavor. If key is a slice, the range of rows
2030        determined by it is returned as a structured array of the current
2031        flavor.
2032
2033        In addition, NumPy-style point selections are supported.  In
2034        particular, if key is a list of row coordinates, the set of rows
2035        determined by it is returned.  Furthermore, if key is an array of
2036        boolean values, only the coordinates where key is True are returned.
        Note that for the latter to work, the key list must contain exactly
        as many values as the table has rows.
2039
2040        Examples
2041        --------
2042
2043        ::
2044
2045            record = table[4]
2046            recarray = table[4:1000:2]
2047            recarray = table[[4,1000]]   # only retrieves rows 4 and 1000
2048            recarray = table[[True, False, ..., True]]
2049
2050        Those statements are equivalent to::
2051
2052            record = table.read(start=4)[0]
2053            recarray = table.read(start=4, stop=1000, step=2)
2054            recarray = table.read_coordinates([4,1000])
2055            recarray = table.read_coordinates([True, False, ..., True])
2056
2057        Here, you can see how indexing can be used as a shorthand for the
2058        :meth:`Table.read` and :meth:`Table.read_coordinates` methods.
2059
2060        """
2061
2062        self._g_check_open()
2063
2064        if is_idx(key):
2065            key = operator.index(key)
2066
2067            # Index out of range protection
2068            if key >= self.nrows:
2069                raise IndexError("Index out of range")
2070            if key < 0:
2071                # To support negative values
2072                key += self.nrows
2073            (start, stop, step) = self._process_range(key, key + 1, 1)
2074            return self.read(start, stop, step)[0]
2075        elif isinstance(key, slice):
2076            (start, stop, step) = self._process_range(
2077                key.start, key.stop, key.step)
2078            return self.read(start, stop, step)
2079        # Try with a boolean or point selection
2080        elif type(key) in (list, tuple) or isinstance(key, numpy.ndarray):
2081            return self._read_coordinates(key, None)
2082        else:
2083            raise IndexError("Invalid index or slice: %r" % (key,))
2084
2085    def __setitem__(self, key, value):
2086        """Set a row or a range of rows in the table.
2087
2088        It takes different actions depending on the type of the *key*
2089        parameter: if it is an integer, the corresponding table row is
2090        set to *value* (a record or sequence capable of being converted
2091        to the table structure).  If *key* is a slice, the row slice
2092        determined by it is set to *value* (a record array or sequence
2093        capable of being converted to the table structure).
2094
2095        In addition, NumPy-style point selections are supported.  In
2096        particular, if key is a list of row coordinates, the set of rows
2097        determined by it is set to value.  Furthermore, if key is an array of
2098        boolean values, only the coordinates where key is True are set to
        values from value.  Note that for the latter to work, the key list
        must contain exactly as many values as the table has rows.
2101
2102        Examples
2103        --------
2104
2105        ::
2106
2107            # Modify just one existing row
2108            table[2] = [456,'db2',1.2]
2109
2110            # Modify two existing rows
2111            rows = numpy.rec.array([[457,'db1',1.2],[6,'de2',1.3]],
2112                                   formats='i4,a3,f8')
2113            table[1:30:2] = rows             # modify a table slice
2114            table[[1,3]] = rows              # only modifies rows 1 and 3
2115            table[[True,False,True]] = rows  # only modifies rows 0 and 2
2116
2117        Which is equivalent to::
2118
2119            table.modify_rows(start=2, rows=[456,'db2',1.2])
2120            rows = numpy.rec.array([[457,'db1',1.2],[6,'de2',1.3]],
2121                                   formats='i4,a3,f8')
            table.modify_rows(start=1, stop=30, step=2, rows=rows)
            table.modify_coordinates([1,3], rows)
2124            table.modify_coordinates([True, False, True], rows)
2125
2126        Here, you can see how indexing can be used as a shorthand for the
2127        :meth:`Table.modify_rows` and :meth:`Table.modify_coordinates`
2128        methods.
2129
2130        """
2131
2132        self._g_check_open()
2133        self._v_file._check_writable()
2134
2135        if is_idx(key):
2136            key = operator.index(key)
2137
2138            # Index out of range protection
2139            if key >= self.nrows:
2140                raise IndexError("Index out of range")
2141            if key < 0:
2142                # To support negative values
2143                key += self.nrows
2144            return self.modify_rows(key, key + 1, 1, [value])
2145        elif isinstance(key, slice):
2146            (start, stop, step) = self._process_range(
2147                key.start, key.stop, key.step)
2148            return self.modify_rows(start, stop, step, value)
2149        # Try with a boolean or point selection
2150        elif type(key) in (list, tuple) or isinstance(key, numpy.ndarray):
2151            return self.modify_coordinates(key, value)
2152        else:
2153            raise IndexError("Invalid index or slice: %r" % (key,))
2154
2155    def _save_buffered_rows(self, wbufRA, lenrows):
        """Save the buffered rows and update the indexes after flushing them."""
2157
2158        self._open_append(wbufRA)
2159        self._append_records(lenrows)
2160        self._close_append()
2161        if self.indexed:
2162            self._unsaved_indexedrows += lenrows
2163            # The table caches for indexed queries are dirty now
2164            self._dirtycache = True
2165            if self.autoindex:
2166                # Flush the unindexed rows
2167                self.flush_rows_to_index(_lastrow=False)
2168            else:
2169                # All the columns are dirty now
2170                self._mark_columns_as_dirty(self.colpathnames)
2171
2172    def append(self, rows):
2173        """Append a sequence of rows to the end of the table.
2174
2175        The rows argument may be any object which can be converted to
2176        a structured array compliant with the table structure
2177        (otherwise, a ValueError is raised).  This includes NumPy
2178        structured arrays, lists of tuples or array records, and a
2179        string or Python buffer.
2180
2181        Examples
2182        --------
2183
2184        ::
2185
2186            import tables as tb
2187
2188            class Particle(tb.IsDescription):
2189                name        = tb.StringCol(16, pos=1) # 16-character String
2190                lati        = tb.IntCol(pos=2)        # integer
2191                longi       = tb.IntCol(pos=3)        # integer
2192                pressure    = tb.Float32Col(pos=4)  # float  (single-precision)
2193                temperature = tb.FloatCol(pos=5)    # double (double-precision)
2194
2195            fileh = tb.open_file('test4.h5', mode='w')
2196            table = fileh.create_table(fileh.root, 'table', Particle,
2197                                       "A table")
2198
2199            # Append several rows in only one call
2200            table.append([("Particle:     10", 10, 0, 10 * 10, 10**2),
2201                          ("Particle:     11", 11, -1, 11 * 11, 11**2),
2202                          ("Particle:     12", 12, -2, 12 * 12, 12**2)])
2203            fileh.close()
2204
2205        """
2206
2207        self._g_check_open()
2208        self._v_file._check_writable()
2209
2210        if not self._chunked:
2211            raise HDF5ExtError(
2212                "You cannot append rows to a non-chunked table.", h5bt=False)
2213
2214        # Try to convert the object into a recarray compliant with table
2215        try:
2216            iflavor = flavor_of(rows)
2217            if iflavor != 'python':
2218                rows = array_as_internal(rows, iflavor)
2219            # Works for Python structures and always copies the original,
2220            # so the resulting object is safe for in-place conversion.
2221            wbufRA = numpy.rec.array(rows, dtype=self._v_dtype)
2222        except Exception as exc:  # XXX
2223            raise ValueError("rows parameter cannot be converted into a "
2224                             "recarray object compliant with table '%s'. "
2225                             "The error was: <%s>" % (str(self), exc))
2226        lenrows = wbufRA.shape[0]
2227        # If the number of rows to append is zero, don't do anything else
2228        if lenrows > 0:
2229            # Save write buffer to disk
2230            self._save_buffered_rows(wbufRA, lenrows)
2231
2232    def _conv_to_recarr(self, obj):
2233        """Try to convert the object into a recarray."""
2234
2235        try:
2236            iflavor = flavor_of(obj)
2237            if iflavor != 'python':
2238                obj = array_as_internal(obj, iflavor)
2239            if hasattr(obj, "shape") and obj.shape == ():
2240                # To allow conversion of scalars (void type) into arrays.
2241                # See http://projects.scipy.org/scipy/numpy/ticket/315
2242                # for discussion on how to pass buffers to constructors
2243                # See also http://projects.scipy.org/scipy/numpy/ticket/348
2244                recarr = numpy.array([obj], dtype=self._v_dtype)
2245            else:
2246                # Works for Python structures and always copies the original,
2247                # so the resulting object is safe for in-place conversion.
2248                recarr = numpy.rec.array(obj, dtype=self._v_dtype)
2249        except Exception as exc:  # XXX
2250            raise ValueError("Object cannot be converted into a recarray "
2251                             "object compliant with table format '%s'. "
2252                             "The error was: <%s>" %
2253                             (self.description._v_nested_descr, exc))
2254
2255        return recarr
2256
2257    def modify_coordinates(self, coords, rows):
2258        """Modify a series of rows in positions specified in coords.
2259
2260        The values in the selected rows will be modified with the data given in
2261        rows.  This method returns the number of rows modified.
2262
2263        The possible values for the rows argument are the same as in
2264        :meth:`Table.append`.
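
        Examples
        --------

        A minimal sketch, reusing the hypothetical three-column structure
        from the :meth:`Table.__setitem__` examples::

            rows = numpy.rec.array([[457,'db1',1.2],[6,'de2',1.3]],
                                   formats='i4,a3,f8')
            table.modify_coordinates([1,3], rows)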
2265
2266        """
2267
2268        if rows is None:      # Nothing to be done
2269            return SizeType(0)
2270
2271        # Convert the coordinates to something expected by HDF5
2272        coords = self._point_selection(coords)
2273
2274        lcoords = len(coords)
2275        if len(rows) < lcoords:
            raise ValueError("The value does not have enough elements to "
                             "fill in the specified range")
2278
2279        # Convert rows into a recarray
2280        recarr = self._conv_to_recarr(rows)
2281
2282        if len(coords) > 0:
2283            # Do the actual update of rows
2284            self._update_elements(lcoords, coords, recarr)
2285
2286        # Redo the index if needed
2287        self._reindex(self.colpathnames)
2288
2289        return SizeType(lcoords)
2290
2291    def modify_rows(self, start=None, stop=None, step=None, rows=None):
2292        """Modify a series of rows in the slice [start:stop:step].
2293
2294        The values in the selected rows will be modified with the data given in
2295        rows.  This method returns the number of rows modified.  Should the
2296        modification exceed the length of the table, an IndexError is raised
2297        before changing data.
2298
2299        The possible values for the rows argument are the same as in
2300        :meth:`Table.append`.
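
        Examples
        --------

        A minimal sketch, reusing the hypothetical three-column structure
        from the :meth:`Table.__setitem__` examples::

            rows = numpy.rec.array([[457,'db1',1.2],[6,'de2',1.3]],
                                   formats='i4,a3,f8')
            table.modify_rows(start=1, stop=3, rows=rows)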
2301
2302        """
2303
2304        if step is None:
2305            step = 1
2306        if rows is None:      # Nothing to be done
2307            return SizeType(0)
2308        if start is None:
2309            start = 0
2310
2311        if start < 0:
            raise ValueError("'start' must have a non-negative value.")
        if step < 1:
            raise ValueError(
                "'step' must have a value greater than or equal to 1.")
2316        if stop is None:
2317            # compute the stop value. start + len(rows)*step does not work
2318            stop = start + (len(rows) - 1) * step + 1
2319
2320        (start, stop, step) = self._process_range(start, stop, step)
2321        if stop > self.nrows:
2322            raise IndexError("This modification will exceed the length of "
2323                             "the table. Giving up.")
2324        # Compute the number of rows to read.
2325        nrows = len(range(start, stop, step))
2326        if len(rows) != nrows:
            raise ValueError("The value has a different number of elements "
                             "than the specified range")
2329
2330        # Convert rows into a recarray
2331        recarr = self._conv_to_recarr(rows)
2332
2333        lenrows = len(recarr)
2334        if start + lenrows > self.nrows:
2335            raise IndexError("This modification will exceed the length of the "
2336                             "table. Giving up.")
2337
2338        # Do the actual update
2339        self._update_records(start, stop, step, recarr)
2340
2341        # Redo the index if needed
2342        self._reindex(self.colpathnames)
2343
2344        return SizeType(lenrows)
2345
2346    def modify_column(self, start=None, stop=None, step=None,
2347                      column=None, colname=None):
2348        """Modify one single column in the row slice [start:stop:step].
2349
2350        The colname argument specifies the name of the column in the
2351        table to be modified with the data given in column.  This
2352        method returns the number of rows modified.  Should the
2353        modification exceed the length of the table, an IndexError is
2354        raised before changing data.
2355
2356        The *column* argument may be any object which can be converted
2357        to a (record) array compliant with the structure of the column
2358        to be modified (otherwise, a ValueError is raised).  This
2359        includes NumPy (record) arrays, lists of scalars, tuples or
2360        array records, and a string or Python buffer.
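
        Examples
        --------

        A minimal sketch (``pressure`` is a hypothetical Float32 column, and
        the table is assumed to have at least two rows)::

            table.modify_column(start=0, column=[4.5, 5.5],
                                colname='pressure')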
2361
2362        """
2363        if step is None:
2364            step = 1
2365        if not isinstance(colname, str):
2366            raise TypeError("The 'colname' parameter must be a string.")
2367        self._v_file._check_writable()
2368
2369        if column is None:      # Nothing to be done
2370            return SizeType(0)
2371        if start is None:
2372            start = 0
2373
2374        if start < 0:
            raise ValueError("'start' must have a non-negative value.")
        if step < 1:
            raise ValueError(
                "'step' must have a value greater than or equal to 1.")
2379        # Get the column format to be modified:
2380        objcol = self._get_column_instance(colname)
2381        descr = [objcol._v_parent._v_nested_descr[objcol._v_pos]]
2382        # Try to convert the column object into a NumPy ndarray
2383        try:
2384            # If the column is a recarray (or kind of), convert into ndarray
2385            if hasattr(column, 'dtype') and column.dtype.kind == 'V':
2386                column = numpy.rec.array(column, dtype=descr).field(0)
2387            else:
2388                # Make sure the result is always a *copy* of the original,
2389                # so the resulting object is safe for in-place conversion.
2390                iflavor = flavor_of(column)
2391                column = array_as_internal(column, iflavor)
2392        except Exception as exc:  # XXX
2393            raise ValueError("column parameter cannot be converted into a "
2394                             "ndarray object compliant with specified column "
2395                             "'%s'. The error was: <%s>" % (str(column), exc))
2396
2397        # Get rid of single-dimensional dimensions
2398        column = column.squeeze()
2399        if column.shape == ():
            # Oops, stripped off too many dimensions
2401            column.shape = (1,)
2402
2403        if stop is None:
2404            # compute the stop value. start + len(rows)*step does not work
2405            stop = start + (len(column) - 1) * step + 1
2406        (start, stop, step) = self._process_range(start, stop, step)
2407        if stop > self.nrows:
2408            raise IndexError("This modification will exceed the length of "
2409                             "the table. Giving up.")
2410        # Compute the number of rows to read.
2411        nrows = len(range(start, stop, step))
2412        if len(column) < nrows:
            raise ValueError("The value does not have enough elements to "
                             "fill in the specified range")
2415        # Now, read the original values:
2416        mod_recarr = self._read(start, stop, step)
2417        # Modify the appropriate column in the original recarray
2418        mod_col = get_nested_field(mod_recarr, colname)
2419        mod_col[:] = column
        # Save the modified rows back to the table
2421        self._update_records(start, stop, step, mod_recarr)
2422        # Redo the index if needed
2423        self._reindex([colname])
2424
2425        return SizeType(nrows)
2426
2427    def modify_columns(self, start=None, stop=None, step=None,
2428                       columns=None, names=None):
2429        """Modify a series of columns in the row slice [start:stop:step].
2430
2431        The names argument specifies the names of the columns in the
2432        table to be modified with the data given in columns.  This
2433        method returns the number of rows modified.  Should the
2434        modification exceed the length of the table, an IndexError
2435        is raised before changing data.
2436
2437        The columns argument may be any object which can be converted
2438        to a structured array compliant with the structure of the
2439        columns to be modified (otherwise, a ValueError is raised).
2440        This includes NumPy structured arrays, lists of tuples or array
2441        records, and a string or Python buffer.
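
        Examples
        --------

        A minimal sketch (``lati`` and ``longi`` are hypothetical integer
        columns, and the table is assumed to have at least two rows)::

            table.modify_columns(start=0, columns=[[1, 2], [10, 20]],
                                 names=['lati', 'longi'])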
2442
2443        """
2444        if step is None:
2445            step = 1
2446        if type(names) not in (list, tuple):
            raise TypeError(
                "The 'names' parameter must be a list or tuple of strings.")
2448
2449        if columns is None:  # Nothing to be done
2450            return SizeType(0)
2451        if start is None:
2452            start = 0
2453        if start < 0:
            raise ValueError("'start' must have a non-negative value.")
        if step < 1:
            raise ValueError(("'step' must have a value greater than or "
                              "equal to 1."))
2458        descr = []
2459        for colname in names:
2460            objcol = self._get_column_instance(colname)
2461            descr.append(objcol._v_parent._v_nested_descr[objcol._v_pos])
2462            # descr.append(objcol._v_parent._v_dtype[objcol._v_pos])
2463        # Try to convert the columns object into a recarray
2464        try:
2465            # Make sure the result is always a *copy* of the original,
2466            # so the resulting object is safe for in-place conversion.
2467            iflavor = flavor_of(columns)
2468            if iflavor != 'python':
2469                columns = array_as_internal(columns, iflavor)
2470                recarray = numpy.rec.array(columns, dtype=descr)
2471            else:
2472                recarray = numpy.rec.fromarrays(columns, dtype=descr)
2473        except Exception as exc:  # XXX
2474            raise ValueError("columns parameter cannot be converted into a "
2475                             "recarray object compliant with table '%s'. "
2476                             "The error was: <%s>" % (str(self), exc))
2477
2478        if stop is None:
2479            # compute the stop value. start + len(rows)*step does not work
2480            stop = start + (len(recarray) - 1) * step + 1
2481        (start, stop, step) = self._process_range(start, stop, step)
2482        if stop > self.nrows:
2483            raise IndexError("This modification will exceed the length of "
2484                             "the table. Giving up.")
2485        # Compute the number of rows to read.
2486        nrows = len(range(start, stop, step))
2487        if len(recarray) < nrows:
            raise ValueError("The value does not have enough elements to "
                             "fill in the specified range")
2490        # Now, read the original values:
2491        mod_recarr = self._read(start, stop, step)
2492        # Modify the appropriate columns in the original recarray
2493        for i, name in enumerate(recarray.dtype.names):
2494            mod_col = get_nested_field(mod_recarr, names[i])
2495            mod_col[:] = recarray[name].squeeze()
        # Save the modified rows back to the table
2497        self._update_records(start, stop, step, mod_recarr)
2498        # Redo the index if needed
2499        self._reindex(names)
2500
2501        return SizeType(nrows)
2502
2503    def flush_rows_to_index(self, _lastrow=True):
2504        """Add remaining rows in buffers to non-dirty indexes.
2505
2506        This can be useful when you have chosen non-automatic indexing
2507        for the table (see the :attr:`Table.autoindex` property in
2508        :class:`Table`) and you want to update the indexes on it.
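
        Examples
        --------

        A minimal sketch, assuming the table has indexed columns and
        automatic indexing has been turned off (``rows`` is hypothetical
        data compatible with the table description)::

            table.autoindex = False
            table.append(rows)
            table.flush_rows_to_index()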
2509
2510        """
2511
2512        rowsadded = 0
2513        if self.indexed:
2514            # Update the number of unsaved indexed rows
2515            start = self._indexedrows
2516            nrows = self._unsaved_indexedrows
2517            for (colname, colindexed) in self.colindexed.items():
2518                if colindexed:
2519                    col = self.cols._g_col(colname)
2520                    if nrows > 0 and not col.index.dirty:
2521                        rowsadded = self._add_rows_to_index(
2522                            colname, start, nrows, _lastrow, update=True)
2523            self._unsaved_indexedrows -= rowsadded
2524            self._indexedrows += rowsadded
2525        return rowsadded
2526
2527    def _add_rows_to_index(self, colname, start, nrows, lastrow, update):
2528        """Add more elements to the existing index."""
2529
2530        # This method really belongs to Column, but since it makes extensive
2531        # use of the table, it gets dangerous when closing the file, since the
2532        # column may be accessing a table which is being destroyed.
2533        index = self.cols._g_col(colname).index
2534        slicesize = index.slicesize
2535        # The next loop does not rely on xrange so that it can
2536        # deal with long ints (i.e. more than 32-bit integers)
        # This allows indexing columns with more than 2**31 rows
2538        # F. Alted 2005-05-09
2539        startLR = index.sorted.nrows * slicesize
2540        indexedrows = startLR - start
2541        stop = start + nrows - slicesize + 1
2542        while startLR < stop:
2543            index.append(
2544                [self._read(startLR, startLR + slicesize, 1, colname)],
2545                update=update)
2546            indexedrows += slicesize
2547            startLR += slicesize
2548        # index the remaining rows in last row
2549        if lastrow and startLR < self.nrows:
2550            index.append_last_row(
2551                [self._read(startLR, self.nrows, 1, colname)],
2552                update=update)
2553            indexedrows += self.nrows - startLR
2554        return indexedrows
2555
2556    def remove_rows(self, start=None, stop=None, step=None):
2557        """Remove a range of rows in the table.
2558
2559        If only start is supplied, that row and all following will be deleted.
2560        If a range is supplied, i.e. both the start and stop parameters are
2561        passed, all the rows in the range are removed.
2562
2563        .. versionchanged:: 3.0
2564           The start, stop and step parameters now behave like in slice.
2565
2566        .. seealso:: remove_row()
2567
2568        Parameters
2569        ----------
2570        start : int
2571            Sets the starting row to be removed. It accepts negative values
2572            meaning that the count starts from the end.  A value of 0 means the
2573            first row.
2574        stop : int
2575            Sets the last row to be removed to stop-1, i.e. the end point is
2576            omitted (in the Python range() tradition). Negative values are also
2577            accepted. If None all rows after start will be removed.
2578        step : int
2579            The step size between rows to remove.
2580
2581            .. versionadded:: 3.0
2582
2583        Examples
2584        --------
2585
2586        Removing rows from 5 to 10 (excluded)::
2587
2588            t.remove_rows(5, 10)
2589
2590        Removing all rows starting from the 10th::
2591
2592            t.remove_rows(10)
2593
2594        Removing the 6th row::
2595
2596            t.remove_rows(6, 7)
2597
2598        .. note::
2599
2600            removing a single row can be done using the specific
2601            :meth:`remove_row` method.
2602
2603        """
2604
2605        (start, stop, step) = self._process_range(start, stop, step)
2606        nrows = self._remove_rows(start, stop, step)
        # remove_rows is an index-invalidating operation
2608        self._reindex(self.colpathnames)
2609
2610        return SizeType(nrows)
2611
2612    def remove_row(self, n):
        """Remove a row from the table.
2614
2615        Parameters
2616        ----------
2617        n : int
2618            The index of the row to remove.
2619
2620
2621        .. versionadded:: 3.0
2622
2623        Examples
2624        --------
2625
2626        Remove row 15::
2627
2628            table.remove_row(15)
2629
2630        Which is equivalent to::
2631
2632            table.remove_rows(15, 16)
2633
2634        .. warning::
2635
2636            This is not equivalent to::
2637
2638                table.remove_rows(15)
2639
2640        """
2641
2642        self.remove_rows(start=n, stop=n + 1)
2643
2644    def _g_update_dependent(self):
2645        super(Table, self)._g_update_dependent()
2646
2647        # Update the new path in columns
2648        self.cols._g_update_table_location(self)
2649
2650        # Update the new path in the Row instance, if cached.  Fixes #224.
2651        if 'row' in self.__dict__:
2652            self.__dict__['row'] = tableextension.Row(self)
2653
2654    def _g_move(self, newparent, newname):
2655        """Move this node in the hierarchy.
2656
2657        This overloads the Node._g_move() method.
2658
2659        """
2660
2661        itgpathname = _index_pathname_of(self)
2662
2663        # First, move the table to the new location.
2664        super(Table, self)._g_move(newparent, newname)
2665
2666        # Then move the associated index group (if any).
2667        try:
2668            itgroup = self._v_file._get_node(itgpathname)
2669        except NoSuchNodeError:
2670            pass
2671        else:
2672            newigroup = self._v_parent
2673            newiname = _index_name_of(self)
2674            itgroup._g_move(newigroup, newiname)
2675
2676    def _g_remove(self, recursive=False, force=False):
2677        # Remove the associated index group (if any).
2678        itgpathname = _index_pathname_of(self)
2679        try:
2680            itgroup = self._v_file._get_node(itgpathname)
2681        except NoSuchNodeError:
2682            pass
2683        else:
2684            itgroup._f_remove(recursive=True)
            self.indexed = False   # there are no indexes anymore
2686
2687        # Remove the leaf itself from the hierarchy.
2688        super(Table, self)._g_remove(recursive, force)
2689
2690    def _set_column_indexing(self, colpathname, indexed):
2691        """Mark the referred column as indexed or non-indexed."""
2692
2693        colindexed = self.colindexed
2694        isindexed, wasindexed = bool(indexed), colindexed[colpathname]
2695        if isindexed == wasindexed:
2696            return  # indexing state is unchanged
2697
2698        # Changing the set of indexed columns invalidates the condition cache
2699        self._condition_cache.clear()
2700        colindexed[colpathname] = isindexed
        self.indexed = any(colindexed.values())  # logical OR over all columns
2702
2703    def _mark_columns_as_dirty(self, colnames):
2704        """Mark column indexes in `colnames` as dirty."""
2705
2706        assert len(colnames) > 0
2707        if self.indexed:
2708            colindexed, cols = self.colindexed, self.cols
2709            # Mark the proper indexes as dirty
2710            for colname in colnames:
2711                if colindexed[colname]:
2712                    col = cols._g_col(colname)
2713                    col.index.dirty = True
2714
2715    def _reindex(self, colnames):
2716        """Re-index columns in `colnames` if automatic indexing is true."""
2717
2718        if self.indexed:
2719            colindexed, cols = self.colindexed, self.cols
2720            colstoindex = []
2721            # Mark the proper indexes as dirty
2722            for colname in colnames:
2723                if colindexed[colname]:
2724                    col = cols._g_col(colname)
2725                    col.index.dirty = True
2726                    colstoindex.append(colname)
2727            # Now, re-index the dirty ones
2728            if self.autoindex and colstoindex:
2729                self._do_reindex(dirty=True)
2730            # The table caches for indexed queries are dirty now
2731            self._dirtycache = True
2732
2733    def _do_reindex(self, dirty):
2734        """Common code for `reindex()` and `reindex_dirty()`."""
2735
2736        indexedrows = 0
2737        for (colname, colindexed) in self.colindexed.items():
2738            if colindexed:
2739                indexcol = self.cols._g_col(colname)
2740                indexedrows = indexcol._do_reindex(dirty)
2741        # Update counters in case some column has been updated
2742        if indexedrows > 0:
2743            self._indexedrows = indexedrows
2744            self._unsaved_indexedrows = self.nrows - indexedrows
2745
2746        return SizeType(indexedrows)
2747
2748    def reindex(self):
2749        """Recompute all the existing indexes in the table.
2750
2751        This can be useful when you suspect that, for any reason, the
2752        index information for columns is no longer valid and want to
2753        rebuild the indexes on it.
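
        Examples
        --------

        A minimal sketch (assuming an open ``table`` with one or more
        indexed columns)::

            table.reindex()  # rebuild every existing column index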
2754
2755        """
2756
2757        self._do_reindex(dirty=False)
2758
2759    def reindex_dirty(self):
2760        """Recompute the existing indexes in table, *if* they are dirty.
2761
2762        This can be useful when you have set :attr:`Table.autoindex`
2763        (see :class:`Table`) to false for the table and you want to
        update the indexes after an index-invalidating operation
2765        (:meth:`Table.remove_rows`, for example).
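
        Examples
        --------

        A minimal sketch (assuming an open ``table`` with indexed
        columns)::

            table.autoindex = False   # defer automatic re-indexing
            table.remove_rows(0, 10)  # this leaves the indexes dirty
            table.reindex_dirty()     # rebuild only the dirty indexes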
2766
2767        """
2768
2769        self._do_reindex(dirty=True)
2770
2771    def _g_copy_rows(self, object, start, stop, step, sortby, checkCSI):
2772        "Copy rows from self to object"
2773        if sortby is None:
2774            self._g_copy_rows_optim(object, start, stop, step)
2775            return
2776        lenbuf = self.nrowsinbuf
2777        absstep = step
2778        if step < 0:
2779            absstep = -step
2780            start, stop = stop + 1, start + 1
2781        if sortby is not None:
2782            index = self._check_sortby_csi(sortby, checkCSI)
2783        for start2 in range(start, stop, absstep * lenbuf):
2784            stop2 = start2 + absstep * lenbuf
2785            if stop2 > stop:
2786                stop2 = stop
            # The next 'if' is not strictly needed, but it does no harm either
2788            if sortby is None:
2789                rows = self[start2:stop2:step]
2790            else:
2791                coords = index[start2:stop2:step]
2792                rows = self.read_coordinates(coords)
2793            # Save the records on disk
2794            object.append(rows)
2795        object.flush()
2796
2797    def _g_copy_rows_optim(self, object, start, stop, step):
2798        """Copy rows from self to object (optimized version)"""
2799
2800        nrowsinbuf = self.nrowsinbuf
2801        object._open_append(self._v_iobuf)
2802        nrowsdest = object.nrows
2803        for start2 in range(start, stop, step * nrowsinbuf):
2804            # Save the records on disk
2805            stop2 = start2 + step * nrowsinbuf
2806            if stop2 > stop:
2807                stop2 = stop
2808            # Optimized version (it saves some conversions)
2809            nrows = ((stop2 - start2 - 1) // step) + 1
2810            self.row._fill_col(self._v_iobuf, start2, stop2, step, None)
            # The output buffer is created anew,
            # so in-place conversion is safe here.
2813            object._append_records(nrows)
2814            nrowsdest += nrows
2815        object._close_append()
2816
2817    def _g_prop_indexes(self, other):
2818        """Generate index in `other` table for every indexed column here."""
2819
2820        oldcols, newcols = self.colinstances, other.colinstances
2821        for colname in newcols:
2822            if (isinstance(oldcols[colname], Column)):
2823                oldcolindexed = oldcols[colname].is_indexed
2824                if oldcolindexed:
2825                    oldcolindex = oldcols[colname].index
2826                    newcol = newcols[colname]
2827                    newcol.create_index(
2828                        kind=oldcolindex.kind, optlevel=oldcolindex.optlevel,
2829                        filters=oldcolindex.filters, tmp_dir=None)
2830
2831    def _g_copy_with_stats(self, group, name, start, stop, step,
2832                           title, filters, chunkshape, _log, **kwargs):
2833        """Private part of Leaf.copy() for each kind of leaf."""
2834
2835        # Get the private args for the Table flavor of copy()
2836        sortby = kwargs.pop('sortby', None)
2837        propindexes = kwargs.pop('propindexes', False)
2838        checkCSI = kwargs.pop('checkCSI', False)
2839        # Compute the correct indices.
2840        (start, stop, step) = self._process_range_read(
2841            start, stop, step, warn_negstep=sortby is None)
2842        # And the number of final rows
2843        nrows = len(range(start, stop, step))
2844        # Create the new table and copy the selected data.
2845        newtable = Table(group, name, self.description, title=title,
2846                         filters=filters, expectedrows=nrows,
2847                         chunkshape=chunkshape,
2848                         _log=_log)
2849        self._g_copy_rows(newtable, start, stop, step, sortby, checkCSI)
2850        nbytes = newtable.nrows * newtable.rowsize
2851        # Generate equivalent indexes in the new table, if required.
2852        if propindexes and self.indexed:
2853            self._g_prop_indexes(newtable)
2854        return (newtable, nbytes)
2855
2856    # This overloading of copy is needed here in order to document
2857    # the additional keywords for the Table case.
2858    def copy(self, newparent=None, newname=None, overwrite=False,
2859             createparents=False, **kwargs):
2860        """Copy this table and return the new one.
2861
2862        This method has the behavior and keywords described in
2863        :meth:`Leaf.copy`.  Moreover, it recognises the following additional
2864        keyword arguments.
2865
2866        Parameters
2867        ----------
2868        sortby
2869            If specified, and sortby corresponds to a column with an index,
2870            then the copy will be sorted by this index.  If you want to ensure
2871            a fully sorted order, the index must be a CSI one.  A reverse
2872            sorted copy can be achieved by specifying a negative value for the
2873            step keyword.  If sortby is omitted or None, the original table
2874            order is used.
2875        checkCSI
2876            If true and a CSI index does not exist for the sortby column, an
            error will be raised.  If false (the default), no such check is
            made.
2878            You can use this flag in order to explicitly check for the
2879            existence of a CSI index.
2880        propindexes
2881            If true, the existing indexes in the source table are propagated
2882            (created) to the new one.  If false (the default), the indexes are
2883            not propagated.
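
        Examples
        --------

        A minimal sketch (assuming ``table`` has a CSI index on a column
        named ``energy``; the names are illustrative)::

            newt = table.copy(newname='sorted_table', sortby='energy',
                              checkCSI=True, propindexes=True)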
2884
2885        """
2886
2887        return super(Table, self).copy(
2888            newparent, newname, overwrite, createparents, **kwargs)
2889
2890    def flush(self):
2891        """Flush the table buffers."""
2892
        # Flush rows that remain to be appended
2894        if 'row' in self.__dict__:
2895            self.row._flush_buffered_rows()
2896        if self.indexed and self.autoindex:
2897            # Flush any unindexed row
2898            rowsadded = self.flush_rows_to_index(_lastrow=True)
2899            assert rowsadded <= 0 or self._indexedrows == self.nrows, \
2900                ("internal error: the number of indexed rows (%d) "
2901                 "and rows in the table (%d) is not equal; "
2902                 "please report this to the authors."
2903                 % (self._indexedrows, self.nrows))
2904            if self._dirtyindexes:
2905                # Finally, re-index any dirty column
2906                self.reindex_dirty()
2907
2908        super(Table, self).flush()
2909
2910    def _g_pre_kill_hook(self):
2911        """Code to be called before killing the node."""
2912
        # Flush the buffers before cleaning them up
        # self.flush()
        # It seems that flushing during the __del__ phase is a sure recipe
        # for bringing about all kinds of problems:
        # 1. Illegal Instruction
        # 2. Malloc(): trying to call free() twice
        # 3. Bus Error
        # 4. Segmentation fault
        # So the best thing is to do *nothing* at all in this __del__ phase.
        # As a consequence, the I/O will not be cleaned up until a call to
        # Table.flush() is made. This could lead to potentially large
        # memory consumption.
        # NOTE: The user should call Table.flush() whenever they have
        #       finished working with their table.
        # I've added a PerformanceWarning in order to compel the user to
        # call self.flush() before the table is preempted.
        # F. Alted 2006-08-03
2930        if (('row' in self.__dict__ and self.row._get_unsaved_nrows() > 0) or
2931            (self.indexed and self.autoindex and
2932             (self._unsaved_indexedrows > 0 or self._dirtyindexes))):
2933            warnings.warn(("table ``%s`` is being preempted from alive nodes "
2934                           "without its buffers being flushed or with some "
2935                           "index being dirty.  This may lead to very "
2936                           "ineficient use of resources and even to fatal "
2937                           "errors in certain situations.  Please do a call "
2938                           "to the .flush() or .reindex_dirty() methods on "
2939                           "this table before start using other nodes.")
2940                          % (self._v_pathname), PerformanceWarning)
2941        # Get rid of the IO buffers (if they have been created at all)
2942        mydict = self.__dict__
2943        if '_v_iobuf' in mydict:
2944            del mydict['_v_iobuf']
2945        if '_v_wdflts' in mydict:
2946            del mydict['_v_wdflts']
2947
2948    def _f_close(self, flush=True):
2949        if not self._v_isopen:
2950            return  # the node is already closed
2951
2952        # .. note::
2953        #
2954        #   As long as ``Table`` objects access their indices on closing,
2955        #   ``File.close()`` will need to make *two separate passes*
2956        #   to first close ``Table`` objects and then ``Index`` hierarchies.
2957        #
2958
2959        # Flush right now so the row object does not get in the middle.
2960        if flush:
2961            self.flush()
2962
2963        # Some warnings can be issued after calling `self._g_set_location()`
2964        # in `self.__init__()`.  If warnings are turned into exceptions,
2965        # `self._g_post_init_hook` may not be called and `self.cols` not set.
2966        # One example of this is
2967        # ``test_create.createTestCase.test05_maxFieldsExceeded()``.
2968        cols = self.cols
2969        if cols is not None:
2970            cols._g_close()
2971
2972        # Close myself as a leaf.
2973        super(Table, self)._f_close(False)
2974
2975    def __repr__(self):
2976        """This provides column metainfo in addition to standard __str__"""
2977
2978        if self.indexed:
2979            format = """\
2980%s
2981  description := %r
2982  byteorder := %r
2983  chunkshape := %r
2984  autoindex := %r
2985  colindexes := %r"""
2986            return format % (str(self), self.description, self.byteorder,
2987                             self.chunkshape, self.autoindex,
2988                             _ColIndexes(self.colindexes))
2989        else:
2990            return """\
2991%s
2992  description := %r
2993  byteorder := %r
2994  chunkshape := %r""" % \
2995                (str(self), self.description, self.byteorder, self.chunkshape)
2996
2997
2998class Cols(object):
2999    """Container for columns in a table or nested column.
3000
3001    This class is used as an *accessor* to the columns in a table or nested
3002    column.  It supports the *natural naming* convention, so that you can
3003    access the different columns as attributes which lead to Column instances
3004    (for non-nested columns) or other Cols instances (for nested columns).
3005
3006    For instance, if table.cols is a Cols instance with a column named col1
    under it, the latter can be accessed as table.cols.col1. If col1 is nested
3008    and contains a col2 column, this can be accessed as table.cols.col1.col2
3009    and so on. Because of natural naming, the names of members start with
3010    special prefixes, like in the Group class (see :ref:`GroupClassDescr`).
3011
3012    Like the Column class (see :ref:`ColumnClassDescr`), Cols supports item
3013    access to read and write ranges of values in the table or nested column.
3014
3015
3016    .. rubric:: Cols attributes
3017
3018    .. attribute:: _v_colnames
3019
3020        A list of the names of the columns hanging directly
3021        from the associated table or nested column.  The order of
3022        the names matches the order of their respective columns in
3023        the containing table.
3024
3025    .. attribute:: _v_colpathnames
3026
3027        A list of the pathnames of all the columns under the
3028        associated table or nested column (in preorder).  If it does
3029        not contain nested columns, this is exactly the same as the
3030        :attr:`Cols._v_colnames` attribute.
3031
3032    .. attribute:: _v_desc
3033
3034        The associated Description instance (see
3035        :ref:`DescriptionClassDescr`).
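
    Examples
    --------

    A minimal sketch (assuming a table with a nested column ``Info``
    that contains a ``name`` column; the names are illustrative)::

        cols = table.cols
        ncols = len(cols)            # number of top-level columns
        names = cols.Info.name[:10]  # first ten values of Info/name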
3036
3037    """
3038
3039    @property
3040    def _v_table(self):
3041        "The parent Table instance (see :ref:`TableClassDescr`)."
3042        return self._v__tableFile._get_node(self._v__tablePath)
3043
3044    def __init__(self, table, desc):
3045        myDict = self.__dict__
3046        myDict['_v__tableFile'] = table._v_file
3047        myDict['_v__tablePath'] = table._v_pathname
3048        myDict['_v_desc'] = desc
3049        myDict['_v_colnames'] = desc._v_names
3050        myDict['_v_colpathnames'] = table.description._v_pathnames
3051        # Put the column in the local dictionary
3052        for name in desc._v_names:
3053            if name in desc._v_types:
3054                myDict[name] = Column(table, name, desc)
3055            else:
3056                myDict[name] = Cols(table, desc._v_colobjects[name])
3057
3058    def _g_update_table_location(self, table):
3059        """Updates the location information about the associated `table`."""
3060
3061        myDict = self.__dict__
3062        myDict['_v__tableFile'] = table._v_file
3063        myDict['_v__tablePath'] = table._v_pathname
3064
3065        # Update the locations in individual columns.
3066        for colname in self._v_colnames:
3067            myDict[colname]._g_update_table_location(table)
3068
3069    def __len__(self):
3070        """Get the number of top level columns in table."""
3071
3072        return len(self._v_colnames)
3073
3074    def _f_col(self, colname):
3075        """Get an accessor to the column colname.
3076
3077        This method returns a Column instance (see :ref:`ColumnClassDescr`) if
3078        the requested column is not nested, and a Cols instance (see
3079        :ref:`ColsClassDescr`) if it is.  You may use full column pathnames in
3080        colname.
3081
3082        Calling cols._f_col('col1/col2') is equivalent to using cols.col1.col2.
        However, the first syntax is better suited to programmatic use.  It is
3084        also better if you want to access columns with names that are not valid
3085        Python identifiers.
3086
3087        """
3088
3089        if not isinstance(colname, str):
3090            raise TypeError("Parameter can only be an string. You passed "
3091                            "object: %s" % colname)
3092        if ((colname.find('/') > -1 and
3093             colname not in self._v_colpathnames) and
3094                colname not in self._v_colnames):
3095            raise KeyError(("Cols accessor ``%s.cols%s`` does not have a "
3096                            "column named ``%s``")
3097                           % (self._v__tablePath, self._v_desc._v_pathname,
3098                              colname))
3099
3100        return self._g_col(colname)
3101
3102    def _g_col(self, colname):
3103        """Like `self._f_col()` but it does not check arguments."""
3104
3105        # Get the Column or Description object
3106        inames = colname.split('/')
3107        cols = self
3108        for iname in inames:
3109            cols = cols.__dict__[iname]
3110        return cols
3111
3112    def __getitem__(self, key):
3113        """Get a row or a range of rows from a table or nested column.
3114
3115        If key argument is an integer, the corresponding nested type row is
3116        returned as a record of the current flavor. If key is a slice, the
3117        range of rows determined by it is returned as a structured array of the
3118        current flavor.
3119
3120        Examples
3121        --------
3122
3123        ::
3124
3125            record = table.cols[4]  # equivalent to table[4]
3126            recarray = table.cols.Info[4:1000:2]
3127
3128        Those statements are equivalent to::
3129
3130            nrecord = table.read(start=4)[0]
3131            nrecarray = table.read(start=4, stop=1000, step=2).field('Info')
3132
3133        Here you can see how a mix of natural naming, indexing and slicing can
3134        be used as shorthands for the :meth:`Table.read` method.
3135
3136        """
3137
3138        table = self._v_table
3139        nrows = table.nrows
3140        if is_idx(key):
3141            key = operator.index(key)
3142
3143            # Index out of range protection
3144            if key >= nrows:
3145                raise IndexError("Index out of range")
3146            if key < 0:
3147                # To support negative values
3148                key += nrows
3149            (start, stop, step) = table._process_range(key, key + 1, 1)
3150            colgroup = self._v_desc._v_pathname
3151            if colgroup == "":  # The root group
3152                return table.read(start, stop, step)[0]
3153            else:
3154                crecord = table.read(start, stop, step)[0]
3155                return crecord[colgroup]
3156        elif isinstance(key, slice):
3157            (start, stop, step) = table._process_range(
3158                key.start, key.stop, key.step)
3159            colgroup = self._v_desc._v_pathname
3160            if colgroup == "":  # The root group
3161                return table.read(start, stop, step)
3162            else:
3163                crecarray = table.read(start, stop, step)
3164                if hasattr(crecarray, "field"):
3165                    return crecarray.field(colgroup)  # RecArray case
3166                else:
3167                    return get_nested_field(crecarray, colgroup)  # numpy case
3168        else:
3169            raise TypeError("invalid index or slice: %r" % (key,))
3170
3171    def __setitem__(self, key, value):
3172        """Set a row or a range of rows in a table or nested column.
3173
3174        If key argument is an integer, the corresponding row is set to
3175        value. If key is a slice, the range of rows determined by it is set to
3176        value.
3177
3178        Examples
3179        --------
3180
3181        ::
3182
3183            table.cols[4] = record
3184            table.cols.Info[4:1000:2] = recarray
3185
3186        Those statements are equivalent to::
3187
3188            table.modify_rows(4, rows=record)
3189            table.modify_column(4, 1000, 2, colname='Info', column=recarray)
3190
3191        Here you can see how a mix of natural naming, indexing and slicing
3192        can be used as shorthands for the :meth:`Table.modify_rows` and
3193        :meth:`Table.modify_column` methods.
3194
3195        """
3196
3197        table = self._v_table
3198        nrows = table.nrows
3199        if is_idx(key):
3200            key = operator.index(key)
3201
3202            # Index out of range protection
3203            if key >= nrows:
3204                raise IndexError("Index out of range")
3205            if key < 0:
3206                # To support negative values
3207                key += nrows
3208            (start, stop, step) = table._process_range(key, key + 1, 1)
3209        elif isinstance(key, slice):
3210            (start, stop, step) = table._process_range(
3211                key.start, key.stop, key.step)
3212        else:
3213            raise TypeError("invalid index or slice: %r" % (key,))
3214
3215        # Actually modify the correct columns
3216        colgroup = self._v_desc._v_pathname
3217        if colgroup == "":  # The root group
3218            table.modify_rows(start, stop, step, rows=value)
3219        else:
3220            table.modify_column(
3221                start, stop, step, colname=colgroup, column=value)
3222
3223    def _g_close(self):
        # First, close the columns (i.e. any indexes they may have open)
3225        for col in self._v_colnames:
3226            colobj = self._g_col(col)
3227            if isinstance(colobj, Column):
3228                colobj.close()
3229                # Delete the reference to column
3230                del self.__dict__[col]
3231            else:
3232                colobj._g_close()
3233
3234        self.__dict__.clear()
3235
3236    def __str__(self):
3237        """The string representation for this object."""
3238
3239        # The pathname
3240        tablepathname = self._v__tablePath
3241        descpathname = self._v_desc._v_pathname
3242        if descpathname:
3243            descpathname = "." + descpathname
3244        # Get this class name
3245        classname = self.__class__.__name__
3246        # The number of columns
3247        ncols = len(self._v_colnames)
3248        return "%s.cols%s (%s), %s columns" % \
3249               (tablepathname, descpathname, classname, ncols)
3250
3251    def __repr__(self):
3252        """A detailed string representation for this object."""
3253
3254        out = str(self) + "\n"
3255        for name in self._v_colnames:
3256            # Get this class name
3257            classname = getattr(self, name).__class__.__name__
3258            # The type
3259            if name in self._v_desc._v_dtypes:
3260                tcol = self._v_desc._v_dtypes[name]
3261                # The shape for this column
3262                shape = (self._v_table.nrows,) + \
3263                    self._v_desc._v_dtypes[name].shape
3264            else:
3265                tcol = "Description"
3266                # Description doesn't have a shape currently
3267                shape = ()
3268            out += "  %s (%s%s, %s)" % (name, classname, shape, tcol) + "\n"
3269        return out
3270
3271
3272class Column(object):
3273    """Accessor for a non-nested column in a table.
3274
3275    Each instance of this class is associated with one *non-nested* column of a
3276    table. These instances are mainly used to read and write data from the
3277    table columns using item access (like the Cols class - see
3278    :ref:`ColsClassDescr`), but there are a few other associated methods to
3279    deal with indexes.
3280
3281    .. rubric:: Column attributes
3282
3283    .. attribute:: descr
3284
3285        The Description (see :ref:`DescriptionClassDescr`) instance of the
3286        parent table or nested column.
3287
3288    .. attribute:: name
3289
3290        The name of the associated column.
3291
3292    .. attribute:: pathname
3293
3294        The complete pathname of the associated column (the same as
3295        Column.name if the column is not inside a nested column).
3296
3297    Parameters
3298    ----------
3299    table
3300        The parent table instance
3301    name
3302        The name of the column that is associated with this object
3303    descr
3304        The parent description object
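
    Examples
    --------

    A minimal sketch (assuming a table with a column named ``energy``;
    the name is illustrative)::

        col = table.cols.energy  # a Column instance
        first = col[0]           # read the first value
        col[0] = 1.5             # write it back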
3305
3306    """
3307
3308    # Lazy read-only attributes
3309    # `````````````````````````
3310    @lazyattr
3311    def dtype(self):
3312        """The NumPy dtype that most closely matches this column."""
3313
3314        return self.descr._v_dtypes[self.name].base  # Get rid of shape info
3315
3316    @lazyattr
3317    def type(self):
3318        """The PyTables type of the column (a string)."""
3319
3320        return self.descr._v_types[self.name]
3321
3322    # Properties
3323    # ~~~~~~~~~~
3324
3325    @property
3326    def table(self):
3327        """The parent Table instance (see :ref:`TableClassDescr`)."""
3328        return self._table_file._get_node(self._table_path)
3329
3330    @property
3331    def index(self):
3332        """The Index instance (see :ref:`IndexClassDescr`) associated with this
3333        column (None if the column is not indexed)."""
3334        indexPath = _index_pathname_of_column_(self._table_path, self.pathname)
3335        try:
3336            index = self._table_file._get_node(indexPath)
3337        except NodeError:
3338            index = None  # The column is not indexed
3339        return index
3340
3341    @lazyattr
3342    def _itemtype(self):
3343        return self.descr._v_dtypes[self.name]
3344
3345    @property
3346    def shape(self):
3347        "The shape of this column."
3348        return (self.table.nrows,) + self.descr._v_dtypes[self.name].shape
3349
3350    @property
3351    def is_indexed(self):
3352        "True if the column is indexed, false otherwise."
3353        if self.index is None:
3354            return False
3355        else:
3356            return True
3357
3358    @property
3359    def maindim(self):
3360        """"The dimension along which iterators work. Its value is 0 (i.e. the
3361        first dimension)."""
3362        return 0
3363
3364    def __init__(self, table, name, descr):
3365        self._table_file = table._v_file
3366        self._table_path = table._v_pathname
3367        self.name = name
3368        """The name of the associated column."""
3369        self.pathname = descr._v_colobjects[name]._v_pathname
3370        """The complete pathname of the associated column (the same as
3371        Column.name if the column is not inside a nested column)."""
3372        self.descr = descr
3373        """The Description (see :ref:`DescriptionClassDescr`) instance of the
3374        parent table or nested column."""
3375
3376    def _g_update_table_location(self, table):
3377        """Updates the location information about the associated `table`."""
3378
3379        self._table_file = table._v_file
3380        self._table_path = table._v_pathname
3381
3382    def __len__(self):
3383        """Get the number of elements in the column.
3384
3385        This matches the length in rows of the parent table.
3386
3387        """
3388
3389        return self.table.nrows
3390
3391    def __getitem__(self, key):
3392        """Get a row or a range of rows from a column.
3393
3394        If key argument is an integer, the corresponding element in the column
3395        is returned as an object of the current flavor.  If key is a slice, the
3396        range of elements determined by it is returned as an array of the
3397        current flavor.
3398
3399        Examples
3400        --------
3401
3402        ::
3403
3404            print("Column handlers:")
3405            for name in table.colnames:
3406                print(table.cols._f_col(name))
3407                print("Select table.cols.name[1]-->", table.cols.name[1])
3408                print("Select table.cols.name[1:2]-->", table.cols.name[1:2])
3409                print("Select table.cols.name[:]-->", table.cols.name[:])
3410                print("Select table.cols._f_col('name')[:]-->",
3411                                                table.cols._f_col('name')[:])
3412
        The output of this for an arbitrary table is::
3414
3415            Column handlers:
3416            /table.cols.name (Column(), string, idx=None)
3417            /table.cols.lati (Column(), int32, idx=None)
3418            /table.cols.longi (Column(), int32, idx=None)
3419            /table.cols.vector (Column(2,), int32, idx=None)
3420            /table.cols.matrix2D (Column(2, 2), float64, idx=None)
3421            Select table.cols.name[1]--> Particle:     11
3422            Select table.cols.name[1:2]--> ['Particle:     11']
3423            Select table.cols.name[:]--> ['Particle:     10'
3424             'Particle:     11' 'Particle:     12'
3425             'Particle:     13' 'Particle:     14']
3426            Select table.cols._f_col('name')[:]--> ['Particle:     10'
3427             'Particle:     11' 'Particle:     12'
3428             'Particle:     13' 'Particle:     14']
3429
3430        See the :file:`examples/table2.py` file for a more complete example.
3431
3432        """
3433
3434        table = self.table
3435
3436        # Generalized key support not there yet, but at least allow
3437        # for a tuple with one single element (the main dimension).
3438        # (key,) --> key
3439        if isinstance(key, tuple) and len(key) == 1:
3440            key = key[0]
3441
3442        if is_idx(key):
3443            key = operator.index(key)
3444
3445            # Index out of range protection
3446            if key >= table.nrows:
3447                raise IndexError("Index out of range")
3448            if key < 0:
3449                # To support negative values
3450                key += table.nrows
3451            (start, stop, step) = table._process_range(key, key + 1, 1)
3452            return table.read(start, stop, step, self.pathname)[0]
3453        elif isinstance(key, slice):
3454            (start, stop, step) = table._process_range(
3455                key.start, key.stop, key.step)
3456            return table.read(start, stop, step, self.pathname)
3457        else:
3458            raise TypeError(
3459                "'%s' key type is not valid in this context" % key)
3460
3461    def __iter__(self):
3462        """Iterate through all items in the column."""
3463
3464        table = self.table
3465        itemsize = self.dtype.itemsize
3466        nrowsinbuf = table._v_file.params['IO_BUFFER_SIZE'] // itemsize
3467        buf = numpy.empty((nrowsinbuf, ), self._itemtype)
3468        max_row = len(self)
3469        for start_row in range(0, len(self), nrowsinbuf):
3470            end_row = min(start_row + nrowsinbuf, max_row)
3471            buf_slice = buf[0:end_row - start_row]
3472            table.read(start_row, end_row, 1, field=self.pathname,
3473                       out=buf_slice)
3474            for row in buf_slice:
3475                yield row
3476
3477    def __setitem__(self, key, value):
3478        """Set a row or a range of rows in a column.
3479
3480        If key argument is an integer, the corresponding element is set to
3481        value.  If key is a slice, the range of elements determined by it is
3482        set to value.
3483
3484        Examples
3485        --------
3486
3487        ::
3488
3489            # Modify row 1
3490            table.cols.col1[1] = -1
3491
3492            # Modify rows 1 and 3
3493            table.cols.col1[1::2] = [2,3]
3494
3495        Which is equivalent to::
3496
3497            # Modify row 1
3498            table.modify_columns(start=1, columns=[[-1]], names=['col1'])
3499
3500            # Modify rows 1 and 3
3501            columns = numpy.rec.fromarrays([[2,3]], formats='i4')
3502            table.modify_columns(start=1, step=2, columns=columns,
3503                                 names=['col1'])
3504
3505        """
3506
3507        table = self.table
3508        table._v_file._check_writable()
3509
3510        # Generalized key support not there yet, but at least allow
3511        # for a tuple with one single element (the main dimension).
3512        # (key,) --> key
3513        if isinstance(key, tuple) and len(key) == 1:
3514            key = key[0]
3515
3516        if is_idx(key):
3517            key = operator.index(key)
3518
3519            # Index out of range protection
3520            if key >= table.nrows:
3521                raise IndexError("Index out of range")
3522            if key < 0:
3523                # To support negative values
3524                key += table.nrows
3525            return table.modify_column(key, key + 1, 1,
3526                                       [[value]], self.pathname)
3527        elif isinstance(key, slice):
3528            (start, stop, step) = table._process_range(
3529                key.start, key.stop, key.step)
3530            return table.modify_column(start, stop, step,
3531                                       value, self.pathname)
3532        else:
3533            raise ValueError("Non-valid index or slice: %s" % key)
3534
3535    def create_index(self, optlevel=6, kind="medium", filters=None,
3536                     tmp_dir=None, _blocksizes=None, _testmode=False,
3537                     _verbose=False):
3538        """Create an index for this column.
3539
3540        .. warning::
3541
3542            In some situations it is useful to get a completely sorted
3543            index (CSI).  For those cases, it is best to use the
3544            :meth:`Column.create_csindex` method instead.
3545
3546        Parameters
3547        ----------
3548        optlevel : int
            The optimization level for building the index.  The level ranges
3550            from 0 (no optimization) up to 9 (maximum optimization).  Higher
3551            levels of optimization mean better chances for reducing the entropy
3552            of the index at the price of using more CPU, memory and I/O
3553            resources for creating the index.
3554        kind : str
3555            The kind of the index to be built.  It can take the 'ultralight',
3556            'light', 'medium' or 'full' values.  Lighter kinds ('ultralight'
3557            and 'light') mean that the index takes less space on disk, but will
3558            perform queries slower.  Heavier kinds ('medium' and 'full') mean
3559            better chances for reducing the entropy of the index (increasing
3560            the query speed) at the price of using more disk space as well as
3561            more CPU, memory and I/O resources for creating the index.
3562
3563            Note that selecting a full kind with an optlevel of 9 (the maximum)
3564            guarantees the creation of an index with zero entropy, that is, a
3565            completely sorted index (CSI) - provided that the number of rows in
            the table does not exceed the 2**48 figure (that is, more than 100
            trillion rows).  See the :meth:`Column.create_csindex` method for a
3568            more direct way to create a CSI index.
3569        filters : Filters
3570            Specify the Filters instance used to compress the index.  If None,
3571            default index filters will be used (currently, zlib level 1 with
3572            shuffling).
3573        tmp_dir
3574            When kind is other than 'ultralight', a temporary file is created
3575            during the index build process.  You can use the tmp_dir argument
3576            to specify the directory for this temporary file.  The default is
3577            to create it in the same directory as the file containing the
3578            original table.
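
        Examples
        --------

        A minimal sketch (assuming ``table`` has a column named
        ``energy``; the name is illustrative)::

            indexrows = table.cols.energy.create_index(
                optlevel=6, kind='medium')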
3579
3580        """
3581
3582        kinds = ['ultralight', 'light', 'medium', 'full']
3583        if kind not in kinds:
3584            raise ValueError("Kind must have any of these values: %s" % kinds)
3585        if (not isinstance(optlevel, int) or
3586                (optlevel < 0 or optlevel > 9)):
3587            raise ValueError("Optimization level must be an integer in the "
3588                             "range 0-9")
3589        if filters is None:
3590            filters = default_index_filters
3591        if tmp_dir is None:
3592            tmp_dir = os.path.dirname(self._table_file.filename)
3593        else:
3594            if not os.path.isdir(tmp_dir):
3595                raise ValueError("Temporary directory '%s' does not exist" %
3596                                 tmp_dir)
3597        if (_blocksizes is not None and
3598                (not isinstance(_blocksizes, tuple) or len(_blocksizes) != 4)):
3599            raise ValueError("_blocksizes must be a tuple with exactly 4 "
3600                             "elements")
3601        idxrows = _column__create_index(self, optlevel, kind, filters,
3602                                        tmp_dir, _blocksizes, _verbose)
3603        return SizeType(idxrows)
3604
3605    def create_csindex(self, filters=None, tmp_dir=None,
3606                       _blocksizes=None, _testmode=False, _verbose=False):
3607        """Create a completely sorted index (CSI) for this column.
3608
3609        This method guarantees the creation of an index with zero entropy, that
3610        is, a completely sorted index (CSI) -- provided that the number of rows
        in the table does not exceed the 2**48 figure (that is, more than 100
        trillion rows).  A CSI index is needed for some table methods (like
3613        :meth:`Table.itersorted` or :meth:`Table.read_sorted`) in order to
3614        ensure completely sorted results.
3615
3616        For the meaning of filters and tmp_dir arguments see
3617        :meth:`Column.create_index`.
3618
3619        Notes
3620        -----
3621        This method is equivalent to
3622        Column.create_index(optlevel=9, kind='full', ...).
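
        Examples
        --------

        A minimal sketch (assuming ``table`` has a column named
        ``energy``; the name is illustrative)::

            table.cols.energy.create_csindex()
            for row in table.itersorted('energy'):
                pass  # rows now come out fully sorted by ``energy``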
3623
3624        """
3625
3626        return self.create_index(
3627            kind='full', optlevel=9, filters=filters, tmp_dir=tmp_dir,
3628            _blocksizes=_blocksizes, _testmode=_testmode, _verbose=_verbose)
3629
3630    def _do_reindex(self, dirty):
3631        """Common code for reindex() and reindex_dirty() codes."""
3632
        index = self.index
        # Check for the index first: a non-indexed column has no
        # ``dirty`` attribute to query.
        if index is None:
            return SizeType(0)  # The column is not intended for indexing
        if dirty and not index.dirty:
            return SizeType(0)  # The index is not dirty, nothing to do
        self._table_file._check_writable()
        # Get the old index parameters
        kind = index.kind
        optlevel = index.optlevel
        filters = index.filters
        # We *need* to tell the index that it is going to be undirty.
        # This is needed here so as to unnail() the condition cache.
        index.dirty = False
        # Delete the existing index
        index._f_remove()
        # Create a new index with the previous parameters
        return SizeType(self.create_index(
            kind=kind, optlevel=optlevel, filters=filters))
3653
3654    def reindex(self):
3655        """Recompute the index associated with this column.
3656
3657        This can be useful when you suspect that, for any reason,
3658        the index information is no longer valid and you want to rebuild it.
3659
3660        This method does nothing if the column is not indexed.
3661
3662        """
3663
3664        self._do_reindex(dirty=False)
3665
3666    def reindex_dirty(self):
3667        """Recompute the associated index only if it is dirty.
3668
3669        This can be useful when you have set :attr:`Table.autoindex` to false
3670        for the table and you want to update the column's index after an
3671        invalidating index operation (like :meth:`Table.remove_rows`).
3672
3673        This method does nothing if the column is not indexed.
3674
3675        """
3676
3677        self._do_reindex(dirty=True)
3678
3679    def remove_index(self):
3680        """Remove the index associated with this column.
3681
3682        This method does nothing if the column is not indexed. The removed
3683        index can be created again by calling the :meth:`Column.create_index`
3684        method.
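
        Examples
        --------

        A minimal sketch (assuming an indexed column named ``energy``;
        the name is illustrative)::

            table.cols.energy.remove_index()
            assert not table.cols.energy.is_indexed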
3685
3686        """
3687
3688        self._table_file._check_writable()
3689
3690        # Remove the index if existing.
3691        if self.is_indexed:
3692            index = self.index
3693            index._f_remove()
3694            self.table._set_column_indexing(self.pathname, False)
3695
3696    def close(self):
3697        """Close this column."""
3698
3699        self.__dict__.clear()
3700
3701    def __str__(self):
3702        """The string representation for this object."""
3703
3704        # The pathname
3705        tablepathname = self._table_path
3706        pathname = self.pathname.replace('/', '.')
3707        # Get this class name
3708        classname = self.__class__.__name__
3709        # The shape for this column
3710        shape = self.shape
3711        # The type
3712        tcol = self.descr._v_types[self.name]
3713        return "%s.cols.%s (%s%s, %s, idx=%s)" % \
3714               (tablepathname, pathname, classname, shape, tcol, self.index)
3715
3716    def __repr__(self):
3717        """A detailed string representation for this object."""
3718
3719        return str(self)
3720
3721
3722## Local Variables:
3723## mode: python
3724## py-indent-offset: 4
3725## tab-width: 4
3726## fill-column: 72
3727## End:
3728