1# -*- coding: utf-8 -*-
2
3########################################################################
4#
5# License: BSD
6# Created: May 26, 2003
7# Author: Francesc Alted - faltet@pytables.com
8#
9# $Id$
10#
11########################################################################
12
13"""Here is defined the AttributeSet class."""
14
15import re
16import sys
17import warnings
18import pickle
19import numpy
20
21from . import hdf5extension
22from .utils import SizeType
23from .registry import class_name_dict
24from .exceptions import ClosedNodeError, PerformanceWarning
25from .path import check_attribute_name
26from .undoredo import attr_to_shadow
27from .filters import Filters
28
29
30
# Names of the attributes that are considered "system" attributes.
SYS_ATTRS = [
    "CLASS",
    "VERSION",
    "TITLE",
    "NROWS",
    "EXTDIM",
    "ENCODING",
    "PYTABLES_FORMAT_VERSION",
    "FLAVOR",
    "FILTERS",
    "AUTO_INDEX",
    "DIRTY",
    "NODE_TYPE",
    "NODE_TYPE_VERSION",
    "PSEUDOATOM",
]

# An attribute whose name starts with one of these prefixes is a system
# attribute as well.
SYS_ATTRS_PREFIXES = [
    "FIELD_",
]

# Historical note (Francesc Alted, 2004-12-19): RO_ATTRS has been
# disabled to let users modify these attributes if they want to.  The
# user is still not allowed to remove or rename system attributes.
# The read-only list used to be:
# RO_ATTRS = ["CLASS", "FLAVOR", "VERSION", "NROWS", "EXTDIM",
#             "PYTABLES_FORMAT_VERSION", "FILTERS",
#             "NODE_TYPE", "NODE_TYPE_VERSION"]
# RO_ATTRS = []

# System attributes that must not be copied during a Node copy process.
SYS_ATTRS_NOTTOBECOPIED = [
    "CLASS",
    "VERSION",
    "TITLE",
    "NROWS",
    "EXTDIM",
    "PYTABLES_FORMAT_VERSION",
    "FILTERS",
    "ENCODING",
]

# Attributes that are copied anyway when a node copy asks for the class
# information to be copied too.
FORCE_COPY_CLASS = [
    'CLASS',
    'VERSION',
]

# Matches the names of the attributes holding column default values.
_field_fill_re = re.compile('^FIELD_[0-9]+_FILL$')

# Matches the module reference found in old pickled filters ...
_old_filters_re = re.compile(br'\(([ic])tables\.Leaf\n')
# ... and this is the replacement that fixes it.
_new_filters_sub = br'(\1tables.filters\n'
59
60
def issysattrname(name):
    """Check whether `name` is the name of a system attribute.

    A name is a system attribute name when it is listed in ``SYS_ATTRS``
    or when it starts with *any* of the prefixes in
    ``SYS_ATTRS_PREFIXES``.

    Parameters
    ----------
    name : str
        The attribute name to be checked.

    Returns
    -------
    bool
        True if `name` is a system attribute name, False otherwise.

    """

    # ``str.startswith`` accepts a tuple and succeeds if any prefix
    # matches.  An empty tuple never matches, so an empty prefix list
    # does not turn every name into a system attribute.
    return name in SYS_ATTRS or name.startswith(tuple(SYS_ATTRS_PREFIXES))
70
71
class AttributeSet(hdf5extension.AttributeSet, object):
    """Container for the HDF5 attributes of a Node.

    This class provides methods to create new HDF5 node attributes,
    and to get, rename or delete existing ones.

    Like in Group instances (see :ref:`GroupClassDescr`), AttributeSet
    instances make use of the *natural naming* convention, i.e. you can
    access the attributes on disk as if they were normal Python
    attributes of the AttributeSet instance.

    This offers the user a very convenient way to access HDF5 node
    attributes. However, for this reason and in order not to pollute the
    object namespace, one cannot assign *normal* attributes to
    AttributeSet instances, and their members use names which start by
    special prefixes as happens with Group objects.

    .. rubric:: Notes on native and pickled attributes

    The values of most basic types are saved as HDF5 native data in the
    HDF5 file.  This includes Python bool, int, float, complex and str
    (but not long nor unicode) values, as well as their NumPy scalar
    versions and homogeneous or *structured* NumPy arrays of them.  When
    read, these values are always loaded as NumPy scalar or array
    objects, as needed.

    For that reason, attributes in native HDF5 files will be always
    mapped into NumPy objects.  Specifically, a multidimensional
    attribute will be mapped into a multidimensional ndarray and a
    scalar will be mapped into a NumPy scalar object (for example, a
    scalar H5T_NATIVE_LLONG will be read and returned as a numpy.int64
    scalar).

    However, other kinds of values are serialized using pickle, so you
    will only be able to correctly retrieve them using a Python-aware
    HDF5 library.  Thus, if you want to save Python scalar values and
    make sure you are able to read them with generic HDF5 tools, you
    should make use of *scalar or homogeneous/structured array NumPy
    objects* (for example, numpy.int64(1) or numpy.array([1, 2, 3],
    dtype='int16')).

    One more piece of advice: because of the various potential
    difficulties in restoring a Python object stored in an attribute,
    you may end up getting a pickle string where a Python object is
    expected. If this is the case, you may wish to run pickle.loads()
    on that string to get an idea of where things went wrong, as shown
    in this example::

        >>> import os, tempfile
        >>> import tables
        >>>
        >>> class MyClass(object):
        ...   foo = 'bar'
        ...
        >>> myObject = MyClass()  # save object of custom class in HDF5 attr
        >>> h5fname = tempfile.mktemp(suffix='.h5')
        >>> h5f = tables.open_file(h5fname, 'w')
        >>> h5f.root._v_attrs.obj = myObject  # store the object
        >>> print(h5f.root._v_attrs.obj.foo)  # retrieve it
        bar
        >>> h5f.close()
        >>>
        >>> del MyClass, myObject  # delete class of object and reopen file
        >>> h5f = tables.open_file(h5fname, 'r')
        >>> print(repr(h5f.root._v_attrs.obj))
        'ccopy_reg\\n_reconstructor...
        >>> import pickle  # let's unpickle that to see what went wrong
        >>> pickle.loads(h5f.root._v_attrs.obj)
        Traceback (most recent call last):
        ...
        AttributeError: 'module' object has no attribute 'MyClass'
        >>> # So the problem was not in the stored object,
        ... # but in the *environment* where it was restored.
        ... h5f.close()
        >>> os.remove(h5fname)


    .. rubric:: Notes on AttributeSet methods

    Note that this class overrides the __getattr__(), __setattr__(),
    __delattr__() and __dir__() special methods.  This allows you to
    read, assign or delete attributes on disk by just using the next
    constructs::

        leaf.attrs.myattr = 'str attr'    # set a string (native support)
        leaf.attrs.myattr2 = 3            # set an integer (native support)
        leaf.attrs.myattr3 = [3, (1, 2)]  # a generic object (Pickled)
        attrib = leaf.attrs.myattr        # get the attribute ``myattr``
        del leaf.attrs.myattr             # delete the attribute ``myattr``

    In addition, the dictionary-like __getitem__(), __setitem__() and
    __delitem__() methods are available, so you may write things like
    this::

        for name in node._v_attrs._f_list():
            print("name: %s, value: %s" % (name, node._v_attrs[name]))

    Use whatever idiom you prefer to access the attributes.

    Finally, on interactive python sessions you may get autocompletions of
    attributes named as *valid python identifiers* by pressing the  `[Tab]`
    key, or to use the dir() global function.

    If an attribute is set on a target node that already has a large
    number of attributes, a PerformanceWarning will be issued.


    .. rubric:: AttributeSet attributes

    .. attribute:: _v_attrnames

        A list with all attribute names.

    .. attribute:: _v_attrnamessys

        A list with system attribute names.

    .. attribute:: _v_attrnamesuser

        A list with user attribute names.

    .. attribute:: _v_unimplemented

        A list of attribute names with unimplemented native HDF5 types.

    """

    def _g_getnode(self):
        """Return the node found at the stored path in the stored file."""
        return self._v__nodefile._get_node(self._v__nodepath)

    @property
    def _v_node(self):
        """The :class:`Node` instance this attribute set is associated with."""
        return self._g_getnode()

    def __init__(self, node):
        """Create the basic structures to keep the attribute information.

        Reads all the HDF5 attributes (if any) on disk for the node "node".

        Parameters
        ----------
        node
            The parent node

        Raises
        ------
        ClosedNodeError
            If `node` is not open.

        """

        # Refuse to create an instance of an already closed node
        if not node._v_isopen:
            raise ClosedNodeError("the node for attribute set is closed")

        # Bookkeeping values are written straight into the instance
        # dictionary because ``__setattr__`` is overridden in this class
        # to set PyTables attributes instead.
        dict_ = self.__dict__

        self._g_new(node)
        dict_["_v__nodefile"] = node._v_file
        dict_["_v__nodepath"] = node._v_pathname
        dict_["_v_attrnames"] = self._g_list_attr(node)
        # The list of unimplemented attribute names
        dict_["_v_unimplemented"] = []

        # Get the file version format. This is an optimization
        # in order to avoid accessing it too much.
        try:
            format_version = node._v_file.format_version
        except AttributeError:
            parsed_version = None
        else:
            if format_version == 'unknown':
                parsed_version = None
            else:
                parsed_version = tuple(map(int, format_version.split('.')))
        dict_["_v__format_version"] = parsed_version

        # Split the attribute list in system and user lists
        dict_["_v_attrnamessys"] = []
        dict_["_v_attrnamesuser"] = []
        for attr in self._v_attrnames:
            # put the attributes on the local dictionary to allow
            # tab-completion
            self.__getattr__(attr)
            if issysattrname(attr):
                self._v_attrnamessys.append(attr)
            else:
                self._v_attrnamesuser.append(attr)

        # Sort the attributes
        self._v_attrnames.sort()
        self._v_attrnamessys.sort()
        self._v_attrnamesuser.sort()

    def _g_update_node_location(self, node):
        """Updates the location information about the associated `node`."""

        dict_ = self.__dict__
        dict_['_v__nodefile'] = node._v_file
        dict_['_v__nodepath'] = node._v_pathname
        # hdf5extension operations:
        self._g_new(node)

    def _f_list(self, attrset='user'):
        """Get a list of attribute names.

        The attrset string selects the attribute set to be used.  A
        'user' value returns only user attributes (this is the default).
        A 'sys' value returns only system attributes.  Finally, 'all'
        returns both system and user attributes.

        The returned list is a copy of the internal one.  Any other
        value of attrset makes this method return None.

        """

        if attrset == "user":
            return self._v_attrnamesuser[:]
        elif attrset == "sys":
            return self._v_attrnamessys[:]
        elif attrset == "all":
            return self._v_attrnames[:]
        # Unknown selectors yield None (kept for backward compatibility).
        return None

    def __dir__(self):
        """Autocomplete only children named as valid python identifiers.

        Only PY3 supports this special method.
        """
        return list(set(c for c in
                    super().__dir__() + self._v_attrnames
                    if c.isidentifier()))

    def __getattr__(self, name):
        """Get the attribute named "name".

        The value is read through ``_g_getattr()``, converted if needed
        (unpickled, unpacked or decoded, see the comments below), stored
        in the instance dictionary and returned.

        An ``AttributeError`` is raised if `name` is not in the list of
        attribute names of this set.

        """

        # If attribute does not exist, raise AttributeError
        if name not in self._v_attrnames:
            raise AttributeError("Attribute '%s' does not exist in node: "
                                 "'%s'" % (name, self._v__nodepath))

        # Read the attribute from disk. This is an optimization to read
        # quickly system attributes that are _string_ values, but it
        # takes care of other types as well as for example NROWS for
        # Tables and EXTDIM for EArrays
        format_version = self._v__format_version
        value = self._g_getattr(self._v_node, name)

        # Check whether the value is pickled
        # Pickled values always seem to end with a "."
        maybe_pickled = (
            isinstance(value, numpy.generic) and  # NumPy scalar?
            value.dtype.type == numpy.bytes_ and  # string type?
            value.itemsize > 0 and value.endswith(b'.'))

        # NOTE(security): the branches below call pickle.loads() on data
        # read from the file.  Unpickling can execute arbitrary code, so
        # attributes of files coming from untrusted sources are not safe
        # to read.

        if (maybe_pickled and value in [b"0", b"0."]):
            # Workaround for a bug in many versions of Python (starting
            # somewhere after Python 2.6.1).  See ticket #253.
            retval = value
        elif (maybe_pickled and _field_fill_re.match(name)
              and format_version == (1, 5)):
            # This format was used during the first 1.2 releases, just
            # for string defaults.
            try:
                retval = pickle.loads(value)
                retval = numpy.array(retval)
            except ImportError:
                retval = None  # signal error avoiding exception
        elif (maybe_pickled and name == 'FILTERS' and
              format_version is not None and format_version < (2, 0)):
            # This is a big hack, but we don't have other way to recognize
            # pickled filters of PyTables 1.x files.
            value = _old_filters_re.sub(_new_filters_sub, value, count=1)
            retval = pickle.loads(value)  # pass unpickling errors through
        elif maybe_pickled:
            try:
                retval = pickle.loads(value)
            except UnicodeDecodeError:
                # Object maybe pickled on python 2 and unpickled on python 3.
                # encoding='bytes' was added in python 3.4 to resolve this.
                # However 'bytes' mangles class attributes as they are
                # unpickled as bytestrings. Hence try 'latin1' first.
                # Ref: http://bugs.python.org/issue6784
                try:
                    retval = pickle.loads(value, encoding='latin1')
                except TypeError:
                    try:
                        retval = pickle.loads(value, encoding='bytes')
                    except Exception:
                        retval = value
                except Exception:
                    retval = value
            except Exception:
                # catch other unpickling errors:
                # ivb (2005-09-07): It is too hard to tell
                # whether the unpickling failed
                # because of the string not being a pickle one at all,
                # because of a malformed pickle string,
                # or because of some other problem in object reconstruction,
                # thus making inconvenient even the issuing of a warning here.
                # The documentation contains a note on this issue,
                # explaining how the user can tell where the problem was.
                #
                # ``Exception`` (and not a bare ``except``) is caught so
                # that KeyboardInterrupt and SystemExit still propagate.
                retval = value
            # Additional check for allowing a workaround for #307
            if isinstance(retval, str) and retval == '':
                retval = numpy.array(retval)[()]
        elif (name == 'FILTERS' and
              format_version is not None and format_version >= (2, 0)):
            retval = Filters._unpack(value)
        elif name == 'TITLE' and not isinstance(value, str):
            retval = value.decode('utf-8')
        elif (issysattrname(name) and isinstance(value, bytes) and
              not _field_fill_re.match(name)):
            # system attributes should always be str
            # python 3, bytes and not "FIELD_[0-9]+_FILL"
            retval = value.decode('utf-8')
        else:
            retval = value

        # Put this value in local directory
        self.__dict__[name] = retval
        return retval

    def _g__setattr(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        It does not log the change.

        """

        # Save this attribute to disk
        # (overwriting an existing one if needed)
        stvalue = value
        if issysattrname(name):
            if name in ["EXTDIM", "AUTO_INDEX", "DIRTY", "NODE_TYPE_VERSION"]:
                stvalue = numpy.array(value, dtype=numpy.int32)
                value = stvalue[()]
            elif name == "NROWS":
                stvalue = numpy.array(value, dtype=SizeType)
                value = stvalue[()]
            elif (name == "FILTERS" and
                  self._v__format_version is not None and
                  self._v__format_version >= (2, 0)):
                stvalue = value._pack()
                # value will remain as a Filters instance here
        # Convert value from a Python scalar into a NumPy scalar
        # (only in case it has not been converted yet)
        # Fixes ticket #59
        #
        # ``numpy.str_`` is used instead of the ``numpy.unicode_`` alias
        # because the alias no longer exists in NumPy 2.0.
        if (stvalue is value and
                type(value) in (bool, bytes, int, float, complex, str,
                                numpy.str_)):
            # Additional check for allowing a workaround for #307
            if isinstance(value, str) and len(value) == 0:
                stvalue = numpy.array('')
            else:
                stvalue = numpy.array(value)
            value = stvalue[()]

        self._g_setattr(self._v_node, name, stvalue)

        # New attribute or value. Introduce it into the local
        # directory
        self.__dict__[name] = value

        # Finally, add this attribute to the list if not present
        attrnames = self._v_attrnames
        if name not in attrnames:
            attrnames.append(name)
            attrnames.sort()
            if issysattrname(name):
                attrnamessys = self._v_attrnamessys
                attrnamessys.append(name)
                attrnamessys.sort()
            else:
                attrnamesuser = self._v_attrnamesuser
                attrnamesuser.append(name)
                attrnamesuser.sort()

    def __setattr__(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        A ``ValueError`` is raised when the name starts with a reserved
        prefix or contains a ``/``.  A `NaturalNameWarning` is issued if
        the name is not a valid Python identifier.  A
        `PerformanceWarning` is issued when the recommended maximum
        number of attributes in a node is going to be exceeded.

        """

        nodefile = self._v__nodefile
        attrnames = self._v_attrnames

        # Check for name validity
        check_attribute_name(name)

        nodefile._check_writable()

        # Check if there are too many attributes.
        max_node_attrs = nodefile.params['MAX_NODE_ATTRS']
        if len(attrnames) >= max_node_attrs:
            warnings.warn(
                "node ``%s`` is exceeding the recommended maximum number "
                "of attributes (%d); be ready to see PyTables asking for "
                "*lots* of memory and possibly slow I/O"
                % (self._v__nodepath, max_node_attrs),
                PerformanceWarning)

        undo_enabled = nodefile.is_undo_enabled()
        # Log old attribute removal (if any).
        if undo_enabled and (name in attrnames):
            self._g_del_and_log(name)

        # Set the attribute.
        self._g__setattr(name, value)

        # Log new attribute addition.
        if undo_enabled:
            self._g_log_add(name)

    def _g_log_add(self, name):
        """Log the addition of attribute `name` in the node's file."""
        self._v__nodefile._log('ADDATTR', self._v__nodepath, name)

    def _g_del_and_log(self, name):
        """Log the deletion of attribute `name` and move it to its shadow."""
        nodefile = self._v__nodefile
        node_pathname = self._v__nodepath
        # Log *before* moving to use the right shadow name.
        nodefile._log('DELATTR', node_pathname, name)
        attr_to_shadow(nodefile, node_pathname, name)

    def _g__delattr(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute.

        It does not log the change.

        """

        # Delete the attribute from disk
        self._g_remove(self._v_node, name)

        # Delete the attribute from local lists
        self._v_attrnames.remove(name)
        if name in self._v_attrnamessys:
            self._v_attrnamessys.remove(name)
        else:
            self._v_attrnamesuser.remove(name)

        # Delete the attribute from the local directory
        # closes (#1049285)
        del self.__dict__[name]

    def __delattr__(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute from the
        attribute set.  If a nonexistent attribute is specified, an
        ``AttributeError`` is raised.

        NOTE(review): this method does not itself reject system
        attribute names; confirm whether that is enforced elsewhere.

        """

        nodefile = self._v__nodefile

        # Check if attribute exists
        if name not in self._v_attrnames:
            raise AttributeError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

        nodefile._check_writable()

        # Remove the PyTables attribute or move it to shadow.
        if nodefile.is_undo_enabled():
            self._g_del_and_log(name)
        else:
            self._g__delattr(name)

    def __getitem__(self, name):
        """The dictionary like interface for __getattr__()."""

        try:
            return self.__getattr__(name)
        except AttributeError:
            # Capture the AttributeError and re-raise a KeyError one
            raise KeyError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

    def __setitem__(self, name, value):
        """The dictionary like interface for __setattr__()."""

        self.__setattr__(name, value)

    def __delitem__(self, name):
        """The dictionary like interface for __delattr__()."""

        try:
            self.__delattr__(name)
        except AttributeError:
            # Capture the AttributeError and re-raise a KeyError one
            raise KeyError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

    def __contains__(self, name):
        """Is there an attribute with that name?

        A true value is returned if the attribute set has an attribute
        with the given name, false otherwise.

        """

        return name in self._v_attrnames

    def _f_rename(self, oldattrname, newattrname):
        """Rename an attribute from oldattrname to newattrname.

        The value is read from the old name, set under the new name and
        then the old attribute is deleted.  Nothing is done when both
        names are equal.

        """

        if oldattrname == newattrname:
            # Do nothing
            return

        # First, fetch the value of the oldattrname
        attrvalue = getattr(self, oldattrname)

        # Now, create the new attribute
        setattr(self, newattrname, attrvalue)

        # Finally, remove the old attribute
        delattr(self, oldattrname)

    def _g_copy(self, newset, set_attr=None, copyclass=False):
        """Copy set attributes.

        Copies all user and allowed system PyTables attributes to the
        given attribute set, replacing the existing ones.

        You can specify a *bound* method of the destination set that
        will be used to set its attributes.  Else, its `_g__setattr`
        method will be used.

        Changes are logged depending on the chosen setting method.  The
        default setting method does not log anything.

        System attributes (including the CLASS and VERSION ones
        requested with `copyclass`) are only copied when the
        ``PYTABLES_SYS_ATTRS`` parameter of the destination file is
        true.

        .. versionchanged:: 3.0
           The *newSet* parameter has been renamed into *newset*.

        .. versionchanged:: 3.0
           The *copyClass* parameter has been renamed into *copyclass*.

        """

        copysysattrs = newset._v__nodefile.params['PYTABLES_SYS_ATTRS']
        if set_attr is None:
            set_attr = newset._g__setattr

        for attrname in self._v_attrnamesuser:
            # Do not copy the unimplemented attributes.
            if attrname not in self._v_unimplemented:
                set_attr(attrname, getattr(self, attrname))
        # Copy the system attributes that we are allowed to.
        if copysysattrs:
            for attrname in self._v_attrnamessys:
                if ((attrname not in SYS_ATTRS_NOTTOBECOPIED) and
                    # Do not copy the FIELD_ attributes in tables as this can
                    # be really *slow* (don't know exactly the reason).
                    # See #304.
                        not attrname.startswith("FIELD_")):
                    set_attr(attrname, getattr(self, attrname))
            # Copy CLASS and VERSION attributes if requested
            if copyclass:
                for attrname in FORCE_COPY_CLASS:
                    if attrname in self._v_attrnamessys:
                        set_attr(attrname, getattr(self, attrname))

    def _f_copy(self, where):
        """Copy attributes to the where node.

        Copies all user and certain system attributes to the given where
        node (a Node instance - see :ref:`NodeClassDescr`), replacing
        the existing ones.

        A ``TypeError`` is raised if `where` is not a Node instance.

        """

        # AttributeSet must be defined in order to define a Node.
        # However, we need to know Node here.
        # Using class_name_dict avoids a circular import.
        if not isinstance(where, class_name_dict['Node']):
            raise TypeError("destination object is not a node: %r" % (where,))
        self._g_copy(where._v_attrs, where._v_attrs.__setattr__)

    def _g_close(self):
        """Do nothing; there is nothing to release in this class."""
        # Nothing will be done here, as the existing instance is completely
        # operative now.
        pass

    def __str__(self):
        """The string representation for this object."""

        # The pathname
        pathname = self._v__nodepath
        # Get this class name
        classname = self.__class__.__name__
        # The number of attributes
        attrnumber = len(self._v_attrnames)
        return "%s._v_attrs (%s), %s attributes" % \
               (pathname, classname, attrnumber)

    def __repr__(self):
        """A detailed string representation for this object."""

        # print additional info only if there are attributes to show
        attrnames = self._v_attrnames
        if not attrnames:
            return str(self)

        rep = ['%s := %r' % (attr, getattr(self, attr))
               for attr in attrnames]
        attrlist = '[%s]' % (',\n    '.join(rep))

        return "%s:\n   %s" % (str(self), attrlist)
692
693
class NotLoggedAttributeSet(AttributeSet):
    """An attribute set whose logging hooks do not log anything."""

    def _g_log_add(self, name):
        """Ignore the addition of attribute `name` (nothing is logged)."""
        return None

    def _g_del_and_log(self, name):
        """Delete attribute `name` right away, without logging it."""
        self._g__delattr(name)
701
702
703## Local Variables:
704## mode: python
705## py-indent-offset: 4
706## tab-width: 4
707## fill-column: 72
708## End:
709