# -*- coding: utf-8 -*-

########################################################################
#
# License: BSD
# Created: May 26, 2003
# Author: Francesc Alted - faltet@pytables.com
#
# $Id$
#
########################################################################

"""Here is defined the AttributeSet class."""

import re
import sys
import warnings
import pickle
import numpy

from . import hdf5extension
from .utils import SizeType
from .registry import class_name_dict
from .exceptions import ClosedNodeError, PerformanceWarning
from .path import check_attribute_name
from .undoredo import attr_to_shadow
from .filters import Filters


# System attributes
SYS_ATTRS = ["CLASS", "VERSION", "TITLE", "NROWS", "EXTDIM",
             "ENCODING", "PYTABLES_FORMAT_VERSION",
             "FLAVOR", "FILTERS", "AUTO_INDEX",
             "DIRTY", "NODE_TYPE", "NODE_TYPE_VERSION",
             "PSEUDOATOM"]
# Prefixes of other system attributes
SYS_ATTRS_PREFIXES = ["FIELD_"]
# RO_ATTRS will be disabled and let the user modify them if they
# want to. The user is still not allowed to remove or rename
# system attributes. Francesc Alted 2004-12-19
# Read-only attributes:
# RO_ATTRS = ["CLASS", "FLAVOR", "VERSION", "NROWS", "EXTDIM",
#             "PYTABLES_FORMAT_VERSION", "FILTERS",
#             "NODE_TYPE", "NODE_TYPE_VERSION"]
# RO_ATTRS = []

# The next attributes are not meant to be copied during a Node copy process
SYS_ATTRS_NOTTOBECOPIED = ["CLASS", "VERSION", "TITLE", "NROWS", "EXTDIM",
                           "PYTABLES_FORMAT_VERSION", "FILTERS", "ENCODING"]
# Attributes forced to be copied during node copies
FORCE_COPY_CLASS = ['CLASS', 'VERSION']
# Regular expression for column default values.
_field_fill_re = re.compile('^FIELD_[0-9]+_FILL$')
# Regular expression for fixing old pickled filters.
_old_filters_re = re.compile(br'\(([ic])tables\.Leaf\n')
# Fixed version of the previous string.
_new_filters_sub = br'(\1tables.filters\n'


def issysattrname(name):
    """Check if a name is a system attribute or not.

    A name is considered a system attribute when it is listed in
    ``SYS_ATTRS`` or when it starts with any of the prefixes listed in
    ``SYS_ATTRS_PREFIXES``.

    """

    # ``str.startswith`` accepts a tuple and succeeds if *any* prefix
    # matches; an empty tuple never matches.
    return name in SYS_ATTRS or name.startswith(tuple(SYS_ATTRS_PREFIXES))


class AttributeSet(hdf5extension.AttributeSet, object):
    """Container for the HDF5 attributes of a Node.

    This class provides methods to create new HDF5 node attributes,
    and to get, rename or delete existing ones.

    Like in Group instances (see :ref:`GroupClassDescr`), AttributeSet
    instances make use of the *natural naming* convention, i.e. you can
    access the attributes on disk as if they were normal Python
    attributes of the AttributeSet instance.

    This offers the user a very convenient way to access HDF5 node
    attributes. However, for this reason and in order not to pollute the
    object namespace, one can not assign *normal* attributes to
    AttributeSet instances, and their members use names which start by
    special prefixes as happens with Group objects.

    .. rubric:: Notes on native and pickled attributes

    The values of most basic types are saved as HDF5 native data in the
    HDF5 file.  This includes Python bool, int, float, complex and str
    (but not long nor unicode) values, as well as their NumPy scalar
    versions and homogeneous or *structured* NumPy arrays of them.  When
    read, these values are always loaded as NumPy scalar or array
    objects, as needed.

    For that reason, attributes in native HDF5 files will be always
    mapped into NumPy objects.  Specifically, a multidimensional
    attribute will be mapped into a multidimensional ndarray and a
    scalar will be mapped into a NumPy scalar object (for example, a
    scalar H5T_NATIVE_LLONG will be read and returned as a numpy.int64
    scalar).

    However, other kinds of values are serialized using pickle, so you
    only will be able to correctly retrieve them using a Python-aware
    HDF5 library.  Thus, if you want to save Python scalar values and
    make sure you are able to read them with generic HDF5 tools, you
    should make use of *scalar or homogeneous/structured array NumPy
    objects* (for example, numpy.int64(1) or numpy.array([1, 2, 3],
    dtype='int16')).

    One more advice: because of the various potential difficulties in
    restoring a Python object stored in an attribute, you may end up
    getting a pickle string where a Python object is expected. If this
    is the case, you may wish to run pickle.loads() on that string to
    get an idea of where things went wrong, as shown in this example::

        >>> import os, tempfile
        >>> import tables
        >>>
        >>> class MyClass(object):
        ...     foo = 'bar'
        ...
        >>> myObject = MyClass()  # save object of custom class in HDF5 attr
        >>> h5fname = tempfile.mktemp(suffix='.h5')
        >>> h5f = tables.open_file(h5fname, 'w')
        >>> h5f.root._v_attrs.obj = myObject  # store the object
        >>> print(h5f.root._v_attrs.obj.foo)  # retrieve it
        bar
        >>> h5f.close()
        >>>
        >>> del MyClass, myObject  # delete class of object and reopen file
        >>> h5f = tables.open_file(h5fname, 'r')
        >>> print(repr(h5f.root._v_attrs.obj))
        'ccopy_reg\\n_reconstructor...
        >>> import pickle  # let's unpickle that to see what went wrong
        >>> pickle.loads(h5f.root._v_attrs.obj)
        Traceback (most recent call last):
        ...
        AttributeError: 'module' object has no attribute 'MyClass'
        >>> # So the problem was not in the stored object,
        ... # but in the *environment* where it was restored.
        ... h5f.close()
        >>> os.remove(h5fname)


    .. rubric:: Notes on AttributeSet methods

    Note that this class overrides the __getattr__(), __setattr__(),
    __delattr__() and __dir__() special methods.  This allows you to
    read, assign or delete attributes on disk by just using the next
    constructs::

        leaf.attrs.myattr = 'str attr'    # set a string (native support)
        leaf.attrs.myattr2 = 3            # set an integer (native support)
        leaf.attrs.myattr3 = [3, (1, 2)]  # a generic object (Pickled)
        attrib = leaf.attrs.myattr        # get the attribute ``myattr``
        del leaf.attrs.myattr             # delete the attribute ``myattr``

    In addition, the dictionary-like __getitem__(), __setitem__() and
    __delitem__() methods are available, so you may write things like
    this::

        for name in node._v_attrs._f_list():
            print("name: %s, value: %s" % (name, node._v_attrs[name]))

    Use whatever idiom you prefer to access the attributes.

    Finally, on interactive python sessions you may get autocompletions of
    attributes named as *valid python identifiers* by pressing the `[Tab]`
    key, or to use the dir() global function.

    If an attribute is set on a target node that already has a large
    number of attributes, a PerformanceWarning will be issued.


    .. rubric:: AttributeSet attributes

    .. attribute:: _v_attrnames

        A list with all attribute names.

    .. attribute:: _v_attrnamessys

        A list with system attribute names.

    .. attribute:: _v_attrnamesuser

        A list with user attribute names.

    .. attribute:: _v_unimplemented

        A list of attribute names with unimplemented native HDF5 types.

    """

    def _g_getnode(self):
        """Look up the associated node in the file by its stored path."""
        return self._v__nodefile._get_node(self._v__nodepath)

    @property
    def _v_node(self):
        """The :class:`Node` instance this attribute set is associated with."""
        return self._g_getnode()

    def __init__(self, node):
        """Create the basic structures to keep the attribute information.

        Reads all the HDF5 attributes (if any) on disk for the node "node".

        Parameters
        ----------
        node
            The parent node

        Raises
        ------
        ClosedNodeError
            If `node` is not open.

        """

        # Refuse to create an instance of an already closed node
        if not node._v_isopen:
            raise ClosedNodeError("the node for attribute set is closed")

        # Internal state is written straight into the instance dictionary
        # so that the overridden __setattr__ (which writes attributes to
        # disk) is bypassed.
        dict_ = self.__dict__

        self._g_new(node)
        dict_["_v__nodefile"] = node._v_file
        dict_["_v__nodepath"] = node._v_pathname
        dict_["_v_attrnames"] = self._g_list_attr(node)
        # The list of unimplemented attribute names
        dict_["_v_unimplemented"] = []

        # Get the file version format. This is an optimization
        # in order to avoid accessing it too much.
        try:
            format_version = node._v_file.format_version
        except AttributeError:
            parsed_version = None
        else:
            if format_version == 'unknown':
                parsed_version = None
            else:
                parsed_version = tuple(map(int, format_version.split('.')))
        dict_["_v__format_version"] = parsed_version
        # Split the attribute list in system and user lists
        dict_["_v_attrnamessys"] = []
        dict_["_v_attrnamesuser"] = []
        for attr in self._v_attrnames:
            # put the attributes on the local dictionary to allow
            # tab-completion
            self.__getattr__(attr)
            if issysattrname(attr):
                self._v_attrnamessys.append(attr)
            else:
                self._v_attrnamesuser.append(attr)

        # Sort the attributes
        self._v_attrnames.sort()
        self._v_attrnamessys.sort()
        self._v_attrnamesuser.sort()

    def _g_update_node_location(self, node):
        """Updates the location information about the associated `node`."""

        dict_ = self.__dict__
        dict_['_v__nodefile'] = node._v_file
        dict_['_v__nodepath'] = node._v_pathname
        # hdf5extension operations:
        self._g_new(node)

    def _f_list(self, attrset='user'):
        """Get a list of attribute names.

        The attrset string selects the attribute set to be used.  A
        'user' value returns only user attributes (this is the default).
        A 'sys' value returns only system attributes.  Finally, 'all'
        returns both system and user attributes.

        The returned list is a copy of the internal one.  Any other
        `attrset` value makes this method return ``None``.

        """

        if attrset == "user":
            return self._v_attrnamesuser[:]
        elif attrset == "sys":
            return self._v_attrnamessys[:]
        elif attrset == "all":
            return self._v_attrnames[:]

    def __dir__(self):
        """Autocomplete only children named as valid python identifiers.

        Only PY3 supports this special method.
        """
        candidates = super(AttributeSet, self).__dir__() + self._v_attrnames
        return list({c for c in candidates if c.isidentifier()})

    def __getattr__(self, name):
        """Get the attribute named "name".

        The value is read from disk, decoded or unpickled when needed,
        stored in the instance dictionary under `name` and returned.

        Raises
        ------
        AttributeError
            If `name` is not in the list of known attribute names.

        """

        # If attribute does not exist, raise AttributeError
        if name not in self._v_attrnames:
            raise AttributeError("Attribute '%s' does not exist in node: "
                                 "'%s'" % (name, self._v__nodepath))

        # Read the attribute from disk.  This is an optimization to read
        # quickly system attributes that are _string_ values, but it
        # takes care of other types as well as for example NROWS for
        # Tables and EXTDIM for EArrays
        format_version = self._v__format_version
        value = self._g_getattr(self._v_node, name)

        # Check whether the value is pickled
        # Pickled values always seems to end with a "."
        maybe_pickled = (
            isinstance(value, numpy.generic) and  # NumPy scalar?
            value.dtype.type == numpy.bytes_ and  # string type?
            value.itemsize > 0 and value.endswith(b'.'))

        # NOTE(security): the branches below call pickle.loads() on data
        # read from the HDF5 file.  Unpickling can execute arbitrary
        # code, so attributes of files from untrusted sources must not
        # be read.
        if (maybe_pickled and value in [b"0", b"0."]):
            # Workaround for a bug in many versions of Python (starting
            # somewhere after Python 2.6.1).  See ticket #253.
            retval = value
        elif (maybe_pickled and _field_fill_re.match(name)
              and format_version == (1, 5)):
            # This format was used during the first 1.2 releases, just
            # for string defaults.
            try:
                retval = pickle.loads(value)
                retval = numpy.array(retval)
            except ImportError:
                retval = None  # signal error avoiding exception
        elif (maybe_pickled and name == 'FILTERS' and
              format_version is not None and format_version < (2, 0)):
            # This is a big hack, but we don't have other way to recognize
            # pickled filters of PyTables 1.x files.
            value = _old_filters_re.sub(_new_filters_sub, value, 1)
            retval = pickle.loads(value)  # pass unpickling errors through
        elif maybe_pickled:
            try:
                retval = pickle.loads(value)
            # except cPickle.UnpicklingError:
            # It seems that pickle may raise other errors than UnpicklingError
            # Perhaps it would be better just an "except:" clause?
            # except (cPickle.UnpicklingError, ImportError):
            # Definitely (see SF bug #1254636)
            except UnicodeDecodeError:
                # Object maybe pickled on python 2 and unpickled on python 3.
                # encoding='bytes' was added in python 3.4 to resolve this.
                # However 'bytes' mangles class attributes as they are
                # unpickled as bytestrings. Hence try 'latin1' first.
                # Ref: http://bugs.python.org/issue6784
                try:
                    retval = pickle.loads(value, encoding='latin1')
                except TypeError:
                    try:
                        retval = pickle.loads(value, encoding='bytes')
                    except Exception:
                        retval = value
                except Exception:
                    retval = value
            except Exception:
                # catch other unpickling errors:
                # ivb (2005-09-07): It is too hard to tell
                # whether the unpickling failed
                # because of the string not being a pickle one at all,
                # because of a malformed pickle string,
                # or because of some other problem in object reconstruction,
                # thus making inconvenient even the issuing of a warning here.
                # The documentation contains a note on this issue,
                # explaining how the user can tell where the problem was.
                #
                # ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                retval = value
            # Additional check for allowing a workaround for #307
            if isinstance(retval, str) and retval == u'':
                retval = numpy.array(retval)[()]
        elif (name == 'FILTERS' and
              format_version is not None and format_version >= (2, 0)):
            retval = Filters._unpack(value)
        elif name == 'TITLE' and not isinstance(value, str):
            retval = value.decode('utf-8')
        elif (issysattrname(name) and isinstance(value, bytes) and
              not _field_fill_re.match(name)):
            # system attributes should always be str
            # python 3, bytes and not "FIELD_[0-9]+_FILL"
            retval = value.decode('utf-8')
        else:
            retval = value

        # Put this value in local directory
        self.__dict__[name] = retval
        return retval

    def _g__setattr(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        It does not log the change.

        """

        # Save this attribute to disk
        # (overwriting an existing one if needed)
        stvalue = value
        if issysattrname(name):
            if name in ["EXTDIM", "AUTO_INDEX", "DIRTY", "NODE_TYPE_VERSION"]:
                stvalue = numpy.array(value, dtype=numpy.int32)
                value = stvalue[()]
            elif name == "NROWS":
                stvalue = numpy.array(value, dtype=SizeType)
                value = stvalue[()]
            elif (name == "FILTERS" and
                  self._v__format_version is not None and
                  self._v__format_version >= (2, 0)):
                stvalue = value._pack()
                # value will remain as a Filters instance here
        # Convert value from a Python scalar into a NumPy scalar
        # (only in case it has not been converted yet)
        # Fixes ticket #59
        #
        # ``numpy.str_`` is used instead of the ``numpy.unicode_`` alias,
        # which was removed in NumPy 2.0; both name the same type on
        # NumPy 1.x.
        if (stvalue is value and
                type(value) in (bool, bytes, int, float, complex, str,
                                numpy.str_)):
            # Additional check for allowing a workaround for #307
            if isinstance(value, str) and len(value) == 0:
                stvalue = numpy.array(u'')
            else:
                stvalue = numpy.array(value)
            value = stvalue[()]

        self._g_setattr(self._v_node, name, stvalue)

        # New attribute or value. Introduce it into the local
        # directory
        self.__dict__[name] = value

        # Finally, add this attribute to the list if not present
        attrnames = self._v_attrnames
        if name not in attrnames:
            attrnames.append(name)
            attrnames.sort()
            if issysattrname(name):
                attrnamessys = self._v_attrnamessys
                attrnamessys.append(name)
                attrnamessys.sort()
            else:
                attrnamesuser = self._v_attrnamesuser
                attrnamesuser.append(name)
                attrnamesuser.sort()

    def __setattr__(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        A ``ValueError`` is raised when the name starts with a reserved
        prefix or contains a ``/``.  A `NaturalNameWarning` is issued if
        the name is not a valid Python identifier.  A
        `PerformanceWarning` is issued when the recommended maximum
        number of attributes in a node is going to be exceeded.

        """

        nodefile = self._v__nodefile
        attrnames = self._v_attrnames

        # Check for name validity
        check_attribute_name(name)

        nodefile._check_writable()

        # Check if there are too many attributes.
        max_node_attrs = nodefile.params['MAX_NODE_ATTRS']
        if len(attrnames) >= max_node_attrs:
            warnings.warn("""\
node ``%s`` is exceeding the recommended maximum number of attributes (%d);\
be ready to see PyTables asking for *lots* of memory and possibly slow I/O"""
                          % (self._v__nodepath, max_node_attrs),
                          PerformanceWarning)

        undo_enabled = nodefile.is_undo_enabled()
        # Log old attribute removal (if any).
        if undo_enabled and (name in attrnames):
            self._g_del_and_log(name)

        # Set the attribute.
        self._g__setattr(name, value)

        # Log new attribute addition.
        if undo_enabled:
            self._g_log_add(name)

    def _g_log_add(self, name):
        """Log the addition of the attribute `name` in the node file."""
        self._v__nodefile._log('ADDATTR', self._v__nodepath, name)

    def _g_del_and_log(self, name):
        """Log the deletion of `name` and move it to its shadow."""
        nodefile = self._v__nodefile
        node_pathname = self._v__nodepath
        # Log *before* moving to use the right shadow name.
        nodefile._log('DELATTR', node_pathname, name)
        attr_to_shadow(nodefile, node_pathname, name)

    def _g__delattr(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute.

        It does not log the change.

        """

        # Delete the attribute from disk
        self._g_remove(self._v_node, name)

        # Delete the attribute from local lists
        self._v_attrnames.remove(name)
        if name in self._v_attrnamessys:
            self._v_attrnamessys.remove(name)
        else:
            self._v_attrnamesuser.remove(name)

        # Delete the attribute from the local directory
        # closes (#1049285)
        del self.__dict__[name]

    def __delattr__(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute from the
        attribute set.  If a nonexistent attribute is specified, an
        ``AttributeError`` is raised.

        When undo is enabled in the file, the removal is logged and the
        attribute is moved to its shadow instead of being deleted.

        """

        nodefile = self._v__nodefile

        # Check if attribute exists
        if name not in self._v_attrnames:
            raise AttributeError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

        nodefile._check_writable()

        # Remove the PyTables attribute or move it to shadow.
        if nodefile.is_undo_enabled():
            self._g_del_and_log(name)
        else:
            self._g__delattr(name)

    def __getitem__(self, name):
        """The dictionary like interface for __getattr__()."""

        try:
            return self.__getattr__(name)
        except AttributeError:
            # Capture the AttributeError and re-raise a KeyError one
            raise KeyError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

    def __setitem__(self, name, value):
        """The dictionary like interface for __setattr__()."""

        self.__setattr__(name, value)

    def __delitem__(self, name):
        """The dictionary like interface for __delattr__()."""

        try:
            self.__delattr__(name)
        except AttributeError:
            # Capture the AttributeError and re-raise a KeyError one
            raise KeyError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

    def __contains__(self, name):
        """Is there an attribute with that name?

        A true value is returned if the attribute set has an attribute
        with the given name, false otherwise.

        """

        return name in self._v_attrnames

    def _f_rename(self, oldattrname, newattrname):
        """Rename an attribute from oldattrname to newattrname."""

        if oldattrname == newattrname:
            # Do nothing
            return

        # First, fetch the value of the oldattrname
        attrvalue = getattr(self, oldattrname)

        # Now, create the new attribute
        setattr(self, newattrname, attrvalue)

        # Finally, remove the old attribute
        delattr(self, oldattrname)

    def _g_copy(self, newset, set_attr=None, copyclass=False):
        """Copy set attributes.

        Copies all user and allowed system PyTables attributes to the
        given attribute set, replacing the existing ones.

        You can specify a *bound* method of the destination set that
        will be used to set its attributes.  Else, its `_g__setattr`
        method will be used.

        Changes are logged depending on the chosen setting method.  The
        default setting method does not log anything.

        .. versionchanged:: 3.0
           The *newSet* parameter has been renamed into *newset*.

        .. versionchanged:: 3.0
           The *copyClass* parameter has been renamed into *copyclass*.

        """

        copysysattrs = newset._v__nodefile.params['PYTABLES_SYS_ATTRS']
        if set_attr is None:
            set_attr = newset._g__setattr

        for attrname in self._v_attrnamesuser:
            # Do not copy the unimplemented attributes.
            if attrname not in self._v_unimplemented:
                set_attr(attrname, getattr(self, attrname))
        # Copy the system attributes that we are allowed to.
        if copysysattrs:
            for attrname in self._v_attrnamessys:
                if ((attrname not in SYS_ATTRS_NOTTOBECOPIED) and
                    # Do not copy the FIELD_ attributes in tables as this can
                    # be really *slow* (don't know exactly the reason).
                    # See #304.
                        not attrname.startswith("FIELD_")):
                    set_attr(attrname, getattr(self, attrname))
            # Copy CLASS and VERSION attributes if requested
            if copyclass:
                for attrname in FORCE_COPY_CLASS:
                    if attrname in self._v_attrnamessys:
                        set_attr(attrname, getattr(self, attrname))

    def _f_copy(self, where):
        """Copy attributes to the where node.

        Copies all user and certain system attributes to the given where
        node (a Node instance - see :ref:`NodeClassDescr`), replacing
        the existing ones.

        A ``TypeError`` is raised if `where` is not a node.

        """

        # AttributeSet must be defined in order to define a Node.
        # However, we need to know Node here.
        # Using class_name_dict avoids a circular import.
        if not isinstance(where, class_name_dict['Node']):
            raise TypeError("destination object is not a node: %r" % (where,))
        self._g_copy(where._v_attrs, where._v_attrs.__setattr__)

    def _g_close(self):
        """Close hook for the attribute set; it does nothing."""
        # Nothing will be done here, as the existing instance is completely
        # operative now.
        pass

    def __str__(self):
        """The string representation for this object."""

        # The pathname
        pathname = self._v__nodepath
        # Get this class name
        classname = self.__class__.__name__
        # The number of attributes
        attrnumber = len(self._v_attrnames)
        return "%s._v_attrs (%s), %s attributes" % \
               (pathname, classname, attrnumber)

    def __repr__(self):
        """A detailed string representation for this object."""

        # print additional info only if there are attributes to show
        attrnames = self._v_attrnames
        if not attrnames:
            return str(self)

        rep = ['%s := %r' % (attr, getattr(self, attr))
               for attr in attrnames]
        attrlist = '[%s]' % (',\n '.join(rep))

        return "%s:\n %s" % (str(self), attrlist)


class NotLoggedAttributeSet(AttributeSet):
    """AttributeSet whose changes are never logged.

    Attribute additions are not logged, and deletions remove the
    attribute directly without logging or shadowing.

    """

    def _g_log_add(self, name):
        """Do not log the addition of `name`."""
        pass

    def _g_del_and_log(self, name):
        """Delete the attribute `name` directly, without logging."""
        self._g__delattr(name)


## Local Variables:
## mode: python
## py-indent-offset: 4
## tab-width: 4
## fill-column: 72
## End: