1# cython: binding=True 2# cython: auto_pickle=False 3# cython: language_level=2 4 5""" 6The ``lxml.objectify`` module implements a Python object API for XML. 7It is based on `lxml.etree`. 8""" 9 10from __future__ import absolute_import 11 12cimport cython 13 14from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup 15from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode 16from lxml.includes.tree cimport const_xmlChar, _xcstr 17from lxml cimport python 18from lxml.includes cimport tree 19 20cimport lxml.includes.etreepublic as cetree 21cimport libc.string as cstring_h # not to be confused with stdlib 'string' 22from libc.string cimport const_char 23 24__all__ = [u'BoolElement', u'DataElement', u'E', u'Element', u'ElementMaker', 25 u'FloatElement', u'IntElement', u'LongElement', u'NoneElement', 26 u'NumberElement', u'ObjectPath', u'ObjectifiedDataElement', 27 u'ObjectifiedElement', u'ObjectifyElementClassLookup', 28 u'PYTYPE_ATTRIBUTE', u'PyType', u'StringElement', u'SubElement', 29 u'XML', u'annotate', u'deannotate', u'dump', u'enable_recursive_str', 30 u'fromstring', u'getRegisteredTypes', u'makeparser', u'parse', 31 u'pyannotate', u'pytypename', u'set_default_parser', 32 u'set_pytype_attribute_tag', u'xsiannotate'] 33 34cdef object etree 35from lxml import etree 36# initialize C-API of lxml.etree 37import_lxml__etree() 38 39__version__ = etree.__version__ 40 41cdef object re 42import re 43 44cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError) 45cdef object is_special_method = re.compile(u'__.*__$').match 46 47 48# Duplicated from apihelpers.pxi, since dependencies obstruct 49# including apihelpers.pxi. 50cdef strrepr(s): 51 """Build a representation of strings which we can use in __repr__ 52 methods, e.g. _Element.__repr__(). 
53 """ 54 return s.encode('unicode-escape') if python.IS_PYTHON2 else s 55 56 57cdef object _typename(object t): 58 cdef const_char* c_name 59 c_name = python._fqtypename(t) 60 s = cstring_h.strrchr(c_name, c'.') 61 if s is not NULL: 62 c_name = s + 1 63 return pyunicode(<const_xmlChar*>c_name) 64 65 66# namespace/name for "pytype" hint attribute 67cdef object PYTYPE_NAMESPACE 68cdef bytes PYTYPE_NAMESPACE_UTF8 69cdef const_xmlChar* _PYTYPE_NAMESPACE 70 71cdef object PYTYPE_ATTRIBUTE_NAME 72cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8 73cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME 74 75PYTYPE_ATTRIBUTE = None 76 77cdef unicode TREE_PYTYPE_NAME = u"TREE" 78 79cdef tuple _unicodeAndUtf8(s): 80 return s, python.PyUnicode_AsUTF8String(s) 81 82def set_pytype_attribute_tag(attribute_tag=None): 83 u"""set_pytype_attribute_tag(attribute_tag=None) 84 Change name and namespace of the XML attribute that holds Python type 85 information. 86 87 Do not use this unless you know what you are doing. 88 89 Reset by calling without argument. 
90 91 Default: "{http://codespeak.net/lxml/objectify/pytype}pytype" 92 """ 93 global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME 94 global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 95 global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 96 if attribute_tag is None: 97 PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \ 98 _unicodeAndUtf8(u"http://codespeak.net/lxml/objectify/pytype") 99 PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ 100 _unicodeAndUtf8(u"pytype") 101 else: 102 PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ 103 cetree.getNsTag(attribute_tag) 104 PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8') 105 PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8') 106 107 _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8 108 _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8 109 PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( 110 _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) 111 112set_pytype_attribute_tag() 113 114 115# namespaces for XML Schema 116cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 117XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \ 118 _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema") 119cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8) 120 121cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 122XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \ 123 _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema-instance") 124cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8) 125 126cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = u"{%s}nil" % XML_SCHEMA_INSTANCE_NS 127cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = u"{%s}type" % XML_SCHEMA_INSTANCE_NS 128 129 130################################################################################ 131# Element class for the main API 132 133cdef class ObjectifiedElement(ElementBase): 134 u"""Main XML Element class. 135 136 Element children are accessed as object attributes. 
Multiple children 137 with the same name are available through a list index. Example:: 138 139 >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>") 140 >>> second_c2 = root.c1.c2[1] 141 >>> print(second_c2.text) 142 1 143 144 Note that you cannot (and must not) instantiate this class or its 145 subclasses. 146 """ 147 def __iter__(self): 148 u"""Iterate over self and all siblings with the same tag. 149 """ 150 parent = self.getparent() 151 if parent is None: 152 return iter([self]) 153 return etree.ElementChildIterator(parent, tag=self.tag) 154 155 def __str__(self): 156 if __RECURSIVE_STR: 157 return _dump(self, 0) 158 else: 159 return textOf(self._c_node) or u'' 160 161 # pickle support for objectified Element 162 def __reduce__(self): 163 return fromstring, (etree.tostring(self),) 164 165 @property 166 def text(self): 167 return textOf(self._c_node) 168 169 @property 170 def __dict__(self): 171 """A fake implementation for __dict__ to support dir() etc. 172 173 Note that this only considers the first child with a given name. 174 """ 175 cdef _Element child 176 cdef dict children 177 c_ns = tree._getNs(self._c_node) 178 tag = u"{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None 179 children = {} 180 for child in etree.ElementChildIterator(self, tag=tag): 181 if c_ns is NULL and tree._getNs(child._c_node) is not NULL: 182 continue 183 name = pyunicode(child._c_node.name) 184 if name not in children: 185 children[name] = child 186 return children 187 188 def __len__(self): 189 u"""Count self and siblings with the same tag. 190 """ 191 return _countSiblings(self._c_node) 192 193 def countchildren(self): 194 u"""countchildren(self) 195 196 Return the number of children of this element, regardless of their 197 name. 
198 """ 199 # copied from etree 200 cdef Py_ssize_t c 201 cdef tree.xmlNode* c_node 202 c = 0 203 c_node = self._c_node.children 204 while c_node is not NULL: 205 if tree._isElement(c_node): 206 c += 1 207 c_node = c_node.next 208 return c 209 210 def getchildren(self): 211 u"""getchildren(self) 212 213 Returns a sequence of all direct children. The elements are 214 returned in document order. 215 """ 216 cdef tree.xmlNode* c_node 217 result = [] 218 c_node = self._c_node.children 219 while c_node is not NULL: 220 if tree._isElement(c_node): 221 result.append(cetree.elementFactory(self._doc, c_node)) 222 c_node = c_node.next 223 return result 224 225 def __getattr__(self, tag): 226 u"""Return the (first) child with the given tag name. If no namespace 227 is provided, the child will be looked up in the same one as self. 228 """ 229 if is_special_method(tag): 230 return object.__getattr__(self, tag) 231 return _lookupChildOrRaise(self, tag) 232 233 def __setattr__(self, tag, value): 234 u"""Set the value of the (first) child with the given tag name. If no 235 namespace is provided, the child will be looked up in the same one as 236 self. 237 """ 238 cdef _Element element 239 # properties are looked up /after/ __setattr__, so we must emulate them 240 if tag == u'text' or tag == u'pyval': 241 # read-only ! 
242 raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable" 243 elif tag == u'tail': 244 cetree.setTailText(self._c_node, value) 245 return 246 elif tag == u'tag': 247 ElementBase.tag.__set__(self, value) 248 return 249 elif tag == u'base': 250 ElementBase.base.__set__(self, value) 251 return 252 tag = _buildChildTag(self, tag) 253 element = _lookupChild(self, tag) 254 if element is None: 255 _appendValue(self, tag, value) 256 else: 257 _replaceElement(element, value) 258 259 def __delattr__(self, tag): 260 child = _lookupChildOrRaise(self, tag) 261 self.remove(child) 262 263 def addattr(self, tag, value): 264 u"""addattr(self, tag, value) 265 266 Add a child value to the element. 267 268 As opposed to append(), it sets a data value, not an element. 269 """ 270 _appendValue(self, _buildChildTag(self, tag), value) 271 272 def __getitem__(self, key): 273 u"""Return a sibling, counting from the first child of the parent. The 274 method behaves like both a dict and a sequence. 275 276 * If argument is an integer, returns the sibling at that position. 277 278 * If argument is a string, does the same as getattr(). This can be 279 used to provide namespaces for element lookup, or to look up 280 children with special names (``text`` etc.). 281 282 * If argument is a slice object, returns the matching slice. 
283 """ 284 cdef tree.xmlNode* c_self_node 285 cdef tree.xmlNode* c_parent 286 cdef tree.xmlNode* c_node 287 cdef Py_ssize_t c_index 288 if python._isString(key): 289 return _lookupChildOrRaise(self, key) 290 elif isinstance(key, slice): 291 return list(self)[key] 292 # normal item access 293 c_index = key # raises TypeError if necessary 294 c_self_node = self._c_node 295 c_parent = c_self_node.parent 296 if c_parent is NULL: 297 if c_index == 0 or c_index == -1: 298 return self 299 raise IndexError, unicode(key) 300 if c_index < 0: 301 c_node = c_parent.last 302 else: 303 c_node = c_parent.children 304 c_node = _findFollowingSibling( 305 c_node, tree._getNs(c_self_node), c_self_node.name, c_index) 306 if c_node is NULL: 307 raise IndexError, unicode(key) 308 return elementFactory(self._doc, c_node) 309 310 def __setitem__(self, key, value): 311 u"""Set the value of a sibling, counting from the first child of the 312 parent. Implements key assignment, item assignment and slice 313 assignment. 314 315 * If argument is an integer, sets the sibling at that position. 316 317 * If argument is a string, does the same as setattr(). This is used 318 to provide namespaces for element lookup. 319 320 * If argument is a sequence (list, tuple, etc.), assign the contained 321 items to the siblings. 322 """ 323 cdef _Element element 324 cdef tree.xmlNode* c_node 325 if python._isString(key): 326 key = _buildChildTag(self, key) 327 element = _lookupChild(self, key) 328 if element is None: 329 _appendValue(self, key, value) 330 else: 331 _replaceElement(element, value) 332 return 333 334 if self._c_node.parent is NULL: 335 # the 'root[i] = ...' 
case 336 raise TypeError, u"assignment to root element is invalid" 337 338 if isinstance(key, slice): 339 # slice assignment 340 _setSlice(key, self, value) 341 else: 342 # normal index assignment 343 if key < 0: 344 c_node = self._c_node.parent.last 345 else: 346 c_node = self._c_node.parent.children 347 c_node = _findFollowingSibling( 348 c_node, tree._getNs(self._c_node), self._c_node.name, key) 349 if c_node is NULL: 350 raise IndexError, unicode(key) 351 element = elementFactory(self._doc, c_node) 352 _replaceElement(element, value) 353 354 def __delitem__(self, key): 355 parent = self.getparent() 356 if parent is None: 357 raise TypeError, u"deleting items not supported by root element" 358 if isinstance(key, slice): 359 # slice deletion 360 del_items = list(self)[key] 361 remove = parent.remove 362 for el in del_items: 363 remove(el) 364 else: 365 # normal index deletion 366 sibling = self.__getitem__(key) 367 parent.remove(sibling) 368 369 def descendantpaths(self, prefix=None): 370 u"""descendantpaths(self, prefix=None) 371 372 Returns a list of object path expressions for all descendants. 
373 """ 374 if prefix is not None and not python._isString(prefix): 375 prefix = u'.'.join(prefix) 376 return _build_descendant_paths(self._c_node, prefix) 377 378 379cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name): 380 if c_node.name != c_name: 381 return 0 382 if c_href == NULL: 383 return 1 384 c_node_href = tree._getNs(c_node) 385 if c_node_href == NULL: 386 return c_href[0] == c'\0' 387 return tree.xmlStrcmp(c_node_href, c_href) == 0 388 389 390cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node): 391 cdef tree.xmlNode* c_node 392 cdef Py_ssize_t count 393 c_tag = c_start_node.name 394 c_href = tree._getNs(c_start_node) 395 count = 1 396 c_node = c_start_node.next 397 while c_node is not NULL: 398 if c_node.type == tree.XML_ELEMENT_NODE and \ 399 _tagMatches(c_node, c_href, c_tag): 400 count += 1 401 c_node = c_node.next 402 c_node = c_start_node.prev 403 while c_node is not NULL: 404 if c_node.type == tree.XML_ELEMENT_NODE and \ 405 _tagMatches(c_node, c_href, c_tag): 406 count += 1 407 c_node = c_node.prev 408 return count 409 410cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, 411 const_xmlChar* href, const_xmlChar* name, 412 Py_ssize_t index): 413 cdef tree.xmlNode* (*next)(tree.xmlNode*) 414 if index >= 0: 415 next = cetree.nextElement 416 else: 417 index = -1 - index 418 next = cetree.previousElement 419 while c_node is not NULL: 420 if c_node.type == tree.XML_ELEMENT_NODE and \ 421 _tagMatches(c_node, href, name): 422 index = index - 1 423 if index < 0: 424 return c_node 425 c_node = next(c_node) 426 return NULL 427 428cdef object _lookupChild(_Element parent, tag): 429 cdef tree.xmlNode* c_result 430 cdef tree.xmlNode* c_node 431 c_node = parent._c_node 432 ns, tag = cetree.getNsTagWithEmptyNs(tag) 433 c_tag = tree.xmlDictExists( 434 c_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag)) 435 if c_tag is NULL: 436 return None # not in the hash map => not in the tree 437 if ns is 
None: 438 # either inherit ns from parent or use empty (i.e. no) namespace 439 c_href = tree._getNs(c_node) or <const_xmlChar*>'' 440 else: 441 c_href = _xcstr(ns) 442 c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) 443 if c_result is NULL: 444 return None 445 return elementFactory(parent._doc, c_result) 446 447cdef object _lookupChildOrRaise(_Element parent, tag): 448 element = _lookupChild(parent, tag) 449 if element is None: 450 raise AttributeError, u"no such child: " + _buildChildTag(parent, tag) 451 return element 452 453cdef object _buildChildTag(_Element parent, tag): 454 ns, tag = cetree.getNsTag(tag) 455 c_tag = _xcstr(tag) 456 c_href = tree._getNs(parent._c_node) if ns is None else _xcstr(ns) 457 return cetree.namespacedNameFromNsName(c_href, c_tag) 458 459cdef _replaceElement(_Element element, value): 460 cdef _Element new_element 461 if isinstance(value, _Element): 462 # deep copy the new element 463 new_element = cetree.deepcopyNodeToDocument( 464 element._doc, (<_Element>value)._c_node) 465 new_element.tag = element.tag 466 elif isinstance(value, (list, tuple)): 467 element[:] = value 468 return 469 else: 470 new_element = element.makeelement(element.tag) 471 _setElementValue(new_element, value) 472 element.getparent().replace(element, new_element) 473 474cdef _appendValue(_Element parent, tag, value): 475 cdef _Element new_element 476 if isinstance(value, _Element): 477 # deep copy the new element 478 new_element = cetree.deepcopyNodeToDocument( 479 parent._doc, (<_Element>value)._c_node) 480 new_element.tag = tag 481 cetree.appendChildToElement(parent, new_element) 482 elif isinstance(value, (list, tuple)): 483 for item in value: 484 _appendValue(parent, tag, item) 485 else: 486 new_element = cetree.makeElement( 487 tag, parent._doc, None, None, None, None, None) 488 _setElementValue(new_element, value) 489 cetree.appendChildToElement(parent, new_element) 490 491cdef _setElementValue(_Element element, value): 492 if value is 
None: 493 cetree.setAttributeValue( 494 element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true") 495 elif isinstance(value, _Element): 496 _replaceElement(element, value) 497 return 498 else: 499 cetree.delAttributeFromNsName( 500 element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") 501 if python._isString(value): 502 pytype_name = u"str" 503 py_type = <PyType>_PYTYPE_DICT.get(pytype_name) 504 else: 505 pytype_name = _typename(value) 506 py_type = <PyType>_PYTYPE_DICT.get(pytype_name) 507 if py_type is not None: 508 value = py_type.stringify(value) 509 else: 510 value = unicode(value) 511 if py_type is not None: 512 cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) 513 else: 514 cetree.delAttributeFromNsName( 515 element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) 516 cetree.setNodeText(element._c_node, value) 517 518cdef _setSlice(sliceobject, _Element target, items): 519 cdef _Element parent 520 cdef tree.xmlNode* c_node 521 cdef Py_ssize_t c_step, c_start, pos 522 # collect existing slice 523 if (<slice>sliceobject).step is None: 524 c_step = 1 525 else: 526 c_step = (<slice>sliceobject).step 527 if c_step == 0: 528 raise ValueError, u"Invalid slice" 529 cdef list del_items = target[sliceobject] 530 531 # collect new values 532 new_items = [] 533 tag = target.tag 534 for item in items: 535 if isinstance(item, _Element): 536 # deep copy the new element 537 new_element = cetree.deepcopyNodeToDocument( 538 target._doc, (<_Element>item)._c_node) 539 new_element.tag = tag 540 else: 541 new_element = cetree.makeElement( 542 tag, target._doc, None, None, None, None, None) 543 _setElementValue(new_element, item) 544 new_items.append(new_element) 545 546 # sanity check - raise what a list would raise 547 if c_step != 1 and len(del_items) != len(new_items): 548 raise ValueError, \ 549 f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}" 550 551 # replace existing items 552 pos = 0 553 parent = 
target.getparent() 554 replace = parent.replace 555 while pos < len(new_items) and pos < len(del_items): 556 replace(del_items[pos], new_items[pos]) 557 pos += 1 558 # remove leftover items 559 if pos < len(del_items): 560 remove = parent.remove 561 while pos < len(del_items): 562 remove(del_items[pos]) 563 pos += 1 564 # append remaining new items 565 if pos < len(new_items): 566 # the sanity check above guarantees (step == 1) 567 if pos > 0: 568 item = new_items[pos-1] 569 else: 570 if (<slice>sliceobject).start > 0: 571 c_node = parent._c_node.children 572 else: 573 c_node = parent._c_node.last 574 c_node = _findFollowingSibling( 575 c_node, tree._getNs(target._c_node), target._c_node.name, 576 (<slice>sliceobject).start - 1) 577 if c_node is NULL: 578 while pos < len(new_items): 579 cetree.appendChildToElement(parent, new_items[pos]) 580 pos += 1 581 return 582 item = cetree.elementFactory(parent._doc, c_node) 583 while pos < len(new_items): 584 add = item.addnext 585 item = new_items[pos] 586 add(item) 587 pos += 1 588 589################################################################################ 590# Data type support in subclasses 591 592cdef class ObjectifiedDataElement(ObjectifiedElement): 593 u"""This is the base class for all data type Elements. Subclasses should 594 override the 'pyval' property and possibly the __str__ method. 595 """ 596 @property 597 def pyval(self): 598 return textOf(self._c_node) 599 600 def __str__(self): 601 return textOf(self._c_node) or '' 602 603 def __repr__(self): 604 return strrepr(textOf(self._c_node) or '') 605 606 def _setText(self, s): 607 u"""For use in subclasses only. Don't use unless you know what you are 608 doing. 609 """ 610 cetree.setNodeText(self._c_node, s) 611 612 613cdef class NumberElement(ObjectifiedDataElement): 614 cdef object _parse_value 615 616 def _setValueParser(self, function): 617 u"""Set the function that parses the Python value from a string. 
618 619 Do not use this unless you know what you are doing. 620 """ 621 self._parse_value = function 622 623 @property 624 def pyval(self): 625 return _parseNumber(self) 626 627 def __int__(self): 628 return int(_parseNumber(self)) 629 630 def __long__(self): 631 return long(_parseNumber(self)) 632 633 def __float__(self): 634 return float(_parseNumber(self)) 635 636 def __complex__(self): 637 return complex(_parseNumber(self)) 638 639 def __str__(self): 640 return unicode(_parseNumber(self)) 641 642 def __repr__(self): 643 return repr(_parseNumber(self)) 644 645 def __oct__(self): 646 return oct(_parseNumber(self)) 647 648 def __hex__(self): 649 return hex(_parseNumber(self)) 650 651 def __richcmp__(self, other, int op): 652 return _richcmpPyvals(self, other, op) 653 654 def __hash__(self): 655 return hash(_parseNumber(self)) 656 657 def __add__(self, other): 658 return _numericValueOf(self) + _numericValueOf(other) 659 660 def __radd__(self, other): 661 return _numericValueOf(other) + _numericValueOf(self) 662 663 def __sub__(self, other): 664 return _numericValueOf(self) - _numericValueOf(other) 665 666 def __rsub__(self, other): 667 return _numericValueOf(other) - _numericValueOf(self) 668 669 def __mul__(self, other): 670 return _numericValueOf(self) * _numericValueOf(other) 671 672 def __rmul__(self, other): 673 return _numericValueOf(other) * _numericValueOf(self) 674 675 def __div__(self, other): 676 return _numericValueOf(self) / _numericValueOf(other) 677 678 def __rdiv__(self, other): 679 return _numericValueOf(other) / _numericValueOf(self) 680 681 def __truediv__(self, other): 682 return _numericValueOf(self) / _numericValueOf(other) 683 684 def __rtruediv__(self, other): 685 return _numericValueOf(other) / _numericValueOf(self) 686 687 def __floordiv__(self, other): 688 return _numericValueOf(self) // _numericValueOf(other) 689 690 def __rfloordiv__(self, other): 691 return _numericValueOf(other) // _numericValueOf(self) 692 693 def __mod__(self, 
other): 694 return _numericValueOf(self) % _numericValueOf(other) 695 696 def __rmod__(self, other): 697 return _numericValueOf(other) % _numericValueOf(self) 698 699 def __divmod__(self, other): 700 return divmod(_numericValueOf(self), _numericValueOf(other)) 701 702 def __rdivmod__(self, other): 703 return divmod(_numericValueOf(other), _numericValueOf(self)) 704 705 def __pow__(self, other, modulo): 706 if modulo is None: 707 return _numericValueOf(self) ** _numericValueOf(other) 708 else: 709 return pow(_numericValueOf(self), _numericValueOf(other), modulo) 710 711 def __rpow__(self, other, modulo): 712 if modulo is None: 713 return _numericValueOf(other) ** _numericValueOf(self) 714 else: 715 return pow(_numericValueOf(other), _numericValueOf(self), modulo) 716 717 def __neg__(self): 718 return - _numericValueOf(self) 719 720 def __pos__(self): 721 return + _numericValueOf(self) 722 723 def __abs__(self): 724 return abs( _numericValueOf(self) ) 725 726 def __bool__(self): 727 return bool(_numericValueOf(self)) 728 729 def __invert__(self): 730 return ~ _numericValueOf(self) 731 732 def __lshift__(self, other): 733 return _numericValueOf(self) << _numericValueOf(other) 734 735 def __rlshift__(self, other): 736 return _numericValueOf(other) << _numericValueOf(self) 737 738 def __rshift__(self, other): 739 return _numericValueOf(self) >> _numericValueOf(other) 740 741 def __rrshift__(self, other): 742 return _numericValueOf(other) >> _numericValueOf(self) 743 744 def __and__(self, other): 745 return _numericValueOf(self) & _numericValueOf(other) 746 747 def __rand__(self, other): 748 return _numericValueOf(other) & _numericValueOf(self) 749 750 def __or__(self, other): 751 return _numericValueOf(self) | _numericValueOf(other) 752 753 def __ror__(self, other): 754 return _numericValueOf(other) | _numericValueOf(self) 755 756 def __xor__(self, other): 757 return _numericValueOf(self) ^ _numericValueOf(other) 758 759 def __rxor__(self, other): 760 return 
_numericValueOf(other) ^ _numericValueOf(self) 761 762 763cdef class IntElement(NumberElement): 764 def _init(self): 765 self._parse_value = int 766 767 def __index__(self): 768 return int(_parseNumber(self)) 769 770 771cdef class LongElement(NumberElement): 772 def _init(self): 773 self._parse_value = long 774 775 def __index__(self): 776 return int(_parseNumber(self)) 777 778 779cdef class FloatElement(NumberElement): 780 def _init(self): 781 self._parse_value = float 782 783 784cdef class StringElement(ObjectifiedDataElement): 785 u"""String data class. 786 787 Note that this class does *not* support the sequence protocol of strings: 788 len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported. 789 Instead, use the .text attribute to get a 'real' string. 790 """ 791 @property 792 def pyval(self): 793 return textOf(self._c_node) or u'' 794 795 def __repr__(self): 796 return repr(textOf(self._c_node) or u'') 797 798 def strlen(self): 799 text = textOf(self._c_node) 800 if text is None: 801 return 0 802 else: 803 return len(text) 804 805 def __bool__(self): 806 return bool(textOf(self._c_node)) 807 808 def __richcmp__(self, other, int op): 809 return _richcmpPyvals(self, other, op) 810 811 def __hash__(self): 812 return hash(textOf(self._c_node) or u'') 813 814 def __add__(self, other): 815 text = _strValueOf(self) 816 other = _strValueOf(other) 817 return text + other 818 819 def __radd__(self, other): 820 text = _strValueOf(self) 821 other = _strValueOf(other) 822 return other + text 823 824 def __mul__(self, other): 825 if isinstance(self, StringElement): 826 return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other) 827 elif isinstance(other, StringElement): 828 return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '') 829 else: 830 return NotImplemented 831 832 def __rmul__(self, other): 833 return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '') 834 835 def __mod__(self, other): 836 
return (_strValueOf(self) or '') % other 837 838 def __int__(self): 839 return int(textOf(self._c_node)) 840 841 def __long__(self): 842 return long(textOf(self._c_node)) 843 844 def __float__(self): 845 return float(textOf(self._c_node)) 846 847 def __complex__(self): 848 return complex(textOf(self._c_node)) 849 850 851cdef class NoneElement(ObjectifiedDataElement): 852 def __str__(self): 853 return u"None" 854 855 def __repr__(self): 856 return "None" 857 858 def __bool__(self): 859 return False 860 861 def __richcmp__(self, other, int op): 862 if other is None or self is None: 863 return python.PyObject_RichCompare(None, None, op) 864 if isinstance(self, NoneElement): 865 return python.PyObject_RichCompare(None, other, op) 866 else: 867 return python.PyObject_RichCompare(self, None, op) 868 869 def __hash__(self): 870 return hash(None) 871 872 @property 873 def pyval(self): 874 return None 875 876 877cdef class BoolElement(IntElement): 878 u"""Boolean type base on string values: 'true' or 'false'. 879 880 Note that this inherits from IntElement to mimic the behaviour of 881 Python's bool type. 
882 """ 883 def _init(self): 884 self._parse_value = _parseBool # wraps as Python callable 885 886 def __bool__(self): 887 return _parseBool(textOf(self._c_node)) 888 889 def __int__(self): 890 return 0 + _parseBool(textOf(self._c_node)) 891 892 def __float__(self): 893 return 0.0 + _parseBool(textOf(self._c_node)) 894 895 def __richcmp__(self, other, int op): 896 return _richcmpPyvals(self, other, op) 897 898 def __hash__(self): 899 return hash(_parseBool(textOf(self._c_node))) 900 901 def __str__(self): 902 return unicode(_parseBool(textOf(self._c_node))) 903 904 def __repr__(self): 905 return repr(_parseBool(textOf(self._c_node))) 906 907 @property 908 def pyval(self): 909 return _parseBool(textOf(self._c_node)) 910 911 912cdef _checkBool(s): 913 cdef int value = -1 914 if s is not None: 915 value = __parseBoolAsInt(s) 916 if value == -1: 917 raise ValueError 918 919 920cdef bint _parseBool(s) except -1: 921 cdef int value 922 if s is None: 923 return False 924 value = __parseBoolAsInt(s) 925 if value == -1: 926 raise ValueError, f"Invalid boolean value: '{s}'" 927 return value 928 929 930cdef inline int __parseBoolAsInt(text) except -2: 931 if text == 'false': 932 return 0 933 elif text == 'true': 934 return 1 935 elif text == '0': 936 return 0 937 elif text == '1': 938 return 1 939 return -1 940 941 942cdef object _parseNumber(NumberElement element): 943 return element._parse_value(textOf(element._c_node)) 944 945 946cdef enum NumberParserState: 947 NPS_SPACE_PRE = 0 948 NPS_SIGN = 1 949 NPS_DIGITS = 2 950 NPS_POINT_LEAD = 3 951 NPS_POINT = 4 952 NPS_FRACTION = 5 953 NPS_EXP = 6 954 NPS_EXP_SIGN = 7 955 NPS_DIGITS_EXP = 8 956 NPS_SPACE_TAIL = 9 957 NPS_INF1 = 20 958 NPS_INF2 = 21 959 NPS_INF3 = 22 960 NPS_NAN1 = 23 961 NPS_NAN2 = 24 962 NPS_NAN3 = 25 963 NPS_ERROR = 99 964 965 966ctypedef fused bytes_unicode: 967 bytes 968 unicode 969 970 971cdef _checkNumber(bytes_unicode s, bint allow_float): 972 cdef Py_UCS4 c 973 cdef NumberParserState state = 
NPS_SPACE_PRE 974 975 for c in s: 976 if c.isdigit() if (bytes_unicode is unicode) else c in b'0123456789': 977 if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP): 978 pass 979 elif state in (NPS_SPACE_PRE, NPS_SIGN): 980 state = NPS_DIGITS 981 elif state in (NPS_POINT_LEAD, NPS_POINT): 982 state = NPS_FRACTION 983 elif state in (NPS_EXP, NPS_EXP_SIGN): 984 state = NPS_DIGITS_EXP 985 else: 986 state = NPS_ERROR 987 else: 988 if c == u'.': 989 if state in (NPS_SPACE_PRE, NPS_SIGN): 990 state = NPS_POINT_LEAD 991 elif state == NPS_DIGITS: 992 state = NPS_POINT 993 else: 994 state = NPS_ERROR 995 if not allow_float: 996 state = NPS_ERROR 997 elif c in u'-+': 998 if state == NPS_SPACE_PRE: 999 state = NPS_SIGN 1000 elif state == NPS_EXP: 1001 state = NPS_EXP_SIGN 1002 else: 1003 state = NPS_ERROR 1004 elif c == u'E': 1005 if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION): 1006 state = NPS_EXP 1007 else: 1008 state = NPS_ERROR 1009 if not allow_float: 1010 state = NPS_ERROR 1011 # Allow INF and NaN. XMLSchema requires case, we don't, like Python. 1012 elif c in u'iI': 1013 state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR 1014 elif c in u'fF': 1015 state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR 1016 elif c in u'aA': 1017 state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR 1018 elif c in u'nN': 1019 # Python also allows [+-]NaN, so let's accept that. 1020 if state in (NPS_SPACE_PRE, NPS_SIGN): 1021 state = NPS_NAN1 if allow_float else NPS_ERROR 1022 elif state == NPS_NAN2: 1023 state = NPS_NAN3 1024 elif state == NPS_INF1: 1025 state = NPS_INF2 1026 else: 1027 state = NPS_ERROR 1028 # Allow spaces around text values. 
cdef class PyType:
    u"""PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    Named type that contains a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function.  The type check must take a string as argument and raise
    ValueError or TypeError if it cannot handle the string value.  It may be
    None in which case it is not considered for type guessing.  For registered
    named types, the 'stringify' function (or unicode() if None) is used to
    convert a Python object with type name 'name' to the string representation
    stored in the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters.  The first
    matching type will be used.
    """
    cdef readonly object name
    cdef readonly object type_check
    cdef readonly object stringify
    cdef object _type
    cdef list _schema_types
    def __init__(self, name, type_check, type_class, stringify=None):
        # Type names are kept as unicode; byte string names must be ASCII.
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError, u"Type name must be a string"
        if type_check is not None and not callable(type_check):
            raise TypeError, u"Type check function must be callable (or None)"
        # Only the type named TREE_PYTYPE_NAME is exempt from the
        # data element base class requirement.
        if name != TREE_PYTYPE_NAME and \
               not issubclass(type_class, ObjectifiedDataElement):
            raise TypeError, \
                u"Data classes must inherit from ObjectifiedDataElement"
        self.name = name
        self._type = type_class
        self.type_check = type_check
        if stringify is None:
            stringify = unicode
        self.stringify = stringify
        self._schema_types = []

    def __repr__(self):
        return "PyType(%s, %s)" % (self.name, self._type.__name__)

    def register(self, before=None, after=None):
        u"""register(self, before=None, after=None)

        Register the type.

        The additional keyword arguments 'before' and 'after' accept a
        sequence of type names that must appear before/after the new type in
        the type list.  If any of them is not currently known, it is simply
        ignored.  Raises ValueError if the dependencies cannot be fulfilled,
        in which case the list of type checks is left unchanged.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError, u"Cannot register tree type"
        if self.type_check is not None:
            # Plan the update on a copy and commit it only on success, so
            # that a dependency error cannot drop a previous registration
            # of the same check function.
            checks = list(_TYPE_CHECKS)
            for item in checks:
                if item[0] is self.type_check:
                    checks.remove(item)
                    break
            entry = (self.type_check, self)
            first_pos = 0
            last_pos = -1
            if before or after:
                if before is None:
                    before = ()
                if after is None:
                    after = ()
                for i, (check, pytype) in enumerate(checks):
                    # earliest position of any 'before' type
                    if last_pos == -1 and pytype.name in before:
                        last_pos = i
                    # position right behind the last 'after' type
                    if pytype.name in after:
                        first_pos = i+1
            if last_pos == -1:
                checks.append(entry)
            elif first_pos > last_pos:
                raise ValueError, u"inconsistent before/after dependencies"
            else:
                checks.insert(last_pos, entry)
            # commit in place: _TYPE_CHECKS is the module-level list
            _TYPE_CHECKS[:] = checks

        _PYTYPE_DICT[self.name] = self
        for xs_type in self._schema_types:
            _SCHEMA_TYPE_DICT[xs_type] = self

    def unregister(self):
        u"""unregister(self)

        Remove this type from the name mapping (only if it is the type
        currently registered under its name), from the XML Schema type
        mapping and from the list of type checks.
        """
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        # iterate over a snapshot, the dict is modified inside the loop
        for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()):
            if pytype is self:
                del _SCHEMA_TYPE_DICT[xs_type]
        if self.type_check is None:
            return
        try:
            _TYPE_CHECKS.remove( (self.type_check, self) )
        except ValueError:
            # no type check entry registered for this type
            pass

    property xmlSchemaTypes:
        u"""The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            self._schema_types = list(map(unicode, types))
def getRegisteredTypes():
    u"""getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list result = []
    cdef set seen_names = set()
    # types that take part in type guessing come first, in checking order,
    # followed by the remaining named types
    cdef list candidates = [pytype for _, pytype in _TYPE_CHECKS]
    candidates.extend(_PYTYPE_DICT.values())
    for pytype in candidates:
        type_name = pytype.name
        if type_name in seen_names:
            continue
        seen_names.add(type_name)
        result.append(pytype)
    return result

cdef PyType _guessPyType(value, PyType defaulttype):
    # Return the first registered PyType whose check function accepts
    # 'value', 'defaulttype' if none does, or None for a None value.
    if value is None:
        return None
    for check, candidate in _TYPE_CHECKS:
        try:
            check(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => try the next one
            continue
        return <PyType>candidate
    return defaulttype

cdef object _guessElementClass(tree.xmlNode* c_node):
    # Pick the element class from the text content of the node:
    # no text => None, empty text => StringElement, otherwise the class
    # of the first registered type whose check accepts the text (or None).
    text = textOf(c_node)
    if text is None:
        return None
    if text == '':
        return StringElement

    for check, candidate in _TYPE_CHECKS:
        try:
            check(text)
        except IGNORABLE_ERRORS:
            continue
        return (<PyType>candidate)._type
    return None
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # Callable created by ElementMaker for one tag name.  Calling it builds
    # the element for that tag from the given children and attributes.
    cdef object _tag
    cdef object _nsmap
    cdef object _element_factory
    cdef bint _annotate

    def __call__(self, *children, **attrib):
        u"""__call__(self, *children, **attrib)

        Build the element.  Children are handled by type: None (as the only
        child) sets the xsi:nil attribute, strings become text, elements and
        other element makers become child elements, dicts provide attributes
        (keyword arguments take precedence), and any other value is
        stringified through its registered PyType or unicode().
        """
        cdef _ObjectifyElementMakerCaller elementMaker
        cdef _Element element
        cdef _Element childElement
        cdef bint has_children
        cdef bint has_string_value
        if self._element_factory is None:
            element = _makeElement(self._tag, None, attrib, self._nsmap)
        else:
            element = self._element_factory(self._tag, attrib, self._nsmap)

        pytype_name = None
        has_children = False
        has_string_value = False
        for child in children:
            if child is None:
                if len(children) == 1:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled element maker, e.g. "E.br" => empty child
                elementMaker = <_ObjectifyElementMakerCaller>child
                if elementMaker._element_factory is None:
                    cetree.makeSubElement(element, elementMaker._tag,
                                          None, None, None, None)
                else:
                    childElement = elementMaker._element_factory(
                        elementMaker._tag)
                    cetree.appendChildToElement(element, childElement)
                has_children = True
            elif isinstance(child, dict):
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name in attrib:
                        continue
                    pytype = _PYTYPE_DICT.get(_typename(value))
                    if pytype is not None:
                        value = (<PyType>pytype).stringify(value)
                    elif not python._isString(value):
                        value = unicode(value)
                    cetree.setAttributeValue(element, name, value)
            else:
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                # look up the type name once and reuse it for the registry
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(pytype_name)
                if pytype is not None:
                    _add_text(element, (<PyType>pytype).stringify(child))
                else:
                    has_string_value = True
                    child = unicode(child)
                    _add_text(element, child)

        # only leaf elements get a pytype annotation
        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, u"str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element

cdef _add_text(_Element elem, text):
    # add text to the tree in construction, either as element text or
    # tail text, depending on the current tree state
    cdef tree.xmlNode* c_child
    c_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_child is not NULL:
        # a child was found => append to its tail text
        old = cetree.tailOf(c_child)
        if old is not None:
            text = old + text
        cetree.setTailText(c_child, text)
    else:
        # no child found => append to the text of the element itself
        old = cetree.textOf(elem._c_node)
        if old is not None:
            text = old + text
        cetree.setNodeText(elem._c_node, text)

cdef class ElementMaker:
    u"""ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    cdef object _makeelement
    cdef object _namespace
    cdef object _nsmap
    cdef bint _annotate
    cdef dict _cache
    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        if nsmap is None:
            nsmap = _DEFAULT_NSMAP if annotate else {}
        self._nsmap = nsmap
        # stored in "{namespace}" form, ready to be prepended to tag names
        self._namespace = None if namespace is None else u"{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None:
            if not callable(makeelement):
                raise TypeError(
                    f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
            self._makeelement = makeelement
        else:
            self._makeelement = None
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Create the callable for 'tag' and optionally remember it.
        cdef _ObjectifyElementMakerCaller element_maker
        element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        if self._namespace is not None and tag[0] != u"{":
            element_maker._tag = self._namespace + tag
        else:
            element_maker._tag = tag
        element_maker._nsmap = self._nsmap
        element_maker._annotate = self._annotate
        element_maker._element_factory = self._makeelement
        if caching:
            # keep the cache bounded: start over once it grows too large
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = element_maker
        return element_maker

    def __getattr__(self, tag):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            if is_special_method(tag):
                # Special "__xyz__" names are never tags.  Raise explicitly
                # instead of going through the non-existent
                # object.__getattr__, which only failed by accident.
                raise AttributeError(
                    f"'{type(self).__name__}' object has no attribute '{tag}'")
            return self._build_element_maker(tag, caching=True)
        return element_maker

    def __call__(self, tag, *args, **kwargs):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            # names that look like special methods are not cached, so they
            # cannot shadow attribute lookups in __getattr__
            element_maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return element_maker(*args, **kwargs)
1508 """ 1509 return _dump(element, 0) 1510 1511cdef object _dump(_Element element, int indent): 1512 indentstr = u" " * indent 1513 if isinstance(element, ObjectifiedDataElement): 1514 value = repr(element) 1515 else: 1516 value = textOf(element._c_node) 1517 if value is not None: 1518 if not value.strip(): 1519 value = None 1520 else: 1521 value = repr(value) 1522 result = f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n" 1523 xsi_ns = u"{%s}" % XML_SCHEMA_INSTANCE_NS 1524 pytype_ns = u"{%s}" % PYTYPE_NAMESPACE 1525 for name, value in sorted(cetree.iterattributes(element, 3)): 1526 if u'{' in name: 1527 if name == PYTYPE_ATTRIBUTE: 1528 if value == TREE_PYTYPE_NAME: 1529 continue 1530 else: 1531 name = name.replace(pytype_ns, u'py:') 1532 name = name.replace(xsi_ns, u'xsi:') 1533 result += f"{indentstr} * {name} = {value!r}\n" 1534 1535 indent += 1 1536 for child in element.iterchildren(): 1537 result += _dump(child, indent) 1538 if indent == 1: 1539 return result[:-1] # strip last '\n' 1540 else: 1541 return result 1542 1543 1544################################################################################ 1545# Pickle support for objectified ElementTree 1546 1547def __unpickleElementTree(data): 1548 return etree.ElementTree(fromstring(data)) 1549 1550cdef _setupPickle(elementTreeReduceFunction): 1551 if python.IS_PYTHON2: 1552 import copy_reg as copyreg 1553 else: 1554 import copyreg 1555 copyreg.pickle(etree._ElementTree, 1556 elementTreeReduceFunction, __unpickleElementTree) 1557 1558def pickleReduceElementTree(obj): 1559 return __unpickleElementTree, (etree.tostring(obj),) 1560 1561_setupPickle(pickleReduceElementTree) 1562del pickleReduceElementTree 1563 1564################################################################################ 1565# Element class lookup 1566 1567cdef class ObjectifyElementClassLookup(ElementClassLookup): 1568 u"""ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None) 1569 Element class lookup 
method that uses the objectify classes. 1570 """ 1571 cdef object empty_data_class 1572 cdef object tree_class 1573 def __init__(self, tree_class=None, empty_data_class=None): 1574 u"""Lookup mechanism for objectify. 1575 1576 The default Element classes can be replaced by passing subclasses of 1577 ObjectifiedElement and ObjectifiedDataElement as keyword arguments. 1578 'tree_class' defines inner tree classes (defaults to 1579 ObjectifiedElement), 'empty_data_class' defines the default class for 1580 empty data elements (defaults to StringElement). 1581 """ 1582 self._lookup_function = _lookupElementClass 1583 if tree_class is None: 1584 tree_class = ObjectifiedElement 1585 self.tree_class = tree_class 1586 if empty_data_class is None: 1587 empty_data_class = StringElement 1588 self.empty_data_class = empty_data_class 1589 1590cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): 1591 cdef ObjectifyElementClassLookup lookup 1592 lookup = <ObjectifyElementClassLookup>state 1593 # if element has children => no data class 1594 if cetree.hasChild(c_node): 1595 return lookup.tree_class 1596 1597 # if element is defined as xsi:nil, return NoneElement class 1598 if u"true" == cetree.attributeValueFromNsName( 1599 c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"): 1600 return NoneElement 1601 1602 # check for Python type hint 1603 value = cetree.attributeValueFromNsName( 1604 c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) 1605 if value is not None: 1606 if value == TREE_PYTYPE_NAME: 1607 return lookup.tree_class 1608 py_type = <PyType>_PYTYPE_DICT.get(value) 1609 if py_type is not None: 1610 return py_type._type 1611 # unknown 'pyval' => try to figure it out ourself, just go on 1612 1613 # check for XML Schema type hint 1614 value = cetree.attributeValueFromNsName( 1615 c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") 1616 1617 if value is not None: 1618 schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value) 1619 if schema_type is None 
cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Return 'pytype' if its type check accepts the text of the node,
    # None otherwise (also for a None 'pytype').
    if pytype is None:
        return None
    value = textOf(c_node)
    try:
        pytype.type_check(value)
        return pytype
    except IGNORABLE_ERRORS:
        # could not be parsed as the specified type => ignore
        pass
    return None

def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
               empty_pytype=None):
    u"""pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'pytype'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # pytype annotation only; 'ignore_old' refers to old pytype attributes
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)

def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    u"""xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'xsi:type'
    attributes will be ignored and replaced.  Otherwise (the default), they
    will be checked and only replaced if they no longer fit the current text
    value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # xsi:type annotation only; 'ignore_old' refers to old xsi:type attributes
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)

def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    u"""annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    If the 'ignore_old' keyword argument is True (the default), current
    'py:pytype' attributes will be ignored for the type annotation. Set to False
    if you want to reuse existing 'py:pytype' information (iff appropriate for
    the element text value).

    If the 'ignore_xsi' keyword argument is False (the default), existing
    'xsi:type' attributes will be used for the type annotation, if they fit the
    element text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    The default 'py:pytype' annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument. Pass 'str', for example, to make
    string values the default.

    The default 'xsi:type' annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.

    The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
    (default: 1) control which kind(s) of annotation to use.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # note the argument order of _annotate(): ignore_xsi comes before the
    # pytype flag, and empty_type before empty_pytype
    _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi,
              ignore_old, empty_type, empty_pytype)


cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    # Shared implementation of pyannotate(), xsiannotate() and annotate():
    # resolves the default type for empty elements once and then calls
    # _annotate_element() for the element nodes visited by the tree walk.
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, StrType, NoneType

    # nothing requested => nothing to do
    if not annotate_xsi and not annotate_pytype:
        return

    # An XML Schema type name for empty elements takes precedence over a
    # pytype name; byte string names are decoded as ASCII.
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    # looked up once here and passed down to each _annotate_element() call
    StrType = <PyType>_PYTYPE_DICT.get(u'str')
    NoneType = <PyType>_PYTYPE_DICT.get(u'NoneType')

    doc = element._doc
    c_node = element._c_node
    # BEGIN/END are a traversal macro pair from the lxml tree includes; the
    # statement between them runs per visited node.
    # NOTE(review): presumably this walks 'element' and all of its
    # descendants in document order - confirm against the macro definition.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, StrType, NoneType)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
== "true": 1788 pytype = NoneType 1789 1790 if pytype is None and not ignore_xsi: 1791 # check that old xsi type value is valid 1792 typename = cetree.attributeValueFromNsName( 1793 c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") 1794 if typename is not None: 1795 pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename) 1796 if pytype is None and u':' in typename: 1797 prefix, typename = typename.split(u':', 1) 1798 pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename) 1799 if pytype is not None and pytype is not StrType: 1800 # StrType does not have a typecheck but is the default 1801 # anyway, so just accept it if given as type 1802 # information 1803 pytype = _check_type(c_node, pytype) 1804 if pytype is None: 1805 typename = None 1806 1807 if pytype is None and not ignore_pytype: 1808 # check that old pytype value is valid 1809 old_pytypename = cetree.attributeValueFromNsName( 1810 c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) 1811 if old_pytypename is not None: 1812 if old_pytypename == TREE_PYTYPE_NAME: 1813 if not cetree.hasChild(c_node): 1814 # only case where we should keep it, 1815 # everything else is clear enough 1816 pytype = TREE_PYTYPE 1817 else: 1818 if old_pytypename == 'none': 1819 # transition from lxml 1.x 1820 old_pytypename = "NoneType" 1821 pytype = <PyType>_PYTYPE_DICT.get(old_pytypename) 1822 if pytype is not None and pytype is not StrType: 1823 # StrType does not have a typecheck but is the 1824 # default anyway, so just accept it if given as 1825 # type information 1826 pytype = _check_type(c_node, pytype) 1827 1828 if pytype is None: 1829 # try to guess type 1830 if not cetree.hasChild(c_node): 1831 # element has no children => data class 1832 pytype = _guessPyType(textOf(c_node), StrType) 1833 else: 1834 istree = 1 1835 1836 if pytype is None: 1837 # use default type for empty elements 1838 if cetree.hasText(c_node): 1839 pytype = StrType 1840 else: 1841 pytype = empty_pytype 1842 if typename is None: 1843 typename = empty_type_name 
1844 1845 if pytype is not None: 1846 if typename is None: 1847 if not istree: 1848 if pytype._schema_types: 1849 # pytype->xsi:type is a 1:n mapping 1850 # simply take the first 1851 typename = pytype._schema_types[0] 1852 elif typename not in pytype._schema_types: 1853 typename = pytype._schema_types[0] 1854 1855 if annotate_xsi: 1856 if typename is None or istree: 1857 cetree.delAttributeFromNsName( 1858 c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") 1859 else: 1860 # update or create attribute 1861 typename_utf8 = cetree.utf8(typename) 1862 c_ns = cetree.findOrBuildNodeNsPrefix( 1863 doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd') 1864 if c_ns is not NULL: 1865 if b':' in typename_utf8: 1866 prefix, name = typename_utf8.split(b':', 1) 1867 if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0': 1868 typename_utf8 = name 1869 elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0: 1870 typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name 1871 elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0': 1872 typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8 1873 c_ns = cetree.findOrBuildNodeNsPrefix( 1874 doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi') 1875 tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8)) 1876 1877 if annotate_pytype: 1878 if pytype is None: 1879 # delete attribute if it exists 1880 cetree.delAttributeFromNsName( 1881 c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) 1882 else: 1883 # update or create attribute 1884 c_ns = cetree.findOrBuildNodeNsPrefix( 1885 doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py') 1886 pytype_name = cetree.utf8(pytype.name) 1887 tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME, 1888 _xcstr(pytype_name)) 1889 if pytype is NoneType: 1890 c_ns = cetree.findOrBuildNodeNsPrefix( 1891 doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi') 1892 tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true") 1893 1894 
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces

def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    u"""deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Recursively remove type annotations from the elements of an XML tree.

    Each of the following keyword flags selects one attribute to strip:

    * ``pytype`` (default: True) removes 'py:pytype' attributes
    * ``xsi`` (default: True) removes 'xsi:type' attributes
    * ``xsi_nil`` (default: False) removes 'xsi:nil' attributes

    Namespace declarations are left alone unless you pass
    ``cleanup_namespaces=True``, which additionally removes unused
    declarations from the tree.
    """
    cdef list doomed = []

    # (flag, attribute) pairs, kept in the order pytype, xsi:type, xsi:nil
    selection = (
        (pytype, PYTYPE_ATTRIBUTE),
        (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
        (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
    )
    for selected, attribute_name in selection:
        if selected:
            doomed.append(attribute_name)

    _strip_attributes(element_or_tree, *doomed)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)

################################################################################
# Module level parser setup

cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER

def set_default_parser(new_parser = None):
    u"""set_default_parser(new_parser = None)

    Replace the default parser used by objectify's Element() and
    fromstring() functions.

    The new parser must be an etree.XMLParser; anything else raises
    TypeError.

    Call without arguments to reset to the original parser.
    """
    global objectify_parser
    if new_parser is None:
        # reset to the parser created at module setup
        objectify_parser = __DEFAULT_PARSER
        return
    if not isinstance(new_parser, etree.XMLParser):
        raise TypeError, u"parser must inherit from lxml.etree.XMLParser"
    objectify_parser = new_parser

def makeparser(**kw):
    u"""makeparser(remove_blank_text=True, **kw)

    Create a new XML parser for objectify trees.

    All keyword arguments are passed on to ``etree.XMLParser()``.  Unlike
    that class, this function removes blank text by default; pass the
    ``remove_blank_text`` boolean keyword option yourself to change that.
    """
    # only a default: an explicitly passed value always wins
    kw.setdefault('remove_blank_text', True)
    new_parser = etree.XMLParser(**kw)
    new_parser.set_element_class_lookup( ObjectifyElementClassLookup() )
    return new_parser

cdef _Element _makeElement(tag, text, attrib, nsmap):
    # create a new element through the current default objectify parser
    return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap)

################################################################################
# Module level factory functions

cdef object _fromstring
_fromstring = etree.fromstring

SubElement = etree.SubElement

def fromstring(xml, parser=None, *, base_url=None):
    u"""fromstring(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree fromstring() function
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)

def XML(xml, parser=None, *, base_url=None):
    u"""XML(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree XML() literal factory
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
2023 2024 You can pass a different parser as second argument. 2025 2026 The ``base_url`` keyword allows setting a URL for the document 2027 when parsing from a file-like object. This is needed when looking 2028 up external entities (DTD, XInclude, ...) with relative paths. 2029 """ 2030 if parser is None: 2031 parser = objectify_parser 2032 return _parse(f, parser, base_url=base_url) 2033 2034cdef dict _DEFAULT_NSMAP = { 2035 "py" : PYTYPE_NAMESPACE, 2036 "xsi" : XML_SCHEMA_INSTANCE_NS, 2037 "xsd" : XML_SCHEMA_NS 2038} 2039 2040E = ElementMaker() 2041 2042def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): 2043 u"""Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes) 2044 2045 Objectify specific version of the lxml.etree Element() factory that 2046 always creates a structural (tree) element. 2047 2048 NOTE: requires parser based element class lookup activated in lxml.etree! 2049 """ 2050 if attrib is not None: 2051 if _attributes: 2052 attrib = dict(attrib) 2053 attrib.update(_attributes) 2054 _attributes = attrib 2055 if _pytype is None: 2056 _pytype = TREE_PYTYPE_NAME 2057 if nsmap is None: 2058 nsmap = _DEFAULT_NSMAP 2059 _attributes[PYTYPE_ATTRIBUTE] = _pytype 2060 return _makeElement(_tag, None, _attributes, nsmap) 2061 2062def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, 2063 **_attributes): 2064 u"""DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes) 2065 2066 Create a new element from a Python value and XML attributes taken from 2067 keyword arguments or a dictionary passed as second argument. 2068 2069 Automatically adds a 'pytype' attribute for the Python type of the value, 2070 if the type can be identified. If '_pytype' or '_xsi' are among the 2071 keyword arguments, they will be used instead. 
2072 2073 If the _value argument is an ObjectifiedDataElement instance, its py:pytype, 2074 xsi:type and other attributes and nsmap are reused unless they are redefined 2075 in attrib and/or keyword arguments. 2076 """ 2077 if nsmap is None: 2078 nsmap = _DEFAULT_NSMAP 2079 if attrib is not None and attrib: 2080 if _attributes: 2081 attrib = dict(attrib) 2082 attrib.update(_attributes) 2083 _attributes = attrib 2084 if isinstance(_value, ObjectifiedElement): 2085 if _pytype is None: 2086 if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP: 2087 # special case: no change! 2088 return _value.__copy__() 2089 if isinstance(_value, ObjectifiedDataElement): 2090 # reuse existing nsmap unless redefined in nsmap parameter 2091 temp = _value.nsmap 2092 if temp is not None and temp: 2093 temp = dict(temp) 2094 temp.update(nsmap) 2095 nsmap = temp 2096 # reuse existing attributes unless redefined in attrib/_attributes 2097 temp = _value.attrib 2098 if temp is not None and temp: 2099 temp = dict(temp) 2100 temp.update(_attributes) 2101 _attributes = temp 2102 # reuse existing xsi:type or py:pytype attributes, unless provided as 2103 # arguments 2104 if _xsi is None and _pytype is None: 2105 _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) 2106 _pytype = _attributes.get(PYTYPE_ATTRIBUTE) 2107 2108 if _xsi is not None: 2109 if u':' in _xsi: 2110 prefix, name = _xsi.split(u':', 1) 2111 ns = nsmap.get(prefix) 2112 if ns != XML_SCHEMA_NS: 2113 raise ValueError, u"XSD types require the XSD namespace" 2114 elif nsmap is _DEFAULT_NSMAP: 2115 name = _xsi 2116 _xsi = u'xsd:' + _xsi 2117 else: 2118 name = _xsi 2119 for prefix, ns in nsmap.items(): 2120 if ns == XML_SCHEMA_NS: 2121 if prefix is not None and prefix: 2122 _xsi = prefix + u':' + _xsi 2123 break 2124 else: 2125 raise ValueError, u"XSD types require the XSD namespace" 2126 _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi 2127 if _pytype is None: 2128 # allow using unregistered or even wrong xsi:type names 
2129 py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi) 2130 if py_type is None: 2131 py_type = <PyType>_SCHEMA_TYPE_DICT.get(name) 2132 if py_type is not None: 2133 _pytype = py_type.name 2134 2135 if _pytype is None: 2136 _pytype = _pytypename(_value) 2137 2138 if _value is None and _pytype != u"str": 2139 _pytype = _pytype or u"NoneType" 2140 strval = None 2141 elif python._isString(_value): 2142 strval = _value 2143 elif isinstance(_value, bool): 2144 if _value: 2145 strval = u"true" 2146 else: 2147 strval = u"false" 2148 else: 2149 py_type = <PyType>_PYTYPE_DICT.get(_pytype) 2150 stringify = unicode if py_type is None else py_type.stringify 2151 strval = stringify(_value) 2152 2153 if _pytype is not None: 2154 if _pytype == u"NoneType" or _pytype == u"none": 2155 strval = None 2156 _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = u"true" 2157 else: 2158 # check if type information from arguments is valid 2159 py_type = <PyType>_PYTYPE_DICT.get(_pytype) 2160 if py_type is not None: 2161 if py_type.type_check is not None: 2162 py_type.type_check(strval) 2163 _attributes[PYTYPE_ATTRIBUTE] = _pytype 2164 2165 return _makeElement(u"value", strval, _attributes, nsmap) 2166 2167 2168################################################################################ 2169# ObjectPath 2170 2171include "objectpath.pxi" 2172