1# cython: binding=True
2# cython: auto_pickle=False
3# cython: language_level=2
4
5"""
6The ``lxml.objectify`` module implements a Python object API for XML.
7It is based on `lxml.etree`.
8"""
9
10from __future__ import absolute_import
11
12cimport cython
13
14from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
15from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
16from lxml.includes.tree cimport const_xmlChar, _xcstr
17from lxml cimport python
18from lxml.includes cimport tree
19
20cimport lxml.includes.etreepublic as cetree
21cimport libc.string as cstring_h   # not to be confused with stdlib 'string'
22from libc.string cimport const_char
23
# Names exported by ``from lxml.objectify import *``.
__all__ = [u'BoolElement', u'DataElement', u'E', u'Element', u'ElementMaker',
           u'FloatElement', u'IntElement', u'LongElement', u'NoneElement',
           u'NumberElement', u'ObjectPath', u'ObjectifiedDataElement',
           u'ObjectifiedElement', u'ObjectifyElementClassLookup',
           u'PYTYPE_ATTRIBUTE', u'PyType', u'StringElement', u'SubElement',
           u'XML', u'annotate', u'deannotate', u'dump', u'enable_recursive_str',
           u'fromstring', u'getRegisteredTypes', u'makeparser', u'parse',
           u'pyannotate', u'pytypename', u'set_default_parser',
           u'set_pytype_attribute_tag', u'xsiannotate']
33
# Module-global reference to lxml.etree, declared as a C-level variable.
cdef object etree
from lxml import etree
# initialize C-API of lxml.etree
import_lxml__etree()

# This module reports the same version as lxml.etree.
__version__ = etree.__version__

# Module-global reference to the stdlib 're' module.
cdef object re
import re

# NOTE(review): the users of this tuple are outside this section of the file;
# presumably these are the exception types that may be swallowed - confirm.
cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)
# Bound ``match`` of a pattern for names of the form ``__xxx__``.
# ObjectifiedElement.__getattr__ uses it to keep such names out of the
# child element lookup.
cdef object is_special_method = re.compile(u'__.*__$').match
46
47
48# Duplicated from apihelpers.pxi, since dependencies obstruct
49# including apihelpers.pxi.
cdef strrepr(s):
    """Return a form of the string ``s`` that can be handed out by
    ``__repr__`` methods such as ``_Element.__repr__()``.

    On Python 2 the string is encoded with 'unicode-escape'; otherwise it
    is returned unchanged.
    """
    if python.IS_PYTHON2:
        return s.encode('unicode-escape')
    return s
55
56
cdef object _typename(object t):
    """Return the type name reported by ``python._fqtypename(t)`` with
    everything up to and including the last '.' removed, as a unicode string.
    """
    cdef const_char* c_name = python._fqtypename(t)
    c_last_dot = cstring_h.strrchr(c_name, c'.')
    if c_last_dot is NULL:
        # no dot in the name => use it as it is
        return pyunicode(<const_xmlChar*>c_name)
    # continue right behind the last dot
    c_name = c_last_dot + 1
    return pyunicode(<const_xmlChar*>c_name)
64
65
# namespace/name for "pytype" hint attribute
# Namespace and attribute name are each kept in three forms that
# set_pytype_attribute_tag() assigns together: a Python string, its UTF-8
# encoded bytes, and a C string pointer that is assigned from the bytes object.
cdef object PYTYPE_NAMESPACE
cdef bytes PYTYPE_NAMESPACE_UTF8
cdef const_xmlChar* _PYTYPE_NAMESPACE

cdef object PYTYPE_ATTRIBUTE_NAME
cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8
cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME

# Public, namespace qualified name of the attribute; the real value is
# assigned by set_pytype_attribute_tag().
PYTYPE_ATTRIBUTE = None

# NOTE(review): not used in this section of the file; presumably the pytype
# name that marks structural (non-data) elements - confirm.
cdef unicode TREE_PYTYPE_NAME = u"TREE"
78
cdef tuple _unicodeAndUtf8(s):
    """Return the pair ``(s, utf8)`` where ``utf8`` is the unicode string
    ``s`` encoded as a UTF-8 bytes object.
    """
    utf8 = python.PyUnicode_AsUTF8String(s)
    return (s, utf8)
81
def set_pytype_attribute_tag(attribute_tag=None):
    u"""set_pytype_attribute_tag(attribute_tag=None)
    Change name and namespace of the XML attribute that holds Python type
    information.

    Do not use this unless you know what you are doing.

    Reset by calling without argument.

    Default: "{http://codespeak.net/lxml/objectify/pytype}pytype"

    The new tag must be namespace qualified (``"{namespace}name"``),
    otherwise a ValueError is raised.  The module state is only changed
    after the new tag was split and decoded successfully, so a rejected
    tag leaves the previous setting fully intact.
    """
    global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
    global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
    global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8
    cdef bytes ns_utf8, name_utf8

    # Step 1: do everything that can fail on local variables only.
    if attribute_tag is None:
        ns, ns_utf8 = \
            _unicodeAndUtf8(u"http://codespeak.net/lxml/objectify/pytype")
        name, name_utf8 = \
            _unicodeAndUtf8(u"pytype")
    else:
        ns_utf8, name_utf8 = cetree.getNsTag(attribute_tag)
        if ns_utf8 is None:
            # Previously this case failed in .decode() *after* the globals
            # had been overwritten, leaving the C pointers below referring
            # to the buffers of bytes objects that were already released.
            raise ValueError(
                u"pytype attribute tag must be namespace qualified: "
                u"{namespace}name")
        ns = ns_utf8.decode('utf8')
        name = name_utf8.decode('utf8')

    # Step 2: commit.  Only plain assignments from here on.
    PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = ns, ns_utf8
    PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = name, name_utf8

    # The C pointers refer to the buffers of the module global bytes objects,
    # which keep them alive until the next call of this function.
    _PYTYPE_NAMESPACE      = PYTYPE_NAMESPACE_UTF8
    _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
    PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
        _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
111
# Install the default pytype attribute name when the module is imported.
set_pytype_attribute_tag()


# namespaces for XML Schema
# Each namespace URI is kept as a unicode string, as UTF-8 encoded bytes and
# as a C string pointer obtained from the bytes object through _xcstr().
cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \
    _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema")
cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)

cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \
    _unicodeAndUtf8(u"http://www.w3.org/2001/XMLSchema-instance")
cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)

# "{namespace}name" style names of the 'nil' and 'type' attributes in the
# XML Schema instance namespace.
cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = u"{%s}nil" % XML_SCHEMA_INSTANCE_NS
cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = u"{%s}type" % XML_SCHEMA_INSTANCE_NS
128
129
130################################################################################
131# Element class for the main API
132
cdef class ObjectifiedElement(ElementBase):
    u"""Main XML Element class.

    Children of an element are reached as Python attributes.  Where several
    children carry the same name, index the attribute to select one of
    them.  Example::

       >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
       >>> second_c2 = root.c1.c2[1]
       >>> print(second_c2.text)
       1

    Note that you cannot (and must not) instantiate this class or its
    subclasses.
    """
    def __iter__(self):
        u"""Iterate over this element and all of its siblings that have the
        same tag.
        """
        parent = self.getparent()
        if parent is not None:
            return etree.ElementChildIterator(parent, tag=self.tag)
        # without a parent there are no siblings to visit
        return iter([self])

    def __str__(self):
        # the module level __RECURSIVE_STR flag selects the _dump() output,
        # otherwise the text content (or an empty string) is returned
        if not __RECURSIVE_STR:
            return textOf(self._c_node) or u''
        return _dump(self, 0)

    # pickle support for objectified Element
    def __reduce__(self):
        # pickle as "call fromstring() on the serialised element"
        serialised = etree.tostring(self)
        return (fromstring, (serialised,))

    @property
    def text(self):
        # read-only: assignments are rejected in __setattr__
        content = textOf(self._c_node)
        return content

    @property
    def __dict__(self):
        """Stand-in for a real __dict__ so that dir() and friends work.

        Maps child names to child elements.  For each name, only the first
        child is included.
        """
        cdef _Element child
        cdef dict children = {}
        c_ns = tree._getNs(self._c_node)
        if c_ns is NULL:
            tag = None
        else:
            tag = u"{%s}*" % pyunicode(c_ns)
        for child in etree.ElementChildIterator(self, tag=tag):
            # an element without namespace only lists children without one
            if c_ns is not NULL or tree._getNs(child._c_node) is NULL:
                # setdefault() keeps the first child of each name
                children.setdefault(pyunicode(child._c_node.name), child)
        return children

    def __len__(self):
        u"""Number of elements with this tag below the parent, including
        this element itself.
        """
        return _countSiblings(self._c_node)

    def countchildren(self):
        u"""countchildren(self)

        Return how many child elements this element has, whatever their
        names are.
        """
        # copied from etree
        cdef Py_ssize_t count = 0
        cdef tree.xmlNode* c_child = self._c_node.children
        while c_child is not NULL:
            if tree._isElement(c_child):
                count += 1
            c_child = c_child.next
        return count

    def getchildren(self):
        u"""getchildren(self)

        Return a list of all direct child elements in document order.
        """
        cdef tree.xmlNode* c_child = self._c_node.children
        children = []
        while c_child is not NULL:
            if tree._isElement(c_child):
                children.append(cetree.elementFactory(self._doc, c_child))
            c_child = c_child.next
        return children

    def __getattr__(self, tag):
        u"""Look up the first child with the given tag name.  A tag without
        namespace is searched in the namespace of this element.
        """
        if not is_special_method(tag):
            return _lookupChildOrRaise(self, tag)
        # names like '__xxx__' are never treated as children
        return object.__getattr__(self, tag)

    def __setattr__(self, tag, value):
        u"""Assign a value to the first child with the given tag name, or
        append a new child if there is none.  A tag without namespace is
        searched in the namespace of this element.
        """
        cdef _Element element
        # properties are looked up /after/ __setattr__, so we must emulate them
        if tag == u'text' or tag == u'pyval':
            # read-only !
            raise TypeError(f"attribute '{tag}' of '{_typename(self)}' objects is not writable")
        if tag == u'tail':
            cetree.setTailText(self._c_node, value)
            return
        if tag == u'tag':
            ElementBase.tag.__set__(self, value)
            return
        if tag == u'base':
            ElementBase.base.__set__(self, value)
            return
        # anything else names a child element
        child_tag = _buildChildTag(self, tag)
        element = _lookupChild(self, child_tag)
        if element is not None:
            _replaceElement(element, value)
        else:
            _appendValue(self, child_tag, value)

    def __delattr__(self, tag):
        # raises AttributeError if there is no such child
        doomed = _lookupChildOrRaise(self, tag)
        self.remove(doomed)

    def addattr(self, tag, value):
        u"""addattr(self, tag, value)

        Append a new child that holds the given value.

        Unlike append(), this takes a data value rather than an element.
        """
        child_tag = _buildChildTag(self, tag)
        _appendValue(self, child_tag, value)

    def __getitem__(self, key):
        u"""Return a sibling, counting from the first child of the parent.
        This works like both a dict and a sequence.

        * An integer selects the sibling at that position.

        * A string does the same as getattr().  Use this to pass a
          namespace for the lookup, or to reach children with special
          names (``text`` etc.).

        * A slice object returns the matching list of siblings.
        """
        cdef tree.xmlNode* c_self_node
        cdef tree.xmlNode* c_parent
        cdef tree.xmlNode* c_start
        cdef tree.xmlNode* c_found
        cdef Py_ssize_t c_index
        if python._isString(key):
            return _lookupChildOrRaise(self, key)
        if isinstance(key, slice):
            return list(self)[key]
        # normal item access
        c_index = key   # raises TypeError if necessary
        c_self_node = self._c_node
        c_parent = c_self_node.parent
        if c_parent is NULL:
            # no parent => this element is the only item
            if c_index != 0 and c_index != -1:
                raise IndexError(unicode(key))
            return self
        # negative indexes are searched backwards from the last child
        c_start = c_parent.last if c_index < 0 else c_parent.children
        c_found = _findFollowingSibling(
            c_start, tree._getNs(c_self_node), c_self_node.name, c_index)
        if c_found is NULL:
            raise IndexError(unicode(key))
        return elementFactory(self._doc, c_found)

    def __setitem__(self, key, value):
        u"""Assign to a sibling, counting from the first child of the
        parent.  Covers key assignment, item assignment and slice
        assignment.

        * An integer replaces the sibling at that position.

        * A string does the same as setattr().  Use this to pass a
          namespace for the lookup.

        * For a slice, the items of the value sequence (list, tuple, etc.)
          are assigned to the selected siblings.
        """
        cdef _Element element
        cdef tree.xmlNode* c_node
        cdef tree.xmlNode* c_parent
        if python._isString(key):
            child_tag = _buildChildTag(self, key)
            element = _lookupChild(self, child_tag)
            if element is not None:
                _replaceElement(element, value)
            else:
                _appendValue(self, child_tag, value)
            return

        c_parent = self._c_node.parent
        if c_parent is NULL:
            # the 'root[i] = ...' case
            raise TypeError(u"assignment to root element is invalid")

        if isinstance(key, slice):
            # slice assignment
            _setSlice(key, self, value)
            return

        # normal index assignment
        if key < 0:
            c_node = c_parent.last
        else:
            c_node = c_parent.children
        c_node = _findFollowingSibling(
            c_node, tree._getNs(self._c_node), self._c_node.name, key)
        if c_node is NULL:
            raise IndexError(unicode(key))
        element = elementFactory(self._doc, c_node)
        _replaceElement(element, value)

    def __delitem__(self, key):
        # remove the selected sibling(s) from the parent
        parent = self.getparent()
        if parent is None:
            raise TypeError(u"deleting items not supported by root element")
        if not isinstance(key, slice):
            # normal index deletion
            sibling = self.__getitem__(key)
            parent.remove(sibling)
            return
        # slice deletion: collect first, then remove
        doomed = list(self)[key]
        discard = parent.remove
        for sibling in doomed:
            discard(sibling)

    def descendantpaths(self, prefix=None):
        u"""descendantpaths(self, prefix=None)

        Return a list of object path expressions for all descendants.
        A prefix that is not a string is joined with '.' first.
        """
        if prefix is None or python._isString(prefix):
            path_prefix = prefix
        else:
            path_prefix = u'.'.join(prefix)
        return _build_descendant_paths(self._c_node, path_prefix)
377
378
cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    # Names are compared by pointer, not by content.
    # A NULL c_href accepts any namespace; an empty c_href only matches
    # nodes without a namespace.
    if c_node.name == c_name:
        if c_href == NULL:
            return 1
        c_node_href = tree._getNs(c_node)
        if c_node_href != NULL:
            return tree.xmlStrcmp(c_node_href, c_href) == 0
        return c_href[0] == c'\0'
    return 0
388
389
cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
    # Count the start node plus all element siblings on either side of it
    # that match its tag.
    cdef tree.xmlNode* c_sibling
    cdef Py_ssize_t total = 1   # the start node itself
    c_name = c_start_node.name
    c_ns = tree._getNs(c_start_node)
    # siblings in front of the start node
    c_sibling = c_start_node.prev
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and \
               _tagMatches(c_sibling, c_ns, c_name):
            total += 1
        c_sibling = c_sibling.prev
    # siblings behind the start node
    c_sibling = c_start_node.next
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and \
               _tagMatches(c_sibling, c_ns, c_name):
            total += 1
        c_sibling = c_sibling.next
    return total
409
cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    # Starting at c_node, return the matching element number 'index'.
    # index >= 0 walks forward and counts from 0; index < 0 walks backward
    # and counts from -1.  Returns NULL if there are not enough matches.
    cdef tree.xmlNode* (*advance)(tree.xmlNode*)
    if index < 0:
        # -1 => skip 0 matches, -2 => skip 1 match, ...
        index = -1 - index
        advance = cetree.previousElement
    else:
        advance = cetree.nextElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and \
               _tagMatches(c_node, href, name):
            if index == 0:
                return c_node
            index -= 1
        c_node = advance(c_node)
    return NULL
427
cdef object _lookupChild(_Element parent, tag):
    # Return the first child of 'parent' that matches 'tag', or None.
    cdef tree.xmlNode* c_parent_node = parent._c_node
    cdef tree.xmlNode* c_child
    cdef const_xmlChar* c_href
    ns, tag = cetree.getNsTagWithEmptyNs(tag)
    c_tag = tree.xmlDictExists(
        c_parent_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag))
    if c_tag is NULL:
        return None # not in the hash map => not in the tree
    if ns is not None:
        c_href = _xcstr(ns)
    else:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_parent_node)
        if c_href is NULL:
            c_href = <const_xmlChar*>''
    c_child = _findFollowingSibling(c_parent_node.children, c_href, c_tag, 0)
    if c_child is NULL:
        return None
    return elementFactory(parent._doc, c_child)
446
cdef object _lookupChildOrRaise(_Element parent, tag):
    # Like _lookupChild(), but a missing child raises AttributeError.
    element = _lookupChild(parent, tag)
    if element is not None:
        return element
    raise AttributeError(u"no such child: " + _buildChildTag(parent, tag))
452
cdef object _buildChildTag(_Element parent, tag):
    # Build the qualified name for a child of 'parent'.  A tag that comes
    # without a namespace gets the namespace of the parent node.
    cdef const_xmlChar* c_href
    ns, tag = cetree.getNsTag(tag)
    if ns is None:
        c_href = tree._getNs(parent._c_node)
    else:
        c_href = _xcstr(ns)
    return cetree.namespacedNameFromNsName(c_href, _xcstr(tag))
458
cdef _replaceElement(_Element element, value):
    # Replace 'element' in its parent by an element built from 'value'.
    # The replacement always keeps the tag of the replaced element.
    cdef _Element replacement
    if isinstance(value, _Element):
        # deep copy the new element
        replacement = cetree.deepcopyNodeToDocument(
            element._doc, (<_Element>value)._c_node)
        replacement.tag = element.tag
    else:
        if isinstance(value, (list, tuple)):
            # sequences are handled by slice assignment
            element[:] = value
            return
        replacement = element.makeelement(element.tag)
        _setElementValue(replacement, value)
    element.getparent().replace(element, replacement)
473
cdef _appendValue(_Element parent, tag, value):
    # Append 'value' to 'parent' as a new child named 'tag'.  Lists and
    # tuples append one child per item.
    cdef _Element child
    if isinstance(value, _Element):
        # deep copy the new element
        child = cetree.deepcopyNodeToDocument(
            parent._doc, (<_Element>value)._c_node)
        child.tag = tag
    elif isinstance(value, (list, tuple)):
        for entry in value:
            _appendValue(parent, tag, entry)
        return
    else:
        child = cetree.makeElement(
            tag, parent._doc, None, None, None, None, None)
        _setElementValue(child, value)
    cetree.appendChildToElement(parent, child)
490
cdef _setElementValue(_Element element, value):
    # Store a Python value in 'element': set the text and keep the nil and
    # pytype attributes in line with the value.
    if isinstance(value, _Element):
        # elements replace the target instead of becoming its text
        _replaceElement(element, value)
        return
    if value is None:
        # None is stored as a nil marker, the text is reset
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true")
        cetree.setNodeText(element._c_node, value)
        return

    # a real value: the element is not nil (anymore)
    cetree.delAttributeFromNsName(
        element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
    if python._isString(value):
        pytype_name = u"str"
        py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
    else:
        pytype_name = _typename(value)
        py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        if py_type is None:
            value = unicode(value)
        else:
            value = py_type.stringify(value)
    # only registered types are written into the pytype attribute
    if py_type is None:
        cetree.delAttributeFromNsName(
            element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    else:
        cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
    cetree.setNodeText(element._c_node, value)
517
cdef _setSlice(sliceobject, _Element target, items):
    # Implements slice assignment on the siblings of 'target':
    #   target[sliceobject] = items
    # Each item becomes an element with the tag of 'target' (elements are
    # deep copied, other values are stored via _setElementValue()).  The
    # siblings currently selected by the slice are replaced one by one,
    # leftover old siblings are removed and leftover new items inserted.
    #
    # Raises ValueError for a zero step, and for an extended slice (step
    # other than 1) when the number of items differs from the number of
    # selected siblings.
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_step, pos
    # collect existing slice
    if (<slice>sliceobject).step is None:
        c_step = 1
    else:
        c_step = (<slice>sliceobject).step
    if c_step == 0:
        raise ValueError, u"Invalid slice"
    cdef list del_items = target[sliceobject]

    # collect new values
    new_items = []
    tag = target.tag
    for item in items:
        if isinstance(item, _Element):
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        else:
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        new_items.append(new_element)

    # sanity check - raise what a list would raise
    if c_step != 1 and len(del_items) != len(new_items):
        raise ValueError, \
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}"

    # replace existing items
    pos = 0
    parent = target.getparent()
    replace = parent.replace
    while pos < len(new_items) and pos < len(del_items):
        replace(del_items[pos], new_items[pos])
        pos += 1
    # remove leftover items
    if pos < len(del_items):
        remove = parent.remove
        while pos < len(del_items):
            remove(del_items[pos])
            pos += 1
    # append remaining new items
    if pos < len(new_items):
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            item = new_items[pos-1]
        else:
            # Nothing was replaced, so the slice selected no sibling and the
            # insertion point has to be derived from the slice start.
            # A slice without start (e.g. ``el[:0] = [...]``) has start None,
            # which used to fail with a TypeError in the comparison and the
            # subtraction below.  With step == 1 it means "start at 0".
            start = (<slice>sliceobject).start
            if start is None:
                start = 0
            if start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            # look for the sibling right in front of the insertion point
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                start - 1)
            if c_node is NULL:
                # no such sibling => append to the end of the parent
                while pos < len(new_items):
                    cetree.appendChildToElement(parent, new_items[pos])
                    pos += 1
                return
            item = cetree.elementFactory(parent._doc, c_node)
        # chain the new items, each one behind the previous one
        while pos < len(new_items):
            add = item.addnext
            item = new_items[pos]
            add(item)
            pos += 1
588
589################################################################################
590# Data type support in subclasses
591
cdef class ObjectifiedDataElement(ObjectifiedElement):
    u"""Common base class of the data type Elements.  Subclasses are
    expected to override the 'pyval' property, and the __str__ method
    where needed.
    """
    @property
    def pyval(self):
        # plain text content, None for an element without text
        content = textOf(self._c_node)
        return content

    def __str__(self):
        content = textOf(self._c_node)
        return content if content else ''

    def __repr__(self):
        content = textOf(self._c_node)
        return strrepr(content if content else '')

    def _setText(self, s):
        u"""Replace the text content of the element.  Meant for subclasses
        only.  Don't use unless you know what you are doing.
        """
        cetree.setNodeText(self._c_node, s)
611
612
cdef class NumberElement(ObjectifiedDataElement):
    # The Python value is computed by calling the '_parse_value' function
    # on the text content (see _parseNumber()).  All operators work on the
    # values returned by _numericValueOf() for their operands.
    cdef object _parse_value

    def _setValueParser(self, function):
        u"""Install the function that turns the text content into the
        Python value.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    @property
    def pyval(self):
        value = _parseNumber(self)
        return value

    # --- conversions -------------------------------------------------------

    def __int__(self):
        value = _parseNumber(self)
        return int(value)

    def __long__(self):
        value = _parseNumber(self)
        return long(value)

    def __float__(self):
        value = _parseNumber(self)
        return float(value)

    def __complex__(self):
        value = _parseNumber(self)
        return complex(value)

    def __str__(self):
        value = _parseNumber(self)
        return unicode(value)

    def __repr__(self):
        value = _parseNumber(self)
        return repr(value)

    def __oct__(self):
        value = _parseNumber(self)
        return oct(value)

    def __hex__(self):
        value = _parseNumber(self)
        return hex(value)

    # --- comparison and hashing --------------------------------------------

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        value = _parseNumber(self)
        return hash(value)

    # --- arithmetic --------------------------------------------------------

    def __add__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs + rhs

    def __radd__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs + rhs

    def __sub__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs - rhs

    def __rsub__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs - rhs

    def __mul__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs * rhs

    def __rmul__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs * rhs

    def __div__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs / rhs

    def __rdiv__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs / rhs

    def __truediv__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs / rhs

    def __rtruediv__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs / rhs

    def __floordiv__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs // rhs

    def __rfloordiv__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs // rhs

    def __mod__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs % rhs

    def __rmod__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs % rhs

    def __divmod__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return divmod(lhs, rhs)

    def __rdivmod__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return divmod(lhs, rhs)

    def __pow__(self, other, modulo):
        base = _numericValueOf(self)
        exponent = _numericValueOf(other)
        if modulo is not None:
            return pow(base, exponent, modulo)
        return base ** exponent

    def __rpow__(self, other, modulo):
        base = _numericValueOf(other)
        exponent = _numericValueOf(self)
        if modulo is not None:
            return pow(base, exponent, modulo)
        return base ** exponent

    # --- unary operators and truth value -----------------------------------

    def __neg__(self):
        value = _numericValueOf(self)
        return -value

    def __pos__(self):
        value = _numericValueOf(self)
        return +value

    def __abs__(self):
        value = _numericValueOf(self)
        return abs(value)

    def __bool__(self):
        value = _numericValueOf(self)
        return bool(value)

    def __invert__(self):
        value = _numericValueOf(self)
        return ~value

    # --- bit operations ----------------------------------------------------

    def __lshift__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs << rhs

    def __rlshift__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs << rhs

    def __rshift__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs >> rhs

    def __rrshift__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs >> rhs

    def __and__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs & rhs

    def __rand__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs & rhs

    def __or__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs | rhs

    def __ror__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs | rhs

    def __xor__(self, other):
        lhs = _numericValueOf(self)
        rhs = _numericValueOf(other)
        return lhs ^ rhs

    def __rxor__(self, other):
        lhs = _numericValueOf(other)
        rhs = _numericValueOf(self)
        return lhs ^ rhs
761
762
cdef class IntElement(NumberElement):
    # Number element whose text content is parsed with int().
    def _init(self):
        self._parse_value = int

    def __index__(self):
        # allows the element to be used wherever an index is expected
        value = _parseNumber(self)
        return int(value)
769
770
cdef class LongElement(NumberElement):
    # Number element whose text content is parsed with long().
    def _init(self):
        self._parse_value = long

    def __index__(self):
        # allows the element to be used wherever an index is expected
        value = _parseNumber(self)
        return int(value)
777
778
cdef class FloatElement(NumberElement):
    # Number element whose text content is parsed with float().
    def _init(self):
        # NOTE(review): nothing in this section calls _init(); presumably it
        # is a set-up hook invoked by lxml.etree - confirm.
        self._parse_value = float
782
783
cdef class StringElement(ObjectifiedDataElement):
    u"""String data class.

    Be aware that the sequence protocol of strings is *not* available on
    this class: len(), iter(), str_attr[0], str_attr[0:1], etc. do *not*
    work on the string value.  Read the .text attribute if you need a
    'real' string.
    """
    @property
    def pyval(self):
        content = textOf(self._c_node)
        return content if content else u''

    def __repr__(self):
        content = textOf(self._c_node)
        return repr(content if content else u'')

    def strlen(self):
        # length of the text content, 0 if there is no text
        text = textOf(self._c_node)
        return 0 if text is None else len(text)

    def __bool__(self):
        content = textOf(self._c_node)
        return bool(content)

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        content = textOf(self._c_node)
        return hash(content if content else u'')

    def __add__(self, other):
        left = _strValueOf(self)
        right = _strValueOf(other)
        return left + right

    def __radd__(self, other):
        right = _strValueOf(self)
        left = _strValueOf(other)
        return left + right

    def __mul__(self, other):
        # either operand may be the StringElement here
        if isinstance(self, StringElement):
            text = textOf((<StringElement>self)._c_node) or ''
            return text * _numericValueOf(other)
        if isinstance(other, StringElement):
            factor = _numericValueOf(self)
            text = textOf((<StringElement>other)._c_node) or ''
            return factor * text
        return NotImplemented

    def __rmul__(self, other):
        factor = _numericValueOf(other)
        text = textOf((<StringElement>self)._c_node) or ''
        return factor * text

    def __mod__(self, other):
        template = _strValueOf(self) or ''
        return template % other

    def __int__(self):
        content = textOf(self._c_node)
        return int(content)

    def __long__(self):
        content = textOf(self._c_node)
        return long(content)

    def __float__(self):
        content = textOf(self._c_node)
        return float(content)

    def __complex__(self):
        content = textOf(self._c_node)
        return complex(content)
849
850
cdef class NoneElement(ObjectifiedDataElement):
    # Data element that behaves like None: it prints, hashes and compares
    # as None and is always false.
    def __str__(self):
        return u"None"

    def __repr__(self):
        return "None"

    def __bool__(self):
        return False

    def __richcmp__(self, other, int op):
        # substitute None for the NoneElement operand(s), then compare
        if other is None or self is None:
            left, right = None, None
        elif isinstance(self, NoneElement):
            left, right = None, other
        else:
            left, right = self, None
        return python.PyObject_RichCompare(left, right, op)

    def __hash__(self):
        return hash(None)

    @property
    def pyval(self):
        return None
875
876
cdef class BoolElement(IntElement):
    u"""Boolean type based on string values: 'true' or 'false'.

    This class inherits from IntElement in order to mimic the behaviour
    of Python's bool type.
    """
    def _init(self):
        self._parse_value = _parseBool  # wraps as Python callable

    def __bool__(self):
        text = textOf(self._c_node)
        return _parseBool(text)

    def __int__(self):
        text = textOf(self._c_node)
        return 0 + _parseBool(text)

    def __float__(self):
        text = textOf(self._c_node)
        return 0.0 + _parseBool(text)

    def __richcmp__(self, other, int op):
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        text = textOf(self._c_node)
        return hash(_parseBool(text))

    def __str__(self):
        text = textOf(self._c_node)
        return unicode(_parseBool(text))

    def __repr__(self):
        text = textOf(self._c_node)
        return repr(_parseBool(text))

    @property
    def pyval(self):
        text = textOf(self._c_node)
        return _parseBool(text)
910
911
cdef _checkBool(s):
    # Type check for boolean text: raises ValueError unless *s* is one of
    # the literals accepted by __parseBoolAsInt().  None is rejected as well.
    if s is None or __parseBoolAsInt(s) == -1:
        raise ValueError
918
919
cdef bint _parseBool(s) except -1:
    # Parse boolean text into a C truth value.  None counts as False; text
    # that __parseBoolAsInt() does not recognise raises ValueError.
    cdef int parsed
    if s is not None:
        parsed = __parseBoolAsInt(s)
        if parsed == -1:
            raise ValueError(f"Invalid boolean value: '{s}'")
        return parsed
    return False
928
929
cdef inline int __parseBoolAsInt(text) except -2:
    # Map the accepted boolean literals to 0/1.  Returns -1 for any other
    # value (the caller decides how to report that).
    if text == 'false' or text == '0':
        return 0
    if text == 'true' or text == '1':
        return 1
    return -1
940
941
cdef object _parseNumber(NumberElement element):
    # Convert the element text with the parser callable stored on the element.
    text = textOf(element._c_node)
    return element._parse_value(text)
944
945
cdef enum NumberParserState:
    # States of the character-by-character state machine in _checkNumber().
    NPS_SPACE_PRE = 0    # initial state; only leading whitespace seen so far
    NPS_SIGN = 1         # leading '+' or '-' seen
    NPS_DIGITS = 2       # inside the digits before any decimal point
    NPS_POINT_LEAD = 3   # '.' seen before any digit
    NPS_POINT = 4        # '.' seen directly after digits
    NPS_FRACTION = 5     # inside the digits after the decimal point
    NPS_EXP = 6          # exponent marker seen
    NPS_EXP_SIGN = 7     # sign seen directly after the exponent marker
    NPS_DIGITS_EXP = 8   # inside the exponent digits
    NPS_SPACE_TAIL = 9   # trailing whitespace after a complete value
    NPS_INF1 = 20        # 'i' of "inf" seen
    NPS_INF2 = 21        # "in" seen
    NPS_INF3 = 22        # complete "inf" seen
    NPS_NAN1 = 23        # first 'n' of "nan" seen
    NPS_NAN2 = 24        # "na" seen
    NPS_NAN3 = 25        # complete "nan" seen
    NPS_ERROR = 99       # input rejected
964
965
# Fused string type: lets _checkNumber() take either a bytes or a unicode
# argument (see the explicit casts in _checkInt() and _checkFloat()).
ctypedef fused bytes_unicode:
    bytes
    unicode
969
970
cdef _checkNumber(bytes_unicode s, bint allow_float):
    """Check that the string 's' spells an integer or (optionally) a float.

    The string is scanned once, character by character, by a small state
    machine (see NumberParserState).  Surrounding whitespace is allowed.
    With 'allow_float' set, a decimal point, an exponent ('e' or 'E') and
    the case-insensitive special values "inf" and "nan" (optionally signed)
    are accepted as well.

    Returns None if the value is acceptable, raises ValueError otherwise.
    """
    cdef Py_UCS4 c
    cdef NumberParserState state = NPS_SPACE_PRE

    for c in s:
        if c.isdigit() if (bytes_unicode is unicode) else c in b'0123456789':
            if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP):
                pass
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_DIGITS
            elif state in (NPS_POINT_LEAD, NPS_POINT):
                state = NPS_FRACTION
            elif state in (NPS_EXP, NPS_EXP_SIGN):
                state = NPS_DIGITS_EXP
            else:
                state = NPS_ERROR
        elif c == u'.':
            if state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_POINT_LEAD
            elif state == NPS_DIGITS:
                state = NPS_POINT
            else:
                state = NPS_ERROR
            if not allow_float:
                state = NPS_ERROR
        elif c in u'-+':
            if state == NPS_SPACE_PRE:
                state = NPS_SIGN
            elif state == NPS_EXP:
                state = NPS_EXP_SIGN
            else:
                state = NPS_ERROR
        # Accept both exponent spellings: Python's float() and the XML Schema
        # lexical space of float/double allow 'e' as well as 'E'.
        elif c in u'eE':
            if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION):
                state = NPS_EXP
            else:
                state = NPS_ERROR
            if not allow_float:
                state = NPS_ERROR
        # Allow INF and NaN. XMLSchema requires case, we don't, like Python.
        elif c in u'iI':
            state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR
        elif c in u'fF':
            state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR
        elif c in u'aA':
            state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR
        elif c in u'nN':
            # Python also allows [+-]NaN, so let's accept that.
            if state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_NAN1 if allow_float else NPS_ERROR
            elif state == NPS_NAN2:
                state = NPS_NAN3
            elif state == NPS_INF1:
                state = NPS_INF2
            else:
                state = NPS_ERROR
        # Allow spaces around text values.
        elif c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20':
            if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL):
                pass
            elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3):
                state = NPS_SPACE_TAIL
            else:
                state = NPS_ERROR
        else:
            state = NPS_ERROR

        # No transition leads out of the error state, so stop scanning as
        # soon as it is reached.
        if state == NPS_ERROR:
            break

    # Only states in which a complete value has been read are acceptable.
    if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL):
        raise ValueError
1045
1046
cdef _checkInt(s):
    # Integer type check: dispatch to the matching specialisation of
    # _checkNumber() with floats disallowed.  The bytes variant is only
    # used on Python 2 for exact 'bytes' objects.
    if not python.IS_PYTHON2 or type(s) is not bytes:
        return _checkNumber(<unicode>s, allow_float=False)
    return _checkNumber(<bytes>s, allow_float=False)
1052
1053
cdef _checkFloat(s):
    # Float type check: dispatch to the matching specialisation of
    # _checkNumber() with floats allowed.  The bytes variant is only
    # used on Python 2 for exact 'bytes' objects.
    if not python.IS_PYTHON2 or type(s) is not bytes:
        return _checkNumber(<unicode>s, allow_float=True)
    return _checkNumber(<bytes>s, allow_float=True)
1059
1060
cdef object _strValueOf(obj):
    # Return a string for 'obj': strings pass through unchanged, None
    # becomes u'', elements contribute their text (u'' if that is missing
    # or empty) and anything else is converted with unicode().
    if python._isString(obj):
        return obj
    if obj is None:
        return u''
    if not isinstance(obj, _Element):
        return unicode(obj)
    return textOf((<_Element>obj)._c_node) or u''
1069
1070
cdef object _numericValueOf(obj):
    # Return the value to use for 'obj' in arithmetic: the parsed number of
    # a NumberElement, the 'pyval' of anything that has one, or the object
    # itself as a last resort.
    if isinstance(obj, NumberElement):
        return _parseNumber(<NumberElement>obj)
    try:
        # not always numeric, but Python will raise the right exception
        value = obj.pyval
    except AttributeError:
        # no 'pyval' available => use the object as it is
        value = obj
    return value
1080
1081
cdef _richcmpPyvals(left, right, int op):
    # Rich comparison on Python values: each operand that provides a
    # 'pyval' attribute is replaced by it before comparing.
    try:
        left = left.pyval
    except AttributeError:
        pass
    try:
        right = right.pyval
    except AttributeError:
        pass
    return python.PyObject_RichCompare(left, right, op)
1086
1087
1088################################################################################
1089# Python type registry
1090
cdef class PyType:
    u"""PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    A named type that bundles a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function.  The type check receives a string and must raise ValueError
    or TypeError if it cannot handle the value.  Passing None as type check
    excludes the type from type guessing.  For registered named types, the
    'stringify' function (or unicode() if None) converts a Python object
    with type name 'name' to the string representation that is stored in
    the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters.  The first
    matching type will be used.
    """
    cdef readonly object name
    cdef readonly object type_check
    cdef readonly object stringify
    cdef object _type
    cdef list _schema_types
    def __init__(self, name, type_check, type_class, stringify=None):
        # normalise the name to unicode, rejecting non-string names
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError(u"Type name must be a string")
        if not (type_check is None or callable(type_check)):
            raise TypeError(u"Type check function must be callable (or None)")
        # only the tree type may use a class that is not a data element class
        if name != TREE_PYTYPE_NAME:
            if not issubclass(type_class, ObjectifiedDataElement):
                raise TypeError(
                    u"Data classes must inherit from ObjectifiedDataElement")
        self.name = name
        self._type = type_class
        self.type_check = type_check
        self.stringify = unicode if stringify is None else stringify
        self._schema_types = []

    def __repr__(self):
        return "PyType(%s, %s)" % (self.name, self._type.__name__)

    def register(self, before=None, after=None):
        u"""register(self, before=None, after=None)

        Register the type.

        The optional arguments 'before' and 'after' each take a sequence of
        type names that have to come before/after the new type in the list
        of type checks.  Names that are not currently known are simply
        ignored.  Raises ValueError if the dependencies cannot be fulfilled.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError(u"Cannot register tree type")
        check = self.type_check
        if check is not None:
            # a check function is listed only once: drop a previous entry
            for item in _TYPE_CHECKS:
                if item[0] is check:
                    _TYPE_CHECKS.remove(item)
                    break
            lower_bound = 0   # first position allowed by 'after'
            insert_at = -1    # position of the first type named in 'before'
            if before or after:
                if before is None:
                    before = ()
                if after is None:
                    after = ()
                for position, (_, registered) in enumerate(_TYPE_CHECKS):
                    registered_name = registered.name
                    if insert_at == -1 and registered_name in before:
                        insert_at = position
                    if registered_name in after:
                        lower_bound = position + 1
            if insert_at == -1:
                # no 'before' constraint applies => go to the end of the list
                _TYPE_CHECKS.append((check, self))
            elif lower_bound > insert_at:
                raise ValueError(u"inconsistent before/after dependencies")
            else:
                _TYPE_CHECKS.insert(insert_at, (check, self))

        _PYTYPE_DICT[self.name] = self
        for schema_type in self._schema_types:
            _SCHEMA_TYPE_DICT[schema_type] = self

    def unregister(self):
        u"""unregister(self)

        Remove this type from the name lookup, the XML Schema type lookup
        and the list of type checks, wherever it is currently registered.
        """
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        stale_keys = [key for key, registered in _SCHEMA_TYPE_DICT.items()
                      if registered is self]
        for key in stale_keys:
            del _SCHEMA_TYPE_DICT[key]
        if self.type_check is not None:
            try:
                _TYPE_CHECKS.remove((self.type_check, self))
            except ValueError:
                # not in the list of type checks => nothing to remove
                pass

    property xmlSchemaTypes:
        u"""The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            # store a private list of unicode type names
            self._schema_types = [unicode(type_name) for type_name in types]
1202
1203
# Type registry, filled by PyType.register():
# type name => PyType
cdef dict _PYTYPE_DICT = {}
# XML Schema type name => PyType
cdef dict _SCHEMA_TYPE_DICT = {}
# ordered list of (type_check, PyType) tuples, tried in order when guessing
cdef list _TYPE_CHECKS = []
1207
cdef unicode _lower_bool(b):
    # Spell a truth value as the lower-case literal u"true" / u"false".
    if b:
        return u"true"
    return u"false"
1210
cdef _pytypename(obj):
    # All string objects share the type name u"str"; everything else is
    # named by _typename().
    if python._isString(obj):
        return u"str"
    return _typename(obj)
1213
def pytypename(obj):
    u"""pytypename(obj)

    Return the name of the PyType that corresponds to a Python object.
    """
    type_name = _pytypename(obj)
    return type_name
1220
cdef _registerPyTypes():
    # Register the built-in types.  The order of the register() calls
    # defines the order in which the type checks are tried.
    int_type = PyType(u'int', _checkInt, IntElement)  # wraps functions for Python
    int_type.xmlSchemaTypes = (
        u"integer", u"int", u"short", u"byte", u"unsignedShort",
        u"unsignedByte", u"nonPositiveInteger",
        u"negativeInteger", u"long", u"nonNegativeInteger",
        u"unsignedLong", u"unsignedInt", u"positiveInteger",
    )
    int_type.register()

    # 'long' type just for backwards compatibility
    long_type = PyType(u'long', None, IntElement)
    long_type.register()

    float_type = PyType(u'float', _checkFloat, FloatElement, repr)  # wraps _parseFloat for Python
    float_type.xmlSchemaTypes = (u"double", u"float")
    float_type.register()

    bool_type = PyType(u'bool', _checkBool, BoolElement, _lower_bool)  # wraps functions for Python
    bool_type.xmlSchemaTypes = (u"boolean",)
    bool_type.register()

    str_type = PyType(u'str', None, StringElement)
    str_type.xmlSchemaTypes = (
        u"string", u"normalizedString", u"token", u"language",
        u"Name", u"NCName", u"ID", u"IDREF", u"ENTITY",
        u"NMTOKEN",
    )
    str_type.register()

    # since lxml 2.0
    none_type = PyType(u'NoneType', None, NoneElement)
    none_type.register()

    # backwards compatibility
    legacy_none_type = PyType(u'none', None, NoneElement)
    legacy_none_type.register()
1254
# non-registered PyType for inner tree elements
# (it has no type check, and PyType.register() rejects TREE_PYTYPE_NAME)
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)

# register the built-in types when the module is initialised
_registerPyTypes()
1259
def getRegisteredTypes():
    u"""getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list result = []
    cdef set seen_names = set()
    # types with a type check come first, in checking order, followed by
    # the remaining types from the name lookup
    candidates = [entry[1] for entry in _TYPE_CHECKS]
    candidates.extend(_PYTYPE_DICT.values())
    for pytype in candidates:
        type_name = pytype.name
        if type_name in seen_names:
            # each type name is reported only once
            continue
        seen_names.add(type_name)
        result.append(pytype)
    return result
1288
cdef PyType _guessPyType(value, PyType defaulttype):
    # Return the first registered PyType whose type check accepts 'value',
    # or 'defaulttype' if none does.  A None value yields None.
    if value is None:
        return None
    for type_check, candidate in _TYPE_CHECKS:
        try:
            type_check(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => try the next one
            continue
        else:
            return <PyType>candidate
    return defaulttype
1300
cdef object _guessElementClass(tree.xmlNode* c_node):
    # Guess the element class from the text content of the node.  Returns
    # None if there is no text or if no registered type check accepts it;
    # an empty text always maps to StringElement.
    text = textOf(c_node)
    if text is None:
        return None
    if text == '':
        return StringElement

    for type_check, pytype in _TYPE_CHECKS:
        try:
            type_check(text)
        except IGNORABLE_ERRORS:
            # not parsable as this type => try the next one
            continue
        return (<PyType>pytype)._type
    return None
1315
1316################################################################################
1317# adapted ElementMaker supports registered PyTypes
1318
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # Callable element factory for a single tag.  Instances are created and
    # configured by ElementMaker._build_element_maker().
    cdef object _tag              # tag name of the elements to create
    cdef object _nsmap            # namespace mapping passed to the factory
    cdef object _element_factory  # custom factory or None (=> _makeElement())
    cdef bint _annotate           # add a PYTYPE_ATTRIBUTE to data elements?

    def __call__(self, *children, **attrib):
        u"""__call__(self, *children, **attrib)

        Create a new element for the configured tag.

        Keyword arguments become attributes.  Each positional argument is
        handled according to its type: strings and other Python values are
        added as text, elements are appended as children, other element
        makers produce an empty child element, and dicts provide additional
        attributes.  A single None argument marks the element with the
        XML_SCHEMA_INSTANCE_NIL_ATTR attribute.
        """
        cdef _ObjectifyElementMakerCaller elementMaker
        cdef _Element element
        cdef _Element childElement
        cdef bint has_children
        cdef bint has_string_value
        if self._element_factory is None:
            element = _makeElement(self._tag, None, attrib, self._nsmap)
        else:
            element = self._element_factory(self._tag, attrib, self._nsmap)

        # State collected while adding the children; it decides on the
        # type annotation that is set at the end.
        pytype_name = None
        has_children = False
        has_string_value = False
        for child in children:
            if child is None:
                # None is only meaningful as the one and only argument
                if len(children) == 1:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, u"true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled element maker => add an empty child element
                elementMaker = <_ObjectifyElementMakerCaller>child
                if elementMaker._element_factory is None:
                    cetree.makeSubElement(element, elementMaker._tag,
                                          None, None, None, None)
                else:
                    childElement = elementMaker._element_factory(
                        elementMaker._tag)
                    cetree.appendChildToElement(element, childElement)
                has_children = True
            elif isinstance(child, dict):
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name in attrib:
                        continue
                    # use the registered stringify function for the value
                    # type, if any, and unicode() for other non-strings
                    pytype = _PYTYPE_DICT.get(_typename(value))
                    if pytype is not None:
                        value = (<PyType>pytype).stringify(value)
                    elif not python._isString(value):
                        value = unicode(value)
                    cetree.setAttributeValue(element, name, value)
            else:
                # any other Python value is added as text
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(_typename(child))
                if pytype is not None:
                    _add_text(element, (<PyType>pytype).stringify(child))
                else:
                    # no registered type => treat the value as a string
                    has_string_value = True
                    child = unicode(child)
                    _add_text(element, child)

        # only elements without children receive a type annotation
        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, u"str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element
1394
cdef _add_text(_Element elem, text):
    # Append text to the tree in construction.  While the element has no
    # child yet, the text extends the element text; afterwards it extends
    # the tail text of the child found by findChildBackwards().
    cdef tree.xmlNode* c_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_child is NULL:
        existing = cetree.textOf(elem._c_node)
        if existing is not None:
            text = existing + text
        cetree.setNodeText(elem._c_node, text)
    else:
        existing = cetree.tailOf(c_child)
        if existing is not None:
            text = existing + text
        cetree.setTailText(c_child, text)
1410
cdef class ElementMaker:
    u"""ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Attribute names of the form ``__xyz__`` are never turned into tag
    factories; looking them up raises AttributeError as usual.

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    cdef object _makeelement   # custom element factory or None
    cdef object _namespace     # u"{namespace}" tag prefix or None
    cdef object _nsmap         # namespace mapping for new elements
    cdef bint _annotate        # passed on to the per-tag element makers
    cdef dict _cache           # tag => _ObjectifyElementMakerCaller
    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        # All arguments are keyword-only.  Raises TypeError if 'makeelement'
        # is given but not callable.
        if nsmap is None:
            nsmap = _DEFAULT_NSMAP if annotate else {}
        self._nsmap = nsmap
        self._namespace = None if namespace is None else u"{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None:
            if not callable(makeelement):
                raise TypeError(
                    f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
            self._makeelement = makeelement
        else:
            self._makeelement = None
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Create the callable factory for 'tag' and, if requested, remember
        # it in the cache.
        cdef _ObjectifyElementMakerCaller element_maker
        element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        # tags that already carry a namespace are used as they are
        if self._namespace is not None and tag[0] != u"{":
            element_maker._tag = self._namespace + tag
        else:
            element_maker._tag = tag
        element_maker._nsmap = self._nsmap
        element_maker._annotate = self._annotate
        element_maker._element_factory = self._makeelement
        if caching:
            # keep the cache small by starting over once it exceeds 200 entries
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = element_maker
        return element_maker

    def __getattr__(self, tag):
        element_maker = self._cache.get(tag)
        if element_maker is not None:
            return element_maker
        if is_special_method(tag):
            # 'object' has no '__getattr__' to delegate to, so report the
            # attribute that was actually requested.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{tag}'")
        return self._build_element_maker(tag, caching=True)

    def __call__(self, tag, *args, **kwargs):
        element_maker = self._cache.get(tag)
        if element_maker is None:
            # names of the form __xyz__ work here, but are not cached
            element_maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return element_maker(*args, **kwargs)
1489
1490################################################################################
1491# Recursive element dumping
1492
# module-wide switch, changed only through enable_recursive_str()
cdef bint __RECURSIVE_STR = 0 # default: off

def enable_recursive_str(on=True):
    u"""enable_recursive_str(on=True)

    Enable a recursively generated tree representation for str(element),
    based on objectify.dump(element).

    Pass a false value for 'on' to switch the feature off again.  The
    setting is stored in a module-level flag.
    """
    global __RECURSIVE_STR
    __RECURSIVE_STR = on
1503
def dump(_Element element not None):
    u"""dump(_Element element not None)

    Return a recursively generated string representation of an element.
    """
    # the element passed in is dumped at indentation level 0
    return _dump(element, indent=0)
1510
cdef object _dump(_Element element, int indent):
    # Build the dump of 'element' and its subtree: one line for the
    # element, one line per attribute, then the dumps of all children at
    # the next indentation level.
    cdef list parts
    cdef bint is_top_level = (indent == 0)
    prefix = u"    " * indent
    if isinstance(element, ObjectifiedDataElement):
        shown = repr(element)
    else:
        shown = textOf(element._c_node)
        if shown is not None:
            # whitespace-only text is displayed like missing text
            shown = repr(shown) if shown.strip() else None
    parts = [f"{prefix}{element.tag} = {shown} [{_typename(element)}]\n"]

    pytype_ns = u"{%s}" % PYTYPE_NAMESPACE
    xsi_ns = u"{%s}" % XML_SCHEMA_INSTANCE_NS
    for attr_name, attr_value in sorted(cetree.iterattributes(element, 3)):
        if u'{' in attr_name:
            if attr_name == PYTYPE_ATTRIBUTE:
                if attr_value == TREE_PYTYPE_NAME:
                    # the annotation of tree elements is not displayed
                    continue
                attr_name = attr_name.replace(pytype_ns, u'py:')
            attr_name = attr_name.replace(xsi_ns, u'xsi:')
        parts.append(f"{prefix}  * {attr_name} = {attr_value!r}\n")

    for child in element.iterchildren():
        parts.append(_dump(child, indent + 1))

    text = u''.join(parts)
    if is_top_level:
        text = text[:-1]  # strip last '\n'
    return text
1542
1543
1544################################################################################
1545# Pickle support for objectified ElementTree
1546
def __unpickleElementTree(data):
    # Rebuild an ElementTree from serialised XML by parsing it with this
    # module's fromstring().
    root = fromstring(data)
    return etree.ElementTree(root)
1549
cdef _setupPickle(elementTreeReduceFunction):
    # Register the reduce function and its unpickle counterpart for
    # etree._ElementTree with the copyreg module (copy_reg on Python 2).
    if not python.IS_PYTHON2:
        import copyreg
    else:
        import copy_reg as copyreg
    copyreg.pickle(
        etree._ElementTree, elementTreeReduceFunction, __unpickleElementTree)
1557
def pickleReduceElementTree(obj):
    # Reduce an ElementTree to its serialised form plus the function that
    # rebuilds a tree from it.
    serialised = etree.tostring(obj)
    return __unpickleElementTree, (serialised,)

# register the pickle support, then remove the helper from the module namespace
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree
1563
1564################################################################################
1565# Element class lookup
1566
cdef class ObjectifyElementClassLookup(ElementClassLookup):
    u"""ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
    Element class lookup method that uses the objectify classes.
    """
    cdef object empty_data_class
    cdef object tree_class
    def __init__(self, tree_class=None, empty_data_class=None):
        u"""Lookup mechanism for objectify.

        Subclasses of ObjectifiedElement and ObjectifiedDataElement can be
        passed as keyword arguments to replace the default Element classes.
        'tree_class' is the class of inner tree elements (defaults to
        ObjectifiedElement), 'empty_data_class' is the default class for
        empty data elements (defaults to StringElement).
        """
        self._lookup_function = _lookupElementClass
        self.tree_class = (
            ObjectifiedElement if tree_class is None else tree_class)
        self.empty_data_class = (
            StringElement if empty_data_class is None else empty_data_class)
1589
cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
    # Lookup function installed by ObjectifyElementClassLookup.__init__().
    # 'state' is the lookup object itself.  The checks run in this order:
    # children, xsi:nil, pytype hint, xsi:type hint, guessing from the text
    # content, and finally the configured defaults.
    cdef ObjectifyElementClassLookup lookup
    lookup = <ObjectifyElementClassLookup>state
    # if element has children => no data class
    if cetree.hasChild(c_node):
        return lookup.tree_class

    # if element is defined as xsi:nil, return NoneElement class
    if u"true" == cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"):
        return NoneElement

    # check for Python type hint
    value = cetree.attributeValueFromNsName(
        c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    if value is not None:
        if value == TREE_PYTYPE_NAME:
            return lookup.tree_class
        py_type = <PyType>_PYTYPE_DICT.get(value)
        if py_type is not None:
            return py_type._type
        # unknown pytype name => try to figure it out ourself, just go on

    # check for XML Schema type hint
    value = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")

    if value is not None:
        schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
        if schema_type is None and u':' in value:
            # retry with the part after the first colon (the prefix is dropped)
            prefix, value = value.split(u':', 1)
            schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
        if schema_type is not None:
            return schema_type._type

    # otherwise determine class based on text content type
    el_class = _guessElementClass(c_node)
    if el_class is not None:
        return el_class

    # if element is a root node => default to tree node
    if c_node.parent is NULL or not tree._isElement(c_node.parent):
        return lookup.tree_class

    return lookup.empty_data_class
1635
1636
1637################################################################################
1638# Type annotations
1639
cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Return 'pytype' if its type check accepts the text of the node,
    # None otherwise (including the case that 'pytype' is None).
    if pytype is not None:
        text = textOf(c_node)
        try:
            pytype.type_check(text)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => ignore
            pass
        else:
            return pytype
    return None
1651
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
             empty_pytype=None):
    u"""pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'pytype'
    attributes will be ignored and replaced.  Otherwise (the default, False),
    they will be checked and only replaced if they no longer fit the current
    text value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element  element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # positional arguments of _annotate(): annotate_xsi=0, annotate_pytype=1,
    # ignore_xsi, ignore_pytype, empty_type_name, empty_pytype_name
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
1674
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    u"""xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True, current 'xsi:type'
    attributes will be ignored and replaced.  Otherwise (the default, False),
    they will be checked and only replaced if they no longer fit the current
    text value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element  element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # positional arguments of _annotate(): annotate_xsi=1, annotate_pytype=0,
    # ignore_xsi, ignore_pytype, empty_type_name, empty_pytype_name
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
1702
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    u"""annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    With 'ignore_old' set to True (the default), existing 'py:pytype'
    attributes are ignored for the type annotation.  Set it to False to
    reuse existing 'py:pytype' information (iff appropriate for the
    element text value).

    With 'ignore_xsi' set to False (the default), existing 'xsi:type'
    attributes are used for the type annotation, if they fit the element
    text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    The ``empty_pytype`` keyword argument sets the default 'py:pytype'
    annotation of empty elements.  Pass 'str', for example, to make
    string values the default.

    The ``empty_type`` keyword argument sets the default 'xsi:type'
    annotation of empty elements.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.

    The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
    (default: 1) control which kind(s) of annotation to use.
    """
    cdef _Element root = cetree.rootNodeOrRaise(element_or_tree)
    _annotate(root, annotate_xsi, annotate_pytype,
              ignore_xsi, ignore_old,
              empty_type, empty_pytype)
1740
1741
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    # Shared worker behind the public annotation functions.  Resolves the
    # default type for empty elements once and then calls
    # _annotate_element() for the element nodes reached from 'element'.
    #
    # annotate_xsi / annotate_pytype: which attribute kinds to write.
    # ignore_xsi / ignore_pytype: do not consult existing annotations.
    # empty_type_name / empty_pytype_name: XSI type name resp. Python type
    # name to use for empty elements (text, ASCII bytes, or None).
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, StrType, NoneType

    # nothing to write => nothing to do
    if not annotate_xsi and not annotate_pytype:
        return

    # The XSI type name wins: 'empty_pytype_name' is only looked at when no
    # 'empty_type_name' was given.  Unregistered names resolve to None.
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    # look these up once instead of once per element
    StrType  = <PyType>_PYTYPE_DICT.get(u'str')
    NoneType = <PyType>_PYTYPE_DICT.get(u'NoneType')

    doc = element._doc
    c_node = element._c_node
    # NOTE(review): the macro pair presumably walks the start node and all
    # of its descendants in document order -- confirm in the tree macros.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, StrType, NoneType)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
1774
cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
                           bint annotate_xsi, bint annotate_pytype,
                           bint ignore_xsi, bint ignore_pytype,
                           empty_type_name, PyType empty_pytype,
                           PyType StrType, PyType NoneType) except -1:
    # Annotate a single element node.
    #
    # The type is determined in this order: xsi:nil, an existing xsi:type
    # (unless ignore_xsi), an existing py:pytype (unless ignore_pytype), a
    # guess based on the text content, and finally the defaults for empty
    # elements.  The 'xsi:type' and/or 'py:pytype' attributes are then
    # written or removed, depending on annotate_xsi / annotate_pytype.
    #
    # Returns 0 on success; -1 signals a Python exception to the caller.
    cdef tree.xmlNs*   c_ns
    cdef PyType pytype = None
    typename = None
    istree = 0

    # if element is defined as xsi:nil, represent it as None
    if cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
        pytype = NoneType

    if pytype is None and not ignore_xsi:
        # check that old xsi type value is valid
        typename = cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        if typename is not None:
            pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is None and u':' in typename:
                # retry without the prefix; note that 'typename' keeps the
                # unprefixed name from here on
                prefix, typename = typename.split(u':', 1)
                pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is not None and pytype is not StrType:
                # StrType does not have a typecheck but is the default
                # anyway, so just accept it if given as type
                # information
                pytype = _check_type(c_node, pytype)
                if pytype is None:
                    # old type name was rejected => forget it
                    typename = None

    if pytype is None and not ignore_pytype:
        # check that old pytype value is valid
        old_pytypename = cetree.attributeValueFromNsName(
            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        if old_pytypename is not None:
            if old_pytypename == TREE_PYTYPE_NAME:
                if not cetree.hasChild(c_node):
                    # only case where we should keep it,
                    # everything else is clear enough
                    pytype = TREE_PYTYPE
            else:
                if old_pytypename == 'none':
                    # transition from lxml 1.x
                    old_pytypename = "NoneType"
                pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
                if pytype is not None and pytype is not StrType:
                    # StrType does not have a typecheck but is the
                    # default anyway, so just accept it if given as
                    # type information
                    pytype = _check_type(c_node, pytype)

    if pytype is None:
        # try to guess type
        if not cetree.hasChild(c_node):
            # element has no children => data class
            pytype = _guessPyType(textOf(c_node), StrType)
        else:
            # element has children => structural element
            istree = 1

    if pytype is None:
        # use default type for empty elements
        if cetree.hasText(c_node):
            pytype = StrType
        else:
            pytype = empty_pytype
            if typename is None:
                typename = empty_type_name

    # bring the XSI type name in line with the Python type found above
    if pytype is not None:
        if typename is None:
            if not istree:
                if pytype._schema_types:
                    # pytype->xsi:type is a 1:n mapping
                    # simply take the first
                    typename = pytype._schema_types[0]
        elif typename not in pytype._schema_types:
            # NOTE(review): unlike the branch above, this indexes
            # _schema_types without checking that it is non-empty --
            # confirm that a PyType without schema types cannot get here.
            typename = pytype._schema_types[0]

    if annotate_xsi:
        if typename is None or istree:
            cetree.delAttributeFromNsName(
                c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        else:
            # update or create attribute
            typename_utf8 = cetree.utf8(typename)
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
            if c_ns is not NULL:
                # make the prefix of the type name agree with the prefix
                # of the XML Schema namespace returned for this node
                if b':' in typename_utf8:
                    prefix, name = typename_utf8.split(b':', 1)
                    if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
                        # namespace has no prefix => drop it from the name
                        typename_utf8 = name
                    elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
                        # different prefix => replace it
                        typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
                elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
                    # unprefixed name, prefixed namespace => add the prefix
                    typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
            tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))

    if annotate_pytype:
        if pytype is None:
            # delete attribute if it exists
            cetree.delAttributeFromNsName(
                c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            # update or create attribute
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
            pytype_name = cetree.utf8(pytype.name)
            tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
                              _xcstr(pytype_name))
            if pytype is NoneType:
                # None values additionally get xsi:nil="true"
                c_ns = cetree.findOrBuildNodeNsPrefix(
                    doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
                tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")

    return 0
1895
# Module-level references to the lxml.etree helpers that deannotate() calls.
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces
1898
def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    u"""deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Remove type annotations from the elements of an XML tree, i.e. strip
    their 'py:pytype', 'xsi:type' and/or 'xsi:nil' attributes.

    The keyword arguments select what gets removed:

    * 'pytype' (default: True) - the 'py:pytype' attributes
    * 'xsi' (default: True) - the 'xsi:type' attributes
    * 'xsi_nil' (default: False) - the 'xsi:nil' attributes

    Namespace declarations are left alone by default.  Pass the option
    ``cleanup_namespaces=True`` to also remove the namespace declarations
    that are no longer used in the tree.
    """
    cdef list unwanted = []

    # (selected?, attribute name) pairs, in the order they are stripped
    for selected, attribute_name in (
            (pytype, PYTYPE_ATTRIBUTE),
            (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
            (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR)):
        if selected:
            unwanted.append(attribute_name)

    _strip_attributes(element_or_tree, *unwanted)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)
1928
1929################################################################################
1930# Module level parser setup
1931
# Parser that the module starts out with: an XMLParser that removes blank
# text and looks up element classes through ObjectifyElementClassLookup.
cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

# Parser that is currently in effect; rebound by set_default_parser().
cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER
1938
def set_default_parser(new_parser = None):
    u"""set_default_parser(new_parser = None)

    Install ``new_parser`` as the default parser that objectify's
    Element() and fromstring() functions use.

    The new parser must be an etree.XMLParser, otherwise a TypeError is
    raised and the current default parser stays in place.

    Calling the function without arguments (or with None) restores the
    original parser.
    """
    global objectify_parser
    if new_parser is None:
        # reset to the parser that the module was initialised with
        new_parser = __DEFAULT_PARSER
    elif not isinstance(new_parser, etree.XMLParser):
        raise TypeError(u"parser must inherit from lxml.etree.XMLParser")
    objectify_parser = new_parser
1956
def makeparser(**kw):
    u"""makeparser(remove_blank_text=True, **kw)

    Build and return a new XML parser for objectify trees.

    All keyword arguments supported by ``etree.XMLParser()`` are accepted
    and passed on.  In contrast to a plain ``etree.XMLParser()``, blank
    text is removed unless the ``remove_blank_text`` boolean keyword
    option is passed explicitly.
    """
    # only fills in the default, an explicitly passed value is kept
    kw.setdefault('remove_blank_text', True)
    new_parser = etree.XMLParser(**kw)
    new_parser.set_element_class_lookup(ObjectifyElementClassLookup())
    return new_parser
1972
cdef _Element _makeElement(tag, text, attrib, nsmap):
    # Create a new element through the lxml.etree C-API, using whichever
    # parser is currently stored in 'objectify_parser'.
    # NOTE(review): the two None arguments are presumably the document and
    # the tail text -- confirm against the cetree.makeElement() signature.
    return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap)
1975
1976################################################################################
1977# Module level factory functions
1978
# Module-level reference to etree.fromstring(), used by fromstring() and XML().
cdef object _fromstring
_fromstring = etree.fromstring

# SubElement is taken over from lxml.etree unchanged.
SubElement = etree.SubElement
1983
def fromstring(xml, parser=None, *, base_url=None):
    u"""fromstring(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree fromstring() function.
    It parses with the current default objectify parser, unless a
    different parser is passed as second argument.

    Use the ``base_url`` keyword argument to set the original base URL of
    the document.  This supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    return _fromstring(
        xml, objectify_parser if parser is None else parser,
        base_url=base_url)
1999
def XML(xml, parser=None, *, base_url=None):
    u"""XML(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree XML() literal factory.
    It parses with the current default objectify parser, unless a
    different parser is passed as second argument.

    Use the ``base_url`` keyword argument to set the original base URL of
    the document.  This supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    return _fromstring(
        xml, objectify_parser if parser is None else parser,
        base_url=base_url)
2015
# Module-level reference to etree.parse(), used by parse() below.
cdef object _parse
_parse = etree.parse
2018
def parse(f, parser=None, *, base_url=None):
    u"""parse(f, parser=None, base_url=None)

    Parse a file or file-like object.  The current default objectify
    parser is used, unless a different parser is passed as second
    argument.

    Use the ``base_url`` keyword to set a URL for the document when
    parsing from a file-like object.  This is needed when looking up
    external entities (DTD, XInclude, ...) with relative paths.
    """
    return _parse(
        f, objectify_parser if parser is None else parser,
        base_url=base_url)
2033
# Prefix -> namespace URI mapping that the Element() and DataElement()
# factories use when they are called without an explicit nsmap.
cdef dict _DEFAULT_NSMAP = {
    "py"  : PYTYPE_NAMESPACE,
    "xsi" : XML_SCHEMA_INSTANCE_NS,
    "xsd" : XML_SCHEMA_NS
}

# Ready-made ElementMaker instance, created with its default arguments.
E = ElementMaker()
2041
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
    u"""Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

    Objectify specific version of the lxml.etree Element() factory that
    always creates a structural (tree) element.

    XML attributes are taken from the ``attrib`` mapping and from keyword
    arguments, where keyword arguments take precedence.  The ``attrib``
    mapping is copied and never modified.

    The 'py:pytype' attribute of the new element is set to ``_pytype``,
    or to the tree type name if ``_pytype`` is None.  If ``nsmap`` is
    None, the default objectify namespace mapping is used.

    NOTE: requires parser based element class lookup activated in lxml.etree!
    """
    if attrib is not None:
        # Always merge into a fresh dict: the py:pytype entry added below
        # must not end up in the mapping that the caller passed in.
        merged = dict(attrib)
        merged.update(_attributes)
        _attributes = merged
    if _pytype is None:
        _pytype = TREE_PYTYPE_NAME
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    _attributes[PYTYPE_ATTRIBUTE] = _pytype
    return _makeElement(_tag, None, _attributes, nsmap)
2061
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                **_attributes):
    u"""DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

    Create a new element from a Python value and XML attributes taken from
    keyword arguments or a dictionary passed as second argument.

    Automatically adds a 'pytype' attribute for the Python type of the value,
    if the type can be identified.  If '_pytype' or '_xsi' are among the
    keyword arguments, they will be used instead.

    If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
    xsi:type and other attributes and nsmap are reused unless they are redefined
    in attrib and/or keyword arguments.

    The ``attrib`` mapping is copied and never modified.

    Raises ValueError if '_xsi' is given but the XML Schema namespace cannot
    be found for it in the namespace mapping.  Errors raised by the type
    check of the selected Python type are passed on to the caller.
    """
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    if attrib:
        # Always merge into a fresh dict: the type attributes added below
        # must not end up in the mapping that the caller passed in.
        merged = dict(attrib)
        merged.update(_attributes)
        _attributes = merged
    if isinstance(_value, ObjectifiedElement):
        if _pytype is None:
            if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP:
                # special case: no change!
                return _value.__copy__()
    if isinstance(_value, ObjectifiedDataElement):
        # reuse existing nsmap unless redefined in nsmap parameter
        temp = _value.nsmap
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(nsmap)
            nsmap = temp
        # reuse existing attributes unless redefined in attrib/_attributes
        temp = _value.attrib
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(_attributes)
            _attributes = temp
        # reuse existing xsi:type or py:pytype attributes, unless provided as
        # arguments
        if _xsi is None and _pytype is None:
            _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
            _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

    if _xsi is not None:
        # 'name' is the unprefixed type name, '_xsi' the value that gets
        # written to the xsi:type attribute
        if u':' in _xsi:
            prefix, name = _xsi.split(u':', 1)
            ns = nsmap.get(prefix)
            if ns != XML_SCHEMA_NS:
                raise ValueError(u"XSD types require the XSD namespace")
        elif nsmap is _DEFAULT_NSMAP:
            name = _xsi
            _xsi = u'xsd:' + _xsi
        else:
            name = _xsi
            # find the prefix that the namespace mapping uses for XSD
            for prefix, ns in nsmap.items():
                if ns == XML_SCHEMA_NS:
                    if prefix is not None and prefix:
                        _xsi = prefix + u':' + _xsi
                    break
            else:
                raise ValueError(u"XSD types require the XSD namespace")
        _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
        if _pytype is None:
            # allow using unregistered or even wrong xsi:type names
            py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
            if py_type is None:
                py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
            if py_type is not None:
                _pytype = py_type.name

    if _pytype is None:
        _pytype = _pytypename(_value)

    # determine the text content of the new element
    if _value is None and _pytype != u"str":
        _pytype = _pytype or u"NoneType"
        strval = None
    elif python._isString(_value):
        strval = _value
    elif isinstance(_value, bool):
        if _value:
            strval = u"true"
        else:
            strval = u"false"
    else:
        py_type = <PyType>_PYTYPE_DICT.get(_pytype)
        stringify = unicode if py_type is None else py_type.stringify
        strval = stringify(_value)

    if _pytype is not None:
        if _pytype == u"NoneType" or _pytype == u"none":
            # None values carry no text and are marked with xsi:nil
            strval = None
            _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = u"true"
        else:
            # check if type information from arguments is valid
            py_type = <PyType>_PYTYPE_DICT.get(_pytype)
            if py_type is not None:
                if py_type.type_check is not None:
                    py_type.type_check(strval)
                _attributes[PYTYPE_ATTRIBUTE] = _pytype

    return _makeElement(u"value", strval, _attributes, nsmap)
2166
2167
2168################################################################################
2169# ObjectPath
2170
2171include "objectpath.pxi"
2172