1# defusedxml
2#
3# Copyright (c) 2013 by Christian Heimes <christian@python.org>
4# Licensed to PSF under a Contributor Agreement.
5# See https://www.python.org/psf/license for licensing details.
6"""DEPRECATED Example code for lxml.etree protection
7
8The code has NO protection against decompression bombs.
9"""
10from __future__ import print_function, absolute_import
11
12import threading
13import warnings
14
15from lxml import etree as _etree
16
17from .common import DTDForbidden, EntitiesForbidden, NotSupportedError
18
# True when the installed lxml reports a major version of 3 or higher.
# check_docinfo() refuses to scan for entity declarations when this is False.
LXML3 = _etree.LXML_VERSION[0] >= 3

# Presumably the name of the library module this module stands in for
# (NOTE(review): confirm how __origin__ is consumed elsewhere in the package).
__origin__ = "lxml.etree"

# Serialisation is re-exported from lxml unchanged.
tostring = _etree.tostring
24
25
# Runs at import time, so importing this module issues a DeprecationWarning.
# stacklevel=2 reports the location one frame above this module's own code.
warnings.warn(
    "defusedxml.lxml is no longer supported and will be removed in a future release.",
    DeprecationWarning,
    stacklevel=2,
)
31
32
class RestrictedElement(_etree.ElementBase):
    """Element class whose iteration methods skip blacklisted node types.

    Each accessor overridden here delegates to the lxml base class and then
    drops every node that is an instance of ``blacklist``, so those nodes are
    never handed to the caller by these methods.
    """

    __slots__ = ()
    # ``blacklist`` is passed straight to isinstance(), so it may be a single
    # class or a tuple of classes, e.g.
    # (_etree._Entity, _etree._ProcessingInstruction, _etree._Comment).
    blacklist = _etree._Entity

    def _filter(self, iterator):
        """Yield the items of *iterator* that are not instances of ``blacklist``."""
        blacklist = self.blacklist
        for child in iterator:
            if isinstance(child, blacklist):
                continue
            yield child

    def __iter__(self):
        """Iterate over the direct children, minus blacklisted nodes."""
        iterator = super(RestrictedElement, self).__iter__()
        return self._filter(iterator)

    def iterchildren(self, tag=None, reversed=False):
        """Filtered version of the base class ``iterchildren()``."""
        iterator = super(RestrictedElement, self).iterchildren(tag=tag, reversed=reversed)
        return self._filter(iterator)

    def iter(self, tag=None, *tags):
        """Filtered version of the base class ``iter()``.

        Any number of tags may be given; they are forwarded unchanged.
        """
        # ``tag`` must be forwarded positionally. Writing ``tag=tag, *tags``
        # makes Python bind the first extra tag to ``tag`` as well, which
        # raises TypeError (multiple values for 'tag') for two or more tags.
        iterator = super(RestrictedElement, self).iter(tag, *tags)
        return self._filter(iterator)

    def iterdescendants(self, tag=None, *tags):
        """Filtered version of the base class ``iterdescendants()``.

        Any number of tags may be given; they are forwarded unchanged.
        """
        # Positional forwarding for the same reason as in iter().
        iterator = super(RestrictedElement, self).iterdescendants(tag, *tags)
        return self._filter(iterator)

    def itersiblings(self, tag=None, preceding=False):
        """Filtered version of the base class ``itersiblings()``."""
        iterator = super(RestrictedElement, self).itersiblings(tag=tag, preceding=preceding)
        return self._filter(iterator)

    def getchildren(self):
        """Return the direct children as a list, minus blacklisted nodes."""
        iterator = super(RestrictedElement, self).__iter__()
        return list(self._filter(iterator))

    def getiterator(self, tag=None):
        """Filtered version of the base class ``getiterator()``."""
        iterator = super(RestrictedElement, self).getiterator(tag)
        return self._filter(iterator)
75
76
class GlobalParserTLS(threading.local):
    """Holder for a lazily created default parser.

    The class derives from ``threading.local``, so the parser cached on an
    instance is stored separately for every thread. ``parser_config`` and
    ``element_class`` are class attributes.
    """

    # Keyword arguments handed to _etree.XMLParser by createDefaultParser().
    parser_config = {
        "resolve_entities": False,
        # 'remove_comments': True,
        # 'remove_pis': True,
    }

    # Element class installed on new parsers; None skips installing a lookup.
    element_class = RestrictedElement

    def createDefaultParser(self):
        """Build and return a new XMLParser configured from the class attributes."""
        new_parser = _etree.XMLParser(**self.parser_config)
        custom_class = self.element_class
        if custom_class is None:
            return new_parser
        new_parser.set_element_class_lookup(
            _etree.ElementDefaultClassLookup(element=custom_class)
        )
        return new_parser

    def setDefaultParser(self, parser):
        """Store *parser* as the default parser on this object."""
        self._default_parser = parser

    def getDefaultParser(self):
        """Return the stored default parser, creating and storing one if needed."""
        cached = getattr(self, "_default_parser", None)
        if cached is not None:
            return cached
        # Nothing stored yet (or None was stored): build one and remember it.
        fresh = self.createDefaultParser()
        self.setDefaultParser(fresh)
        return fresh
106
107
# Module-wide GlobalParserTLS instance. Because the class derives from
# threading.local, the parser it caches is stored separately for each thread.
_parser_tls = GlobalParserTLS()
# Module-level shortcut; parse() and fromstring() call it when no parser is given.
getDefaultParser = _parser_tls.getDefaultParser
110
111
def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
    """Reject a parsed tree that carries a forbidden DTD or entity declaration.

    Reads ``elementtree.docinfo`` and raises:

    * DTDForbidden if a doctype is present and *forbid_dtd* is true;
    * NotSupportedError if a doctype is present, *forbid_entities* is true
      and lxml is older than 3 (lxml 2.x has no ``dtd.iterentities()``);
    * EntitiesForbidden for the first entity found in the internal or
      external DTD when *forbid_entities* is true.

    Returns None when nothing forbidden is found.
    """
    info = elementtree.docinfo
    has_doctype = bool(info.doctype)

    if has_doctype and forbid_dtd:
        raise DTDForbidden(info.doctype, info.system_url, info.public_id)

    if not forbid_entities:
        return

    if has_doctype and not LXML3:
        # lxml < 3 has no iterentities()
        raise NotSupportedError("Unable to check for entity declarations in lxml 2.x")

    for dtd in (info.internalDTD, info.externalDTD):
        if dtd is not None:
            # The first declared entity is enough to reject the document.
            for entity in dtd.iterentities():
                raise EntitiesForbidden(entity.name, entity.content, None, None, None, None)
132
133
def parse(source, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse *source* with lxml and run check_docinfo() on the resulting tree.

    When *parser* is None the parser from getDefaultParser() is used.
    Returns the element tree; any exception from check_docinfo() propagates.
    """
    active_parser = getDefaultParser() if parser is None else parser
    tree = _etree.parse(source, active_parser, base_url=base_url)
    check_docinfo(tree, forbid_dtd, forbid_entities)
    return tree
140
141
def fromstring(text, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse *text* with lxml and run check_docinfo() on its root tree.

    When *parser* is None the parser from getDefaultParser() is used.
    Returns the root element (not the tree); any exception from
    check_docinfo() propagates.
    """
    active_parser = getDefaultParser() if parser is None else parser
    root = _etree.fromstring(text, active_parser, base_url=base_url)
    # docinfo lives on the tree, so fetch it from the root before checking.
    check_docinfo(root.getroottree(), forbid_dtd, forbid_entities)
    return root
149
150
# XML is an alias for fromstring(); both names refer to the same function.
XML = fromstring
152
153
def iterparse(*args, **kwargs):
    """Unavailable stand-in for ``iterparse``; always raises NotSupportedError.

    All positional and keyword arguments are accepted and ignored.
    """
    message = "defused lxml.etree.iterparse not available"
    raise NotSupportedError(message)
156