# defusedxml
#
# Copyright (c) 2013 by Christian Heimes <christian@python.org>
# Licensed to PSF under a Contributor Agreement.
# See https://www.python.org/psf/license for licensing details.
"""DEPRECATED Example code for lxml.etree protection

The code has NO protection against decompression bombs.
"""
from __future__ import print_function, absolute_import

import threading
import warnings

from lxml import etree as _etree

from .common import DTDForbidden, EntitiesForbidden, NotSupportedError

# True when the installed lxml exposes dtd.iterentities() (lxml >= 3).
LXML3 = _etree.LXML_VERSION[0] >= 3

__origin__ = "lxml.etree"

tostring = _etree.tostring


warnings.warn(
    "defusedxml.lxml is no longer supported and will be removed in a future release.",
    category=DeprecationWarning,
    stacklevel=2,
)


class RestrictedElement(_etree.ElementBase):
    """A restricted Element class that filters out instances of some classes

    Every iteration entry point below delegates to the lxml implementation
    and then drops any node that is an instance of ``blacklist``.
    """

    __slots__ = ()
    # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment)
    blacklist = _etree._Entity

    def _filter(self, iterator):
        """Yield the items of *iterator* that are not ``blacklist`` instances."""
        blacklist = self.blacklist
        for child in iterator:
            if isinstance(child, blacklist):
                continue
            yield child

    def __iter__(self):
        iterator = super(RestrictedElement, self).__iter__()
        return self._filter(iterator)

    def iterchildren(self, tag=None, reversed=False):
        iterator = super(RestrictedElement, self).iterchildren(tag=tag, reversed=reversed)
        return self._filter(iterator)

    def iter(self, tag=None, *tags):
        # ``tag`` must be forwarded positionally: ``iter(tag=tag, *tags)``
        # binds the first element of ``tags`` to ``tag`` as well and raises
        # TypeError as soon as more than one tag is requested.
        iterator = super(RestrictedElement, self).iter(tag, *tags)
        return self._filter(iterator)

    def iterdescendants(self, tag=None, *tags):
        # Forwarded positionally for the same reason as in iter().
        iterator = super(RestrictedElement, self).iterdescendants(tag, *tags)
        return self._filter(iterator)

    def itersiblings(self, tag=None, preceding=False):
        iterator = super(RestrictedElement, self).itersiblings(tag=tag, preceding=preceding)
        return self._filter(iterator)

    def getchildren(self):
        """Return the filtered direct children as a list."""
        iterator = super(RestrictedElement, self).__iter__()
        return list(self._filter(iterator))

    def getiterator(self, tag=None):
        iterator = super(RestrictedElement, self).getiterator(tag)
        return self._filter(iterator)


class GlobalParserTLS(threading.local):
    """Thread local context for custom parser instances

    The default parser is created lazily on first use and stored on this
    ``threading.local`` instance.
    """

    # Keyword arguments handed to ``_etree.XMLParser`` for the default parser.
    parser_config = {
        "resolve_entities": False,
        # 'remove_comments': True,
        # 'remove_pis': True,
    }

    # Element class installed on the default parser; ``None`` leaves the
    # parser's own element class lookup untouched.
    element_class = RestrictedElement

    def createDefaultParser(self):
        """Build a new XMLParser from ``parser_config`` and ``element_class``."""
        parser = _etree.XMLParser(**self.parser_config)
        element_class = self.element_class
        if element_class is not None:
            lookup = _etree.ElementDefaultClassLookup(element=element_class)
            parser.set_element_class_lookup(lookup)
        return parser

    def setDefaultParser(self, parser):
        """Store *parser* as the default parser of this thread-local object."""
        self._default_parser = parser

    def getDefaultParser(self):
        """Return the stored default parser, creating and storing one if unset."""
        parser = getattr(self, "_default_parser", None)
        if parser is None:
            parser = self.createDefaultParser()
            self.setDefaultParser(parser)
        return parser


_parser_tls = GlobalParserTLS()
getDefaultParser = _parser_tls.getDefaultParser


def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
    """Check docinfo of an element tree for DTD and entity declarations

    The check for entity declarations needs lxml 3 or newer. lxml 2.x does
    not support dtd.iterentities().

    Raises:
        DTDForbidden: the document has a doctype and *forbid_dtd* is true.
        NotSupportedError: the document has a doctype, *forbid_entities* is
            true and the installed lxml is older than 3.
        EntitiesForbidden: *forbid_entities* is true and the internal or
            external DTD declares at least one entity.
    """
    docinfo = elementtree.docinfo
    if docinfo.doctype:
        if forbid_dtd:
            raise DTDForbidden(docinfo.doctype, docinfo.system_url, docinfo.public_id)
        if forbid_entities and not LXML3:
            # lxml < 3 has no iterentities()
            raise NotSupportedError("Unable to check for entity declarations in lxml 2.x")

    if forbid_entities:
        for dtd in docinfo.internalDTD, docinfo.externalDTD:
            if dtd is None:
                continue
            # The first declared entity is enough to reject the document.
            for entity in dtd.iterentities():
                raise EntitiesForbidden(entity.name, entity.content, None, None, None, None)


def parse(source, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse *source* into an element tree and run check_docinfo() on it.

    When *parser* is None the default parser from getDefaultParser() is used.
    The docinfo check runs after lxml has finished parsing.
    """
    if parser is None:
        parser = getDefaultParser()
    elementtree = _etree.parse(source, parser, base_url=base_url)
    check_docinfo(elementtree, forbid_dtd, forbid_entities)
    return elementtree


def fromstring(text, parser=None, base_url=None, forbid_dtd=False, forbid_entities=True):
    """Parse *text* and return its root element after running check_docinfo().

    When *parser* is None the default parser from getDefaultParser() is used.
    The docinfo check runs on the root element's tree after parsing.
    """
    if parser is None:
        parser = getDefaultParser()
    rootelement = _etree.fromstring(text, parser, base_url=base_url)
    elementtree = rootelement.getroottree()
    check_docinfo(elementtree, forbid_dtd, forbid_entities)
    return rootelement


XML = fromstring


def iterparse(*args, **kwargs):
    """Always raise NotSupportedError; no defused iterparse is provided."""
    raise NotSupportedError("defused lxml.etree.iterparse not available")