1#
2# ElementTree
3# $Id: ElementInclude.py 3375 2008-02-13 08:05:08Z fredrik $
4#
5# limited xinclude support for element trees
6#
7# history:
8# 2003-08-15 fl   created
9# 2003-11-14 fl   fixed default loader
10#
11# Copyright (c) 2003-2004 by Fredrik Lundh.  All rights reserved.
12#
13# fredrik@pythonware.com
14# http://www.pythonware.com
15#
16# --------------------------------------------------------------------
17# The ElementTree toolkit is
18#
19# Copyright (c) 1999-2008 by Fredrik Lundh
20#
21# By obtaining, using, and/or copying this software and/or its
22# associated documentation, you agree that you have read, understood,
23# and will comply with the following terms and conditions:
24#
25# Permission to use, copy, modify, and distribute this software and
26# its associated documentation for any purpose and without fee is
27# hereby granted, provided that the above copyright notice appears in
28# all copies, and that both that copyright notice and this permission
29# notice appear in supporting documentation, and that the name of
30# Secret Labs AB or the author not be used in advertising or publicity
31# pertaining to distribution of the software without specific, written
32# prior permission.
33#
34# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41# OF THIS SOFTWARE.
42# --------------------------------------------------------------------
43
44# Licensed to PSF under a Contributor Agreement.
45# See https://www.python.org/psf/license for licensing details.
46
47##
48# Limited XInclude support for the ElementTree package.
49##
50
51import copy
52from . import ElementTree
53from urllib.parse import urljoin
54
# XInclude namespace URI, wrapped in braces so it can be prepended to a
# local name to form a qualified element tag.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Qualified tags of the two XInclude elements this module looks for.
XINCLUDE_INCLUDE = "%sinclude" % XINCLUDE
XINCLUDE_FALLBACK = "%sfallback" % XINCLUDE

# Default cap on how deeply included documents may themselves include
# other documents; kept small for security reasons.
DEFAULT_MAX_INCLUSION_DEPTH = 6
62
63
64##
65# Fatal include error.
66
class FatalIncludeError(SyntaxError):
    """Raised when an XInclude directive cannot be processed.

    Used for resources that cannot be loaded, unknown parse modes,
    misplaced fallback elements and recursive includes.
    """
69
70
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when nested includes go deeper than the allowed maximum."""
73
74
75##
76# Default loader.  This loader reads an included resource from disk.
77#
78# @param href Resource reference.
79# @param parse Parse mode.  Either "xml" or "text".
80# @param encoding Optional text encoding (UTF-8 by default for "text").
81# @return The expanded resource.  If the parse mode is "xml", this
82#    is an ElementTree instance.  If the parse mode is "text", this
83#    is a Unicode string.  If the loader fails, it can return None
84#    or raise an OSError exception.
85# @throws OSError If the loader fails to load the resource.
86
def default_loader(href, parse, encoding=None):
    """Read an included resource from the local file system.

    href is passed straight to open().  When parse is "xml" the file is
    parsed and its root element is returned.  For any other parse value
    the file is read as text, decoded with encoding (UTF-8 when encoding
    is None or empty), and returned as a string.

    Errors raised by open() or by the XML parser are not caught here.
    """
    if parse != "xml":
        # Text mode: an unset or empty encoding falls back to UTF-8.
        with open(href, 'r', encoding=encoding or 'UTF-8') as stream:
            return stream.read()

    # Binary mode lets the XML parser work out the encoding itself.
    with open(href, 'rb') as stream:
        return ElementTree.parse(stream).getroot()
97
98##
99# Expand XInclude directives.
100#
101# @param elem Root element.
102# @param loader Optional resource loader.  If omitted, it defaults
103#     to {@link default_loader}.  If given, it should be a callable
104#     that implements the same interface as <b>default_loader</b>.
105# @param base_url The base URL of the original file, to resolve
106#     relative include file references.
# @param max_depth The maximum number of recursive inclusions.
#     Limited to reduce the risk of malicious content explosion.
#     Pass None to disable the limitation.  A negative value is
#     rejected with a ValueError.
110# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
111# @throws FatalIncludeError If the function fails to include a given
112#     resource, or if the tree contains malformed XInclude elements.
113# @throws IOError If the function fails to load a given resource.
# @returns None.  The tree rooted at elem is expanded in place.
115
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives below elem, modifying the tree in place.

    elem is an element, or any object with a getroot() method, in which
    case the element returned by getroot() is processed.

    loader is a callable with the same interface as default_loader;
    default_loader is used when it is None.

    base_url, when given, is the URL that relative href values are
    resolved against.

    max_depth limits how deeply included documents may include further
    documents.  None removes the limit; a negative number is rejected
    with ValueError.

    Nothing is returned.
    """
    if max_depth is not None:
        if max_depth < 0:
            raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
        depth_budget = max_depth
    else:
        # A negative budget never counts down to zero, so the depth
        # check in _include never triggers.
        depth_budget = -1

    root = elem.getroot() if hasattr(elem, 'getroot') else elem
    resolver = default_loader if loader is None else loader

    # Start with an empty set of hrefs that are currently being expanded.
    _include(root, resolver, base_url, depth_budget, set())
129
130
def _include(elem, loader, base_url, max_depth, _parent_hrefs):
    """Recursively expand XInclude directives among the descendants of elem.

    elem itself is never replaced; only its children, text and the tails
    of its children are modified.

    loader is called as loader(href, "xml") or
    loader(href, "text", encoding).  base_url, when truthy, is joined
    with each href before loading.  max_depth is the number of nested
    "xml" inclusions still allowed: reaching 0 raises
    LimitedRecursiveIncludeError, while a negative value never reaches 0.
    _parent_hrefs is the set of hrefs whose "xml" inclusion is currently
    being expanded, used to detect a document including itself.

    Raises FatalIncludeError for a recursive include, a loader that
    returns None, an unknown parse mode, or an xi:fallback element that
    is not nested inside an xi:include.
    """
    index = 0
    # Children are replaced and deleted during the walk, so iterate by
    # position and re-evaluate len(elem) on every pass.
    while index < len(elem):
        child = elem[index]

        if child.tag != XINCLUDE_INCLUDE:
            if child.tag == XINCLUDE_FALLBACK:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % child.tag
                    )
            # Ordinary element: look for directives further down.
            _include(child, loader, base_url, max_depth, _parent_hrefs)
            index += 1
            continue

        # From here on, child is an xi:include directive.
        target = child.get("href")
        if base_url:
            target = urljoin(base_url, target)
        mode = child.get("parse", "xml")

        if mode == "text":
            included_text = loader(target, mode, child.get("encoding"))
            if included_text is None:
                raise FatalIncludeError(
                    "cannot load %r as %r" % (target, mode)
                    )
            if child.tail:
                included_text += child.tail
            # The directive disappears, so its text is attached to
            # whatever precedes it: the parent's text for a first child,
            # otherwise the previous sibling's tail.
            if index == 0:
                elem.text = (elem.text or "") + included_text
            else:
                previous = elem[index - 1]
                previous.tail = (previous.tail or "") + included_text
            del elem[index]
            # The next sibling has moved into this slot; do not advance.
            continue

        if mode != "xml":
            raise FatalIncludeError(
                "unknown parse type in xi:include tag (%r)" % mode
            )

        # mode == "xml": splice in the loaded element.
        if target in _parent_hrefs:
            raise FatalIncludeError("recursive include of %s" % target)
        if max_depth == 0:
            raise LimitedRecursiveIncludeError(
                "maximum xinclude depth reached when including file %s" % target)
        _parent_hrefs.add(target)
        subtree = loader(target, mode)
        if subtree is None:
            raise FatalIncludeError(
                "cannot load %r as %r" % (target, mode)
                )
        subtree = copy.copy(subtree)  # FIXME: this makes little sense with recursive includes
        # Nested directives are resolved relative to the included document.
        _include(subtree, loader, target, max_depth - 1, _parent_hrefs)
        _parent_hrefs.remove(target)
        if child.tail:
            subtree.tail = (subtree.tail or "") + child.tail
        elem[index] = subtree
        index += 1
186