1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5r"""Python sandbox implementation for build files.
6
7This module contains classes for Python sandboxes that execute in a
8highly-controlled environment.
9
The main class is `Sandbox`. It provides an execution environment for Python
code and fills a Context instance with the results of that execution.
13
14Code in this module takes a different approach to exception handling compared
15to what you'd see elsewhere in Python. Arguments to built-in exceptions like
16KeyError are machine parseable. This machine-friendly data is used to present
17user-friendly error messages in the case of errors.
18"""
19
20from __future__ import absolute_import, print_function, unicode_literals
21
22import os
23import six
24import sys
25import weakref
26
27from mozbuild.util import (
28    exec_,
29    ReadOnlyDict,
30)
31from .context import Context
32from mozpack.files import FileFinder
33
34
# Finder used by Sandbox instances that are not given an explicit ``finder``
# argument. It is created once, at import time, with "/" as its argument
# (presumably the base path for lookups -- confirm against FileFinder).
default_finder = FileFinder("/")
36
37
def alphabetical_sorted(iterable, key=lambda x: x.lower(), reverse=False):
    """Return a new list with the items of ``iterable`` in sorted order.

    This stands in for the built-in sorted() inside the sandbox. Unless a
    different ``key`` is supplied, items are compared by their lower-cased
    form, which yields a case-insensitive alphabetical ordering.
    """
    # Materialize first, then sort in place: the same steps sorted() performs.
    ordered = list(iterable)
    ordered.sort(key=key, reverse=reverse)
    return ordered
43
44
class SandboxError(Exception):
    """Base class for the sandbox exceptions defined in this module.

    Instances carry ``file_stack``, the stack of files supplied by whoever
    raised the error.
    """

    def __init__(self, file_stack):
        # Stored as given (no copy); Exception.__init__ is intentionally not
        # invoked here, so ``args`` stays as set at construction.
        self.file_stack = file_stack
48
49
class SandboxExecutionError(SandboxError):
    """Error raised when code run inside a Sandbox fails.

    This is a plain container: it records the file stack together with the
    type, value and traceback of the underlying exception so that another
    component can report on it.
    """

    def __init__(self, file_stack, exc_type, exc_value, trace):
        super(SandboxExecutionError, self).__init__(file_stack)

        # Details of the exception that interrupted the execution.
        self.exc_type, self.exc_value, self.trace = exc_type, exc_value, trace
63
64
class SandboxLoadError(SandboxError):
    """Error raised when a file could not be loaded for execution.

    The failure may have happened while another file was being executed. In
    that case file_stack is non-empty and the file that triggered the load
    sits on top of the stack.

    ``illegal_path`` and ``read_error`` default to None; the raiser sets
    whichever one describes the failure.
    """

    def __init__(self, file_stack, trace, illegal_path=None, read_error=None):
        super(SandboxLoadError, self).__init__(file_stack)

        self.trace = trace
        self.illegal_path, self.read_error = illegal_path, read_error
80
81
class Sandbox(dict):
    """Represents a sandbox for executing Python code.

    This class provides a sandbox for execution of a single mozbuild frontend
    file. The results of that execution are stored in the Context instance
    given as the ``context`` argument.

    Sandbox is effectively a glorified wrapper around compile() + exec(). You
    point it at some Python code and it executes it. The main difference from
    executing Python code like normal is that the executed code is very limited
    in what it can do: the sandbox only exposes a very limited set of Python
    functionality. Only specific types and functions are available. This
    prevents executed code from doing things like import modules, open files,
    etc.

    Sandbox instances act as global namespace for the sandboxed execution
    itself. They shall not be used to access the results of the execution.
    Those results are available in the given Context instance after execution.

    The Sandbox itself is responsible for enforcing rules such as forbidding
    reassignment of variables.

    Implementation note: Sandbox derives from dict because exec() insists that
    what it is given for namespaces is a dict.
    """

    # The default set of builtins.
    BUILTINS = ReadOnlyDict(
        {
            # Only real Python built-ins should go here.
            "None": None,
            "False": False,
            "True": True,
            "sorted": alphabetical_sorted,
            "int": int,
            "set": set,
            "tuple": tuple,
        }
    )

    def __init__(self, context, finder=default_finder):
        """Initialize a Sandbox ready for execution.

        ``context`` must be a Context instance; it becomes the bottom of the
        active context stack. ``finder`` is the object whose ``get(path)``
        result is read by exec_file() to obtain source code.
        """
        self._builtins = self.BUILTINS
        # Bypass our own __setitem__, which refuses to assign "__builtins__".
        dict.__setitem__(self, "__builtins__", self._builtins)

        assert isinstance(self._builtins, ReadOnlyDict)
        assert isinstance(context, Context)

        # Contexts are modeled as a stack because multiple context managers
        # may be active.
        self._active_contexts = [context]

        # Seen sub-contexts. Will be populated with other Context instances
        # that were related to execution of this instance.
        self.subcontexts = []

        # We need to record this because it gets swallowed as part of
        # evaluation.
        self._last_name_error = None

        # Current literal source being executed.
        self._current_source = None

        self._finder = finder

    @property
    def _context(self):
        """The currently active context (top of the context stack)."""
        return self._active_contexts[-1]

    def exec_file(self, path):
        """Execute code at a path in the sandbox.

        The path must be absolute.

        Any exception raised while reading the file is reported as a
        SandboxLoadError whose ``read_error`` is ``path``.
        """
        assert os.path.isabs(path)

        try:
            source = six.ensure_text(self._finder.get(path).read())
        except Exception:
            raise SandboxLoadError(
                self._context.source_stack, sys.exc_info()[2], read_error=path
            )

        self.exec_source(source, path)

    def exec_source(self, source, path=""):
        """Execute Python code within a string.

        The passed string should contain Python code to be executed. The string
        will be compiled and executed.

        You should almost always go through exec_file() because exec_source()
        does not perform extra path normalization. This can cause relative
        paths to behave weirdly.
        """

        def execute():
            # compile() inherits the __future__ from the module by default. We
            # do want Unicode literals.
            code = compile(source, path, "exec")
            # We use ourself as the global namespace for the execution. There
            # is no need for a separate local namespace as moz.build execution
            # is flat, namespace-wise.
            old_source = self._current_source
            self._current_source = source
            try:
                exec_(code, self)
            finally:
                self._current_source = old_source

        # Compilation happens inside execute() so that compile errors go
        # through the same error wrapping as runtime errors.
        self.exec_function(execute, path=path)

    def _error_source_stack(self, path, becomes_current_path):
        """Return a snapshot of the source stack to attach to an error.

        The snapshot is a new list, so neither appending ``path`` here nor the
        pop_source() call made while unwinding can affect the other side.
        When ``becomes_current_path`` is false, ``path`` was never pushed on
        the context, so it is appended to the snapshot only.
        """
        source_stack = list(self._context.source_stack)
        if not becomes_current_path:
            # Add current file to the stack because it wasn't added before
            # sandbox execution.
            source_stack.append(path)
        return source_stack

    def exec_function(
        self, func, args=(), kwargs=None, path="", becomes_current_path=True
    ):
        """Execute function with the given arguments in the sandbox.

        ``func`` is called as ``func(*args, **kwargs)``; ``kwargs`` of None
        means no keyword arguments. If ``path`` is non-empty and
        ``becomes_current_path`` is true, ``path`` is pushed as a source on the
        active context for the duration of the call.

        SandboxError exceptions propagate unchanged. Any other Exception is
        re-raised as a SandboxExecutionError carrying the source stack and the
        original exception information.
        """
        if kwargs is None:
            kwargs = {}

        if path and becomes_current_path:
            self._context.push_source(path)

        old_sandbox = self._context._sandbox
        self._context._sandbox = weakref.ref(self)

        # We don't have to worry about bytecode generation here because we are
        # too low-level for that. However, we could add bytecode generation via
        # the marshal module if parsing performance were ever an issue.

        old_source = self._current_source
        self._current_source = None
        try:
            func(*args, **kwargs)
        except SandboxError:
            # Already in reportable form; a bare raise keeps the original
            # traceback intact.
            raise
        except NameError as e:
            # A NameError is raised when a variable could not be found.
            # The original KeyError has been dropped by the interpreter.
            # However, we should have it cached in our instance!

            # Unless a script is doing something wonky like catching NameError
            # itself (that would be silly), if there is an exception on the
            # global namespace, that's our error.
            actual = e

            if self._last_name_error is not None:
                actual = self._last_name_error
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                type(actual),
                actual,
                sys.exc_info()[2],
            )

        except Exception:
            exc_type, exc_value, trace = sys.exc_info()
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                exc_type,
                exc_value,
                trace,
            )
        finally:
            self._current_source = old_source
            self._context._sandbox = old_sandbox
            if path and becomes_current_path:
                self._context.pop_source()

    def push_subcontext(self, context):
        """Push a SubContext onto the execution stack.

        When called, the active context will be set to the specified context,
        meaning all variable accesses will go through it. We also record this
        SubContext as having been executed as part of this sandbox.
        """
        self._active_contexts.append(context)
        if context not in self.subcontexts:
            self.subcontexts.append(context)

    def pop_subcontext(self, context):
        """Pop a SubContext off the execution stack.

        SubContexts must be pushed and popped in opposite order. This is
        validated as part of the function call to ensure proper consumer API
        use.
        """
        popped = self._active_contexts.pop()
        assert popped == context

    def __getitem__(self, key):
        """Look up ``key`` in the sandbox namespace.

        Upper-case names are read from the active context; everything else is
        read from the sandbox dict itself.
        """
        if key.isupper():
            try:
                return self._context[key]
            except Exception as e:
                # Remember the failure so exec_function() can report it in
                # place of the NameError the interpreter raises instead.
                self._last_name_error = e
                raise

        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        """Assign ``key`` in the sandbox namespace.

        Upper-case names are stored in the active context, everything else in
        the sandbox dict itself. A KeyError is raised when the assignment would
        replace a builtin, rebind an upper-case name to a different object, or
        give a new upper-case name an empty list or dict.
        """
        if key in self._builtins or key == "__builtins__":
            raise KeyError("Cannot reassign builtins")

        if key.isupper():
            # Forbid assigning over a previously set value. Interestingly, when
            # doing FOO += ['bar'], python actually does something like:
            #   foo = namespace.__getitem__('FOO')
            #   foo.__iadd__(['bar'])
            #   namespace.__setitem__('FOO', foo)
            # This means __setitem__ is called with the value that is already
            # in the dict, when doing +=, which is permitted.
            if key in self._context and self._context[key] is not value:
                raise KeyError("global_ns", "reassign", key)

            if (
                key not in self._context
                and isinstance(value, (list, dict))
                and not value
            ):
                raise KeyError("Variable %s assigned an empty value." % key)

            self._context[key] = value
        else:
            dict.__setitem__(self, key, value)

    def get(self, key, default=None):
        """Unsupported on sandboxes; always raises NotImplementedError."""
        raise NotImplementedError("Not supported")

    def __iter__(self):
        """Unsupported on sandboxes; always raises NotImplementedError."""
        raise NotImplementedError("Not supported")

    def __contains__(self, key):
        """Report whether ``key`` is defined, using the same split between the
        active context (upper-case names) and the sandbox dict (other names)
        as item access.
        """
        if key.isupper():
            return key in self._context
        return dict.__contains__(self, key)
318