1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5r"""Python sandbox implementation for build files.
6
7This module contains classes for Python sandboxes that execute in a
8highly-controlled environment.
9
10The main class is `Sandbox`. This provides an execution environment for Python
11code and is used to fill a Context instance for the takeaway information from
12the execution.
13
14Code in this module takes a different approach to exception handling compared
15to what you'd see elsewhere in Python. Arguments to built-in exceptions like
16KeyError are machine parseable. This machine-friendly data is used to present
17user-friendly error messages in the case of errors.
18"""
19
20from __future__ import absolute_import, print_function, unicode_literals
21
22import os
23import six
24import sys
25import weakref
26
27from mozbuild.util import (
28    exec_,
29    ReadOnlyDict,
30)
31from .context import Context
32from mozpack.files import FileFinder
33
34
# Module-wide FileFinder created for '/'. Sandbox.__init__ uses it as the
# default ``finder`` argument, and Sandbox.exec_file() reads files through
# whichever finder the sandbox was given.
default_finder = FileFinder('/')
36
37
def alphabetical_sorted(iterable, key=lambda x: x.lower(),
                        reverse=False):
    """Return a new sorted list built from ``iterable``.

    This stands in for the built-in sorted() inside the sandbox. Unless a
    different ``key`` is supplied, items are compared by their lower-cased
    form, which yields a case-insensitive alphabetical ordering.
    ``reverse`` has the same meaning as for sorted().
    """
    ordered = list(iterable)
    ordered.sort(key=key, reverse=reverse)
    return ordered
44
45
class SandboxError(Exception):
    """Base class for the sandbox exceptions defined in this module.

    ``file_stack`` is stored unchanged on the instance as the ``file_stack``
    attribute so that whatever reports the error can inspect it.
    """

    def __init__(self, file_stack):
        # Only the attribute is recorded here; the base Exception initializer
        # is intentionally not invoked, matching the existing behavior.
        self.file_stack = file_stack
49
50
class SandboxExecutionError(SandboxError):
    """Error raised when code running inside a Sandbox fails.

    This is a plain container: its purpose is to capture the state of the
    failure (the file stack, the exception type and value, and the
    traceback) so that something else can report on it.
    """

    def __init__(self, file_stack, exc_type, exc_value, trace):
        super(SandboxExecutionError, self).__init__(file_stack)

        # Details of the underlying exception, kept as separate attributes.
        self.trace = trace
        self.exc_value = exc_value
        self.exc_type = exc_type
64
65
class SandboxLoadError(SandboxError):
    """Error raised when a file could not be loaded for execution.

    The failure may have happened while another file was being executed. In
    that case file_stack is non-empty and the file that triggered the load
    is on top of the stack.

    ``illegal_path`` and ``read_error`` are optional and default to None;
    both are stored unchanged on the instance, next to ``trace``.
    """

    def __init__(self, file_stack, trace, illegal_path=None, read_error=None):
        super(SandboxLoadError, self).__init__(file_stack)

        self.read_error = read_error
        self.illegal_path = illegal_path
        self.trace = trace
81
82
class Sandbox(dict):
    """Represents a sandbox for executing Python code.

    This class provides a sandbox for execution of a single mozbuild frontend
    file. The results of that execution are stored in the Context instance
    given as the ``context`` argument.

    Sandbox is effectively a glorified wrapper around compile() + exec(). You
    point it at some Python code and it executes it. The main difference from
    executing Python code like normal is that the executed code is very limited
    in what it can do: the sandbox only exposes a very limited set of Python
    functionality. Only specific types and functions are available. This
    prevents executed code from doing things like import modules, open files,
    etc.

    Sandbox instances act as global namespace for the sandboxed execution
    itself. They shall not be used to access the results of the execution.
    Those results are available in the given Context instance after execution.

    The Sandbox itself is responsible for enforcing rules such as forbidding
    reassignment of variables.

    Implementation note: Sandbox derives from dict because exec() insists that
    what it is given for namespaces is a dict.
    """
    # The default set of builtins.
    BUILTINS = ReadOnlyDict({
        # Only real Python built-ins should go here.
        'None': None,
        'False': False,
        'True': True,
        'sorted': alphabetical_sorted,
        'int': int,
        'set': set,
        'tuple': tuple,
    })

    def __init__(self, context, finder=default_finder):
        """Initialize a Sandbox ready for execution.

        ``context`` is the Context instance that receives the results of the
        execution. ``finder`` is the object exec_file() reads files through;
        it is used as ``finder.get(path).read()``.
        """
        self._builtins = self.BUILTINS
        dict.__setitem__(self, '__builtins__', self._builtins)

        assert isinstance(self._builtins, ReadOnlyDict)
        assert isinstance(context, Context)

        # Contexts are modeled as a stack because multiple context managers
        # may be active.
        self._active_contexts = [context]

        # Seen sub-contexts. Will be populated with other Context instances
        # that were related to execution of this instance.
        self.subcontexts = []

        # We need to record this because it gets swallowed as part of
        # evaluation.
        self._last_name_error = None

        # Current literal source being executed.
        self._current_source = None

        self._finder = finder

    @property
    def _context(self):
        """The currently active context (top of the context stack)."""
        return self._active_contexts[-1]

    def exec_file(self, path):
        """Execute code at a path in the sandbox.

        The path must be absolute.

        Any failure to read the file is reported as a SandboxLoadError whose
        ``read_error`` attribute is the path. Errors raised while executing
        the file are reported as described in exec_function().
        """
        assert os.path.isabs(path)

        try:
            source = six.ensure_text(self._finder.get(path).read())
        except Exception:
            raise SandboxLoadError(self._context.source_stack,
                                   sys.exc_info()[2], read_error=path)

        self.exec_source(source, path)

    def exec_source(self, source, path=''):
        """Execute Python code within a string.

        The passed string should contain Python code to be executed. The string
        will be compiled and executed.

        You should almost always go through exec_file() because exec_source()
        does not perform extra path normalization. This can cause relative
        paths to behave weirdly.
        """
        def execute():
            # compile() inherits the __future__ from the module by default. We
            # do want Unicode literals.
            code = compile(source, path, 'exec')
            # We use ourself as the global namespace for the execution. There
            # is no need for a separate local namespace as moz.build execution
            # is flat, namespace-wise.
            old_source = self._current_source
            self._current_source = source
            try:
                exec_(code, self)
            finally:
                self._current_source = old_source

        self.exec_function(execute, path=path)

    def _error_source_stack(self, path, becomes_current_path):
        """Return the source stack to attach to an execution error.

        The stack is copied before being touched. If the context hands back
        its live list, appending to it directly would corrupt the context's
        own stack, and the list stored on the exception would later be
        altered when exec_function() pops the current source.
        """
        source_stack = list(self._context.source_stack)
        if not becomes_current_path:
            # Add current file to the stack because it wasn't added before
            # sandbox execution.
            source_stack.append(path)
        return source_stack

    def exec_function(self, func, args=(), kwargs=None, path='',
                      becomes_current_path=True):
        """Execute function with the given arguments in the sandbox.

        ``func`` is called as ``func(*args, **kwargs)``; ``kwargs`` may be
        None (the default), meaning no keyword arguments. When ``path`` is
        non-empty and ``becomes_current_path`` is true, ``path`` is pushed as
        the context's current source for the duration of the call.

        SandboxError exceptions raised by ``func`` propagate unchanged. Any
        other Exception is wrapped in a SandboxExecutionError carrying the
        source stack, the exception type and value, and the traceback.
        """
        if kwargs is None:
            kwargs = {}

        if path and becomes_current_path:
            self._context.push_source(path)

        old_sandbox = self._context._sandbox
        self._context._sandbox = weakref.ref(self)

        # We don't have to worry about bytecode generation here because we are
        # too low-level for that. However, we could add bytecode generation via
        # the marshal module if parsing performance were ever an issue.

        old_source = self._current_source
        self._current_source = None
        # Forget any lookup failure cached by an earlier execution so that it
        # cannot be reported for an unrelated NameError raised by this one.
        self._last_name_error = None
        try:
            func(*args, **kwargs)
        except SandboxError:
            # Already in its final form; re-raise with the original traceback.
            raise
        except NameError as e:
            # A NameError is raised when a variable could not be found.
            # The original KeyError has been dropped by the interpreter.
            # However, we should have it cached in our instance!

            # Unless a script is doing something wonky like catching NameError
            # itself (that would be silly), if there is an exception on the
            # global namespace, that's our error.
            actual = e

            if self._last_name_error is not None:
                actual = self._last_name_error
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                type(actual), actual, sys.exc_info()[2])

        except Exception:
            exc = sys.exc_info()
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                exc[0], exc[1], exc[2])
        finally:
            self._current_source = old_source
            self._context._sandbox = old_sandbox
            if path and becomes_current_path:
                self._context.pop_source()

    def push_subcontext(self, context):
        """Push a SubContext onto the execution stack.

        When called, the active context will be set to the specified context,
        meaning all variable accesses will go through it. We also record this
        SubContext as having been executed as part of this sandbox.
        """
        self._active_contexts.append(context)
        if context not in self.subcontexts:
            self.subcontexts.append(context)

    def pop_subcontext(self, context):
        """Pop a SubContext off the execution stack.

        SubContexts must be pushed and popped in opposite order. This is
        validated as part of the function call to ensure proper consumer API
        use.
        """
        popped = self._active_contexts.pop()
        assert popped == context

    def __getitem__(self, key):
        """Look up a name in the sandbox namespace.

        Upper-case names are read from the active context. A failure there is
        remembered in ``_last_name_error`` before being re-raised, because
        exec_function() needs it to report the real cause of a NameError.
        All other names are read from the dict itself.
        """
        if key.isupper():
            try:
                return self._context[key]
            except Exception as e:
                self._last_name_error = e
                raise

        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        """Assign a name in the sandbox namespace.

        Raises KeyError when the name is a builtin, when an upper-case name
        that is already set in the active context is assigned a different
        object, or when a new upper-case name is assigned an empty list or
        dict. Upper-case names are stored in the active context; all other
        names are stored in the dict itself.
        """
        if key in self._builtins or key == '__builtins__':
            raise KeyError('Cannot reassign builtins')

        if key.isupper():
            # Forbid assigning over a previously set value. Interestingly, when
            # doing FOO += ['bar'], python actually does something like:
            #   foo = namespace.__getitem__('FOO')
            #   foo.__iadd__(['bar'])
            #   namespace.__setitem__('FOO', foo)
            # This means __setitem__ is called with the value that is already
            # in the dict, when doing +=, which is permitted.
            if key in self._context and self._context[key] is not value:
                raise KeyError('global_ns', 'reassign', key)

            if (key not in self._context and isinstance(value, (list, dict))
                    and not value):
                raise KeyError('Variable %s assigned an empty value.' % key)

            self._context[key] = value
        else:
            dict.__setitem__(self, key, value)

    def get(self, key, default=None):
        """Not supported on a Sandbox; always raises NotImplementedError."""
        raise NotImplementedError('Not supported')

    def __iter__(self):
        """Not supported on a Sandbox; always raises NotImplementedError."""
        raise NotImplementedError('Not supported')

    def __contains__(self, key):
        """Return whether ``key`` is defined.

        Upper-case names are checked against the active context; all other
        names are checked against the dict itself.
        """
        if key.isupper():
            return key in self._context
        return dict.__contains__(self, key)
311