1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5r"""Python sandbox implementation for build files.
6
7This module contains classes for Python sandboxes that execute in a
8highly-controlled environment.
9
10The main class is `Sandbox`. This provides an execution environment for Python
11code and is used to fill a Context instance for the takeaway information from
12the execution.
13
14Code in this module takes a different approach to exception handling compared
15to what you'd see elsewhere in Python. Arguments to built-in exceptions like
16KeyError are machine parseable. This machine-friendly data is used to present
17user-friendly error messages in the case of errors.
18"""
19
20from __future__ import absolute_import, unicode_literals
21
22import os
23import sys
24import weakref
25
26from mozbuild.util import (
27    exec_,
28    ReadOnlyDict,
29)
30from .context import Context
31from mozpack.files import FileFinder
32
33
# Finder used by Sandbox when the caller does not pass its own ``finder``.
# NOTE(review): presumably rooted at '/' so that the absolute paths required
# by Sandbox.exec_file() can be looked up directly -- confirm against
# mozpack.files.FileFinder.
default_finder = FileFinder('/')
35
36
def alphabetical_sorted(iterable, cmp=None, key=lambda x: x.lower(),
                        reverse=False):
    """sorted() replacement for the sandbox, ordering alphabetically by
    default.

    The default ``key`` lower-cases each item, so ordering is
    case-insensitive unless the caller supplies another key (or ``None``).

    ``cmp`` is an optional two-argument comparison function returning a
    negative, zero or positive number. As with Python 2's sorted(), it is
    applied to the values produced by ``key`` rather than to the raw items.

    Returns a new sorted list; ``iterable`` itself is not modified.
    """
    if cmp is not None:
        # sorted() only accepts a ``cmp`` argument on Python 2. Adapting the
        # comparison function into a key function works on both Python 2.7
        # and Python 3.
        from functools import cmp_to_key

        cmp_key = cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            item_key = key

            def key(item):
                return cmp_key(item_key(item))

    # Keyword arguments are required here: Python 3's sorted() takes only the
    # iterable positionally.
    return sorted(iterable, key=key, reverse=reverse)
43
44
class SandboxError(Exception):
    """Base class for the errors raised by this module.

    Every instance records the ``file_stack`` handed to it by the code that
    raised the error, so that whatever reports the error can show it.
    """

    def __init__(self, file_stack):
        self.file_stack = file_stack
48
49
class SandboxExecutionError(SandboxError):
    """Represents errors encountered during execution of a Sandbox.

    This is a simple container exception. Its purpose is to capture state
    (the file stack plus the type, value and traceback of the underlying
    exception) so something else can report on it.
    """

    def __init__(self, file_stack, exc_type, exc_value, trace):
        super(SandboxExecutionError, self).__init__(file_stack)

        self.exc_type, self.exc_value, self.trace = exc_type, exc_value, trace
62
63
class SandboxLoadError(SandboxError):
    """Represents errors encountered when loading a file for execution.

    The failure may have happened while another file was already being
    executed. In that case ``file_stack`` is non-empty and the file that
    triggered the load is on top of it.

    ``trace`` is the traceback captured when the load failed. ``illegal_path``
    and ``read_error`` are optional and default to None; within this module
    only ``read_error`` is set, to the path that could not be read.
    """

    def __init__(self, file_stack, trace, illegal_path=None, read_error=None):
        super(SandboxLoadError, self).__init__(file_stack)

        self.trace, self.illegal_path, self.read_error = (
            trace, illegal_path, read_error)
78
79
class Sandbox(dict):
    """Represents a sandbox for executing Python code.

    This class provides a sandbox for execution of a single mozbuild frontend
    file. The results of that execution are stored in the Context instance
    given as the ``context`` argument.

    Sandbox is effectively a glorified wrapper around compile() + exec(). You
    point it at some Python code and it executes it. The main difference from
    executing Python code like normal is that the executed code is very limited
    in what it can do: the sandbox only exposes a very limited set of Python
    functionality. Only specific types and functions are available. This
    prevents executed code from doing things like import modules, open files,
    etc.

    Sandbox instances act as global namespace for the sandboxed execution
    itself. They shall not be used to access the results of the execution.
    Those results are available in the given Context instance after execution.

    The Sandbox itself is responsible for enforcing rules such as forbidding
    reassignment of variables.

    Implementation note: Sandbox derives from dict because exec() insists that
    what it is given for namespaces is a dict.
    """
    # The default set of builtins.
    BUILTINS = ReadOnlyDict({
        # Only real Python built-ins should go here.
        'None': None,
        'False': False,
        'True': True,
        'sorted': alphabetical_sorted,
        'int': int,
        'set': set,
        'tuple': tuple,
    })

    def __init__(self, context, finder=default_finder):
        """Initialize a Sandbox ready for execution.

        ``context`` is the Context instance that receives the UPPERCASE
        variables set by the executed code. ``finder`` is the object used by
        exec_file() to read files; it must provide ``get(path).read()``.
        """
        self._builtins = self.BUILTINS
        # Bypass our own __setitem__, which refuses to (re)assign builtins.
        dict.__setitem__(self, '__builtins__', self._builtins)

        assert isinstance(self._builtins, ReadOnlyDict)
        assert isinstance(context, Context)

        # Contexts are modeled as a stack because multiple context managers
        # may be active.
        self._active_contexts = [context]

        # Seen sub-contexts. Will be populated with other Context instances
        # that were related to execution of this instance.
        self.subcontexts = []

        # We need to record this because it gets swallowed as part of
        # evaluation.
        self._last_name_error = None

        # Current literal source being executed.
        self._current_source = None

        self._finder = finder

    @property
    def _context(self):
        """The currently active context (top of the context stack)."""
        return self._active_contexts[-1]

    def exec_file(self, path):
        """Execute code at a path in the sandbox.

        The path must be absolute.

        Raises SandboxLoadError (with ``read_error`` set to ``path``) if the
        file cannot be read through the finder.
        """
        assert os.path.isabs(path)

        try:
            source = self._finder.get(path).read()
        except Exception:
            # Hand the error a snapshot of the stack so that later pushes and
            # pops on the context cannot alter what gets reported.
            raise SandboxLoadError(list(self._context.source_stack),
                                   sys.exc_info()[2], read_error=path)

        self.exec_source(source, path)

    def exec_source(self, source, path=''):
        """Execute Python code within a string.

        The passed string should contain Python code to be executed. The string
        will be compiled and executed.

        You should almost always go through exec_file() because exec_source()
        does not perform extra path normalization. This can cause relative
        paths to behave weirdly.
        """
        def execute():
            # compile() inherits the __future__ from the module by default. We
            # do want Unicode literals.
            code = compile(source, path, 'exec')
            # We use ourself as the global namespace for the execution. There
            # is no need for a separate local namespace as moz.build execution
            # is flat, namespace-wise.
            old_source = self._current_source
            self._current_source = source
            try:
                exec_(code, self)
            finally:
                self._current_source = old_source

        self.exec_function(execute, path=path)

    def _error_source_stack(self, path, becomes_current_path):
        """Return a snapshot of the source stack for error reporting.

        A copy is taken because the stack held by the context is mutated when
        the source is popped in exec_function()'s ``finally`` clause, and
        because appending ``path`` below must not leak into the context.
        """
        source_stack = list(self._context.source_stack)
        if not becomes_current_path:
            # Add current file to the stack because it wasn't added before
            # sandbox execution.
            source_stack.append(path)
        return source_stack

    def exec_function(self, func, args=(), kwargs=None, path='',
                      becomes_current_path=True):
        """Execute function with the given arguments in the sandbox.

        ``func`` is called as ``func(*args, **kwargs)``. If ``path`` is
        non-empty and ``becomes_current_path`` is true, ``path`` is pushed as
        the context's current source for the duration of the call.

        SandboxError instances raised by ``func`` propagate unchanged. Any
        other Exception is wrapped in a SandboxExecutionError carrying the
        source stack and the original exception information.
        """
        if kwargs is None:
            kwargs = {}

        if path and becomes_current_path:
            self._context.push_source(path)

        old_sandbox = self._context._sandbox
        self._context._sandbox = weakref.ref(self)

        # We don't have to worry about bytecode generation here because we are
        # too low-level for that. However, we could add bytecode generation via
        # the marshal module if parsing performance were ever an issue.

        old_source = self._current_source
        self._current_source = None
        try:
            func(*args, **kwargs)
        except SandboxError:
            # Already in reportable form; a bare raise keeps the original
            # traceback intact.
            raise
        except NameError as e:
            # A NameError is raised when a variable could not be found.
            # The original KeyError has been dropped by the interpreter.
            # However, we should have it cached in our instance!

            # Unless a script is doing something wonky like catching NameError
            # itself (that would be silly), if there is an exception on the
            # global namespace, that's our error.
            actual = e

            if self._last_name_error is not None:
                actual = self._last_name_error
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                type(actual), actual, sys.exc_info()[2])

        except Exception:
            exc = sys.exc_info()
            raise SandboxExecutionError(
                self._error_source_stack(path, becomes_current_path),
                exc[0], exc[1], exc[2])
        finally:
            self._current_source = old_source
            self._context._sandbox = old_sandbox
            if path and becomes_current_path:
                self._context.pop_source()

    def push_subcontext(self, context):
        """Push a SubContext onto the execution stack.

        When called, the active context will be set to the specified context,
        meaning all variable accesses will go through it. We also record this
        SubContext as having been executed as part of this sandbox.
        """
        self._active_contexts.append(context)
        if context not in self.subcontexts:
            self.subcontexts.append(context)

    def pop_subcontext(self, context):
        """Pop a SubContext off the execution stack.

        SubContexts must be pushed and popped in opposite order. This is
        validated as part of the function call to ensure proper consumer API
        use.
        """
        popped = self._active_contexts.pop()
        assert popped == context

    def __getitem__(self, key):
        """Look up ``key``: UPPERCASE names come from the active context,
        everything else from the sandbox's own dict storage.
        """
        if key.isupper():
            try:
                return self._context[key]
            except Exception as e:
                # Remember the failure: exec_function() reports it in place of
                # the less informative NameError it may turn into.
                self._last_name_error = e
                raise

        return dict.__getitem__(self, key)

    def __setitem__(self, key, value):
        """Assign ``key``: UPPERCASE names are stored in the active context,
        everything else in the sandbox's own dict storage.

        Raises KeyError when the assignment targets a builtin, replaces an
        UPPERCASE value that is already set, or gives a new UPPERCASE variable
        an empty list or dict.
        """
        if key in self._builtins or key == '__builtins__':
            raise KeyError('Cannot reassign builtins')

        if key.isupper():
            # Forbid assigning over a previously set value. Interestingly, when
            # doing FOO += ['bar'], python actually does something like:
            #   foo = namespace.__getitem__('FOO')
            #   foo.__iadd__(['bar'])
            #   namespace.__setitem__('FOO', foo)
            # This means __setitem__ is called with the value that is already
            # in the dict, when doing +=, which is permitted.
            if key in self._context and self._context[key] is not value:
                raise KeyError('global_ns', 'reassign', key)

            if (key not in self._context and isinstance(value, (list, dict))
               and not value):
                raise KeyError('Variable %s assigned an empty value.' % key)

            self._context[key] = value
        else:
            dict.__setitem__(self, key, value)

    def get(self, key, default=None):
        """Deliberately unsupported; always raises NotImplementedError."""
        raise NotImplementedError('Not supported')

    def __len__(self):
        """Deliberately unsupported; always raises NotImplementedError."""
        raise NotImplementedError('Not supported')

    def __iter__(self):
        """Deliberately unsupported; always raises NotImplementedError."""
        raise NotImplementedError('Not supported')

    def __contains__(self, key):
        """Membership test using the same UPPERCASE/other split as
        __getitem__.
        """
        if key.isupper():
            return key in self._context
        return dict.__contains__(self, key)
311