1# This Source Code Form is subject to the terms of the Mozilla Public 2# License, v. 2.0. If a copy of the MPL was not distributed with this 3# file, You can obtain one at http://mozilla.org/MPL/2.0/. 4 5r"""Python sandbox implementation for build files. 6 7This module contains classes for Python sandboxes that execute in a 8highly-controlled environment. 9 10The main class is `Sandbox`. This provides an execution environment for Python 11code and is used to fill a Context instance for the takeaway information from 12the execution. 13 14Code in this module takes a different approach to exception handling compared 15to what you'd see elsewhere in Python. Arguments to built-in exceptions like 16KeyError are machine parseable. This machine-friendly data is used to present 17user-friendly error messages in the case of errors. 18""" 19 20from __future__ import absolute_import, print_function, unicode_literals 21 22import os 23import six 24import sys 25import weakref 26 27from mozbuild.util import ( 28 exec_, 29 ReadOnlyDict, 30) 31from .context import Context 32from mozpack.files import FileFinder 33 34 35default_finder = FileFinder("/") 36 37 38def alphabetical_sorted(iterable, key=lambda x: x.lower(), reverse=False): 39 """sorted() replacement for the sandbox, ordering alphabetically by 40 default. 41 """ 42 return sorted(iterable, key=key, reverse=reverse) 43 44 45class SandboxError(Exception): 46 def __init__(self, file_stack): 47 self.file_stack = file_stack 48 49 50class SandboxExecutionError(SandboxError): 51 """Represents errors encountered during execution of a Sandbox. 52 53 This is a simple container exception. It's purpose is to capture state 54 so something else can report on it. 55 """ 56 57 def __init__(self, file_stack, exc_type, exc_value, trace): 58 SandboxError.__init__(self, file_stack) 59 60 self.exc_type = exc_type 61 self.exc_value = exc_value 62 self.trace = trace 63 64 65class SandboxLoadError(SandboxError): 66 """Represents errors encountered when loading a file for execution. 67 68 This exception represents errors in a Sandbox that occurred as part of 69 loading a file. The error could have occurred in the course of executing 70 a file. If so, the file_stack will be non-empty and the file that caused 71 the load will be on top of the stack. 72 """ 73 74 def __init__(self, file_stack, trace, illegal_path=None, read_error=None): 75 SandboxError.__init__(self, file_stack) 76 77 self.trace = trace 78 self.illegal_path = illegal_path 79 self.read_error = read_error 80 81 82class Sandbox(dict): 83 """Represents a sandbox for executing Python code. 84 85 This class provides a sandbox for execution of a single mozbuild frontend 86 file. The results of that execution is stored in the Context instance given 87 as the ``context`` argument. 88 89 Sandbox is effectively a glorified wrapper around compile() + exec(). You 90 point it at some Python code and it executes it. The main difference from 91 executing Python code like normal is that the executed code is very limited 92 in what it can do: the sandbox only exposes a very limited set of Python 93 functionality. Only specific types and functions are available. This 94 prevents executed code from doing things like import modules, open files, 95 etc. 96 97 Sandbox instances act as global namespace for the sandboxed execution 98 itself. They shall not be used to access the results of the execution. 99 Those results are available in the given Context instance after execution. 100 101 The Sandbox itself is responsible for enforcing rules such as forbidding 102 reassignment of variables. 103 104 Implementation note: Sandbox derives from dict because exec() insists that 105 what it is given for namespaces is a dict. 106 """ 107 108 # The default set of builtins. 109 BUILTINS = ReadOnlyDict( 110 { 111 # Only real Python built-ins should go here. 112 "None": None, 113 "False": False, 114 "True": True, 115 "sorted": alphabetical_sorted, 116 "int": int, 117 "set": set, 118 "tuple": tuple, 119 } 120 ) 121 122 def __init__(self, context, finder=default_finder): 123 """Initialize a Sandbox ready for execution.""" 124 self._builtins = self.BUILTINS 125 dict.__setitem__(self, "__builtins__", self._builtins) 126 127 assert isinstance(self._builtins, ReadOnlyDict) 128 assert isinstance(context, Context) 129 130 # Contexts are modeled as a stack because multiple context managers 131 # may be active. 132 self._active_contexts = [context] 133 134 # Seen sub-contexts. Will be populated with other Context instances 135 # that were related to execution of this instance. 136 self.subcontexts = [] 137 138 # We need to record this because it gets swallowed as part of 139 # evaluation. 140 self._last_name_error = None 141 142 # Current literal source being executed. 143 self._current_source = None 144 145 self._finder = finder 146 147 @property 148 def _context(self): 149 return self._active_contexts[-1] 150 151 def exec_file(self, path): 152 """Execute code at a path in the sandbox. 153 154 The path must be absolute. 155 """ 156 assert os.path.isabs(path) 157 158 try: 159 source = six.ensure_text(self._finder.get(path).read()) 160 except Exception: 161 raise SandboxLoadError( 162 self._context.source_stack, sys.exc_info()[2], read_error=path 163 ) 164 165 self.exec_source(source, path) 166 167 def exec_source(self, source, path=""): 168 """Execute Python code within a string. 169 170 The passed string should contain Python code to be executed. The string 171 will be compiled and executed. 172 173 You should almost always go through exec_file() because exec_source() 174 does not perform extra path normalization. This can cause relative 175 paths to behave weirdly. 176 """ 177 178 def execute(): 179 # compile() inherits the __future__ from the module by default. We 180 # do want Unicode literals. 181 code = compile(source, path, "exec") 182 # We use ourself as the global namespace for the execution. There 183 # is no need for a separate local namespace as moz.build execution 184 # is flat, namespace-wise. 185 old_source = self._current_source 186 self._current_source = source 187 try: 188 exec_(code, self) 189 finally: 190 self._current_source = old_source 191 192 self.exec_function(execute, path=path) 193 194 def exec_function( 195 self, func, args=(), kwargs={}, path="", becomes_current_path=True 196 ): 197 """Execute function with the given arguments in the sandbox.""" 198 if path and becomes_current_path: 199 self._context.push_source(path) 200 201 old_sandbox = self._context._sandbox 202 self._context._sandbox = weakref.ref(self) 203 204 # We don't have to worry about bytecode generation here because we are 205 # too low-level for that. However, we could add bytecode generation via 206 # the marshall module if parsing performance were ever an issue. 207 208 old_source = self._current_source 209 self._current_source = None 210 try: 211 func(*args, **kwargs) 212 except SandboxError as e: 213 raise e 214 except NameError as e: 215 # A NameError is raised when a variable could not be found. 216 # The original KeyError has been dropped by the interpreter. 217 # However, we should have it cached in our instance! 218 219 # Unless a script is doing something wonky like catching NameError 220 # itself (that would be silly), if there is an exception on the 221 # global namespace, that's our error. 222 actual = e 223 224 if self._last_name_error is not None: 225 actual = self._last_name_error 226 source_stack = self._context.source_stack 227 if not becomes_current_path: 228 # Add current file to the stack because it wasn't added before 229 # sandbox execution. 230 source_stack.append(path) 231 raise SandboxExecutionError( 232 source_stack, type(actual), actual, sys.exc_info()[2] 233 ) 234 235 except Exception: 236 # Need to copy the stack otherwise we get a reference and that is 237 # mutated during the finally. 238 exc = sys.exc_info() 239 source_stack = self._context.source_stack 240 if not becomes_current_path: 241 # Add current file to the stack because it wasn't added before 242 # sandbox execution. 243 source_stack.append(path) 244 raise SandboxExecutionError(source_stack, exc[0], exc[1], exc[2]) 245 finally: 246 self._current_source = old_source 247 self._context._sandbox = old_sandbox 248 if path and becomes_current_path: 249 self._context.pop_source() 250 251 def push_subcontext(self, context): 252 """Push a SubContext onto the execution stack. 253 254 When called, the active context will be set to the specified context, 255 meaning all variable accesses will go through it. We also record this 256 SubContext as having been executed as part of this sandbox. 257 """ 258 self._active_contexts.append(context) 259 if context not in self.subcontexts: 260 self.subcontexts.append(context) 261 262 def pop_subcontext(self, context): 263 """Pop a SubContext off the execution stack. 264 265 SubContexts must be pushed and popped in opposite order. This is 266 validated as part of the function call to ensure proper consumer API 267 use. 268 """ 269 popped = self._active_contexts.pop() 270 assert popped == context 271 272 def __getitem__(self, key): 273 if key.isupper(): 274 try: 275 return self._context[key] 276 except Exception as e: 277 self._last_name_error = e 278 raise 279 280 return dict.__getitem__(self, key) 281 282 def __setitem__(self, key, value): 283 if key in self._builtins or key == "__builtins__": 284 raise KeyError("Cannot reassign builtins") 285 286 if key.isupper(): 287 # Forbid assigning over a previously set value. Interestingly, when 288 # doing FOO += ['bar'], python actually does something like: 289 # foo = namespace.__getitem__('FOO') 290 # foo.__iadd__(['bar']) 291 # namespace.__setitem__('FOO', foo) 292 # This means __setitem__ is called with the value that is already 293 # in the dict, when doing +=, which is permitted. 294 if key in self._context and self._context[key] is not value: 295 raise KeyError("global_ns", "reassign", key) 296 297 if ( 298 key not in self._context 299 and isinstance(value, (list, dict)) 300 and not value 301 ): 302 raise KeyError("Variable %s assigned an empty value." % key) 303 304 self._context[key] = value 305 else: 306 dict.__setitem__(self, key, value) 307 308 def get(self, key, default=None): 309 raise NotImplementedError("Not supported") 310 311 def __iter__(self): 312 raise NotImplementedError("Not supported") 313 314 def __contains__(self, key): 315 if key.isupper(): 316 return key in self._context 317 return dict.__contains__(self, key) 318