1 //! WebAssembly module and function translation state.
2 //!
3 //! The `ModuleTranslationState` struct defined in this module is used to keep track of data about
4 //! the whole WebAssembly module, such as the decoded type signatures.
5 //!
6 //! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
7 //! value and control stacks during the translation of a single function.
8
9 use crate::environ::{FuncEnvironment, GlobalVariable, WasmResult};
10 use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, SignatureIndex, TableIndex};
11 use crate::{HashMap, Occupied, Vacant};
12 use cranelift_codegen::ir::{self, Block, Inst, Value};
13 use std::vec::Vec;
14
15 /// Information about the presence of an associated `else` for an `if`, or the
16 /// lack thereof.
17 #[derive(Debug)]
18 pub enum ElseData {
19 /// The `if` does not already have an `else` block.
20 ///
21 /// This doesn't mean that it will never have an `else`, just that we
22 /// haven't seen it yet.
23 NoElse {
24 /// If we discover that we need an `else` block, this is the jump
25 /// instruction that needs to be fixed up to point to the new `else`
26 /// block rather than the destination block after the `if...end`.
27 branch_inst: Inst,
28 },
29
30 /// We have already allocated an `else` block.
31 ///
32 /// Usually we don't know whether we will hit an `if .. end` or an `if
33 /// .. else .. end`, but sometimes we can tell based on the block's type
34 /// signature that the signature is not valid if there isn't an `else`. In
35 /// these cases, we pre-allocate the `else` block.
36 WithElse {
37 /// This is the `else` block.
38 else_block: Block,
39 },
40 }
41
42 /// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
43 /// fields:
44 ///
45 /// - `destination`: reference to the `Block` that will hold the code after the control block;
46 /// - `num_return_values`: number of values returned by the control block;
47 /// - `original_stack_size`: size of the value stack at the beginning of the control block.
48 ///
49 /// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction
50 /// separating the `true` and `false` branch. The `loop` frame has a `header` field that references
51 /// the `Block` that contains the beginning of the body of the loop.
52 #[derive(Debug)]
53 pub enum ControlStackFrame {
54 If {
55 destination: Block,
56 else_data: ElseData,
57 num_param_values: usize,
58 num_return_values: usize,
59 original_stack_size: usize,
60 exit_is_branched_to: bool,
61 blocktype: wasmparser::TypeOrFuncType,
62 /// Was the head of the `if` reachable?
63 head_is_reachable: bool,
64 /// What was the reachability at the end of the consequent?
65 ///
66 /// This is `None` until we're finished translating the consequent, and
67 /// is set to `Some` either by hitting an `else` when we will begin
68 /// translating the alternative, or by hitting an `end` in which case
69 /// there is no alternative.
70 consequent_ends_reachable: Option<bool>,
71 // Note: no need for `alternative_ends_reachable` because that is just
72 // `state.reachable` when we hit the `end` in the `if .. else .. end`.
73 },
74 Block {
75 destination: Block,
76 num_param_values: usize,
77 num_return_values: usize,
78 original_stack_size: usize,
79 exit_is_branched_to: bool,
80 },
81 Loop {
82 destination: Block,
83 header: Block,
84 num_param_values: usize,
85 num_return_values: usize,
86 original_stack_size: usize,
87 },
88 }
89
90 /// Helper methods for the control stack objects.
91 impl ControlStackFrame {
num_return_values(&self) -> usize92 pub fn num_return_values(&self) -> usize {
93 match *self {
94 Self::If {
95 num_return_values, ..
96 }
97 | Self::Block {
98 num_return_values, ..
99 }
100 | Self::Loop {
101 num_return_values, ..
102 } => num_return_values,
103 }
104 }
num_param_values(&self) -> usize105 pub fn num_param_values(&self) -> usize {
106 match *self {
107 Self::If {
108 num_param_values, ..
109 }
110 | Self::Block {
111 num_param_values, ..
112 }
113 | Self::Loop {
114 num_param_values, ..
115 } => num_param_values,
116 }
117 }
following_code(&self) -> Block118 pub fn following_code(&self) -> Block {
119 match *self {
120 Self::If { destination, .. }
121 | Self::Block { destination, .. }
122 | Self::Loop { destination, .. } => destination,
123 }
124 }
br_destination(&self) -> Block125 pub fn br_destination(&self) -> Block {
126 match *self {
127 Self::If { destination, .. } | Self::Block { destination, .. } => destination,
128 Self::Loop { header, .. } => header,
129 }
130 }
original_stack_size(&self) -> usize131 pub fn original_stack_size(&self) -> usize {
132 match *self {
133 Self::If {
134 original_stack_size,
135 ..
136 }
137 | Self::Block {
138 original_stack_size,
139 ..
140 }
141 | Self::Loop {
142 original_stack_size,
143 ..
144 } => original_stack_size,
145 }
146 }
is_loop(&self) -> bool147 pub fn is_loop(&self) -> bool {
148 match *self {
149 Self::If { .. } | Self::Block { .. } => false,
150 Self::Loop { .. } => true,
151 }
152 }
153
exit_is_branched_to(&self) -> bool154 pub fn exit_is_branched_to(&self) -> bool {
155 match *self {
156 Self::If {
157 exit_is_branched_to,
158 ..
159 }
160 | Self::Block {
161 exit_is_branched_to,
162 ..
163 } => exit_is_branched_to,
164 Self::Loop { .. } => false,
165 }
166 }
167
set_branched_to_exit(&mut self)168 pub fn set_branched_to_exit(&mut self) {
169 match *self {
170 Self::If {
171 ref mut exit_is_branched_to,
172 ..
173 }
174 | Self::Block {
175 ref mut exit_is_branched_to,
176 ..
177 } => *exit_is_branched_to = true,
178 Self::Loop { .. } => {}
179 }
180 }
181 }
182
183 /// Contains information passed along during a function's translation and that records:
184 ///
185 /// - The current value and control stacks.
186 /// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
187 /// unreachable code;
188 pub struct FuncTranslationState {
189 /// A stack of values corresponding to the active values in the input wasm function at this
190 /// point.
191 pub(crate) stack: Vec<Value>,
192 /// A stack of active control flow operations at this point in the input wasm function.
193 pub(crate) control_stack: Vec<ControlStackFrame>,
194 /// Is the current translation state still reachable? This is false when translating operators
195 /// like End, Return, or Unreachable.
196 pub(crate) reachable: bool,
197
198 // Map of global variables that have already been created by `FuncEnvironment::make_global`.
199 globals: HashMap<GlobalIndex, GlobalVariable>,
200
201 // Map of heaps that have been created by `FuncEnvironment::make_heap`.
202 heaps: HashMap<MemoryIndex, ir::Heap>,
203
204 // Map of tables that have been created by `FuncEnvironment::make_table`.
205 tables: HashMap<TableIndex, ir::Table>,
206
207 // Map of indirect call signatures that have been created by
208 // `FuncEnvironment::make_indirect_sig()`.
209 // Stores both the signature reference and the number of WebAssembly arguments
210 signatures: HashMap<SignatureIndex, (ir::SigRef, usize)>,
211
212 // Imported and local functions that have been created by
213 // `FuncEnvironment::make_direct_func()`.
214 // Stores both the function reference and the number of WebAssembly arguments
215 functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
216 }
217
218 // Public methods that are exposed to non-`cranelift_wasm` API consumers.
219 impl FuncTranslationState {
220 /// True if the current translation state expresses reachable code, false if it is unreachable.
221 #[inline]
reachable(&self) -> bool222 pub fn reachable(&self) -> bool {
223 self.reachable
224 }
225 }
226
227 impl FuncTranslationState {
228 /// Construct a new, empty, `FuncTranslationState`
new() -> Self229 pub(crate) fn new() -> Self {
230 Self {
231 stack: Vec::new(),
232 control_stack: Vec::new(),
233 reachable: true,
234 globals: HashMap::new(),
235 heaps: HashMap::new(),
236 tables: HashMap::new(),
237 signatures: HashMap::new(),
238 functions: HashMap::new(),
239 }
240 }
241
clear(&mut self)242 fn clear(&mut self) {
243 debug_assert!(self.stack.is_empty());
244 debug_assert!(self.control_stack.is_empty());
245 self.reachable = true;
246 self.globals.clear();
247 self.heaps.clear();
248 self.tables.clear();
249 self.signatures.clear();
250 self.functions.clear();
251 }
252
253 /// Initialize the state for compiling a function with the given signature.
254 ///
255 /// This resets the state to containing only a single block representing the whole function.
256 /// The exit block is the last block in the function which will contain the return instruction.
initialize(&mut self, sig: &ir::Signature, exit_block: Block)257 pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
258 self.clear();
259 self.push_block(
260 exit_block,
261 0,
262 sig.returns
263 .iter()
264 .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
265 .count(),
266 );
267 }
268
269 /// Push a value.
push1(&mut self, val: Value)270 pub(crate) fn push1(&mut self, val: Value) {
271 self.stack.push(val);
272 }
273
274 /// Push multiple values.
pushn(&mut self, vals: &[Value])275 pub(crate) fn pushn(&mut self, vals: &[Value]) {
276 self.stack.extend_from_slice(vals);
277 }
278
279 /// Pop one value.
pop1(&mut self) -> Value280 pub(crate) fn pop1(&mut self) -> Value {
281 self.stack
282 .pop()
283 .expect("attempted to pop a value from an empty stack")
284 }
285
286 /// Peek at the top of the stack without popping it.
peek1(&self) -> Value287 pub(crate) fn peek1(&self) -> Value {
288 *self
289 .stack
290 .last()
291 .expect("attempted to peek at a value on an empty stack")
292 }
293
294 /// Pop two values. Return them in the order they were pushed.
pop2(&mut self) -> (Value, Value)295 pub(crate) fn pop2(&mut self) -> (Value, Value) {
296 let v2 = self.stack.pop().unwrap();
297 let v1 = self.stack.pop().unwrap();
298 (v1, v2)
299 }
300
301 /// Pop three values. Return them in the order they were pushed.
pop3(&mut self) -> (Value, Value, Value)302 pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
303 let v3 = self.stack.pop().unwrap();
304 let v2 = self.stack.pop().unwrap();
305 let v1 = self.stack.pop().unwrap();
306 (v1, v2, v3)
307 }
308
309 /// Helper to ensure the the stack size is at least as big as `n`; note that due to
310 /// `debug_assert` this will not execute in non-optimized builds.
311 #[inline]
ensure_length_is_at_least(&self, n: usize)312 fn ensure_length_is_at_least(&self, n: usize) {
313 debug_assert!(
314 n <= self.stack.len(),
315 "attempted to access {} values but stack only has {} values",
316 n,
317 self.stack.len()
318 )
319 }
320
321 /// Pop the top `n` values on the stack.
322 ///
323 /// The popped values are not returned. Use `peekn` to look at them before popping.
popn(&mut self, n: usize)324 pub(crate) fn popn(&mut self, n: usize) {
325 self.ensure_length_is_at_least(n);
326 let new_len = self.stack.len() - n;
327 self.stack.truncate(new_len);
328 }
329
330 /// Peek at the top `n` values on the stack in the order they were pushed.
peekn(&self, n: usize) -> &[Value]331 pub(crate) fn peekn(&self, n: usize) -> &[Value] {
332 self.ensure_length_is_at_least(n);
333 &self.stack[self.stack.len() - n..]
334 }
335
336 /// Peek at the top `n` values on the stack in the order they were pushed.
peekn_mut(&mut self, n: usize) -> &mut [Value]337 pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
338 self.ensure_length_is_at_least(n);
339 let len = self.stack.len();
340 &mut self.stack[len - n..]
341 }
342
343 /// Push a block on the control stack.
push_block( &mut self, following_code: Block, num_param_types: usize, num_result_types: usize, )344 pub(crate) fn push_block(
345 &mut self,
346 following_code: Block,
347 num_param_types: usize,
348 num_result_types: usize,
349 ) {
350 debug_assert!(num_param_types <= self.stack.len());
351 self.control_stack.push(ControlStackFrame::Block {
352 destination: following_code,
353 original_stack_size: self.stack.len() - num_param_types,
354 num_param_values: num_param_types,
355 num_return_values: num_result_types,
356 exit_is_branched_to: false,
357 });
358 }
359
360 /// Push a loop on the control stack.
push_loop( &mut self, header: Block, following_code: Block, num_param_types: usize, num_result_types: usize, )361 pub(crate) fn push_loop(
362 &mut self,
363 header: Block,
364 following_code: Block,
365 num_param_types: usize,
366 num_result_types: usize,
367 ) {
368 debug_assert!(num_param_types <= self.stack.len());
369 self.control_stack.push(ControlStackFrame::Loop {
370 header,
371 destination: following_code,
372 original_stack_size: self.stack.len() - num_param_types,
373 num_param_values: num_param_types,
374 num_return_values: num_result_types,
375 });
376 }
377
378 /// Push an if on the control stack.
push_if( &mut self, destination: Block, else_data: ElseData, num_param_types: usize, num_result_types: usize, blocktype: wasmparser::TypeOrFuncType, )379 pub(crate) fn push_if(
380 &mut self,
381 destination: Block,
382 else_data: ElseData,
383 num_param_types: usize,
384 num_result_types: usize,
385 blocktype: wasmparser::TypeOrFuncType,
386 ) {
387 debug_assert!(num_param_types <= self.stack.len());
388
389 // Push a second copy of our `if`'s parameters on the stack. This lets
390 // us avoid saving them on the side in the `ControlStackFrame` for our
391 // `else` block (if it exists), which would require a second heap
392 // allocation. See also the comment in `translate_operator` for
393 // `Operator::Else`.
394 self.stack.reserve(num_param_types);
395 for i in (self.stack.len() - num_param_types)..self.stack.len() {
396 let val = self.stack[i];
397 self.stack.push(val);
398 }
399
400 self.control_stack.push(ControlStackFrame::If {
401 destination,
402 else_data,
403 original_stack_size: self.stack.len() - num_param_types,
404 num_param_values: num_param_types,
405 num_return_values: num_result_types,
406 exit_is_branched_to: false,
407 head_is_reachable: self.reachable,
408 consequent_ends_reachable: None,
409 blocktype,
410 });
411 }
412 }
413
414 /// Methods for handling entity references.
415 impl FuncTranslationState {
416 /// Get the `GlobalVariable` reference that should be used to access the global variable
417 /// `index`. Create the reference if necessary.
418 /// Also return the WebAssembly type of the global.
get_global<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<GlobalVariable>419 pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
420 &mut self,
421 func: &mut ir::Function,
422 index: u32,
423 environ: &mut FE,
424 ) -> WasmResult<GlobalVariable> {
425 let index = GlobalIndex::from_u32(index);
426 match self.globals.entry(index) {
427 Occupied(entry) => Ok(*entry.get()),
428 Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
429 }
430 }
431
432 /// Get the `Heap` reference that should be used to access linear memory `index`.
433 /// Create the reference if necessary.
get_heap<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Heap>434 pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
435 &mut self,
436 func: &mut ir::Function,
437 index: u32,
438 environ: &mut FE,
439 ) -> WasmResult<ir::Heap> {
440 let index = MemoryIndex::from_u32(index);
441 match self.heaps.entry(index) {
442 Occupied(entry) => Ok(*entry.get()),
443 Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
444 }
445 }
446
447 /// Get the `Table` reference that should be used to access table `index`.
448 /// Create the reference if necessary.
get_table<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Table>449 pub(crate) fn get_table<FE: FuncEnvironment + ?Sized>(
450 &mut self,
451 func: &mut ir::Function,
452 index: u32,
453 environ: &mut FE,
454 ) -> WasmResult<ir::Table> {
455 let index = TableIndex::from_u32(index);
456 match self.tables.entry(index) {
457 Occupied(entry) => Ok(*entry.get()),
458 Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)),
459 }
460 }
461
462 /// Get the `SigRef` reference that should be used to make an indirect call with signature
463 /// `index`. Also return the number of WebAssembly arguments in the signature.
464 ///
465 /// Create the signature if necessary.
get_indirect_sig<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::SigRef, usize)>466 pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
467 &mut self,
468 func: &mut ir::Function,
469 index: u32,
470 environ: &mut FE,
471 ) -> WasmResult<(ir::SigRef, usize)> {
472 let index = SignatureIndex::from_u32(index);
473 match self.signatures.entry(index) {
474 Occupied(entry) => Ok(*entry.get()),
475 Vacant(entry) => {
476 let sig = environ.make_indirect_sig(func, index)?;
477 Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
478 }
479 }
480 }
481
482 /// Get the `FuncRef` reference that should be used to make a direct call to function
483 /// `index`. Also return the number of WebAssembly arguments in the signature.
484 ///
485 /// Create the function reference if necessary.
get_direct_func<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::FuncRef, usize)>486 pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
487 &mut self,
488 func: &mut ir::Function,
489 index: u32,
490 environ: &mut FE,
491 ) -> WasmResult<(ir::FuncRef, usize)> {
492 let index = FuncIndex::from_u32(index);
493 match self.functions.entry(index) {
494 Occupied(entry) => Ok(*entry.get()),
495 Vacant(entry) => {
496 let fref = environ.make_direct_func(func, index)?;
497 let sig = func.dfg.ext_funcs[fref].signature;
498 Ok(*entry.insert((
499 fref,
500 num_wasm_parameters(environ, &func.dfg.signatures[sig]),
501 )))
502 }
503 }
504 }
505 }
506
num_wasm_parameters<FE: FuncEnvironment + ?Sized>( environ: &FE, signature: &ir::Signature, ) -> usize507 fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
508 environ: &FE,
509 signature: &ir::Signature,
510 ) -> usize {
511 (0..signature.params.len())
512 .filter(|index| environ.is_wasm_parameter(signature, *index))
513 .count()
514 }
515