1 //! WebAssembly module and function translation state.
2 //!
3 //! The `ModuleTranslationState` struct defined in this module is used to keep track of data about
4 //! the whole WebAssembly module, such as the decoded type signatures.
5 //!
6 //! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
7 //! value and control stacks during the translation of a single function.
8 
9 use crate::environ::{FuncEnvironment, GlobalVariable, WasmResult};
10 use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, TableIndex, TypeIndex};
11 use crate::{HashMap, Occupied, Vacant};
12 use cranelift_codegen::ir::{self, Block, Inst, Value};
13 use std::vec::Vec;
14 
15 /// Information about the presence of an associated `else` for an `if`, or the
16 /// lack thereof.
17 #[derive(Debug)]
18 pub enum ElseData {
19     /// The `if` does not already have an `else` block.
20     ///
21     /// This doesn't mean that it will never have an `else`, just that we
22     /// haven't seen it yet.
23     NoElse {
24         /// If we discover that we need an `else` block, this is the jump
25         /// instruction that needs to be fixed up to point to the new `else`
26         /// block rather than the destination block after the `if...end`.
27         branch_inst: Inst,
28     },
29 
30     /// We have already allocated an `else` block.
31     ///
32     /// Usually we don't know whether we will hit an `if .. end` or an `if
33     /// .. else .. end`, but sometimes we can tell based on the block's type
34     /// signature that the signature is not valid if there isn't an `else`. In
35     /// these cases, we pre-allocate the `else` block.
36     WithElse {
37         /// This is the `else` block.
38         else_block: Block,
39     },
40 }
41 
42 /// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
43 /// fields:
44 ///
45 /// - `destination`: reference to the `Block` that will hold the code after the control block;
46 /// - `num_return_values`: number of values returned by the control block;
47 /// - `original_stack_size`: size of the value stack at the beginning of the control block.
48 ///
49 /// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction
50 /// separating the `true` and `false` branch. The `loop` frame has a `header` field that references
51 /// the `Block` that contains the beginning of the body of the loop.
52 #[derive(Debug)]
53 pub enum ControlStackFrame {
54     If {
55         destination: Block,
56         else_data: ElseData,
57         num_param_values: usize,
58         num_return_values: usize,
59         original_stack_size: usize,
60         exit_is_branched_to: bool,
61         blocktype: wasmparser::TypeOrFuncType,
62         /// Was the head of the `if` reachable?
63         head_is_reachable: bool,
64         /// What was the reachability at the end of the consequent?
65         ///
66         /// This is `None` until we're finished translating the consequent, and
67         /// is set to `Some` either by hitting an `else` when we will begin
68         /// translating the alternative, or by hitting an `end` in which case
69         /// there is no alternative.
70         consequent_ends_reachable: Option<bool>,
71         // Note: no need for `alternative_ends_reachable` because that is just
72         // `state.reachable` when we hit the `end` in the `if .. else .. end`.
73     },
74     Block {
75         destination: Block,
76         num_param_values: usize,
77         num_return_values: usize,
78         original_stack_size: usize,
79         exit_is_branched_to: bool,
80     },
81     Loop {
82         destination: Block,
83         header: Block,
84         num_param_values: usize,
85         num_return_values: usize,
86         original_stack_size: usize,
87     },
88 }
89 
90 /// Helper methods for the control stack objects.
91 impl ControlStackFrame {
num_return_values(&self) -> usize92     pub fn num_return_values(&self) -> usize {
93         match *self {
94             Self::If {
95                 num_return_values, ..
96             }
97             | Self::Block {
98                 num_return_values, ..
99             }
100             | Self::Loop {
101                 num_return_values, ..
102             } => num_return_values,
103         }
104     }
num_param_values(&self) -> usize105     pub fn num_param_values(&self) -> usize {
106         match *self {
107             Self::If {
108                 num_param_values, ..
109             }
110             | Self::Block {
111                 num_param_values, ..
112             }
113             | Self::Loop {
114                 num_param_values, ..
115             } => num_param_values,
116         }
117     }
following_code(&self) -> Block118     pub fn following_code(&self) -> Block {
119         match *self {
120             Self::If { destination, .. }
121             | Self::Block { destination, .. }
122             | Self::Loop { destination, .. } => destination,
123         }
124     }
br_destination(&self) -> Block125     pub fn br_destination(&self) -> Block {
126         match *self {
127             Self::If { destination, .. } | Self::Block { destination, .. } => destination,
128             Self::Loop { header, .. } => header,
129         }
130     }
131     /// Private helper. Use `truncate_value_stack_to_else_params()` or
132     /// `truncate_value_stack_to_original_size()` to restore value-stack state.
original_stack_size(&self) -> usize133     fn original_stack_size(&self) -> usize {
134         match *self {
135             Self::If {
136                 original_stack_size,
137                 ..
138             }
139             | Self::Block {
140                 original_stack_size,
141                 ..
142             }
143             | Self::Loop {
144                 original_stack_size,
145                 ..
146             } => original_stack_size,
147         }
148     }
is_loop(&self) -> bool149     pub fn is_loop(&self) -> bool {
150         match *self {
151             Self::If { .. } | Self::Block { .. } => false,
152             Self::Loop { .. } => true,
153         }
154     }
155 
exit_is_branched_to(&self) -> bool156     pub fn exit_is_branched_to(&self) -> bool {
157         match *self {
158             Self::If {
159                 exit_is_branched_to,
160                 ..
161             }
162             | Self::Block {
163                 exit_is_branched_to,
164                 ..
165             } => exit_is_branched_to,
166             Self::Loop { .. } => false,
167         }
168     }
169 
set_branched_to_exit(&mut self)170     pub fn set_branched_to_exit(&mut self) {
171         match *self {
172             Self::If {
173                 ref mut exit_is_branched_to,
174                 ..
175             }
176             | Self::Block {
177                 ref mut exit_is_branched_to,
178                 ..
179             } => *exit_is_branched_to = true,
180             Self::Loop { .. } => {}
181         }
182     }
183 
184     /// Pop values from the value stack so that it is left at the
185     /// input-parameters to an else-block.
truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>)186     pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>) {
187         debug_assert!(matches!(self, &ControlStackFrame::If { .. }));
188         stack.truncate(self.original_stack_size());
189     }
190 
191     /// Pop values from the value stack so that it is left at the state it was
192     /// before this control-flow frame.
truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>)193     pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>) {
194         // The "If" frame pushes its parameters twice, so they're available to the else block
195         // (see also `FuncTranslationState::push_if`).
196         // Yet, the original_stack_size member accounts for them only once, so that the else
197         // block can see the same number of parameters as the consequent block. As a matter of
198         // fact, we need to substract an extra number of parameter values for if blocks.
199         let num_duplicated_params = match self {
200             &ControlStackFrame::If {
201                 num_param_values, ..
202             } => {
203                 debug_assert!(num_param_values <= self.original_stack_size());
204                 num_param_values
205             }
206             _ => 0,
207         };
208         stack.truncate(self.original_stack_size() - num_duplicated_params);
209     }
210 }
211 
212 /// Contains information passed along during a function's translation and that records:
213 ///
214 /// - The current value and control stacks.
215 /// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
216 ///   unreachable code;
217 pub struct FuncTranslationState {
218     /// A stack of values corresponding to the active values in the input wasm function at this
219     /// point.
220     pub(crate) stack: Vec<Value>,
221     /// A stack of active control flow operations at this point in the input wasm function.
222     pub(crate) control_stack: Vec<ControlStackFrame>,
223     /// Is the current translation state still reachable? This is false when translating operators
224     /// like End, Return, or Unreachable.
225     pub(crate) reachable: bool,
226 
227     // Map of global variables that have already been created by `FuncEnvironment::make_global`.
228     globals: HashMap<GlobalIndex, GlobalVariable>,
229 
230     // Map of heaps that have been created by `FuncEnvironment::make_heap`.
231     heaps: HashMap<MemoryIndex, ir::Heap>,
232 
233     // Map of tables that have been created by `FuncEnvironment::make_table`.
234     pub(crate) tables: HashMap<TableIndex, ir::Table>,
235 
236     // Map of indirect call signatures that have been created by
237     // `FuncEnvironment::make_indirect_sig()`.
238     // Stores both the signature reference and the number of WebAssembly arguments
239     signatures: HashMap<TypeIndex, (ir::SigRef, usize)>,
240 
241     // Imported and local functions that have been created by
242     // `FuncEnvironment::make_direct_func()`.
243     // Stores both the function reference and the number of WebAssembly arguments
244     functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
245 }
246 
247 // Public methods that are exposed to non-`cranelift_wasm` API consumers.
248 impl FuncTranslationState {
249     /// True if the current translation state expresses reachable code, false if it is unreachable.
250     #[inline]
reachable(&self) -> bool251     pub fn reachable(&self) -> bool {
252         self.reachable
253     }
254 }
255 
256 impl FuncTranslationState {
257     /// Construct a new, empty, `FuncTranslationState`
new() -> Self258     pub(crate) fn new() -> Self {
259         Self {
260             stack: Vec::new(),
261             control_stack: Vec::new(),
262             reachable: true,
263             globals: HashMap::new(),
264             heaps: HashMap::new(),
265             tables: HashMap::new(),
266             signatures: HashMap::new(),
267             functions: HashMap::new(),
268         }
269     }
270 
clear(&mut self)271     fn clear(&mut self) {
272         debug_assert!(self.stack.is_empty());
273         debug_assert!(self.control_stack.is_empty());
274         self.reachable = true;
275         self.globals.clear();
276         self.heaps.clear();
277         self.tables.clear();
278         self.signatures.clear();
279         self.functions.clear();
280     }
281 
282     /// Initialize the state for compiling a function with the given signature.
283     ///
284     /// This resets the state to containing only a single block representing the whole function.
285     /// The exit block is the last block in the function which will contain the return instruction.
initialize(&mut self, sig: &ir::Signature, exit_block: Block)286     pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
287         self.clear();
288         self.push_block(
289             exit_block,
290             0,
291             sig.returns
292                 .iter()
293                 .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
294                 .count(),
295         );
296     }
297 
298     /// Push a value.
push1(&mut self, val: Value)299     pub(crate) fn push1(&mut self, val: Value) {
300         self.stack.push(val);
301     }
302 
303     /// Push multiple values.
pushn(&mut self, vals: &[Value])304     pub(crate) fn pushn(&mut self, vals: &[Value]) {
305         self.stack.extend_from_slice(vals);
306     }
307 
308     /// Pop one value.
pop1(&mut self) -> Value309     pub(crate) fn pop1(&mut self) -> Value {
310         self.stack
311             .pop()
312             .expect("attempted to pop a value from an empty stack")
313     }
314 
315     /// Peek at the top of the stack without popping it.
peek1(&self) -> Value316     pub(crate) fn peek1(&self) -> Value {
317         *self
318             .stack
319             .last()
320             .expect("attempted to peek at a value on an empty stack")
321     }
322 
323     /// Pop two values. Return them in the order they were pushed.
pop2(&mut self) -> (Value, Value)324     pub(crate) fn pop2(&mut self) -> (Value, Value) {
325         let v2 = self.stack.pop().unwrap();
326         let v1 = self.stack.pop().unwrap();
327         (v1, v2)
328     }
329 
330     /// Pop three values. Return them in the order they were pushed.
pop3(&mut self) -> (Value, Value, Value)331     pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
332         let v3 = self.stack.pop().unwrap();
333         let v2 = self.stack.pop().unwrap();
334         let v1 = self.stack.pop().unwrap();
335         (v1, v2, v3)
336     }
337 
338     /// Helper to ensure the the stack size is at least as big as `n`; note that due to
339     /// `debug_assert` this will not execute in non-optimized builds.
340     #[inline]
ensure_length_is_at_least(&self, n: usize)341     fn ensure_length_is_at_least(&self, n: usize) {
342         debug_assert!(
343             n <= self.stack.len(),
344             "attempted to access {} values but stack only has {} values",
345             n,
346             self.stack.len()
347         )
348     }
349 
350     /// Pop the top `n` values on the stack.
351     ///
352     /// The popped values are not returned. Use `peekn` to look at them before popping.
popn(&mut self, n: usize)353     pub(crate) fn popn(&mut self, n: usize) {
354         self.ensure_length_is_at_least(n);
355         let new_len = self.stack.len() - n;
356         self.stack.truncate(new_len);
357     }
358 
359     /// Peek at the top `n` values on the stack in the order they were pushed.
peekn(&self, n: usize) -> &[Value]360     pub(crate) fn peekn(&self, n: usize) -> &[Value] {
361         self.ensure_length_is_at_least(n);
362         &self.stack[self.stack.len() - n..]
363     }
364 
365     /// Peek at the top `n` values on the stack in the order they were pushed.
peekn_mut(&mut self, n: usize) -> &mut [Value]366     pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
367         self.ensure_length_is_at_least(n);
368         let len = self.stack.len();
369         &mut self.stack[len - n..]
370     }
371 
372     /// Push a block on the control stack.
push_block( &mut self, following_code: Block, num_param_types: usize, num_result_types: usize, )373     pub(crate) fn push_block(
374         &mut self,
375         following_code: Block,
376         num_param_types: usize,
377         num_result_types: usize,
378     ) {
379         debug_assert!(num_param_types <= self.stack.len());
380         self.control_stack.push(ControlStackFrame::Block {
381             destination: following_code,
382             original_stack_size: self.stack.len() - num_param_types,
383             num_param_values: num_param_types,
384             num_return_values: num_result_types,
385             exit_is_branched_to: false,
386         });
387     }
388 
389     /// Push a loop on the control stack.
push_loop( &mut self, header: Block, following_code: Block, num_param_types: usize, num_result_types: usize, )390     pub(crate) fn push_loop(
391         &mut self,
392         header: Block,
393         following_code: Block,
394         num_param_types: usize,
395         num_result_types: usize,
396     ) {
397         debug_assert!(num_param_types <= self.stack.len());
398         self.control_stack.push(ControlStackFrame::Loop {
399             header,
400             destination: following_code,
401             original_stack_size: self.stack.len() - num_param_types,
402             num_param_values: num_param_types,
403             num_return_values: num_result_types,
404         });
405     }
406 
407     /// Push an if on the control stack.
push_if( &mut self, destination: Block, else_data: ElseData, num_param_types: usize, num_result_types: usize, blocktype: wasmparser::TypeOrFuncType, )408     pub(crate) fn push_if(
409         &mut self,
410         destination: Block,
411         else_data: ElseData,
412         num_param_types: usize,
413         num_result_types: usize,
414         blocktype: wasmparser::TypeOrFuncType,
415     ) {
416         debug_assert!(num_param_types <= self.stack.len());
417 
418         // Push a second copy of our `if`'s parameters on the stack. This lets
419         // us avoid saving them on the side in the `ControlStackFrame` for our
420         // `else` block (if it exists), which would require a second heap
421         // allocation. See also the comment in `translate_operator` for
422         // `Operator::Else`.
423         self.stack.reserve(num_param_types);
424         for i in (self.stack.len() - num_param_types)..self.stack.len() {
425             let val = self.stack[i];
426             self.stack.push(val);
427         }
428 
429         self.control_stack.push(ControlStackFrame::If {
430             destination,
431             else_data,
432             original_stack_size: self.stack.len() - num_param_types,
433             num_param_values: num_param_types,
434             num_return_values: num_result_types,
435             exit_is_branched_to: false,
436             head_is_reachable: self.reachable,
437             consequent_ends_reachable: None,
438             blocktype,
439         });
440     }
441 }
442 
443 /// Methods for handling entity references.
444 impl FuncTranslationState {
445     /// Get the `GlobalVariable` reference that should be used to access the global variable
446     /// `index`. Create the reference if necessary.
447     /// Also return the WebAssembly type of the global.
get_global<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<GlobalVariable>448     pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
449         &mut self,
450         func: &mut ir::Function,
451         index: u32,
452         environ: &mut FE,
453     ) -> WasmResult<GlobalVariable> {
454         let index = GlobalIndex::from_u32(index);
455         match self.globals.entry(index) {
456             Occupied(entry) => Ok(*entry.get()),
457             Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
458         }
459     }
460 
461     /// Get the `Heap` reference that should be used to access linear memory `index`.
462     /// Create the reference if necessary.
get_heap<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Heap>463     pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
464         &mut self,
465         func: &mut ir::Function,
466         index: u32,
467         environ: &mut FE,
468     ) -> WasmResult<ir::Heap> {
469         let index = MemoryIndex::from_u32(index);
470         match self.heaps.entry(index) {
471             Occupied(entry) => Ok(*entry.get()),
472             Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
473         }
474     }
475 
476     /// Get the `Table` reference that should be used to access table `index`.
477     /// Create the reference if necessary.
get_or_create_table<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Table>478     pub(crate) fn get_or_create_table<FE: FuncEnvironment + ?Sized>(
479         &mut self,
480         func: &mut ir::Function,
481         index: u32,
482         environ: &mut FE,
483     ) -> WasmResult<ir::Table> {
484         let index = TableIndex::from_u32(index);
485         match self.tables.entry(index) {
486             Occupied(entry) => Ok(*entry.get()),
487             Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)),
488         }
489     }
490 
491     /// Get the `SigRef` reference that should be used to make an indirect call with signature
492     /// `index`. Also return the number of WebAssembly arguments in the signature.
493     ///
494     /// Create the signature if necessary.
get_indirect_sig<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::SigRef, usize)>495     pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
496         &mut self,
497         func: &mut ir::Function,
498         index: u32,
499         environ: &mut FE,
500     ) -> WasmResult<(ir::SigRef, usize)> {
501         let index = TypeIndex::from_u32(index);
502         match self.signatures.entry(index) {
503             Occupied(entry) => Ok(*entry.get()),
504             Vacant(entry) => {
505                 let sig = environ.make_indirect_sig(func, index)?;
506                 Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
507             }
508         }
509     }
510 
511     /// Get the `FuncRef` reference that should be used to make a direct call to function
512     /// `index`. Also return the number of WebAssembly arguments in the signature.
513     ///
514     /// Create the function reference if necessary.
get_direct_func<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::FuncRef, usize)>515     pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
516         &mut self,
517         func: &mut ir::Function,
518         index: u32,
519         environ: &mut FE,
520     ) -> WasmResult<(ir::FuncRef, usize)> {
521         let index = FuncIndex::from_u32(index);
522         match self.functions.entry(index) {
523             Occupied(entry) => Ok(*entry.get()),
524             Vacant(entry) => {
525                 let fref = environ.make_direct_func(func, index)?;
526                 let sig = func.dfg.ext_funcs[fref].signature;
527                 Ok(*entry.insert((
528                     fref,
529                     num_wasm_parameters(environ, &func.dfg.signatures[sig]),
530                 )))
531             }
532         }
533     }
534 }
535 
num_wasm_parameters<FE: FuncEnvironment + ?Sized>( environ: &FE, signature: &ir::Signature, ) -> usize536 fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
537     environ: &FE,
538     signature: &ir::Signature,
539 ) -> usize {
540     (0..signature.params.len())
541         .filter(|index| environ.is_wasm_parameter(signature, *index))
542         .count()
543 }
544