1 //! WebAssembly module and function translation state.
2 //!
3 //! The `ModuleTranslationState` struct defined in this module is used to keep track of data about
4 //! the whole WebAssembly module, such as the decoded type signatures.
5 //!
6 //! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
7 //! value and control stacks during the translation of a single function.
8 
9 use crate::environ::{FuncEnvironment, GlobalVariable, WasmResult};
10 use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, SignatureIndex, TableIndex};
11 use crate::{HashMap, Occupied, Vacant};
12 use cranelift_codegen::ir::{self, Block, Inst, Value};
13 use std::vec::Vec;
14 
15 /// Information about the presence of an associated `else` for an `if`, or the
16 /// lack thereof.
17 #[derive(Debug)]
18 pub enum ElseData {
19     /// The `if` does not already have an `else` block.
20     ///
21     /// This doesn't mean that it will never have an `else`, just that we
22     /// haven't seen it yet.
23     NoElse {
24         /// If we discover that we need an `else` block, this is the jump
25         /// instruction that needs to be fixed up to point to the new `else`
26         /// block rather than the destination block after the `if...end`.
27         branch_inst: Inst,
28     },
29 
30     /// We have already allocated an `else` block.
31     ///
32     /// Usually we don't know whether we will hit an `if .. end` or an `if
33     /// .. else .. end`, but sometimes we can tell based on the block's type
34     /// signature that the signature is not valid if there isn't an `else`. In
35     /// these cases, we pre-allocate the `else` block.
36     WithElse {
37         /// This is the `else` block.
38         else_block: Block,
39     },
40 }
41 
42 /// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
43 /// fields:
44 ///
45 /// - `destination`: reference to the `Block` that will hold the code after the control block;
46 /// - `num_return_values`: number of values returned by the control block;
47 /// - `original_stack_size`: size of the value stack at the beginning of the control block.
48 ///
49 /// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction
50 /// separating the `true` and `false` branch. The `loop` frame has a `header` field that references
51 /// the `Block` that contains the beginning of the body of the loop.
52 #[derive(Debug)]
53 pub enum ControlStackFrame {
54     If {
55         destination: Block,
56         else_data: ElseData,
57         num_param_values: usize,
58         num_return_values: usize,
59         original_stack_size: usize,
60         exit_is_branched_to: bool,
61         blocktype: wasmparser::TypeOrFuncType,
62         /// Was the head of the `if` reachable?
63         head_is_reachable: bool,
64         /// What was the reachability at the end of the consequent?
65         ///
66         /// This is `None` until we're finished translating the consequent, and
67         /// is set to `Some` either by hitting an `else` when we will begin
68         /// translating the alternative, or by hitting an `end` in which case
69         /// there is no alternative.
70         consequent_ends_reachable: Option<bool>,
71         // Note: no need for `alternative_ends_reachable` because that is just
72         // `state.reachable` when we hit the `end` in the `if .. else .. end`.
73     },
74     Block {
75         destination: Block,
76         num_param_values: usize,
77         num_return_values: usize,
78         original_stack_size: usize,
79         exit_is_branched_to: bool,
80     },
81     Loop {
82         destination: Block,
83         header: Block,
84         num_param_values: usize,
85         num_return_values: usize,
86         original_stack_size: usize,
87     },
88 }
89 
90 /// Helper methods for the control stack objects.
91 impl ControlStackFrame {
num_return_values(&self) -> usize92     pub fn num_return_values(&self) -> usize {
93         match *self {
94             Self::If {
95                 num_return_values, ..
96             }
97             | Self::Block {
98                 num_return_values, ..
99             }
100             | Self::Loop {
101                 num_return_values, ..
102             } => num_return_values,
103         }
104     }
num_param_values(&self) -> usize105     pub fn num_param_values(&self) -> usize {
106         match *self {
107             Self::If {
108                 num_param_values, ..
109             }
110             | Self::Block {
111                 num_param_values, ..
112             }
113             | Self::Loop {
114                 num_param_values, ..
115             } => num_param_values,
116         }
117     }
following_code(&self) -> Block118     pub fn following_code(&self) -> Block {
119         match *self {
120             Self::If { destination, .. }
121             | Self::Block { destination, .. }
122             | Self::Loop { destination, .. } => destination,
123         }
124     }
br_destination(&self) -> Block125     pub fn br_destination(&self) -> Block {
126         match *self {
127             Self::If { destination, .. } | Self::Block { destination, .. } => destination,
128             Self::Loop { header, .. } => header,
129         }
130     }
original_stack_size(&self) -> usize131     pub fn original_stack_size(&self) -> usize {
132         match *self {
133             Self::If {
134                 original_stack_size,
135                 ..
136             }
137             | Self::Block {
138                 original_stack_size,
139                 ..
140             }
141             | Self::Loop {
142                 original_stack_size,
143                 ..
144             } => original_stack_size,
145         }
146     }
is_loop(&self) -> bool147     pub fn is_loop(&self) -> bool {
148         match *self {
149             Self::If { .. } | Self::Block { .. } => false,
150             Self::Loop { .. } => true,
151         }
152     }
153 
exit_is_branched_to(&self) -> bool154     pub fn exit_is_branched_to(&self) -> bool {
155         match *self {
156             Self::If {
157                 exit_is_branched_to,
158                 ..
159             }
160             | Self::Block {
161                 exit_is_branched_to,
162                 ..
163             } => exit_is_branched_to,
164             Self::Loop { .. } => false,
165         }
166     }
167 
set_branched_to_exit(&mut self)168     pub fn set_branched_to_exit(&mut self) {
169         match *self {
170             Self::If {
171                 ref mut exit_is_branched_to,
172                 ..
173             }
174             | Self::Block {
175                 ref mut exit_is_branched_to,
176                 ..
177             } => *exit_is_branched_to = true,
178             Self::Loop { .. } => {}
179         }
180     }
181 }
182 
183 /// Contains information passed along during a function's translation and that records:
184 ///
185 /// - The current value and control stacks.
186 /// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
187 ///   unreachable code;
188 pub struct FuncTranslationState {
189     /// A stack of values corresponding to the active values in the input wasm function at this
190     /// point.
191     pub(crate) stack: Vec<Value>,
192     /// A stack of active control flow operations at this point in the input wasm function.
193     pub(crate) control_stack: Vec<ControlStackFrame>,
194     /// Is the current translation state still reachable? This is false when translating operators
195     /// like End, Return, or Unreachable.
196     pub(crate) reachable: bool,
197 
198     // Map of global variables that have already been created by `FuncEnvironment::make_global`.
199     globals: HashMap<GlobalIndex, GlobalVariable>,
200 
201     // Map of heaps that have been created by `FuncEnvironment::make_heap`.
202     heaps: HashMap<MemoryIndex, ir::Heap>,
203 
204     // Map of tables that have been created by `FuncEnvironment::make_table`.
205     tables: HashMap<TableIndex, ir::Table>,
206 
207     // Map of indirect call signatures that have been created by
208     // `FuncEnvironment::make_indirect_sig()`.
209     // Stores both the signature reference and the number of WebAssembly arguments
210     signatures: HashMap<SignatureIndex, (ir::SigRef, usize)>,
211 
212     // Imported and local functions that have been created by
213     // `FuncEnvironment::make_direct_func()`.
214     // Stores both the function reference and the number of WebAssembly arguments
215     functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
216 }
217 
218 // Public methods that are exposed to non-`cranelift_wasm` API consumers.
219 impl FuncTranslationState {
220     /// True if the current translation state expresses reachable code, false if it is unreachable.
221     #[inline]
reachable(&self) -> bool222     pub fn reachable(&self) -> bool {
223         self.reachable
224     }
225 }
226 
227 impl FuncTranslationState {
228     /// Construct a new, empty, `FuncTranslationState`
new() -> Self229     pub(crate) fn new() -> Self {
230         Self {
231             stack: Vec::new(),
232             control_stack: Vec::new(),
233             reachable: true,
234             globals: HashMap::new(),
235             heaps: HashMap::new(),
236             tables: HashMap::new(),
237             signatures: HashMap::new(),
238             functions: HashMap::new(),
239         }
240     }
241 
clear(&mut self)242     fn clear(&mut self) {
243         debug_assert!(self.stack.is_empty());
244         debug_assert!(self.control_stack.is_empty());
245         self.reachable = true;
246         self.globals.clear();
247         self.heaps.clear();
248         self.tables.clear();
249         self.signatures.clear();
250         self.functions.clear();
251     }
252 
253     /// Initialize the state for compiling a function with the given signature.
254     ///
255     /// This resets the state to containing only a single block representing the whole function.
256     /// The exit block is the last block in the function which will contain the return instruction.
initialize(&mut self, sig: &ir::Signature, exit_block: Block)257     pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
258         self.clear();
259         self.push_block(
260             exit_block,
261             0,
262             sig.returns
263                 .iter()
264                 .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
265                 .count(),
266         );
267     }
268 
269     /// Push a value.
push1(&mut self, val: Value)270     pub(crate) fn push1(&mut self, val: Value) {
271         self.stack.push(val);
272     }
273 
274     /// Push multiple values.
pushn(&mut self, vals: &[Value])275     pub(crate) fn pushn(&mut self, vals: &[Value]) {
276         self.stack.extend_from_slice(vals);
277     }
278 
279     /// Pop one value.
pop1(&mut self) -> Value280     pub(crate) fn pop1(&mut self) -> Value {
281         self.stack
282             .pop()
283             .expect("attempted to pop a value from an empty stack")
284     }
285 
286     /// Peek at the top of the stack without popping it.
peek1(&self) -> Value287     pub(crate) fn peek1(&self) -> Value {
288         *self
289             .stack
290             .last()
291             .expect("attempted to peek at a value on an empty stack")
292     }
293 
294     /// Pop two values. Return them in the order they were pushed.
pop2(&mut self) -> (Value, Value)295     pub(crate) fn pop2(&mut self) -> (Value, Value) {
296         let v2 = self.stack.pop().unwrap();
297         let v1 = self.stack.pop().unwrap();
298         (v1, v2)
299     }
300 
301     /// Pop three values. Return them in the order they were pushed.
pop3(&mut self) -> (Value, Value, Value)302     pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
303         let v3 = self.stack.pop().unwrap();
304         let v2 = self.stack.pop().unwrap();
305         let v1 = self.stack.pop().unwrap();
306         (v1, v2, v3)
307     }
308 
309     /// Helper to ensure the the stack size is at least as big as `n`; note that due to
310     /// `debug_assert` this will not execute in non-optimized builds.
311     #[inline]
ensure_length_is_at_least(&self, n: usize)312     fn ensure_length_is_at_least(&self, n: usize) {
313         debug_assert!(
314             n <= self.stack.len(),
315             "attempted to access {} values but stack only has {} values",
316             n,
317             self.stack.len()
318         )
319     }
320 
321     /// Pop the top `n` values on the stack.
322     ///
323     /// The popped values are not returned. Use `peekn` to look at them before popping.
popn(&mut self, n: usize)324     pub(crate) fn popn(&mut self, n: usize) {
325         self.ensure_length_is_at_least(n);
326         let new_len = self.stack.len() - n;
327         self.stack.truncate(new_len);
328     }
329 
330     /// Peek at the top `n` values on the stack in the order they were pushed.
peekn(&self, n: usize) -> &[Value]331     pub(crate) fn peekn(&self, n: usize) -> &[Value] {
332         self.ensure_length_is_at_least(n);
333         &self.stack[self.stack.len() - n..]
334     }
335 
336     /// Peek at the top `n` values on the stack in the order they were pushed.
peekn_mut(&mut self, n: usize) -> &mut [Value]337     pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
338         self.ensure_length_is_at_least(n);
339         let len = self.stack.len();
340         &mut self.stack[len - n..]
341     }
342 
343     /// Push a block on the control stack.
push_block( &mut self, following_code: Block, num_param_types: usize, num_result_types: usize, )344     pub(crate) fn push_block(
345         &mut self,
346         following_code: Block,
347         num_param_types: usize,
348         num_result_types: usize,
349     ) {
350         debug_assert!(num_param_types <= self.stack.len());
351         self.control_stack.push(ControlStackFrame::Block {
352             destination: following_code,
353             original_stack_size: self.stack.len() - num_param_types,
354             num_param_values: num_param_types,
355             num_return_values: num_result_types,
356             exit_is_branched_to: false,
357         });
358     }
359 
360     /// Push a loop on the control stack.
push_loop( &mut self, header: Block, following_code: Block, num_param_types: usize, num_result_types: usize, )361     pub(crate) fn push_loop(
362         &mut self,
363         header: Block,
364         following_code: Block,
365         num_param_types: usize,
366         num_result_types: usize,
367     ) {
368         debug_assert!(num_param_types <= self.stack.len());
369         self.control_stack.push(ControlStackFrame::Loop {
370             header,
371             destination: following_code,
372             original_stack_size: self.stack.len() - num_param_types,
373             num_param_values: num_param_types,
374             num_return_values: num_result_types,
375         });
376     }
377 
378     /// Push an if on the control stack.
push_if( &mut self, destination: Block, else_data: ElseData, num_param_types: usize, num_result_types: usize, blocktype: wasmparser::TypeOrFuncType, )379     pub(crate) fn push_if(
380         &mut self,
381         destination: Block,
382         else_data: ElseData,
383         num_param_types: usize,
384         num_result_types: usize,
385         blocktype: wasmparser::TypeOrFuncType,
386     ) {
387         debug_assert!(num_param_types <= self.stack.len());
388 
389         // Push a second copy of our `if`'s parameters on the stack. This lets
390         // us avoid saving them on the side in the `ControlStackFrame` for our
391         // `else` block (if it exists), which would require a second heap
392         // allocation. See also the comment in `translate_operator` for
393         // `Operator::Else`.
394         self.stack.reserve(num_param_types);
395         for i in (self.stack.len() - num_param_types)..self.stack.len() {
396             let val = self.stack[i];
397             self.stack.push(val);
398         }
399 
400         self.control_stack.push(ControlStackFrame::If {
401             destination,
402             else_data,
403             original_stack_size: self.stack.len() - num_param_types,
404             num_param_values: num_param_types,
405             num_return_values: num_result_types,
406             exit_is_branched_to: false,
407             head_is_reachable: self.reachable,
408             consequent_ends_reachable: None,
409             blocktype,
410         });
411     }
412 }
413 
414 /// Methods for handling entity references.
415 impl FuncTranslationState {
416     /// Get the `GlobalVariable` reference that should be used to access the global variable
417     /// `index`. Create the reference if necessary.
418     /// Also return the WebAssembly type of the global.
get_global<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<GlobalVariable>419     pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
420         &mut self,
421         func: &mut ir::Function,
422         index: u32,
423         environ: &mut FE,
424     ) -> WasmResult<GlobalVariable> {
425         let index = GlobalIndex::from_u32(index);
426         match self.globals.entry(index) {
427             Occupied(entry) => Ok(*entry.get()),
428             Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
429         }
430     }
431 
432     /// Get the `Heap` reference that should be used to access linear memory `index`.
433     /// Create the reference if necessary.
get_heap<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Heap>434     pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
435         &mut self,
436         func: &mut ir::Function,
437         index: u32,
438         environ: &mut FE,
439     ) -> WasmResult<ir::Heap> {
440         let index = MemoryIndex::from_u32(index);
441         match self.heaps.entry(index) {
442             Occupied(entry) => Ok(*entry.get()),
443             Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
444         }
445     }
446 
447     /// Get the `Table` reference that should be used to access table `index`.
448     /// Create the reference if necessary.
get_table<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Table>449     pub(crate) fn get_table<FE: FuncEnvironment + ?Sized>(
450         &mut self,
451         func: &mut ir::Function,
452         index: u32,
453         environ: &mut FE,
454     ) -> WasmResult<ir::Table> {
455         let index = TableIndex::from_u32(index);
456         match self.tables.entry(index) {
457             Occupied(entry) => Ok(*entry.get()),
458             Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)),
459         }
460     }
461 
462     /// Get the `SigRef` reference that should be used to make an indirect call with signature
463     /// `index`. Also return the number of WebAssembly arguments in the signature.
464     ///
465     /// Create the signature if necessary.
get_indirect_sig<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::SigRef, usize)>466     pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
467         &mut self,
468         func: &mut ir::Function,
469         index: u32,
470         environ: &mut FE,
471     ) -> WasmResult<(ir::SigRef, usize)> {
472         let index = SignatureIndex::from_u32(index);
473         match self.signatures.entry(index) {
474             Occupied(entry) => Ok(*entry.get()),
475             Vacant(entry) => {
476                 let sig = environ.make_indirect_sig(func, index)?;
477                 Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
478             }
479         }
480     }
481 
482     /// Get the `FuncRef` reference that should be used to make a direct call to function
483     /// `index`. Also return the number of WebAssembly arguments in the signature.
484     ///
485     /// Create the function reference if necessary.
get_direct_func<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::FuncRef, usize)>486     pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
487         &mut self,
488         func: &mut ir::Function,
489         index: u32,
490         environ: &mut FE,
491     ) -> WasmResult<(ir::FuncRef, usize)> {
492         let index = FuncIndex::from_u32(index);
493         match self.functions.entry(index) {
494             Occupied(entry) => Ok(*entry.get()),
495             Vacant(entry) => {
496                 let fref = environ.make_direct_func(func, index)?;
497                 let sig = func.dfg.ext_funcs[fref].signature;
498                 Ok(*entry.insert((
499                     fref,
500                     num_wasm_parameters(environ, &func.dfg.signatures[sig]),
501                 )))
502             }
503         }
504     }
505 }
506 
num_wasm_parameters<FE: FuncEnvironment + ?Sized>( environ: &FE, signature: &ir::Signature, ) -> usize507 fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
508     environ: &FE,
509     signature: &ir::Signature,
510 ) -> usize {
511     (0..signature.params.len())
512         .filter(|index| environ.is_wasm_parameter(signature, *index))
513         .count()
514 }
515