1 //! WebAssembly module and function translation state.
2 //!
3 //! The `ModuleTranslationState` struct defined in this module is used to keep track of data about
4 //! the whole WebAssembly module, such as the decoded type signatures.
5 //!
6 //! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
7 //! value and control stacks during the translation of a single function.
8
9 use crate::environ::{FuncEnvironment, GlobalVariable, WasmResult};
10 use crate::translation_utils::{FuncIndex, GlobalIndex, MemoryIndex, TableIndex, TypeIndex};
11 use crate::{HashMap, Occupied, Vacant};
12 use cranelift_codegen::ir::{self, Block, Inst, Value};
13 use std::vec::Vec;
14
15 /// Information about the presence of an associated `else` for an `if`, or the
16 /// lack thereof.
17 #[derive(Debug)]
18 pub enum ElseData {
19 /// The `if` does not already have an `else` block.
20 ///
21 /// This doesn't mean that it will never have an `else`, just that we
22 /// haven't seen it yet.
23 NoElse {
24 /// If we discover that we need an `else` block, this is the jump
25 /// instruction that needs to be fixed up to point to the new `else`
26 /// block rather than the destination block after the `if...end`.
27 branch_inst: Inst,
28 },
29
30 /// We have already allocated an `else` block.
31 ///
32 /// Usually we don't know whether we will hit an `if .. end` or an `if
33 /// .. else .. end`, but sometimes we can tell based on the block's type
34 /// signature that the signature is not valid if there isn't an `else`. In
35 /// these cases, we pre-allocate the `else` block.
36 WithElse {
37 /// This is the `else` block.
38 else_block: Block,
39 },
40 }
41
42 /// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
43 /// fields:
44 ///
45 /// - `destination`: reference to the `Block` that will hold the code after the control block;
46 /// - `num_return_values`: number of values returned by the control block;
47 /// - `original_stack_size`: size of the value stack at the beginning of the control block.
48 ///
49 /// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction
50 /// separating the `true` and `false` branch. The `loop` frame has a `header` field that references
51 /// the `Block` that contains the beginning of the body of the loop.
52 #[derive(Debug)]
53 pub enum ControlStackFrame {
54 If {
55 destination: Block,
56 else_data: ElseData,
57 num_param_values: usize,
58 num_return_values: usize,
59 original_stack_size: usize,
60 exit_is_branched_to: bool,
61 blocktype: wasmparser::TypeOrFuncType,
62 /// Was the head of the `if` reachable?
63 head_is_reachable: bool,
64 /// What was the reachability at the end of the consequent?
65 ///
66 /// This is `None` until we're finished translating the consequent, and
67 /// is set to `Some` either by hitting an `else` when we will begin
68 /// translating the alternative, or by hitting an `end` in which case
69 /// there is no alternative.
70 consequent_ends_reachable: Option<bool>,
71 // Note: no need for `alternative_ends_reachable` because that is just
72 // `state.reachable` when we hit the `end` in the `if .. else .. end`.
73 },
74 Block {
75 destination: Block,
76 num_param_values: usize,
77 num_return_values: usize,
78 original_stack_size: usize,
79 exit_is_branched_to: bool,
80 },
81 Loop {
82 destination: Block,
83 header: Block,
84 num_param_values: usize,
85 num_return_values: usize,
86 original_stack_size: usize,
87 },
88 }
89
90 /// Helper methods for the control stack objects.
91 impl ControlStackFrame {
num_return_values(&self) -> usize92 pub fn num_return_values(&self) -> usize {
93 match *self {
94 Self::If {
95 num_return_values, ..
96 }
97 | Self::Block {
98 num_return_values, ..
99 }
100 | Self::Loop {
101 num_return_values, ..
102 } => num_return_values,
103 }
104 }
num_param_values(&self) -> usize105 pub fn num_param_values(&self) -> usize {
106 match *self {
107 Self::If {
108 num_param_values, ..
109 }
110 | Self::Block {
111 num_param_values, ..
112 }
113 | Self::Loop {
114 num_param_values, ..
115 } => num_param_values,
116 }
117 }
following_code(&self) -> Block118 pub fn following_code(&self) -> Block {
119 match *self {
120 Self::If { destination, .. }
121 | Self::Block { destination, .. }
122 | Self::Loop { destination, .. } => destination,
123 }
124 }
br_destination(&self) -> Block125 pub fn br_destination(&self) -> Block {
126 match *self {
127 Self::If { destination, .. } | Self::Block { destination, .. } => destination,
128 Self::Loop { header, .. } => header,
129 }
130 }
131 /// Private helper. Use `truncate_value_stack_to_else_params()` or
132 /// `truncate_value_stack_to_original_size()` to restore value-stack state.
original_stack_size(&self) -> usize133 fn original_stack_size(&self) -> usize {
134 match *self {
135 Self::If {
136 original_stack_size,
137 ..
138 }
139 | Self::Block {
140 original_stack_size,
141 ..
142 }
143 | Self::Loop {
144 original_stack_size,
145 ..
146 } => original_stack_size,
147 }
148 }
is_loop(&self) -> bool149 pub fn is_loop(&self) -> bool {
150 match *self {
151 Self::If { .. } | Self::Block { .. } => false,
152 Self::Loop { .. } => true,
153 }
154 }
155
exit_is_branched_to(&self) -> bool156 pub fn exit_is_branched_to(&self) -> bool {
157 match *self {
158 Self::If {
159 exit_is_branched_to,
160 ..
161 }
162 | Self::Block {
163 exit_is_branched_to,
164 ..
165 } => exit_is_branched_to,
166 Self::Loop { .. } => false,
167 }
168 }
169
set_branched_to_exit(&mut self)170 pub fn set_branched_to_exit(&mut self) {
171 match *self {
172 Self::If {
173 ref mut exit_is_branched_to,
174 ..
175 }
176 | Self::Block {
177 ref mut exit_is_branched_to,
178 ..
179 } => *exit_is_branched_to = true,
180 Self::Loop { .. } => {}
181 }
182 }
183
184 /// Pop values from the value stack so that it is left at the
185 /// input-parameters to an else-block.
truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>)186 pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>) {
187 debug_assert!(matches!(self, &ControlStackFrame::If { .. }));
188 stack.truncate(self.original_stack_size());
189 }
190
191 /// Pop values from the value stack so that it is left at the state it was
192 /// before this control-flow frame.
truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>)193 pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>) {
194 // The "If" frame pushes its parameters twice, so they're available to the else block
195 // (see also `FuncTranslationState::push_if`).
196 // Yet, the original_stack_size member accounts for them only once, so that the else
197 // block can see the same number of parameters as the consequent block. As a matter of
198 // fact, we need to substract an extra number of parameter values for if blocks.
199 let num_duplicated_params = match self {
200 &ControlStackFrame::If {
201 num_param_values, ..
202 } => {
203 debug_assert!(num_param_values <= self.original_stack_size());
204 num_param_values
205 }
206 _ => 0,
207 };
208 stack.truncate(self.original_stack_size() - num_duplicated_params);
209 }
210 }
211
212 /// Contains information passed along during a function's translation and that records:
213 ///
214 /// - The current value and control stacks.
215 /// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
216 /// unreachable code;
217 pub struct FuncTranslationState {
218 /// A stack of values corresponding to the active values in the input wasm function at this
219 /// point.
220 pub(crate) stack: Vec<Value>,
221 /// A stack of active control flow operations at this point in the input wasm function.
222 pub(crate) control_stack: Vec<ControlStackFrame>,
223 /// Is the current translation state still reachable? This is false when translating operators
224 /// like End, Return, or Unreachable.
225 pub(crate) reachable: bool,
226
227 // Map of global variables that have already been created by `FuncEnvironment::make_global`.
228 globals: HashMap<GlobalIndex, GlobalVariable>,
229
230 // Map of heaps that have been created by `FuncEnvironment::make_heap`.
231 heaps: HashMap<MemoryIndex, ir::Heap>,
232
233 // Map of tables that have been created by `FuncEnvironment::make_table`.
234 pub(crate) tables: HashMap<TableIndex, ir::Table>,
235
236 // Map of indirect call signatures that have been created by
237 // `FuncEnvironment::make_indirect_sig()`.
238 // Stores both the signature reference and the number of WebAssembly arguments
239 signatures: HashMap<TypeIndex, (ir::SigRef, usize)>,
240
241 // Imported and local functions that have been created by
242 // `FuncEnvironment::make_direct_func()`.
243 // Stores both the function reference and the number of WebAssembly arguments
244 functions: HashMap<FuncIndex, (ir::FuncRef, usize)>,
245 }
246
247 // Public methods that are exposed to non-`cranelift_wasm` API consumers.
248 impl FuncTranslationState {
249 /// True if the current translation state expresses reachable code, false if it is unreachable.
250 #[inline]
reachable(&self) -> bool251 pub fn reachable(&self) -> bool {
252 self.reachable
253 }
254 }
255
256 impl FuncTranslationState {
257 /// Construct a new, empty, `FuncTranslationState`
new() -> Self258 pub(crate) fn new() -> Self {
259 Self {
260 stack: Vec::new(),
261 control_stack: Vec::new(),
262 reachable: true,
263 globals: HashMap::new(),
264 heaps: HashMap::new(),
265 tables: HashMap::new(),
266 signatures: HashMap::new(),
267 functions: HashMap::new(),
268 }
269 }
270
clear(&mut self)271 fn clear(&mut self) {
272 debug_assert!(self.stack.is_empty());
273 debug_assert!(self.control_stack.is_empty());
274 self.reachable = true;
275 self.globals.clear();
276 self.heaps.clear();
277 self.tables.clear();
278 self.signatures.clear();
279 self.functions.clear();
280 }
281
282 /// Initialize the state for compiling a function with the given signature.
283 ///
284 /// This resets the state to containing only a single block representing the whole function.
285 /// The exit block is the last block in the function which will contain the return instruction.
initialize(&mut self, sig: &ir::Signature, exit_block: Block)286 pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
287 self.clear();
288 self.push_block(
289 exit_block,
290 0,
291 sig.returns
292 .iter()
293 .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
294 .count(),
295 );
296 }
297
298 /// Push a value.
push1(&mut self, val: Value)299 pub(crate) fn push1(&mut self, val: Value) {
300 self.stack.push(val);
301 }
302
303 /// Push multiple values.
pushn(&mut self, vals: &[Value])304 pub(crate) fn pushn(&mut self, vals: &[Value]) {
305 self.stack.extend_from_slice(vals);
306 }
307
308 /// Pop one value.
pop1(&mut self) -> Value309 pub(crate) fn pop1(&mut self) -> Value {
310 self.stack
311 .pop()
312 .expect("attempted to pop a value from an empty stack")
313 }
314
315 /// Peek at the top of the stack without popping it.
peek1(&self) -> Value316 pub(crate) fn peek1(&self) -> Value {
317 *self
318 .stack
319 .last()
320 .expect("attempted to peek at a value on an empty stack")
321 }
322
323 /// Pop two values. Return them in the order they were pushed.
pop2(&mut self) -> (Value, Value)324 pub(crate) fn pop2(&mut self) -> (Value, Value) {
325 let v2 = self.stack.pop().unwrap();
326 let v1 = self.stack.pop().unwrap();
327 (v1, v2)
328 }
329
330 /// Pop three values. Return them in the order they were pushed.
pop3(&mut self) -> (Value, Value, Value)331 pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
332 let v3 = self.stack.pop().unwrap();
333 let v2 = self.stack.pop().unwrap();
334 let v1 = self.stack.pop().unwrap();
335 (v1, v2, v3)
336 }
337
338 /// Helper to ensure the the stack size is at least as big as `n`; note that due to
339 /// `debug_assert` this will not execute in non-optimized builds.
340 #[inline]
ensure_length_is_at_least(&self, n: usize)341 fn ensure_length_is_at_least(&self, n: usize) {
342 debug_assert!(
343 n <= self.stack.len(),
344 "attempted to access {} values but stack only has {} values",
345 n,
346 self.stack.len()
347 )
348 }
349
350 /// Pop the top `n` values on the stack.
351 ///
352 /// The popped values are not returned. Use `peekn` to look at them before popping.
popn(&mut self, n: usize)353 pub(crate) fn popn(&mut self, n: usize) {
354 self.ensure_length_is_at_least(n);
355 let new_len = self.stack.len() - n;
356 self.stack.truncate(new_len);
357 }
358
359 /// Peek at the top `n` values on the stack in the order they were pushed.
peekn(&self, n: usize) -> &[Value]360 pub(crate) fn peekn(&self, n: usize) -> &[Value] {
361 self.ensure_length_is_at_least(n);
362 &self.stack[self.stack.len() - n..]
363 }
364
365 /// Peek at the top `n` values on the stack in the order they were pushed.
peekn_mut(&mut self, n: usize) -> &mut [Value]366 pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
367 self.ensure_length_is_at_least(n);
368 let len = self.stack.len();
369 &mut self.stack[len - n..]
370 }
371
372 /// Push a block on the control stack.
push_block( &mut self, following_code: Block, num_param_types: usize, num_result_types: usize, )373 pub(crate) fn push_block(
374 &mut self,
375 following_code: Block,
376 num_param_types: usize,
377 num_result_types: usize,
378 ) {
379 debug_assert!(num_param_types <= self.stack.len());
380 self.control_stack.push(ControlStackFrame::Block {
381 destination: following_code,
382 original_stack_size: self.stack.len() - num_param_types,
383 num_param_values: num_param_types,
384 num_return_values: num_result_types,
385 exit_is_branched_to: false,
386 });
387 }
388
389 /// Push a loop on the control stack.
push_loop( &mut self, header: Block, following_code: Block, num_param_types: usize, num_result_types: usize, )390 pub(crate) fn push_loop(
391 &mut self,
392 header: Block,
393 following_code: Block,
394 num_param_types: usize,
395 num_result_types: usize,
396 ) {
397 debug_assert!(num_param_types <= self.stack.len());
398 self.control_stack.push(ControlStackFrame::Loop {
399 header,
400 destination: following_code,
401 original_stack_size: self.stack.len() - num_param_types,
402 num_param_values: num_param_types,
403 num_return_values: num_result_types,
404 });
405 }
406
407 /// Push an if on the control stack.
push_if( &mut self, destination: Block, else_data: ElseData, num_param_types: usize, num_result_types: usize, blocktype: wasmparser::TypeOrFuncType, )408 pub(crate) fn push_if(
409 &mut self,
410 destination: Block,
411 else_data: ElseData,
412 num_param_types: usize,
413 num_result_types: usize,
414 blocktype: wasmparser::TypeOrFuncType,
415 ) {
416 debug_assert!(num_param_types <= self.stack.len());
417
418 // Push a second copy of our `if`'s parameters on the stack. This lets
419 // us avoid saving them on the side in the `ControlStackFrame` for our
420 // `else` block (if it exists), which would require a second heap
421 // allocation. See also the comment in `translate_operator` for
422 // `Operator::Else`.
423 self.stack.reserve(num_param_types);
424 for i in (self.stack.len() - num_param_types)..self.stack.len() {
425 let val = self.stack[i];
426 self.stack.push(val);
427 }
428
429 self.control_stack.push(ControlStackFrame::If {
430 destination,
431 else_data,
432 original_stack_size: self.stack.len() - num_param_types,
433 num_param_values: num_param_types,
434 num_return_values: num_result_types,
435 exit_is_branched_to: false,
436 head_is_reachable: self.reachable,
437 consequent_ends_reachable: None,
438 blocktype,
439 });
440 }
441 }
442
443 /// Methods for handling entity references.
444 impl FuncTranslationState {
445 /// Get the `GlobalVariable` reference that should be used to access the global variable
446 /// `index`. Create the reference if necessary.
447 /// Also return the WebAssembly type of the global.
get_global<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<GlobalVariable>448 pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
449 &mut self,
450 func: &mut ir::Function,
451 index: u32,
452 environ: &mut FE,
453 ) -> WasmResult<GlobalVariable> {
454 let index = GlobalIndex::from_u32(index);
455 match self.globals.entry(index) {
456 Occupied(entry) => Ok(*entry.get()),
457 Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
458 }
459 }
460
461 /// Get the `Heap` reference that should be used to access linear memory `index`.
462 /// Create the reference if necessary.
get_heap<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Heap>463 pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
464 &mut self,
465 func: &mut ir::Function,
466 index: u32,
467 environ: &mut FE,
468 ) -> WasmResult<ir::Heap> {
469 let index = MemoryIndex::from_u32(index);
470 match self.heaps.entry(index) {
471 Occupied(entry) => Ok(*entry.get()),
472 Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
473 }
474 }
475
476 /// Get the `Table` reference that should be used to access table `index`.
477 /// Create the reference if necessary.
get_or_create_table<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<ir::Table>478 pub(crate) fn get_or_create_table<FE: FuncEnvironment + ?Sized>(
479 &mut self,
480 func: &mut ir::Function,
481 index: u32,
482 environ: &mut FE,
483 ) -> WasmResult<ir::Table> {
484 let index = TableIndex::from_u32(index);
485 match self.tables.entry(index) {
486 Occupied(entry) => Ok(*entry.get()),
487 Vacant(entry) => Ok(*entry.insert(environ.make_table(func, index)?)),
488 }
489 }
490
491 /// Get the `SigRef` reference that should be used to make an indirect call with signature
492 /// `index`. Also return the number of WebAssembly arguments in the signature.
493 ///
494 /// Create the signature if necessary.
get_indirect_sig<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::SigRef, usize)>495 pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
496 &mut self,
497 func: &mut ir::Function,
498 index: u32,
499 environ: &mut FE,
500 ) -> WasmResult<(ir::SigRef, usize)> {
501 let index = TypeIndex::from_u32(index);
502 match self.signatures.entry(index) {
503 Occupied(entry) => Ok(*entry.get()),
504 Vacant(entry) => {
505 let sig = environ.make_indirect_sig(func, index)?;
506 Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
507 }
508 }
509 }
510
511 /// Get the `FuncRef` reference that should be used to make a direct call to function
512 /// `index`. Also return the number of WebAssembly arguments in the signature.
513 ///
514 /// Create the function reference if necessary.
get_direct_func<FE: FuncEnvironment + ?Sized>( &mut self, func: &mut ir::Function, index: u32, environ: &mut FE, ) -> WasmResult<(ir::FuncRef, usize)>515 pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
516 &mut self,
517 func: &mut ir::Function,
518 index: u32,
519 environ: &mut FE,
520 ) -> WasmResult<(ir::FuncRef, usize)> {
521 let index = FuncIndex::from_u32(index);
522 match self.functions.entry(index) {
523 Occupied(entry) => Ok(*entry.get()),
524 Vacant(entry) => {
525 let fref = environ.make_direct_func(func, index)?;
526 let sig = func.dfg.ext_funcs[fref].signature;
527 Ok(*entry.insert((
528 fref,
529 num_wasm_parameters(environ, &func.dfg.signatures[sig]),
530 )))
531 }
532 }
533 }
534 }
535
num_wasm_parameters<FE: FuncEnvironment + ?Sized>( environ: &FE, signature: &ir::Signature, ) -> usize536 fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
537 environ: &FE,
538 signature: &ir::Signature,
539 ) -> usize {
540 (0..signature.params.len())
541 .filter(|index| environ.is_wasm_parameter(signature, *index))
542 .count()
543 }
544