1 #![allow(clippy::float_cmp)]
2 
3 use self::registers::*;
4 use crate::error::Error;
5 use crate::microwasm::{BrTarget, Ieee32, Ieee64, SignlessType, Type, Value, F32, F64, I32, I64};
6 use crate::module::ModuleContext;
7 use cranelift_codegen::{
8     binemit,
9     ir::{self, SourceLoc, TrapCode},
10 };
11 use dynasm::dynasm;
12 use dynasmrt::x64::Assembler;
13 use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
14 use std::{
15     cmp::Ordering,
16     convert::{TryFrom, TryInto},
17     fmt::Display,
18     hash::Hash,
19     iter, mem,
20     ops::{Deref, RangeInclusive},
21 };
22 // use wasmtime_environ::BuiltinFunctionIndex;
23 
24 mod magic {
25     /// An index type for builtin functions.
26     pub struct BuiltinFunctionIndex(u32);
27 
28     impl BuiltinFunctionIndex {
29         /// Returns an index for wasm's `memory.grow` builtin function.
30         pub const fn get_memory32_grow_index() -> Self {
31             Self(0)
32         }
33         /// Returns an index for wasm's imported `memory.grow` builtin function.
34         pub const fn get_imported_memory32_grow_index() -> Self {
35             Self(1)
36         }
37         /// Returns an index for wasm's `memory.size` builtin function.
38         pub const fn get_memory32_size_index() -> Self {
39             Self(2)
40         }
41         /// Returns an index for wasm's imported `memory.size` builtin function.
42         pub const fn get_imported_memory32_size_index() -> Self {
43             Self(3)
44         }
45 
46         /// Return the index as a u32.
47         pub const fn index(&self) -> u32 {
48             self.0
49         }
50     }
51 }
52 
53 use magic::BuiltinFunctionIndex;
54 
55 /// Size of a pointer on the target in bytes.
56 const WORD_SIZE: u32 = 8;
57 
58 type RegId = u8;
59 
60 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
61 pub enum GPR {
62     Rq(RegId),
63     Rx(RegId),
64 }
65 
66 #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
67 pub enum GPRType {
68     Rq,
69     Rx,
70 }
71 
72 impl From<SignlessType> for GPRType {
73     fn from(other: SignlessType) -> GPRType {
74         match other {
75             I32 | I64 => GPRType::Rq,
76             F32 | F64 => GPRType::Rx,
77         }
78     }
79 }
80 
81 impl From<SignlessType> for Option<GPRType> {
82     fn from(other: SignlessType) -> Self {
83         Some(other.into())
84     }
85 }
86 
87 impl GPR {
88     fn type_(self) -> GPRType {
89         match self {
90             GPR::Rq(_) => GPRType::Rq,
91             GPR::Rx(_) => GPRType::Rx,
92         }
93     }
94 
95     fn rq(self) -> Option<RegId> {
96         match self {
97             GPR::Rq(r) => Some(r),
98             GPR::Rx(_) => None,
99         }
100     }
101 
102     fn rx(self) -> Option<RegId> {
103         match self {
104             GPR::Rx(r) => Some(r),
105             GPR::Rq(_) => None,
106         }
107     }
108 }
109 
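/// Assigns each argument type a calling-convention location: integer arguments take the
/// next free register from `INTEGER_ARGS_IN_GPRS`, float arguments from `FLOAT_ARGS_IN_GPRS`,
/// and anything left over spills to consecutive stack slots.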
110 fn arg_locs<I: IntoIterator<Item = SignlessType>>(
111     types: I,
112 ) -> impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone
113 where
114     I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
115 {
116     // TODO: VmCtx is in the first register
117     let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.iter();
118     let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.iter();
119     let mut stack_idx = 0;
120 
121     types
122         .into_iter()
123         .map(move |ty| {
124             match ty {
125                 I32 | I64 => int_gpr_iter.next(),
126                 F32 | F64 => float_gpr_iter.next(),
127             }
128             .map(|&r| CCLoc::Reg(r))
129             .unwrap_or_else(|| {
130                 let out = CCLoc::Stack(stack_idx);
131                 stack_idx += 1;
132                 out
133             })
134         })
135         // Collect eagerly: the closure advances the register iterators as a side effect,
136         // so a lazy map could not be cloned or iterated from the back correctly.
137         .collect::<Vec<_>>()
138         .into_iter()
139 }
140 
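/// Like `arg_locs`, but lays the arguments out as if an extra leading `I32` (the caller's
/// vmctx) were passed first, then skips that synthetic slot so only the real arguments are
/// yielded.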
141 fn arg_locs_skip_caller_vmctx<I: IntoIterator<Item = SignlessType>>(
142     types: I,
143 ) -> impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone
144 where
145     I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
146 {
147     #[derive(Debug, Clone)]
148     struct WithInt<I> {
149         caller_vmctx_ty: Option<SignlessType>,
150         iter: I,
151     }
152 
153     impl<I> Iterator for WithInt<I>
154     where
155         I: Iterator<Item = SignlessType>,
156     {
157         type Item = SignlessType;
158 
159         fn next(&mut self) -> Option<Self::Item> {
160             self.caller_vmctx_ty.take().or_else(|| self.iter.next())
161         }
162 
163         fn size_hint(&self) -> (usize, Option<usize>) {
164             let ty_len = if self.caller_vmctx_ty.is_some() { 1 } else { 0 };
165             let (lower, upper) = self.iter.size_hint();
166 
167             (lower + ty_len, upper.map(|u| u + ty_len))
168         }
169     }
170 
171     impl<I> DoubleEndedIterator for WithInt<I>
172     where
173         I: DoubleEndedIterator<Item = SignlessType>,
174     {
175         fn next_back(&mut self) -> Option<Self::Item> {
176             self.iter
177                 .next_back()
178                 .or_else(|| self.caller_vmctx_ty.take())
179         }
180     }
181 
182     impl<I> ExactSizeIterator for WithInt<I> where I: ExactSizeIterator<Item = SignlessType> {}
183 
184     arg_locs(WithInt {
185         caller_vmctx_ty: Some(I32),
186         iter: types.into_iter(),
187     })
188     .skip(1)
189 }
190 
191 pub fn ret_locs(types: impl IntoIterator<Item = SignlessType>) -> Result<Vec<CCLoc>, Error> {
192     let types = types.into_iter();
193     let mut out = Vec::with_capacity(types.size_hint().0);
194     // TODO: VmCtx is in the first register
195     let mut int_gpr_iter = INTEGER_RETURN_GPRS.iter();
196     let mut float_gpr_iter = FLOAT_RETURN_GPRS.iter();
197 
198     for ty in types {
199         match ty {
200             I32 | I64 => match int_gpr_iter.next() {
201                 None => {
202                     return Err(Error::Microwasm(
203                         "We don't support stack returns yet".to_string(),
204                     ))
205                 }
206                 Some(val) => out.push(CCLoc::Reg(*val)),
207             },
208             F32 | F64 => match float_gpr_iter.next() {
209                 None => {
210                     return Err(Error::Microwasm(
211                         "We don't support stack returns yet".to_string(),
212                     ))
213                 }
214                 Some(val) => out.push(CCLoc::Reg(*val)),
215             },
216         }
217     }
218 
219     Ok(out)
220 }
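// Illustrative sanity checks for the argument/return classification above. This is only a
// sketch: it assumes `SignlessType` is `Clone` (so `Vec`'s iterator meets `arg_locs`'s
// bounds) and that the argument/return register constants defined below keep their order.
#[cfg(test)]
mod calling_convention_tests {
    use super::*;

    #[test]
    fn args_are_classified_by_type() {
        // Integer args use the integer registers, float args the XMM registers.
        let locs: Vec<CCLoc> = arg_locs(vec![I32, F32, I64]).collect();
        assert_eq!(
            locs,
            vec![CCLoc::Reg(RSI), CCLoc::Reg(XMM0), CCLoc::Reg(RDX)]
        );

        // The sixth integer argument no longer fits in a register and spills to the stack.
        let spilled: Vec<CCLoc> = arg_locs(vec![I32, I32, I32, I32, I32, I32]).collect();
        assert_eq!(spilled.last(), Some(&CCLoc::Stack(0)));
    }

    #[test]
    fn returns_use_rax_then_xmm0() {
        let rets = ret_locs(vec![I32, F64]).ok().expect("register returns");
        assert_eq!(rets, vec![CCLoc::Reg(RAX), CCLoc::Reg(XMM0)]);
    }
}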
221 
222 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
223 struct GPRs {
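    /// A set bit means the corresponding register is currently free.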
224     bits: u16,
225 }
226 
227 impl GPRs {
228     fn new() -> Self {
229         Self { bits: 0 }
230     }
231 }
232 
233 #[allow(dead_code)]
234 pub mod registers {
235     use super::{RegId, GPR};
236 
237     pub mod rq {
238         use super::RegId;
239 
240         pub const RAX: RegId = 0;
241         pub const RCX: RegId = 1;
242         pub const RDX: RegId = 2;
243         pub const RBX: RegId = 3;
244         pub const RSP: RegId = 4;
245         pub const RBP: RegId = 5;
246         pub const RSI: RegId = 6;
247         pub const RDI: RegId = 7;
248         pub const R8: RegId = 8;
249         pub const R9: RegId = 9;
250         pub const R10: RegId = 10;
251         pub const R11: RegId = 11;
252         pub const R12: RegId = 12;
253         pub const R13: RegId = 13;
254         pub const R14: RegId = 14;
255         pub const R15: RegId = 15;
256     }
257 
258     pub const RAX: GPR = GPR::Rq(self::rq::RAX);
259     pub const RCX: GPR = GPR::Rq(self::rq::RCX);
260     pub const RDX: GPR = GPR::Rq(self::rq::RDX);
261     pub const RBX: GPR = GPR::Rq(self::rq::RBX);
262     pub const RSP: GPR = GPR::Rq(self::rq::RSP);
263     pub const RBP: GPR = GPR::Rq(self::rq::RBP);
264     pub const RSI: GPR = GPR::Rq(self::rq::RSI);
265     pub const RDI: GPR = GPR::Rq(self::rq::RDI);
266     pub const R8: GPR = GPR::Rq(self::rq::R8);
267     pub const R9: GPR = GPR::Rq(self::rq::R9);
268     pub const R10: GPR = GPR::Rq(self::rq::R10);
269     pub const R11: GPR = GPR::Rq(self::rq::R11);
270     pub const R12: GPR = GPR::Rq(self::rq::R12);
271     pub const R13: GPR = GPR::Rq(self::rq::R13);
272     pub const R14: GPR = GPR::Rq(self::rq::R14);
273     pub const R15: GPR = GPR::Rq(self::rq::R15);
274 
275     pub const XMM0: GPR = GPR::Rx(0);
276     pub const XMM1: GPR = GPR::Rx(1);
277     pub const XMM2: GPR = GPR::Rx(2);
278     pub const XMM3: GPR = GPR::Rx(3);
279     pub const XMM4: GPR = GPR::Rx(4);
280     pub const XMM5: GPR = GPR::Rx(5);
281     pub const XMM6: GPR = GPR::Rx(6);
282     pub const XMM7: GPR = GPR::Rx(7);
283     pub const XMM8: GPR = GPR::Rx(8);
284     pub const XMM9: GPR = GPR::Rx(9);
285     pub const XMM10: GPR = GPR::Rx(10);
286     pub const XMM11: GPR = GPR::Rx(11);
287     pub const XMM12: GPR = GPR::Rx(12);
288     pub const XMM13: GPR = GPR::Rx(13);
289     pub const XMM14: GPR = GPR::Rx(14);
290     pub const XMM15: GPR = GPR::Rx(15);
291 
292     pub const NUM_GPRS: u8 = 16;
293 }
294 
295 const SIGN_MASK_F64: u64 = 0x8000_0000_0000_0000;
296 const REST_MASK_F64: u64 = !SIGN_MASK_F64;
297 const SIGN_MASK_F32: u32 = 0x8000_0000;
298 const REST_MASK_F32: u32 = !SIGN_MASK_F32;
299 
300 impl GPRs {
301     fn take(&mut self) -> Option<RegId> {
302         let first_free = self.bits.trailing_zeros(); // lowest set bit == lowest free register
303         if first_free < 16 {
304             let gpr = first_free as RegId;
305             self.mark_used(gpr);
306             Some(gpr)
307         } else {
308             None
309         }
310     }
311 
312     fn mark_used(&mut self, gpr: RegId) {
313         self.bits &= !(1 << gpr as u16);
314     }
315 
316     fn release(&mut self, gpr: RegId) {
317         debug_assert!(
318             !self.is_free(gpr),
319             "released register {} was already free",
320             gpr
321         );
322         self.bits |= 1 << gpr;
323     }
324 
325     fn is_free(self, gpr: RegId) -> bool {
326         (self.bits & (1 << gpr)) != 0
327     }
328 }
329 
330 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
331 pub struct Registers {
332     /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example)
333     scratch_64: (GPRs, [u8; NUM_GPRS as usize]),
334     /// Registers at 128 bits (xmm0, for example)
335     scratch_128: (GPRs, [u8; NUM_GPRS as usize]),
336 }
337 
338 impl Default for Registers {
339     fn default() -> Self {
340         Self::new()
341     }
342 }
343 
344 impl Registers {
345     pub fn new() -> Self {
346         Self {
347             scratch_64: (GPRs::new(), [1; NUM_GPRS as _]),
348             scratch_128: (GPRs::new(), [1; NUM_GPRS as _]),
349         }
350     }
351 
352     pub fn release_scratch_register(&mut self) -> Result<(), Error> {
353         // Give ourselves a few scratch registers to work with, for now.
354         for &scratch in SCRATCH_REGS {
355             self.release(scratch)?;
356         }
357         Ok(())
358     }
359 
360     fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) {
361         match gpr {
362             GPR::Rq(r) => (r, &mut self.scratch_64),
363             GPR::Rx(r) => (r, &mut self.scratch_128),
364         }
365     }
366 
367     fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) {
368         match gpr {
369             GPR::Rq(r) => (r, &self.scratch_64),
370             GPR::Rx(r) => (r, &self.scratch_128),
371         }
372     }
373 
374     pub fn mark_used(&mut self, gpr: GPR) {
375         let (gpr, scratch_counts) = self.scratch_counts_mut(gpr);
376         scratch_counts.0.mark_used(gpr);
377         scratch_counts.1[gpr as usize] += 1;
378     }
379 
380     pub fn num_usages(&self, gpr: GPR) -> u8 {
381         let (gpr, scratch_counts) = self.scratch_counts(gpr);
382         scratch_counts.1[gpr as usize]
383     }
384 
385     pub fn take(&mut self, ty: impl Into<GPRType>) -> Option<GPR> {
386         let (mk_gpr, scratch_counts) = match ty.into() {
387             GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64),
388             GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128),
389         };
390 
391         let out = scratch_counts.0.take()?;
392         scratch_counts.1[out as usize] += 1;
393         Some(mk_gpr(out))
394     }
395 
396     pub fn release(&mut self, gpr: GPR) -> Result<(), Error> {
397         let (gpr, scratch_counts) = self.scratch_counts_mut(gpr);
398         let c = &mut scratch_counts.1[gpr as usize];
399         *c = match c.checked_sub(1) {
400             Some(e) => e,
401             None => return Err(Error::Microwasm(format!("Double-freed register: {}", gpr))),
402         };
403         if *c == 0 {
404             scratch_counts.0.release(gpr);
405         }
406         Ok(())
407     }
408 
409     pub fn is_free(&self, gpr: GPR) -> bool {
410         let (gpr, scratch_counts) = self.scratch_counts(gpr);
411         scratch_counts.0.is_free(gpr)
412     }
413 }
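// Illustrative sketch of the refcounting contract of `Registers`: a register handed out by
// `take` starts with one usage, `mark_used` adds another, and it only returns to the free
// pool once `release` has been called a matching number of times. Note that a fresh
// `Registers` considers every register in use until `release_scratch_register` runs.
#[cfg(test)]
mod register_refcount_tests {
    use super::*;

    #[test]
    fn take_and_release_are_refcounted() {
        let mut regs = Registers::new();
        regs.release_scratch_register()
            .ok()
            .expect("release the initial scratch set");

        let gpr = regs.take(GPRType::Rq).expect("a free integer register");
        assert!(!regs.is_free(gpr));
        assert_eq!(regs.num_usages(gpr), 1);

        regs.mark_used(gpr);
        assert_eq!(regs.num_usages(gpr), 2);

        regs.release(gpr).ok().expect("first release");
        assert!(!regs.is_free(gpr)); // one usage still outstanding

        regs.release(gpr).ok().expect("second release");
        assert!(regs.is_free(gpr));
    }
}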
414 
415 #[derive(Debug, Clone, PartialEq, Eq)]
416 pub struct BlockCallingConvention<I = Vec<CCLoc>> {
417     pub stack_depth: StackDepth,
418     pub arguments: I,
419 }
420 
421 impl<I> BlockCallingConvention<I> {
422     pub fn function_start(arguments: I) -> Self {
423         BlockCallingConvention {
424             // We start and return the function with stack depth 1 since we must
425             // allow space for the saved return address.
426             stack_depth: StackDepth(1),
427             arguments,
428         }
429     }
430 }
431 
432 impl<T: Copy + 'static, I: Deref> BlockCallingConvention<I>
433 where
434     for<'a> &'a I::Target: IntoIterator<Item = &'a T>,
435 {
436     pub fn as_ref(&self) -> BlockCallingConvention<impl Iterator<Item = T> + '_> {
437         BlockCallingConvention {
440             stack_depth: self.stack_depth.clone(),
441             arguments: self.arguments.into_iter().copied(),
442         }
443     }
444 }
445 
446 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
447 pub enum FunctionDefLocation {
448     SameModule,
449     PossiblyExternal,
450 }
451 
452 // TODO: Combine this with `ValueLocation`?
453 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
454 pub enum CCLoc {
455     /// Value exists in a register.
456     Reg(GPR),
457     /// Value exists on the stack.
458     Stack(i32),
459 }
460 
461 impl CCLoc {
462     fn try_from(other: ValueLocation) -> Option<Self> {
463         match other {
464             ValueLocation::Reg(reg) => Some(CCLoc::Reg(reg)),
465             ValueLocation::Stack(offset) => Some(CCLoc::Stack(offset)),
466             ValueLocation::Cond(_) | ValueLocation::Immediate(_) => None,
467         }
468     }
469 }
470 
471 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
472 pub enum CondCode {
473     CF0,
474     CF1,
475     ZF0,
476     ZF1,
477     CF0AndZF0,
478     CF1OrZF1,
479     ZF0AndSFEqOF,
480     ZF1OrSFNeOF,
481     SFEqOF,
482     SFNeOF,
483 }
484 
485 mod cc {
486     use super::CondCode;
487 
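    // These map WebAssembly comparisons onto the flags produced by `cmp a, b` (which
    // computes `a - b`): CF is the unsigned borrow flag, ZF the zero flag, and
    // SF == OF / SF != OF encode signed ordering. For example, an unsigned `a >= b`
    // leaves CF clear (CF0), while an unsigned `a < b` sets it (CF1).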
488     pub const EQUAL: CondCode = CondCode::ZF0;
489     pub const NOT_EQUAL: CondCode = CondCode::ZF1;
490     pub const GE_U: CondCode = CondCode::CF0;
491     pub const LT_U: CondCode = CondCode::CF1;
492     pub const GT_U: CondCode = CondCode::CF0AndZF0;
493     pub const LE_U: CondCode = CondCode::CF1OrZF1;
494     pub const GE_S: CondCode = CondCode::SFEqOF;
495     pub const LT_S: CondCode = CondCode::SFNeOF;
496     pub const GT_S: CondCode = CondCode::ZF0AndSFEqOF;
497     pub const LE_S: CondCode = CondCode::ZF1OrSFNeOF;
498 }
499 
500 impl std::ops::Not for CondCode {
501     type Output = Self;
502 
503     fn not(self) -> Self {
504         use CondCode::*;
505 
506         match self {
507             CF0 => CF1,
508             CF1 => CF0,
509             ZF0 => ZF1,
510             ZF1 => ZF0,
511             CF0AndZF0 => CF1OrZF1,
512             CF1OrZF1 => CF0AndZF0,
513             ZF0AndSFEqOF => ZF1OrSFNeOF,
514             ZF1OrSFNeOF => ZF0AndSFEqOF,
515             SFEqOF => SFNeOF,
516             SFNeOF => SFEqOF,
517         }
518     }
519 }
520 
521 /// Describes location of a value.
522 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
523 pub enum ValueLocation {
524     /// Value exists in a register.
525     Reg(GPR),
526     /// Value exists on the stack. Note that this offset is relative to rsp as
527     /// it was on entry to the function.
528     Stack(i32),
529     /// Value is a literal
530     Immediate(Value),
531     /// Value is a set condition code
532     Cond(CondCode),
533 }
534 
535 impl From<CCLoc> for ValueLocation {
536     fn from(other: CCLoc) -> Self {
537         match other {
538             CCLoc::Reg(r) => ValueLocation::Reg(r),
539             CCLoc::Stack(o) => ValueLocation::Stack(o),
540         }
541     }
542 }
543 
544 impl ValueLocation {
545     fn stack(self) -> Option<i32> {
546         match self {
547             ValueLocation::Stack(o) => Some(o),
548             _ => None,
549         }
550     }
551 
552     fn reg(self) -> Option<GPR> {
553         match self {
554             ValueLocation::Reg(r) => Some(r),
555             _ => None,
556         }
557     }
558 
559     fn immediate(self) -> Option<Value> {
560         match self {
561             ValueLocation::Immediate(i) => Some(i),
562             _ => None,
563         }
564     }
565 
566     fn imm_i32(self) -> Option<i32> {
567         self.immediate().and_then(Value::as_i32)
568     }
569 
570     fn imm_i64(self) -> Option<i64> {
571         self.immediate().and_then(Value::as_i64)
572     }
573 
574     fn imm_f32(self) -> Option<Ieee32> {
575         self.immediate().and_then(Value::as_f32)
576     }
577 
578     fn imm_f64(self) -> Option<Ieee64> {
579         self.immediate().and_then(Value::as_f64)
580     }
581 }
582 
583 // TODO: This assumes the System V calling convention only.
584 // In the System V calling convention the first six integer arguments are passed via registers;
585 // all remaining arguments are passed on the stack.
586 // System V would normally use rdi and rsi for the first two integer arguments, but here rdi holds the vmctx and rsi holds the _caller_ vmctx.
587 const INTEGER_ARGS_IN_GPRS: &[GPR] = &[GPR::Rq(CALLER_VMCTX), RDX, RCX, R8, R9];
588 const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX];
589 const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
590 const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1];
591 // List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
592 const SCRATCH_REGS: &[GPR] = &[
593     RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9,
594     XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
595 ];
596 const VMCTX: RegId = rq::RDI;
597 const CALLER_VMCTX: RegId = rq::RSI;
598 
599 pub struct CodeGenSession<'module, M> {
600     assembler: Assembler,
601     pub module_context: &'module M,
602     pub op_offset_map: Vec<(AssemblyOffset, Box<dyn Display + Send + Sync>)>,
603     func_starts: Vec<(Option<AssemblyOffset>, DynamicLabel)>,
604     pointer_type: SignlessType,
605 }
606 
607 impl<'module, M> CodeGenSession<'module, M> {
608     pub fn new(func_count: u32, module_context: &'module M, pointer_type: SignlessType) -> Self {
609         let mut assembler = Assembler::new().unwrap();
610         let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label()))
611             .take(func_count as usize)
612             .collect::<Vec<_>>();
613 
614         CodeGenSession {
615             assembler,
616             op_offset_map: Default::default(),
617             func_starts,
618             module_context,
619             pointer_type,
620         }
621     }
622 
623     pub fn offset(&self) -> usize {
624         self.assembler.offset().0
625     }
626 
627     pub fn pointer_type(&self) -> SignlessType {
628         self.pointer_type
629     }
630 
631     pub fn new_context<'this>(
632         &'this mut self,
633         func_idx: u32,
634         reloc_sink: &'this mut dyn binemit::RelocSink,
635     ) -> Context<'this, M> {
636         {
637             let func_start = &mut self.func_starts[func_idx as usize];
638 
639             // At this point we know the exact start address of this function. Save it
640             // and define dynamic label at this location.
641             func_start.0 = Some(self.assembler.offset());
642             self.assembler.dynamic_label(func_start.1);
643         }
644 
645         Context {
646             asm: &mut self.assembler,
647             current_function: func_idx,
648             reloc_sink,
649             pointer_type: self.pointer_type,
650             source_loc: Default::default(),
651             func_starts: &self.func_starts,
652             block_state: Default::default(),
653             module_context: self.module_context,
654             labels: Default::default(),
655         }
656     }
657 
658     pub fn into_translated_code_section(self) -> Result<TranslatedCodeSection, Error> {
659         let exec_buf = self
660             .assembler
661             .finalize()
662             .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?;
663         let func_starts = self
664             .func_starts
665             .iter()
666             .map(|(offset, _)| offset.unwrap())
667             .collect::<Vec<_>>();
668         Ok(TranslatedCodeSection {
669             exec_buf,
670             func_starts,
671             op_offset_map: self.op_offset_map,
672             // TODO
673             relocatable_accesses: vec![],
674         })
675     }
676 }
677 
678 #[derive(Debug)]
679 struct RelocateAddress {
680     reg: Option<GPR>,
681     imm: usize,
682 }
683 
684 #[derive(Debug)]
685 struct RelocateAccess {
686     position: AssemblyOffset,
687     dst_reg: GPR,
688     address: RelocateAddress,
689 }
690 
691 pub struct TranslatedCodeSection {
692     exec_buf: ExecutableBuffer,
693     func_starts: Vec<AssemblyOffset>,
694     #[allow(dead_code)]
695     relocatable_accesses: Vec<RelocateAccess>,
696     op_offset_map: Vec<(AssemblyOffset, Box<dyn Display + Send + Sync>)>,
697 }
698 
699 impl TranslatedCodeSection {
700     pub fn func_start(&self, idx: usize) -> *const u8 {
701         let offset = self.func_starts[idx];
702         self.exec_buf.ptr(offset)
703     }
704 
705     pub fn func_range(&self, idx: usize) -> std::ops::Range<usize> {
706         let end = self
707             .func_starts
708             .get(idx + 1)
709             .map(|i| i.0)
710             .unwrap_or_else(|| self.exec_buf.len());
711 
712         self.func_starts[idx].0..end
713     }
714 
715     pub fn funcs<'a>(&'a self) -> impl Iterator<Item = std::ops::Range<usize>> + 'a {
716         (0..self.func_starts.len()).map(move |i| self.func_range(i))
717     }
718 
719     pub fn buffer(&self) -> &[u8] {
720         &*self.exec_buf
721     }
722 
723     pub fn disassemble(&self) {
724         crate::disassemble::disassemble(&*self.exec_buf, &self.op_offset_map).unwrap();
725     }
726 }
727 
728 #[derive(Debug, Default, Clone)]
729 pub struct BlockState {
730     pub stack: Stack,
731     pub depth: StackDepth,
732     pub regs: Registers,
733 }
734 
735 type Stack = Vec<ValueLocation>;
736 
737 mod labels {
738     use super::Label;
739     use std::collections::HashMap;
740 
741     pub struct LabelInfo {
742         pub label: Label,
743         pub align: u32,
744         pub inner: LabelValue,
745     }
746 
747     #[derive(Copy, Clone, PartialEq, Eq, Hash)]
748     pub enum LabelValue {
749         Ret,
750         I32(i32),
751         I64(i64),
752     }
753 
754     #[derive(Default)]
755     pub struct Labels {
756         map: HashMap<LabelValue, LabelInfo>,
757     }
758 
759     impl Labels {
760         pub fn drain(&mut self) -> impl Iterator<Item = LabelInfo> + '_ {
761             self.map.drain().map(|(_, info)| info)
762         }
763 
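        /// Returns the label registered for `label`, creating it with `l` on first use.
        /// The stored alignment is bumped to the maximum requested so far.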
764         pub fn insert(
765             &mut self,
766             l: impl FnOnce() -> Label,
767             align: u32,
768             label: LabelValue,
769         ) -> Label {
770             let val = self.map.entry(label).or_insert_with(move || LabelInfo {
771                 label: l(),
772                 align,
773                 inner: label,
774             });
775 
776             val.align = val.align.max(align);
777 
778             val.label
779         }
780     }
781 }
782 
783 use labels::{LabelInfo, LabelValue, Labels};
784 
785 pub struct Context<'this, M> {
786     pub asm: &'this mut Assembler,
787     pointer_type: SignlessType,
788     source_loc: SourceLoc,
789     reloc_sink: &'this mut dyn binemit::RelocSink,
790     module_context: &'this M,
791     current_function: u32,
792     func_starts: &'this Vec<(Option<AssemblyOffset>, DynamicLabel)>,
793     /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
794     pub block_state: BlockState,
795     labels: Labels,
796 }
797 
798 /// Label in code.
799 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
800 pub struct Label(DynamicLabel);
801 
802 /// Offset from starting value of SP counted in words.
803 #[derive(Default, Debug, Clone, PartialEq, Eq)]
804 pub struct StackDepth(u32);
805 
806 impl StackDepth {
807     pub fn reserve(&mut self, slots: u32) {
808         self.0 = self.0.checked_add(slots).unwrap();
809     }
810 
811     pub fn free(&mut self, slots: u32) {
812         self.0 = self.0.checked_sub(slots).unwrap();
813     }
814 }
815 
816 macro_rules! int_div {
817     ($full_div_s:ident, $full_div_u:ident, $div_u:ident, $div_s:ident, $rem_u:ident, $rem_s:ident, $imm_fn:ident, $signed_ty:ty, $unsigned_ty:ty, $reg_ty:tt, $pointer_ty:tt) => {
818         // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when
819         //       emitting Wasm.
820         pub fn $div_u(&mut self) -> Result<(), Error>{
821             let divisor = self.pop()?;
822             let dividend = self.pop()?;
823 
824             if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
825                 if divisor == 0 {
826                     self.trap(TrapCode::IntegerDivisionByZero);
827                     self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?;
828                 } else {
829                     self.push(ValueLocation::Immediate(
830                         <$unsigned_ty>::wrapping_div(dividend as _, divisor as _).into(),
831                     ))?;
832                 }
833 
834                 return Ok(())
835             }
836 
837             let (div, rem, saved) = self.$full_div_u(divisor, dividend)?;
838 
839             self.free_value(rem)?;
840 
841             let div = match div {
842                 ValueLocation::Reg(div)  => {
843                     if saved.clone().any(|dst| dst == div) {
844                         let new = self.take_reg(I32).unwrap();
845                         dynasm!(self.asm
846                             ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap())
847                         );
848                         self.block_state.regs.release(div)?;
849                         ValueLocation::Reg(new)
850                     } else {
851                         ValueLocation::Reg(div)
852                     }
853                 }
854                 ValueLocation::Stack(_) |
855                 ValueLocation::Cond(_) |
856                 ValueLocation::Immediate(_) => div,
857             };
858 
859             self.cleanup_gprs(saved);
860 
861             self.push(div)?;
862             Ok(())
863         }
864 
865         // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when
866         //       emitting Wasm.
867         pub fn $div_s(&mut self) -> Result<(), Error>{
868             let divisor = self.pop()?;
869             let dividend = self.pop()?;
870 
871             if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
872                 if divisor == 0 {
873                     self.trap(TrapCode::IntegerDivisionByZero);
874                     self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?;
875                 } else {
876                     self.push(ValueLocation::Immediate(
877                         <$signed_ty>::wrapping_div(dividend, divisor).into(),
878                     ))?;
879                 }
880 
881                 return Ok(())
882             }
883 
884             let (div, rem, saved) = self.$full_div_s(divisor, dividend)?;
885 
886             self.free_value(rem)?;
887 
888             let div = match div {
889                 ValueLocation::Reg(div)  => {
890                     if saved.clone().any(|dst| dst == div) {
891                         let new = self.take_reg(I32).unwrap();
892                         dynasm!(self.asm
893                             ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap())
894                         );
895                         self.block_state.regs.release(div)?;
896                         ValueLocation::Reg(new)
897                     } else {
898                         ValueLocation::Reg(div)
899                     }
900                 }
901                 ValueLocation::Stack(_) |
902                 ValueLocation::Cond(_) |
903                 ValueLocation::Immediate(_) => div,
904             };
905 
906             self.cleanup_gprs(saved);
907 
908             self.push(div)?;
909             Ok(())
910         }
911 
912         pub fn $rem_u(&mut self) -> Result<(), Error>{
913             let divisor = self.pop()?;
914             let dividend = self.pop()?;
915 
916             if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
917                 if divisor == 0 {
918                     self.trap(TrapCode::IntegerDivisionByZero);
919                     self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?;
920                 } else {
921                     self.push(ValueLocation::Immediate(
922                         (dividend as $unsigned_ty % divisor as $unsigned_ty).into(),
923                     ))?;
924                 }
925                 return Ok(());
926             }
927 
928             let (div, rem, saved) = self.$full_div_u(divisor, dividend)?;
929 
930             self.free_value(div)?;
931 
932             let rem = match rem {
933                 ValueLocation::Reg(rem)  => {
934                     if saved.clone().any(|dst| dst == rem) {
935                         let new = self.take_reg(I32).unwrap();
936                         dynasm!(self.asm
937                             ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap())
938                         );
939                         self.block_state.regs.release(rem)?;
940                         ValueLocation::Reg(new)
941                     } else {
942                         ValueLocation::Reg(rem)
943                     }
944                 }
945                 ValueLocation::Stack(_) |
946                 ValueLocation::Cond(_) |
947                 ValueLocation::Immediate(_) => rem,
948             };
949 
950             self.cleanup_gprs(saved);
951 
952             self.push(rem)?;
953             Ok(())
954         }
955 
956         pub fn $rem_s(&mut self) -> Result<(), Error>{
957             let mut divisor = self.pop()?;
958             let dividend = self.pop()?;
959 
960             if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
961                 if divisor == 0 {
962                     self.trap(TrapCode::IntegerDivisionByZero);
963                     self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?;
964                 } else {
965                     self.push(ValueLocation::Immediate((dividend % divisor).into()))?;
966                 }
967                 return Ok(());
968             }
969 
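            // Special-case a divisor of -1: wasm defines `x rem -1` as 0 for every x, and
            // letting it reach the hardware division below would fault (#DE) on
            // `INT_MIN rem -1`, so we branch around the division entirely in that case.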
970             let is_neg1 = self.create_label();
971 
972             let current_depth = self.block_state.depth.clone();
973 
974             // TODO: This could cause segfaults because of implicit push/pop
975             let gen_neg1_case = match divisor {
976                 ValueLocation::Immediate(_) => {
977                     if divisor.$imm_fn().unwrap() == -1 {
978                         self.push(ValueLocation::Immediate((-1 as $signed_ty).into()))?;
979                         self.free_value(dividend)?;
980                         return Ok(());
981                     }
982 
983                     false
984                 }
985                 ValueLocation::Reg(_) => {
986                     let reg = self.put_into_register(GPRType::Rq, &mut divisor)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
987 
988                     dynasm!(self.asm
989                         ; cmp $reg_ty(reg.rq().unwrap()), -1
990                     );
991                     // TODO: We could choose `current_depth` as the depth here instead but we currently
992                     //       don't for simplicity
993                     self.set_stack_depth(current_depth.clone())?;
994                     dynasm!(self.asm
995                         ; je =>is_neg1.0
996                     );
997 
998                     true
999                 }
1000                 ValueLocation::Stack(offset) => {
1001                     let offset = self.adjusted_offset(offset);
1002                     dynasm!(self.asm
1003                         ; cmp $pointer_ty [rsp + offset], -1
1004                     );
1005                     self.set_stack_depth(current_depth.clone())?;
1006                     dynasm!(self.asm
1007                         ; je =>is_neg1.0
1008                     );
1009 
1010                     true
1011                 }
1012                 ValueLocation::Cond(_) => {
1013                     // `cc` can never be `-1`, only `0` and `1`
1014                     false
1015                 }
1016             };
1017 
1018             let (div, rem, saved) = self.$full_div_s(divisor, dividend)?;
1019 
1020             self.free_value(div)?;
1021 
1022             let rem = match rem {
1023                 ValueLocation::Reg(rem) => {
1024                     if saved.clone().any(|dst| dst == rem) {
1025                         let new = self.take_reg(I32).unwrap();
1026                         dynasm!(self.asm
1027                             ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap())
1028                         );
1029                         self.block_state.regs.release(rem)?;
1030                         ValueLocation::Reg(new)
1031                     } else {
1032                         ValueLocation::Reg(rem)
1033                     }
1034                 }
1035                 ValueLocation::Stack(_) |
1036                 ValueLocation::Cond(_) |
1037                 ValueLocation::Immediate(_) => rem,
1038             };
1039 
1040             self.cleanup_gprs(saved);
1041 
1042             if gen_neg1_case {
1043                 let ret = self.create_label();
1044                 self.set_stack_depth(current_depth.clone())?;
1045                 dynasm!(self.asm
1046                     ; jmp =>ret.0
1047                 );
1048                 self.define_label(is_neg1);
1049 
1050                 let dst_ccloc = match CCLoc::try_from(rem) {
1051                     None => {
1052                         return Err(Error::Microwasm(
1053                             "$rem_s Programmer error".to_string(),
1054                         ))
1055                     }
1056                     Some(o) => o,
1057                 };
1058 
1059                 self.copy_value(
1060                     ValueLocation::Immediate((0 as $signed_ty).into()),
1061                     dst_ccloc
1062                 )?;
1063 
1064                 self.set_stack_depth(current_depth.clone())?;
1065                 self.define_label(ret);
1066             }
1067 
1068             self.push(rem)?;
1069             Ok(())
1070         }
1071     }
1072 }
1073 
1074 macro_rules! unop {
1075     ($name:ident, $instr:ident, $reg_ty:tt, $typ:ty, $const_fallback:expr) => {
1076         pub fn $name(&mut self) -> Result<(), Error>{
1077             let mut val = self.pop()?;
1078 
1079             let out_val = match val {
1080                 ValueLocation::Immediate(imm) =>
1081                     ValueLocation::Immediate(
1082                         ($const_fallback(imm.as_int().unwrap() as $typ) as $typ).into()
1083                     ),
1084                 ValueLocation::Stack(offset) => {
1085                     let offset = self.adjusted_offset(offset);
1086                     let temp = self.take_reg(Type::for_::<$typ>()).unwrap();
1087                     dynasm!(self.asm
1088                         ; $instr $reg_ty(temp.rq().unwrap()), [rsp + offset]
1089                     );
1090                     ValueLocation::Reg(temp)
1091                 }
1092                 ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1093                     let reg = self.put_into_register(GPRType::Rq, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1094                     let temp = self.take_reg(Type::for_::<$typ>()).unwrap();
1095                     dynasm!(self.asm
1096                         ; $instr $reg_ty(temp.rq().unwrap()), $reg_ty(reg.rq().unwrap())
1097                     );
1098                     ValueLocation::Reg(temp)
1099                 }
1100             };
1101 
1102             self.free_value(val)?;
1103             self.push(out_val)?;
1104             Ok(())
1105         }
1106     }
1107 }
1108 
1109 macro_rules! conversion {
1110     (
1111         $name:ident,
1112         $instr:ident,
1113         $in_reg_ty:tt,
1114         $in_reg_fn:ident,
1115         $out_reg_ty:tt,
1116         $out_reg_fn:ident,
1117         $in_typ:ty,
1118         $out_typ:ty,
1119         $const_ty_fn:ident,
1120         $const_fallback:expr
1121     ) => {
1122         pub fn $name(&mut self) -> Result<(), Error>{
1123             let mut val = self.pop()?;
1124 
1125             let out_val = match val {
1126                 ValueLocation::Immediate(imm) =>
1127                     ValueLocation::Immediate(
1128                         $const_fallback(imm.$const_ty_fn().unwrap()).into()
1129                     ),
1130                 ValueLocation::Stack(offset) => {
1131                     let offset = self.adjusted_offset(offset);
1132                     let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap();
1133                     dynasm!(self.asm
1134                         ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset]
1135                     );
1136 
1137                     ValueLocation::Reg(temp)
1138                 }
1139                 ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1140                     let reg = self.put_into_register(Type::for_::<$in_typ>(), &mut val)? .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1141                     let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap();
1142 
1143                     dynasm!(self.asm
1144                         ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap())
1145                     );
1146 
1147                     ValueLocation::Reg(temp)
1148                 }
1149             };
1150 
1151             self.free_value(val)?;
1152 
1153             self.push(out_val)?;
1154             Ok(())
1155         }
1156     }
1157 }
1158 
1159 // TODO: Support immediate `count` parameters
1160 macro_rules! shift {
1161     ($name:ident, $reg_ty:tt, $instr:ident, $const_fallback:expr, $ty:expr) => {
1162         pub fn $name(&mut self) -> Result<(), Error>{
1163             let mut count = self.pop()?;
1164             let mut val = self.pop()?;
1165 
1166             if let Some(imm) = count.immediate() {
1167                 if let Some(imm) = imm.as_int() {
1168                     if let Ok(imm) = i8::try_from(imm) {
1169                         let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1170 
1171                         dynasm!(self.asm
1172                             ; $instr $reg_ty(reg.rq().unwrap()), imm
1173                         );
1174                         self.push(ValueLocation::Reg(reg))?;
1175                         return Ok(());
1176                     }
1177                 }
1178             }
1179 
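            // Variable shift counts must live in `cl` on x86, so `count` has to end up in
            // RCX: move `val` out of RCX if it currently lives there, and stash RCX's
            // existing contents in a temporary if the register is otherwise in use.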
1180             if val == ValueLocation::Reg(RCX) {
1181                 let new = self.take_reg($ty).unwrap();
1182                 self.copy_value(val, CCLoc::Reg(new))?;
1183                 self.free_value(val)?;
1184                 val = ValueLocation::Reg(new);
1185             }
1186 
1187             // TODO: Maybe allocate `RCX`, write `count` to it and then free `count`.
1188             //       Once we've implemented refcounting this will do the right thing
1189             //       for free.
1190             let temp_rcx = match count {
1191                 ValueLocation::Reg(RCX) => {None}
1192                 other => {
1193                     let out = if self.block_state.regs.is_free(RCX) {
1194                         None
1195                     } else {
1196                         let new_reg = self.take_reg(I32).unwrap();
1197                         dynasm!(self.asm
1198                             ; mov Rq(new_reg.rq().unwrap()), rcx
1199                         );
1200                         Some(new_reg)
1201                     };
1202 
1203                     match other {
1204                         ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1205                             let gpr = self.put_into_register(I32, &mut count)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1206                             dynasm!(self.asm
1207                                 ; mov cl, Rb(gpr.rq().unwrap())
1208                             );
1209                         }
1210                         ValueLocation::Stack(offset) => {
1211                             let offset = self.adjusted_offset(offset);
1212                             dynasm!(self.asm
1213                                 ; mov cl, [rsp + offset]
1214                             );
1215                         }
1216                         ValueLocation::Immediate(imm) => {
1217                             dynasm!(self.asm
1218                                 ; mov cl, imm.as_int().unwrap() as i8
1219                             );
1220                         }
1221                     }
1222 
1223                     out
1224                 }
1225             };
1226 
1227             self.free_value(count)?;
1228             self.block_state.regs.mark_used(RCX);
1229             count = ValueLocation::Reg(RCX);
1230 
1231             let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1232 
1233             dynasm!(self.asm
1234                 ; $instr $reg_ty(reg.rq().unwrap()), cl
1235             );
1236 
1237             self.free_value(count)?;
1238 
1239             if let Some(gpr) = temp_rcx {
1240                 dynasm!(self.asm
1241                     ; mov rcx, Rq(gpr.rq().unwrap())
1242                 );
1243                 self.block_state.regs.release(gpr)?;
1244             }
1245 
1246             self.push(val)?;
1247             Ok(())
1248         }
1249     }
1250 }
1251 
1252 macro_rules! cmp_i32 {
1253     ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => {
1254         pub fn $name(&mut self) -> Result<(), Error>{
1255             let mut right = self.pop()?;
1256             let mut left = self.pop()?;
1257 
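            // When the left operand is an immediate we emit `cmp <right>, <imm>`, i.e. with
            // the operands swapped, so the reversed condition code is pushed instead.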
1258             let out = if let Some(i) = left.imm_i32() {
1259                 match right {
1260                     ValueLocation::Stack(offset) => {
1261                         let offset = self.adjusted_offset(offset);
1262 
1263                         dynasm!(self.asm
1264                             ; cmp DWORD [rsp + offset], i
1265                         );
1266                         ValueLocation::Cond($reverse_flags)
1267                     }
1268                     ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1269                         let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1270                         dynasm!(self.asm
1271                             ; cmp Rd(rreg.rq().unwrap()), i
1272                         );
1273                         ValueLocation::Cond($reverse_flags)
1274                     }
1275                     ValueLocation::Immediate(right) => {
1276                         ValueLocation::Immediate(
1277                             (if $const_fallback(i, right.as_i32().unwrap()) {
1278                                 1i32
1279                             } else {
1280                                 0i32
1281                             }).into()
1282                         )
1283                     }
1284                 }
1285             } else {
1286                 let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1287 
1288                 match right {
1289                     ValueLocation::Stack(offset) => {
1290                         let offset = self.adjusted_offset(offset);
1291                         dynasm!(self.asm
1292                             ; cmp Rd(lreg.rq().unwrap()), [rsp + offset]
1293                         );
1294                     }
1295                     ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1296                         let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1297 
1298                         dynasm!(self.asm
1299                             ; cmp Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap())
1300                         );
1301                     }
1302                     ValueLocation::Immediate(i) => {
1303                         dynasm!(self.asm
1304                             ; cmp Rd(lreg.rq().unwrap()), i.as_i32().unwrap()
1305                         );
1306                     }
1307                 }
1308 
1309                 ValueLocation::Cond($flags)
1310             };
1311 
1312             self.free_value(left)?;
1313             self.free_value(right)?;
1314 
1315             self.push(out)?;
1316             Ok(())
1317         }
1318     }
1319 }
1320 
1321 macro_rules! cmp_i64 {
1322     ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => {
1323         pub fn $name(&mut self) -> Result<(), Error> {
1324             let mut right = self.pop()?;
1325             let mut left = self.pop()?;
1326 
1327             let out = if let Some(i) = left.imm_i64() {
1328                 match right {
1329                     ValueLocation::Stack(offset) => {
1330                         let offset = self.adjusted_offset(offset);
1331                         if let Ok(i) = i.try_into() {
1332                             dynasm!(self.asm
1333                                 ; cmp QWORD [rsp + offset], i
1334                             );
1335                         } else {
1336                             let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1337 
1338                             dynasm!(self.asm
1339                                 ; cmp QWORD [rsp + offset], Rq(lreg.rq().unwrap())
1340                             );
1341                         }
1342                         ValueLocation::Cond($reverse_flags)
1343                     }
1344                     ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1345                         let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1346 
1347                         if let Ok(i) = i.try_into() {
1348                             dynasm!(self.asm
1349                                 ; cmp Rq(rreg.rq().unwrap()), i
1350                             );
1351                         } else {
1352                             let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1353 
1354                             dynasm!(self.asm
1355                                 ; cmp Rq(rreg.rq().unwrap()), Rq(lreg.rq().unwrap())
1356                             );
1357                         }
1358                         ValueLocation::Cond($reverse_flags)
1359                     }
1360                     ValueLocation::Immediate(right) => {
1361                         ValueLocation::Immediate(
1362                             (if $const_fallback(i, right.as_i64().unwrap()) {
1363                                 1i32
1364                             } else {
1365                                 0i32
1366                             }).into()
1367                         )
1368                     }
1369                 }
1370             } else {
1371                 let lreg = self.put_into_register(I64, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1372 
1373                 match right {
1374                     ValueLocation::Stack(offset) => {
1375                         let offset = self.adjusted_offset(offset);
1376                         dynasm!(self.asm
1377                             ; cmp Rq(lreg.rq().unwrap()), [rsp + offset]
1378                         );
1379                     }
1380                     ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1381                         let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1382 
1383                         dynasm!(self.asm
1384                             ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
1385                         );
1386                     }
1387                     ValueLocation::Immediate(i) => {
1388                         let i = i.as_i64().unwrap();
1389                         if let Ok(i) = i.try_into() {
1390                             dynasm!(self.asm
1391                                     ; cmp Rq(lreg.rq().unwrap()), i
1392                             );
1393                         } else {
1394                             let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1395 
1396                             dynasm!(self.asm
1397                                 ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
1398                             );
1399                         }
1400                     }
1401                 }
1402 
1403                 ValueLocation::Cond($flags)
1404             };
1405 
1406             self.free_value(left)?;
1407             self.free_value(right)?;
1408             self.push(out)?;
1409             Ok(())
1410         }
1411     }
1412 }
1413 
1414 macro_rules! cmp_f32 {
1415     ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => {
1416         cmp_float!(
1417             comiss,
1418             f32,
1419             imm_f32,
1420             $name,
1421             $reverse_name,
1422             $instr,
1423             $const_fallback
1424         );
1425     };
1426 }
1427 
1428 macro_rules! eq_float {
1429     ($name:ident, $instr:ident, $imm_fn:ident, $const_fallback:expr) => {
1430         pub fn $name(&mut self) -> Result<(), Error>{
1431             let right = self.pop()?;
1432             let left = self.pop()?;
1433 
1434             if let Some(right) = right.immediate() {
1435                 if let Some(left) = left.immediate() {
1436                     self.push(ValueLocation::Immediate(
1437                         if $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()) {
1438                             1u32
1439                         } else {
1440                             0
1441                         }.into()
1442                     ))?;
1443                     return Ok(());
1444                 }
1445             }
1446 
1447             let (mut left, mut right) = match left {
1448                 ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right),
1449                 _ =>  (right, left)
1450             };
1451 
1452             let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1453             let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1454 
1455             let out = self.take_reg(I32).unwrap();
1456 
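            // `$instr` is expected to be a `cmpeq`-style SSE compare that fills the low
            // lane of `lreg` with an all-ones or all-zeros mask; moving the low 32 bits
            // into a GPR and masking with 1 turns that mask into the boolean result.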
1457             dynasm!(self.asm
1458                 ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
1459                 ; movd Rd(out.rq().unwrap()), Rx(lreg.rx().unwrap())
1460                 ; and Rd(out.rq().unwrap()), 1
1461             );
1462 
1463             self.push(ValueLocation::Reg(out))?;
1464             self.free_value(left)?;
1465             self.free_value(right)?;
1466             Ok(())
1467         }
1468 
1469     }
1470 }
1471 
1472 macro_rules! minmax_float {
1473     (
1474         $name:ident,
1475         $instr:ident,
1476         $cmpinstr:ident,
1477         $addinstr:ident,
1478         $combineinstr:ident,
1479         $imm_fn:ident,
1480         $const_fallback:expr
1481     ) => {
1482         pub fn $name(&mut self) -> Result<(), Error>{
1483             let right = self.pop()?;
1484             let left = self.pop()?;
1485 
1486             if let Some(right) = right.immediate() {
1487                 if let Some(left) = left.immediate() {
1488                     self.push(ValueLocation::Immediate(
1489                         $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()).into()
1490                     ))?;
1491                     return Ok(());
1492                 }
1493             }
1494 
1495             let (mut left, mut right) = match left {
1496                 ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right),
                _ => (right, left)
1498             };
1499 
1500             let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1501             let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1502 
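            // Assuming `$cmpinstr` is a `ucomiss`/`ucomisd`-family compare: ZF is set both for
            // equal and for unordered (NaN) operands, and PF distinguishes the two (PF set only
            // when unordered). For NaN we use `$addinstr`, since adding propagates the NaN; for
            // genuinely equal operands (including +0/-0) `$combineinstr` is presumably an
            // `orps`/`andps`-style combine that yields the correctly signed zero; otherwise
            // `$instr` performs the actual min/max.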
1503             dynasm!(self.asm
1504                 ; $cmpinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
1505                 ; je >equal
1506                 ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
1507                 ; jmp >ret
1508             ; equal:
1509                 ; jnp >equal_but_not_parity
1510                 ; $addinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
1511                 ; jmp >ret
1512             ; equal_but_not_parity:
1513                 ; $combineinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
1514             ; ret:
1515             );
1516 
1517             self.push(left)?;
1518             self.free_value(right)?;
1519             Ok(())
1520         }
1521 
1522     }
1523 }
1524 
1525 macro_rules! cmp_f64 {
1526     ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => {
1527         cmp_float!(
1528             comisd,
1529             f64,
1530             imm_f64,
1531             $name,
1532             $reverse_name,
1533             $instr,
1534             $const_fallback
1535         );
1536     };
1537 }
1538 
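// Generates a pair of float comparisons: `$name` compares `left ? right`, and `$reverse_name`
// simply swaps the operands into the same helper, so that e.g. `lt` can reuse the `seta`-style
// setcc used for `gt` instead of needing a separate below-style encoding.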
1539 macro_rules! cmp_float {
1540     (@helper $cmp_instr:ident, $ty:ty, $imm_fn:ident, $self:expr, $left:expr, $right:expr, $instr:ident, $const_fallback:expr) => {{
1541         let (left, right, this) = ($left, $right, $self);
1542         if let (Some(left), Some(right)) = (left.$imm_fn(), right.$imm_fn()) {
1543             if $const_fallback(<$ty>::from_bits(left.to_bits()), <$ty>::from_bits(right.to_bits())) {
1544                 ValueLocation::Immediate(1i32.into())
1545             } else {
1546                 ValueLocation::Immediate(0i32.into())
1547             }
1548         } else {
1549             let lreg = this.put_into_register(GPRType::Rx, left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1550 
1551             let result = this.take_reg(I32).unwrap();
1552 
1553             match right {
1554                 ValueLocation::Stack(offset) => {
1555                     let offset = this.adjusted_offset(*offset);
1556 
1557                     dynasm!(this.asm
1558                         ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap())
1559                         ; $cmp_instr Rx(lreg.rx().unwrap()), [rsp + offset]
1560                         ; $instr Rb(result.rq().unwrap())
1561                     );
1562                 }
1563                 right => {
1564                     let rreg = this.put_into_register(GPRType::Rx, right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1565 
1566                     dynasm!(this.asm
1567                         ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap())
1568                         ; $cmp_instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
1569                         ; $instr Rb(result.rq().unwrap())
1570                     );
1571                 }
1572             }
1573 
1574             ValueLocation::Reg(result)
1575         }
1576     }};
1577     ($cmp_instr:ident, $ty:ty, $imm_fn:ident, $name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => {
1578         pub fn $name(&mut self) -> Result<(), Error> {
1579             let mut right = self.pop()?;
1580             let mut left = self.pop()?;
1581 
1582             let out = cmp_float!(@helper
1583                 $cmp_instr,
1584                 $ty,
1585                 $imm_fn,
1586                 &mut *self,
1587                 &mut left,
1588                 &mut right,
1589                 $instr,
1590                 $const_fallback
1591             );
1592 
1593             self.free_value(left)?;
1594             self.free_value(right)?;
1595 
1596             self.push(out)?;
1597             Ok(())
1598         }
1599 
1600         pub fn $reverse_name(&mut self) -> Result<(), Error> {
1601             let mut right = self.pop()?;
1602             let mut left = self.pop()?;
1603 
1604             let out = cmp_float!(@helper
1605                 $cmp_instr,
1606                 $ty,
1607                 $imm_fn,
1608                 &mut *self,
1609                 &mut right,
1610                 &mut left,
1611                 $instr,
1612                 $const_fallback
1613             );
1614 
1615             self.free_value(left)?;
1616             self.free_value(right)?;
1617 
1618             self.push(out)?;
1619             Ok(())
1620         }
1621     };
1622 }
1623 
1624 macro_rules! binop_i32 {
1625     ($name:ident, $instr:ident, $const_fallback:expr) => {
1626         binop!(
1627             $name,
1628             $instr,
1629             $const_fallback,
1630             Rd,
1631             rq,
1632             I32,
1633             imm_i32,
1634             |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
1635                 ; $instr Rd(op1.rq().unwrap()), i
1636             )
1637         );
1638     };
1639 }
1640 
1641 macro_rules! commutative_binop_i32 {
1642     ($name:ident, $instr:ident, $const_fallback:expr) => {
1643         commutative_binop!(
1644             $name,
1645             $instr,
1646             $const_fallback,
1647             Rd,
1648             rq,
1649             I32,
1650             imm_i32,
1651             |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
1652                 ; $instr Rd(op1.rq().unwrap()), i
1653             )
1654         );
1655     };
1656 }
1657 
1658 macro_rules! binop_i64 {
1659     ($name:ident, $instr:ident, $const_fallback:expr) => {
1660         binop!(
1661             $name,
1662             $instr,
1663             $const_fallback,
1664             Rq,
1665             rq,
1666             I64,
1667             imm_i64,
1668             |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
1669                 ; $instr Rq(op1.rq().unwrap()), i
1670             )
1671         );
1672     };
1673 }
1674 
1675 macro_rules! commutative_binop_i64 {
1676     ($name:ident, $instr:ident, $const_fallback:expr) => {
1677         commutative_binop!(
1678             $name,
1679             $instr,
1680             $const_fallback,
1681             Rq,
1682             rq,
1683             I64,
1684             imm_i64,
1685             |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
1686                 ; $instr Rq(op1.rq().unwrap()), i
1687             )
1688         );
1689     };
1690 }
1691 
1692 macro_rules! binop_f32 {
1693     ($name:ident, $instr:ident, $const_fallback:expr) => {
1694         binop!(
1695             $name,
1696             $instr,
1697             |a: Ieee32, b: Ieee32| Ieee32::from_bits(
1698                 $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits()
1699             ),
1700             Rx,
1701             rx,
1702             F32,
1703             imm_f32,
1704             |_, _, _: i32| unreachable!()
1705         );
1706     };
1707 }
1708 
1709 macro_rules! commutative_binop_f32 {
1710     ($name:ident, $instr:ident, $const_fallback:expr) => {
1711         commutative_binop!(
1712             $name,
1713             $instr,
1714             |a: Ieee32, b: Ieee32| Ieee32::from_bits(
1715                 $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits()
1716             ),
1717             Rx,
1718             rx,
1719             F32,
1720             imm_f32,
1721             |_, _, _: i32| unreachable!()
1722         );
1723     };
1724 }
1725 
1726 macro_rules! binop_f64 {
1727     ($name:ident, $instr:ident, $const_fallback:expr) => {
1728         binop!(
1729             $name,
1730             $instr,
1731             |a: Ieee64, b: Ieee64| Ieee64::from_bits(
1732                 $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits()
1733             ),
1734             Rx,
1735             rx,
1736             F64,
1737             imm_f64,
1738             |_, _, _: i32| unreachable!()
1739         );
1740     };
1741 }
1742 
1743 macro_rules! commutative_binop_f64 {
1744     ($name:ident, $instr:ident, $const_fallback:expr) => {
1745         commutative_binop!(
1746             $name,
1747             $instr,
1748             |a: Ieee64, b: Ieee64| Ieee64::from_bits(
1749                 $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits()
1750             ),
1751             Rx,
1752             rx,
1753             F64,
1754             imm_f64,
1755             |_, _, _: i32| unreachable!()
1756         );
1757     };
1758 }
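
// Like `binop!` below, but for operators where operand order doesn't matter: the mapping closure
// keeps the order when the left operand is already in a register or the right is an immediate,
// and swaps otherwise, so the destination is preferentially something already in a register.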
1759 macro_rules! commutative_binop {
1760     ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => {
1761         binop!(
1762             $name,
1763             $instr,
1764             $const_fallback,
1765             $reg_ty,
1766             $reg_fn,
1767             $ty,
1768             $imm_fn,
1769             $direct_imm,
1770             |op1: ValueLocation, op0: ValueLocation| match op1 {
1771                 ValueLocation::Reg(_) => (op1, op0),
1772                 _ => {
1773                     if op0.immediate().is_some() {
1774                         (op1, op0)
1775                     } else {
1776                         (op0, op1)
1777                     }
1778                 }
1779             }
1780         );
1781     };
1782 }
1783 
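// The workhorse for two-operand arithmetic: constant-folds when both operands are immediates,
// otherwise forces the left operand into a scratch register and dispatches on where the right
// operand lives (register/condition flag, stack slot, or immediate).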
1784 macro_rules! binop {
1785     ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => {
1786         binop!($name, $instr, $const_fallback, $reg_ty, $reg_fn, $ty, $imm_fn, $direct_imm, |a, b| (a, b));
1787     };
1788     ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr, $map_op:expr) => {
1789         pub fn $name(&mut self) -> Result<(), Error> {
1790             let right = self.pop()?;
1791             let left = self.pop()?;
1792 
1793             if let Some(i1) = left.$imm_fn() {
1794                 if let Some(i0) = right.$imm_fn() {
1795                     self.block_state.stack.push(ValueLocation::Immediate($const_fallback(i1, i0).into()));
1796                     return Ok(());
1797                 }
1798             }
1799 
1800             let (mut left, mut right) = $map_op(left, right);
1801             let lreg = self.put_into_temp_register($ty, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1802 
1803             match right {
1804                 ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
1805                     // This handles the case where we (for example) have a float in an `Rq` reg
1806                     let right_reg = self.put_into_register($ty, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1807 
1808                     dynasm!(self.asm
1809                         ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(right_reg.$reg_fn().unwrap())
1810                     );
1811                 }
1812                 ValueLocation::Stack(offset) => {
1813                     let offset = self.adjusted_offset(offset);
1814                     dynasm!(self.asm
1815                         ; $instr $reg_ty(lreg.$reg_fn().unwrap()), [rsp + offset]
1816                     );
1817                 }
1818                 ValueLocation::Immediate(i) => {
1819                     if let Some(i) = i.as_int().and_then(|i| i.try_into().ok()) {
1820                         $direct_imm(&mut *self, lreg, i);
1821                     } else {
1822                         let scratch = self.take_reg($ty).unwrap();
1823                         self.immediate_to_reg(scratch, i)?;
1824 
1825                         dynasm!(self.asm
1826                             ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(scratch.$reg_fn().unwrap())
1827                         );
1828 
1829                         self.block_state.regs.release(scratch)?;
1830                     }
1831                 }
1832             }
1833 
1834             self.free_value(right)?;
1835             self.push(left)?;
1836             Ok(())
1837         }
1838     }
1839 }
1840 
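// Generates the wasm `*.load*` family. The generated function resolves the linear memory's
// base and length from the `vmctx` (directly for a locally-defined memory, via one extra pointer
// chase for an imported one), optionally emits a bounds check that traps with `HeapOutOfBounds`,
// and then dispatches on whether the address is a compile-time constant or a runtime register,
// and on whether the destination is an integer or an XMM register.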
1841 macro_rules! load {
1842     (@inner $name:ident, $rtype:expr, $reg_ty:tt, $emit_fn:expr) => {
1843         pub fn $name(&mut self, offset: u32) -> Result<(), Error> {
1844             fn load_to_reg<_M: ModuleContext>(
1845                 ctx: &mut Context<_M>,
1846                 dst: GPR,
1847                 (offset, runtime_offset): (i32, Result<i32, GPR>)
1848             ) -> Result<(), Error> {
1849                 let mem_index = 0;
1850                 let reg_offset = ctx.module_context
1851                     .defined_memory_index(mem_index)
1852                     .map(|index| (
1853                         None,
1854                         ctx.module_context.vmctx_vmmemory_definition(index) as i32
1855                     ));
1856                 let (reg, mem_offset) = reg_offset.unwrap_or_else(|| {
1857                     let reg = ctx.take_reg(I64).unwrap();
1858 
1859                     dynasm!(ctx.asm
1860                         ; mov Rq(reg.rq().unwrap()), [
1861                             Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32
1862                         ]
1863                     );
1864 
1865                     (Some(reg), 0)
1866                 });
1867 
1868                 let vmctx = GPR::Rq(VMCTX);
1869 
1870                 if ctx.module_context.emit_memory_bounds_check() {
1871                     let addr_reg = match runtime_offset {
1872                         Ok(imm) => {
1873                             let addr_reg = ctx.take_reg(I64).unwrap();
1874                             dynasm!(ctx.asm
1875                                 ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64
1876                             );
1877                             addr_reg
1878                         }
1879                         Err(gpr) => {
1880                             if offset == 0 {
1881                                 ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
1882                             } else if offset > 0 {
1883                                 let addr_reg = ctx.take_reg(I64).unwrap();
1884                                 dynasm!(ctx.asm
1885                                     ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset]
1886                                 );
1887                                 addr_reg
1888                             } else {
1889                                 let addr_reg = ctx.take_reg(I64).unwrap();
1890                                 let offset_reg = ctx.take_reg(I64).unwrap();
1891                                 dynasm!(ctx.asm
1892                                     ; mov Rd(offset_reg.rq().unwrap()), offset
1893                                     ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap())
1894                                     ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap())
1895                                 );
1896                                 ctx.block_state.regs.release(offset_reg)?;
1897                                 addr_reg
1898                             }
1899                         }
1900                     };
1901                     dynasm!(ctx.asm
1902                         ; cmp Rq(addr_reg.rq().unwrap()), [
1903                             Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
1904                                 mem_offset +
1905                                 ctx.module_context.vmmemory_definition_current_length() as i32
1906                         ]
1907                         ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds)
1908                     );
1909                     ctx.block_state.regs.release(addr_reg)?;
1910                 }
1911 
1912                 let mem_ptr_reg = ctx.take_reg(I64).unwrap();
1913                 dynasm!(ctx.asm
1914                     ; mov Rq(mem_ptr_reg.rq().unwrap()), [
1915                         Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
1916                             mem_offset +
1917                             ctx.module_context.vmmemory_definition_base() as i32
1918                     ]
1919                 );
1920                 if let Some(reg) = reg {
1921                     ctx.block_state.regs.release(reg)?;
1922                 }
1923                 $emit_fn(ctx, dst, mem_ptr_reg, runtime_offset, offset)?;
1924                 ctx.block_state.regs.release(mem_ptr_reg)?;
1925                 Ok(())
1926             }
1927 
1928             let base = self.pop()?;
1929 
1930             let temp = self.take_reg($rtype).unwrap();
1931 
1932             match base {
1933                 ValueLocation::Immediate(i) => {
1934                     load_to_reg(self, temp, (offset as _, Ok(i.as_i32().unwrap())))?;
1935                 }
1936                 mut base => {
1937                     let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
1938                     load_to_reg(self, temp, (offset as _, Err(gpr)))?;
1939                     self.free_value(base)?;
1940                 }
1941             }
1942 
1943             self.push(ValueLocation::Reg(temp))?;
1944             Ok(())
1945         }
1946     };
1947     ($name:ident, $rtype:expr, $reg_ty:tt, NONE, $rq_instr:ident, $ty:ident) => {
1948         load!(@inner
1949             $name,
1950             $rtype,
1951             $reg_ty,
1952             |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32| -> Result<(), Error>  {
1953                 match runtime_offset {
1954                     Ok(imm) => {
1955                         dynasm!(ctx.asm
1956                             ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm]
1957                         );
1958                         Ok(())
1959                     }
1960                     Err(offset_reg) => {
1961                         dynasm!(ctx.asm
1962                             ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset]
1963                         );
1964                         Ok(())
1965                     }
1966                 }
1967             }
1968         );
1969     };
1970     ($name:ident, $rtype:expr, $reg_ty:tt, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => {
1971         load!(@inner
1972             $name,
1973             $rtype,
1974             $reg_ty,
1975             |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32| -> Result<(), Error>  {
1976                 match (dst, runtime_offset) {
1977                     (GPR::Rq(r), Ok(imm)) => {
1978                         dynasm!(ctx.asm
1979                             ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm]
1980                         );
1981                         Ok(())
1982                     }
1983                     (GPR::Rx(r), Ok(imm)) => {
1984                         if let Some(combined) = offset.checked_add(imm) {
1985                             dynasm!(ctx.asm
1986                                 ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + combined]
1987                             );
1988                             Ok(())
1989                         } else {
1990                             let offset_reg = ctx.take_reg(GPRType::Rq).unwrap();
1991                             dynasm!(ctx.asm
1992                                 ; mov Rq(offset_reg.rq().unwrap()), offset
1993                                 ; $xmm_instr Rx(r), $ty [
1994                                     Rq(mem_ptr_reg.rq().unwrap()) +
1995                                     Rq(offset_reg.rq().unwrap()) +
1996                                     imm
1997                                 ]
1998                             );
1999                             ctx.block_state.regs.release(offset_reg)?;
2000                             Ok(())
2001                         }
2002                     }
2003                     (GPR::Rq(r), Err(offset_reg)) => {
2004                         dynasm!(ctx.asm
2005                             ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset]
2006                         );
2007                         Ok(())
2008                     }
2009                     (GPR::Rx(r), Err(offset_reg)) => {
2010                         dynasm!(ctx.asm
2011                             ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset]
2012                         );
2013                         Ok(())
2014                     }
2015                 }
2016             }
2017         );
2018     };
2019 }
2020 
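// Mirror image of `load!` above: the same memory-base resolution and bounds check, but the final
// dispatch is on where the *source* value lives (integer vs. XMM register) rather than on the
// destination.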
2021 macro_rules! store {
2022     (@inner $name:ident, $int_reg_ty:tt, $match_offset:expr, $size:ident) => {
2023         pub fn $name(&mut self, offset: u32) -> Result<(), Error>{
2024             fn store_from_reg<_M: ModuleContext>(
2025                 ctx: &mut Context<_M>,
2026                 src: GPR,
2027                 (offset, runtime_offset): (i32, Result<i32, GPR>)
2028             ) -> Result<(), Error> {
2029                 let mem_index = 0;
2030                 let reg_offset = ctx.module_context
2031                     .defined_memory_index(mem_index)
2032                     .map(|index| (
2033                         None,
2034                         ctx.module_context.vmctx_vmmemory_definition(index) as i32
2035                     ));
2036                 let (reg, mem_offset) = reg_offset.unwrap_or_else(|| {
2037                     let reg = ctx.take_reg(I64).unwrap();
2038 
2039                     dynasm!(ctx.asm
2040                         ; mov Rq(reg.rq().unwrap()), [
2041                             Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32
2042                         ]
2043                     );
2044 
2045                     (Some(reg), 0)
2046                 });
2047 
2048                 let vmctx = GPR::Rq(VMCTX);
2049 
2050                 if ctx.module_context.emit_memory_bounds_check() {
2051                     let addr_reg = match runtime_offset {
2052                         Ok(imm) => {
2053                             let addr_reg = ctx.take_reg(I64).unwrap();
2054                             dynasm!(ctx.asm
2055                                 ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64
2056                             );
2057                             addr_reg
2058                         }
2059                         Err(gpr) => {
2060                             if offset == 0 {
2061                                 ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
2062 
2063                             } else if offset > 0 {
2064                                 let addr_reg = ctx.take_reg(I64).unwrap();
2065                                 dynasm!(ctx.asm
2066                                     ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset]
2067                                 );
2068                                 addr_reg
2069                             } else {
2070                                 let addr_reg = ctx.take_reg(I64).unwrap();
2071                                 let offset_reg = ctx.take_reg(I64).unwrap();
2072                                 dynasm!(ctx.asm
2073                                     ; mov Rd(offset_reg.rq().unwrap()), offset
2074                                     ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap())
2075                                     ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap())
2076                                 );
2077                                 ctx.block_state.regs.release(offset_reg)?;
2078                                 addr_reg
2079                             }
2080                         }
2081                     };
2082                     dynasm!(ctx.asm
2083                         ; cmp Rq(addr_reg.rq().unwrap()), [
2084                             Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
2085                                 mem_offset +
2086                                 ctx.module_context.vmmemory_definition_current_length() as i32
2087                         ]
2088                         ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds)
2089                     );
2090                     ctx.block_state.regs.release(addr_reg)?;
2091                 }
2092 
2093                 let mem_ptr_reg = ctx.take_reg(I64).unwrap();
2094                 dynasm!(ctx.asm
2095                     ; mov Rq(mem_ptr_reg.rq().unwrap()), [
2096                         Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
2097                             mem_offset +
2098                             ctx.module_context.vmmemory_definition_base() as i32
2099                     ]
2100                 );
2101                 if let Some(reg) = reg {
2102                     ctx.block_state.regs.release(reg)?;
2103                 }
2104                 let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src)?;
2105                 ctx.block_state.regs.release(mem_ptr_reg)?;
2106                 ctx.block_state.regs.release(src)?;
2107                 Ok(())
2108             }
2109 
2110             if offset > i32::max_value() as u32 {
2111                 return Err(Error::Microwasm(format!("store: offset value too big {}", offset)))
2112             }
2113 
2114             let mut src = self.pop()?;
2115             let base = self.pop()?;
2116 
2117             // `store_from_reg` frees `src`
2118             // TODO: Would it be better to free it outside `store_from_reg`?
2119             let src_reg = self.put_into_register(None, &mut src)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2120 
2121 
2122             match base {
2123                 ValueLocation::Immediate(i) => {
2124                     store_from_reg(self, src_reg, (offset as i32, Ok(i.as_i32().unwrap())))?
2125                 }
2126                 mut base => {
2127                     let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2128                     store_from_reg(self, src_reg, (offset as i32, Err(gpr)))?;
2129                     self.free_value(base)?;
2130                 }
2131             }
2132             Ok(())
2133         }
2134     };
2135     ($name:ident, $int_reg_ty:tt, NONE, $size:ident) => {
2136         store!(@inner
2137             $name,
2138             $int_reg_ty,
2139             |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32, src| -> Result<GPR, Error> {
2140                 let src_reg = ctx.put_into_temp_register(GPRType::Rq, &mut ValueLocation::Reg(src))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2141 
2142                 match runtime_offset {
2143                     Ok(imm) => {
2144                         dynasm!(ctx.asm
2145                             ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(src_reg.rq().unwrap())
2146                         );
2147                     }
2148                     Err(offset_reg) => {
2149                         dynasm!(ctx.asm
2150                             ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(src_reg.rq().unwrap())
2151                         );
2152                     }
2153                 }
2154 
2155                 Ok(src_reg)
2156             },
2157             $size
2158         );
2159     };
2160     ($name:ident, $int_reg_ty:tt, $xmm_instr:ident, $size:ident) => {
2161         store!(@inner
2162             $name,
2163             $int_reg_ty,
2164             |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32, src| -> Result<GPR, Error> {
2165                 match (runtime_offset, src) {
2166                     (Ok(imm), GPR::Rq(r)) => {
2167                         dynasm!(ctx.asm
2168                             ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(r)
2169                         );
2170                     }
2171                     (Ok(imm), GPR::Rx(r)) => {
2172                         dynasm!(ctx.asm
2173                             ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], Rx(r)
2174                         );
2175                     }
2176                     (Err(offset_reg), GPR::Rq(r)) => {
2177                         dynasm!(ctx.asm
2178                             ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(r)
2179                         );
2180                     }
2181                     (Err(offset_reg), GPR::Rx(r)) => {
2182                         dynasm!(ctx.asm
2183                             ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], Rx(r)
2184                         );
2185                     }
2186                 }
2187 
2188                 Ok(src)
2189             },
2190             $size
2191         );
2192     };
2193 }
2194 
2195 #[derive(Debug, Clone, PartialEq, Eq)]
2196 pub struct VirtualCallingConvention {
2197     pub stack: Stack,
2198     pub depth: StackDepth,
2199 }
2200 
2201 impl<'this, M: ModuleContext> Context<'this, M> {
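    /// Tries to free up a register of the given class by spilling the first stack value that
    /// currently occupies one to a physical stack slot. Returns `Ok(false)` if there was nothing
    /// to spill.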
2202     fn free_reg(&mut self, type_: GPRType) -> Result<bool, Error> {
2203         let pos = if let Some(pos) = self
2204             .block_state
2205             .stack
2206             .iter()
2207             .position(|r| r.reg().map(|reg| reg.type_() == type_).unwrap_or(false))
2208         {
2209             pos
2210         } else {
2211             return Ok(false);
2212         };
2213 
2214         let old_loc = self.block_state.stack[pos];
2215         let new_loc = self.push_physical(old_loc)?;
2216         self.block_state.stack[pos] = new_loc;
2217 
2218         let reg = old_loc.reg().unwrap();
2219 
2220         for elem in &mut self.block_state.stack[pos + 1..] {
2221             if *elem == old_loc {
2222                 *elem = new_loc;
2223                 self.block_state.regs.release(reg)?;
2224             }
2225         }
2226 
2227         Ok(true)
2228     }
2229 
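    /// Takes a register of the requested class, spilling values from the virtual stack until one
    /// becomes free; returns `None` if no register could be freed.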
2230     fn take_reg(&mut self, r: impl Into<GPRType>) -> Option<GPR> {
2231         let r = r.into();
2232         loop {
2233             if let Some(gpr) = self.block_state.regs.take(r) {
2234                 break Some(gpr);
2235             }
2236 
2237             // Break out (rather than looping forever) if nothing could be spilled or
2238             // spilling itself failed.
2239             if self.free_reg(r) != Ok(true) {
                break None;
            }
2240         }
2241     }
2242 
2243     pub fn set_source_loc(&mut self, loc: SourceLoc) {
2244         self.source_loc = loc;
2245     }
2246 
2247     pub fn virtual_calling_convention(&self) -> VirtualCallingConvention {
2248         VirtualCallingConvention {
2249             stack: self.block_state.stack.clone(),
2250             depth: self.block_state.depth.clone(),
2251         }
2252     }
2253 
2254     /// Create a new undefined label.
2255     pub fn create_label(&mut self) -> Label {
2256         Label(self.asm.new_dynamic_label())
2257     }
2258 
2259     fn adjusted_offset(&self, offset: i32) -> i32 {
2260         (self.block_state.depth.0 as i32 + offset) * WORD_SIZE as i32
2261     }
2262 
2263     cmp_i32!(i32_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b);
2264     cmp_i32!(i32_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b);
2265     // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous
2266     cmp_i32!(i32_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u32) < (b as u32));
2267     cmp_i32!(i32_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u32)
2268         <= (b as u32));
2269     cmp_i32!(i32_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u32) > (b as u32));
2270     cmp_i32!(i32_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u32)
2271         >= (b as u32));
2272     cmp_i32!(i32_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b);
2273     cmp_i32!(i32_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b);
2274     cmp_i32!(i32_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b);
2275     cmp_i32!(i32_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b);
2276 
2277     cmp_i64!(i64_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b);
2278     cmp_i64!(i64_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b);
2279     // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous
2280     cmp_i64!(i64_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u64) < (b as u64));
2281     cmp_i64!(i64_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u64)
2282         <= (b as u64));
2283     cmp_i64!(i64_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u64) > (b as u64));
2284     cmp_i64!(i64_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u64)
2285         >= (b as u64));
2286     cmp_i64!(i64_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b);
2287     cmp_i64!(i64_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b);
2288     cmp_i64!(i64_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b);
2289     cmp_i64!(i64_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b);
2290 
2291     cmp_f32!(f32_gt, f32_lt, seta, |a, b| a > b);
2292     cmp_f32!(f32_ge, f32_le, setnc, |a, b| a >= b);
2293     eq_float!(
2294         f32_eq,
2295         cmpeqss,
2296         as_f32,
2297         |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) == f32::from_bits(b.to_bits())
2298     );
2299     eq_float!(
2300         f32_ne,
2301         cmpneqss,
2302         as_f32,
2303         |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) != f32::from_bits(b.to_bits())
2304     );
2305 
2306     cmp_f64!(f64_gt, f64_lt, seta, |a, b| a > b);
2307     cmp_f64!(f64_ge, f64_le, setnc, |a, b| a >= b);
2308     eq_float!(
2309         f64_eq,
2310         cmpeqsd,
2311         as_f64,
2312         |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) == f64::from_bits(b.to_bits())
2313     );
2314     eq_float!(
2315         f64_ne,
2316         cmpneqsd,
2317         as_f64,
2318         |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) != f64::from_bits(b.to_bits())
2319     );
2320 
2321     // TODO: Should we do this logic in `eq` and just have this delegate to `eq`?
2322     //       That would mean that `eqz` and `eq` with a const 0 argument don't
2323     //       result in different code. It would also allow us to generate better
2324     //       code for `neq` and `gt_u` with const 0 operand
2325     pub fn i32_eqz(&mut self) -> Result<(), Error> {
2326         let mut val = self.pop()?;
2327 
2328         if let ValueLocation::Immediate(Value::I32(i)) = val {
2329             self.push(ValueLocation::Immediate(
2330                 (if i == 0 { 1i32 } else { 0 }).into(),
2331             ))?;
2332             return Ok(());
2333         }
2334 
2335         if let ValueLocation::Cond(loc) = val {
2336             self.push(ValueLocation::Cond(!loc))?;
2337             return Ok(());
2338         }
2339 
2340         let reg = self
2341             .put_into_register(I32, &mut val)?
2342             .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2343 
2344         let out = self.take_reg(I32).unwrap();
2345 
2346         dynasm!(self.asm
2347             ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap())
2348             ; test Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap())
2349             ; setz Rb(out.rq().unwrap())
2350         );
2351 
2352         self.free_value(val)?;
2353 
2354         self.push(ValueLocation::Reg(out))?;
2355         Ok(())
2356     }
2357 
2358     pub fn i64_eqz(&mut self) -> Result<(), Error> {
2359         let mut val = self.pop()?;
2360 
2361         if let ValueLocation::Immediate(Value::I64(i)) = val {
2362             self.push(ValueLocation::Immediate(
2363                 (if i == 0 { 1i32 } else { 0 }).into(),
2364             ))?;
2365             return Ok(());
2366         }
2367 
2368         if let ValueLocation::Cond(loc) = val {
2369             self.push(ValueLocation::Cond(!loc))?;
2370             return Ok(());
2371         }
2372 
2373         let reg = self
2374             .put_into_register(I64, &mut val)?
2375             .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2376 
2377         let out = self.take_reg(I64).unwrap();
2378 
2379         dynasm!(self.asm
2380             ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap())
2381             ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap())
2382             ; setz Rb(out.rq().unwrap())
2383         );
2384 
2385         self.free_value(val)?;
2386 
2387         self.push(ValueLocation::Reg(out))?;
2388         Ok(())
2389     }
2390 
2391     fn br_on_cond_code(&mut self, label: Label, cond: CondCode) {
2392         match cond {
2393             cc::EQUAL => dynasm!(self.asm
2394                 ; je =>label.0
2395             ),
2396             cc::NOT_EQUAL => dynasm!(self.asm
2397                 ; jne =>label.0
2398             ),
2399             cc::GT_U => dynasm!(self.asm
2400                 ; ja =>label.0
2401             ),
2402             cc::GE_U => dynasm!(self.asm
2403                 ; jae =>label.0
2404             ),
2405             cc::LT_U => dynasm!(self.asm
2406                 ; jb =>label.0
2407             ),
2408             cc::LE_U => dynasm!(self.asm
2409                 ; jbe =>label.0
2410             ),
2411             cc::GT_S => dynasm!(self.asm
2412                 ; jg =>label.0
2413             ),
2414             cc::GE_S => dynasm!(self.asm
2415                 ; jge =>label.0
2416             ),
2417             cc::LT_S => dynasm!(self.asm
2418                 ; jl =>label.0
2419             ),
2420             cc::LE_S => dynasm!(self.asm
2421                 ; jle =>label.0
2422             ),
2423         }
2424     }
2425 
2426     /// Pops i32 predicate and branches to the specified label
2427     /// if the predicate is equal to zero.
2428     pub fn br_if_false(
2429         &mut self,
2430         target: impl Into<BrTarget<Label>>,
2431         pass_args: impl FnOnce(&mut Self) -> Result<(), Error>,
2432     ) -> Result<(), Error> {
2433         let mut val = self.pop()?;
2434         let label = self.target_to_label(target.into());
2435 
2436         let cond = match val {
2437             ValueLocation::Cond(cc) => !cc,
2438             _ => {
2439                 let predicate = match self.put_into_register(I32, &mut val) {
2440                     Err(e) => return Err(e),
2441                     Ok(o) => {
2442                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
2443                     }
2444                 };
2445 
2446                 dynasm!(self.asm
2447                     ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap())
2448                 );
2449 
2450                 CondCode::ZF0
2451             }
2452         };
2453 
2454         self.free_value(val)?;
2455 
2456         pass_args(self)?;
2457 
2458         self.br_on_cond_code(label, cond);
2459 
2460         Ok(())
2461     }
2462 
2463     /// Pops i32 predicate and branches to the specified label
2464     /// if the predicate is not equal to zero.
2465     pub fn br_if_true(
2466         &mut self,
2467         target: impl Into<BrTarget<Label>>,
2468         pass_args: impl FnOnce(&mut Self) -> Result<(), Error>,
2469     ) -> Result<(), Error> {
2470         let mut val = self.pop()?;
2471         let label = self.target_to_label(target.into());
2472 
2473         let cond = match val {
2474             ValueLocation::Cond(cc) => cc,
2475             _ => {
2476                 let predicate = match self.put_into_register(I32, &mut val) {
2477                     Err(e) => return Err(e),
2478                     Ok(o) => {
2479                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
2480                     }
2481                 };
2482 
2483                 dynasm!(self.asm
2484                     ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap())
2485                 );
2486 
2487                 CondCode::ZF1
2488             }
2489         };
2490 
2491         self.free_value(val)?;
2492 
2493         pass_args(self)?;
2494 
2495         self.br_on_cond_code(label, cond);
2496 
2497         Ok(())
2498     }
2499 
2500     /// Branch unconditionally to the specified label.
2501     pub fn br(&mut self, label: impl Into<BrTarget<Label>>) {
2502         match label.into() {
2503             BrTarget::Return => self.ret(),
2504             BrTarget::Label(label) => dynasm!(self.asm
2505                 ; jmp =>label.0
2506             ),
2507         }
2508     }
2509 
2510     /// If `default` is `None`, the default is simply to continue execution.
2511     pub fn br_table<I>(
2512         &mut self,
2513         targets: I,
2514         default: Option<BrTarget<Label>>,
2515         pass_args: impl FnOnce(&mut Self) -> Result<(), Error>,
2516     ) -> Result<(), Error>
2517     where
2518         I: IntoIterator<Item = Option<BrTarget<Label>>>,
2519         I::IntoIter: ExactSizeIterator + DoubleEndedIterator,
2520     {
2521         let mut targets = targets.into_iter();
2522         let count = targets.len();
2523 
2524         let mut selector = self.pop()?;
2525 
2526         pass_args(self)?;
2527 
2528         if let Some(imm) = selector.imm_i32() {
2529             if let Some(target) = targets.nth(imm as _).or(Some(default)).and_then(|a| a) {
2530                 match target {
2531                     BrTarget::Label(label) => self.br(label),
2532                     BrTarget::Return => {
2533                         dynasm!(self.asm
2534                             ; ret
2535                         );
2536                     }
2537                 }
2538             }
2539         } else {
2540             let end_label = self.create_label();
2541 
2542             if count > 0 {
2543                 let temp = match self.put_into_temp_register(GPRType::Rq, &mut selector) {
2544                     Err(e) => return Err(e),
2545                     Ok(o) => match o {
2546                         Some(r) => Ok((r, false)),
2547                         None => {
2548                             self.push_physical(ValueLocation::Reg(RAX))?;
2549                             self.block_state.regs.mark_used(RAX);
2550                             Ok((RAX, true))
2551                         }
2552                     },
2553                 };
2554 
2555                 let (selector_reg, pop_selector) = match temp {
2556                     Err(e) => return Err(e),
2557                     Ok(a) => a,
2558                 };
2559 
2560                 let (tmp, pop_tmp) = if let Some(reg) = self.take_reg(I64) {
2561                     (reg, false)
2562                 } else {
2563                     let out_reg = if selector_reg == RAX { RCX } else { RAX };
2564 
2565                     self.push_physical(ValueLocation::Reg(out_reg))?;
2566                     self.block_state.regs.mark_used(out_reg);
2567 
2568                     (out_reg, true)
2569                 };
2570 
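                // Clamp the selector to `count` (so out-of-range indices fall through to the
                // default branch just after the table), then compute the jump target as
                // `start_label + selector * 5`: each table entry emitted below is assumed to
                // assemble to a single 5-byte `jmp rel32`.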
2571                 self.immediate_to_reg(tmp, (count as u32).into())?;
2572                 dynasm!(self.asm
2573                     ; cmp Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap())
2574                     ; cmova Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap())
2575                     ; lea Rq(tmp.rq().unwrap()), [>start_label]
2576                     ; lea Rq(selector_reg.rq().unwrap()), [
2577                         Rq(selector_reg.rq().unwrap()) * 5
2578                     ]
2579                     ; add Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap())
2580                 );
2581 
2582                 if pop_tmp {
2583                     dynasm!(self.asm
2584                         ; pop Rq(tmp.rq().unwrap())
2585                     );
2586                 } else {
2587                     self.block_state.regs.release(tmp)?;
2588                 }
2589 
2590                 if pop_selector {
2591                     dynasm!(self.asm
2592                         ; pop Rq(selector_reg.rq().unwrap())
2593                     );
2594                 }
2595 
2596                 dynasm!(self.asm
2597                     ; jmp Rq(selector_reg.rq().unwrap())
2598                 ; start_label:
2599                 );
2600 
2601                 for target in targets {
2602                     let label = target
2603                         .map(|target| self.target_to_label(target))
2604                         .unwrap_or(end_label);
2605                     dynasm!(self.asm
2606                         ; jmp =>label.0
2607                     );
2608                 }
2609             }
2610 
2611             if let Some(def) = default {
2612                 self.br(def);
2613             }
2614 
2615             self.define_label(end_label);
2616         }
2617 
2618         self.free_value(selector)?;
2619         Ok(())
2620     }
2621 
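    /// Adjusts `rsp` to match the requested stack depth, using `push`/`pop` for single-slot
    /// changes where possible and a single `lea` otherwise.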
2622     fn set_stack_depth(&mut self, depth: StackDepth) -> Result<(), Error> {
2623         if self.block_state.depth.0 != depth.0 {
2624             let diff = depth.0 as i32 - self.block_state.depth.0 as i32;
2625             let emit_lea = if diff.abs() != 1 {
2626                 true
2627             } else {
2628                 match self.block_state.depth.0.cmp(&depth.0) {
2629                     Ordering::Less => {
2630                         for _ in 0..diff {
2631                             dynasm!(self.asm
2632                                 ; push rax
2633                             );
2634                         }
2635                         false
2636                     }
2637                     Ordering::Greater => {
2638                         if let Some(trash) = self.take_reg(I64) {
2639                             for _ in 0..self.block_state.depth.0 - depth.0 {
2640                                 dynasm!(self.asm
2641                                     ; pop Rq(trash.rq().unwrap())
2642                                 );
2643                             }
2644                             self.block_state.regs.release(trash)?;
2645                             false
2646                         } else {
2647                             true
2648                         }
2649                     }
2650                     Ordering::Equal => false,
2651                 }
2652             };
2653             if emit_lea {
2654                 dynasm!(self.asm
2655                     ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32]
2656                 );
2657             }
2658             self.block_state.depth = depth;
2659         }
2660         Ok(())
2661     }
2662 
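    /// Pops values off the virtual stack into the argument locations demanded by the target
    /// block's calling convention, saving any registers that are needed as destinations but are
    /// still in use.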
2663     fn do_pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> {
2664         let args = &cc.arguments;
2665         for &dst in args.iter().rev().take(self.block_state.stack.len()) {
2666             if let CCLoc::Reg(r) = dst {
2667                 if !self.block_state.regs.is_free(r)
2668                     && *self.block_state.stack.last().unwrap() != ValueLocation::Reg(r)
2669                 {
2670                     // TODO: This would be made simpler and more efficient with a proper SSE
2671                     //       representation.
2672                     self.save_regs(std::iter::once(r))?;
2673                 }
2674 
2675                 self.block_state.regs.mark_used(r);
2676             }
2677             self.pop_into(dst)?;
2678         }
2679         Ok(())
2680     }
2681 
2682     pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> {
2683         self.do_pass_block_args(cc)?;
2684         self.set_stack_depth(cc.stack_depth.clone())?;
2685         Ok(())
2686     }
2687 
2688     pub fn serialize_block_args(
2689         &mut self,
2690         cc: &BlockCallingConvention,
2691         params: u32,
2692     ) -> Result<BlockCallingConvention, Error> {
2693         self.do_pass_block_args(cc)?;
2694 
2695         let mut out_args = cc.arguments.clone();
2696 
2697         out_args.reverse();
2698 
2699         while out_args.len() < params as usize {
2700             let mut val = self.pop()?;
2701 
2702             // TODO: We can use stack slots for values already on the stack but we
2703             //       don't refcount stack slots right now
2704             let ccloc = self.put_into_temp_location(None, &mut val)?;
2705             out_args.push(ccloc);
2706         }
2707 
2708         out_args.reverse();
2709 
2710         self.set_stack_depth(cc.stack_depth.clone())?;
2711 
2712         Ok(BlockCallingConvention {
2713             stack_depth: cc.stack_depth.clone(),
2714             arguments: out_args,
2715         })
2716     }
2717 
2718     /// Puts all stack values into "real" locations so that they can, for example, be set to
2719     /// different values on different iterations of a loop.
2720     pub fn serialize_args(&mut self, count: u32) -> Result<BlockCallingConvention, Error> {
2721         let mut out = Vec::with_capacity(count as _);
2722 
2723         // TODO: We can make this more efficient now that `pop` isn't so complicated
2724         for _ in 0..count {
2725             let mut val = self.pop()?;
2726             // TODO: We can use stack slots for values already on the stack but we
2727             //       don't refcount stack slots right now
2728             let loc = self.put_into_temp_location(None, &mut val)?;
2729 
2730             out.push(loc);
2731         }
2732 
2733         out.reverse();
2734 
2735         Ok(BlockCallingConvention {
2736             stack_depth: self.block_state.depth.clone(),
2737             arguments: out,
2738         })
2739     }
2740 
2741     pub fn get_global(&mut self, global_idx: u32) -> Result<(), Error> {
2742         let (reg, offset) = self
2743             .module_context
2744             .defined_global_index(global_idx)
2745             .map(|defined_global_index| {
2746                 (
2747                     None,
2748                     self.module_context
2749                         .vmctx_vmglobal_definition(defined_global_index),
2750                 )
2751             })
2752             .unwrap_or_else(|| {
2753                 let reg = self.take_reg(I64).unwrap();
2754 
2755                 dynasm!(self.asm
2756                     ; mov Rq(reg.rq().unwrap()), [
2757                         Rq(VMCTX) +
2758                             self.module_context.vmctx_vmglobal_import_from(global_idx) as i32
2759                     ]
2760                 );
2761 
2762                 (Some(reg), 0)
2763             });
2764 
2765         let out = self.take_reg(GPRType::Rq).unwrap();
2766         let vmctx = GPR::Rq(VMCTX);
2767 
2768         // TODO: Are globals necessarily aligned to 128 bits? We can load directly to an XMM reg if so
2769         dynasm!(self.asm
2770             ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32]
2771         );
2772 
2773         if let Some(reg) = reg {
2774             self.block_state.regs.release(reg)?;
2775         }
2776 
2777         self.push(ValueLocation::Reg(out))?;
2778         Ok(())
2779     }
2780 
2781     pub fn set_global(&mut self, global_idx: u32) -> Result<(), Error> {
2782         let mut val = self.pop()?;
2783         let (reg, offset) = self
2784             .module_context
2785             .defined_global_index(global_idx)
2786             .map(|defined_global_index| {
2787                 (
2788                     None,
2789                     self.module_context
2790                         .vmctx_vmglobal_definition(defined_global_index),
2791                 )
2792             })
2793             .unwrap_or_else(|| {
2794                 let reg = self.take_reg(I64).unwrap();
2795 
2796                 dynasm!(self.asm
2797                     ; mov Rq(reg.rq().unwrap()), [
2798                         Rq(VMCTX) +
2799                             self.module_context.vmctx_vmglobal_import_from(global_idx) as i32
2800                     ]
2801                 );
2802 
2803                 (Some(reg), 0)
2804             });
2805 
2806         let val_reg = self
2807             .put_into_register(GPRType::Rq, &mut val)?
2808             .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2809         let vmctx = GPR::Rq(VMCTX);
2810 
2811         // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits
2812         dynasm!(self.asm
2813             ; mov [
2814                 Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32
2815             ], Rq(val_reg.rq().unwrap())
2816         );
2817 
2818         if let Some(reg) = reg {
2819             self.block_state.regs.release(reg)?;
2820         }
2821 
2822         self.free_value(val)?;
2823         Ok(())
2824     }
2825 
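    /// Materializes an immediate into `reg`, using a 32-bit `mov` when the value fits and going
    /// via a temporary integer register when the destination is an XMM register.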
2826     fn immediate_to_reg(&mut self, reg: GPR, val: Value) -> Result<(), Error> {
2827         match reg {
2828             GPR::Rq(r) => {
2829                 let val = val.as_bytes();
2830                 if (val as u64) <= u32::max_value() as u64 {
2831                     dynasm!(self.asm
2832                         ; mov Rd(r), val as i32
2833                     );
2834                 } else {
2835                     dynasm!(self.asm
2836                         ; mov Rq(r), QWORD val
2837                     );
2838                 }
2839             }
2840             reg @ GPR::Rx(_) => {
2841                 let tmp = self
2842                     .take_reg(GPRType::Rq)
2843                     .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2844                 self.immediate_to_reg(tmp, val)?;
2845                 let tmp = ValueLocation::Reg(tmp);
2846                 self.copy_value(tmp, CCLoc::Reg(reg))?;
2847                 self.free_value(tmp)?;
2848             }
2849         }
2850 
2851         Ok(())
2852     }
2853 
2854     // Taking `src` and `dst` by value is fine (`ValueLocation` is `Copy`), and the distinct parameter
2855     // types ensure that we don't get the arguments the wrong way around. In the future we want to have a
2856     // `ReadLocation` and a `WriteLocation` so that we statically can't write to a literal, making this a non-issue.
2857     fn copy_value(&mut self, src: ValueLocation, dst: CCLoc) -> Result<(), Error> {
2858         match (src, dst) {
2859             (ValueLocation::Cond(cond), CCLoc::Stack(o)) => {
2860                 let offset = self.adjusted_offset(o);
2861 
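                // `setcc` only writes a single byte, so zero the whole 64-bit stack slot first and then
                // set that byte according to the condition code.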
2862                 self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?;
2863 
2864                 match cond {
2865                     cc::EQUAL => dynasm!(self.asm
2866                         ; sete [rsp + offset]
2867                     ),
2868                     cc::NOT_EQUAL => dynasm!(self.asm
2869                         ; setne [rsp + offset]
2870                     ),
2871                     cc::GT_U => dynasm!(self.asm
2872                         ; seta [rsp + offset]
2873                     ),
2874                     cc::GE_U => dynasm!(self.asm
2875                         ; setae [rsp + offset]
2876                     ),
2877                     cc::LT_U => dynasm!(self.asm
2878                         ; setb [rsp + offset]
2879                     ),
2880                     cc::LE_U => dynasm!(self.asm
2881                         ; setbe [rsp + offset]
2882                     ),
2883                     cc::GT_S => dynasm!(self.asm
2884                         ; setg [rsp + offset]
2885                     ),
2886                     cc::GE_S => dynasm!(self.asm
2887                         ; setge [rsp + offset]
2888                     ),
2889                     cc::LT_S => dynasm!(self.asm
2890                         ; setl [rsp + offset]
2891                     ),
2892                     cc::LE_S => dynasm!(self.asm
2893                         ; setle [rsp + offset]
2894                     ),
2895                 }
2896             }
2897             (ValueLocation::Cond(cond), CCLoc::Reg(reg)) => match reg {
2898                 GPR::Rq(r) => {
2899                     self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?;
2900 
2901                     match cond {
2902                         cc::EQUAL => dynasm!(self.asm
2903                             ; sete Rb(r)
2904                         ),
2905                         cc::NOT_EQUAL => dynasm!(self.asm
2906                             ; setne Rb(r)
2907                         ),
2908                         cc::GT_U => dynasm!(self.asm
2909                             ; seta Rb(r)
2910                         ),
2911                         cc::GE_U => dynasm!(self.asm
2912                             ; setae Rb(r)
2913                         ),
2914                         cc::LT_U => dynasm!(self.asm
2915                             ; setb Rb(r)
2916                         ),
2917                         cc::LE_U => dynasm!(self.asm
2918                             ; setbe Rb(r)
2919                         ),
2920                         cc::GT_S => dynasm!(self.asm
2921                             ; setg Rb(r)
2922                         ),
2923                         cc::GE_S => dynasm!(self.asm
2924                             ; setge Rb(r)
2925                         ),
2926                         cc::LT_S => dynasm!(self.asm
2927                             ; setl Rb(r)
2928                         ),
2929                         cc::LE_S => dynasm!(self.asm
2930                             ; setle Rb(r)
2931                         ),
2932                     }
2933                 }
2934                 GPR::Rx(_) => {
2935                     let temp = CCLoc::Reg(self.take_reg(I32).unwrap());
2936                     self.copy_value(src, temp)?;
2937                     let temp = temp.into();
2938                     self.copy_value(temp, dst)?;
2939                     self.free_value(temp)?;
2940                 }
2941             },
2942             (ValueLocation::Stack(in_offset), CCLoc::Stack(out_offset)) => {
2943                 let in_offset = self.adjusted_offset(in_offset);
2944                 let out_offset = self.adjusted_offset(out_offset);
2945                 if in_offset != out_offset {
2946                     if let Some(gpr) = self.take_reg(I64) {
2947                         dynasm!(self.asm
2948                             ; mov Rq(gpr.rq().unwrap()), [rsp + in_offset]
2949                             ; mov [rsp + out_offset], Rq(gpr.rq().unwrap())
2950                         );
2951                         self.block_state.regs.release(gpr)?;
2952                     } else {
2953                         dynasm!(self.asm
2954                             ; push rax
2955                             ; mov rax, [rsp + in_offset + WORD_SIZE as i32]
2956                             ; mov [rsp + out_offset + WORD_SIZE as i32], rax
2957                             ; pop rax
2958                         );
2959                     }
2960                 }
2961             }
2962             // TODO: XMM registers
2963             (ValueLocation::Reg(in_reg), CCLoc::Stack(out_offset)) => {
2964                 let out_offset = self.adjusted_offset(out_offset);
2965                 match in_reg {
2966                     GPR::Rq(in_reg) => {
2967                         // We can always use `Rq` here for now because stack slots are in multiples of
2968                         // 8 bytes
2969                         dynasm!(self.asm
2970                             ; mov [rsp + out_offset], Rq(in_reg)
2971                         );
2972                     }
2973                     GPR::Rx(in_reg) => {
2974                         // We can always use `movq` here for now because stack slots are in multiples of
2975                         // 8 bytes
2976                         dynasm!(self.asm
2977                             ; movq [rsp + out_offset], Rx(in_reg)
2978                         );
2979                     }
2980                 }
2981             }
2982             (ValueLocation::Immediate(i), CCLoc::Stack(out_offset)) => {
2983                 // TODO: Floats
2984                 let i = i.as_bytes();
2985                 let out_offset = self.adjusted_offset(out_offset);
2986                 if let Some(scratch) = self.take_reg(I64) {
2987                     dynasm!(self.asm
2988                         ; mov Rq(scratch.rq().unwrap()), QWORD i
2989                         ; mov [rsp + out_offset], Rq(scratch.rq().unwrap())
2990                     );
2991 
2992                     self.block_state.regs.release(scratch)?;
2993                 } else {
2994                     dynasm!(self.asm
2995                         ; push rax
2996                         ; mov rax, QWORD i
2997                         ; mov [rsp + out_offset + WORD_SIZE as i32], rax
2998                         ; pop rax
2999                     );
3000                 }
3001             }
3002             (ValueLocation::Stack(in_offset), CCLoc::Reg(out_reg)) => {
3003                 let in_offset = self.adjusted_offset(in_offset);
3004                 match out_reg {
3005                     GPR::Rq(out_reg) => {
3006                         // We can always use `Rq` here for now because stack slots are in multiples of
3007                         // 8 bytes
3008                         dynasm!(self.asm
3009                             ; mov Rq(out_reg), [rsp + in_offset]
3010                         );
3011                     }
3012                     GPR::Rx(out_reg) => {
3013                         // We can always use `movq` here for now because stack slots are in multiples of
3014                         // 8 bytes
3015                         dynasm!(self.asm
3016                             ; movq Rx(out_reg), [rsp + in_offset]
3017                         );
3018                     }
3019                 }
3020             }
3021             (ValueLocation::Reg(in_reg), CCLoc::Reg(out_reg)) => {
3022                 if in_reg != out_reg {
3023                     match (in_reg, out_reg) {
3024                         (GPR::Rq(in_reg), GPR::Rq(out_reg)) => {
3025                             dynasm!(self.asm
3026                                 ; mov Rq(out_reg), Rq(in_reg)
3027                             );
3028                         }
3029                         (GPR::Rx(in_reg), GPR::Rq(out_reg)) => {
3030                             dynasm!(self.asm
3031                                 ; movq Rq(out_reg), Rx(in_reg)
3032                             );
3033                         }
3034                         (GPR::Rq(in_reg), GPR::Rx(out_reg)) => {
3035                             dynasm!(self.asm
3036                                 ; movq Rx(out_reg), Rq(in_reg)
3037                             );
3038                         }
3039                         (GPR::Rx(in_reg), GPR::Rx(out_reg)) => {
3040                             dynasm!(self.asm
3041                                 ; movapd Rx(out_reg), Rx(in_reg)
3042                             );
3043                         }
3044                     }
3045                 }
3046             }
3047             (ValueLocation::Immediate(i), CCLoc::Reg(out_reg)) => {
3048                 // TODO: Floats
3049                 self.immediate_to_reg(out_reg, i)?;
3050             }
3051         }
3052         Ok(())
3053     }
3054 
3055     /// Define the given label at the current position.
3056     ///
3057     /// Multiple labels can be defined at the same position. However, a label
3058     /// can be defined only once.
3059     pub fn define_label(&mut self, label: Label) {
3060         self.asm.dynamic_label(label.0);
3061     }
3062 
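    /// Replaces the current block state with the given virtual calling convention, resetting the
    /// register allocator and marking every register referenced by the new value stack as in use.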
3063     pub fn set_state(&mut self, state: VirtualCallingConvention) -> Result<(), Error> {
3064         self.block_state.regs = Registers::new();
3065         self.block_state.regs.release_scratch_register()?;
3066         for elem in &state.stack {
3067             if let ValueLocation::Reg(r) = elem {
3068                 self.block_state.regs.mark_used(*r);
3069             }
3070         }
3071         self.block_state.stack = state.stack;
3072         self.block_state.depth = state.depth;
3073         Ok(())
3074     }
3075 
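    /// Sets up the block state from a block calling convention: the argument locations become the new
    /// value stack, any registers they occupy are marked as used, and the stack depth is taken from the
    /// calling convention.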
3076     pub fn apply_cc(
3077         &mut self,
3078         cc: BlockCallingConvention<impl IntoIterator<Item = CCLoc>>,
3079     ) -> Result<(), Error> {
3080         let stack = cc.arguments.into_iter();
3081 
3082         self.block_state.stack = Vec::with_capacity(stack.size_hint().0);
3083         self.block_state.regs = Registers::new();
3084         self.block_state.regs.release_scratch_register()?;
3085 
3086         for elem in stack {
3087             if let CCLoc::Reg(r) = elem {
3088                 self.block_state.regs.mark_used(r);
3089             }
3090 
3091             self.block_state.stack.push(elem.into());
3092         }
3093 
3094         self.block_state.depth = cc.stack_depth;
3095         Ok(())
3096     }
3097 
3098     load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD);
3099     load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD);
3100     load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD);
3101     load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD);
3102 
3103     load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE);
3104     load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE);
3105     load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD);
3106     load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD);
3107 
3108     load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE);
3109     load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE);
3110     load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD);
3111     load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD);
3112     load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD);
3113     load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD);
3114 
3115     store!(store8, Rb, NONE, DWORD);
3116     store!(store16, Rw, NONE, QWORD);
3117     store!(store32, Rd, movd, DWORD);
3118     store!(store64, Rq, movq, QWORD);
3119 
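    /// Copies `value` onto the machine stack with a real `push`, releasing any register it occupied,
    /// and returns the resulting `Stack` location.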
3120     fn push_physical(&mut self, mut value: ValueLocation) -> Result<ValueLocation, Error> {
3121         let out_offset = -(self.block_state.depth.0 as i32 + 1);
3122         match value {
3123             ValueLocation::Reg(_) | ValueLocation::Immediate(_) | ValueLocation::Cond(_) => {
3124                 if let Some(gpr) = self.put_into_register(GPRType::Rq, &mut value)? {
3125                     dynasm!(self.asm
3126                         ; push Rq(gpr.rq().unwrap())
3127                     );
3128                 } else {
3129                     dynasm!(self.asm
3130                         ; push rax
3131                     );
3132 
3133                     self.copy_value(value, CCLoc::Stack(out_offset))?;
3134                 }
3135 
3136                 self.free_value(value)?;
3137             }
3138             ValueLocation::Stack(o) => {
3139                 let offset = self.adjusted_offset(o);
3140                 dynasm!(self.asm
3141                     ; push QWORD [rsp + offset]
3142                 );
3143             }
3144         }
3145 
3146         self.block_state.depth.reserve(1);
3147 
3148         Ok(ValueLocation::Stack(out_offset))
3149     }
3150 
3151     fn push(&mut self, value: ValueLocation) -> Result<(), Error> {
3152         if let Some(mut top) = self.block_state.stack.pop() {
3153             if let ValueLocation::Cond(_) = top {
3154                 match self.put_into_register(I32, &mut top) {
3155                     Err(e) => return Err(e),
3156                     Ok(o) => {
3157                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3158                     }
3159                 };
3160             }
3161 
3162             self.block_state.stack.push(top);
3163         }
3164 
3165         self.block_state.stack.push(value);
3166         Ok(())
3167     }
3168 
3169     fn pop(&mut self) -> Result<ValueLocation, Error> {
3170         match self.block_state.stack.pop() {
3171             Some(v) => Ok(v),
3172             None => Err(Error::Microwasm(
3173                 "Stack is empty - pop impossible".to_string(),
3174             )),
3175         }
3176     }
3177 
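    /// Drops the values at the given depths below the top of the value stack: the topmost
    /// `*range.start()` values are preserved and re-pushed, and every value covered by the range is freed.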
3178     pub fn drop(&mut self, range: RangeInclusive<u32>) -> Result<(), Error> {
3179         let mut repush = Vec::with_capacity(*range.start() as _);
3180 
3181         for _ in 0..*range.start() {
3182             let v = self.pop()?;
3183             repush.push(v);
3184         }
3185 
3186         for _ in range {
3187             let val = self.pop()?;
3188             self.free_value(val)?;
3189         }
3190 
3191         for v in repush.into_iter().rev() {
3192             self.push(v)?;
3193         }
3194         Ok(())
3195     }
3196 
3197     fn pop_into(&mut self, dst: CCLoc) -> Result<(), Error> {
3198         let val = self.pop()?;
3199         self.copy_value(val, dst)?;
3200         self.free_value(val)?;
3201         Ok(())
3202     }
3203 
3204     fn free_value(&mut self, val: ValueLocation) -> Result<(), Error> {
3205         if let ValueLocation::Reg(r) = val {
3206             self.block_state.regs.release(r)?;
3207         }
3208         Ok(())
3209     }
3210 
3211     /// Puts this value into a register so that it can be efficiently read
3212     fn put_into_register(
3213         &mut self,
3214         ty: impl Into<Option<GPRType>>,
3215         val: &mut ValueLocation,
3216     ) -> Result<Option<GPR>, Error> {
3217         if let Some(out) = self.clone_to_register(ty, *val)? {
3218             self.free_value(*val)?;
3219             *val = ValueLocation::Reg(out);
3220             Ok(Some(out))
3221         } else {
3222             Ok(None)
3223         }
3224     }
3225 
3226     /// Clones this value into a register so that it can be efficiently read
3227     fn clone_to_register(
3228         &mut self,
3229         ty: impl Into<Option<GPRType>>,
3230         val: ValueLocation,
3231     ) -> Result<Option<GPR>, Error> {
3232         let ty = ty.into();
3233         match val {
3234             ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => {
3235                 self.block_state.regs.mark_used(r);
3236                 Ok(Some(r))
3237             }
3238             val => match self.take_reg(ty.unwrap_or(GPRType::Rq)) {
3239                 Some(scratch) => {
3240                     self.copy_value(val, CCLoc::Reg(scratch))?;
3241                     Ok(Some(scratch))
3242                 }
3243                 None => Ok(None),
3244             },
3245         }
3246     }
3247 
3248     /// Puts this value into a temporary register so that operations
3249     /// on that register don't write to a local.
3250     fn put_into_temp_register(
3251         &mut self,
3252         ty: impl Into<Option<GPRType>>,
3253         val: &mut ValueLocation,
3254     ) -> Result<Option<GPR>, Error> {
3255         let out = self.clone_to_temp_register(ty, *val)?;
3256         if let Some(o) = out {
3257             self.free_value(*val)?;
3258             *val = ValueLocation::Reg(o);
3259             Ok(Some(o))
3260         } else {
3261             Ok(None)
3262         }
3263     }
3264 
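    /// Like `put_into_temp_register`, but when no register is free the value is spilled to the machine
    /// stack instead, so this always produces a usable `CCLoc`.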
3265     fn put_into_temp_location(
3266         &mut self,
3267         ty: impl Into<Option<GPRType>>,
3268         val: &mut ValueLocation,
3269     ) -> Result<CCLoc, Error> {
3270         if let Some(gpr) = self.put_into_temp_register(ty, val)? {
3271             Ok(CCLoc::Reg(gpr))
3272         } else {
3273             let out = CCLoc::Stack(self.push_physical(*val)?.stack().unwrap());
3274             *val = out.into();
3275             Ok(out)
3276         }
3277     }
3278 
3279     /// Clones this value into a temporary register so that operations
3280     /// on that register don't write to a local.
3281 
3282     fn clone_to_temp_register(
3283         &mut self,
3284         ty: impl Into<Option<GPRType>>,
3285         val: ValueLocation,
3286     ) -> Result<Option<GPR>, Error> {
3287         // If we have `None` as the type then it always matches (`.unwrap_or(true)`)
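        // A register can only be handed back for in-place mutation if this value is its sole user (and
        // the type matches); otherwise the value is copied into a freshly allocated scratch register.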
3288         match val {
3289             ValueLocation::Reg(r) => {
3290                 let ty = ty.into();
3291                 let type_matches = ty.map(|t| t == r.type_()).unwrap_or(true);
3292 
3293                 if self.block_state.regs.num_usages(r) <= 1 && type_matches {
3294                     self.block_state.regs.mark_used(r);
3295                     Ok(Some(r))
3296                 } else if let Some(scratch) = self.take_reg(ty.unwrap_or(GPRType::Rq)) {
3297                     self.copy_value(val, CCLoc::Reg(scratch))?;
3298                     Ok(Some(scratch))
3299                 } else {
3300                     Ok(None)
3301                 }
3302             }
3303             val => self.clone_to_register(ty, val),
3304         }
3305     }
3306 
3307     pub fn f32_neg(&mut self) -> Result<(), Error> {
3308         let mut val = self.pop()?;
3309 
3310         let out = if let Some(i) = val.imm_f32() {
3311             ValueLocation::Immediate(
3312                 Ieee32::from_bits((-f32::from_bits(i.to_bits())).to_bits()).into(),
3313             )
3314         } else {
3315             let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3316                 Err(e) => return Err(e),
3317                 Ok(o) => {
3318                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3319                 }
3320             };
3321             let const_label = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32));
3322 
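            // Negate by flipping the sign bit: XOR against a constant with only the IEEE-754 sign bit set.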
3323             dynasm!(self.asm
3324                 ; xorps Rx(reg.rx().unwrap()), [=>const_label.0]
3325             );
3326 
3327             val
3328         };
3329 
3330         self.push(out)?;
3331         Ok(())
3332     }
3333 
3334     pub fn f64_neg(&mut self) -> Result<(), Error> {
3335         let mut val = self.pop()?;
3336 
3337         let out = if let Some(i) = val.imm_f64() {
3338             ValueLocation::Immediate(
3339                 Ieee64::from_bits((-f64::from_bits(i.to_bits())).to_bits()).into(),
3340             )
3341         } else {
3342             let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3343                 Err(e) => return Err(e),
3344                 Ok(o) => {
3345                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3346                 }
3347             };
3348             let const_label = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
3349 
3350             dynasm!(self.asm
3351                 ; xorpd Rx(reg.rx().unwrap()), [=>const_label.0]
3352             );
3353 
3354             val
3355         };
3356 
3357         self.push(out)?;
3358         Ok(())
3359     }
3360 
3361     pub fn f32_abs(&mut self) -> Result<(), Error> {
3362         let mut val = self.pop()?;
3363 
3364         let out = if let Some(i) = val.imm_f32() {
3365             ValueLocation::Immediate(
3366                 Ieee32::from_bits(f32::from_bits(i.to_bits()).abs().to_bits()).into(),
3367             )
3368         } else {
3369             let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3370                 Err(e) => return Err(e),
3371                 Ok(o) => {
3372                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3373                 }
3374             };
3375             let const_label = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32));
3376 
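            // Take the absolute value by clearing the sign bit: AND against a mask that keeps every bit
            // except the IEEE-754 sign bit.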
3377             dynasm!(self.asm
3378                 ; andps Rx(reg.rx().unwrap()), [=>const_label.0]
3379             );
3380 
3381             val
3382         };
3383 
3384         self.push(out)?;
3385         Ok(())
3386     }
3387 
3388     pub fn f64_abs(&mut self) -> Result<(), Error> {
3389         let mut val = self.pop()?;
3390 
3391         let out = if let Some(i) = val.imm_f64() {
3392             ValueLocation::Immediate(
3393                 Ieee64::from_bits(f64::from_bits(i.to_bits()).abs().to_bits()).into(),
3394             )
3395         } else {
3396             let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3397                 Err(e) => return Err(e),
3398                 Ok(o) => {
3399                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3400                 }
3401             };
3402 
3403             let const_label = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64));
3404 
3405             dynasm!(self.asm
3406                 ; andps Rx(reg.rx().unwrap()), [=>const_label.0]
3407             );
3408 
3409             val
3410         };
3411 
3412         self.push(out)?;
3413         Ok(())
3414     }
3415 
3416     pub fn f32_sqrt(&mut self) -> Result<(), Error> {
3417         let mut val = self.pop()?;
3418 
3419         let out = if let Some(i) = val.imm_f32() {
3420             ValueLocation::Immediate(
3421                 Ieee32::from_bits(f32::from_bits(i.to_bits()).sqrt().to_bits()).into(),
3422             )
3423         } else {
3424             let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3425                 Err(e) => return Err(e),
3426                 Ok(o) => {
3427                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3428                 }
3429             };
3430 
3431             dynasm!(self.asm
3432                 ; sqrtss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
3433             );
3434 
3435             val
3436         };
3437 
3438         self.push(out)?;
3439         Ok(())
3440     }
3441 
3442     pub fn f64_sqrt(&mut self) -> Result<(), Error> {
3443         let mut val = self.pop()?;
3444 
3445         let out = if let Some(i) = val.imm_f64() {
3446             ValueLocation::Immediate(
3447                 Ieee64::from_bits(f64::from_bits(i.to_bits()).sqrt().to_bits()).into(),
3448             )
3449         } else {
3450             let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3451                 Err(e) => return Err(e),
3452                 Ok(o) => {
3453                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3454                 }
3455             };
3456 
3457             dynasm!(self.asm
3458                 ; sqrtsd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
3459             );
3460 
3461             ValueLocation::Reg(reg)
3462         };
3463 
3464         self.push(out)?;
3465         Ok(())
3466     }
3467 
3468     pub fn f32_copysign(&mut self) -> Result<(), Error> {
3469         let mut right = self.pop()?;
3470         let mut left = self.pop()?;
3471 
3472         let out = if let (Some(left), Some(right)) = (left.imm_f32(), right.imm_f32()) {
3473             ValueLocation::Immediate(
3474                 Ieee32::from_bits(
3475                     (left.to_bits() & REST_MASK_F32) | (right.to_bits() & SIGN_MASK_F32),
3476                 )
3477                 .into(),
3478             )
3479         } else {
3480             let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) {
3481                 Err(e) => return Err(e),
3482                 Ok(o) => {
3483                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3484                 }
3485             };
3486             let rreg = match self.put_into_register(GPRType::Rx, &mut right) {
3487                 Err(e) => return Err(e),
3488                 Ok(o) => {
3489                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3490                 }
3491             };
3492 
3493             let sign_mask = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32));
3494             let rest_mask = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32));
3495 
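            // Bit-level copysign: keep only the sign bit of `right`, keep everything but the sign bit of
            // `left`, then OR the two together into `left`.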
3496             dynasm!(self.asm
3497                 ; andps Rx(rreg.rx().unwrap()), [=>sign_mask.0]
3498                 ; andps Rx(lreg.rx().unwrap()), [=>rest_mask.0]
3499                 ; orps  Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
3500             );
3501 
3502             self.free_value(right)?;
3503 
3504             left
3505         };
3506 
3507         self.push(out)?;
3508         Ok(())
3509     }
3510 
3511     pub fn f64_copysign(&mut self) -> Result<(), Error> {
3512         let mut right = self.pop()?;
3513         let mut left = self.pop()?;
3514 
3515         let out = if let (Some(left), Some(right)) = (left.imm_f64(), right.imm_f64()) {
3516             ValueLocation::Immediate(
3517                 Ieee64::from_bits(
3518                     (left.to_bits() & REST_MASK_F64) | (right.to_bits() & SIGN_MASK_F64),
3519                 )
3520                 .into(),
3521             )
3522         } else {
3523             let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) {
3524                 Err(e) => return Err(e),
3525                 Ok(o) => {
3526                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3527                 }
3528             };
3529             let rreg = match self.put_into_register(GPRType::Rx, &mut right) {
3530                 Err(e) => return Err(e),
3531                 Ok(o) => {
3532                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3533                 }
3534             };
3535 
3536             let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
3537             let rest_mask = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64));
3538 
3539             dynasm!(self.asm
3540                 ; andpd Rx(rreg.rx().unwrap()), [=>sign_mask.0]
3541                 ; andpd Rx(lreg.rx().unwrap()), [=>rest_mask.0]
3542                 ; orpd  Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
3543             );
3544 
3545             self.free_value(right)?;
3546 
3547             left
3548         };
3549 
3550         self.push(out)?;
3551         Ok(())
3552     }
3553 
3554     pub fn i32_clz(&mut self) -> Result<(), Error> {
3555         let mut val = self.pop()?;
3556 
3557         let out_val = match val {
3558             ValueLocation::Immediate(imm) => {
3559                 ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into())
3560             }
3561             ValueLocation::Stack(offset) => {
3562                 let offset = self.adjusted_offset(offset);
3563                 let temp = self.take_reg(I32).unwrap();
3564 
3565                 if is_x86_feature_detected!("lzcnt") {
3566                     dynasm!(self.asm
3567                         ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset]
3568                     );
3569                     ValueLocation::Reg(temp)
3570                 } else {
3571                     let temp_2 = self.take_reg(I32).unwrap();
3572 
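                    // Fallback without LZCNT: `bsr` yields the index of the highest set bit but leaves ZF
                    // set (and the destination undefined) for a zero input, so `cmove` substitutes 0x3f.
                    // XOR-ing with 0x1f then maps bit index `i` to `31 - i`, and maps the zero case to 32.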
3573                     dynasm!(self.asm
3574                         ; bsr Rd(temp.rq().unwrap()), [rsp + offset]
3575                         ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _
3576                         ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap())
3577                         ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _
3578                         ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap())
3579                     );
3580                     self.free_value(ValueLocation::Reg(temp_2))?;
3581                     ValueLocation::Reg(temp)
3582                 }
3583             }
3584             ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
3585                 let reg = match self.put_into_register(GPRType::Rq, &mut val) {
3586                     Err(e) => return Err(e),
3587                     Ok(o) => {
3588                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3589                     }
3590                 };
3591 
3592                 let temp = self.take_reg(I32).unwrap();
3593 
3594                 if is_x86_feature_detected!("lzcnt") {
3595                     dynasm!(self.asm
3596                         ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3597                     );
3598                     ValueLocation::Reg(temp)
3599                 } else {
3600                     dynasm!(self.asm
3601                         ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3602                         ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _
3603                         ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3604                         ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _
3605                         ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3606                     );
3607                     ValueLocation::Reg(temp)
3608                 }
3609             }
3610         };
3611 
3612         self.free_value(val)?;
3613         self.push(out_val)?;
3614         Ok(())
3615     }
3616 
3617     pub fn i64_clz(&mut self) -> Result<(), Error> {
3618         let mut val = self.pop()?;
3619 
3620         let out_val = match val {
3621             ValueLocation::Immediate(imm) => {
3622                 ValueLocation::Immediate((imm.as_i64().unwrap().leading_zeros() as u64).into())
3623             }
3624             ValueLocation::Stack(offset) => {
3625                 let offset = self.adjusted_offset(offset);
3626                 let temp = self.take_reg(I64).unwrap();
3627 
3628                 if is_x86_feature_detected!("lzcnt") {
3629                     dynasm!(self.asm
3630                         ; lzcnt Rq(temp.rq().unwrap()), [rsp + offset]
3631                     );
3632                     ValueLocation::Reg(temp)
3633                 } else {
3634                     let temp_2 = self.take_reg(I64).unwrap();
3635 
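                    // Same `bsr` fallback as the 32-bit case: substitute 0x7f for a zero input, then XOR
                    // with 0x3f so that bit index `i` becomes `63 - i` and the zero case becomes 64.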
3636                     dynasm!(self.asm
3637                         ; bsr Rq(temp.rq().unwrap()), [rsp + offset]
3638                         ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _
3639                         ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap())
3640                         ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _
3641                         ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap())
3642                     );
3643                     self.free_value(ValueLocation::Reg(temp_2))?;
3644                     ValueLocation::Reg(temp)
3645                 }
3646             }
3647             ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
3648                 let reg = match self.put_into_register(GPRType::Rq, &mut val) {
3649                     Err(e) => return Err(e),
3650                     Ok(o) => {
3651                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3652                     }
3653                 };
3654                 let temp = self.take_reg(I64).unwrap();
3655 
3656                 if is_x86_feature_detected!("lzcnt") {
3657                     dynasm!(self.asm
3658                         ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3659                     );
3660                     ValueLocation::Reg(temp)
3661                 } else {
3662                     dynasm!(self.asm
3663                         ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3664                         ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _
3665                         ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3666                         ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _
3667                         ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3668                     );
3669                     ValueLocation::Reg(temp)
3670                 }
3671             }
3672         };
3673 
3674         self.free_value(val)?;
3675         self.push(out_val)?;
3676         Ok(())
3677     }
3678 
3679     pub fn i32_ctz(&mut self) -> Result<(), Error> {
3680         let mut val = self.pop()?;
3681 
3682         let out_val = match val {
3683             ValueLocation::Immediate(imm) => {
3684                 ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into())
3685             }
3686             ValueLocation::Stack(offset) => {
3687                 let offset = self.adjusted_offset(offset);
3688                 let temp = self.take_reg(I32).unwrap();
3689 
3690                 if is_x86_feature_detected!("bmi1") {
3691                     dynasm!(self.asm
3692                         ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset]
3693                     );
3694                     ValueLocation::Reg(temp)
3695                 } else {
3696                     let temp_zero_val = self.take_reg(I32).unwrap();
3697 
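                    // Fallback without TZCNT: `bsf` yields the index of the lowest set bit but leaves the
                    // destination undefined (and ZF set) for a zero input, so `cmove` substitutes 32, the
                    // defined result of `ctz(0)`.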
3698                     dynasm!(self.asm
3699                         ; bsf Rd(temp.rq().unwrap()), [rsp + offset]
3700                         ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _
3701                         ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap())
3702                     );
3703                     self.free_value(ValueLocation::Reg(temp_zero_val))?;
3704                     ValueLocation::Reg(temp)
3705                 }
3706             }
3707             ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
3708                 let reg = match self.put_into_register(GPRType::Rq, &mut val) {
3709                     Err(e) => return Err(e),
3710                     Ok(o) => {
3711                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3712                     }
3713                 };
3714                 let temp = self.take_reg(I32).unwrap();
3715 
3716                 if is_x86_feature_detected!("bmi1") {
3717                     dynasm!(self.asm
3718                         ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3719                     );
3720                     ValueLocation::Reg(temp)
3721                 } else {
3722                     dynasm!(self.asm
3723                         ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3724                         ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _
3725                         ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3726                     );
3727                     ValueLocation::Reg(temp)
3728                 }
3729             }
3730         };
3731 
3732         self.free_value(val)?;
3733         self.push(out_val)?;
3734         Ok(())
3735     }
3736 
3737     pub fn i64_ctz(&mut self) -> Result<(), Error> {
3738         let mut val = self.pop()?;
3739 
3740         let out_val = match val {
3741             ValueLocation::Immediate(imm) => {
3742                 ValueLocation::Immediate((imm.as_i64().unwrap().trailing_zeros() as u64).into())
3743             }
3744             ValueLocation::Stack(offset) => {
3745                 let offset = self.adjusted_offset(offset);
3746                 let temp = self.take_reg(I64).unwrap();
3747 
3748                 if is_x86_feature_detected!("bmi1") {
3749                     dynasm!(self.asm
3750                         ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset]
3751                     );
3752                     ValueLocation::Reg(temp)
3753                 } else {
3754                     let temp_zero_val = self.take_reg(I64).unwrap();
3755 
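                    // Same `bsf` fallback as the 32-bit case, substituting 64 for a zero input.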
3756                     dynasm!(self.asm
3757                         ; bsf Rq(temp.rq().unwrap()), [rsp + offset]
3758                         ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _
3759                         ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap())
3760                     );
3761                     self.free_value(ValueLocation::Reg(temp_zero_val))?;
3762                     ValueLocation::Reg(temp)
3763                 }
3764             }
3765             ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
3766                 let reg = match self.put_into_register(GPRType::Rq, &mut val) {
3767                     Err(e) => return Err(e),
3768                     Ok(o) => {
3769                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3770                     }
3771                 };
3772                 let temp = self.take_reg(I64).unwrap();
3773 
3774                 dynasm!(self.asm
3775                     ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3776                     ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _
3777                     ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3778                 );
3779                 ValueLocation::Reg(temp)
3780             }
3781         };
3782 
3783         self.free_value(val)?;
3784         self.push(out_val)?;
3785         Ok(())
3786     }
3787 
3788     pub fn i32_extend_u(&mut self) -> Result<(), Error> {
3789         let val = self.pop()?;
3790 
3791         let out = if let ValueLocation::Immediate(imm) = val {
3792             ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into())
3793         } else {
3794             let new_reg = self.take_reg(I64).unwrap();
3795 
3796             // TODO: Track set-ness of bits - we can make this a no-op in most cases
3797             //       but we have to make this unconditional just in case this value
3798             //       came from a truncate.
3799             match val {
3800                 ValueLocation::Reg(GPR::Rx(rxreg)) => {
3801                     dynasm!(self.asm
3802                         ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg)
3803                     );
3804                 }
3805                 ValueLocation::Reg(GPR::Rq(rqreg)) => {
3806                     dynasm!(self.asm
3807                         ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg)
3808                     );
3809                 }
3810                 ValueLocation::Stack(offset) => {
3811                     let offset = self.adjusted_offset(offset);
3812 
3813                     dynasm!(self.asm
3814                         ; mov Rd(new_reg.rq().unwrap()), [rsp + offset]
3815                     );
3816                 }
3817                 ValueLocation::Cond(_) => self.copy_value(val, CCLoc::Reg(new_reg))?,
3818                 ValueLocation::Immediate(_) => {
3819                     return Err(Error::Microwasm(
3820                         "i32_extend_u unreachable code".to_string(),
3821                     ))
3822                 }
3823             }
3824 
3825             ValueLocation::Reg(new_reg)
3826         };
3827 
3828         self.free_value(val)?;
3829 
3830         self.push(out)?;
3831         Ok(())
3832     }
3833 
3834     pub fn i32_extend_s(&mut self) -> Result<(), Error> {
3835         let val = self.pop()?;
3836 
3837         self.free_value(val)?;
3838         let new_reg = self.take_reg(I64).unwrap();
3839 
3840         let out = match val {
3841             ValueLocation::Reg(GPR::Rx(rxreg)) => {
3842                 dynasm!(self.asm
3843                     ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg)
3844                     ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap())
3845                 );
3846 
3847                 ValueLocation::Reg(new_reg)
3848             }
3849             ValueLocation::Reg(GPR::Rq(rqreg)) => {
3850                 dynasm!(self.asm
3851                     ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg)
3852                 );
3853 
3854                 ValueLocation::Reg(new_reg)
3855             }
3856             ValueLocation::Stack(offset) => {
3857                 let offset = self.adjusted_offset(offset);
3858 
3859                 dynasm!(self.asm
3860                     ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset]
3861                 );
3862 
3863                 ValueLocation::Reg(new_reg)
3864             }
3865             // `CondCode` can only be 0 or 1, so sign-extension is always the same as
3866             // zero-extension
3867             val @ ValueLocation::Cond(_) => {
3868                 self.copy_value(val, CCLoc::Reg(new_reg))?;
3869 
3870                 ValueLocation::Reg(new_reg)
3871             }
3872             ValueLocation::Immediate(imm) => {
3873                 self.block_state.regs.release(new_reg)?;
3874 
3875                 ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into())
3876             }
3877         };
3878 
3879         self.push(out)?;
3880         Ok(())
3881     }
3882 
3883     unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones);
3884     conversion!(
3885         f64_from_f32,
3886         cvtss2sd,
3887         Rx,
3888         rx,
3889         Rx,
3890         rx,
3891         f32,
3892         f64,
3893         as_f32,
3894         |a: Ieee32| Ieee64::from_bits((f32::from_bits(a.to_bits()) as f64).to_bits())
3895     );
3896     conversion!(
3897         f32_from_f64,
3898         cvtsd2ss,
3899         Rx,
3900         rx,
3901         Rx,
3902         rx,
3903         f64,
3904         f32,
3905         as_f64,
3906         |a: Ieee64| Ieee32::from_bits((f64::from_bits(a.to_bits()) as f32).to_bits())
3907     );
3908 
3909     pub fn i32_truncate_f32_s(&mut self) -> Result<(), Error> {
3910         let mut val = self.pop()?;
3911 
3912         let out_val = match val {
3913             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
3914                 (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(),
3915             ),
3916             _ => {
3917                 let reg = match self.put_into_register(F32, &mut val) {
3918                     Err(e) => return Err(e),
3919                     Ok(o) => {
3920                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3921                     }
3922                 };
3923                 let temp = self.take_reg(I32).unwrap();
3924 
3925                 let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
3926                 let float_cmp_mask =
3927                     self.aligned_label(16, LabelValue::I32(0xCF00_0000_u32 as i32));
3928                 let zero = self.aligned_label(16, LabelValue::I32(0));
3929 
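                // `cvttss2si` produces the "integer indefinite" value 0x8000_0000 for NaN and out-of-range
                // inputs, which is also the encoding of the one legitimate result `i32::MIN`. On seeing
                // that sentinel we re-examine the float: trap on NaN, trap if it is below -2^31, and
                // otherwise accept it only if it is negative (i.e. it really did truncate to `i32::MIN`).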
3930                 dynasm!(self.asm
3931                     ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
3932                     ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0]
3933                     ; jne >ret
3934                     ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
3935                     ; jp >trap
3936                     ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
3937                     ; jnae >trap
3938                     ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0]
3939                     ; jb >ret
3940                 ; trap:
3941                     ;; self.trap(TrapCode::BadConversionToInteger)
3942                 ; ret:
3943                 );
3944 
3945                 ValueLocation::Reg(temp)
3946             }
3947         };
3948 
3949         self.free_value(val)?;
3950 
3951         self.push(out_val)?;
3952         Ok(())
3953     }
3954 
3955     pub fn i32_truncate_f32_u(&mut self) -> Result<(), Error> {
3956         let mut val = self.pop()?;
3957 
3958         let out_val = match val {
3959             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
3960                 (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u32).into(),
3961             ),
3962             _ => {
3963                 let reg = match self.put_into_temp_register(F32, &mut val) {
3964                     Err(e) => return Err(e),
3965                     Ok(o) => {
3966                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3967                     }
3968                 };
3969 
3970                 let temp = self.take_reg(I32).unwrap();
3971 
3972                 let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
3973                 let float_cmp_mask =
3974                     self.aligned_label(16, LabelValue::I32(0x4F00_0000_u32 as i32));
3975 
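                // Unsigned truncation via the signed `cvttss2si`: inputs below 2^31 convert directly (a
                // negative result means the input was out of range, so trap); inputs at or above 2^31 have
                // 2^31 subtracted before conversion and 0x8000_0000 added back afterwards.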
3976                 dynasm!(self.asm
3977                     ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
3978                     ; jae >else_
3979                     ; jp >trap
3980                     ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
3981                     ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
3982                     ; js >trap
3983                     ; jmp >ret
3984                 ; else_:
3985                     ; subss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
3986                     ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
3987                     ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
3988                     ; js >trap
3989                     ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
3990                     ; jmp >ret
3991                 ; trap:
3992                     ;; self.trap(TrapCode::BadConversionToInteger)
3993                 ; ret:
3994                 );
3995 
3996                 ValueLocation::Reg(temp)
3997             }
3998         };
3999 
4000         self.free_value(val)?;
4001 
4002         self.push(out_val)?;
4003         Ok(())
4004     }
4005 
4006     pub fn i32_truncate_f64_s(&mut self) -> Result<(), Error> {
4007         let mut val = self.pop()?;
4008 
4009         let out_val = match val {
4010             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4011                 (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i32).into(),
4012             ),
4013             _ => {
4014                 let reg = match self.put_into_register(F64, &mut val) {
4015                     Err(e) => return Err(e),
4016                     Ok(o) => {
4017                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4018                     }
4019                 };
4020 
4021                 let temp = self.take_reg(I32).unwrap();
4022 
4023                 let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
4024                 let float_cmp_mask =
4025                     self.aligned_label(16, LabelValue::I64(0xC1E0_0000_0020_0000_u64 as i64));
4026                 let zero = self.aligned_label(16, LabelValue::I64(0));
4027 
4028                 dynasm!(self.asm
4029                     ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4030                     ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0]
4031                     ; jne >ret
4032                     ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
4033                     ; jp >trap
4034                     ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
4035                     ; jna >trap
4036                     ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0]
4037                     ; jb >ret
4038                 ; trap:
4039                     ;; self.trap(TrapCode::BadConversionToInteger)
4040                 ; ret:
4041                 );
4042 
4043                 ValueLocation::Reg(temp)
4044             }
4045         };
4046 
4047         self.free_value(val)?;
4048 
4049         self.push(out_val)?;
4050         Ok(())
4051     }
4052 
4053     pub fn i32_truncate_f64_u(&mut self) -> Result<(), Error> {
4054         let mut val = self.pop()?;
4055 
4056         let out_val = match val {
4057             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4058                 (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u32).into(),
4059             ),
4060             _ => {
4061                 let reg = match self.put_into_temp_register(F64, &mut val) {
4062                     Err(e) => return Err(e),
4063                     Ok(o) => {
4064                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4065                     }
4066                 };
4067 
4068                 let temp = self.take_reg(I32).unwrap();
4069 
4070                 let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
4071                 let float_cmp_mask =
4072                     self.aligned_label(16, LabelValue::I64(0x41E0_0000_0000_0000_u64 as i64));
4073 
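                // Same two-range scheme as `i32_truncate_f32_u`, with the 2^31 threshold expressed as an
                // f64 constant.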
4074                 dynasm!(self.asm
4075                     ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
4076                     ; jae >else_
4077                     ; jp >trap
4078                     ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4079                     ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
4080                     ; js >trap
4081                     ; jmp >ret
4082                 ; else_:
4083                     ; subsd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
4084                     ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4085                     ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
4086                     ; js >trap
4087                     ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
4088                     ; jmp >ret
4089                 ; trap:
4090                     ;; self.trap(TrapCode::BadConversionToInteger)
4091                 ; ret:
4092                 );
4093 
4094                 ValueLocation::Reg(temp)
4095             }
4096         };
4097 
4098         self.free_value(val)?;
4099 
4100         self.push(out_val)?;
4101         Ok(())
4102     }
4103 
4104     conversion!(
4105         f32_convert_from_i32_s,
4106         cvtsi2ss,
4107         Rd,
4108         rq,
4109         Rx,
4110         rx,
4111         i32,
4112         f32,
4113         as_i32,
4114         |a| Ieee32::from_bits((a as f32).to_bits())
4115     );
4116     conversion!(
4117         f64_convert_from_i32_s,
4118         cvtsi2sd,
4119         Rd,
4120         rq,
4121         Rx,
4122         rx,
4123         i32,
4124         f64,
4125         as_i32,
4126         |a| Ieee64::from_bits((a as f64).to_bits())
4127     );
4128     conversion!(
4129         f32_convert_from_i64_s,
4130         cvtsi2ss,
4131         Rq,
4132         rq,
4133         Rx,
4134         rx,
4135         i64,
4136         f32,
4137         as_i64,
4138         |a| Ieee32::from_bits((a as f32).to_bits())
4139     );
4140     conversion!(
4141         f64_convert_from_i64_s,
4142         cvtsi2sd,
4143         Rq,
4144         rq,
4145         Rx,
4146         rx,
4147         i64,
4148         f64,
4149         as_i64,
4150         |a| Ieee64::from_bits((a as f64).to_bits())
4151     );
4152 
4153     pub fn i64_truncate_f32_s(&mut self) -> Result<(), Error> {
4154         let mut val = self.pop()?;
4155 
4156         let out_val = match val {
4157             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4158                 (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i64).into(),
4159             ),
4160             _ => {
4161                 let reg = match self.put_into_temp_register(F32, &mut val) {
4162                     Err(e) => return Err(e),
4163                     Ok(o) => {
4164                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4165                     }
4166                 };
4167 
4168                 let temp = self.take_reg(I32).unwrap();
4169 
4170                 let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
4171                 let float_cmp_mask =
4172                     self.aligned_label(16, LabelValue::I32(0xDF00_0000_u32 as i32));
4173                 let zero = self.aligned_label(16, LabelValue::I64(0));
4174 
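                // `cvttss2si` yields the sentinel 0x8000_0000_0000_0000 (the `sign_mask`
                // constant) for NaN and out-of-range inputs. That is also the correct result for
                // inputs near -2^63, so the checks below only run when the sentinel comes back
                // and trap unless the input really was an in-range negative value.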
4175                 dynasm!(self.asm
4176                     ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4177                     ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0]
4178                     ; jne >ret
4179                     ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
4180                     ; jp >trap
4181                     ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
4182                     ; jnae >trap
4183                     ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0]
4184                     ; jb >ret
4185                 ; trap:
4186                     ;; self.trap(TrapCode::BadConversionToInteger)
4187                 ; ret:
4188                 );
4189 
4190                 ValueLocation::Reg(temp)
4191             }
4192         };
4193 
4194         self.free_value(val)?;
4195 
4196         self.push(out_val)?;
4197         Ok(())
4198     }
4199 
4200     pub fn i64_truncate_f64_s(&mut self) -> Result<(), Error> {
4201         let mut val = self.pop()?;
4202 
4203         let out_val = match val {
4204             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4205                 (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i64).into(),
4206             ),
4207             _ => {
4208                 let reg = match self.put_into_register(F64, &mut val) {
4209                     Err(e) => return Err(e),
4210                     Ok(o) => {
4211                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4212                     }
4213                 };
4214 
4215                 let temp = self.take_reg(I64).unwrap();
4216 
4217                 let sign_mask = self.aligned_label(8, LabelValue::I64(SIGN_MASK_F64 as i64));
4218                 let float_cmp_mask =
4219                     self.aligned_label(16, LabelValue::I64(0xC3E0_0000_0000_0000_u64 as i64));
4220                 let zero = self.aligned_label(16, LabelValue::I64(0));
4221 
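                // Same scheme as the f32 case above: only the sentinel result from `cvttsd2si`
                // needs the explicit NaN and range checks (0xC3E0_0000_0000_0000 is -2^63 as f64).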
4222                 dynasm!(self.asm
4223                     ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4224                     ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0]
4225                     ; jne >ret
4226                     ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
4227                     ; jp >trap
4228                     ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
4229                     ; jnae >trap
4230                     ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0]
4231                     ; jb >ret
4232                 ; trap:
4233                     ;; self.trap(TrapCode::BadConversionToInteger)
4234                 ; ret:
4235                 );
4236 
4237                 ValueLocation::Reg(temp)
4238             }
4239         };
4240 
4241         self.free_value(val)?;
4242 
4243         self.push(out_val)?;
4244         Ok(())
4245     }
4246 
4247     pub fn i64_truncate_f32_u(&mut self) -> Result<(), Error> {
4248         let mut val = self.pop()?;
4249 
4250         let out_val = match val {
4251             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4252                 (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u64).into(),
4253             ),
4254             _ => {
4255                 let reg = match self.put_into_temp_register(F32, &mut val) {
4256                     Err(e) => return Err(e),
4257                     Ok(o) => {
4258                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4259                     }
4260                 };
4261 
4262                 let temp = self.take_reg(I64).unwrap();
4263                 let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
4264                 let u64_trunc_f32_const = self.aligned_label(16, LabelValue::I32(0x5F00_0000_i32));
4265 
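                // 0x5F00_0000 is 2^63 encoded as an f32. Inputs below that bound fit the signed
                // conversion directly; inputs at or above it have 2^63 subtracted before
                // converting, with the sign bit added back afterwards. A negative intermediate
                // result signals NaN or an out-of-range input and traps.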
4266                 dynasm!(self.asm
4267                     ; comiss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0]
4268                     ; jae >large
4269                     ; jp >trap
4270                     ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4271                     ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap())
4272                     ; js >trap
4273                     ; jmp >cont
4274                 ; large:
4275                     ; subss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0]
4276                     ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4277                     ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap())
4278                     ; js >trap
4279                     ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
4280                     ; jmp >cont
4281                 ; trap:
4282                     ;; self.trap(TrapCode::BadConversionToInteger)
4283                 ; cont:
4284                 );
4285 
4286                 ValueLocation::Reg(temp)
4287             }
4288         };
4289 
4290         self.free_value(val)?;
4291 
4292         self.push(out_val)?;
4293         Ok(())
4294     }
4295 
4296     pub fn i64_truncate_f64_u(&mut self) -> Result<(), Error> {
4297         let mut val = self.pop()?;
4298 
4299         let out_val = match val {
4300             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4301                 (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u64).into(),
4302             ),
4303             _ => {
4304                 let reg = match self.put_into_temp_register(F64, &mut val) {
4305                     Err(e) => return Err(e),
4306                     Ok(o) => {
4307                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4308                     }
4309                 };
4310 
4311                 let temp = self.take_reg(I64).unwrap();
4312 
4313                 let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
4314                 let u64_trunc_f64_const =
4315                     self.aligned_label(16, LabelValue::I64(0x43E0_0000_0000_0000_i64));
4316 
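                // 0x43E0_0000_0000_0000 is 2^63 encoded as an f64; this uses the same
                // split-range conversion as the f32 case above.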
4317                 dynasm!(self.asm
4318                     ; comisd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0]
4319                     ; jnb >large
4320                     ; jp >trap
4321                     ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4322                     ; cmp Rq(temp.rq().unwrap()), 0
4323                     ; jl >trap
4324                     ; jmp >cont
4325                 ; large:
4326                     ; subsd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0]
4327                     ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4328                     ; cmp Rq(temp.rq().unwrap()), 0
4329                     ; jnge >trap
4330                     ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
4331                     ; jmp >cont
4332                 ; trap:
4333                     ;; self.trap(TrapCode::BadConversionToInteger)
4334                 ; cont:
4335                 );
4336 
4337                 ValueLocation::Reg(temp)
4338             }
4339         };
4340 
4341         self.free_value(val)?;
4342 
4343         self.push(out_val)?;
4344         Ok(())
4345     }
4346 
4347     pub fn f32_convert_from_i32_u(&mut self) -> Result<(), Error> {
4348         let mut val = self.pop()?;
4349 
4350         let out_val = match val {
4351             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4352                 Ieee32::from_bits((imm.as_i32().unwrap() as u32 as f32).to_bits()).into(),
4353             ),
4354             _ => {
4355                 let reg = match self.put_into_register(I32, &mut val) {
4356                     Err(e) => return Err(e),
4357                     Ok(o) => {
4358                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4359                     }
4360                 };
4361 
4362                 let temp = self.take_reg(F32).unwrap();
4363 
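                // `mov r32, r32` zero-extends into the full 64-bit register, so the signed
                // 64-bit `cvtsi2ss` below produces the correct unsigned result.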
4364                 dynasm!(self.asm
4365                     ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap())
4366                     ; cvtsi2ss Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap())
4367                 );
4368 
4369                 ValueLocation::Reg(temp)
4370             }
4371         };
4372 
4373         self.free_value(val)?;
4374 
4375         self.push(out_val)?;
4376         Ok(())
4377     }
4378 
4379     pub fn f64_convert_from_i32_u(&mut self) -> Result<(), Error> {
4380         let mut val = self.pop()?;
4381 
4382         let out_val = match val {
4383             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4384                 Ieee64::from_bits((imm.as_i32().unwrap() as u32 as f64).to_bits()).into(),
4385             ),
4386             _ => {
4387                 let reg = match self.put_into_register(I32, &mut val) {
4388                     Err(e) => return Err(e),
4389                     Ok(o) => {
4390                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4391                     }
4392                 };
4393 
4394                 let temp = self.take_reg(F64).unwrap();
4395 
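                // As in `f32_convert_from_i32_u`: zero-extend to 64 bits and use the signed
                // 64-bit conversion, which is exact for every u32 value.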
4396                 dynasm!(self.asm
4397                     ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap())
4398                     ; cvtsi2sd Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap())
4399                 );
4400 
4401                 ValueLocation::Reg(temp)
4402             }
4403         };
4404 
4405         self.free_value(val)?;
4406 
4407         self.push(out_val)?;
4408         Ok(())
4409     }
4410 
4411     pub fn f32_convert_from_i64_u(&mut self) -> Result<(), Error> {
4412         let mut val = self.pop()?;
4413 
4414         let out_val = match val {
4415             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4416                 Ieee32::from_bits((imm.as_i64().unwrap() as u64 as f32).to_bits()).into(),
4417             ),
4418             _ => {
4419                 let reg = match self.put_into_register(I64, &mut val) {
4420                     Err(e) => return Err(e),
4421                     Ok(o) => {
4422                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4423                     }
4424                 };
4425 
4426                 let out = self.take_reg(F32).unwrap();
4427                 let temp = self.take_reg(I64).unwrap();
4428 
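                // Non-negative inputs convert directly. Inputs with the top bit set are halved
                // with the lost low bit folded back in (round-to-odd, which keeps the final
                // rounding correct), converted, and then doubled with `addss`.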
4429                 dynasm!(self.asm
4430                     ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap())
4431                     ; js >negative
4432                     ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
4433                     ; jmp >ret
4434                 ; negative:
4435                     ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
4436                     ; shr Rq(temp.rq().unwrap()), 1
4437                     ; and Rq(reg.rq().unwrap()), 1
4438                     ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap())
4439                     ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
4440                     ; addss Rx(out.rx().unwrap()), Rx(out.rx().unwrap())
4441                 ; ret:
4442                 );
4443 
4444                 self.free_value(ValueLocation::Reg(temp))?;
4445 
4446                 ValueLocation::Reg(out)
4447             }
4448         };
4449 
4450         self.free_value(val)?;
4451 
4452         self.push(out_val)?;
4453         Ok(())
4454     }
4455 
4456     pub fn f64_convert_from_i64_u(&mut self) -> Result<(), Error> {
4457         let mut val = self.pop()?;
4458 
4459         let out_val = match val {
4460             ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4461                 Ieee64::from_bits((imm.as_i64().unwrap() as u64 as f64).to_bits()).into(),
4462             ),
4463             _ => {
4464                 let reg = match self.put_into_register(I64, &mut val) {
4465                     Err(e) => return Err(e),
4466                     Ok(o) => {
4467                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4468                     }
4469                 };
4470 
4471                 let out = self.take_reg(F64).unwrap();
4472                 let temp = self.take_reg(I64).unwrap();
4473 
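                // Same round-to-odd halving trick as the f32 case above, doubling the result
                // with `addsd` at the end.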
4474                 dynasm!(self.asm
4475                     ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap())
4476                     ; js >negative
4477                     ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
4478                     ; jmp >ret
4479                 ; negative:
4480                     ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
4481                     ; shr Rq(temp.rq().unwrap()), 1
4482                     ; and Rq(reg.rq().unwrap()), 1
4483                     ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap())
4484                     ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
4485                     ; addsd Rx(out.rx().unwrap()), Rx(out.rx().unwrap())
4486                 ; ret:
4487                 );
4488 
4489                 self.free_value(ValueLocation::Reg(temp))?;
4490 
4491                 ValueLocation::Reg(out)
4492             }
4493         };
4494 
4495         self.free_value(val)?;
4496 
4497         self.push(out_val)?;
4498         Ok(())
4499     }
4500 
4501     pub fn i32_wrap_from_i64(&mut self) -> Result<(), Error> {
4502         let val = self.pop()?;
4503 
4504         let out = match val {
4505             ValueLocation::Immediate(imm) => {
4506                 ValueLocation::Immediate((imm.as_i64().unwrap() as u64 as u32).into())
4507             }
4508             val => val,
4509         };
4510 
4511         self.push(out)?;
4512         Ok(())
4513     }
4514 
4515     pub fn i32_reinterpret_from_f32(&mut self) -> Result<(), Error> {
4516         let val = self.pop()?;
4517 
4518         let out = match val {
4519             ValueLocation::Immediate(imm) => {
4520                 ValueLocation::Immediate(imm.as_f32().unwrap().to_bits().into())
4521             }
4522             val => val,
4523         };
4524 
4525         self.push(out)?;
4526         Ok(())
4527     }
4528 
4529     pub fn i64_reinterpret_from_f64(&mut self) -> Result<(), Error> {
4530         let val = self.pop()?;
4531 
4532         let out = match val {
4533             ValueLocation::Immediate(imm) => {
4534                 ValueLocation::Immediate(imm.as_f64().unwrap().to_bits().into())
4535             }
4536             val => val,
4537         };
4538 
4539         self.push(out)?;
4540         Ok(())
4541     }
4542 
4543     pub fn f32_reinterpret_from_i32(&mut self) -> Result<(), Error> {
4544         let val = self.pop()?;
4545 
4546         let out = match val {
4547             ValueLocation::Immediate(imm) => {
4548                 ValueLocation::Immediate(Ieee32::from_bits(imm.as_i32().unwrap() as _).into())
4549             }
4550             val => val,
4551         };
4552 
4553         self.push(out)?;
4554         Ok(())
4555     }
4556 
4557     pub fn f64_reinterpret_from_i64(&mut self) -> Result<(), Error> {
4558         let val = self.pop()?;
4559 
4560         let out = match val {
4561             ValueLocation::Immediate(imm) => {
4562                 ValueLocation::Immediate(Ieee64::from_bits(imm.as_i64().unwrap() as _).into())
4563             }
4564             val => val,
4565         };
4566 
4567         self.push(out)?;
4568         Ok(())
4569     }
4570 
4571     unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64);
4572 
4573     // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands
4574     //       are in registers.
4575     commutative_binop_i32!(i32_add, add, i32::wrapping_add);
4576     commutative_binop_i32!(i32_and, and, |a, b| a & b);
4577     commutative_binop_i32!(i32_or, or, |a, b| a | b);
4578     commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b);
4579     binop_i32!(i32_sub, sub, i32::wrapping_sub);
4580 
4581     commutative_binop_i64!(i64_add, add, i64::wrapping_add);
4582     commutative_binop_i64!(i64_and, and, |a, b| a & b);
4583     commutative_binop_i64!(i64_or, or, |a, b| a | b);
4584     commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b);
4585     binop_i64!(i64_sub, sub, i64::wrapping_sub);
4586 
4587     commutative_binop_f32!(f32_add, addss, |a, b| a + b);
4588     commutative_binop_f32!(f32_mul, mulss, |a, b| a * b);
4589     minmax_float!(
4590         f32_min,
4591         minss,
4592         ucomiss,
4593         addss,
4594         orps,
4595         as_f32,
4596         |a: Ieee32, b: Ieee32| Ieee32::from_bits(
4597             f32::from_bits(a.to_bits())
4598                 .min(f32::from_bits(b.to_bits()))
4599                 .to_bits()
4600         )
4601     );
4602     minmax_float!(
4603         f32_max,
4604         maxss,
4605         ucomiss,
4606         addss,
4607         andps,
4608         as_f32,
4609         |a: Ieee32, b: Ieee32| Ieee32::from_bits(
4610             f32::from_bits(a.to_bits())
4611                 .max(f32::from_bits(b.to_bits()))
4612                 .to_bits()
4613         )
4614     );
4615     binop_f32!(f32_sub, subss, |a, b| a - b);
4616     binop_f32!(f32_div, divss, |a, b| a / b);
4617 
4618     pub fn f32_ceil(&mut self) -> Result<(), Error> {
4619         self.relocated_function_call(
4620             &ir::ExternalName::LibCall(ir::LibCall::CeilF32),
4621             iter::once(F32),
4622             iter::once(F32),
4623             FunctionDefLocation::PossiblyExternal,
4624         )?;
4625         Ok(())
4626     }
4627 
4628     pub fn f32_floor(&mut self) -> Result<(), Error> {
4629         self.relocated_function_call(
4630             &ir::ExternalName::LibCall(ir::LibCall::FloorF32),
4631             iter::once(F32),
4632             iter::once(F32),
4633             FunctionDefLocation::PossiblyExternal,
4634         )?;
4635         Ok(())
4636     }
4637 
4638     pub fn f32_nearest(&mut self) -> Result<(), Error> {
4639         self.relocated_function_call(
4640             &ir::ExternalName::LibCall(ir::LibCall::NearestF32),
4641             iter::once(F32),
4642             iter::once(F32),
4643             FunctionDefLocation::PossiblyExternal,
4644         )?;
4645         Ok(())
4646     }
4647 
4648     pub fn f32_trunc(&mut self) -> Result<(), Error> {
4649         self.relocated_function_call(
4650             &ir::ExternalName::LibCall(ir::LibCall::TruncF32),
4651             iter::once(F32),
4652             iter::once(F32),
4653             FunctionDefLocation::PossiblyExternal,
4654         )?;
4655         Ok(())
4656     }
4657 
4658     commutative_binop_f64!(f64_add, addsd, |a, b| a + b);
4659     commutative_binop_f64!(f64_mul, mulsd, |a, b| a * b);
4660     minmax_float!(
4661         f64_min,
4662         minsd,
4663         ucomisd,
4664         addsd,
4665         orpd,
4666         as_f64,
4667         |a: Ieee64, b: Ieee64| Ieee64::from_bits(
4668             f64::from_bits(a.to_bits())
4669                 .min(f64::from_bits(b.to_bits()))
4670                 .to_bits()
4671         )
4672     );
4673     minmax_float!(
4674         f64_max,
4675         maxsd,
4676         ucomisd,
4677         addsd,
4678         andpd,
4679         as_f64,
4680         |a: Ieee64, b: Ieee64| Ieee64::from_bits(
4681             f64::from_bits(a.to_bits())
4682                 .max(f64::from_bits(b.to_bits()))
4683                 .to_bits()
4684         )
4685     );
4686     binop_f64!(f64_sub, subsd, |a, b| a - b);
4687     binop_f64!(f64_div, divsd, |a, b| a / b);
4688 
4689     pub fn f64_ceil(&mut self) -> Result<(), Error> {
4690         self.relocated_function_call(
4691             &ir::ExternalName::LibCall(ir::LibCall::CeilF64),
4692             iter::once(F64),
4693             iter::once(F64),
4694             FunctionDefLocation::PossiblyExternal,
4695         )?;
4696         Ok(())
4697     }
4698 
4699     pub fn f64_floor(&mut self) -> Result<(), Error> {
4700         self.relocated_function_call(
4701             &ir::ExternalName::LibCall(ir::LibCall::FloorF64),
4702             iter::once(F64),
4703             iter::once(F64),
4704             FunctionDefLocation::PossiblyExternal,
4705         )?;
4706         Ok(())
4707     }
4708 
4709     pub fn f64_nearest(&mut self) -> Result<(), Error> {
4710         self.relocated_function_call(
4711             &ir::ExternalName::LibCall(ir::LibCall::NearestF64),
4712             iter::once(F64),
4713             iter::once(F64),
4714             FunctionDefLocation::PossiblyExternal,
4715         )?;
4716         Ok(())
4717     }
4718 
4719     pub fn f64_trunc(&mut self) -> Result<(), Error> {
4720         self.relocated_function_call(
4721             &ir::ExternalName::LibCall(ir::LibCall::TruncF64),
4722             iter::once(F64),
4723             iter::once(F64),
4724             FunctionDefLocation::PossiblyExternal,
4725         )?;
4726         Ok(())
4727     }
4728 
4729     shift!(
4730         i32_shl,
4731         Rd,
4732         shl,
4733         |a, b| (a as i32).wrapping_shl(b as _),
4734         I32
4735     );
4736     shift!(
4737         i32_shr_s,
4738         Rd,
4739         sar,
4740         |a, b| (a as i32).wrapping_shr(b as _),
4741         I32
4742     );
4743     shift!(
4744         i32_shr_u,
4745         Rd,
4746         shr,
4747         |a, b| (a as u32).wrapping_shr(b as _),
4748         I32
4749     );
4750     shift!(
4751         i32_rotl,
4752         Rd,
4753         rol,
4754         |a, b| (a as u32).rotate_left(b as _),
4755         I32
4756     );
4757     shift!(
4758         i32_rotr,
4759         Rd,
4760         ror,
4761         |a, b| (a as u32).rotate_right(b as _),
4762         I32
4763     );
4764 
4765     shift!(
4766         i64_shl,
4767         Rq,
4768         shl,
4769         |a, b| (a as i64).wrapping_shl(b as _),
4770         I64
4771     );
4772     shift!(
4773         i64_shr_s,
4774         Rq,
4775         sar,
4776         |a, b| (a as i64).wrapping_shr(b as _),
4777         I64
4778     );
4779     shift!(
4780         i64_shr_u,
4781         Rq,
4782         shr,
4783         |a, b| (a as u64).wrapping_shr(b as _),
4784         I64
4785     );
4786     shift!(
4787         i64_rotl,
4788         Rq,
4789         rol,
4790         |a, b| (a as u64).rotate_left(b as _),
4791         I64
4792     );
4793     shift!(
4794         i64_rotr,
4795         Rq,
4796         ror,
4797         |a, b| (a as u64).rotate_right(b as _),
4798         I64
4799     );
4800 
4801     // TODO: Do this without emitting `mov`
4802     fn cleanup_gprs(&mut self, gprs: impl Iterator<Item = GPR>) {
4803         for gpr in gprs {
4804             dynasm!(self.asm
4805                 ; pop Rq(gpr.rq().unwrap())
4806             );
4807             self.block_state.depth.free(1);
4808             // DON'T MARK IT USED HERE! See comment in `full_div`
4809         }
4810     }
4811 
4812     int_div!(
4813         i32_full_div_s,
4814         i32_full_div_u,
4815         i32_div_u,
4816         i32_div_s,
4817         i32_rem_u,
4818         i32_rem_s,
4819         imm_i32,
4820         i32,
4821         u32,
4822         Rd,
4823         DWORD
4824     );
4825     int_div!(
4826         i64_full_div_s,
4827         i64_full_div_u,
4828         i64_div_u,
4829         i64_div_s,
4830         i64_rem_u,
4831         i64_rem_s,
4832         imm_i64,
4833         i64,
4834         u64,
4835         Rq,
4836         QWORD
4837     );
4838 
4839     // TODO: With a proper SSE-like "Value" system we could do this way better (we wouldn't have
4840     //       to move `RAX`/`RDX` back afterwards).
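    // x86 `div`/`idiv` take the dividend in RDX:RAX (EDX:EAX for the 32-bit forms) and leave the
    // quotient in RAX and the remainder in RDX. `full_div` therefore reserves both registers,
    // evicts the divisor from them if necessary, and pushes any live values they hold so that
    // `cleanup_gprs` can restore them afterwards.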
4841     fn full_div(
4842         &mut self,
4843         mut divisor: ValueLocation,
4844         dividend: ValueLocation,
4845         do_div: impl FnOnce(&mut Self, &mut ValueLocation) -> Result<(), Error>,
4846     ) -> Result<
4847         (
4848             ValueLocation,
4849             ValueLocation,
4850             impl Iterator<Item = GPR> + Clone + 'this,
4851         ),
4852         Error,
4853     > {
4854         // To stop `take_reg` from allocating either of these necessary registers
4855         self.block_state.regs.mark_used(RAX);
4856         self.block_state.regs.mark_used(RDX);
4857         if divisor == ValueLocation::Reg(RAX) || divisor == ValueLocation::Reg(RDX) {
4858             let new_reg = self.take_reg(GPRType::Rq).unwrap();
4859             self.copy_value(divisor, CCLoc::Reg(new_reg))?;
4860             self.free_value(divisor)?;
4861 
4862             divisor = ValueLocation::Reg(new_reg);
4863         }
4864         self.block_state.regs.release(RAX)?;
4865         self.block_state.regs.release(RDX)?;
4866 
4867         let saved_rax = if self.block_state.regs.is_free(RAX) {
4868             None
4869         } else {
4870             dynasm!(self.asm
4871                 ; push rax
4872             );
4873             self.block_state.depth.reserve(1);
4874             // DON'T FREE THIS REGISTER HERE - since we don't
4875             // remove it from the stack, freeing the register
4876             // here will cause `take_reg` to allocate it.
4877             Some(())
4878         };
4879 
4880         let saved_rdx = if self.block_state.regs.is_free(RDX) {
4881             None
4882         } else {
4883             dynasm!(self.asm
4884                 ; push rdx
4885             );
4886             self.block_state.depth.reserve(1);
4887             // DON'T FREE THIS REGISTER HERE - since we don't
4888             // remove it from the stack, freeing the register
4889             // here will cause `take_reg` to allocate it.
4890             Some(())
4891         };
4892 
4893         let saved = saved_rdx
4894             .map(|_| RDX)
4895             .into_iter()
4896             .chain(saved_rax.map(|_| RAX));
4897 
4898         self.copy_value(dividend, CCLoc::Reg(RAX))?;
4899         self.block_state.regs.mark_used(RAX);
4900 
4901         self.free_value(dividend)?;
4902         // To stop `take_reg` from allocating either of these necessary registers
4903         self.block_state.regs.mark_used(RDX);
4904 
4905         do_div(self, &mut divisor)?;
4906         self.free_value(divisor)?;
4907 
4908         if self.block_state.regs.is_free(RAX) {
4909             return Err(Error::Microwasm("full_div: RAX was unexpectedly free".to_string()));
4910         }
4911         if self.block_state.regs.is_free(RDX) {
4912             return Err(Error::Microwasm("full_div: RDX was unexpectedly free".to_string()));
4913         }
4914 
4915         Ok((ValueLocation::Reg(RAX), ValueLocation::Reg(RDX), saved))
4916     }
4917 
4918     fn i32_full_div_u(
4919         &mut self,
4920         divisor: ValueLocation,
4921         dividend: ValueLocation,
4922     ) -> Result<
4923         (
4924             ValueLocation,
4925             ValueLocation,
4926             impl Iterator<Item = GPR> + Clone + 'this,
4927         ),
4928         Error,
4929     > {
4930         self.full_div(divisor, dividend, |this, divisor| match divisor {
4931             ValueLocation::Stack(offset) => {
4932                 let offset = this.adjusted_offset(*offset);
4933                 dynasm!(this.asm
4934                     ; xor edx, edx
4935                     ; div DWORD [rsp + offset]
4936                 );
4937                 Ok(())
4938             }
4939             ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
4940                 let r = match this.put_into_register(I32, divisor) {
4941                     Err(e) => return Err(e),
4942                     Ok(o) => {
4943                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4944                     }
4945                 };
4946 
4947                 dynasm!(this.asm
4948                     ; xor edx, edx
4949                     ; div Rd(r.rq().unwrap())
4950                 );
4951                 Ok(())
4952             }
4953         })
4954     }
4955 
4956     fn i32_full_div_s(
4957         &mut self,
4958         divisor: ValueLocation,
4959         dividend: ValueLocation,
4960     ) -> Result<
4961         (
4962             ValueLocation,
4963             ValueLocation,
4964             impl Iterator<Item = GPR> + Clone + 'this,
4965         ),
4966         Error,
4967     > {
4968         self.full_div(divisor, dividend, |this, divisor| match divisor {
4969             ValueLocation::Stack(offset) => {
4970                 let offset = this.adjusted_offset(*offset);
4971                 dynasm!(this.asm
4972                     ; cdq
4973                     ; idiv DWORD [rsp + offset]
4974                 );
4975                 Ok(())
4976             }
4977             ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
4978                 let r = match this.put_into_register(I32, divisor) {
4979                     Err(e) => return Err(e),
4980                     Ok(o) => {
4981                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4982                     }
4983                 };
4984 
4985                 dynasm!(this.asm
4986                     ; cdq
4987                     ; idiv Rd(r.rq().unwrap())
4988                 );
4989                 Ok(())
4990             }
4991         })
4992     }
4993 
4994     fn i64_full_div_u(
4995         &mut self,
4996         divisor: ValueLocation,
4997         dividend: ValueLocation,
4998     ) -> Result<
4999         (
5000             ValueLocation,
5001             ValueLocation,
5002             impl Iterator<Item = GPR> + Clone + 'this,
5003         ),
5004         Error,
5005     > {
5006         self.full_div(divisor, dividend, |this, divisor| match divisor {
5007             ValueLocation::Stack(offset) => {
5008                 let offset = this.adjusted_offset(*offset);
5009                 dynasm!(this.asm
5010                     ; xor rdx, rdx
5011                     ; div QWORD [rsp + offset]
5012                 );
5013                 Ok(())
5014             }
5015             ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
5016                 let r = match this.put_into_register(I64, divisor) {
5017                     Err(e) => return Err(e),
5018                     Ok(o) => {
5019                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5020                     }
5021                 };
5022                 dynasm!(this.asm
5023                     ; xor rdx, rdx
5024                     ; div Rq(r.rq().unwrap())
5025                 );
5026                 Ok(())
5027             }
5028         })
5029     }
5030 
5031     fn i64_full_div_s(
5032         &mut self,
5033         divisor: ValueLocation,
5034         dividend: ValueLocation,
5035     ) -> Result<
5036         (
5037             ValueLocation,
5038             ValueLocation,
5039             impl Iterator<Item = GPR> + Clone + 'this,
5040         ),
5041         Error,
5042     > {
5043         self.full_div(divisor, dividend, |this, divisor| match divisor {
5044             ValueLocation::Stack(offset) => {
5045                 let offset = this.adjusted_offset(*offset);
5046                 dynasm!(this.asm
5047                     ; cqo
5048                     ; idiv QWORD [rsp + offset]
5049                 );
5050                 Ok(())
5051             }
5052             ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
5053                 let r = match this.put_into_register(I64, divisor) {
5054                     Err(e) => return Err(e),
5055                     Ok(o) => {
5056                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5057                     }
5058                 };
5059 
5060                 dynasm!(this.asm
5061                     ; cqo
5062                     ; idiv Rq(r.rq().unwrap())
5063                 );
5064                 Ok(())
5065             }
5066         })
5067     }
5068 
5069     // `i32_mul` needs to be separate because the immediate form of the instruction
5070     // has a different syntax to the immediate form of the other instructions.
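    // (The three-operand form `imul r32, r/m32, imm32` writes to a separate destination, which
    // is why the immediate case below allocates a fresh register instead of the usual
    // read-modify-write pattern.)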
5071     pub fn i32_mul(&mut self) -> Result<(), Error> {
5072         let right = self.pop()?;
5073         let left = self.pop()?;
5074 
5075         if let Some(right) = right.immediate() {
5076             if let Some(left) = left.immediate() {
5077                 self.push(ValueLocation::Immediate(
5078                     i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(),
5079                 ))?;
5080                 return Ok(());
5081             }
5082         }
5083 
5084         let (mut left, mut right) = match left {
5085             ValueLocation::Reg(_) => (left, right),
5086             _ => {
5087                 if right.immediate().is_some() {
5088                     (left, right)
5089                 } else {
5090                     (right, left)
5091                 }
5092             }
5093         };
5094 
5095         let out = match right {
5096             ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
5097                 let rreg = match self.put_into_register(I32, &mut right) {
5098                     Err(e) => return Err(e),
5099                     Ok(o) => {
5100                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5101                     }
5102                 };
5103                 let lreg = match self.put_into_temp_register(I32, &mut left) {
5104                     Err(e) => return Err(e),
5105                     Ok(o) => {
5106                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5107                     }
5108                 };
5109 
5110                 dynasm!(self.asm
5111                     ; imul Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap())
5112                 );
5113                 left
5114             }
5115             ValueLocation::Stack(offset) => {
5116                 let offset = self.adjusted_offset(offset);
5117 
5118                 let lreg = match self.put_into_temp_register(I32, &mut left) {
5119                     Err(e) => return Err(e),
5120                     Ok(o) => {
5121                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5122                     }
5123                 };
5124 
5125                 dynasm!(self.asm
5126                     ; imul Rd(lreg.rq().unwrap()), [rsp + offset]
5127                 );
5128                 left
5129             }
5130             ValueLocation::Immediate(i) => {
5131                 let lreg = match self.put_into_register(I32, &mut left) {
5132                     Err(e) => return Err(e),
5133                     Ok(o) => {
5134                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5135                     }
5136                 };
5137 
5138                 let new_reg = self.take_reg(I32).unwrap();
5139                 dynasm!(self.asm
5140                     ; imul Rd(new_reg.rq().unwrap()), Rd(lreg.rq().unwrap()), i.as_i32().unwrap()
5141                 );
5142                 self.free_value(left)?;
5143                 ValueLocation::Reg(new_reg)
5144             }
5145         };
5146 
5147         self.push(out)?;
5148         self.free_value(right)?;
5149         Ok(())
5150     }
5151 
5152     // `i64_mul` needs to be separate because the immediate form of the instruction
5153     // has a different syntax to the immediate form of the other instructions.
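    // (Additionally, the immediate form only accepts a sign-extended 32-bit immediate, so
    // immediates that don't fit in an `i32` fall back to a plain register-register `imul`;
    // hence the `try_into` below.)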
5154     pub fn i64_mul(&mut self) -> Result<(), Error> {
5155         let right = self.pop()?;
5156         let left = self.pop()?;
5157 
5158         if let Some(right) = right.immediate() {
5159             if let Some(left) = left.immediate() {
5160                 self.push(ValueLocation::Immediate(
5161                     i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(),
5162                 ))?;
5163                 return Ok(());
5164             }
5165         }
5166 
5167         let (mut left, mut right) = match left {
5168             ValueLocation::Reg(_) => (left, right),
5169             _ => {
5170                 if right.immediate().is_some() {
5171                     (left, right)
5172                 } else {
5173                     (right, left)
5174                 }
5175             }
5176         };
5177 
5178         let out = match right {
5179             ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
5180                 let rreg = match self.put_into_register(I64, &mut right) {
5181                     Err(e) => return Err(e),
5182                     Ok(o) => {
5183                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5184                     }
5185                 };
5186                 let lreg = match self.put_into_temp_register(I64, &mut left) {
5187                     Err(e) => return Err(e),
5188                     Ok(o) => {
5189                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5190                     }
5191                 };
5192 
5193                 dynasm!(self.asm
5194                     ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
5195                 );
5196                 left
5197             }
5198             ValueLocation::Stack(offset) => {
5199                 let offset = self.adjusted_offset(offset);
5200 
5201                 let lreg = match self.put_into_temp_register(I64, &mut left) {
5202                     Err(e) => return Err(e),
5203                     Ok(o) => {
5204                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5205                     }
5206                 };
5207 
5208                 dynasm!(self.asm
5209                     ; imul Rq(lreg.rq().unwrap()), [rsp + offset]
5210                 );
5211                 left
5212             }
5213             ValueLocation::Immediate(i) => {
5214                 let i = i.as_i64().unwrap();
5215                 if let Ok(i) = i.try_into() {
5216                     let new_reg = self.take_reg(I64).unwrap();
5217 
5218                     let lreg = self
5219                         .put_into_register(I64, &mut left)?
5220                         .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
5221 
5222                     dynasm!(self.asm
5223                         ; imul Rq(new_reg.rq().unwrap()), Rq(lreg.rq().unwrap()), i
5224                     );
5225 
5226                     self.free_value(left)?;
5227 
5228                     ValueLocation::Reg(new_reg)
5229                 } else {
5230                     let rreg = self
5231                         .put_into_register(I64, &mut right)?
5232                         .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
5233                     let lreg = self
5234                         .put_into_temp_register(I64, &mut left)?
5235                         .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
5236 
5237                     dynasm!(self.asm
5238                         ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
5239                     );
5240                     left
5241                 }
5242             }
5243         };
5244 
5245         self.push(out)?;
5246         self.free_value(right)?;
5247         Ok(())
5248     }
5249 
5250     fn cmov(&mut self, cond_code: CondCode, dst: GPR, src: CCLoc) {
5251         match src {
5252             CCLoc::Reg(reg) => match cond_code {
5253                 cc::EQUAL => {
5254                     dynasm!(self.asm
5255                         ; cmove Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5256                     );
5257                 }
5258                 cc::NOT_EQUAL => {
5259                     dynasm!(self.asm
5260                         ; cmovne Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5261                     );
5262                 }
5263                 cc::GE_U => {
5264                     dynasm!(self.asm
5265                         ; cmovae Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5266                     );
5267                 }
5268                 cc::LT_U => {
5269                     dynasm!(self.asm
5270                         ; cmovb Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5271                     );
5272                 }
5273                 cc::GT_U => {
5274                     dynasm!(self.asm
5275                         ; cmova Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5276                     );
5277                 }
5278                 cc::LE_U => {
5279                     dynasm!(self.asm
5280                         ; cmovbe Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5281                     );
5282                 }
5283                 cc::GE_S => {
5284                     dynasm!(self.asm
5285                         ; cmovge Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5286                     );
5287                 }
5288                 cc::LT_S => {
5289                     dynasm!(self.asm
5290                         ; cmovl Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5291                     );
5292                 }
5293                 cc::GT_S => {
5294                     dynasm!(self.asm
5295                         ; cmovg Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5296                     );
5297                 }
5298                 cc::LE_S => {
5299                     dynasm!(self.asm
5300                         ; cmovle Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
5301                     );
5302                 }
5303             },
5304             CCLoc::Stack(offset) => {
5305                 let offset = self.adjusted_offset(offset);
5306 
5307                 match cond_code {
5308                     cc::EQUAL => {
5309                         dynasm!(self.asm
5310                             ; cmove Rq(dst.rq().unwrap()), [rsp + offset]
5311                         );
5312                     }
5313                     cc::NOT_EQUAL => {
5314                         dynasm!(self.asm
5315                             ; cmovne Rq(dst.rq().unwrap()), [rsp + offset]
5316                         );
5317                     }
5318                     cc::GE_U => {
5319                         dynasm!(self.asm
5320                             ; cmovae Rq(dst.rq().unwrap()), [rsp + offset]
5321                         );
5322                     }
5323                     cc::LT_U => {
5324                         dynasm!(self.asm
5325                             ; cmovb Rq(dst.rq().unwrap()), [rsp + offset]
5326                         );
5327                     }
5328                     cc::GT_U => {
5329                         dynasm!(self.asm
5330                             ; cmova Rq(dst.rq().unwrap()), [rsp + offset]
5331                         );
5332                     }
5333                     cc::LE_U => {
5334                         dynasm!(self.asm
5335                             ; cmovbe Rq(dst.rq().unwrap()), [rsp + offset]
5336                         );
5337                     }
5338                     cc::GE_S => {
5339                         dynasm!(self.asm
5340                             ; cmovge Rq(dst.rq().unwrap()), [rsp + offset]
5341                         );
5342                     }
5343                     cc::LT_S => {
5344                         dynasm!(self.asm
5345                             ; cmovl Rq(dst.rq().unwrap()), [rsp + offset]
5346                         );
5347                     }
5348                     cc::GT_S => {
5349                         dynasm!(self.asm
5350                             ; cmovg Rq(dst.rq().unwrap()), [rsp + offset]
5351                         );
5352                     }
5353                     cc::LE_S => {
5354                         dynasm!(self.asm
5355                             ; cmovle Rq(dst.rq().unwrap()), [rsp + offset]
5356                         );
5357                     }
5358                 }
5359             }
5360         }
5361     }
5362 
5363     pub fn select(&mut self) -> Result<(), Error> {
5364         let mut cond = self.pop()?;
5365         let mut else_ = self.pop()?;
5366         let mut then = self.pop()?;
5367 
5368         if let ValueLocation::Immediate(i) = cond {
5369             if i.as_i32().unwrap() == 0 {
5370                 self.free_value(then)?;
5371                 self.push(else_)?;
5372             } else {
5373                 self.free_value(else_)?;
5374                 self.push(then)?;
5375             }
5376 
5377             return Ok(());
5378         }
5379 
5380         let cond_code = match cond {
5381             ValueLocation::Cond(cc) => cc,
5382             _ => {
5383                 let cond_reg = match self.put_into_register(I32, &mut cond) {
5384                     Err(e) => return Err(e),
5385                     Ok(o) => {
5386                         o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5387                     }
5388                 };
5389                 dynasm!(self.asm
5390                     ; test Rd(cond_reg.rq().unwrap()), Rd(cond_reg.rq().unwrap())
5391                 );
5392                 self.free_value(cond)?;
5393 
5394                 cc::NOT_EQUAL
5395             }
5396         };
5397 
5398         let else_ = if let ValueLocation::Stack(offset) = else_ {
5399             CCLoc::Stack(offset)
5400         } else {
5401             let gpr = match self.put_into_register(I32, &mut else_) {
5402                 Err(e) => return Err(e),
5403                 Ok(o) => {
5404                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5405                 }
5406             };
5407             CCLoc::Reg(gpr)
5408         };
5409 
5410         let then = if let ValueLocation::Stack(offset) = then {
5411             CCLoc::Stack(offset)
5412         } else {
5413             let gpr = match self.put_into_register(I32, &mut then) {
5414                 Err(e) => return Err(e),
5415                 Ok(o) => {
5416                     o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
5417                 }
5418             };
5419             CCLoc::Reg(gpr)
5420         };
5421 
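        // Prefer reusing a register that only holds one of the operands as the result register;
        // otherwise copy `else_` into a fresh register and conditionally overwrite it with `then`.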
5422         let out_gpr = match (then, else_) {
5423             (CCLoc::Reg(then_reg), else_) if self.block_state.regs.num_usages(then_reg) <= 1 => {
5424                 self.cmov(!cond_code, then_reg, else_);
5425                 self.free_value(else_.into())?;
5426 
5427                 then_reg
5428             }
5429             (then, CCLoc::Reg(else_reg)) if self.block_state.regs.num_usages(else_reg) <= 1 => {
5430                 self.cmov(cond_code, else_reg, then);
5431                 self.free_value(then.into())?;
5432 
5433                 else_reg
5434             }
5435             (then, else_) => {
5436                 let out = self.take_reg(GPRType::Rq).unwrap();
5437                 self.copy_value(else_.into(), CCLoc::Reg(out))?;
5438                 self.cmov(cond_code, out, then);
5439 
5440                 self.free_value(then.into())?;
5441                 self.free_value(else_.into())?;
5442 
5443                 out
5444             }
5445         };
5446 
5447         self.push(ValueLocation::Reg(out_gpr))?;
5448         Ok(())
5449     }
5450 
5451     pub fn pick(&mut self, depth: u32) {
5452         let idx = self.block_state.stack.len() - 1 - depth as usize;
5453         let v = self.block_state.stack[idx];
5454         if let ValueLocation::Reg(r) = v {
5455             self.block_state.regs.mark_used(r);
5456         }
5457         self.block_state.stack.push(v);
5458     }
5459 
5460     pub fn const_(&mut self, imm: Value) -> Result<(), Error> {
5461         self.push(ValueLocation::Immediate(imm))?;
5462         Ok(())
5463     }
5464 
5465     fn relocated_function_call<
5466         A: IntoIterator<Item = SignlessType>,
5467         R: IntoIterator<Item = SignlessType>,
5468     >(
5469         &mut self,
5470         name: &cranelift_codegen::ir::ExternalName,
5471         args: A,
5472         rets: R,
5473         func_def_loc: FunctionDefLocation,
5474     ) -> Result<(), Error>
5475     where
5476         A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
5477         R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
5478     {
5479         let locs = arg_locs_skip_caller_vmctx(args);
5480 
5481         let saved_vmctx = if func_def_loc == FunctionDefLocation::PossiblyExternal {
5482             dynasm!(self.asm
5483                 ; mov Rq(CALLER_VMCTX), Rq(VMCTX)
5484             );
5485             self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));
5486             self.block_state.regs.mark_used(GPR::Rq(VMCTX));
5487             Some(self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?)
5488         } else {
5489             None
5490         };
5491 
5492         self.save_volatile()?;
5493 
5494         self.pass_outgoing_args(&locs)?;
5495 
5496         // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate
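        // (`mov r64, imm64` encodes as a REX.W prefix plus `B8+rd`, so the Abs8 relocation lands
        // on the 8-byte immediate that currently holds the 0xDEAD_BEEF placeholder below.)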
5497         self.reloc_sink.reloc_external(
5498             (self.asm.offset().0
5499                 - self.func_starts[self.current_function as usize]
5500                     .0
5501                     .unwrap()
5502                     .0) as u32
5503                 + 2,
5504             // Passing a default source location here since, until proven otherwise, it's not used.
5505             ir::SourceLoc::default(),
5506             binemit::Reloc::Abs8,
5507             name,
5508             0,
5509         );
5510         let temp = self.take_reg(I64).unwrap();
5511 
5512         dynasm!(self.asm
5513             ; mov Rq(temp.rq().unwrap()), QWORD 0xDEAD_BEEF_DEAD_BEEF_u64 as i64
5514             ; call Rq(temp.rq().unwrap())
5515         );
5516         self.block_state.regs.release(temp)?;
5517 
5518         for i in locs {
5519             self.free_value(i.into())?;
5520         }
5521 
5522         self.push_function_returns(rets)?;
5523 
5524         if func_def_loc == FunctionDefLocation::PossiblyExternal {
5525             let saved_vmctx = saved_vmctx.unwrap();
5526             self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
5527             self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?;
5528             self.free_value(saved_vmctx)?;
5529         }
5530 
5531         Ok(())
5532     }
5533 
5534     fn builtin_function_call<
5535         A: IntoIterator<Item = SignlessType>,
5536         R: IntoIterator<Item = SignlessType>,
5537     >(
5538         &mut self,
5539         i: BuiltinFunctionIndex,
5540         args: A,
5541         rets: R,
5542     ) -> Result<(), Error>
5543     where
5544         A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
5545         R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
5546     {
5547         let locs = arg_locs(args);
5548 
5549         dynasm!(self.asm
5550             ; push Rq(VMCTX)
5551         );
5552         self.block_state.depth.reserve(1);
5553         let depth = self.block_state.depth.clone();
5554 
5555         self.save_volatile()?;
5556 
5557         self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
5558         self.pass_outgoing_args(&locs)?;
5559 
5560         let temp = self.take_reg(I64).unwrap();
5561         dynasm!(self.asm
5562             ; mov Rq(temp.rq().unwrap()), [
5563                 Rq(VMCTX) + self.module_context.vmctx_builtin_function(i.index()) as i32
5564             ]
5565             ; call Rq(temp.rq().unwrap())
5566         );
5567 
5568         self.block_state.regs.release(temp)?;
5569 
5570         for i in locs {
5571             self.free_value(i.into())?;
5572         }
5573         self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));
5574 
5575         self.push_function_returns(rets)?;
5576 
5577         self.set_stack_depth(depth)?;
5578         dynasm!(self.asm
5579             ; pop Rq(VMCTX)
5580         );
5581         self.block_state.depth.free(1);
5582 
5583         Ok(())
5584     }
5585 
5586     // TODO: Other memory indices
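    // Only memory index 0 is handled: locally-defined memories dispatch to the `memory32_size`
    // builtin and imported memories to the imported variant (likewise for `memory_grow` below).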
5587     pub fn memory_size(&mut self) -> Result<(), Error> {
5588         let memory_index = 0;
5589         if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) {
5590             self.push(ValueLocation::Immediate(defined_memory_index.into()))?;
5591             self.builtin_function_call(
5592                 BuiltinFunctionIndex::get_memory32_size_index(),
5593                 [self.pointer_type].iter().copied(),
5594                 [self.pointer_type].iter().copied(),
5595             )?;
5596         } else {
5597             self.push(ValueLocation::Immediate(memory_index.into()))?;
5598             self.builtin_function_call(
5599                 BuiltinFunctionIndex::get_imported_memory32_size_index(),
5600                 [self.pointer_type].iter().copied(),
5601                 [self.pointer_type].iter().copied(),
5602             )?;
5603         }
5604         Ok(())
5605     }
5606 
5607     // TODO: Other memory indices
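    /// Emits code for `memory.grow` on memory index 0; like `memory_size`, this dispatches to
    /// the defined or imported variant of the builtin.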
    pub fn memory_grow(&mut self) -> Result<(), Error> {
        let memory_index = 0;
        if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) {
            self.push(ValueLocation::Immediate(defined_memory_index.into()))?;
            self.builtin_function_call(
                BuiltinFunctionIndex::get_memory32_grow_index(),
                [self.pointer_type, self.pointer_type].iter().copied(),
                [self.pointer_type].iter().copied(),
            )?;
        } else {
            self.push(ValueLocation::Immediate(memory_index.into()))?;
            self.builtin_function_call(
                BuiltinFunctionIndex::get_imported_memory32_grow_index(),
                [self.pointer_type, self.pointer_type].iter().copied(),
                [self.pointer_type].iter().copied(),
            )?;
        }
        Ok(())
    }

    // TODO: Use `ArrayVec`?
    // TODO: This inefficiently duplicates registers but it's not really possible
    //       to double up stack space right now.
    /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
    fn save_volatile(&mut self) -> Result<(), Error> {
        self.save_regs(SCRATCH_REGS.iter().copied())?;
        Ok(())
    }

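    /// Spills any value on the virtual stack that currently lives in one of the `to_save`
    /// registers to the physical stack, rewriting later stack entries that alias the same
    /// location so they point at the spilled copy instead.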
    fn save_regs<I>(&mut self, to_save: I) -> Result<(), Error>
    where
        I: IntoIterator<Item = GPR>,
        I::IntoIter: Clone,
    {
        // TODO: We could filter out registers that are already marked free, but for now we
        //       don't, just to ensure that this doesn't fail when confronted with the
        //       `memory_grow`/`memory_size` weirdness.
        let to_save = to_save.into_iter();
        if to_save.clone().count() == 0 {
            return Ok(());
        }

        let mut stack = mem::replace(&mut self.block_state.stack, vec![]);
        let mut slice = &mut stack[..];

        while let Some((first, rest)) = slice.split_first_mut() {
            if let ValueLocation::Reg(vreg) = *first {
                if to_save.clone().any(|r| r == vreg) {
                    let old = *first;
                    *first = self.push_physical(old)?;
                    for val in &mut *rest {
                        if *val == old {
                            self.free_value(*val)?;
                            *val = *first;
                        }
                    }
                }
            }

            slice = rest;
        }

        self.block_state.stack = stack;

        Ok(())
    }

    /// Writes the callee's arguments into registers and onto the stack according to the SystemV
    /// calling convention.
    fn pass_outgoing_args(
        &mut self,
        out_locs: &(impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone),
    ) -> Result<(), Error> {
        let total_stack_space = out_locs
            .clone()
            .flat_map(|l| {
                if let CCLoc::Stack(offset) = l {
                    if offset >= 0 {
                        Some(offset as u32 + 1)
                    } else {
                        None
                    }
                } else {
                    None
                }
            })
            .max()
            .unwrap_or(0);
        let original_depth = self.block_state.depth.clone();
        let mut needed_depth = original_depth.clone();
        needed_depth.reserve(total_stack_space);

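        // Round the reserved depth up to an even number of slots; each slot is one 8-byte word,
        // so this presumably keeps RSP 16-byte aligned at the call site as the SysV ABI expects.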
        if needed_depth.0 & 1 != 0 {
            needed_depth.reserve(1);
        }

        self.set_stack_depth(needed_depth.clone())?;

        let mut pending = Vec::<(ValueLocation, CCLoc)>::with_capacity(out_locs.len());

        for loc in out_locs.clone().rev() {
            let val = self.pop()?;

            pending.push((val, loc));
        }

        while !pending.is_empty() {
            let start_len = pending.len();

            for (src, dst) in mem::replace(&mut pending, vec![]) {
                if src != ValueLocation::from(dst) {
                    let dst = match dst {
                        CCLoc::Reg(r) => {
                            if !self.block_state.regs.is_free(r) {
                                pending.push((src, dst));
                                continue;
                            }

                            self.block_state.regs.mark_used(r);

                            dst
                        }
                        CCLoc::Stack(offset) => CCLoc::Stack(offset - needed_depth.0 as i32),
                    };

                    self.copy_value(src, dst)?;
                    self.free_value(src)?;
                }
            }

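            // No progress was made this pass: every remaining destination register is still
            // occupied by another pending source, i.e. we have a cycle. Break it by spilling
            // one register source to the physical stack.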
            if pending.len() == start_len {
                let src = match pending
                    .iter()
                    .filter_map(|(src, _)| {
                        if let ValueLocation::Reg(reg) = src {
                            Some(reg)
                        } else {
                            None
                        }
                    })
                    .next()
                {
                    None => {
                        return Err(Error::Microwasm(
                            "Programmer error: We shouldn't need to push \
                             intermediate args if we don't have any argument sources in registers"
                                .to_string(),
                        ));
                    }
                    Some(val) => *val,
                };
                let new_src = self.push_physical(ValueLocation::Reg(src))?;
                for (old_src, _) in pending.iter_mut() {
                    if *old_src == ValueLocation::Reg(src) {
                        *old_src = new_src;
                    }
                }
            }
        }

        // We do this a second time just in case we had to use `push_physical` to resolve cycles in
        // `pending`
        self.set_stack_depth(needed_depth)?;

        Ok(())
    }

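    /// Marks the return-value locations of a call as live and pushes them onto the virtual stack.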
    fn push_function_returns(
        &mut self,
        returns: impl IntoIterator<Item = SignlessType>,
    ) -> Result<(), Error> {
        for loc in ret_locs(returns)? {
            if let CCLoc::Reg(reg) = loc {
                self.block_state.regs.mark_used(reg);
            }

            self.push(loc.into())?;
        }
        Ok(())
    }

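    /// Emits a trap that fires when `ccode` holds: the code branches around the trap
    /// instruction on the negated condition and falls through otherwise.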
    fn trap_if(&mut self, ccode: CondCode, trap_code: TrapCode) {
        let label = self.create_label();
        self.br_on_cond_code(label, !ccode);
        self.trap(trap_code);
        self.define_label(label);
    }

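    /// Emits an indirect call through table 0: the callee index popped from the stack is
    /// bounds-checked against the table's `current_elements`, the stored signature id is
    /// checked against `type_id`, and the call then goes through the function pointer and
    /// `vmctx` stored in the table's `VMCallerCheckedAnyfunc` entry.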
    pub fn call_indirect<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        type_id: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        dynasm!(self.asm
            ; push Rq(VMCTX)
        );
        self.block_state.depth.reserve(1);
        let depth = self.block_state.depth.clone();

        let locs = arg_locs_skip_caller_vmctx(arg_types);

        for loc in locs.clone() {
            if let CCLoc::Reg(r) = loc {
                self.block_state.regs.mark_used(r);
            }
        }

        let mut callee = self.pop()?;
        let callee_reg = self
            .put_into_temp_register(I32, &mut callee)?
            .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

        self.save_volatile()?;

        for loc in locs.clone() {
            if let CCLoc::Reg(r) = loc {
                self.block_state.regs.release(r)?;
            }
        }

        self.pass_outgoing_args(&locs)?;

        dynasm!(self.asm
            ; mov Rq(CALLER_VMCTX), Rq(VMCTX)
        );
        self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));

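        // Locate the table definition: a locally-defined table lives at a fixed offset inside
        // this VMContext, while an imported table is reached through a pointer loaded from the
        // table import entry.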
        let table_index = 0;
        let reg_offset = self
            .module_context
            .defined_table_index(table_index)
            .map(|index| {
                (
                    None,
                    self.module_context.vmctx_vmtable_definition(index) as i32,
                )
            });

        let vmctx = GPR::Rq(VMCTX);
        let (reg, offset) = reg_offset.unwrap_or_else(|| {
            let reg = self.take_reg(I64).unwrap();

            dynasm!(self.asm
                ; mov Rq(reg.rq().unwrap()), [
                    Rq(VMCTX) + self.module_context.vmctx_vmtable_import_from(table_index) as i32
                ]
            );

            (Some(reg), 0)
        });

        let temp0 = self.take_reg(I64).unwrap();
        dynasm!(self.asm
            ; cmp Rd(callee_reg.rq().unwrap()), [
                Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                    offset +
                    self.module_context.vmtable_definition_current_elements() as i32
            ]
            ;; self.trap_if(cc::GE_U, TrapCode::TableOutOfBounds)
            ; imul
                Rd(callee_reg.rq().unwrap()),
                Rd(callee_reg.rq().unwrap()),
                self.module_context.size_of_vmcaller_checked_anyfunc() as i32
            ; mov Rq(temp0.rq().unwrap()), [
                Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                    offset +
                    self.module_context.vmtable_definition_base() as i32
            ]
        );

        if let Some(reg) = reg {
            self.block_state.regs.release(reg)?;
        }

        let temp1 = self.take_reg(I64).unwrap();

        dynasm!(self.asm
            ; mov Rd(temp1.rq().unwrap()), [
                Rq(VMCTX) +
                    self.module_context
                        .vmctx_vmshared_signature_id(type_id) as i32
            ]
            ; cmp DWORD [
                Rq(temp0.rq().unwrap()) +
                    Rq(callee_reg.rq().unwrap()) +
                    self.module_context.vmcaller_checked_anyfunc_type_index() as i32
            ], Rd(temp1.rq().unwrap())
            ;; self.trap_if(cc::NOT_EQUAL, TrapCode::BadSignature)
            ; mov Rq(VMCTX), [
                Rq(temp0.rq().unwrap()) +
                    Rq(callee_reg.rq().unwrap()) +
                    self.module_context.vmcaller_checked_anyfunc_vmctx() as i32
            ]
            ; call QWORD [
                Rq(temp0.rq().unwrap()) +
                    Rq(callee_reg.rq().unwrap()) +
                    self.module_context.vmcaller_checked_anyfunc_func_ptr() as i32
            ]
        );

        self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
        self.block_state.regs.release(temp0)?;
        self.block_state.regs.release(temp1)?;
        self.free_value(callee)?;

        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(return_types)?;

        self.set_stack_depth(depth)?;
        dynasm!(self.asm
            ; pop Rq(VMCTX)
        );
        self.block_state.depth.free(1);

        Ok(())
    }

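    /// Swaps the top of the virtual value stack with the entry `depth` slots below it.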
    pub fn swap(&mut self, depth: u32) {
        let last = self.block_state.stack.len() - 1;
        self.block_state.stack.swap(last, last - depth as usize);
    }

    /// Call a function with the given index
    pub fn call_direct<A: IntoIterator<Item = SignlessType>, R: IntoIterator<Item = SignlessType>>(
        &mut self,
        index: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        self.relocated_function_call(
            &ir::ExternalName::user(0, index),
            arg_types,
            return_types,
            FunctionDefLocation::SameModule,
        )?;
        Ok(())
    }

    /// Recursively call the same function again
    pub fn call_direct_self<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        defined_index: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        let locs = arg_locs_skip_caller_vmctx(arg_types);

        self.save_volatile()?;

        let (_, label) = self.func_starts[defined_index as usize];

        self.pass_outgoing_args(&locs)?;
        dynasm!(self.asm
            ; call =>label
        );

        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(return_types)?;
        Ok(())
    }

    /// Call an imported function with the given import index
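    /// The caller's `VMCTX` is saved on the stack and restored after the call, since the callee
    /// runs with its own `VMContext`.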
    pub fn call_direct_imported<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        index: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        let locs = arg_locs_skip_caller_vmctx(arg_types);

        dynasm!(self.asm
            ; mov Rq(CALLER_VMCTX), Rq(VMCTX)
        );
        self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));
        self.block_state.regs.mark_used(GPR::Rq(VMCTX));
        let saved_vmctx = self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?;

        self.save_volatile()?;
        self.pass_outgoing_args(&locs)?;

        let callee = self.take_reg(I64).unwrap();

        dynasm!(self.asm
            ; mov Rq(callee.rq().unwrap()), [
                Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_body(index) as i32
            ]
            ; mov Rq(VMCTX), [
                Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_vmctx(index) as i32
            ]
            ; call Rq(callee.rq().unwrap())
        );

        self.block_state.regs.release(callee)?;

        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(return_types)?;

        self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
        self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?;
        self.free_value(saved_vmctx)?;

        Ok(())
    }

    // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them
    //       as scratch registers
    /// Writes the function prologue and stores the arguments as locals
    pub fn start_function<P: IntoIterator<Item = SignlessType>>(
        &mut self,
        params: P,
    ) -> Result<(), Error>
    where
        P::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        self.apply_cc(BlockCallingConvention::function_start(
            arg_locs_skip_caller_vmctx(params),
        ))?;
        Ok(())
    }

    pub fn ret(&mut self) {
        dynasm!(self.asm
            ; ret
        );
    }

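    /// Flushes the label pool accumulated during codegen: every pending label is defined here
    /// and followed by its aligned constant (`.dword`/`.qword`) or a bare `ret` stub.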
    pub fn epilogue(&mut self) {
        for LabelInfo {
            label,
            align,
            inner,
        } in self.labels.drain()
        {
            match inner {
                LabelValue::I32(val) => {
                    dynasm!(self.asm
                        ; .align align as usize
                        ;; self.asm.dynamic_label(label.0)
                        ; .dword val
                    );
                }
                LabelValue::I64(val) => {
                    dynasm!(self.asm
                        ; .align align as usize
                        ;; self.asm.dynamic_label(label.0)
                        ; .qword val
                    );
                }
                LabelValue::Ret => {
                    dynasm!(self.asm
                        ; .align align as usize
                        ;; self.asm.dynamic_label(label.0)
                        ; ret
                    );
                }
            }
        }
    }

    pub fn trap(&mut self, _trap_id: TrapCode) {
        // TODO: Emit trap info by writing the trap ID and current source location to a
        //       `binemit::TrapSink`.
        dynasm!(self.asm
            ; ud2
        );
    }

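    /// Returns a label that, when branched to, performs a function return; the actual `ret`
    /// stub is emitted later by `epilogue`.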
    pub fn ret_label(&mut self) -> Label {
        #[derive(Copy, Clone, Hash)]
        struct RetLabel;

        self.label(LabelValue::Ret)
    }

    fn label(&mut self, label: LabelValue) -> Label {
        self.aligned_label(1, label)
    }

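    /// Registers `label` data with the requested alignment in the label pool, allocating a
    /// fresh dynamic label through the closure if the pool needs one; the data itself is
    /// emitted by `epilogue`.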
    fn aligned_label(&mut self, align: u32, label: LabelValue) -> Label {
        let asm = &mut self.asm;
        self.labels
            .insert(|| Label(asm.new_dynamic_label()), align, label)
    }

    fn target_to_label(&mut self, target: BrTarget<Label>) -> Label {
        match target {
            BrTarget::Label(label) => label,
            BrTarget::Return => self.ret_label(),
        }
    }
}