1 #![allow(clippy::float_cmp)]
2
3 use self::registers::*;
4 use crate::error::Error;
5 use crate::microwasm::{BrTarget, Ieee32, Ieee64, SignlessType, Type, Value, F32, F64, I32, I64};
6 use crate::module::ModuleContext;
7 use cranelift_codegen::{
8 binemit,
9 ir::{self, SourceLoc, TrapCode},
10 };
11 use dynasm::dynasm;
12 use dynasmrt::x64::Assembler;
13 use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
14 use std::{
15 cmp::Ordering,
16 convert::{TryFrom, TryInto},
17 fmt::Display,
18 hash::Hash,
19 iter, mem,
20 ops::{Deref, RangeInclusive},
21 };
22 // use wasmtime_environ::BuiltinFunctionIndex;
23
mod magic {
    /// An index type for builtin functions.
    ///
    /// Mirrors `wasmtime_environ::BuiltinFunctionIndex` (see the commented
    /// import above) so this crate does not need the dependency.
    pub struct BuiltinFunctionIndex(u32);

    impl BuiltinFunctionIndex {
        /// Returns an index for wasm's `memory.grow` builtin function.
        pub const fn get_memory32_grow_index() -> Self {
            BuiltinFunctionIndex(0)
        }
        /// Returns an index for wasm's imported `memory.grow` builtin function.
        pub const fn get_imported_memory32_grow_index() -> Self {
            BuiltinFunctionIndex(1)
        }
        /// Returns an index for wasm's `memory.size` builtin function.
        pub const fn get_memory32_size_index() -> Self {
            BuiltinFunctionIndex(2)
        }
        /// Returns an index for wasm's imported `memory.size` builtin function.
        pub const fn get_imported_memory32_size_index() -> Self {
            BuiltinFunctionIndex(3)
        }

        /// Return the index as an u32 number.
        pub const fn index(&self) -> u32 {
            self.0
        }
    }
}
52
53 use magic::BuiltinFunctionIndex;
54
/// Size of a pointer on the target in bytes.
const WORD_SIZE: u32 = 8;

/// Hardware register number within a register class (0-15; see `registers::rq`
/// and `registers::NUM_GPRS` below).
type RegId = u8;
59
/// A physical register, tagged with the register class it belongs to.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum GPR {
    /// A general-purpose integer register (rax..r15 — see `registers`).
    Rq(RegId),
    /// A 128-bit SSE register (xmm0..xmm15 — see `registers`).
    Rx(RegId),
}

/// A register class without a specific register number.
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum GPRType {
    /// General-purpose integer registers.
    Rq,
    /// 128-bit SSE registers.
    Rx,
}
71
impl From<SignlessType> for GPRType {
    /// Map a wasm value type to the register class that holds it:
    /// integers live in GPRs, floats in XMM registers.
    fn from(other: SignlessType) -> GPRType {
        match other {
            I32 | I64 => GPRType::Rq,
            F32 | F64 => GPRType::Rx,
        }
    }
}
80
81 impl From<SignlessType> for Option<GPRType> {
from(other: SignlessType) -> Self82 fn from(other: SignlessType) -> Self {
83 Some(other.into())
84 }
85 }
86
87 impl GPR {
type_(self) -> GPRType88 fn type_(self) -> GPRType {
89 match self {
90 GPR::Rq(_) => GPRType::Rq,
91 GPR::Rx(_) => GPRType::Rx,
92 }
93 }
94
rq(self) -> Option<RegId>95 fn rq(self) -> Option<RegId> {
96 match self {
97 GPR::Rq(r) => Some(r),
98 GPR::Rx(_) => None,
99 }
100 }
101
rx(self) -> Option<RegId>102 fn rx(self) -> Option<RegId> {
103 match self {
104 GPR::Rx(r) => Some(r),
105 GPR::Rq(_) => None,
106 }
107 }
108 }
109
arg_locs<I: IntoIterator<Item = SignlessType>>( types: I, ) -> impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone where I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,110 fn arg_locs<I: IntoIterator<Item = SignlessType>>(
111 types: I,
112 ) -> impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone
113 where
114 I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
115 {
116 // TODO: VmCtx is in the first register
117 let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.iter();
118 let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.iter();
119 let mut stack_idx = 0;
120
121 types
122 .into_iter()
123 .map(move |ty| {
124 match ty {
125 I32 | I64 => int_gpr_iter.next(),
126 F32 | F64 => float_gpr_iter.next(),
127 }
128 .map(|&r| CCLoc::Reg(r))
129 .unwrap_or_else(|| {
130 let out = CCLoc::Stack(stack_idx);
131 stack_idx += 1;
132 out
133 })
134 })
135 // Since we only advance the iterators based on the values in `types`,
136 // we can't do this lazily.
137 .collect::<Vec<_>>()
138 .into_iter()
139 }
140
/// Compute argument locations as if a hidden leading integer argument (the
/// caller's vmctx) were present, then skip that hidden slot so only the
/// locations of the real wasm arguments are returned.
fn arg_locs_skip_caller_vmctx<I: IntoIterator<Item = SignlessType>>(
    types: I,
) -> impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone
where
    I::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
{
    /// Adapter that yields one extra `SignlessType` (the caller-vmctx slot)
    /// before the wrapped iterator's items, preserving exact-size and
    /// double-ended behaviour.
    #[derive(Debug, Clone)]
    struct WithInt<I> {
        caller_vmctx_ty: Option<SignlessType>,
        iter: I,
    }

    impl<I> Iterator for WithInt<I>
    where
        I: Iterator<Item = SignlessType>,
    {
        type Item = SignlessType;

        fn next(&mut self) -> Option<Self::Item> {
            // Yield the vmctx type exactly once, up front.
            self.caller_vmctx_ty.take().or_else(|| self.iter.next())
        }

        fn size_hint(&self) -> (usize, Option<usize>) {
            let ty_len = if self.caller_vmctx_ty.is_some() { 1 } else { 0 };
            let (lower, upper) = self.iter.size_hint();

            (lower + ty_len, upper.map(|u| u + ty_len))
        }
    }

    impl<I> DoubleEndedIterator for WithInt<I>
    where
        I: DoubleEndedIterator<Item = SignlessType>,
    {
        fn next_back(&mut self) -> Option<Self::Item> {
            // The vmctx slot is logically FIRST, so from the back it is
            // yielded only once the wrapped iterator is exhausted.
            self.iter
                .next_back()
                .or_else(|| self.caller_vmctx_ty.take())
        }
    }

    impl<I> ExactSizeIterator for WithInt<I> where I: ExactSizeIterator<Item = SignlessType> {}

    // Allocate as if the vmctx were a real leading I32 argument, then drop
    // that first location from the result.
    arg_locs(WithInt {
        caller_vmctx_ty: Some(I32),
        iter: types.into_iter(),
    })
    .skip(1)
}
190
ret_locs(types: impl IntoIterator<Item = SignlessType>) -> Result<Vec<CCLoc>, Error>191 pub fn ret_locs(types: impl IntoIterator<Item = SignlessType>) -> Result<Vec<CCLoc>, Error> {
192 let types = types.into_iter();
193 let mut out = Vec::with_capacity(types.size_hint().0);
194 // TODO: VmCtx is in the first register
195 let mut int_gpr_iter = INTEGER_RETURN_GPRS.iter();
196 let mut float_gpr_iter = FLOAT_RETURN_GPRS.iter();
197
198 for ty in types {
199 match ty {
200 I32 | I64 => match int_gpr_iter.next() {
201 None => {
202 return Err(Error::Microwasm(
203 "We don't support stack returns yet".to_string(),
204 ))
205 }
206 Some(val) => out.push(CCLoc::Reg(*val)),
207 },
208 F32 | F64 => match float_gpr_iter.next() {
209 None => {
210 return Err(Error::Microwasm(
211 "We don't support stack returns yet".to_string(),
212 ))
213 }
214 Some(val) => out.push(CCLoc::Reg(*val)),
215 },
216 }
217 }
218
219 Ok(out)
220 }
221
/// A bitset over one register class. A SET bit means the register is free
/// (see `is_free`/`release` further down).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct GPRs {
    bits: u16,
}

impl GPRs {
    /// Create a set with no free bits: every register starts out
    /// considered in-use until explicitly released.
    fn new() -> Self {
        GPRs { bits: 0 }
    }
}
232
// Register name tables: raw hardware numbers (`rq`) plus typed `GPR` values
// for both the integer and SSE register files.
#[allow(dead_code)]
pub mod registers {
    use super::{RegId, GPR};

    /// Raw x86-64 integer register numbers as used in instruction encodings.
    pub mod rq {
        use super::RegId;

        pub const RAX: RegId = 0;
        pub const RCX: RegId = 1;
        pub const RDX: RegId = 2;
        pub const RBX: RegId = 3;
        pub const RSP: RegId = 4;
        pub const RBP: RegId = 5;
        pub const RSI: RegId = 6;
        pub const RDI: RegId = 7;
        pub const R8: RegId = 8;
        pub const R9: RegId = 9;
        pub const R10: RegId = 10;
        pub const R11: RegId = 11;
        pub const R12: RegId = 12;
        pub const R13: RegId = 13;
        pub const R14: RegId = 14;
        pub const R15: RegId = 15;
    }

    // Typed integer registers.
    pub const RAX: GPR = GPR::Rq(self::rq::RAX);
    pub const RCX: GPR = GPR::Rq(self::rq::RCX);
    pub const RDX: GPR = GPR::Rq(self::rq::RDX);
    pub const RBX: GPR = GPR::Rq(self::rq::RBX);
    pub const RSP: GPR = GPR::Rq(self::rq::RSP);
    pub const RBP: GPR = GPR::Rq(self::rq::RBP);
    pub const RSI: GPR = GPR::Rq(self::rq::RSI);
    pub const RDI: GPR = GPR::Rq(self::rq::RDI);
    pub const R8: GPR = GPR::Rq(self::rq::R8);
    pub const R9: GPR = GPR::Rq(self::rq::R9);
    pub const R10: GPR = GPR::Rq(self::rq::R10);
    pub const R11: GPR = GPR::Rq(self::rq::R11);
    pub const R12: GPR = GPR::Rq(self::rq::R12);
    pub const R13: GPR = GPR::Rq(self::rq::R13);
    pub const R14: GPR = GPR::Rq(self::rq::R14);
    pub const R15: GPR = GPR::Rq(self::rq::R15);

    // Typed SSE registers.
    pub const XMM0: GPR = GPR::Rx(0);
    pub const XMM1: GPR = GPR::Rx(1);
    pub const XMM2: GPR = GPR::Rx(2);
    pub const XMM3: GPR = GPR::Rx(3);
    pub const XMM4: GPR = GPR::Rx(4);
    pub const XMM5: GPR = GPR::Rx(5);
    pub const XMM6: GPR = GPR::Rx(6);
    pub const XMM7: GPR = GPR::Rx(7);
    pub const XMM8: GPR = GPR::Rx(8);
    pub const XMM9: GPR = GPR::Rx(9);
    pub const XMM10: GPR = GPR::Rx(10);
    pub const XMM11: GPR = GPR::Rx(11);
    pub const XMM12: GPR = GPR::Rx(12);
    pub const XMM13: GPR = GPR::Rx(13);
    pub const XMM14: GPR = GPR::Rx(14);
    pub const XMM15: GPR = GPR::Rx(15);

    /// Number of registers in each class.
    pub const NUM_GPRS: u8 = 16;
}
294
// IEEE-754 bit masks: the sign bit of an f64/f32, and its complement (the
// exponent + mantissa bits).
const SIGN_MASK_F64: u64 = 0x8000_0000_0000_0000;
const REST_MASK_F64: u64 = !SIGN_MASK_F64;
const SIGN_MASK_F32: u32 = 0x8000_0000;
const REST_MASK_F32: u32 = !SIGN_MASK_F32;
299
300 impl GPRs {
take(&mut self) -> Option<RegId>301 fn take(&mut self) -> Option<RegId> {
302 let lz = self.bits.trailing_zeros();
303 if lz < 16 {
304 let gpr = lz as RegId;
305 self.mark_used(gpr);
306 Some(gpr)
307 } else {
308 None
309 }
310 }
311
mark_used(&mut self, gpr: RegId)312 fn mark_used(&mut self, gpr: RegId) {
313 self.bits &= !(1 << gpr as u16);
314 }
315
release(&mut self, gpr: RegId)316 fn release(&mut self, gpr: RegId) {
317 debug_assert!(
318 !self.is_free(gpr),
319 "released register {} was already free",
320 gpr
321 );
322 self.bits |= 1 << gpr;
323 }
324
is_free(self, gpr: RegId) -> bool325 fn is_free(self, gpr: RegId) -> bool {
326 (self.bits & (1 << gpr)) != 0
327 }
328 }
329
/// Reference-counting register allocator: one free-set plus per-register
/// usage count for each register class.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Registers {
    /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example)
    scratch_64: (GPRs, [u8; NUM_GPRS as usize]),
    /// Registers at 128 bits (xmm0, for example)
    scratch_128: (GPRs, [u8; NUM_GPRS as usize]),
}
337
338 impl Default for Registers {
default() -> Self339 fn default() -> Self {
340 Self::new()
341 }
342 }
343
344 impl Registers {
new() -> Self345 pub fn new() -> Self {
346 Self {
347 scratch_64: (GPRs::new(), [1; NUM_GPRS as _]),
348 scratch_128: (GPRs::new(), [1; NUM_GPRS as _]),
349 }
350 }
351
release_scratch_register(&mut self) -> Result<(), Error>352 pub fn release_scratch_register(&mut self) -> Result<(), Error> {
353 // Give ourselves a few scratch registers to work with, for now.
354 for &scratch in SCRATCH_REGS {
355 self.release(scratch)?;
356 }
357 Ok(())
358 }
359
scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize]))360 fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) {
361 match gpr {
362 GPR::Rq(r) => (r, &mut self.scratch_64),
363 GPR::Rx(r) => (r, &mut self.scratch_128),
364 }
365 }
366
scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize]))367 fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) {
368 match gpr {
369 GPR::Rq(r) => (r, &self.scratch_64),
370 GPR::Rx(r) => (r, &self.scratch_128),
371 }
372 }
373
mark_used(&mut self, gpr: GPR)374 pub fn mark_used(&mut self, gpr: GPR) {
375 let (gpr, scratch_counts) = self.scratch_counts_mut(gpr);
376 scratch_counts.0.mark_used(gpr);
377 scratch_counts.1[gpr as usize] += 1;
378 }
379
num_usages(&self, gpr: GPR) -> u8380 pub fn num_usages(&self, gpr: GPR) -> u8 {
381 let (gpr, scratch_counts) = self.scratch_counts(gpr);
382 scratch_counts.1[gpr as usize]
383 }
384
take(&mut self, ty: impl Into<GPRType>) -> Option<GPR>385 pub fn take(&mut self, ty: impl Into<GPRType>) -> Option<GPR> {
386 let (mk_gpr, scratch_counts) = match ty.into() {
387 GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64),
388 GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128),
389 };
390
391 let out = scratch_counts.0.take()?;
392 scratch_counts.1[out as usize] += 1;
393 Some(mk_gpr(out))
394 }
395
release(&mut self, gpr: GPR) -> Result<(), Error>396 pub fn release(&mut self, gpr: GPR) -> Result<(), Error> {
397 let (gpr, scratch_counts) = self.scratch_counts_mut(gpr);
398 let c = &mut scratch_counts.1[gpr as usize];
399 *c = match c.checked_sub(1) {
400 Some(e) => e,
401 None => return Err(Error::Microwasm(format!("Double-freed register: {}", gpr))),
402 };
403 if *c == 0 {
404 scratch_counts.0.release(gpr);
405 }
406 Ok(())
407 }
408
is_free(&self, gpr: GPR) -> bool409 pub fn is_free(&self, gpr: GPR) -> bool {
410 let (gpr, scratch_counts) = self.scratch_counts(gpr);
411 scratch_counts.0.is_free(gpr)
412 }
413 }
414
/// Where a block expects its live values to be on entry, together with the
/// stack depth at that point.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockCallingConvention<I = Vec<CCLoc>> {
    /// Stack depth (in words) on entry to the block.
    pub stack_depth: StackDepth,
    /// Locations of the block's arguments, in order.
    pub arguments: I,
}
420
impl<I> BlockCallingConvention<I> {
    /// The calling convention at function entry: the given argument
    /// locations, at the function's initial stack depth.
    pub fn function_start(arguments: I) -> Self {
        BlockCallingConvention {
            // We start and return the function with stack depth 1 since we must
            // allow space for the saved return address.
            stack_depth: StackDepth(1),
            arguments,
        }
    }
}
431
impl<T: Copy + 'static, I: Deref> BlockCallingConvention<I>
where
    for<'a> &'a I::Target: IntoIterator<Item = &'a T>,
{
    /// Borrowing view of this calling convention: the same stack depth,
    /// with the arguments yielded by value out of the borrowed collection.
    pub fn as_ref(&self) -> BlockCallingConvention<impl Iterator<Item = T> + '_> {
        BlockCallingConvention {
            stack_depth: self.stack_depth.clone(),
            arguments: self.arguments.into_iter().copied(),
        }
    }
}
445
/// Whether a callee is known to be defined in the module currently being
/// compiled.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum FunctionDefLocation {
    SameModule,
    PossiblyExternal,
}

// TODO: Combine this with `ValueLocation`?
/// A concrete location the calling convention can assign to a value —
/// unlike `ValueLocation`, this can never be an immediate or a condition
/// code.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CCLoc {
    /// Value exists in a register.
    Reg(GPR),
    /// Value exists on the stack.
    Stack(i32),
}
460
461 impl CCLoc {
try_from(other: ValueLocation) -> Option<Self>462 fn try_from(other: ValueLocation) -> Option<Self> {
463 match other {
464 ValueLocation::Reg(reg) => Some(CCLoc::Reg(reg)),
465 ValueLocation::Stack(offset) => Some(CCLoc::Stack(offset)),
466 ValueLocation::Cond(_) | ValueLocation::Immediate(_) => None,
467 }
468 }
469 }
470
/// A branch condition, named after the x86 status flags it tests
/// (CF = carry, ZF = zero, SF = sign, OF = overflow). See the `cc` module
/// below for which comparison each condition implements; note that the
/// `0`/`1` suffix convention is internal to this backend (e.g. `ZF0` is
/// used as "equal") — confirm against the jump/set emission code before
/// relying on a literal flag-value reading.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CondCode {
    CF0,
    CF1,
    ZF0,
    ZF1,
    CF0AndZF0,
    CF1OrZF1,
    ZF0AndSFEqOF,
    ZF1OrSFNeOF,
    SFEqOF,
    SFNeOF,
}

/// Aliases mapping comparison operators onto the flag conditions that
/// implement them after a `cmp`, for unsigned (`_U`) and signed (`_S`)
/// operands.
mod cc {
    use super::CondCode;

    pub const EQUAL: CondCode = CondCode::ZF0;
    pub const NOT_EQUAL: CondCode = CondCode::ZF1;
    pub const GE_U: CondCode = CondCode::CF0;
    pub const LT_U: CondCode = CondCode::CF1;
    pub const GT_U: CondCode = CondCode::CF0AndZF0;
    pub const LE_U: CondCode = CondCode::CF1OrZF1;
    pub const GE_S: CondCode = CondCode::SFEqOF;
    pub const LT_S: CondCode = CondCode::SFNeOF;
    pub const GT_S: CondCode = CondCode::ZF0AndSFEqOF;
    pub const LE_S: CondCode = CondCode::ZF1OrSFNeOF;
}
499
impl std::ops::Not for CondCode {
    type Output = Self;

    /// The logical complement: the returned condition holds exactly when
    /// `self` does not. Each variant is paired with its inverse (e.g.
    /// equal ↔ not-equal, unsigned-below ↔ unsigned-at-least).
    fn not(self) -> Self {
        use CondCode::*;

        match self {
            CF0 => CF1,
            CF1 => CF0,
            ZF0 => ZF1,
            ZF1 => ZF0,
            CF0AndZF0 => CF1OrZF1,
            CF1OrZF1 => CF0AndZF0,
            ZF0AndSFEqOF => ZF1OrSFNeOF,
            ZF1OrSFNeOF => ZF0AndSFEqOF,
            SFEqOF => SFNeOF,
            SFNeOF => SFEqOF,
        }
    }
}
520
/// Describes location of a value.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ValueLocation {
    /// Value exists in a register.
    Reg(GPR),
    /// Value exists on the stack. Note that this offset is from the rsp as it
    /// was when we entered the function.
    Stack(i32),
    /// Value is a literal
    Immediate(Value),
    /// Value is a set condition code
    Cond(CondCode),
}
534
535 impl From<CCLoc> for ValueLocation {
from(other: CCLoc) -> Self536 fn from(other: CCLoc) -> Self {
537 match other {
538 CCLoc::Reg(r) => ValueLocation::Reg(r),
539 CCLoc::Stack(o) => ValueLocation::Stack(o),
540 }
541 }
542 }
543
544 impl ValueLocation {
stack(self) -> Option<i32>545 fn stack(self) -> Option<i32> {
546 match self {
547 ValueLocation::Stack(o) => Some(o),
548 _ => None,
549 }
550 }
551
reg(self) -> Option<GPR>552 fn reg(self) -> Option<GPR> {
553 match self {
554 ValueLocation::Reg(r) => Some(r),
555 _ => None,
556 }
557 }
558
immediate(self) -> Option<Value>559 fn immediate(self) -> Option<Value> {
560 match self {
561 ValueLocation::Immediate(i) => Some(i),
562 _ => None,
563 }
564 }
565
imm_i32(self) -> Option<i32>566 fn imm_i32(self) -> Option<i32> {
567 self.immediate().and_then(Value::as_i32)
568 }
569
imm_i64(self) -> Option<i64>570 fn imm_i64(self) -> Option<i64> {
571 self.immediate().and_then(Value::as_i64)
572 }
573
imm_f32(self) -> Option<Ieee32>574 fn imm_f32(self) -> Option<Ieee32> {
575 self.immediate().and_then(Value::as_f32)
576 }
577
imm_f64(self) -> Option<Ieee64>578 fn imm_f64(self) -> Option<Ieee64> {
579 self.immediate().and_then(Value::as_f64)
580 }
581 }
582
// TODO: This assumes only system-v calling convention.
// In system-v calling convention the first 6 arguments are passed via registers.
// All rest arguments are passed on the stack.
// Usually system-v uses rdi and rsi, but rdi is used for the vmctx and rsi is used for the _caller_ vmctx
const INTEGER_ARGS_IN_GPRS: &[GPR] = &[GPR::Rq(CALLER_VMCTX), RDX, RCX, R8, R9];
const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX];
const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1];
// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI
const SCRATCH_REGS: &[GPR] = &[
    RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9,
    XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
];
// rdi holds this function's vmctx and rsi the caller's vmctx (see the note
// above), which is why integer wasm arguments start at rsi rather than rdi.
const VMCTX: RegId = rq::RDI;
const CALLER_VMCTX: RegId = rq::RSI;
598
/// State shared across the translation of a whole module: one assembler
/// that every function is emitted into, plus per-function start
/// offsets/labels.
pub struct CodeGenSession<'module, M> {
    assembler: Assembler,
    pub module_context: &'module M,
    /// Assembly offsets paired with printable descriptions, used by
    /// `TranslatedCodeSection::disassemble`.
    pub op_offset_map: Vec<(AssemblyOffset, Box<dyn Display + Send + Sync>)>,
    /// Per-function `(start offset, label)`. The offset is `None` until
    /// `new_context` has been called for that function.
    func_starts: Vec<(Option<AssemblyOffset>, DynamicLabel)>,
    pointer_type: SignlessType,
}
606
impl<'module, M> CodeGenSession<'module, M> {
    /// Create a session for a module of `func_count` functions, allocating
    /// a dynamic label per function up front so calls can be emitted
    /// before the callee's body has been assembled.
    pub fn new(func_count: u32, module_context: &'module M, pointer_type: SignlessType) -> Self {
        let mut assembler = Assembler::new().unwrap();
        let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label()))
            .take(func_count as usize)
            .collect::<Vec<_>>();

        CodeGenSession {
            assembler,
            op_offset_map: Default::default(),
            func_starts,
            module_context,
            pointer_type,
        }
    }

    /// Current byte offset into the output buffer.
    pub fn offset(&self) -> usize {
        self.assembler.offset().0
    }

    /// The wasm type used for pointers on this target.
    pub fn pointer_type(&self) -> SignlessType {
        self.pointer_type
    }

    /// Begin translating function `func_idx`: record its start offset,
    /// bind its dynamic label at the current position, and return a
    /// per-function `Context` borrowing this session's assembler.
    pub fn new_context<'this>(
        &'this mut self,
        func_idx: u32,
        reloc_sink: &'this mut dyn binemit::RelocSink,
    ) -> Context<'this, M> {
        {
            let func_start = &mut self.func_starts[func_idx as usize];

            // At this point we know the exact start address of this function. Save it
            // and define dynamic label at this location.
            func_start.0 = Some(self.assembler.offset());
            self.assembler.dynamic_label(func_start.1);
        }

        Context {
            asm: &mut self.assembler,
            current_function: func_idx,
            reloc_sink,
            pointer_type: self.pointer_type,
            source_loc: Default::default(),
            func_starts: &self.func_starts,
            block_state: Default::default(),
            module_context: self.module_context,
            labels: Default::default(),
        }
    }

    /// Finalize the assembler into an executable buffer.
    ///
    /// Panics if `new_context` was never called for some function (its
    /// start offset would still be `None`).
    pub fn into_translated_code_section(self) -> Result<TranslatedCodeSection, Error> {
        let exec_buf = self
            .assembler
            .finalize()
            .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?;
        let func_starts = self
            .func_starts
            .iter()
            .map(|(offset, _)| offset.unwrap())
            .collect::<Vec<_>>();
        Ok(TranslatedCodeSection {
            exec_buf,
            func_starts,
            op_offset_map: self.op_offset_map,
            // TODO
            relocatable_accesses: vec![],
        })
    }
}
677
/// An address expressed as an optional base register plus an immediate —
/// currently unused (see the TODO in `into_translated_code_section`).
#[derive(Debug)]
struct RelocateAddress {
    reg: Option<GPR>,
    imm: usize,
}

/// A recorded memory access whose address would need patching after
/// layout — currently unused (see the TODO in
/// `into_translated_code_section`).
#[derive(Debug)]
struct RelocateAccess {
    position: AssemblyOffset,
    dst_reg: GPR,
    address: RelocateAddress,
}

/// Finalized, executable machine code for all functions of a module.
pub struct TranslatedCodeSection {
    exec_buf: ExecutableBuffer,
    /// Start offset of each function, indexed by function number.
    func_starts: Vec<AssemblyOffset>,
    #[allow(dead_code)]
    relocatable_accesses: Vec<RelocateAccess>,
    /// Offset → description pairs, used for disassembly output.
    op_offset_map: Vec<(AssemblyOffset, Box<dyn Display + Send + Sync>)>,
}
698
699 impl TranslatedCodeSection {
func_start(&self, idx: usize) -> *const u8700 pub fn func_start(&self, idx: usize) -> *const u8 {
701 let offset = self.func_starts[idx];
702 self.exec_buf.ptr(offset)
703 }
704
func_range(&self, idx: usize) -> std::ops::Range<usize>705 pub fn func_range(&self, idx: usize) -> std::ops::Range<usize> {
706 let end = self
707 .func_starts
708 .get(idx + 1)
709 .map(|i| i.0)
710 .unwrap_or_else(|| self.exec_buf.len());
711
712 self.func_starts[idx].0..end
713 }
714
funcs<'a>(&'a self) -> impl Iterator<Item = std::ops::Range<usize>> + 'a715 pub fn funcs<'a>(&'a self) -> impl Iterator<Item = std::ops::Range<usize>> + 'a {
716 (0..self.func_starts.len()).map(move |i| self.func_range(i))
717 }
718
buffer(&self) -> &[u8]719 pub fn buffer(&self) -> &[u8] {
720 &*self.exec_buf
721 }
722
disassemble(&self)723 pub fn disassemble(&self) {
724 crate::disassemble::disassemble(&*self.exec_buf, &self.op_offset_map).unwrap();
725 }
726 }
727
/// Register and value-stack bookkeeping for the block currently being
/// compiled.
#[derive(Debug, Default, Clone)]
pub struct BlockState {
    /// Where each operand on the virtual wasm stack currently lives.
    pub stack: Stack,
    /// Current physical stack depth.
    pub depth: StackDepth,
    /// Register allocation state.
    pub regs: Registers,
}

/// The virtual value stack.
type Stack = Vec<ValueLocation>;
736
mod labels {
    use super::Label;
    use std::collections::HashMap;

    /// An assembler label together with the value it stands for.
    pub struct LabelInfo {
        pub label: Label,
        /// Requested alignment; the maximum over all `insert` calls for
        /// the same value.
        pub align: u32,
        pub inner: LabelValue,
    }

    /// A value that a label can stand for.
    #[derive(Copy, Clone, PartialEq, Eq, Hash)]
    pub enum LabelValue {
        Ret,
        I32(i32),
        I64(i64),
    }

    /// Deduplicating pool of labels: equal `LabelValue`s share one label.
    #[derive(Default)]
    pub struct Labels {
        map: HashMap<LabelValue, LabelInfo>,
    }

    impl Labels {
        /// Remove and yield every pending label, leaving the pool empty.
        pub fn drain(&mut self) -> impl Iterator<Item = LabelInfo> + '_ {
            self.map.drain().map(|(_, info)| info)
        }

        /// Get the label for `label`, creating it with `l` on first use.
        /// The stored alignment is raised to `align` if it was smaller.
        pub fn insert(
            &mut self,
            l: impl FnOnce() -> Label,
            align: u32,
            label: LabelValue,
        ) -> Label {
            let val = self.map.entry(label).or_insert_with(move || LabelInfo {
                label: l(),
                align,
                inner: label,
            });

            val.align = val.align.max(align);

            val.label
        }
    }
}
782
783 use labels::{LabelInfo, LabelValue, Labels};
784
/// Per-function code generation state: the shared assembler plus the
/// virtual stack/register tracking for the code currently being emitted.
pub struct Context<'this, M> {
    pub asm: &'this mut Assembler,
    pointer_type: SignlessType,
    source_loc: SourceLoc,
    reloc_sink: &'this mut dyn binemit::RelocSink,
    module_context: &'this M,
    /// Index of the function being compiled.
    current_function: u32,
    /// Shared view of every function's `(start offset, label)` pair.
    func_starts: &'this Vec<(Option<AssemblyOffset>, DynamicLabel)>,
    /// Each push and pop on the value stack increments or decrements this value by 1 respectively.
    pub block_state: BlockState,
    /// Pool of deduplicated constant labels.
    labels: Labels,
}
797
/// Label in code. A thin wrapper around a dynasm `DynamicLabel`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct Label(DynamicLabel);
801
/// Offset from starting value of SP counted in words.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct StackDepth(u32);

impl StackDepth {
    /// Grow the depth by `slots` words. Panics on overflow.
    pub fn reserve(&mut self, slots: u32) {
        let deeper = self.0.checked_add(slots).unwrap();
        self.0 = deeper;
    }

    /// Shrink the depth by `slots` words. Panics if more slots would be
    /// freed than are currently reserved.
    pub fn free(&mut self, slots: u32) {
        let shallower = self.0.checked_sub(slots).unwrap();
        self.0 = shallower;
    }
}
815
// Generates the four checked division/remainder operations (signed and
// unsigned div/rem) for one integer width. `$full_div_s`/`$full_div_u` are
// helpers (defined elsewhere) that emit the actual hardware division and
// return `(quotient, remainder, saved-registers)`; `$imm_fn` extracts an
// immediate of the right signedness from a `ValueLocation`.
macro_rules! int_div {
    ($full_div_s:ident, $full_div_u:ident, $div_u:ident, $div_s:ident, $rem_u:ident, $rem_s:ident, $imm_fn:ident, $signed_ty:ty, $unsigned_ty:ty, $reg_ty:tt, $pointer_ty:tt) => {
        // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when
        // emitting Wasm.
        pub fn $div_u(&mut self) -> Result<(), Error>{
            let divisor = self.pop()?;
            let dividend = self.pop()?;

            // Constant-fold when both operands are immediates. Division by
            // zero emits a trap; a dummy zero is still pushed so the
            // virtual stack stays balanced.
            if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
                if divisor == 0 {
                    self.trap(TrapCode::IntegerDivisionByZero);
                    self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?;
                } else {
                    self.push(ValueLocation::Immediate(
                        <$unsigned_ty>::wrapping_div(dividend as _, divisor as _).into(),
                    ))?;
                }

                return Ok(())
            }

            let (div, rem, saved) = self.$full_div_u(divisor, dividend)?;

            self.free_value(rem)?;

            // If the quotient landed in one of the registers that will be
            // restored from `saved`, move it to a fresh register first.
            let div = match div {
                ValueLocation::Reg(div) => {
                    if saved.clone().any(|dst| dst == div) {
                        let new = self.take_reg(I32).unwrap();
                        dynasm!(self.asm
                            ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap())
                        );
                        self.block_state.regs.release(div)?;
                        ValueLocation::Reg(new)
                    } else {
                        ValueLocation::Reg(div)
                    }
                }
                ValueLocation::Stack(_) |
                ValueLocation::Cond(_) |
                ValueLocation::Immediate(_) => div,
            };

            self.cleanup_gprs(saved);

            self.push(div)?;
            Ok(())
        }

        // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when
        // emitting Wasm.
        pub fn $div_s(&mut self) -> Result<(), Error>{
            let divisor = self.pop()?;
            let dividend = self.pop()?;

            // Constant-fold when both operands are immediates.
            // NOTE(review): `wrapping_div` silently wraps `MIN / -1`, but
            // wasm defines signed division overflow as a trap — the
            // constant path presumably should emit TrapCode::IntegerOverflow
            // for that case; verify against the spec/tests.
            if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
                if divisor == 0 {
                    self.trap(TrapCode::IntegerDivisionByZero);
                    self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?;
                } else {
                    self.push(ValueLocation::Immediate(
                        <$signed_ty>::wrapping_div(dividend, divisor).into(),
                    ))?;
                }

                return Ok(())
            }

            let (div, rem, saved) = self.$full_div_s(divisor, dividend)?;

            self.free_value(rem)?;

            // As in $div_u: keep the quotient out of registers that will
            // be restored from `saved`.
            let div = match div {
                ValueLocation::Reg(div) => {
                    if saved.clone().any(|dst| dst == div) {
                        let new = self.take_reg(I32).unwrap();
                        dynasm!(self.asm
                            ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap())
                        );
                        self.block_state.regs.release(div)?;
                        ValueLocation::Reg(new)
                    } else {
                        ValueLocation::Reg(div)
                    }
                }
                ValueLocation::Stack(_) |
                ValueLocation::Cond(_) |
                ValueLocation::Immediate(_) => div,
            };

            self.cleanup_gprs(saved);

            self.push(div)?;
            Ok(())
        }

        pub fn $rem_u(&mut self) -> Result<(), Error>{
            let divisor = self.pop()?;
            let dividend = self.pop()?;

            // Constant-fold when both operands are immediates; zero
            // divisor traps and pushes a dummy value.
            if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
                if divisor == 0 {
                    self.trap(TrapCode::IntegerDivisionByZero);
                    self.push(ValueLocation::Immediate((0 as $unsigned_ty).into()))?;
                } else {
                    self.push(ValueLocation::Immediate(
                        (dividend as $unsigned_ty % divisor as $unsigned_ty).into(),
                    ))?;
                }
                return Ok(());
            }

            let (div, rem, saved) = self.$full_div_u(divisor, dividend)?;

            self.free_value(div)?;

            // Keep the remainder out of registers that will be restored
            // from `saved`.
            let rem = match rem {
                ValueLocation::Reg(rem) => {
                    if saved.clone().any(|dst| dst == rem) {
                        let new = self.take_reg(I32).unwrap();
                        dynasm!(self.asm
                            ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap())
                        );
                        self.block_state.regs.release(rem)?;
                        ValueLocation::Reg(new)
                    } else {
                        ValueLocation::Reg(rem)
                    }
                }
                ValueLocation::Stack(_) |
                ValueLocation::Cond(_) |
                ValueLocation::Immediate(_) => rem,
            };

            self.cleanup_gprs(saved);

            self.push(rem)?;
            Ok(())
        }

        pub fn $rem_s(&mut self) -> Result<(), Error>{
            let mut divisor = self.pop()?;
            let dividend = self.pop()?;

            // Constant-fold when both operands are immediates.
            if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) {
                if divisor == 0 {
                    self.trap(TrapCode::IntegerDivisionByZero);
                    self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?;
                } else {
                    self.push(ValueLocation::Immediate((dividend % divisor).into()))?;
                }
                return Ok(());
            }

            // `x rem -1` is always 0 (and `MIN rem -1` would overflow the
            // hardware `idiv`), so a divisor of -1 is special-cased: the
            // generated code jumps to `is_neg1`, which stores 0 into the
            // result location, bypassing the division entirely.
            let is_neg1 = self.create_label();

            let current_depth = self.block_state.depth.clone();

            // TODO: This could cause segfaults because of implicit push/pop
            let gen_neg1_case = match divisor {
                ValueLocation::Immediate(_) => {
                    if divisor.$imm_fn().unwrap() == -1 {
                        // BUGFIX: `x rem -1` is 0 for every `x` (matching
                        // the `is_neg1` path below); this previously
                        // pushed -1.
                        self.push(ValueLocation::Immediate((0 as $signed_ty).into()))?;
                        self.free_value(dividend)?;
                        return Ok(());
                    }

                    false
                }
                ValueLocation::Reg(_) => {
                    let reg = self.put_into_register(GPRType::Rq, &mut divisor)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                    dynasm!(self.asm
                        ; cmp $reg_ty(reg.rq().unwrap()), -1
                    );
                    // TODO: We could choose `current_depth` as the depth here instead but we currently
                    // don't for simplicity
                    self.set_stack_depth(current_depth.clone())?;
                    dynasm!(self.asm
                        ; je =>is_neg1.0
                    );

                    true
                }
                ValueLocation::Stack(offset) => {
                    let offset = self.adjusted_offset(offset);
                    dynasm!(self.asm
                        ; cmp $pointer_ty [rsp + offset], -1
                    );
                    self.set_stack_depth(current_depth.clone())?;
                    dynasm!(self.asm
                        ; je =>is_neg1.0
                    );

                    true
                }
                ValueLocation::Cond(_) => {
                    // `cc` can never be `-1`, only `0` and `1`
                    false
                }
            };

            let (div, rem, saved) = self.$full_div_s(divisor, dividend)?;

            self.free_value(div)?;

            // Keep the remainder out of registers that will be restored
            // from `saved`.
            let rem = match rem {
                ValueLocation::Reg(rem) => {
                    if saved.clone().any(|dst| dst == rem) {
                        let new = self.take_reg(I32).unwrap();
                        dynasm!(self.asm
                            ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap())
                        );
                        self.block_state.regs.release(rem)?;
                        ValueLocation::Reg(new)
                    } else {
                        ValueLocation::Reg(rem)
                    }
                }
                ValueLocation::Stack(_) |
                ValueLocation::Cond(_) |
                ValueLocation::Immediate(_) => rem,
            };

            self.cleanup_gprs(saved);

            if gen_neg1_case {
                // Normal path jumps over the -1 handler; the handler
                // writes 0 into the remainder's location.
                let ret = self.create_label();
                self.set_stack_depth(current_depth.clone())?;
                dynasm!(self.asm
                    ; jmp =>ret.0
                );
                self.define_label(is_neg1);

                let dst_ccloc = match CCLoc::try_from(rem) {
                    None => {
                        return Err(Error::Microwasm(
                            "$rem_s Programmer error".to_string(),
                        ))
                    }
                    Some(o) => o,
                };

                self.copy_value(
                    ValueLocation::Immediate((0 as $signed_ty).into()),
                    dst_ccloc
                )?;

                self.set_stack_depth(current_depth.clone())?;
                self.define_label(ret);
            }

            self.push(rem)?;
            Ok(())
        }
    }
}
1073
// Generates a unary operation: pops one value, applies x86 instruction
// `$instr` (or `$const_fallback` at compile time for immediates), and
// pushes the result.
macro_rules! unop {
    ($name:ident, $instr:ident, $reg_ty:tt, $typ:ty, $const_fallback:expr) => {
        pub fn $name(&mut self) -> Result<(), Error>{
            let mut val = self.pop()?;

            let out_val = match val {
                // Immediate operand: evaluate at compile time.
                ValueLocation::Immediate(imm) =>
                    ValueLocation::Immediate(
                        ($const_fallback(imm.as_int().unwrap() as $typ) as $typ).into()
                    ),
                // Stack operand: operate straight out of memory into a
                // freshly allocated register.
                ValueLocation::Stack(offset) => {
                    let offset = self.adjusted_offset(offset);
                    let temp = self.take_reg(Type::for_::<$typ>()).unwrap();
                    dynasm!(self.asm
                        ; $instr $reg_ty(temp.rq().unwrap()), [rsp + offset]
                    );
                    ValueLocation::Reg(temp)
                }
                // Register or condition-code operand: materialize it into
                // a register, then operate register-to-register.
                ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                    let reg = self.put_into_register(GPRType::Rq, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                    let temp = self.take_reg(Type::for_::<$typ>()).unwrap();
                    dynasm!(self.asm
                        ; $instr $reg_ty(temp.rq().unwrap()), $reg_ty(reg.rq().unwrap())
                    );
                    ValueLocation::Reg(temp)
                }
            };

            self.free_value(val)?;
            self.push(out_val)?;
            Ok(())
        }
    }
}
1108
// Generates a conversion method (`$name`), e.g. int↔float moves.
//
// `$instr` is the x86 conversion instruction; `$in_reg_ty`/`$out_reg_ty`
// select the dynasm register family (`Rd`/`Rq`/`Rx`) and
// `$in_reg_fn`/`$out_reg_fn` the matching `GPR` accessor (`rq`/`rx`) for
// source and destination; `$const_ty_fn` extracts the immediate used by the
// compile-time fallback `$const_fallback`.
macro_rules! conversion {
    (
        $name:ident,
        $instr:ident,
        $in_reg_ty:tt,
        $in_reg_fn:ident,
        $out_reg_ty:tt,
        $out_reg_fn:ident,
        $in_typ:ty,
        $out_typ:ty,
        $const_ty_fn:ident,
        $const_fallback:expr
    ) => {
        pub fn $name(&mut self) -> Result<(), Error>{
            let mut val = self.pop()?;

            let out_val = match val {
                // Constant operand: fold the conversion at compile time.
                ValueLocation::Immediate(imm) =>
                    ValueLocation::Immediate(
                        $const_fallback(imm.$const_ty_fn().unwrap()).into()
                    ),
                ValueLocation::Stack(offset) => {
                    // Convert straight from the spilled slot into a fresh register.
                    let offset = self.adjusted_offset(offset);
                    let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap();
                    dynasm!(self.asm
                        ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset]
                    );

                    ValueLocation::Reg(temp)
                }
                ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                    let reg = self.put_into_register(Type::for_::<$in_typ>(), &mut val)? .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                    let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap();

                    dynasm!(self.asm
                        ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap())
                    );

                    ValueLocation::Reg(temp)
                }
            };

            self.free_value(val)?;

            self.push(out_val)?;
            Ok(())
        }
    }
}
1158
// TODO: Support immediate `count` parameters
//
// Generates a shift/rotate method (`$name`). x86 variable shifts take the
// count in `cl`, so the generated code routes `count` into RCX, preserving
// and restoring any live value already held there.
macro_rules! shift {
    ($name:ident, $reg_ty:tt, $instr:ident, $const_fallback:expr, $ty:expr) => {
        pub fn $name(&mut self) -> Result<(), Error>{
            let mut count = self.pop()?;
            let mut val = self.pop()?;

            // Fast path: a constant count that fits in an i8 can be encoded
            // directly into the instruction — no RCX shuffling needed.
            if let Some(imm) = count.immediate() {
                if let Some(imm) = imm.as_int() {
                    if let Ok(imm) = i8::try_from(imm) {
                        let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                        dynasm!(self.asm
                            ; $instr $reg_ty(reg.rq().unwrap()), imm
                        );
                        self.push(ValueLocation::Reg(reg))?;
                        return Ok(());
                    }
                }
            }

            // The shifted value must not occupy RCX (the count will live
            // there); move it to another register if it does.
            if val == ValueLocation::Reg(RCX) {
                let new = self.take_reg($ty).unwrap();
                self.copy_value(val, CCLoc::Reg(new))?;
                self.free_value(val)?;
                val = ValueLocation::Reg(new);
            }

            // TODO: Maybe allocate `RCX`, write `count` to it and then free `count`.
            //       Once we've implemented refcounting this will do the right
            //       thing for free.
            let temp_rcx = match count {
                ValueLocation::Reg(RCX) => {None}
                other => {
                    let out = if self.block_state.regs.is_free(RCX) {
                        None
                    } else {
                        // RCX is live: stash its current contents so they can
                        // be restored after the shift.
                        let new_reg = self.take_reg(I32).unwrap();
                        dynasm!(self.asm
                            ; mov Rq(new_reg.rq().unwrap()), rcx
                        );
                        Some(new_reg)
                    };

                    // Move the count's low byte into `cl`.
                    match other {
                        ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                            let gpr = self.put_into_register(I32, &mut count)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                            dynasm!(self.asm
                                ; mov cl, Rb(gpr.rq().unwrap())
                            );
                        }
                        ValueLocation::Stack(offset) => {
                            let offset = self.adjusted_offset(offset);
                            dynasm!(self.asm
                                ; mov cl, [rsp + offset]
                            );
                        }
                        ValueLocation::Immediate(imm) => {
                            dynasm!(self.asm
                                ; mov cl, imm.as_int().unwrap() as i8
                            );
                        }
                    }

                    out
                }
            };

            // From here on the count lives in RCX; retarget the tracked
            // location accordingly (the original location is released first).
            self.free_value(count)?;
            self.block_state.regs.mark_used(RCX);
            count = ValueLocation::Reg(RCX);

            let reg = self.put_into_temp_register($ty, &mut val)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

            dynasm!(self.asm
                ; $instr $reg_ty(reg.rq().unwrap()), cl
            );

            self.free_value(count)?;

            // Restore the caller's RCX contents if we had to preserve them.
            if let Some(gpr) = temp_rcx {
                dynasm!(self.asm
                    ; mov rcx, Rq(gpr.rq().unwrap())
                );
                self.block_state.regs.release(gpr)?;
            }

            self.push(val)?;
            Ok(())
        }
    }
}
1251
// Generates a 32-bit integer comparison method.
//
// `$flags` is the condition code for `left <op> right`; `$reverse_flags` is
// used when the `cmp` is emitted with the operands swapped (constant `left`
// on the immediate side). `$const_fallback` folds the comparison when both
// operands are immediates.
macro_rules! cmp_i32 {
    ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => {
        pub fn $name(&mut self) -> Result<(), Error>{
            let mut right = self.pop()?;
            let mut left = self.pop()?;

            let out = if let Some(i) = left.imm_i32() {
                match right {
                    ValueLocation::Stack(offset) => {
                        let offset = self.adjusted_offset(offset);

                        // `cmp right, left_imm` — condition is reversed.
                        dynasm!(self.asm
                            ; cmp DWORD [rsp + offset], i
                        );
                        ValueLocation::Cond($reverse_flags)
                    }
                    ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                        let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                        dynasm!(self.asm
                            ; cmp Rd(rreg.rq().unwrap()), i
                        );
                        ValueLocation::Cond($reverse_flags)
                    }
                    // Both sides constant: fold to 0/1.
                    ValueLocation::Immediate(right) => {
                        ValueLocation::Immediate(
                            (if $const_fallback(i, right.as_i32().unwrap()) {
                                1i32
                            } else {
                                0i32
                            }).into()
                        )
                    }
                }
            } else {
                let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                match right {
                    ValueLocation::Stack(offset) => {
                        let offset = self.adjusted_offset(offset);
                        dynasm!(self.asm
                            ; cmp Rd(lreg.rq().unwrap()), [rsp + offset]
                        );
                    }
                    ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                        let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                        dynasm!(self.asm
                            ; cmp Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap())
                        );
                    }
                    ValueLocation::Immediate(i) => {
                        dynasm!(self.asm
                            ; cmp Rd(lreg.rq().unwrap()), i.as_i32().unwrap()
                        );
                    }
                }

                ValueLocation::Cond($flags)
            };

            self.free_value(left)?;
            self.free_value(right)?;

            self.push(out)?;
            Ok(())
        }
    }
}
1320
// Generates a 64-bit integer comparison method; mirrors `cmp_i32!`, except
// that x86 `cmp` has no imm64 form, so a constant operand must fit in an
// imm32 to be encoded inline — otherwise it is materialized into a register.
//
// NOTE(review): several `put_into_register(I32, ...)` calls below pass `I32`
// for 64-bit operands — presumably harmless because all integer types select
// the same (Rq) register class, but confirm against
// `impl From<SignlessType> for GPRType`.
macro_rules! cmp_i64 {
    ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => {
        pub fn $name(&mut self) -> Result<(), Error> {
            let mut right = self.pop()?;
            let mut left = self.pop()?;

            let out = if let Some(i) = left.imm_i64() {
                match right {
                    ValueLocation::Stack(offset) => {
                        let offset = self.adjusted_offset(offset);
                        if let Some(i) = i.try_into().ok() {
                            dynasm!(self.asm
                                ; cmp QWORD [rsp + offset], i
                            );
                        } else {
                            // Constant too wide for an imm32: materialize it.
                            let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                            dynasm!(self.asm
                                ; cmp QWORD [rsp + offset], Rq(lreg.rq().unwrap())
                            );
                        }
                        ValueLocation::Cond($reverse_flags)
                    }
                    ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                        let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                        if let Some(i) = i.try_into().ok() {
                            dynasm!(self.asm
                                ; cmp Rq(rreg.rq().unwrap()), i
                            );
                        } else {
                            let lreg = self.put_into_register(I32, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                            dynasm!(self.asm
                                ; cmp Rq(rreg.rq().unwrap()), Rq(lreg.rq().unwrap())
                            );
                        }
                        ValueLocation::Cond($reverse_flags)
                    }
                    // Both sides constant: fold to 0/1.
                    ValueLocation::Immediate(right) => {
                        ValueLocation::Immediate(
                            (if $const_fallback(i, right.as_i64().unwrap()) {
                                1i32
                            } else {
                                0i32
                            }).into()
                        )
                    }
                }
            } else {
                let lreg = self.put_into_register(I64, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                match right {
                    ValueLocation::Stack(offset) => {
                        let offset = self.adjusted_offset(offset);
                        dynasm!(self.asm
                            ; cmp Rq(lreg.rq().unwrap()), [rsp + offset]
                        );
                    }
                    ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                        let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                        dynasm!(self.asm
                            ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
                        );
                    }
                    ValueLocation::Immediate(i) => {
                        let i = i.as_i64().unwrap();
                        if let Some(i) = i.try_into().ok() {
                            dynasm!(self.asm
                                ; cmp Rq(lreg.rq().unwrap()), i
                            );
                        } else {
                            // Constant too wide for an imm32: materialize it.
                            let rreg = self.put_into_register(I32, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                            dynasm!(self.asm
                                ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
                            );
                        }
                    }
                }

                ValueLocation::Cond($flags)
            };

            self.free_value(left)?;
            self.free_value(right)?;
            self.push(out)?;
            Ok(())
        }
    }
}
1413
// Generates a pair of f32 comparison methods via `cmp_float!`, using
// `comiss` as the flag-setting compare.
macro_rules! cmp_f32 {
    ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => {
        cmp_float!(
            comiss,
            f32,
            imm_f32,
            $name,
            $reverse_name,
            $instr,
            $const_fallback
        );
    };
}
1427
// Generates a float equality-style operator. `$instr` is a whole-register
// SSE compare that leaves an all-ones/all-zeros mask in its destination;
// the result is masked down to a canonical 0/1 in an integer register.
macro_rules! eq_float {
    ($name:ident, $instr:ident, $imm_fn:ident, $const_fallback:expr) => {
        pub fn $name(&mut self) -> Result<(), Error>{
            let right = self.pop()?;
            let left = self.pop()?;

            // Both constant: fold to 0/1 at compile time.
            if let Some(right) = right.immediate() {
                if let Some(left) = left.immediate() {
                    self.push(ValueLocation::Immediate(
                        if $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()) {
                            1u32
                        } else {
                            0
                        }.into()
                    ))?;
                    return Ok(());
                }
            }

            // The compare clobbers its destination, so prefer a uniquely-owned
            // register there; eq/ne are symmetric, so swapping is safe.
            let (mut left, mut right) = match left {
                ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right),
                _ => (right, left)
            };

            let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
            let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

            let out = self.take_reg(I32).unwrap();

            // Extract bit 0 of the mask to get a 0/1 boolean.
            dynasm!(self.asm
                ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
                ; movd Rd(out.rq().unwrap()), Rx(lreg.rx().unwrap())
                ; and Rd(out.rq().unwrap()), 1
            );

            self.push(ValueLocation::Reg(out))?;
            self.free_value(left)?;
            self.free_value(right)?;
            Ok(())
        }

    }
}
1471
// Generates a float min/max method.
//
// The common case uses the plain SSE `$instr` (min/max). When `$cmpinstr`
// reports equality the result needs fixing up: an "equal" with the parity
// flag set means the compare was unordered (a NaN operand), and `$addinstr`
// is applied — presumably to propagate the NaN; a genuine equality applies
// `$combineinstr` — presumably to select the correct signed zero. Confirm
// both against wasm's min/max semantics.
macro_rules! minmax_float {
    (
        $name:ident,
        $instr:ident,
        $cmpinstr:ident,
        $addinstr:ident,
        $combineinstr:ident,
        $imm_fn:ident,
        $const_fallback:expr
    ) => {
        pub fn $name(&mut self) -> Result<(), Error>{
            let right = self.pop()?;
            let left = self.pop()?;

            // Both constant: fold at compile time.
            if let Some(right) = right.immediate() {
                if let Some(left) = left.immediate() {
                    self.push(ValueLocation::Immediate(
                        $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()).into()
                    ))?;
                    return Ok(());
                }
            }

            // Prefer a uniquely-owned register as the clobbered destination.
            let (mut left, mut right) = match left {
                ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right),
                _ => (right, left)
            };

            let lreg = self.put_into_temp_register(GPRType::Rx, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
            let rreg = self.put_into_register(GPRType::Rx, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

            dynasm!(self.asm
                ; $cmpinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
                ; je >equal
                ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
                ; jmp >ret
                ; equal:
                ; jnp >equal_but_not_parity
                ; $addinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
                ; jmp >ret
                ; equal_but_not_parity:
                ; $combineinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
                ; ret:
            );

            self.push(left)?;
            self.free_value(right)?;
            Ok(())
        }

    }
}
1524
// Generates a pair of f64 comparison methods via `cmp_float!`, using
// `comisd` as the flag-setting compare.
macro_rules! cmp_f64 {
    ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => {
        cmp_float!(
            comisd,
            f64,
            imm_f64,
            $name,
            $reverse_name,
            $instr,
            $const_fallback
        );
    };
}
1538
// Generates a pair of float ordered-comparison methods.
//
// `@helper` is the shared body: constant-fold when both operands are
// immediates, otherwise emit `$cmp_instr` (comiss/comisd) to set EFLAGS and
// `$instr` (a setcc mnemonic) to materialize the 0/1 result. The
// `$reverse_name` method is generated simply by feeding the operands to the
// helper in swapped order.
macro_rules! cmp_float {
    (@helper $cmp_instr:ident, $ty:ty, $imm_fn:ident, $self:expr, $left:expr, $right:expr, $instr:ident, $const_fallback:expr) => {{
        let (left, right, this) = ($left, $right, $self);
        if let (Some(left), Some(right)) = (left.$imm_fn(), right.$imm_fn()) {
            if $const_fallback(<$ty>::from_bits(left.to_bits()), <$ty>::from_bits(right.to_bits())) {
                ValueLocation::Immediate(1i32.into())
            } else {
                ValueLocation::Immediate(0i32.into())
            }
        } else {
            let lreg = this.put_into_register(GPRType::Rx, left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

            let result = this.take_reg(I32).unwrap();

            match right {
                ValueLocation::Stack(offset) => {
                    let offset = this.adjusted_offset(*offset);

                    // Zero `result` *before* the compare (xor clobbers
                    // EFLAGS) so the setcc byte write yields a clean 0/1.
                    dynasm!(this.asm
                        ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap())
                        ; $cmp_instr Rx(lreg.rx().unwrap()), [rsp + offset]
                        ; $instr Rb(result.rq().unwrap())
                    );
                }
                right => {
                    let rreg = this.put_into_register(GPRType::Rx, right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                    dynasm!(this.asm
                        ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap())
                        ; $cmp_instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
                        ; $instr Rb(result.rq().unwrap())
                    );
                }
            }

            ValueLocation::Reg(result)
        }
    }};
    ($cmp_instr:ident, $ty:ty, $imm_fn:ident, $name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => {
        pub fn $name(&mut self) -> Result<(), Error> {
            let mut right = self.pop()?;
            let mut left = self.pop()?;

            let out = cmp_float!(@helper
                $cmp_instr,
                $ty,
                $imm_fn,
                &mut *self,
                &mut left,
                &mut right,
                $instr,
                $const_fallback
            );

            self.free_value(left)?;
            self.free_value(right)?;

            self.push(out)?;
            Ok(())
        }

        // Same comparison with the operands swapped.
        pub fn $reverse_name(&mut self) -> Result<(), Error> {
            let mut right = self.pop()?;
            let mut left = self.pop()?;

            let out = cmp_float!(@helper
                $cmp_instr,
                $ty,
                $imm_fn,
                &mut *self,
                &mut right,
                &mut left,
                $instr,
                $const_fallback
            );

            self.free_value(left)?;
            self.free_value(right)?;

            self.push(out)?;
            Ok(())
        }
    };
}
1623
// `binop!` specialization for non-commutative i32 operations; the
// direct-immediate emitter encodes the constant straight into `$instr`.
macro_rules! binop_i32 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        binop!(
            $name,
            $instr,
            $const_fallback,
            Rd,
            rq,
            I32,
            imm_i32,
            |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
                ; $instr Rd(op1.rq().unwrap()), i
            )
        );
    };
}
1640
// `commutative_binop!` specialization for commutative i32 operations.
macro_rules! commutative_binop_i32 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        commutative_binop!(
            $name,
            $instr,
            $const_fallback,
            Rd,
            rq,
            I32,
            imm_i32,
            |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
                ; $instr Rd(op1.rq().unwrap()), i
            )
        );
    };
}
1657
// `binop!` specialization for non-commutative i64 operations.
macro_rules! binop_i64 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        binop!(
            $name,
            $instr,
            $const_fallback,
            Rq,
            rq,
            I64,
            imm_i64,
            |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
                ; $instr Rq(op1.rq().unwrap()), i
            )
        );
    };
}
1674
// `commutative_binop!` specialization for commutative i64 operations.
macro_rules! commutative_binop_i64 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        commutative_binop!(
            $name,
            $instr,
            $const_fallback,
            Rq,
            rq,
            I64,
            imm_i64,
            |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm
                ; $instr Rq(op1.rq().unwrap()), i
            )
        );
    };
}
1691
// `binop!` specialization for non-commutative f32 operations. The constant
// fold round-trips through raw bits to preserve NaN payloads; SSE has no
// float-immediate form, so the direct-immediate emitter is unreachable.
macro_rules! binop_f32 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        binop!(
            $name,
            $instr,
            |a: Ieee32, b: Ieee32| Ieee32::from_bits(
                $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits()
            ),
            Rx,
            rx,
            F32,
            imm_f32,
            |_, _, _: i32| unreachable!()
        );
    };
}
1708
// `commutative_binop!` specialization for commutative f32 operations; see
// `binop_f32!` for the bit-level constant fold and unreachable immediate arm.
macro_rules! commutative_binop_f32 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        commutative_binop!(
            $name,
            $instr,
            |a: Ieee32, b: Ieee32| Ieee32::from_bits(
                $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits()
            ),
            Rx,
            rx,
            F32,
            imm_f32,
            |_, _, _: i32| unreachable!()
        );
    };
}
1725
// `binop!` specialization for non-commutative f64 operations; mirrors
// `binop_f32!` at double precision.
macro_rules! binop_f64 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        binop!(
            $name,
            $instr,
            |a: Ieee64, b: Ieee64| Ieee64::from_bits(
                $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits()
            ),
            Rx,
            rx,
            F64,
            imm_f64,
            |_, _, _: i32| unreachable!()
        );
    };
}
1742
// `commutative_binop!` specialization for commutative f64 operations;
// mirrors `commutative_binop_f32!` at double precision.
macro_rules! commutative_binop_f64 {
    ($name:ident, $instr:ident, $const_fallback:expr) => {
        commutative_binop!(
            $name,
            $instr,
            |a: Ieee64, b: Ieee64| Ieee64::from_bits(
                $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits()
            ),
            Rx,
            rx,
            F64,
            imm_f64,
            |_, _, _: i32| unreachable!()
        );
    };
}
// Wraps `binop!` with an operand-ordering function that exploits
// commutativity: keep `op1` as the (clobbered) destination when it is
// already in a register, or when the other operand is an immediate;
// otherwise swap, so fewer moves are emitted.
macro_rules! commutative_binop {
    ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => {
        binop!(
            $name,
            $instr,
            $const_fallback,
            $reg_ty,
            $reg_fn,
            $ty,
            $imm_fn,
            $direct_imm,
            |op1: ValueLocation, op0: ValueLocation| match op1 {
                ValueLocation::Reg(_) => (op1, op0),
                _ => {
                    if op0.immediate().is_some() {
                        (op1, op0)
                    } else {
                        (op0, op1)
                    }
                }
            }
        );
    };
}
1783
// Generates a two-operand arithmetic/logical method.
//
// The first arm defaults the operand-ordering function `$map_op` to the
// identity; the second arm does the work. `$direct_imm` emits the operation
// with an inline immediate when the RHS constant fits the instruction's
// immediate encoding; otherwise the constant goes through a scratch register.
macro_rules! binop {
    ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => {
        binop!($name, $instr, $const_fallback, $reg_ty, $reg_fn, $ty, $imm_fn, $direct_imm, |a, b| (a, b));
    };
    ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr, $map_op:expr) => {
        pub fn $name(&mut self) -> Result<(), Error> {
            let right = self.pop()?;
            let left = self.pop()?;

            // Both constant: fold at compile time, emit no code.
            if let Some(i1) = left.$imm_fn() {
                if let Some(i0) = right.$imm_fn() {
                    self.block_state.stack.push(ValueLocation::Immediate($const_fallback(i1, i0).into()));
                    return Ok(());
                }
            }

            let (mut left, mut right) = $map_op(left, right);
            // The destination operand is clobbered, so it must be a
            // uniquely-owned ("temp") register.
            let lreg = self.put_into_temp_register($ty, &mut left)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

            match right {
                ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                    // This handles the case where we (for example) have a float in an `Rq` reg
                    let right_reg = self.put_into_register($ty, &mut right)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                    dynasm!(self.asm
                        ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(right_reg.$reg_fn().unwrap())
                    );
                }
                ValueLocation::Stack(offset) => {
                    let offset = self.adjusted_offset(offset);
                    dynasm!(self.asm
                        ; $instr $reg_ty(lreg.$reg_fn().unwrap()), [rsp + offset]
                    );
                }
                ValueLocation::Immediate(i) => {
                    if let Some(i) = i.as_int().and_then(|i| i.try_into().ok()) {
                        $direct_imm(&mut *self, lreg, i);
                    } else {
                        // Immediate doesn't fit the instruction's encoding:
                        // materialize it into a scratch register first.
                        let scratch = self.take_reg($ty).unwrap();
                        self.immediate_to_reg(scratch, i)?;

                        dynasm!(self.asm
                            ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(scratch.$reg_fn().unwrap())
                        );

                        self.block_state.regs.release(scratch)?;
                    }
                }
            }

            self.free_value(right)?;
            self.push(left)?;
            Ok(())
        }
    }
}
1840
// Generates a linear-memory load method.
//
// `@inner` holds the shared machinery: resolve the memory's definition
// (reached at a static offset from `vmctx` for a locally-defined memory, or
// through one extra pointer load for an imported one), optionally emit a
// bounds check trapping with `HeapOutOfBounds`, load the memory base
// pointer, then hand off to `$emit_fn` for the actual load.  The `NONE` arm
// is for integer-only loads; the final arm additionally dispatches on
// whether the destination is an integer (`Rq`) or SSE (`Rx`) register.
//
// NOTE(review): unlike `store!`, `offset` is not checked to fit in `i32`
// before the `offset as _` cast — confirm callers guarantee this.
macro_rules! load {
    (@inner $name:ident, $rtype:expr, $reg_ty:tt, $emit_fn:expr) => {
        pub fn $name(&mut self, offset: u32) -> Result<(), Error> {
            fn load_to_reg<_M: ModuleContext>(
                ctx: &mut Context<_M>,
                dst: GPR,
                (offset, runtime_offset): (i32, Result<i32, GPR>)
            ) -> Result<(), Error> {
                let mem_index = 0;
                // Locally-defined memory: definition sits at a static vmctx
                // offset, no extra register needed (`None`).
                let reg_offset = ctx.module_context
                    .defined_memory_index(mem_index)
                    .map(|index| (
                        None,
                        ctx.module_context.vmctx_vmmemory_definition(index) as i32
                    ));
                // Imported memory: load the pointer to its definition first.
                let (reg, mem_offset) = reg_offset.unwrap_or_else(|| {
                    let reg = ctx.take_reg(I64).unwrap();

                    dynasm!(ctx.asm
                        ; mov Rq(reg.rq().unwrap()), [
                            Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32
                        ]
                    );

                    (Some(reg), 0)
                });

                let vmctx = GPR::Rq(VMCTX);

                if ctx.module_context.emit_memory_bounds_check() {
                    // Compute the effective address (static + dynamic offset)
                    // into a scratch register for the bounds check.
                    let addr_reg = match runtime_offset {
                        Ok(imm) => {
                            let addr_reg = ctx.take_reg(I64).unwrap();
                            dynasm!(ctx.asm
                                ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64
                            );
                            addr_reg
                        }
                        Err(gpr) => {
                            if offset == 0 {
                                ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                            } else if offset > 0 {
                                let addr_reg = ctx.take_reg(I64).unwrap();
                                dynasm!(ctx.asm
                                    ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset]
                                );
                                addr_reg
                            } else {
                                // Negative-as-i32 offset: `mov Rd` zero-extends
                                // on x86-64, so the offset is added as an
                                // unsigned 32-bit quantity.
                                let addr_reg = ctx.take_reg(I64).unwrap();
                                let offset_reg = ctx.take_reg(I64).unwrap();
                                dynasm!(ctx.asm
                                    ; mov Rd(offset_reg.rq().unwrap()), offset
                                    ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap())
                                    ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap())
                                );
                                ctx.block_state.regs.release(offset_reg)?;
                                addr_reg
                            }
                        }
                    };
                    // Unsigned compare against the memory's current length;
                    // trap when the effective address is out of bounds.
                    dynasm!(ctx.asm
                        ; cmp Rq(addr_reg.rq().unwrap()), [
                            Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                            mem_offset +
                            ctx.module_context.vmmemory_definition_current_length() as i32
                        ]
                        ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds)
                    );
                    ctx.block_state.regs.release(addr_reg)?;
                }

                // Load the memory's base pointer, then emit the actual load.
                let mem_ptr_reg = ctx.take_reg(I64).unwrap();
                dynasm!(ctx.asm
                    ; mov Rq(mem_ptr_reg.rq().unwrap()), [
                        Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                        mem_offset +
                        ctx.module_context.vmmemory_definition_base() as i32
                    ]
                );
                if let Some(reg) = reg {
                    ctx.block_state.regs.release(reg)?;
                }
                $emit_fn(ctx, dst, mem_ptr_reg, runtime_offset, offset)?;
                ctx.block_state.regs.release(mem_ptr_reg)?;
                Ok(())
            }

            let base = self.pop()?;

            let temp = self.take_reg($rtype).unwrap();

            match base {
                // Constant address: use the static-offset path.
                ValueLocation::Immediate(i) => {
                    load_to_reg(self, temp, (offset as _, Ok(i.as_i32().unwrap())))?;
                }
                mut base => {
                    let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                    load_to_reg(self, temp, (offset as _, Err(gpr)))?;
                    self.free_value(base)?;
                }
            }

            self.push(ValueLocation::Reg(temp))?;
            Ok(())
        }
    };
    ($name:ident, $rtype:expr, $reg_ty:tt, NONE, $rq_instr:ident, $ty:ident) => {
        load!(@inner
            $name,
            $rtype,
            $reg_ty,
            |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32| -> Result<(), Error> {
                match runtime_offset {
                    Ok(imm) => {
                        dynasm!(ctx.asm
                            ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm]
                        );
                        Ok(())
                    }
                    Err(offset_reg) => {
                        dynasm!(ctx.asm
                            ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset]
                        );
                        Ok(())
                    }
                }
            }
        );
    };
    ($name:ident, $rtype:expr, $reg_ty:tt, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => {
        load!(@inner
            $name,
            $rtype,
            $reg_ty,
            |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32| -> Result<(), Error> {
                match (dst, runtime_offset) {
                    (GPR::Rq(r), Ok(imm)) => {
                        dynasm!(ctx.asm
                            ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm]
                        );
                        Ok(())
                    }
                    (GPR::Rx(r), Ok(imm)) => {
                        // Use a single fused displacement when `offset + imm`
                        // doesn't overflow; otherwise route the static offset
                        // through a register.
                        if let Some(combined) = offset.checked_add(imm) {
                            dynasm!(ctx.asm
                                ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + combined]
                            );
                            Ok(())
                        } else {
                            let offset_reg = ctx.take_reg(GPRType::Rq).unwrap();
                            dynasm!(ctx.asm
                                ; mov Rq(offset_reg.rq().unwrap()), offset
                                ; $xmm_instr Rx(r), $ty [
                                    Rq(mem_ptr_reg.rq().unwrap()) +
                                    Rq(offset_reg.rq().unwrap()) +
                                    imm
                                ]
                            );
                            ctx.block_state.regs.release(offset_reg)?;
                            Ok(())
                        }
                    }
                    (GPR::Rq(r), Err(offset_reg)) => {
                        dynasm!(ctx.asm
                            ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset]
                        );
                        Ok(())
                    }
                    (GPR::Rx(r), Err(offset_reg)) => {
                        dynasm!(ctx.asm
                            ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset]
                        );
                        Ok(())
                    }
                }
            }
        );
    };
}
2020
// Generates a linear-memory store method.
//
// Structure mirrors `load!`: `@inner` resolves the memory definition
// (static vmctx offset for a local memory, one extra pointer load for an
// imported one), optionally emits a `HeapOutOfBounds` bounds check, loads
// the base pointer, and delegates the actual store to `$match_offset`.
// The `NONE` arm handles integer-only stores (forcing the source into an
// `Rq` register first); the last arm dispatches on integer (`Rq`) vs SSE
// (`Rx`) source registers.
//
// Fix vs. previous revision: the offset guard used the double-negative
// `if !(offset <= i32::max_value() as u32)`; rewritten as the equivalent
// positive comparison (clippy::nonminimal_bool). Behavior is unchanged.
macro_rules! store {
    (@inner $name:ident, $int_reg_ty:tt, $match_offset:expr, $size:ident) => {
        pub fn $name(&mut self, offset: u32) -> Result<(), Error>{
            fn store_from_reg<_M: ModuleContext>(
                ctx: &mut Context<_M>,
                src: GPR,
                (offset, runtime_offset): (i32, Result<i32, GPR>)
            ) -> Result<(), Error> {
                let mem_index = 0;
                // Locally-defined memory: definition at a static vmctx offset.
                let reg_offset = ctx.module_context
                    .defined_memory_index(mem_index)
                    .map(|index| (
                        None,
                        ctx.module_context.vmctx_vmmemory_definition(index) as i32
                    ));
                // Imported memory: load the pointer to its definition first.
                let (reg, mem_offset) = reg_offset.unwrap_or_else(|| {
                    let reg = ctx.take_reg(I64).unwrap();

                    dynasm!(ctx.asm
                        ; mov Rq(reg.rq().unwrap()), [
                            Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32
                        ]
                    );

                    (Some(reg), 0)
                });

                let vmctx = GPR::Rq(VMCTX);

                if ctx.module_context.emit_memory_bounds_check() {
                    // Compute the effective address (static + dynamic offset)
                    // into a scratch register for the bounds check.
                    let addr_reg = match runtime_offset {
                        Ok(imm) => {
                            let addr_reg = ctx.take_reg(I64).unwrap();
                            dynasm!(ctx.asm
                                ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64
                            );
                            addr_reg
                        }
                        Err(gpr) => {
                            if offset == 0 {
                                ctx.clone_to_register(I32, ValueLocation::Reg(gpr))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?

                            } else if offset > 0 {
                                let addr_reg = ctx.take_reg(I64).unwrap();
                                dynasm!(ctx.asm
                                    ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset]
                                );
                                addr_reg
                            } else {
                                // Negative-as-i32 offset: `mov Rd` zero-extends
                                // on x86-64, adding the offset as unsigned.
                                let addr_reg = ctx.take_reg(I64).unwrap();
                                let offset_reg = ctx.take_reg(I64).unwrap();
                                dynasm!(ctx.asm
                                    ; mov Rd(offset_reg.rq().unwrap()), offset
                                    ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap())
                                    ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap())
                                );
                                ctx.block_state.regs.release(offset_reg)?;
                                addr_reg
                            }
                        }
                    };
                    // Unsigned compare against the memory's current length;
                    // trap when the effective address is out of bounds.
                    dynasm!(ctx.asm
                        ; cmp Rq(addr_reg.rq().unwrap()), [
                            Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                            mem_offset +
                            ctx.module_context.vmmemory_definition_current_length() as i32
                        ]
                        ;; ctx.trap_if(cc::GE_U, TrapCode::HeapOutOfBounds)
                    );
                    ctx.block_state.regs.release(addr_reg)?;
                }

                // Load the memory's base pointer, then emit the actual store.
                let mem_ptr_reg = ctx.take_reg(I64).unwrap();
                dynasm!(ctx.asm
                    ; mov Rq(mem_ptr_reg.rq().unwrap()), [
                        Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                        mem_offset +
                        ctx.module_context.vmmemory_definition_base() as i32
                    ]
                );
                if let Some(reg) = reg {
                    ctx.block_state.regs.release(reg)?;
                }
                // `$match_offset` may substitute a different source register
                // (e.g. the `NONE` arm copies into a temp); release whichever
                // register it actually used.
                let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src)?;
                ctx.block_state.regs.release(mem_ptr_reg)?;
                ctx.block_state.regs.release(src)?;
                Ok(())
            }

            // The static offset is later handled as an `i32`; reject values
            // that would not survive the cast.
            if offset > i32::max_value() as u32 {
                return Err(Error::Microwasm(format!("store: offset value too big {}", offset)))
            }

            let mut src = self.pop()?;
            let base = self.pop()?;

            // `store_from_reg` frees `src`
            // TODO: Would it be better to free it outside `store_from_reg`?
            let src_reg = self.put_into_register(None, &mut src)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;


            match base {
                // Constant address: use the static-offset path.
                ValueLocation::Immediate(i) => {
                    store_from_reg(self, src_reg, (offset as i32, Ok(i.as_i32().unwrap())))?
                }
                mut base => {
                    let gpr = self.put_into_register(I32, &mut base)?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                    store_from_reg(self, src_reg, (offset as i32, Err(gpr)))?;
                    self.free_value(base)?;
                }
            }
            Ok(())
        }
    };
    ($name:ident, $int_reg_ty:tt, NONE, $size:ident) => {
        store!(@inner
            $name,
            $int_reg_ty,
            |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32, src| -> Result<GPR, Error> {
                // Integer-only store: make sure the source is in an Rq-class
                // temp register before emitting `mov`.
                let src_reg = ctx.put_into_temp_register(GPRType::Rq, &mut ValueLocation::Reg(src))?.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                match runtime_offset {
                    Ok(imm) => {
                        dynasm!(ctx.asm
                            ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(src_reg.rq().unwrap())
                        );
                    }
                    Err(offset_reg) => {
                        dynasm!(ctx.asm
                            ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(src_reg.rq().unwrap())
                        );
                    }
                }

                Ok(src_reg)
            },
            $size
        );
    };
    ($name:ident, $int_reg_ty:tt, $xmm_instr:ident, $size:ident) => {
        store!(@inner
            $name,
            $int_reg_ty,
            |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result<i32, GPR>, offset: i32, src| -> Result<GPR, Error> {
                // Dispatch on integer (`mov`) vs SSE (`$xmm_instr`) source.
                match (runtime_offset, src) {
                    (Ok(imm), GPR::Rq(r)) => {
                        dynasm!(ctx.asm
                            ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(r)
                        );
                    }
                    (Ok(imm), GPR::Rx(r)) => {
                        dynasm!(ctx.asm
                            ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], Rx(r)
                        );
                    }
                    (Err(offset_reg), GPR::Rq(r)) => {
                        dynasm!(ctx.asm
                            ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(r)
                        );
                    }
                    (Err(offset_reg), GPR::Rx(r)) => {
                        dynasm!(ctx.asm
                            ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], Rx(r)
                        );
                    }
                }

                Ok(src)
            },
            $size
        );
    };
}
2194
/// A "virtual" calling convention: a snapshot of the full abstract machine
/// state rather than a fixed set of argument locations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VirtualCallingConvention {
    /// Location of every value on the virtual value stack.
    pub stack: Stack,
    /// Physical stack depth at the time of the snapshot.
    pub depth: StackDepth,
}
2200
2201 impl<'this, M: ModuleContext> Context<'this, M> {
    /// Spill one register of the given type to the physical stack so it can
    /// be reused.
    ///
    /// Scans the value stack for the first entry held in a register of type
    /// `type_`, pushes that value to the physical stack, and redirects the
    /// slot (plus any later duplicates of the same location) to the new stack
    /// slot. Returns `Ok(false)` when no register of that type is on the
    /// value stack, i.e. nothing could be spilled.
    fn free_reg(&mut self, type_: GPRType) -> Result<bool, Error> {
        let pos = if let Some(pos) = self
            .block_state
            .stack
            .iter()
            .position(|r| r.reg().map(|reg| reg.type_() == type_).unwrap_or(false))
        {
            pos
        } else {
            // Nothing of this type is spillable.
            return Ok(false);
        };

        let old_loc = self.block_state.stack[pos];
        // `push_physical` also releases the register usage held by this slot.
        let new_loc = self.push_physical(old_loc)?;
        self.block_state.stack[pos] = new_loc;

        let reg = old_loc.reg().unwrap();

        // Registers are refcounted (see `num_usages`): every duplicate stack
        // entry holds one usage, so release once per duplicate we redirect.
        for elem in &mut self.block_state.stack[pos + 1..] {
            if *elem == old_loc {
                *elem = new_loc;
                self.block_state.regs.release(reg)?;
            }
        }

        Ok(true)
    }
2229
take_reg(&mut self, r: impl Into<GPRType>) -> Option<GPR>2230 fn take_reg(&mut self, r: impl Into<GPRType>) -> Option<GPR> {
2231 let r = r.into();
2232 loop {
2233 if let Some(gpr) = self.block_state.regs.take(r) {
2234 break Some(gpr);
2235 }
2236
2237 if self.free_reg(r) == Ok(false) {
2238 break None;
2239 }
2240 }
2241 }
2242
    /// Record the Cranelift source location to associate with subsequently
    /// emitted instructions.
    pub fn set_source_loc(&mut self, loc: SourceLoc) {
        self.source_loc = loc;
    }
2246
virtual_calling_convention(&self) -> VirtualCallingConvention2247 pub fn virtual_calling_convention(&self) -> VirtualCallingConvention {
2248 VirtualCallingConvention {
2249 stack: self.block_state.stack.clone(),
2250 depth: self.block_state.depth.clone(),
2251 }
2252 }
2253
    /// Create a new undefined label.
    ///
    /// The label is expected to be placed later with `define_label`; branches
    /// may reference it before it is defined.
    pub fn create_label(&mut self) -> Label {
        Label(self.asm.new_dynamic_label())
    }
2258
adjusted_offset(&self, offset: i32) -> i322259 fn adjusted_offset(&self, offset: i32) -> i32 {
2260 (self.block_state.depth.0 as i32 + offset) * WORD_SIZE as i32
2261 }
2262
    // Integer comparison operators. Each invocation presumably generates a
    // public method popping two operands and pushing a boolean result; the
    // two condition codes appear to cover the operands in emitted order and
    // with operands swapped, and the closure performs compile-time constant
    // folding — confirm against the `cmp_i32!`/`cmp_i64!` definitions.
    cmp_i32!(i32_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b);
    cmp_i32!(i32_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b);
    // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous
    cmp_i32!(i32_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u32) < (b as u32));
    cmp_i32!(i32_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u32)
        <= (b as u32));
    cmp_i32!(i32_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u32) > (b as u32));
    cmp_i32!(i32_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u32)
        >= (b as u32));
    cmp_i32!(i32_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b);
    cmp_i32!(i32_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b);
    cmp_i32!(i32_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b);
    cmp_i32!(i32_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b);

    cmp_i64!(i64_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b);
    cmp_i64!(i64_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b);
    // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous
    cmp_i64!(i64_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u64) < (b as u64));
    cmp_i64!(i64_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u64)
        <= (b as u64));
    cmp_i64!(i64_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u64) > (b as u64));
    cmp_i64!(i64_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u64)
        >= (b as u64));
    cmp_i64!(i64_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b);
    cmp_i64!(i64_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b);
    cmp_i64!(i64_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b);
    cmp_i64!(i64_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b);

    // Float comparisons. For `gt`/`ge` the second argument names the
    // mirrored `lt`/`le` operation and the third the `setcc` to use;
    // `eq`/`ne` use the SSE compare instructions (`cmpeqss`/`cmpeqsd` etc.)
    // with a constant folder for immediate operands.
    cmp_f32!(f32_gt, f32_lt, seta, |a, b| a > b);
    cmp_f32!(f32_ge, f32_le, setnc, |a, b| a >= b);
    eq_float!(
        f32_eq,
        cmpeqss,
        as_f32,
        |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) == f32::from_bits(b.to_bits())
    );
    eq_float!(
        f32_ne,
        cmpneqss,
        as_f32,
        |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) != f32::from_bits(b.to_bits())
    );

    cmp_f64!(f64_gt, f64_lt, seta, |a, b| a > b);
    cmp_f64!(f64_ge, f64_le, setnc, |a, b| a >= b);
    eq_float!(
        f64_eq,
        cmpeqsd,
        as_f64,
        |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) == f64::from_bits(b.to_bits())
    );
    eq_float!(
        f64_ne,
        cmpneqsd,
        as_f64,
        |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) != f64::from_bits(b.to_bits())
    );
2320
2321 // TODO: Should we do this logic in `eq` and just have this delegate to `eq`?
2322 // That would mean that `eqz` and `eq` with a const 0 argument don't
2323 // result in different code. It would also allow us to generate better
2324 // code for `neq` and `gt_u` with const 0 operand
i32_eqz(&mut self) -> Result<(), Error>2325 pub fn i32_eqz(&mut self) -> Result<(), Error> {
2326 let mut val = self.pop()?;
2327
2328 if let ValueLocation::Immediate(Value::I32(i)) = val {
2329 self.push(ValueLocation::Immediate(
2330 (if i == 0 { 1i32 } else { 0 }).into(),
2331 ))?;
2332 return Ok(());
2333 }
2334
2335 if let ValueLocation::Cond(loc) = val {
2336 self.push(ValueLocation::Cond(!loc))?;
2337 return Ok(());
2338 }
2339
2340 let reg = self
2341 .put_into_register(I32, &mut val)?
2342 .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2343
2344 let out = self.take_reg(I32).unwrap();
2345
2346 dynasm!(self.asm
2347 ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap())
2348 ; test Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap())
2349 ; setz Rb(out.rq().unwrap())
2350 );
2351
2352 self.free_value(val)?;
2353
2354 self.push(ValueLocation::Reg(out))?;
2355 Ok(())
2356 }
2357
i64_eqz(&mut self) -> Result<(), Error>2358 pub fn i64_eqz(&mut self) -> Result<(), Error> {
2359 let mut val = self.pop()?;
2360
2361 if let ValueLocation::Immediate(Value::I64(i)) = val {
2362 self.push(ValueLocation::Immediate(
2363 (if i == 0 { 1i32 } else { 0 }).into(),
2364 ))?;
2365 return Ok(());
2366 }
2367
2368 if let ValueLocation::Cond(loc) = val {
2369 self.push(ValueLocation::Cond(!loc))?;
2370 return Ok(());
2371 }
2372
2373 let reg = self
2374 .put_into_register(I64, &mut val)?
2375 .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
2376
2377 let out = self.take_reg(I64).unwrap();
2378
2379 dynasm!(self.asm
2380 ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap())
2381 ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap())
2382 ; setz Rb(out.rq().unwrap())
2383 );
2384
2385 self.free_value(val)?;
2386
2387 self.push(ValueLocation::Reg(out))?;
2388 Ok(())
2389 }
2390
    /// Emit a conditional jump to `label` for the given condition code.
    ///
    /// Assumes the CPU flags have already been set by a preceding `cmp`/`test`.
    fn br_on_cond_code(&mut self, label: Label, cond: CondCode) {
        match cond {
            cc::EQUAL => dynasm!(self.asm
                ; je =>label.0
            ),
            cc::NOT_EQUAL => dynasm!(self.asm
                ; jne =>label.0
            ),
            // Unsigned comparisons use the above/below family…
            cc::GT_U => dynasm!(self.asm
                ; ja =>label.0
            ),
            cc::GE_U => dynasm!(self.asm
                ; jae =>label.0
            ),
            cc::LT_U => dynasm!(self.asm
                ; jb =>label.0
            ),
            cc::LE_U => dynasm!(self.asm
                ; jbe =>label.0
            ),
            // …and signed comparisons the greater/less family.
            cc::GT_S => dynasm!(self.asm
                ; jg =>label.0
            ),
            cc::GE_S => dynasm!(self.asm
                ; jge =>label.0
            ),
            cc::LT_S => dynasm!(self.asm
                ; jl =>label.0
            ),
            cc::LE_S => dynasm!(self.asm
                ; jle =>label.0
            ),
        }
    }
2425
2426 /// Pops i32 predicate and branches to the specified label
2427 /// if the predicate is equal to zero.
br_if_false( &mut self, target: impl Into<BrTarget<Label>>, pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, ) -> Result<(), Error>2428 pub fn br_if_false(
2429 &mut self,
2430 target: impl Into<BrTarget<Label>>,
2431 pass_args: impl FnOnce(&mut Self) -> Result<(), Error>,
2432 ) -> Result<(), Error> {
2433 let mut val = self.pop()?;
2434 let label = self.target_to_label(target.into());
2435
2436 let cond = match val {
2437 ValueLocation::Cond(cc) => !cc,
2438 _ => {
2439 let predicate = match self.put_into_register(I32, &mut val) {
2440 Err(e) => return Err(e),
2441 Ok(o) => {
2442 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
2443 }
2444 };
2445
2446 dynasm!(self.asm
2447 ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap())
2448 );
2449
2450 CondCode::ZF0
2451 }
2452 };
2453
2454 self.free_value(val)?;
2455
2456 pass_args(self)?;
2457
2458 self.br_on_cond_code(label, cond);
2459
2460 Ok(())
2461 }
2462
2463 /// Pops i32 predicate and branches to the specified label
2464 /// if the predicate is not equal to zero.
br_if_true( &mut self, target: impl Into<BrTarget<Label>>, pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, ) -> Result<(), Error>2465 pub fn br_if_true(
2466 &mut self,
2467 target: impl Into<BrTarget<Label>>,
2468 pass_args: impl FnOnce(&mut Self) -> Result<(), Error>,
2469 ) -> Result<(), Error> {
2470 let mut val = self.pop()?;
2471 let label = self.target_to_label(target.into());
2472
2473 let cond = match val {
2474 ValueLocation::Cond(cc) => cc,
2475 _ => {
2476 let predicate = match self.put_into_register(I32, &mut val) {
2477 Err(e) => return Err(e),
2478 Ok(o) => {
2479 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
2480 }
2481 };
2482
2483 dynasm!(self.asm
2484 ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap())
2485 );
2486
2487 CondCode::ZF1
2488 }
2489 };
2490
2491 self.free_value(val)?;
2492
2493 pass_args(self)?;
2494
2495 self.br_on_cond_code(label, cond);
2496
2497 Ok(())
2498 }
2499
    /// Branch unconditionally to the specified label.
    ///
    /// A `BrTarget::Return` delegates to `self.ret()`; a concrete label gets
    /// a direct `jmp`.
    pub fn br(&mut self, label: impl Into<BrTarget<Label>>) {
        match label.into() {
            BrTarget::Return => self.ret(),
            BrTarget::Label(label) => dynasm!(self.asm
                ; jmp =>label.0
            ),
        }
    }
2509
2510 /// If `default` is `None` then the default is just continuing execution
br_table<I>( &mut self, targets: I, default: Option<BrTarget<Label>>, pass_args: impl FnOnce(&mut Self) -> Result<(), Error>, ) -> Result<(), Error> where I: IntoIterator<Item = Option<BrTarget<Label>>>, I::IntoIter: ExactSizeIterator + DoubleEndedIterator,2511 pub fn br_table<I>(
2512 &mut self,
2513 targets: I,
2514 default: Option<BrTarget<Label>>,
2515 pass_args: impl FnOnce(&mut Self) -> Result<(), Error>,
2516 ) -> Result<(), Error>
2517 where
2518 I: IntoIterator<Item = Option<BrTarget<Label>>>,
2519 I::IntoIter: ExactSizeIterator + DoubleEndedIterator,
2520 {
2521 let mut targets = targets.into_iter();
2522 let count = targets.len();
2523
2524 let mut selector = self.pop()?;
2525
2526 pass_args(self)?;
2527
2528 if let Some(imm) = selector.imm_i32() {
2529 if let Some(target) = targets.nth(imm as _).or(Some(default)).and_then(|a| a) {
2530 match target {
2531 BrTarget::Label(label) => self.br(label),
2532 BrTarget::Return => {
2533 dynasm!(self.asm
2534 ; ret
2535 );
2536 }
2537 }
2538 }
2539 } else {
2540 let end_label = self.create_label();
2541
2542 if count > 0 {
2543 let temp = match self.put_into_temp_register(GPRType::Rq, &mut selector) {
2544 Err(e) => return Err(e),
2545 Ok(o) => match o {
2546 Some(r) => Ok((r, false)),
2547 None => {
2548 self.push_physical(ValueLocation::Reg(RAX))?;
2549 self.block_state.regs.mark_used(RAX);
2550 Ok((RAX, true))
2551 }
2552 },
2553 };
2554
2555 let (selector_reg, pop_selector) = match temp {
2556 Err(e) => return Err(e),
2557 Ok(a) => a,
2558 };
2559
2560 let (tmp, pop_tmp) = if let Some(reg) = self.take_reg(I64) {
2561 (reg, false)
2562 } else {
2563 let out_reg = if selector_reg == RAX { RCX } else { RAX };
2564
2565 self.push_physical(ValueLocation::Reg(out_reg))?;
2566 self.block_state.regs.mark_used(out_reg);
2567
2568 (out_reg, true)
2569 };
2570
2571 self.immediate_to_reg(tmp, (count as u32).into())?;
2572 dynasm!(self.asm
2573 ; cmp Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap())
2574 ; cmova Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap())
2575 ; lea Rq(tmp.rq().unwrap()), [>start_label]
2576 ; lea Rq(selector_reg.rq().unwrap()), [
2577 Rq(selector_reg.rq().unwrap()) * 5
2578 ]
2579 ; add Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap())
2580 );
2581
2582 if pop_tmp {
2583 dynasm!(self.asm
2584 ; pop Rq(tmp.rq().unwrap())
2585 );
2586 } else {
2587 self.block_state.regs.release(tmp)?;
2588 }
2589
2590 if pop_selector {
2591 dynasm!(self.asm
2592 ; pop Rq(selector_reg.rq().unwrap())
2593 );
2594 }
2595
2596 dynasm!(self.asm
2597 ; jmp Rq(selector_reg.rq().unwrap())
2598 ; start_label:
2599 );
2600
2601 for target in targets {
2602 let label = target
2603 .map(|target| self.target_to_label(target))
2604 .unwrap_or(end_label);
2605 dynasm!(self.asm
2606 ; jmp =>label.0
2607 );
2608 }
2609 }
2610
2611 if let Some(def) = default {
2612 self.br(def);
2613 }
2614
2615 self.define_label(end_label);
2616 }
2617
2618 self.free_value(selector)?;
2619 Ok(())
2620 }
2621
    /// Adjust `rsp` so the physical stack depth matches `depth`.
    ///
    /// A one-word difference prefers `push`/`pop`; any other difference (or a
    /// one-word shrink with no free register to pop into) adjusts `rsp` with
    /// a single `lea`. Note `push rax` only reserves the slot — its contents
    /// are garbage until written.
    fn set_stack_depth(&mut self, depth: StackDepth) -> Result<(), Error> {
        if self.block_state.depth.0 != depth.0 {
            let diff = depth.0 as i32 - self.block_state.depth.0 as i32;
            let emit_lea = if diff.abs() != 1 {
                true
            } else {
                match self.block_state.depth.0.cmp(&depth.0) {
                    Ordering::Less => {
                        // Growing by exactly one word (diff == 1 here).
                        for _ in 0..diff {
                            dynasm!(self.asm
                                ; push rax
                            );
                        }
                        false
                    }
                    Ordering::Greater => {
                        // Shrinking by one word: pop into a throwaway register
                        // if one is free, otherwise fall back to `lea`.
                        if let Some(trash) = self.take_reg(I64) {
                            for _ in 0..self.block_state.depth.0 - depth.0 {
                                dynasm!(self.asm
                                    ; pop Rq(trash.rq().unwrap())
                                );
                            }
                            self.block_state.regs.release(trash)?;
                            false
                        } else {
                            true
                        }
                    }
                    Ordering::Equal => false,
                }
            };
            if emit_lea {
                dynasm!(self.asm
                    ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32]
                );
            }
            self.block_state.depth = depth;
        }
        Ok(())
    }
2662
    /// Pop the topmost stack values into the argument locations of `cc`,
    /// last argument first.
    ///
    /// Register destinations still occupied by an unrelated value are spilled
    /// first so `pop_into` can safely overwrite them.
    fn do_pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> {
        let args = &cc.arguments;
        // Only as many arguments as there are values currently on the stack.
        for &dst in args.iter().rev().take(self.block_state.stack.len()) {
            if let CCLoc::Reg(r) = dst {
                // Spill unless the top of stack is already this register.
                if !self.block_state.regs.is_free(r)
                    && *self.block_state.stack.last().unwrap() != ValueLocation::Reg(r)
                {
                    // TODO: This would be made simpler and more efficient with a proper SSE
                    // representation.
                    self.save_regs(std::iter::once(r))?;
                }

                // Claim the destination register for the incoming value.
                self.block_state.regs.mark_used(r);
            }
            self.pop_into(dst)?;
        }
        Ok(())
    }
2681
pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error>2682 pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) -> Result<(), Error> {
2683 self.do_pass_block_args(cc)?;
2684 self.set_stack_depth(cc.stack_depth.clone())?;
2685 Ok(())
2686 }
2687
    /// Like `pass_block_args`, but additionally materializes extra stack
    /// values (beyond `cc`'s arguments, up to `params` total) into concrete
    /// locations, returning the resulting calling convention.
    pub fn serialize_block_args(
        &mut self,
        cc: &BlockCallingConvention,
        params: u32,
    ) -> Result<BlockCallingConvention, Error> {
        self.do_pass_block_args(cc)?;

        let mut out_args = cc.arguments.clone();

        // Work top-of-stack-first so newly popped values can be appended…
        out_args.reverse();

        while out_args.len() < params as usize {
            let mut val = self.pop()?;

            // TODO: We can use stack slots for values already on the stack but we
            // don't refcount stack slots right now
            let ccloc = self.put_into_temp_location(None, &mut val)?;
            out_args.push(ccloc);
        }

        // …then restore the bottom-first order the convention expects.
        out_args.reverse();

        self.set_stack_depth(cc.stack_depth.clone())?;

        Ok(BlockCallingConvention {
            stack_depth: cc.stack_depth.clone(),
            arguments: out_args,
        })
    }
2717
2718 /// Puts all stack values into "real" locations so that they can i.e. be set to different
2719 /// values on different iterations of a loop
serialize_args(&mut self, count: u32) -> Result<BlockCallingConvention, Error>2720 pub fn serialize_args(&mut self, count: u32) -> Result<BlockCallingConvention, Error> {
2721 let mut out = Vec::with_capacity(count as _);
2722
2723 // TODO: We can make this more efficient now that `pop` isn't so complicated
2724 for _ in 0..count {
2725 let mut val = self.pop()?;
2726 // TODO: We can use stack slots for values already on the stack but we
2727 // don't refcount stack slots right now
2728 let loc = self.put_into_temp_location(None, &mut val)?;
2729
2730 out.push(loc);
2731 }
2732
2733 out.reverse();
2734
2735 Ok(BlockCallingConvention {
2736 stack_depth: self.block_state.depth.clone(),
2737 arguments: out,
2738 })
2739 }
2740
    /// Read global `global_idx` and push its value onto the stack.
    ///
    /// A locally defined global is read straight out of the vmctx at a known
    /// offset; an imported global requires first loading the pointer to its
    /// definition from the import table.
    pub fn get_global(&mut self, global_idx: u32) -> Result<(), Error> {
        // `(reg, offset)`: optional base-pointer register (for imports) plus
        // the byte offset of the global relative to that base (or vmctx).
        let (reg, offset) = self
            .module_context
            .defined_global_index(global_idx)
            .map(|defined_global_index| {
                (
                    None,
                    self.module_context
                        .vmctx_vmglobal_definition(defined_global_index),
                )
            })
            .unwrap_or_else(|| {
                // NOTE(review): panics if no GPR can be freed — confirm
                // callers guarantee register availability here.
                let reg = self.take_reg(I64).unwrap();

                dynasm!(self.asm
                    ; mov Rq(reg.rq().unwrap()), [
                        Rq(VMCTX) +
                        self.module_context.vmctx_vmglobal_import_from(global_idx) as i32
                    ]
                );

                (Some(reg), 0)
            });

        let out = self.take_reg(GPRType::Rq).unwrap();
        let vmctx = GPR::Rq(VMCTX);

        // TODO: Are globals necessarily aligned to 128 bits? We can load directly to an XMM reg if so
        dynasm!(self.asm
            ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32]
        );

        // The temporary holding the import pointer is no longer needed.
        if let Some(reg) = reg {
            self.block_state.regs.release(reg)?;
        }

        self.push(ValueLocation::Reg(out))?;
        Ok(())
    }
2780
    /// Pop a value from the stack and store it into global `global_idx`.
    ///
    /// Mirrors `get_global`: locally defined globals are written directly via
    /// the vmctx, imported globals through a loaded definition pointer.
    pub fn set_global(&mut self, global_idx: u32) -> Result<(), Error> {
        let mut val = self.pop()?;
        // `(reg, offset)`: optional base-pointer register (for imports) plus
        // the byte offset of the global relative to that base (or vmctx).
        let (reg, offset) = self
            .module_context
            .defined_global_index(global_idx)
            .map(|defined_global_index| {
                (
                    None,
                    self.module_context
                        .vmctx_vmglobal_definition(defined_global_index),
                )
            })
            .unwrap_or_else(|| {
                // NOTE(review): panics if no GPR can be freed — confirm
                // callers guarantee register availability here.
                let reg = self.take_reg(I64).unwrap();

                dynasm!(self.asm
                    ; mov Rq(reg.rq().unwrap()), [
                        Rq(VMCTX) +
                        self.module_context.vmctx_vmglobal_import_from(global_idx) as i32
                    ]
                );

                (Some(reg), 0)
            });

        let val_reg = self
            .put_into_register(GPRType::Rq, &mut val)?
            .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
        let vmctx = GPR::Rq(VMCTX);

        // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits
        dynasm!(self.asm
            ; mov [
                Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32
            ], Rq(val_reg.rq().unwrap())
        );

        // The temporary holding the import pointer is no longer needed.
        if let Some(reg) = reg {
            self.block_state.regs.release(reg)?;
        }

        self.free_value(val)?;
        Ok(())
    }
2825
    /// Materialize the immediate `val` into `reg`.
    ///
    /// For a general-purpose register, uses the 32-bit `mov` encoding (which
    /// zero-extends to 64 bits) when the raw bit pattern fits in 32 bits and
    /// a full 64-bit move otherwise. For an XMM register the value is staged
    /// through a temporary GPR.
    fn immediate_to_reg(&mut self, reg: GPR, val: Value) -> Result<(), Error> {
        match reg {
            GPR::Rq(r) => {
                // Raw bit pattern of the immediate, independent of its type.
                let val = val.as_bytes();
                if (val as u64) <= u32::max_value() as u64 {
                    // Fits in 32 bits: `mov r32, imm32` zero-extends.
                    dynasm!(self.asm
                        ; mov Rd(r), val as i32
                    );
                } else {
                    dynasm!(self.asm
                        ; mov Rq(r), QWORD val
                    );
                }
            }
            reg @ GPR::Rx(_) => {
                // No direct immediate-to-XMM move: go through a scratch GPR.
                let tmp = self
                    .take_reg(GPRType::Rq)
                    .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                self.immediate_to_reg(tmp, val)?;
                let tmp = ValueLocation::Reg(tmp);
                self.copy_value(tmp, CCLoc::Reg(reg))?;
                self.free_value(tmp)?;
            }
        }

        Ok(())
    }
2853
    // The `&` and `&mut` aren't necessary (`ValueLocation` is copy) but it ensures that we don't get
    // the arguments the wrong way around. In the future we want to have a `ReadLocation` and `WriteLocation`
    // so we statically can't write to a literal so this will become a non-issue.
    /// Copy `src` into the concrete location `dst`, emitting whatever code the
    /// source/destination combination requires.
    fn copy_value(&mut self, src: ValueLocation, dst: CCLoc) -> Result<(), Error> {
        match (src, dst) {
            // Flags-based value into a stack slot: zero the slot first, then
            // store the condition's boolean into its low byte with `setcc`.
            (ValueLocation::Cond(cond), CCLoc::Stack(o)) => {
                let offset = self.adjusted_offset(o);

                self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?;

                match cond {
                    cc::EQUAL => dynasm!(self.asm
                        ; sete [rsp + offset]
                    ),
                    cc::NOT_EQUAL => dynasm!(self.asm
                        ; setne [rsp + offset]
                    ),
                    cc::GT_U => dynasm!(self.asm
                        ; seta [rsp + offset]
                    ),
                    cc::GE_U => dynasm!(self.asm
                        ; setae [rsp + offset]
                    ),
                    cc::LT_U => dynasm!(self.asm
                        ; setb [rsp + offset]
                    ),
                    cc::LE_U => dynasm!(self.asm
                        ; setbe [rsp + offset]
                    ),
                    cc::GT_S => dynasm!(self.asm
                        ; setg [rsp + offset]
                    ),
                    cc::GE_S => dynasm!(self.asm
                        ; setge [rsp + offset]
                    ),
                    cc::LT_S => dynasm!(self.asm
                        ; setl [rsp + offset]
                    ),
                    cc::LE_S => dynasm!(self.asm
                        ; setle [rsp + offset]
                    ),
                }
            }
            // Flags-based value into a register: same zero-then-`setcc`
            // pattern for a GPR; XMM destinations go through a temporary GPR.
            (ValueLocation::Cond(cond), CCLoc::Reg(reg)) => match reg {
                GPR::Rq(r) => {
                    self.copy_value(ValueLocation::Immediate(0u64.into()), dst)?;

                    match cond {
                        cc::EQUAL => dynasm!(self.asm
                            ; sete Rb(r)
                        ),
                        cc::NOT_EQUAL => dynasm!(self.asm
                            ; setne Rb(r)
                        ),
                        cc::GT_U => dynasm!(self.asm
                            ; seta Rb(r)
                        ),
                        cc::GE_U => dynasm!(self.asm
                            ; setae Rb(r)
                        ),
                        cc::LT_U => dynasm!(self.asm
                            ; setb Rb(r)
                        ),
                        cc::LE_U => dynasm!(self.asm
                            ; setbe Rb(r)
                        ),
                        cc::GT_S => dynasm!(self.asm
                            ; setg Rb(r)
                        ),
                        cc::GE_S => dynasm!(self.asm
                            ; setge Rb(r)
                        ),
                        cc::LT_S => dynasm!(self.asm
                            ; setl Rb(r)
                        ),
                        cc::LE_S => dynasm!(self.asm
                            ; setle Rb(r)
                        ),
                    }
                }
                GPR::Rx(_) => {
                    // NOTE(review): `take_reg(...).unwrap()` panics under
                    // register exhaustion — confirm this path's pressure.
                    let temp = CCLoc::Reg(self.take_reg(I32).unwrap());
                    self.copy_value(src, temp)?;
                    let temp = temp.into();
                    self.copy_value(temp, dst)?;
                    self.free_value(temp)?;
                }
            },
            // Stack-to-stack: x86 has no memory-to-memory move, so stage
            // through a register; if none is free, save/restore rax (note the
            // offsets are shifted one word to account for the extra push).
            (ValueLocation::Stack(in_offset), CCLoc::Stack(out_offset)) => {
                let in_offset = self.adjusted_offset(in_offset);
                let out_offset = self.adjusted_offset(out_offset);
                if in_offset != out_offset {
                    if let Some(gpr) = self.take_reg(I64) {
                        dynasm!(self.asm
                            ; mov Rq(gpr.rq().unwrap()), [rsp + in_offset]
                            ; mov [rsp + out_offset], Rq(gpr.rq().unwrap())
                        );
                        self.block_state.regs.release(gpr)?;
                    } else {
                        dynasm!(self.asm
                            ; push rax
                            ; mov rax, [rsp + in_offset + WORD_SIZE as i32]
                            ; mov [rsp + out_offset + WORD_SIZE as i32], rax
                            ; pop rax
                        );
                    }
                }
            }
            // TODO: XMM registers
            (ValueLocation::Reg(in_reg), CCLoc::Stack(out_offset)) => {
                let out_offset = self.adjusted_offset(out_offset);
                match in_reg {
                    GPR::Rq(in_reg) => {
                        // We can always use `Rq` here for now because stack slots are in multiples of
                        // 8 bytes
                        dynasm!(self.asm
                            ; mov [rsp + out_offset], Rq(in_reg)
                        );
                    }
                    GPR::Rx(in_reg) => {
                        // We can always use `movq` here for now because stack slots are in multiples of
                        // 8 bytes
                        dynasm!(self.asm
                            ; movq [rsp + out_offset], Rx(in_reg)
                        );
                    }
                }
            }
            // Immediate into a stack slot: stage the 64-bit constant through
            // a scratch register, saving/restoring rax when none is free.
            (ValueLocation::Immediate(i), CCLoc::Stack(out_offset)) => {
                // TODO: Floats
                let i = i.as_bytes();
                let out_offset = self.adjusted_offset(out_offset);
                if let Some(scratch) = self.take_reg(I64) {
                    dynasm!(self.asm
                        ; mov Rq(scratch.rq().unwrap()), QWORD i
                        ; mov [rsp + out_offset], Rq(scratch.rq().unwrap())
                    );

                    self.block_state.regs.release(scratch)?;
                } else {
                    dynasm!(self.asm
                        ; push rax
                        ; mov rax, QWORD i
                        ; mov [rsp + out_offset + WORD_SIZE as i32], rax
                        ; pop rax
                    );
                }
            }
            (ValueLocation::Stack(in_offset), CCLoc::Reg(out_reg)) => {
                let in_offset = self.adjusted_offset(in_offset);
                match out_reg {
                    GPR::Rq(out_reg) => {
                        // We can always use `Rq` here for now because stack slots are in multiples of
                        // 8 bytes
                        dynasm!(self.asm
                            ; mov Rq(out_reg), [rsp + in_offset]
                        );
                    }
                    GPR::Rx(out_reg) => {
                        // We can always use `movq` here for now because stack slots are in multiples of
                        // 8 bytes
                        dynasm!(self.asm
                            ; movq Rx(out_reg), [rsp + in_offset]
                        );
                    }
                }
            }
            // Register-to-register: pick the move form for each combination
            // of GPR/XMM source and destination.
            (ValueLocation::Reg(in_reg), CCLoc::Reg(out_reg)) => {
                if in_reg != out_reg {
                    match (in_reg, out_reg) {
                        (GPR::Rq(in_reg), GPR::Rq(out_reg)) => {
                            dynasm!(self.asm
                                ; mov Rq(out_reg), Rq(in_reg)
                            );
                        }
                        (GPR::Rx(in_reg), GPR::Rq(out_reg)) => {
                            dynasm!(self.asm
                                ; movq Rq(out_reg), Rx(in_reg)
                            );
                        }
                        (GPR::Rq(in_reg), GPR::Rx(out_reg)) => {
                            dynasm!(self.asm
                                ; movq Rx(out_reg), Rq(in_reg)
                            );
                        }
                        (GPR::Rx(in_reg), GPR::Rx(out_reg)) => {
                            dynasm!(self.asm
                                ; movapd Rx(out_reg), Rx(in_reg)
                            );
                        }
                    }
                }
            }
            (ValueLocation::Immediate(i), CCLoc::Reg(out_reg)) => {
                // TODO: Floats
                self.immediate_to_reg(out_reg, i)?;
            }
        }
        Ok(())
    }
3054
    /// Define the given label at the current position.
    ///
    /// Multiple labels can be defined at the same position. However, a label
    /// can be defined only once.
    pub fn define_label(&mut self, label: Label) {
        // Resolves the dynamic label to the current assembler offset.
        self.asm.dynamic_label(label.0);
    }
3062
set_state(&mut self, state: VirtualCallingConvention) -> Result<(), Error>3063 pub fn set_state(&mut self, state: VirtualCallingConvention) -> Result<(), Error> {
3064 self.block_state.regs = Registers::new();
3065 self.block_state.regs.release_scratch_register()?;
3066 for elem in &state.stack {
3067 if let ValueLocation::Reg(r) = elem {
3068 self.block_state.regs.mark_used(*r);
3069 }
3070 }
3071 self.block_state.stack = state.stack;
3072 self.block_state.depth = state.depth;
3073 Ok(())
3074 }
3075
apply_cc( &mut self, cc: BlockCallingConvention<impl IntoIterator<Item = CCLoc>>, ) -> Result<(), Error>3076 pub fn apply_cc(
3077 &mut self,
3078 cc: BlockCallingConvention<impl IntoIterator<Item = CCLoc>>,
3079 ) -> Result<(), Error> {
3080 let stack = cc.arguments.into_iter();
3081
3082 self.block_state.stack = Vec::with_capacity(stack.size_hint().0);
3083 self.block_state.regs = Registers::new();
3084 self.block_state.regs.release_scratch_register()?;
3085
3086 for elem in stack {
3087 if let CCLoc::Reg(r) = elem {
3088 self.block_state.regs.mark_used(r);
3089 }
3090
3091 self.block_state.stack.push(elem.into());
3092 }
3093
3094 self.block_state.depth = cc.stack_depth;
3095 Ok(())
3096 }
3097
    // Memory loads. Arguments appear to be: method name, destination register
    // class, GPR operand width, XMM move instruction (or NONE), GPR move
    // instruction, and operand size — confirm against the `load!` definition.
    load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD);
    load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD);
    load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD);
    load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD);

    // Narrow loads with zero-extension (`movzx`) / sign-extension (`movsx`).
    load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE);
    load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE);
    load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD);
    load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD);

    load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE);
    load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE);
    load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD);
    load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD);
    load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD);
    load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD);

    // Memory stores: method name, GPR operand width, XMM store instruction
    // (or NONE), size argument.
    // NOTE(review): the DWORD/QWORD size arguments for store8/store16 look
    // inconsistent with 8/16-bit stores — confirm how `store!` uses `$size`.
    store!(store8, Rb, NONE, DWORD);
    store!(store16, Rw, NONE, QWORD);
    store!(store32, Rd, movd, DWORD);
    store!(store64, Rq, movq, QWORD);
3119
    /// Push `value` onto the physical (machine) stack and return its new
    /// stack location.
    fn push_physical(&mut self, mut value: ValueLocation) -> Result<ValueLocation, Error> {
        // Slot index of the word we are about to push, relative to the
        // current (pre-push) depth; see `adjusted_offset` for the scaling.
        let out_offset = -(self.block_state.depth.0 as i32 + 1);
        match value {
            ValueLocation::Reg(_) | ValueLocation::Immediate(_) | ValueLocation::Cond(_) => {
                if let Some(gpr) = self.put_into_register(GPRType::Rq, &mut value)? {
                    dynasm!(self.asm
                        ; push Rq(gpr.rq().unwrap())
                    );
                } else {
                    // No register free: reserve the slot with a dummy push,
                    // then write the value into it directly.
                    // NOTE(review): `copy_value` runs before `depth` is
                    // updated below even though `rsp` has already moved — the
                    // offset arithmetic on this fallback path is subtle;
                    // confirm it addresses the slot just pushed.
                    dynasm!(self.asm
                        ; push rax
                    );

                    self.copy_value(value, CCLoc::Stack(out_offset))?;
                }

                self.free_value(value)?;
            }
            ValueLocation::Stack(o) => {
                // Already on the stack: push a copy of the existing slot.
                let offset = self.adjusted_offset(o);
                dynasm!(self.asm
                    ; push QWORD [rsp + offset]
                );
            }
        }

        self.block_state.depth.reserve(1);

        Ok(ValueLocation::Stack(out_offset))
    }
3150
push(&mut self, value: ValueLocation) -> Result<(), Error>3151 fn push(&mut self, value: ValueLocation) -> Result<(), Error> {
3152 if let Some(mut top) = self.block_state.stack.pop() {
3153 if let ValueLocation::Cond(_) = top {
3154 match self.put_into_register(I32, &mut top) {
3155 Err(e) => return Err(e),
3156 Ok(o) => {
3157 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3158 }
3159 };
3160 }
3161
3162 self.block_state.stack.push(top);
3163 }
3164
3165 self.block_state.stack.push(value);
3166 Ok(())
3167 }
3168
pop(&mut self) -> Result<ValueLocation, Error>3169 fn pop(&mut self) -> Result<ValueLocation, Error> {
3170 match self.block_state.stack.pop() {
3171 Some(v) => Ok(v),
3172 None => Err(Error::Microwasm(
3173 "Stack is empty - pop impossible".to_string(),
3174 )),
3175 }
3176 }
3177
drop(&mut self, range: RangeInclusive<u32>) -> Result<(), Error>3178 pub fn drop(&mut self, range: RangeInclusive<u32>) -> Result<(), Error> {
3179 let mut repush = Vec::with_capacity(*range.start() as _);
3180
3181 for _ in 0..*range.start() {
3182 let v = self.pop()?;
3183 repush.push(v);
3184 }
3185
3186 for _ in range {
3187 let val = self.pop()?;
3188 self.free_value(val)?;
3189 }
3190
3191 for v in repush.into_iter().rev() {
3192 self.push(v)?;
3193 }
3194 Ok(())
3195 }
3196
pop_into(&mut self, dst: CCLoc) -> Result<(), Error>3197 fn pop_into(&mut self, dst: CCLoc) -> Result<(), Error> {
3198 let val = self.pop()?;
3199 self.copy_value(val, dst)?;
3200 self.free_value(val)?;
3201 Ok(())
3202 }
3203
free_value(&mut self, val: ValueLocation) -> Result<(), Error>3204 fn free_value(&mut self, val: ValueLocation) -> Result<(), Error> {
3205 if let ValueLocation::Reg(r) = val {
3206 self.block_state.regs.release(r)?;
3207 }
3208 Ok(())
3209 }
3210
3211 /// Puts this value into a register so that it can be efficiently read
put_into_register( &mut self, ty: impl Into<Option<GPRType>>, val: &mut ValueLocation, ) -> Result<Option<GPR>, Error>3212 fn put_into_register(
3213 &mut self,
3214 ty: impl Into<Option<GPRType>>,
3215 val: &mut ValueLocation,
3216 ) -> Result<Option<GPR>, Error> {
3217 if let Some(out) = self.clone_to_register(ty, *val)? {
3218 self.free_value(*val)?;
3219 *val = ValueLocation::Reg(out);
3220 Ok(Some(out))
3221 } else {
3222 Ok(None)
3223 }
3224 }
3225
3226 /// Clones this value into a register so that it can be efficiently read
clone_to_register( &mut self, ty: impl Into<Option<GPRType>>, val: ValueLocation, ) -> Result<Option<GPR>, Error>3227 fn clone_to_register(
3228 &mut self,
3229 ty: impl Into<Option<GPRType>>,
3230 val: ValueLocation,
3231 ) -> Result<Option<GPR>, Error> {
3232 let ty = ty.into();
3233 match val {
3234 ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => {
3235 self.block_state.regs.mark_used(r);
3236 Ok(Some(r))
3237 }
3238 val => match self.take_reg(ty.unwrap_or(GPRType::Rq)) {
3239 Some(scratch) => {
3240 self.copy_value(val, CCLoc::Reg(scratch))?;
3241 Ok(Some(scratch))
3242 }
3243 None => Ok(None),
3244 },
3245 }
3246 }
3247
3248 /// Puts this value into a temporary register so that operations
3249 /// on that register don't write to a local.
put_into_temp_register( &mut self, ty: impl Into<Option<GPRType>>, val: &mut ValueLocation, ) -> Result<Option<GPR>, Error>3250 fn put_into_temp_register(
3251 &mut self,
3252 ty: impl Into<Option<GPRType>>,
3253 val: &mut ValueLocation,
3254 ) -> Result<Option<GPR>, Error> {
3255 let out = self.clone_to_temp_register(ty, *val)?;
3256 if let Some(o) = out {
3257 self.free_value(*val)?;
3258 *val = ValueLocation::Reg(o);
3259 Ok(Some(o))
3260 } else {
3261 Ok(None)
3262 }
3263 }
3264
put_into_temp_location( &mut self, ty: impl Into<Option<GPRType>>, val: &mut ValueLocation, ) -> Result<CCLoc, Error>3265 fn put_into_temp_location(
3266 &mut self,
3267 ty: impl Into<Option<GPRType>>,
3268 val: &mut ValueLocation,
3269 ) -> Result<CCLoc, Error> {
3270 if let Some(gpr) = self.put_into_temp_register(ty, val)? {
3271 Ok(CCLoc::Reg(gpr))
3272 } else {
3273 let out = CCLoc::Stack(self.push_physical(*val)?.stack().unwrap());
3274 *val = out.into();
3275 Ok(out)
3276 }
3277 }
3278
3279 /// Clones this value into a temporary register so that operations
3280 /// on that register don't write to a local.
3281
clone_to_temp_register( &mut self, ty: impl Into<Option<GPRType>>, val: ValueLocation, ) -> Result<Option<GPR>, Error>3282 fn clone_to_temp_register(
3283 &mut self,
3284 ty: impl Into<Option<GPRType>>,
3285 val: ValueLocation,
3286 ) -> Result<Option<GPR>, Error> {
3287 // If we have `None` as the type then it always matches (`.unwrap_or(true)`)
3288 match val {
3289 ValueLocation::Reg(r) => {
3290 let ty = ty.into();
3291 let type_matches = ty.map(|t| t == r.type_()).unwrap_or(true);
3292
3293 if self.block_state.regs.num_usages(r) <= 1 && type_matches {
3294 self.block_state.regs.mark_used(r);
3295 Ok(Some(r))
3296 } else if let Some(scratch) = self.take_reg(ty.unwrap_or(GPRType::Rq)) {
3297 self.copy_value(val, CCLoc::Reg(scratch))?;
3298 Ok(Some(scratch))
3299 } else {
3300 Ok(None)
3301 }
3302 }
3303 val => self.clone_to_register(ty, val),
3304 }
3305 }
3306
f32_neg(&mut self) -> Result<(), Error>3307 pub fn f32_neg(&mut self) -> Result<(), Error> {
3308 let mut val = self.pop()?;
3309
3310 let out = if let Some(i) = val.imm_f32() {
3311 ValueLocation::Immediate(
3312 Ieee32::from_bits((-f32::from_bits(i.to_bits())).to_bits()).into(),
3313 )
3314 } else {
3315 let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3316 Err(e) => return Err(e),
3317 Ok(o) => {
3318 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3319 }
3320 };
3321 let const_label = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32));
3322
3323 dynasm!(self.asm
3324 ; xorps Rx(reg.rx().unwrap()), [=>const_label.0]
3325 );
3326
3327 val
3328 };
3329
3330 self.push(out)?;
3331 Ok(())
3332 }
3333
f64_neg(&mut self) -> Result<(), Error>3334 pub fn f64_neg(&mut self) -> Result<(), Error> {
3335 let mut val = self.pop()?;
3336
3337 let out = if let Some(i) = val.imm_f64() {
3338 ValueLocation::Immediate(
3339 Ieee64::from_bits((-f64::from_bits(i.to_bits())).to_bits()).into(),
3340 )
3341 } else {
3342 let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3343 Err(e) => return Err(e),
3344 Ok(o) => {
3345 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3346 }
3347 };
3348 let const_label = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
3349
3350 dynasm!(self.asm
3351 ; xorpd Rx(reg.rx().unwrap()), [=>const_label.0]
3352 );
3353
3354 val
3355 };
3356
3357 self.push(out)?;
3358 Ok(())
3359 }
3360
f32_abs(&mut self) -> Result<(), Error>3361 pub fn f32_abs(&mut self) -> Result<(), Error> {
3362 let mut val = self.pop()?;
3363
3364 let out = if let Some(i) = val.imm_f32() {
3365 ValueLocation::Immediate(
3366 Ieee32::from_bits(f32::from_bits(i.to_bits()).abs().to_bits()).into(),
3367 )
3368 } else {
3369 let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3370 Err(e) => return Err(e),
3371 Ok(o) => {
3372 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3373 }
3374 };
3375 let const_label = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32));
3376
3377 dynasm!(self.asm
3378 ; andps Rx(reg.rx().unwrap()), [=>const_label.0]
3379 );
3380
3381 val
3382 };
3383
3384 self.push(out)?;
3385 Ok(())
3386 }
3387
f64_abs(&mut self) -> Result<(), Error>3388 pub fn f64_abs(&mut self) -> Result<(), Error> {
3389 let mut val = self.pop()?;
3390
3391 let out = if let Some(i) = val.imm_f64() {
3392 ValueLocation::Immediate(
3393 Ieee64::from_bits(f64::from_bits(i.to_bits()).abs().to_bits()).into(),
3394 )
3395 } else {
3396 let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3397 Err(e) => return Err(e),
3398 Ok(o) => {
3399 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3400 }
3401 };
3402
3403 let const_label = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64));
3404
3405 dynasm!(self.asm
3406 ; andps Rx(reg.rx().unwrap()), [=>const_label.0]
3407 );
3408
3409 val
3410 };
3411
3412 self.push(out)?;
3413 Ok(())
3414 }
3415
f32_sqrt(&mut self) -> Result<(), Error>3416 pub fn f32_sqrt(&mut self) -> Result<(), Error> {
3417 let mut val = self.pop()?;
3418
3419 let out = if let Some(i) = val.imm_f32() {
3420 ValueLocation::Immediate(
3421 Ieee32::from_bits(f32::from_bits(i.to_bits()).sqrt().to_bits()).into(),
3422 )
3423 } else {
3424 let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3425 Err(e) => return Err(e),
3426 Ok(o) => {
3427 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3428 }
3429 };
3430
3431 dynasm!(self.asm
3432 ; sqrtss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
3433 );
3434
3435 val
3436 };
3437
3438 self.push(out)?;
3439 Ok(())
3440 }
3441
f64_sqrt(&mut self) -> Result<(), Error>3442 pub fn f64_sqrt(&mut self) -> Result<(), Error> {
3443 let mut val = self.pop()?;
3444
3445 let out = if let Some(i) = val.imm_f64() {
3446 ValueLocation::Immediate(
3447 Ieee64::from_bits(f64::from_bits(i.to_bits()).sqrt().to_bits()).into(),
3448 )
3449 } else {
3450 let reg = match self.put_into_temp_register(GPRType::Rx, &mut val) {
3451 Err(e) => return Err(e),
3452 Ok(o) => {
3453 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3454 }
3455 };
3456
3457 dynasm!(self.asm
3458 ; sqrtsd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
3459 );
3460
3461 ValueLocation::Reg(reg)
3462 };
3463
3464 self.push(out)?;
3465 Ok(())
3466 }
3467
f32_copysign(&mut self) -> Result<(), Error>3468 pub fn f32_copysign(&mut self) -> Result<(), Error> {
3469 let mut right = self.pop()?;
3470 let mut left = self.pop()?;
3471
3472 let out = if let (Some(left), Some(right)) = (left.imm_f32(), right.imm_f32()) {
3473 ValueLocation::Immediate(
3474 Ieee32::from_bits(
3475 (left.to_bits() & REST_MASK_F32) | (right.to_bits() & SIGN_MASK_F32),
3476 )
3477 .into(),
3478 )
3479 } else {
3480 let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) {
3481 Err(e) => return Err(e),
3482 Ok(o) => {
3483 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3484 }
3485 };
3486 let rreg = match self.put_into_register(GPRType::Rx, &mut right) {
3487 Err(e) => return Err(e),
3488 Ok(o) => {
3489 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3490 }
3491 };
3492
3493 let sign_mask = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32));
3494 let rest_mask = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32));
3495
3496 dynasm!(self.asm
3497 ; andps Rx(rreg.rx().unwrap()), [=>sign_mask.0]
3498 ; andps Rx(lreg.rx().unwrap()), [=>rest_mask.0]
3499 ; orps Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
3500 );
3501
3502 self.free_value(right)?;
3503
3504 left
3505 };
3506
3507 self.push(out)?;
3508 Ok(())
3509 }
3510
f64_copysign(&mut self) -> Result<(), Error>3511 pub fn f64_copysign(&mut self) -> Result<(), Error> {
3512 let mut right = self.pop()?;
3513 let mut left = self.pop()?;
3514
3515 let out = if let (Some(left), Some(right)) = (left.imm_f64(), right.imm_f64()) {
3516 ValueLocation::Immediate(
3517 Ieee64::from_bits(
3518 (left.to_bits() & REST_MASK_F64) | (right.to_bits() & SIGN_MASK_F64),
3519 )
3520 .into(),
3521 )
3522 } else {
3523 let lreg = match self.put_into_temp_register(GPRType::Rx, &mut left) {
3524 Err(e) => return Err(e),
3525 Ok(o) => {
3526 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3527 }
3528 };
3529 let rreg = match self.put_into_register(GPRType::Rx, &mut right) {
3530 Err(e) => return Err(e),
3531 Ok(o) => {
3532 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3533 }
3534 };
3535
3536 let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
3537 let rest_mask = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64));
3538
3539 dynasm!(self.asm
3540 ; andpd Rx(rreg.rx().unwrap()), [=>sign_mask.0]
3541 ; andpd Rx(lreg.rx().unwrap()), [=>rest_mask.0]
3542 ; orpd Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap())
3543 );
3544
3545 self.free_value(right)?;
3546
3547 left
3548 };
3549
3550 self.push(out)?;
3551 Ok(())
3552 }
3553
    /// `i32.clz`: counts leading zero bits of the top-of-stack i32.
    ///
    /// Uses `lzcnt` when the host CPU supports it; otherwise falls back to
    /// `bsr` plus fix-ups (`bsr` returns the *index* of the highest set bit
    /// and leaves the destination undefined for a zero input, setting ZF).
    pub fn i32_clz(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold at compile time.
            ValueLocation::Immediate(imm) => {
                ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into())
            }
            ValueLocation::Stack(offset) => {
                let offset = self.adjusted_offset(offset);
                let temp = self.take_reg(I32).unwrap();

                if is_x86_feature_detected!("lzcnt") {
                    dynasm!(self.asm
                        ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset]
                    );
                    ValueLocation::Reg(temp)
                } else {
                    let temp_2 = self.take_reg(I32).unwrap();

                    // cmove substitutes 0x3f when the input was zero; then
                    // `x ^ 0x1f` == `31 - x` for x in 0..=31, and
                    // 0x3f ^ 0x1f == 32 covers the zero-input case.
                    dynasm!(self.asm
                        ; bsr Rd(temp.rq().unwrap()), [rsp + offset]
                        ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _
                        ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap())
                        ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _
                        ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap())
                    );
                    self.free_value(ValueLocation::Reg(temp_2))?;
                    ValueLocation::Reg(temp)
                }
            }
            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let reg = match self.put_into_register(GPRType::Rq, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let temp = self.take_reg(I32).unwrap();

                if is_x86_feature_detected!("lzcnt") {
                    dynasm!(self.asm
                        ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
                    );
                    ValueLocation::Reg(temp)
                } else {
                    // NOTE(review): this fallback overwrites `reg`, which was
                    // obtained via `put_into_register` and so may still be
                    // shared with a local — confirm that is safe here, or use
                    // `put_into_temp_register` instead.
                    dynasm!(self.asm
                        ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
                        ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _
                        ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
                        ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _
                        ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
                    );
                    ValueLocation::Reg(temp)
                }
            }
        };

        self.free_value(val)?;
        self.push(out_val)?;
        Ok(())
    }
3616
    /// `i64.clz`: counts leading zero bits of the top-of-stack i64.
    ///
    /// Mirrors `i32_clz` with 64-bit operands: `lzcnt` when available,
    /// otherwise `bsr` plus fix-ups (`0x7f ^ 0x3f == 64` for zero input,
    /// `x ^ 0x3f == 63 - x` otherwise).
    pub fn i64_clz(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold at compile time.
            ValueLocation::Immediate(imm) => {
                ValueLocation::Immediate((imm.as_i64().unwrap().leading_zeros() as u64).into())
            }
            ValueLocation::Stack(offset) => {
                let offset = self.adjusted_offset(offset);
                let temp = self.take_reg(I64).unwrap();

                if is_x86_feature_detected!("lzcnt") {
                    dynasm!(self.asm
                        ; lzcnt Rq(temp.rq().unwrap()), [rsp + offset]
                    );
                    ValueLocation::Reg(temp)
                } else {
                    let temp_2 = self.take_reg(I64).unwrap();

                    // cmove substitutes 0x7f when the input was zero (bsr
                    // sets ZF and leaves the destination undefined).
                    dynasm!(self.asm
                        ; bsr Rq(temp.rq().unwrap()), [rsp + offset]
                        ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _
                        ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap())
                        ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _
                        ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap())
                    );
                    self.free_value(ValueLocation::Reg(temp_2))?;
                    ValueLocation::Reg(temp)
                }
            }
            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let reg = match self.put_into_register(GPRType::Rq, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };
                let temp = self.take_reg(I64).unwrap();

                if is_x86_feature_detected!("lzcnt") {
                    dynasm!(self.asm
                        ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                    ValueLocation::Reg(temp)
                } else {
                    // NOTE(review): this fallback overwrites `reg`, which was
                    // obtained via `put_into_register` and so may still be
                    // shared with a local — confirm that is safe here, or use
                    // `put_into_temp_register` instead.
                    dynasm!(self.asm
                        ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
                        ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _
                        ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
                        ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _
                        ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                    ValueLocation::Reg(temp)
                }
            }
        };

        self.free_value(val)?;
        self.push(out_val)?;
        Ok(())
    }
3678
i32_ctz(&mut self) -> Result<(), Error>3679 pub fn i32_ctz(&mut self) -> Result<(), Error> {
3680 let mut val = self.pop()?;
3681
3682 let out_val = match val {
3683 ValueLocation::Immediate(imm) => {
3684 ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into())
3685 }
3686 ValueLocation::Stack(offset) => {
3687 let offset = self.adjusted_offset(offset);
3688 let temp = self.take_reg(I32).unwrap();
3689
3690 if is_x86_feature_detected!("lzcnt") {
3691 dynasm!(self.asm
3692 ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset]
3693 );
3694 ValueLocation::Reg(temp)
3695 } else {
3696 let temp_zero_val = self.take_reg(I32).unwrap();
3697
3698 dynasm!(self.asm
3699 ; bsf Rd(temp.rq().unwrap()), [rsp + offset]
3700 ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _
3701 ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap())
3702 );
3703 self.free_value(ValueLocation::Reg(temp_zero_val))?;
3704 ValueLocation::Reg(temp)
3705 }
3706 }
3707 ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
3708 let reg = match self.put_into_register(GPRType::Rq, &mut val) {
3709 Err(e) => return Err(e),
3710 Ok(o) => {
3711 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3712 }
3713 };
3714 let temp = self.take_reg(I32).unwrap();
3715
3716 if is_x86_feature_detected!("lzcnt") {
3717 dynasm!(self.asm
3718 ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3719 );
3720 ValueLocation::Reg(temp)
3721 } else {
3722 dynasm!(self.asm
3723 ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3724 ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _
3725 ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap())
3726 );
3727 ValueLocation::Reg(temp)
3728 }
3729 }
3730 };
3731
3732 self.free_value(val)?;
3733 self.push(out_val)?;
3734 Ok(())
3735 }
3736
i64_ctz(&mut self) -> Result<(), Error>3737 pub fn i64_ctz(&mut self) -> Result<(), Error> {
3738 let mut val = self.pop()?;
3739
3740 let out_val = match val {
3741 ValueLocation::Immediate(imm) => {
3742 ValueLocation::Immediate((imm.as_i64().unwrap().trailing_zeros() as u64).into())
3743 }
3744 ValueLocation::Stack(offset) => {
3745 let offset = self.adjusted_offset(offset);
3746 let temp = self.take_reg(I64).unwrap();
3747
3748 if is_x86_feature_detected!("lzcnt") {
3749 dynasm!(self.asm
3750 ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset]
3751 );
3752 ValueLocation::Reg(temp)
3753 } else {
3754 let temp_zero_val = self.take_reg(I64).unwrap();
3755
3756 dynasm!(self.asm
3757 ; bsf Rq(temp.rq().unwrap()), [rsp + offset]
3758 ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _
3759 ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap())
3760 );
3761 self.free_value(ValueLocation::Reg(temp_zero_val))?;
3762 ValueLocation::Reg(temp)
3763 }
3764 }
3765 ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
3766 let reg = match self.put_into_register(GPRType::Rq, &mut val) {
3767 Err(e) => return Err(e),
3768 Ok(o) => {
3769 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3770 }
3771 };
3772 let temp = self.take_reg(I64).unwrap();
3773
3774 dynasm!(self.asm
3775 ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3776 ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _
3777 ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
3778 );
3779 ValueLocation::Reg(temp)
3780 }
3781 };
3782
3783 self.free_value(val)?;
3784 self.push(out_val)?;
3785 Ok(())
3786 }
3787
    /// `i64.extend_i32_u`: zero-extends the top-of-stack i32 to an i64.
    ///
    /// On x86-64, writing a 32-bit register implicitly zeroes the upper 32
    /// bits of the full register, so a plain 32-bit `mov` suffices.
    pub fn i32_extend_u(&mut self) -> Result<(), Error> {
        let val = self.pop()?;

        let out = if let ValueLocation::Immediate(imm) = val {
            // Constant-fold: reinterpret as u32, widen to u64.
            ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into())
        } else {
            let new_reg = self.take_reg(I64).unwrap();

            // TODO: Track set-ness of bits - we can make this a no-op in most cases
            // but we have to make this unconditional just in case this value
            // came from a truncate.
            match val {
                ValueLocation::Reg(GPR::Rx(rxreg)) => {
                    // Move the low 32 bits out of the SSE register; `movd`
                    // zero-extends into the 64-bit GPR.
                    dynasm!(self.asm
                        ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg)
                    );
                }
                ValueLocation::Reg(GPR::Rq(rqreg)) => {
                    // 32-bit mov zero-extends into the upper half.
                    dynasm!(self.asm
                        ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg)
                    );
                }
                ValueLocation::Stack(offset) => {
                    let offset = self.adjusted_offset(offset);

                    dynasm!(self.asm
                        ; mov Rd(new_reg.rq().unwrap()), [rsp + offset]
                    );
                }
                // A condition code materializes as 0 or 1, already zero-extended.
                ValueLocation::Cond(_) => self.copy_value(val, CCLoc::Reg(new_reg))?,
                // Immediates were handled by the outer `if let` above.
                ValueLocation::Immediate(_) => {
                    return Err(Error::Microwasm(
                        "i32_extend_u unreachable code".to_string(),
                    ))
                }
            }

            ValueLocation::Reg(new_reg)
        };

        self.free_value(val)?;

        self.push(out)?;
        Ok(())
    }
3833
    /// `i64.extend_i32_s`: sign-extends the top-of-stack i32 to an i64.
    ///
    /// Note `val` is freed *before* `take_reg`, so `new_reg` may alias the
    /// register `val` lived in; every arm below reads the source before (or
    /// while) writing `new_reg`, so in-place extension stays correct.
    pub fn i32_extend_s(&mut self) -> Result<(), Error> {
        let val = self.pop()?;

        self.free_value(val)?;
        let new_reg = self.take_reg(I64).unwrap();

        let out = match val {
            ValueLocation::Reg(GPR::Rx(rxreg)) => {
                // Move the low 32 bits out of the SSE register, then
                // sign-extend within the GPR.
                dynasm!(self.asm
                    ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg)
                    ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap())
                );

                ValueLocation::Reg(new_reg)
            }
            ValueLocation::Reg(GPR::Rq(rqreg)) => {
                // movsxd sign-extends 32 -> 64 in a single instruction
                // (safe even if rqreg == new_reg).
                dynasm!(self.asm
                    ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg)
                );

                ValueLocation::Reg(new_reg)
            }
            ValueLocation::Stack(offset) => {
                let offset = self.adjusted_offset(offset);

                dynasm!(self.asm
                    ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset]
                );

                ValueLocation::Reg(new_reg)
            }
            // `CondCode` can only be 0 or 1, so sign-extension is always the same as
            // zero-extension
            val @ ValueLocation::Cond(_) => {
                self.copy_value(val, CCLoc::Reg(new_reg))?;

                ValueLocation::Reg(new_reg)
            }
            ValueLocation::Immediate(imm) => {
                // Constant-fold; the speculatively taken register is returned.
                self.block_state.regs.release(new_reg)?;

                ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into())
            }
        };

        self.push(out)?;
        Ok(())
    }
3882
    // `i32.popcnt`: hardware `popcnt`, constant-folded via `u32::count_ones`.
    unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones);
    // `f64.promote_f32`: widen f32 -> f64 with `cvtss2sd`; immediates are
    // folded through a Rust `as` cast on the recovered float value.
    conversion!(
        f64_from_f32,
        cvtss2sd,
        Rx,
        rx,
        Rx,
        rx,
        f32,
        f64,
        as_f32,
        |a: Ieee32| Ieee64::from_bits((f32::from_bits(a.to_bits()) as f64).to_bits())
    );
    // `f32.demote_f64`: narrow f64 -> f32 with `cvtsd2ss`; rounding is done
    // by the hardware instruction (or by `as` for immediates).
    conversion!(
        f32_from_f64,
        cvtsd2ss,
        Rx,
        rx,
        Rx,
        rx,
        f64,
        f32,
        as_f64,
        |a: Ieee64| Ieee32::from_bits((f64::from_bits(a.to_bits()) as f32).to_bits())
    );
3908
    /// `i32.trunc_f32_s`: truncates the top-of-stack f32 to a signed i32,
    /// trapping on NaN or out-of-range input.
    ///
    /// `cvttss2si` returns the sentinel 0x8000_0000 (i32::MIN) for any
    /// invalid input, so the emitted code only runs the expensive range
    /// checks when it sees that sentinel: NaN (`jp`), below -2^31 (`jnae`
    /// against 0xCF00_0000 == -2^31f), or non-negative (genuine overflow).
    pub fn i32_truncate_f32_s(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold via Rust's saturating `as` cast.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(),
            ),
            _ => {
                let reg = match self.put_into_register(F32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };
                let temp = self.take_reg(I32).unwrap();

                let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
                let float_cmp_mask =
                    self.aligned_label(16, LabelValue::I32(0xCF00_0000_u32 as i32));
                let zero = self.aligned_label(16, LabelValue::I32(0));

                dynasm!(self.asm
                    ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
                    ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0]
                    ; jne >ret
                    ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
                    ; jp >trap
                    ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
                    ; jnae >trap
                    ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0]
                    ; jb >ret
                    ; trap:
                    ;; self.trap(TrapCode::BadConversionToInteger)
                    ; ret:
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
3954
i32_truncate_f32_u(&mut self) -> Result<(), Error>3955 pub fn i32_truncate_f32_u(&mut self) -> Result<(), Error> {
3956 let mut val = self.pop()?;
3957
3958 let out_val = match val {
3959 ValueLocation::Immediate(imm) => ValueLocation::Immediate(
3960 (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(),
3961 ),
3962 _ => {
3963 let reg = match self.put_into_temp_register(F32, &mut val) {
3964 Err(e) => return Err(e),
3965 Ok(o) => {
3966 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
3967 }
3968 };
3969
3970 let temp = self.take_reg(I32).unwrap();
3971
3972 let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
3973 let float_cmp_mask =
3974 self.aligned_label(16, LabelValue::I32(0x4F00_0000_u32 as i32));
3975
3976 dynasm!(self.asm
3977 ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
3978 ; jae >else_
3979 ; jp >trap
3980 ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
3981 ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
3982 ; js >trap
3983 ; jmp >ret
3984 ; else_:
3985 ; subss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
3986 ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
3987 ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
3988 ; js >trap
3989 ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
3990 ; jmp >ret
3991 ; trap:
3992 ;; self.trap(TrapCode::BadConversionToInteger)
3993 ; ret:
3994 );
3995
3996 ValueLocation::Reg(temp)
3997 }
3998 };
3999
4000 self.free_value(val)?;
4001
4002 self.push(out_val)?;
4003 Ok(())
4004 }
4005
    /// `i32.trunc_f64_s`: truncates the top-of-stack f64 to a signed i32,
    /// trapping on NaN or out-of-range input.
    ///
    /// `cvttsd2si` (32-bit form) returns the sentinel 0x8000_0000 for any
    /// invalid input; the range checks only run when that sentinel appears.
    /// NOTE(review): the type passed to `put_into_register` is `F32` even
    /// though the value is an f64 — presumably both map to the Rx register
    /// class so this is harmless; confirm.
    pub fn i32_truncate_f64_s(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold via Rust's saturating `as` cast.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i32).into(),
            ),
            _ => {
                let reg = match self.put_into_register(F32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let temp = self.take_reg(I32).unwrap();

                let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
                // Just below i32::MIN as an f64: inputs must be strictly
                // above this to be in range (`jna` traps on <=).
                let float_cmp_mask =
                    self.aligned_label(16, LabelValue::I64(0xC1E0_0000_0020_0000_u64 as i64));
                let zero = self.aligned_label(16, LabelValue::I64(0));

                dynasm!(self.asm
                    ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
                    ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0]
                    ; jne >ret
                    ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
                    ; jp >trap
                    ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
                    ; jna >trap
                    ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0]
                    ; jb >ret
                    ; trap:
                    ;; self.trap(TrapCode::BadConversionToInteger)
                    ; ret:
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4052
    /// `i32.trunc_f64_u`: truncates the top-of-stack f64 to an unsigned i32,
    /// trapping on NaN, negative, or out-of-range input.
    ///
    /// Inputs >= 2^31 (0x41E0_0000_0000_0000 as f64) are handled by
    /// subtracting 2^31 as a float, converting signed, then adding 2^31
    /// back as an integer.
    pub fn i32_truncate_f64_u(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold via Rust's saturating unsigned `as` cast.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u32).into(),
            ),
            _ => {
                // Temp register: the large-value path mutates it with `subsd`.
                let reg = match self.put_into_temp_register(F32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let temp = self.take_reg(I32).unwrap();

                let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32));
                let float_cmp_mask =
                    self.aligned_label(16, LabelValue::I64(0x41E0_0000_0000_0000_u64 as i64));

                // NOTE(review): `add Rq(temp), [=>sign_mask.0]` does a 64-bit
                // load of a 4-byte constant; only the low 32 bits of `temp`
                // are meaningful afterwards, but confirm the adjacent bytes
                // are always mapped/benign.
                dynasm!(self.asm
                    ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
                    ; jae >else_
                    ; jp >trap
                    ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
                    ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
                    ; js >trap
                    ; jmp >ret
                    ; else_:
                    ; subsd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
                    ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap())
                    ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap())
                    ; js >trap
                    ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
                    ; jmp >ret
                    ; trap:
                    ;; self.trap(TrapCode::BadConversionToInteger)
                    ; ret:
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4103
    // Signed integer -> float conversions. The hardware `cvtsi2ss`/`cvtsi2sd`
    // instructions are inherently signed, so these four need no extra fix-up;
    // immediates are folded through Rust `as` casts.
    conversion!(
        f32_convert_from_i32_s,
        cvtsi2ss,
        Rd,
        rq,
        Rx,
        rx,
        i32,
        f32,
        as_i32,
        |a| Ieee32::from_bits((a as f32).to_bits())
    );
    conversion!(
        f64_convert_from_i32_s,
        cvtsi2sd,
        Rd,
        rq,
        Rx,
        rx,
        i32,
        f64,
        as_i32,
        |a| Ieee64::from_bits((a as f64).to_bits())
    );
    conversion!(
        f32_convert_from_i64_s,
        cvtsi2ss,
        Rq,
        rq,
        Rx,
        rx,
        i64,
        f32,
        as_i64,
        |a| Ieee32::from_bits((a as f32).to_bits())
    );
    conversion!(
        f64_convert_from_i64_s,
        cvtsi2sd,
        Rq,
        rq,
        Rx,
        rx,
        i64,
        f64,
        as_i64,
        |a| Ieee64::from_bits((a as f64).to_bits())
    );
4152
    /// `i64.trunc_f32_s`: truncates the top-of-stack f32 to a signed i64,
    /// trapping on NaN or out-of-range input.
    ///
    /// `cvttss2si` (64-bit form) returns the sentinel 0x8000_0000_0000_0000
    /// for any invalid input; range checks only run on that sentinel.
    /// 0xDF00_0000 is -2^63 as an f32 (the in-range lower bound).
    pub fn i64_truncate_f32_s(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold via Rust's saturating `as` cast.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i64).into(),
            ),
            _ => {
                let reg = match self.put_into_temp_register(F32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                // NOTE(review): `take_reg(I32)` for a 64-bit result —
                // presumably I32/I64 map to the same Rq class; confirm.
                let temp = self.take_reg(I32).unwrap();

                let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
                let float_cmp_mask =
                    self.aligned_label(16, LabelValue::I32(0xDF00_0000_u32 as i32));
                let zero = self.aligned_label(16, LabelValue::I64(0));

                dynasm!(self.asm
                    ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
                    ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0]
                    ; jne >ret
                    ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
                    ; jp >trap
                    ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
                    ; jnae >trap
                    ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0]
                    ; jb >ret
                    ; trap:
                    ;; self.trap(TrapCode::BadConversionToInteger)
                    ; ret:
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4199
    /// `i64.trunc_f64_s`: truncates the top-of-stack f64 to a signed i64,
    /// trapping on NaN or out-of-range input.
    ///
    /// `cvttsd2si` returns the sentinel 0x8000_0000_0000_0000 for any
    /// invalid input; range checks only run on that sentinel.
    /// 0xC3E0_0000_0000_0000 is -2^63 as an f64 (the in-range lower bound).
    pub fn i64_truncate_f64_s(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold via Rust's saturating `as` cast.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i64).into(),
            ),
            _ => {
                // NOTE(review): type argument is `F32` for an f64 value and
                // `take_reg(I32)` for a 64-bit result — presumably both map
                // to the right register classes (Rx / Rq); confirm.
                let reg = match self.put_into_register(F32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let temp = self.take_reg(I32).unwrap();

                let sign_mask = self.aligned_label(8, LabelValue::I64(SIGN_MASK_F64 as i64));
                let float_cmp_mask =
                    self.aligned_label(16, LabelValue::I64(0xC3E0_0000_0000_0000_u64 as i64));
                let zero = self.aligned_label(16, LabelValue::I64(0));

                dynasm!(self.asm
                    ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
                    ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0]
                    ; jne >ret
                    ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap())
                    ; jp >trap
                    ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0]
                    ; jnae >trap
                    ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0]
                    ; jb >ret
                    ; trap:
                    ;; self.trap(TrapCode::BadConversionToInteger)
                    ; ret:
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4246
i64_truncate_f32_u(&mut self) -> Result<(), Error>4247 pub fn i64_truncate_f32_u(&mut self) -> Result<(), Error> {
4248 let mut val = self.pop()?;
4249
4250 let out_val = match val {
4251 ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4252 (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u64).into(),
4253 ),
4254 _ => {
4255 let reg = match self.put_into_register(F32, &mut val) {
4256 Err(e) => return Err(e),
4257 Ok(o) => {
4258 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4259 }
4260 };
4261
4262 let temp = self.take_reg(I64).unwrap();
4263 let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
4264 let u64_trunc_f32_const = self.aligned_label(16, LabelValue::I32(0x5F00_0000_i32));
4265
4266 dynasm!(self.asm
4267 ; comiss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0]
4268 ; jae >large
4269 ; jp >trap
4270 ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4271 ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap())
4272 ; js >trap
4273 ; jmp >cont
4274 ; large:
4275 ; subss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0]
4276 ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4277 ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap())
4278 ; js >trap
4279 ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
4280 ; jmp >cont
4281 ; trap:
4282 ;; self.trap(TrapCode::BadConversionToInteger)
4283 ; cont:
4284 );
4285
4286 ValueLocation::Reg(temp)
4287 }
4288 };
4289
4290 self.free_value(val)?;
4291
4292 self.push(out_val)?;
4293 Ok(())
4294 }
4295
i64_truncate_f64_u(&mut self) -> Result<(), Error>4296 pub fn i64_truncate_f64_u(&mut self) -> Result<(), Error> {
4297 let mut val = self.pop()?;
4298
4299 let out_val = match val {
4300 ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4301 (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u64).into(),
4302 ),
4303 _ => {
4304 let reg = match self.put_into_register(F64, &mut val) {
4305 Err(e) => return Err(e),
4306 Ok(o) => {
4307 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4308 }
4309 };
4310
4311 let temp = self.take_reg(I64).unwrap();
4312
4313 let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64));
4314 let u64_trunc_f64_const =
4315 self.aligned_label(16, LabelValue::I64(0x43E0_0000_0000_0000_i64));
4316
4317 dynasm!(self.asm
4318 ; comisd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0]
4319 ; jnb >large
4320 ; jp >trap
4321 ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4322 ; cmp Rq(temp.rq().unwrap()), 0
4323 ; jl >trap
4324 ; jmp >cont
4325 ; large:
4326 ; subsd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0]
4327 ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap())
4328 ; cmp Rq(temp.rq().unwrap()), 0
4329 ; jnge >trap
4330 ; add Rq(temp.rq().unwrap()), [=>sign_mask.0]
4331 ; jmp >cont
4332 ; trap:
4333 ;; self.trap(TrapCode::BadConversionToInteger)
4334 ; cont:
4335 );
4336
4337 ValueLocation::Reg(temp)
4338 }
4339 };
4340
4341 self.free_value(val)?;
4342
4343 self.push(out_val)?;
4344 Ok(())
4345 }
4346
    /// `f32.convert_i32_u`: pop an i32, reinterpret it as unsigned and
    /// push its f32 value.
    ///
    /// The 32-bit `mov` of the register onto itself zero-extends it to 64
    /// bits, after which the *signed* 64-bit `cvtsi2ss` is correct for
    /// every `u32` value.
    pub fn f32_convert_from_i32_u(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold at compile time.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                Ieee32::from_bits((imm.as_i32().unwrap() as u32 as f32).to_bits()).into(),
            ),
            _ => {
                let reg = match self.put_into_register(I32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let temp = self.take_reg(F32).unwrap();

                dynasm!(self.asm
                    // Zero-extend: a 32-bit mov clears the upper 32 bits.
                    ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap())
                    ; cvtsi2ss Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap())
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4378
    /// `f64.convert_i32_u`: pop an i32, reinterpret it as unsigned and
    /// push its f64 value (always exact — f64 has 53 mantissa bits).
    ///
    /// The 32-bit `mov` of the register onto itself zero-extends it to 64
    /// bits, after which the *signed* 64-bit `cvtsi2sd` is correct for
    /// every `u32` value.
    pub fn f64_convert_from_i32_u(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold at compile time.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                Ieee64::from_bits((imm.as_i32().unwrap() as u32 as f64).to_bits()).into(),
            ),
            _ => {
                let reg = match self.put_into_register(I32, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let temp = self.take_reg(F64).unwrap();

                dynasm!(self.asm
                    // Zero-extend: a 32-bit mov clears the upper 32 bits.
                    ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap())
                    ; cvtsi2sd Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap())
                );

                ValueLocation::Reg(temp)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4410
    /// `f32.convert_i64_u`: pop an i64, reinterpret it as unsigned and
    /// push its f32 value.
    ///
    /// Non-negative inputs fit the signed `cvtsi2ss` directly. For inputs
    /// with the top bit set we halve the value with round-to-odd (shift
    /// right by one, OR the lost low bit back in so the final rounding is
    /// unbiased), convert, then double the float result with `addss`.
    pub fn f32_convert_from_i64_u(&mut self) -> Result<(), Error> {
        let mut val = self.pop()?;

        let out_val = match val {
            // Constant-fold at compile time.
            ValueLocation::Immediate(imm) => ValueLocation::Immediate(
                Ieee32::from_bits((imm.as_i64().unwrap() as u64 as f32).to_bits()).into(),
            ),
            _ => {
                let reg = match self.put_into_register(I64, &mut val) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let out = self.take_reg(F32).unwrap();
                let temp = self.take_reg(I64).unwrap();

                // NOTE(review): the negative path below mutates `reg` in
                // place; this assumes the register is exclusively owned —
                // confirm `put_into_register` cannot hand back a shared
                // location here (cf. `put_into_temp_register`).
                dynasm!(self.asm
                    ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap())
                    ; js >negative
                    ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
                    ; jmp >ret
                    ; negative:
                    // temp = val >> 1; reg = (val & 1) | temp  (round-to-odd halve)
                    ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
                    ; shr Rq(temp.rq().unwrap()), 1
                    ; and Rq(reg.rq().unwrap()), 1
                    ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap())
                    ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
                    // Double the halved result back to the true magnitude.
                    ; addss Rx(out.rx().unwrap()), Rx(out.rx().unwrap())
                    ; ret:
                );

                self.free_value(ValueLocation::Reg(temp))?;

                ValueLocation::Reg(out)
            }
        };

        self.free_value(val)?;

        self.push(out_val)?;
        Ok(())
    }
4455
f64_convert_from_i64_u(&mut self) -> Result<(), Error>4456 pub fn f64_convert_from_i64_u(&mut self) -> Result<(), Error> {
4457 let mut val = self.pop()?;
4458
4459 let out_val = match val {
4460 ValueLocation::Immediate(imm) => ValueLocation::Immediate(
4461 Ieee64::from_bits((imm.as_i64().unwrap() as u64 as f64).to_bits()).into(),
4462 ),
4463 _ => {
4464 let reg = match self.put_into_register(I64, &mut val) {
4465 Err(e) => return Err(e),
4466 Ok(o) => {
4467 o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
4468 }
4469 };
4470
4471 let out = self.take_reg(F32).unwrap();
4472 let temp = self.take_reg(I64).unwrap();
4473
4474 dynasm!(self.asm
4475 ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap())
4476 ; js >negative
4477 ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
4478 ; jmp >ret
4479 ; negative:
4480 ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap())
4481 ; shr Rq(temp.rq().unwrap()), 1
4482 ; and Rq(reg.rq().unwrap()), 1
4483 ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap())
4484 ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap())
4485 ; addsd Rx(out.rx().unwrap()), Rx(out.rx().unwrap())
4486 ; ret:
4487 );
4488
4489 self.free_value(ValueLocation::Reg(temp))?;
4490
4491 ValueLocation::Reg(out)
4492 }
4493 };
4494
4495 self.free_value(val)?;
4496
4497 self.push(out_val)?;
4498 Ok(())
4499 }
4500
i32_wrap_from_i64(&mut self) -> Result<(), Error>4501 pub fn i32_wrap_from_i64(&mut self) -> Result<(), Error> {
4502 let val = self.pop()?;
4503
4504 let out = match val {
4505 ValueLocation::Immediate(imm) => {
4506 ValueLocation::Immediate((imm.as_i64().unwrap() as u64 as u32).into())
4507 }
4508 val => val,
4509 };
4510
4511 self.push(out)?;
4512 Ok(())
4513 }
4514
i32_reinterpret_from_f32(&mut self) -> Result<(), Error>4515 pub fn i32_reinterpret_from_f32(&mut self) -> Result<(), Error> {
4516 let val = self.pop()?;
4517
4518 let out = match val {
4519 ValueLocation::Immediate(imm) => {
4520 ValueLocation::Immediate(imm.as_f32().unwrap().to_bits().into())
4521 }
4522 val => val,
4523 };
4524
4525 self.push(out)?;
4526 Ok(())
4527 }
4528
i64_reinterpret_from_f64(&mut self) -> Result<(), Error>4529 pub fn i64_reinterpret_from_f64(&mut self) -> Result<(), Error> {
4530 let val = self.pop()?;
4531
4532 let out = match val {
4533 ValueLocation::Immediate(imm) => {
4534 ValueLocation::Immediate(imm.as_f64().unwrap().to_bits().into())
4535 }
4536 val => val,
4537 };
4538
4539 self.push(out)?;
4540 Ok(())
4541 }
4542
f32_reinterpret_from_i32(&mut self) -> Result<(), Error>4543 pub fn f32_reinterpret_from_i32(&mut self) -> Result<(), Error> {
4544 let val = self.pop()?;
4545
4546 let out = match val {
4547 ValueLocation::Immediate(imm) => {
4548 ValueLocation::Immediate(Ieee32::from_bits(imm.as_i32().unwrap() as _).into())
4549 }
4550 val => val,
4551 };
4552
4553 self.push(out)?;
4554 Ok(())
4555 }
4556
f64_reinterpret_from_i64(&mut self) -> Result<(), Error>4557 pub fn f64_reinterpret_from_i64(&mut self) -> Result<(), Error> {
4558 let val = self.pop()?;
4559
4560 let out = match val {
4561 ValueLocation::Immediate(imm) => {
4562 ValueLocation::Immediate(Ieee64::from_bits(imm.as_i64().unwrap() as _).into())
4563 }
4564 val => val,
4565 };
4566
4567 self.push(out)?;
4568 Ok(())
4569 }
4570
    // Population count; the closure is the compile-time constant-folding
    // equivalent of the emitted `popcnt` instruction.
    unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64);

    // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands
    // are in registers.
    //
    // Integer add/and/or/xor/sub. Each macro takes the operation name, the
    // x86 mnemonic to emit, and a closure used to fold constant operands
    // at compile time.
    commutative_binop_i32!(i32_add, add, i32::wrapping_add);
    commutative_binop_i32!(i32_and, and, |a, b| a & b);
    commutative_binop_i32!(i32_or, or, |a, b| a | b);
    commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b);
    binop_i32!(i32_sub, sub, i32::wrapping_sub);

    commutative_binop_i64!(i64_add, add, i64::wrapping_add);
    commutative_binop_i64!(i64_and, and, |a, b| a & b);
    commutative_binop_i64!(i64_or, or, |a, b| a | b);
    commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b);
    binop_i64!(i64_sub, sub, i64::wrapping_sub);
4586
    // f32 arithmetic; closures constant-fold immediate operands.
    commutative_binop_f32!(f32_add, addss, |a, b| a + b);
    commutative_binop_f32!(f32_mul, mulss, |a, b| a * b);
    // `minmax_float!` arguments: min/max instruction, compare, an add and
    // a bitwise sign-combining op (presumably `orps` so min(-0, +0) == -0
    // and `andps` so max(-0, +0) == +0 — see the macro definition), the
    // immediate accessor, and the constant-folding closure.
    minmax_float!(
        f32_min,
        minss,
        ucomiss,
        addss,
        orps,
        as_f32,
        |a: Ieee32, b: Ieee32| Ieee32::from_bits(
            f32::from_bits(a.to_bits())
                .min(f32::from_bits(b.to_bits()))
                .to_bits()
        )
    );
    minmax_float!(
        f32_max,
        maxss,
        ucomiss,
        addss,
        andps,
        as_f32,
        |a: Ieee32, b: Ieee32| Ieee32::from_bits(
            f32::from_bits(a.to_bits())
                .max(f32::from_bits(b.to_bits()))
                .to_bits()
        )
    );
    binop_f32!(f32_sub, subss, |a, b| a - b);
    binop_f32!(f32_div, divss, |a, b| a / b);
4617
f32_ceil(&mut self) -> Result<(), Error>4618 pub fn f32_ceil(&mut self) -> Result<(), Error> {
4619 self.relocated_function_call(
4620 &ir::ExternalName::LibCall(ir::LibCall::CeilF32),
4621 iter::once(F32),
4622 iter::once(F32),
4623 FunctionDefLocation::PossiblyExternal,
4624 )?;
4625 Ok(())
4626 }
4627
f32_floor(&mut self) -> Result<(), Error>4628 pub fn f32_floor(&mut self) -> Result<(), Error> {
4629 self.relocated_function_call(
4630 &ir::ExternalName::LibCall(ir::LibCall::FloorF32),
4631 iter::once(F32),
4632 iter::once(F32),
4633 FunctionDefLocation::PossiblyExternal,
4634 )?;
4635 Ok(())
4636 }
4637
f32_nearest(&mut self) -> Result<(), Error>4638 pub fn f32_nearest(&mut self) -> Result<(), Error> {
4639 self.relocated_function_call(
4640 &ir::ExternalName::LibCall(ir::LibCall::NearestF32),
4641 iter::once(F32),
4642 iter::once(F32),
4643 FunctionDefLocation::PossiblyExternal,
4644 )?;
4645 Ok(())
4646 }
4647
f32_trunc(&mut self) -> Result<(), Error>4648 pub fn f32_trunc(&mut self) -> Result<(), Error> {
4649 self.relocated_function_call(
4650 &ir::ExternalName::LibCall(ir::LibCall::TruncF32),
4651 iter::once(F32),
4652 iter::once(F32),
4653 FunctionDefLocation::PossiblyExternal,
4654 )?;
4655 Ok(())
4656 }
4657
    // f64 arithmetic; closures constant-fold immediate operands.
    commutative_binop_f64!(f64_add, addsd, |a, b| a + b);
    commutative_binop_f64!(f64_mul, mulsd, |a, b| a * b);
    // `minmax_float!` arguments: min/max instruction, compare, an add and
    // a bitwise sign-combining op (presumably `orpd` so min(-0, +0) == -0
    // and `andpd` so max(-0, +0) == +0 — see the macro definition), the
    // immediate accessor, and the constant-folding closure.
    minmax_float!(
        f64_min,
        minsd,
        ucomisd,
        addsd,
        orpd,
        as_f64,
        |a: Ieee64, b: Ieee64| Ieee64::from_bits(
            f64::from_bits(a.to_bits())
                .min(f64::from_bits(b.to_bits()))
                .to_bits()
        )
    );
    minmax_float!(
        f64_max,
        maxsd,
        ucomisd,
        addsd,
        andpd,
        as_f64,
        |a: Ieee64, b: Ieee64| Ieee64::from_bits(
            f64::from_bits(a.to_bits())
                .max(f64::from_bits(b.to_bits()))
                .to_bits()
        )
    );
    binop_f64!(f64_sub, subsd, |a, b| a - b);
    binop_f64!(f64_div, divsd, |a, b| a / b);
4688
f64_ceil(&mut self) -> Result<(), Error>4689 pub fn f64_ceil(&mut self) -> Result<(), Error> {
4690 self.relocated_function_call(
4691 &ir::ExternalName::LibCall(ir::LibCall::CeilF64),
4692 iter::once(F64),
4693 iter::once(F64),
4694 FunctionDefLocation::PossiblyExternal,
4695 )?;
4696 Ok(())
4697 }
4698
f64_floor(&mut self) -> Result<(), Error>4699 pub fn f64_floor(&mut self) -> Result<(), Error> {
4700 self.relocated_function_call(
4701 &ir::ExternalName::LibCall(ir::LibCall::FloorF64),
4702 iter::once(F64),
4703 iter::once(F64),
4704 FunctionDefLocation::PossiblyExternal,
4705 )?;
4706 Ok(())
4707 }
4708
f64_nearest(&mut self) -> Result<(), Error>4709 pub fn f64_nearest(&mut self) -> Result<(), Error> {
4710 self.relocated_function_call(
4711 &ir::ExternalName::LibCall(ir::LibCall::NearestF64),
4712 iter::once(F64),
4713 iter::once(F64),
4714 FunctionDefLocation::PossiblyExternal,
4715 )?;
4716 Ok(())
4717 }
4718
f64_trunc(&mut self) -> Result<(), Error>4719 pub fn f64_trunc(&mut self) -> Result<(), Error> {
4720 self.relocated_function_call(
4721 &ir::ExternalName::LibCall(ir::LibCall::TruncF64),
4722 iter::once(F64),
4723 iter::once(F64),
4724 FunctionDefLocation::PossiblyExternal,
4725 )?;
4726 Ok(())
4727 }
4728
    // Shifts and rotates. The `Rd`/`Rq` argument selects the 32- vs
    // 64-bit operand encoding; the closures constant-fold immediates.
    // `wrapping_shl`/`wrapping_shr` mask the shift count by the bit
    // width, matching both x86 semantics and the wasm spec.
    shift!(
        i32_shl,
        Rd,
        shl,
        |a, b| (a as i32).wrapping_shl(b as _),
        I32
    );
    shift!(
        i32_shr_s,
        Rd,
        sar,
        |a, b| (a as i32).wrapping_shr(b as _),
        I32
    );
    shift!(
        i32_shr_u,
        Rd,
        shr,
        |a, b| (a as u32).wrapping_shr(b as _),
        I32
    );
    shift!(
        i32_rotl,
        Rd,
        rol,
        |a, b| (a as u32).rotate_left(b as _),
        I32
    );
    shift!(
        i32_rotr,
        Rd,
        ror,
        |a, b| (a as u32).rotate_right(b as _),
        I32
    );

    shift!(
        i64_shl,
        Rq,
        shl,
        |a, b| (a as i64).wrapping_shl(b as _),
        I64
    );
    shift!(
        i64_shr_s,
        Rq,
        sar,
        |a, b| (a as i64).wrapping_shr(b as _),
        I64
    );
    shift!(
        i64_shr_u,
        Rq,
        shr,
        |a, b| (a as u64).wrapping_shr(b as _),
        I64
    );
    shift!(
        i64_rotl,
        Rq,
        rol,
        |a, b| (a as u64).rotate_left(b as _),
        I64
    );
    shift!(
        i64_rotr,
        Rq,
        ror,
        |a, b| (a as u64).rotate_right(b as _),
        I64
    );
4800
    // TODO: Do this without emitting `mov`
    /// Restores registers previously spilled by `full_div`, popping them
    /// off the machine stack in iteration order and releasing the
    /// matching stack-depth reservation for each. The iterator must yield
    /// the registers in reverse push order (LIFO).
    fn cleanup_gprs(&mut self, gprs: impl Iterator<Item = GPR>) {
        for gpr in gprs {
            dynasm!(self.asm
                ; pop Rq(gpr.rq().unwrap())
            );
            self.block_state.depth.free(1);
            // DON'T MARK IT USED HERE! See comment in `full_div`
        }
    }
4811
    // Division and remainder. `int_div!` expands to the public
    // `*_div_*`/`*_rem_*` operations in terms of the `full_div` helper
    // below, parameterised by the immediate accessor, the signed/unsigned
    // Rust types used for constant folding, the register-width specifier
    // and the operand-size keyword for memory operands.
    int_div!(
        i32_full_div_s,
        i32_full_div_u,
        i32_div_u,
        i32_div_s,
        i32_rem_u,
        i32_rem_s,
        imm_i32,
        i32,
        u32,
        Rd,
        DWORD
    );
    int_div!(
        i64_full_div_s,
        i64_full_div_u,
        i64_div_u,
        i64_div_s,
        i64_rem_u,
        i64_rem_s,
        imm_i64,
        i64,
        u64,
        Rq,
        QWORD
    );
4838
4839 // TODO: With a proper SSE-like "Value" system we could do this way better (we wouldn't have
4840 // to move `RAX`/`RDX` back afterwards).
full_div( &mut self, mut divisor: ValueLocation, dividend: ValueLocation, do_div: impl FnOnce(&mut Self, &mut ValueLocation) -> Result<(), Error>, ) -> Result< ( ValueLocation, ValueLocation, impl Iterator<Item = GPR> + Clone + 'this, ), Error, >4841 fn full_div(
4842 &mut self,
4843 mut divisor: ValueLocation,
4844 dividend: ValueLocation,
4845 do_div: impl FnOnce(&mut Self, &mut ValueLocation) -> Result<(), Error>,
4846 ) -> Result<
4847 (
4848 ValueLocation,
4849 ValueLocation,
4850 impl Iterator<Item = GPR> + Clone + 'this,
4851 ),
4852 Error,
4853 > {
4854 // To stop `take_reg` from allocating either of these necessary registers
4855 self.block_state.regs.mark_used(RAX);
4856 self.block_state.regs.mark_used(RDX);
4857 if divisor == ValueLocation::Reg(RAX) || divisor == ValueLocation::Reg(RDX) {
4858 let new_reg = self.take_reg(GPRType::Rq).unwrap();
4859 self.copy_value(divisor, CCLoc::Reg(new_reg))?;
4860 self.free_value(divisor)?;
4861
4862 divisor = ValueLocation::Reg(new_reg);
4863 }
4864 self.block_state.regs.release(RAX)?;
4865 self.block_state.regs.release(RDX)?;
4866
4867 let saved_rax = if self.block_state.regs.is_free(RAX) {
4868 None
4869 } else {
4870 dynasm!(self.asm
4871 ; push rax
4872 );
4873 self.block_state.depth.reserve(1);
4874 // DON'T FREE THIS REGISTER HERE - since we don't
4875 // remove it from the stack freeing the register
4876 // here will cause `take_reg` to allocate it.
4877 Some(())
4878 };
4879
4880 let saved_rdx = if self.block_state.regs.is_free(RDX) {
4881 None
4882 } else {
4883 dynasm!(self.asm
4884 ; push rdx
4885 );
4886 self.block_state.depth.reserve(1);
4887 // DON'T FREE THIS REGISTER HERE - since we don't
4888 // remove it from the stack freeing the register
4889 // here will cause `take_reg` to allocate it.
4890 Some(())
4891 };
4892
4893 let saved = saved_rdx
4894 .map(|_| RDX)
4895 .into_iter()
4896 .chain(saved_rax.map(|_| RAX));
4897
4898 self.copy_value(dividend, CCLoc::Reg(RAX))?;
4899 self.block_state.regs.mark_used(RAX);
4900
4901 self.free_value(dividend)?;
4902 // To stop `take_reg` from allocating either of these necessary registers
4903 self.block_state.regs.mark_used(RDX);
4904
4905 do_div(self, &mut divisor)?;
4906 self.free_value(divisor)?;
4907
4908 if self.block_state.regs.is_free(RAX) {
4909 return Err(Error::Microwasm("full_div: RAX is not free".to_string()));
4910 }
4911 if self.block_state.regs.is_free(RDX) {
4912 return Err(Error::Microwasm("full_div: RDX is not free".to_string()));
4913 }
4914
4915 Ok((ValueLocation::Reg(RAX), ValueLocation::Reg(RDX), saved))
4916 }
4917
    /// Unsigned 32-bit division via `full_div`: zeroes `edx` (the high
    /// half of the dividend) and emits `div` with either a memory or a
    /// register divisor. Returns (quotient, remainder, saved-regs).
    fn i32_full_div_u(
        &mut self,
        divisor: ValueLocation,
        dividend: ValueLocation,
    ) -> Result<
        (
            ValueLocation,
            ValueLocation,
            impl Iterator<Item = GPR> + Clone + 'this,
        ),
        Error,
    > {
        self.full_div(divisor, dividend, |this, divisor| match divisor {
            ValueLocation::Stack(offset) => {
                let offset = this.adjusted_offset(*offset);
                dynasm!(this.asm
                    ; xor edx, edx
                    ; div DWORD [rsp + offset]
                );
                Ok(())
            }
            // Immediates and conditions are first materialised into a register.
            ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let r = match this.put_into_register(I32, divisor) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(this.asm
                    ; xor edx, edx
                    ; div Rd(r.rq().unwrap())
                );
                Ok(())
            }
        })
    }
4955
    /// Signed 32-bit division via `full_div`: sign-extends `eax` into
    /// `edx` with `cdq` and emits `idiv` with either a memory or a
    /// register divisor. Returns (quotient, remainder, saved-regs).
    fn i32_full_div_s(
        &mut self,
        divisor: ValueLocation,
        dividend: ValueLocation,
    ) -> Result<
        (
            ValueLocation,
            ValueLocation,
            impl Iterator<Item = GPR> + Clone + 'this,
        ),
        Error,
    > {
        self.full_div(divisor, dividend, |this, divisor| match divisor {
            ValueLocation::Stack(offset) => {
                let offset = this.adjusted_offset(*offset);
                dynasm!(this.asm
                    ; cdq
                    ; idiv DWORD [rsp + offset]
                );
                Ok(())
            }
            // Immediates and conditions are first materialised into a register.
            ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let r = match this.put_into_register(I32, divisor) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(this.asm
                    ; cdq
                    ; idiv Rd(r.rq().unwrap())
                );
                Ok(())
            }
        })
    }
4993
    /// Unsigned 64-bit division via `full_div`: zeroes `rdx` (the high
    /// half of the dividend) and emits `div` with either a memory or a
    /// register divisor. Returns (quotient, remainder, saved-regs).
    fn i64_full_div_u(
        &mut self,
        divisor: ValueLocation,
        dividend: ValueLocation,
    ) -> Result<
        (
            ValueLocation,
            ValueLocation,
            impl Iterator<Item = GPR> + Clone + 'this,
        ),
        Error,
    > {
        self.full_div(divisor, dividend, |this, divisor| match divisor {
            ValueLocation::Stack(offset) => {
                let offset = this.adjusted_offset(*offset);
                dynasm!(this.asm
                    ; xor rdx, rdx
                    ; div QWORD [rsp + offset]
                );
                Ok(())
            }
            // Immediates and conditions are first materialised into a register.
            ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let r = match this.put_into_register(I64, divisor) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };
                dynasm!(this.asm
                    ; xor rdx, rdx
                    ; div Rq(r.rq().unwrap())
                );
                Ok(())
            }
        })
    }
5030
    /// Signed 64-bit division via `full_div`: sign-extends `rax` into
    /// `rdx` with `cqo` and emits `idiv` with either a memory or a
    /// register divisor. Returns (quotient, remainder, saved-regs).
    fn i64_full_div_s(
        &mut self,
        divisor: ValueLocation,
        dividend: ValueLocation,
    ) -> Result<
        (
            ValueLocation,
            ValueLocation,
            impl Iterator<Item = GPR> + Clone + 'this,
        ),
        Error,
    > {
        self.full_div(divisor, dividend, |this, divisor| match divisor {
            ValueLocation::Stack(offset) => {
                let offset = this.adjusted_offset(*offset);
                dynasm!(this.asm
                    ; cqo
                    ; idiv QWORD [rsp + offset]
                );
                Ok(())
            }
            // Immediates and conditions are first materialised into a register.
            ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let r = match this.put_into_register(I64, divisor) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(this.asm
                    ; cqo
                    ; idiv Rq(r.rq().unwrap())
                );
                Ok(())
            }
        })
    }
5068
    // `i32_mul` needs to be separate because the immediate form of the instruction
    // has a different syntax to the immediate form of the other instructions.
    /// `i32.mul`: pop two operands and push their 32-bit wrapping product.
    ///
    /// Both-constant operands are folded at compile time. Otherwise,
    /// since two-operand `imul` writes its left operand, we prefer a left
    /// operand that is already in a register (multiplication is
    /// commutative so the operands may be swapped), and use the
    /// three-operand `imul r, r, imm` form for a constant right operand.
    pub fn i32_mul(&mut self) -> Result<(), Error> {
        let right = self.pop()?;
        let left = self.pop()?;

        // Constant folding.
        if let Some(right) = right.immediate() {
            if let Some(left) = left.immediate() {
                self.push(ValueLocation::Immediate(
                    i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(),
                ))?;
                return Ok(());
            }
        }

        // Swap so that `left` (the written operand) is in a register when
        // possible, but keep an immediate on the right for the imm form.
        let (mut left, mut right) = match left {
            ValueLocation::Reg(_) => (left, right),
            _ => {
                if right.immediate().is_some() {
                    (left, right)
                } else {
                    (right, left)
                }
            }
        };

        let out = match right {
            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let rreg = match self.put_into_register(I32, &mut right) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };
                // `left` must be a *temp* register since `imul` clobbers it.
                let lreg = match self.put_into_temp_register(I32, &mut left) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(self.asm
                    ; imul Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap())
                );
                left
            }
            ValueLocation::Stack(offset) => {
                let offset = self.adjusted_offset(offset);

                let lreg = match self.put_into_temp_register(I32, &mut left) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(self.asm
                    ; imul Rd(lreg.rq().unwrap()), [rsp + offset]
                );
                left
            }
            ValueLocation::Immediate(i) => {
                // Three-operand form writes a fresh register, so `left`
                // only needs to be readable, not a temp.
                let lreg = match self.put_into_register(I32, &mut left) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                let new_reg = self.take_reg(I32).unwrap();
                dynasm!(self.asm
                    ; imul Rd(new_reg.rq().unwrap()), Rd(lreg.rq().unwrap()), i.as_i32().unwrap()
                );
                self.free_value(left)?;
                ValueLocation::Reg(new_reg)
            }
        };

        self.push(out)?;
        self.free_value(right)?;
        Ok(())
    }
5151
    // `i64_mul` needs to be separate because the immediate form of the instruction
    // has a different syntax to the immediate form of the other instructions.
    /// `i64.mul`: pop two operands and push their 64-bit wrapping product.
    ///
    /// Same strategy as `i32_mul`, with one extra wrinkle: the
    /// three-operand `imul r64, r/m64, imm32` form only accepts a
    /// sign-extended 32-bit immediate, so constants that do not fit are
    /// materialised into a register and multiplied with the two-operand
    /// form instead.
    pub fn i64_mul(&mut self) -> Result<(), Error> {
        let right = self.pop()?;
        let left = self.pop()?;

        // Constant folding.
        if let Some(right) = right.immediate() {
            if let Some(left) = left.immediate() {
                self.push(ValueLocation::Immediate(
                    i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(),
                ))?;
                return Ok(());
            }
        }

        // Swap so that `left` (the written operand) is in a register when
        // possible, but keep an immediate on the right for the imm form.
        let (mut left, mut right) = match left {
            ValueLocation::Reg(_) => (left, right),
            _ => {
                if right.immediate().is_some() {
                    (left, right)
                } else {
                    (right, left)
                }
            }
        };

        let out = match right {
            ValueLocation::Reg(_) | ValueLocation::Cond(_) => {
                let rreg = match self.put_into_register(I64, &mut right) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };
                // `left` must be a *temp* register since `imul` clobbers it.
                let lreg = match self.put_into_temp_register(I64, &mut left) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(self.asm
                    ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
                );
                left
            }
            ValueLocation::Stack(offset) => {
                let offset = self.adjusted_offset(offset);

                let lreg = match self.put_into_temp_register(I64, &mut left) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };

                dynasm!(self.asm
                    ; imul Rq(lreg.rq().unwrap()), [rsp + offset]
                );
                left
            }
            ValueLocation::Immediate(i) => {
                let i = i.as_i64().unwrap();
                // `imul r64, r64, imm32`: only usable if the constant
                // fits in a sign-extended i32.
                if let Ok(i) = i.try_into() {
                    let new_reg = self.take_reg(I64).unwrap();

                    let lreg = self
                        .put_into_register(I64, &mut left)?
                        .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                    dynasm!(self.asm
                        ; imul Rq(new_reg.rq().unwrap()), Rq(lreg.rq().unwrap()), i
                    );

                    self.free_value(left)?;

                    ValueLocation::Reg(new_reg)
                } else {
                    // Constant too wide: load it into a register and use
                    // the two-operand form.
                    let rreg = self
                        .put_into_register(I64, &mut right)?
                        .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;
                    let lreg = self
                        .put_into_temp_register(I64, &mut left)?
                        .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

                    dynasm!(self.asm
                        ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap())
                    );
                    left
                }
            }
        };

        self.push(out)?;
        self.free_value(right)?;
        Ok(())
    }
5249
    /// Emits a 64-bit conditional move of `src` into `dst`, taken when
    /// `cond_code` holds. Dispatches on the source operand kind (register
    /// vs. stack slot) and on the condition, since each `cmovcc` variant
    /// is a distinct mnemonic in dynasm.
    fn cmov(&mut self, cond_code: CondCode, dst: GPR, src: CCLoc) {
        match src {
            CCLoc::Reg(reg) => match cond_code {
                cc::EQUAL => {
                    dynasm!(self.asm
                        ; cmove Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::NOT_EQUAL => {
                    dynasm!(self.asm
                        ; cmovne Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::GE_U => {
                    dynasm!(self.asm
                        ; cmovae Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::LT_U => {
                    dynasm!(self.asm
                        ; cmovb Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::GT_U => {
                    dynasm!(self.asm
                        ; cmova Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::LE_U => {
                    dynasm!(self.asm
                        ; cmovbe Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::GE_S => {
                    dynasm!(self.asm
                        ; cmovge Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::LT_S => {
                    dynasm!(self.asm
                        ; cmovl Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::GT_S => {
                    dynasm!(self.asm
                        ; cmovg Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
                cc::LE_S => {
                    dynasm!(self.asm
                        ; cmovle Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap())
                    );
                }
            },
            // Stack source: same dispatch with an `[rsp + offset]` operand.
            CCLoc::Stack(offset) => {
                let offset = self.adjusted_offset(offset);

                match cond_code {
                    cc::EQUAL => {
                        dynasm!(self.asm
                            ; cmove Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::NOT_EQUAL => {
                        dynasm!(self.asm
                            ; cmovne Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::GE_U => {
                        dynasm!(self.asm
                            ; cmovae Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::LT_U => {
                        dynasm!(self.asm
                            ; cmovb Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::GT_U => {
                        dynasm!(self.asm
                            ; cmova Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::LE_U => {
                        dynasm!(self.asm
                            ; cmovbe Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::GE_S => {
                        dynasm!(self.asm
                            ; cmovge Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::LT_S => {
                        dynasm!(self.asm
                            ; cmovl Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::GT_S => {
                        dynasm!(self.asm
                            ; cmovg Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                    cc::LE_S => {
                        dynasm!(self.asm
                            ; cmovle Rq(dst.rq().unwrap()), [rsp + offset]
                        );
                    }
                }
            }
        }
    }
5362
    /// Wasm `select`: pops `cond`, `else_`, `then` and pushes `then` when
    /// `cond` is non-zero, otherwise `else_`.
    ///
    /// A constant condition resolves statically. Otherwise the condition
    /// is turned into CPU flags (reusing an existing `Cond` location or
    /// emitting a `test`), both branches are placed in cmov-compatible
    /// locations (register or stack slot), and the result is selected
    /// with `cmov` — reusing a branch's own register as the destination
    /// when that register is not shared with another value.
    pub fn select(&mut self) -> Result<(), Error> {
        let mut cond = self.pop()?;
        let mut else_ = self.pop()?;
        let mut then = self.pop()?;

        // Statically-known condition: pick a side and free the other.
        if let ValueLocation::Immediate(i) = cond {
            if i.as_i32().unwrap() == 0 {
                self.free_value(then)?;
                self.push(else_)?;
            } else {
                self.free_value(else_)?;
                self.push(then)?;
            }

            return Ok(());
        }

        let cond_code = match cond {
            // Condition already lives in the flags register.
            ValueLocation::Cond(cc) => cc,
            _ => {
                let cond_reg = match self.put_into_register(I32, &mut cond) {
                    Err(e) => return Err(e),
                    Ok(o) => {
                        o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                    }
                };
                dynasm!(self.asm
                    ; test Rd(cond_reg.rq().unwrap()), Rd(cond_reg.rq().unwrap())
                );
                self.free_value(cond)?;

                cc::NOT_EQUAL
            }
        };

        // Both branches must be in a location `cmov` can read.
        let else_ = if let ValueLocation::Stack(offset) = else_ {
            CCLoc::Stack(offset)
        } else {
            let gpr = match self.put_into_register(I32, &mut else_) {
                Err(e) => return Err(e),
                Ok(o) => {
                    o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                }
            };
            CCLoc::Reg(gpr)
        };

        let then = if let ValueLocation::Stack(offset) = then {
            CCLoc::Stack(offset)
        } else {
            let gpr = match self.put_into_register(I32, &mut then) {
                Err(e) => return Err(e),
                Ok(o) => {
                    o.ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?
                }
            };
            CCLoc::Reg(gpr)
        };

        let out_gpr = match (then, else_) {
            // `then` already owns a non-shared register: overwrite it with
            // `else_` when the condition is *false* (inverted condition).
            (CCLoc::Reg(then_reg), else_) if self.block_state.regs.num_usages(then_reg) <= 1 => {
                self.cmov(!cond_code, then_reg, else_);
                self.free_value(else_.into())?;

                then_reg
            }
            // Symmetric case: reuse `else_`'s register.
            (then, CCLoc::Reg(else_reg)) if self.block_state.regs.num_usages(else_reg) <= 1 => {
                self.cmov(cond_code, else_reg, then);
                self.free_value(then.into())?;

                else_reg
            }
            // Neither register is exclusively ours: allocate a fresh one.
            (then, else_) => {
                let out = self.take_reg(GPRType::Rq).unwrap();
                self.copy_value(else_.into(), CCLoc::Reg(out))?;
                self.cmov(cond_code, out, then);

                self.free_value(then.into())?;
                self.free_value(else_.into())?;

                out
            }
        };

        self.push(ValueLocation::Reg(out_gpr))?;
        Ok(())
    }
5450
pick(&mut self, depth: u32)5451 pub fn pick(&mut self, depth: u32) {
5452 let idx = self.block_state.stack.len() - 1 - depth as usize;
5453 let v = self.block_state.stack[idx];
5454 if let ValueLocation::Reg(r) = v {
5455 self.block_state.regs.mark_used(r);
5456 }
5457 self.block_state.stack.push(v);
5458 }
5459
const_(&mut self, imm: Value) -> Result<(), Error>5460 pub fn const_(&mut self, imm: Value) -> Result<(), Error> {
5461 self.push(ValueLocation::Immediate(imm))?;
5462 Ok(())
5463 }
5464
    /// Emits a call to a function identified by a Cranelift
    /// `ExternalName` (e.g. a libcall), leaving an `Abs8` relocation for
    /// the linker to patch the placeholder address.
    ///
    /// `args`/`rets` describe the wasm-level signature (the caller-vmctx
    /// argument is handled separately). When `func_def_loc` is
    /// `PossiblyExternal`, the VMCTX register is saved around the call
    /// and `CALLER_VMCTX` is set up per the cross-module calling
    /// convention.
    fn relocated_function_call<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        name: &cranelift_codegen::ir::ExternalName,
        args: A,
        rets: R,
        func_def_loc: FunctionDefLocation,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        let locs = arg_locs_skip_caller_vmctx(args);

        // External callees may clobber VMCTX, so preserve it on the
        // physical stack and pass it along as the caller's vmctx.
        let saved_vmctx = if func_def_loc == FunctionDefLocation::PossiblyExternal {
            dynasm!(self.asm
                ; mov Rq(CALLER_VMCTX), Rq(VMCTX)
            );
            self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));
            self.block_state.regs.mark_used(GPR::Rq(VMCTX));
            Some(self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?)
        } else {
            None
        };

        self.save_volatile()?;

        self.pass_outgoing_args(&locs)?;

        // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate
        self.reloc_sink.reloc_external(
            (self.asm.offset().0
                - self.func_starts[self.current_function as usize]
                    .0
                    .unwrap()
                    .0) as u32
                + 2,
            // Passing a default location here, since until proven otherwise, it's not used.
            ir::SourceLoc::default(),
            binemit::Reloc::Abs8,
            name,
            0,
        );
        let temp = self.take_reg(I64).unwrap();

        // Placeholder address; the relocation above patches the 8-byte
        // immediate of this `mov` at link time.
        dynasm!(self.asm
            ; mov Rq(temp.rq().unwrap()), QWORD 0xDEAD_BEEF_DEAD_BEEF_u64 as i64
            ; call Rq(temp.rq().unwrap())
        );
        self.block_state.regs.release(temp)?;

        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(rets)?;

        // Restore VMCTX from the slot saved above.
        if func_def_loc == FunctionDefLocation::PossiblyExternal {
            let saved_vmctx = saved_vmctx.unwrap();
            self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
            self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?;
            self.free_value(saved_vmctx)?;
        }

        Ok(())
    }
5533
    /// Calls one of the runtime's builtin functions (e.g. the `memory.grow`
    /// implementation), whose address is loaded from the vmctx at
    /// `vmctx_builtin_function(i)`.
    ///
    /// Unlike `relocated_function_call`, VMCTX is preserved with a raw
    /// push/pop and the arguments are laid out with `arg_locs`, i.e. the
    /// caller-vmctx slot is NOT skipped.
    fn builtin_function_call<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        i: BuiltinFunctionIndex,
        args: A,
        rets: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        let locs = arg_locs(args);

        // Preserve VMCTX across the call; account for the extra slot in our
        // stack-depth tracking.
        dynasm!(self.asm
            ; push Rq(VMCTX)
        );
        self.block_state.depth.reserve(1);
        let depth = self.block_state.depth.clone();

        self.save_volatile()?;

        // CALLER_VMCTX is passed as an ordinary argument to builtins, so
        // release it for `pass_outgoing_args` and re-mark it used below.
        self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
        self.pass_outgoing_args(&locs)?;

        // Load the builtin's address out of the vmctx and call it.
        let temp = self.take_reg(I64).unwrap();
        dynasm!(self.asm
            ; mov Rq(temp.rq().unwrap()), [
                Rq(VMCTX) + self.module_context.vmctx_builtin_function(i.index()) as i32
            ]
            ; call Rq(temp.rq().unwrap())
        );

        self.block_state.regs.release(temp)?;

        for i in locs {
            self.free_value(i.into())?;
        }
        self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));

        self.push_function_returns(rets)?;

        // Restore the pre-call stack depth and pop VMCTX back.
        self.set_stack_depth(depth)?;
        dynasm!(self.asm
            ; pop Rq(VMCTX)
        );
        self.block_state.depth.free(1);

        Ok(())
    }
5585
5586 // TODO: Other memory indices
memory_size(&mut self) -> Result<(), Error>5587 pub fn memory_size(&mut self) -> Result<(), Error> {
5588 let memory_index = 0;
5589 if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) {
5590 self.push(ValueLocation::Immediate(defined_memory_index.into()))?;
5591 self.builtin_function_call(
5592 BuiltinFunctionIndex::get_memory32_size_index(),
5593 [self.pointer_type].iter().copied(),
5594 [self.pointer_type].iter().copied(),
5595 )?;
5596 } else {
5597 self.push(ValueLocation::Immediate(memory_index.into()))?;
5598 self.builtin_function_call(
5599 BuiltinFunctionIndex::get_imported_memory32_size_index(),
5600 [self.pointer_type].iter().copied(),
5601 [self.pointer_type].iter().copied(),
5602 )?;
5603 }
5604 Ok(())
5605 }
5606
5607 // TODO: Other memory indices
memory_grow(&mut self) -> Result<(), Error>5608 pub fn memory_grow(&mut self) -> Result<(), Error> {
5609 let memory_index = 0;
5610 if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) {
5611 self.push(ValueLocation::Immediate(defined_memory_index.into()))?;
5612 self.builtin_function_call(
5613 BuiltinFunctionIndex::get_memory32_grow_index(),
5614 [self.pointer_type, self.pointer_type].iter().copied(),
5615 [self.pointer_type].iter().copied(),
5616 )?;
5617 } else {
5618 self.push(ValueLocation::Immediate(memory_index.into()))?;
5619 self.builtin_function_call(
5620 BuiltinFunctionIndex::get_imported_memory32_grow_index(),
5621 [self.pointer_type, self.pointer_type].iter().copied(),
5622 [self.pointer_type].iter().copied(),
5623 )?;
5624 }
5625 Ok(())
5626 }
5627
5628 // TODO: Use `ArrayVec`?
5629 // TODO: This inefficiently duplicates registers but it's not really possible
5630 // to double up stack space right now.
5631 /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used.
save_volatile(&mut self) -> Result<(), Error>5632 fn save_volatile(&mut self) -> Result<(), Error> {
5633 self.save_regs(SCRATCH_REGS.iter().copied())?;
5634 Ok(())
5635 }
5636
save_regs<I>(&mut self, to_save: I) -> Result<(), Error> where I: IntoIterator<Item = GPR>, I::IntoIter: Clone,5637 fn save_regs<I>(&mut self, to_save: I) -> Result<(), Error>
5638 where
5639 I: IntoIterator<Item = GPR>,
5640 I::IntoIter: Clone,
5641 {
5642 // TODO: We can filter out registers that are already marked free, but just to ensure
5643 // that this doesn't fail when confronted with the `memory_grow`/`memory_size`
5644 // weirdness.
5645 let to_save = to_save.into_iter();
5646 if to_save.clone().count() == 0 {
5647 return Ok(());
5648 }
5649
5650 let mut stack = mem::replace(&mut self.block_state.stack, vec![]);
5651 let mut slice = &mut stack[..];
5652
5653 while let Some((first, rest)) = slice.split_first_mut() {
5654 if let ValueLocation::Reg(vreg) = *first {
5655 if to_save.clone().any(|r| r == vreg) {
5656 let old = *first;
5657 *first = self.push_physical(old)?;
5658 for val in &mut *rest {
5659 if *val == old {
5660 self.free_value(*val)?;
5661 *val = *first;
5662 }
5663 }
5664 }
5665 }
5666
5667 slice = rest;
5668 }
5669
5670 self.block_state.stack = stack;
5671
5672 Ok(())
5673 }
5674
    /// Write the arguments to the callee to the registers and the stack using the SystemV
    /// calling convention.
    ///
    /// Pops one value off the virtual stack per entry in `out_locs` (last
    /// argument is on top) and moves each into its destination, breaking
    /// register-move cycles by spilling when necessary.
    fn pass_outgoing_args(
        &mut self,
        out_locs: &(impl ExactSizeIterator<Item = CCLoc> + DoubleEndedIterator + Clone),
    ) -> Result<(), Error> {
        // Stack slots needed for arguments passed at non-negative stack
        // offsets: highest such offset + 1.
        let total_stack_space = out_locs
            .clone()
            .flat_map(|l| {
                if let CCLoc::Stack(offset) = l {
                    if offset >= 0 {
                        Some(offset as u32 + 1)
                    } else {
                        None
                    }
                } else {
                    None
                }
            })
            .max()
            .unwrap_or(0);
        let original_depth = self.block_state.depth.clone();
        let mut needed_depth = original_depth.clone();
        needed_depth.reserve(total_stack_space);

        // Keep the depth even so the stack pointer stays aligned at the call.
        // NOTE(review): this assumes one slot == WORD_SIZE (8) bytes, giving
        // 16-byte alignment — confirm against `set_stack_depth`.
        if needed_depth.0 & 1 != 0 {
            needed_depth.reserve(1);
        }

        self.set_stack_depth(needed_depth.clone())?;

        // Pair each popped source with its destination. `out_locs` is walked
        // in reverse because the last argument sits on top of the stack.
        let mut pending = Vec::<(ValueLocation, CCLoc)>::with_capacity(out_locs.len());

        for loc in out_locs.clone().rev() {
            let val = self.pop()?;

            pending.push((val, loc));
        }

        // Parallel-move resolution: repeatedly perform moves whose destination
        // register is free, deferring moves that would clobber a live source.
        while !pending.is_empty() {
            let start_len = pending.len();

            for (src, dst) in mem::replace(&mut pending, vec![]) {
                if src != ValueLocation::from(dst) {
                    let dst = match dst {
                        CCLoc::Reg(r) => {
                            // Destination register still holds a pending
                            // source — retry on a later pass.
                            if !self.block_state.regs.is_free(r) {
                                pending.push((src, dst));
                                continue;
                            }

                            self.block_state.regs.mark_used(r);

                            dst
                        }
                        // Stack destinations are given callee-relative;
                        // convert to an offset relative to the new depth.
                        CCLoc::Stack(offset) => CCLoc::Stack(offset - needed_depth.0 as i32),
                    };

                    self.copy_value(src, dst)?;
                    self.free_value(src)?;
                }
            }

            // No progress means a cycle of register-to-register moves; break
            // it by spilling one register source to the physical stack and
            // rewriting every pending use of that register.
            if pending.len() == start_len {
                let src = match pending
                    .iter()
                    .filter_map(|(src, _)| {
                        if let ValueLocation::Reg(reg) = src {
                            Some(reg)
                        } else {
                            None
                        }
                    })
                    .next()
                {
                    None => {
                        return Err(Error::Microwasm(
                            "Programmer error: We shouldn't need to push \
                        intermediate args if we don't have any argument sources in registers"
                                .to_string(),
                        ));
                    }
                    Some(val) => *val,
                };
                let new_src = self.push_physical(ValueLocation::Reg(src))?;
                for (old_src, _) in pending.iter_mut() {
                    if *old_src == ValueLocation::Reg(src) {
                        *old_src = new_src;
                    }
                }
            }
        }

        // We do this a second time just in case we had to use `push_physical` to resolve cycles in
        // `pending`
        self.set_stack_depth(needed_depth)?;

        Ok(())
    }
5774
push_function_returns( &mut self, returns: impl IntoIterator<Item = SignlessType>, ) -> Result<(), Error>5775 fn push_function_returns(
5776 &mut self,
5777 returns: impl IntoIterator<Item = SignlessType>,
5778 ) -> Result<(), Error> {
5779 for loc in ret_locs(returns)? {
5780 if let CCLoc::Reg(reg) = loc {
5781 self.block_state.regs.mark_used(reg);
5782 }
5783
5784 self.push(loc.into())?;
5785 }
5786 Ok(())
5787 }
5788
trap_if(&mut self, ccode: CondCode, trap_code: TrapCode)5789 fn trap_if(&mut self, ccode: CondCode, trap_code: TrapCode) {
5790 let label = self.create_label();
5791 self.br_on_cond_code(label, !ccode);
5792 self.trap(trap_code);
5793 self.define_label(label);
5794 }
5795
    /// Emits an indirect call through table 0.
    ///
    /// Pops the callee's table index off the value stack, bounds-checks it
    /// against the table's current element count (trapping with
    /// `TableOutOfBounds`), verifies the entry's signature id against
    /// `type_id` (trapping with `BadSignature`), installs the callee's vmctx,
    /// and calls through the entry's function pointer.
    pub fn call_indirect<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        type_id: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        // Preserve VMCTX across the call; the callee's own vmctx is loaded
        // into the register below.
        dynasm!(self.asm
            ; push Rq(VMCTX)
        );
        self.block_state.depth.reserve(1);
        let depth = self.block_state.depth.clone();

        let locs = arg_locs_skip_caller_vmctx(arg_types);

        // Reserve the argument registers so temporaries taken below cannot
        // collide with them.
        for loc in locs.clone() {
            if let CCLoc::Reg(r) = loc {
                self.block_state.regs.mark_used(r);
            }
        }

        // The callee's table index, taken off the top of the value stack.
        let mut callee = self.pop()?;
        let callee_reg = self
            .put_into_temp_register(I32, &mut callee)?
            .ok_or_else(|| Error::Microwasm("Ran out of free registers".to_string()))?;

        self.save_volatile()?;

        for loc in locs.clone() {
            if let CCLoc::Reg(r) = loc {
                self.block_state.regs.release(r)?;
            }
        }

        self.pass_outgoing_args(&locs)?;

        dynasm!(self.asm
            ; mov Rq(CALLER_VMCTX), Rq(VMCTX)
        );
        self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));

        // TODO: Other table indices.
        let table_index = 0;
        // A locally-defined table's definition is embedded in the vmctx at a
        // static offset (no extra register needed); an imported table requires
        // loading a pointer to its definition first.
        let reg_offset = self
            .module_context
            .defined_table_index(table_index)
            .map(|index| {
                (
                    None,
                    self.module_context.vmctx_vmtable_definition(index) as i32,
                )
            });

        let vmctx = GPR::Rq(VMCTX);
        let (reg, offset) = reg_offset.unwrap_or_else(|| {
            let reg = self.take_reg(I64).unwrap();

            dynasm!(self.asm
                ; mov Rq(reg.rq().unwrap()), [
                    Rq(VMCTX) + self.module_context.vmctx_vmtable_import_from(table_index) as i32
                ]
            );

            (Some(reg), 0)
        });

        // Bounds-check the index, scale it by the size of a table entry, and
        // load the table's base address.
        let temp0 = self.take_reg(I64).unwrap();
        dynasm!(self.asm
            ; cmp Rd(callee_reg.rq().unwrap()), [
                Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                offset +
                self.module_context.vmtable_definition_current_elements() as i32
            ]
            ;; self.trap_if(cc::GE_U, TrapCode::TableOutOfBounds)
            ; imul
                Rd(callee_reg.rq().unwrap()),
                Rd(callee_reg.rq().unwrap()),
                self.module_context.size_of_vmcaller_checked_anyfunc() as i32
            ; mov Rq(temp0.rq().unwrap()), [
                Rq(reg.unwrap_or(vmctx).rq().unwrap()) +
                offset +
                self.module_context.vmtable_definition_base() as i32
            ]
        );

        if let Some(reg) = reg {
            self.block_state.regs.release(reg)?;
        }

        // Compare the entry's signature id with the expected one, then load
        // the callee's vmctx and call through the entry's function pointer.
        let temp1 = self.take_reg(I64).unwrap();

        dynasm!(self.asm
            ; mov Rd(temp1.rq().unwrap()), [
                Rq(VMCTX) +
                self.module_context
                    .vmctx_vmshared_signature_id(type_id) as i32
            ]
            ; cmp DWORD [
                Rq(temp0.rq().unwrap()) +
                Rq(callee_reg.rq().unwrap()) +
                self.module_context.vmcaller_checked_anyfunc_type_index() as i32
            ], Rd(temp1.rq().unwrap())
            ;; self.trap_if(cc::NOT_EQUAL, TrapCode::BadSignature)
            ; mov Rq(VMCTX), [
                Rq(temp0.rq().unwrap()) +
                Rq(callee_reg.rq().unwrap()) +
                self.module_context.vmcaller_checked_anyfunc_vmctx() as i32
            ]
            ; call QWORD [
                Rq(temp0.rq().unwrap()) +
                Rq(callee_reg.rq().unwrap()) +
                self.module_context.vmcaller_checked_anyfunc_func_ptr() as i32
            ]
        );

        self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
        self.block_state.regs.release(temp0)?;
        self.block_state.regs.release(temp1)?;
        self.free_value(callee)?;

        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(return_types)?;

        // Restore the pre-call stack depth and our own VMCTX.
        self.set_stack_depth(depth)?;
        dynasm!(self.asm
            ; pop Rq(VMCTX)
        );
        self.block_state.depth.free(1);

        Ok(())
    }
5935
swap(&mut self, depth: u32)5936 pub fn swap(&mut self, depth: u32) {
5937 let last = self.block_state.stack.len() - 1;
5938 self.block_state.stack.swap(last, last - depth as usize);
5939 }
5940
5941 /// Call a function with the given index
call_direct<A: IntoIterator<Item = SignlessType>, R: IntoIterator<Item = SignlessType>>( &mut self, index: u32, arg_types: A, return_types: R, ) -> Result<(), Error> where A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone, R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,5942 pub fn call_direct<A: IntoIterator<Item = SignlessType>, R: IntoIterator<Item = SignlessType>>(
5943 &mut self,
5944 index: u32,
5945 arg_types: A,
5946 return_types: R,
5947 ) -> Result<(), Error>
5948 where
5949 A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
5950 R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
5951 {
5952 self.relocated_function_call(
5953 &ir::ExternalName::user(0, index),
5954 arg_types,
5955 return_types,
5956 FunctionDefLocation::SameModule,
5957 )?;
5958 Ok(())
5959 }
5960
    /// Recursively call the same function again.
    ///
    /// A self-call can target the function's own entry label directly, so no
    /// relocation and no vmctx swap is needed.
    pub fn call_direct_self<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        defined_index: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        let locs = arg_locs_skip_caller_vmctx(arg_types);

        self.save_volatile()?;

        // The dynamic label recorded for this function's entry point.
        let (_, label) = self.func_starts[defined_index as usize];

        self.pass_outgoing_args(&locs)?;
        dynasm!(self.asm
            ; call =>label
        );

        // The outgoing-argument locations are dead once the call returns.
        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(return_types)?;
        Ok(())
    }
5993
    /// Call an imported function with the given import index.
    ///
    /// The callee runs with its own vmctx, loaded from our vmctx's function
    /// import table; our vmctx is passed along in the caller-vmctx register
    /// and restored after the call.
    pub fn call_direct_imported<
        A: IntoIterator<Item = SignlessType>,
        R: IntoIterator<Item = SignlessType>,
    >(
        &mut self,
        index: u32,
        arg_types: A,
        return_types: R,
    ) -> Result<(), Error>
    where
        A::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
        R::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
    {
        let locs = arg_locs_skip_caller_vmctx(arg_types);

        // Pass our vmctx in CALLER_VMCTX and spill the VMCTX register so it
        // can be restored once the call returns.
        dynasm!(self.asm
            ; mov Rq(CALLER_VMCTX), Rq(VMCTX)
        );
        self.block_state.regs.mark_used(GPR::Rq(CALLER_VMCTX));
        self.block_state.regs.mark_used(GPR::Rq(VMCTX));
        let saved_vmctx = self.push_physical(ValueLocation::Reg(GPR::Rq(VMCTX)))?;

        self.save_volatile()?;
        self.pass_outgoing_args(&locs)?;

        // Load the import's body pointer and its vmctx out of our vmctx, then
        // call with the callee's vmctx installed.
        let callee = self.take_reg(I64).unwrap();

        dynasm!(self.asm
            ; mov Rq(callee.rq().unwrap()), [
                Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_body(index) as i32
            ]
            ; mov Rq(VMCTX), [
                Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_vmctx(index) as i32
            ]
            ; call Rq(callee.rq().unwrap())
        );

        self.block_state.regs.release(callee)?;

        for i in locs {
            self.free_value(i.into())?;
        }

        self.push_function_returns(return_types)?;

        // Restore our own vmctx from the slot saved above.
        self.block_state.regs.release(GPR::Rq(CALLER_VMCTX))?;
        self.copy_value(saved_vmctx, CCLoc::Reg(GPR::Rq(VMCTX)))?;
        self.free_value(saved_vmctx)?;

        Ok(())
    }
6046
6047 // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them
6048 // as scratch registers
6049 /// Writes the function prologue and stores the arguments as locals
start_function<P: IntoIterator<Item = SignlessType>>( &mut self, params: P, ) -> Result<(), Error> where P::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,6050 pub fn start_function<P: IntoIterator<Item = SignlessType>>(
6051 &mut self,
6052 params: P,
6053 ) -> Result<(), Error>
6054 where
6055 P::IntoIter: ExactSizeIterator + DoubleEndedIterator + Clone,
6056 {
6057 self.apply_cc(BlockCallingConvention::function_start(
6058 arg_locs_skip_caller_vmctx(params),
6059 ))?;
6060 Ok(())
6061 }
6062
ret(&mut self)6063 pub fn ret(&mut self) {
6064 dynasm!(self.asm
6065 ; ret
6066 );
6067 }
6068
epilogue(&mut self)6069 pub fn epilogue(&mut self) {
6070 for LabelInfo {
6071 label,
6072 align,
6073 inner,
6074 } in self.labels.drain()
6075 {
6076 match inner {
6077 LabelValue::I32(val) => {
6078 dynasm!(self.asm
6079 ; .align align as usize
6080 ;; self.asm.dynamic_label(label.0)
6081 ; .dword val
6082 );
6083 }
6084 LabelValue::I64(val) => {
6085 dynasm!(self.asm
6086 ; .align align as usize
6087 ;; self.asm.dynamic_label(label.0)
6088 ; .qword val
6089 );
6090 }
6091 LabelValue::Ret => {
6092 dynasm!(self.asm
6093 ; .align align as usize
6094 ;; self.asm.dynamic_label(label.0)
6095 ; ret
6096 );
6097 }
6098 }
6099 }
6100 }
6101
trap(&mut self, _trap_id: TrapCode)6102 pub fn trap(&mut self, _trap_id: TrapCode) {
6103 // TODO: Emit trap info by writing the trap ID and current source location to a
6104 // `binemit::TrapSink`.
6105 dynasm!(self.asm
6106 ; ud2
6107 );
6108 }
6109
ret_label(&mut self) -> Label6110 pub fn ret_label(&mut self) -> Label {
6111 #[derive(Copy, Clone, Hash)]
6112 struct RetLabel;
6113
6114 self.label(LabelValue::Ret)
6115 }
6116
    /// Registers a pending label value with no particular alignment
    /// (align = 1); the value itself is emitted later in `epilogue`.
    fn label(&mut self, label: LabelValue) -> Label {
        self.aligned_label(1, label)
    }
6120
aligned_label(&mut self, align: u32, label: LabelValue) -> Label6121 fn aligned_label(&mut self, align: u32, label: LabelValue) -> Label {
6122 let asm = &mut self.asm;
6123 self.labels
6124 .insert(|| Label(asm.new_dynamic_label()), align, label)
6125 }
6126
target_to_label(&mut self, target: BrTarget<Label>) -> Label6127 fn target_to_label(&mut self, target: BrTarget<Label>) -> Label {
6128 match target {
6129 BrTarget::Label(label) => label,
6130 BrTarget::Return => self.ret_label(),
6131 }
6132 }
6133 }
6134