// Copyright 2015 Dolphin Emulator Project / 2018 dynarmic project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstring>
#include <vector>

#if defined(__APPLE__)
#include <libkern/OSCacheControl.h>
#endif

#include "a64_emitter.h"
#include "common/assert.h"
#include "common/bit_util.h"
#include "common/cast_util.h"
#include "common/common_types.h"
#include "common/math_util.h"

namespace Dynarmic::BackendA64::Arm64Gen {

namespace {
const int kWRegSizeInBits = 32;
const int kXRegSizeInBits = 64;

// The following few functions are taken from V8.
int CountLeadingZeros(u64 value, int width) {
#ifdef _MSC_VER
    if (width == 64) {
        return _CountLeadingZeros64(value);
    }
#else
    if (width == 64) {
        return __builtin_clzll(value);
    }
#endif
    // TODO(jbramley): Optimize this for ARM64 hosts.
    int count = 0;
    uint64_t bit_test = 1ULL << (width - 1);
    while ((count < width) && ((bit_test & value) == 0)) {
        count++;
        bit_test >>= 1;
    }
    return count;
}
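
// Illustrative sketch: CountLeadingZeros(1, 64) == 63 and
// CountLeadingZeros(0x8000000000000000, 64) == 0. Note that the callers below
// never pass zero on the 64-bit fast path, where __builtin_clzll would be
// undefined.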

uint64_t LargestPowerOf2Divisor(uint64_t value) {
    return value & -(int64_t)value;
}
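
// Illustrative sketch: value & -value isolates the lowest set bit, e.g.
// LargestPowerOf2Divisor(0b101000) == 0b1000, and LargestPowerOf2Divisor(0) == 0,
// which IsImmLogical below relies on to detect the degenerate all-zero case.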

// For ADD/SUB
bool IsImmArithmetic(uint64_t input, u32* val, bool* shift) {
    if (input < 4096) {
        *val = static_cast<u32>(input);
        *shift = false;
        return true;
    } else if ((input & 0xFFF000) == input) {
        *val = static_cast<u32>(input >> 12);
        *shift = true;
        return true;
    }
    return false;
}
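
// Illustrative sketch: an arithmetic immediate is a 12-bit value, optionally
// shifted left by 12. So 0x123 encodes as val=0x123/shift=false, 0x123000 as
// val=0x123/shift=true, and 0x1234 is rejected because it needs bits from both
// halves.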

// For AND/TST/ORR/EOR etc
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int* n, unsigned int* imm_s,
                  unsigned int* imm_r) {
    bool negate = false;

    // Logical immediates are encoded using parameters n, imm_s and imm_r using
    // the following table:
    //
    //    N   imms    immr    size        S             R
    //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
    //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
    //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
    //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
    //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
    //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
    // (s bits must not be all set)
    //
    // A pattern is constructed of size bits, where the least significant S+1 bits
    // are set. The pattern is rotated right by R, and repeated across a 32 or
    // 64-bit value, depending on destination register width.
    //
    // Put another way: the basic format of a logical immediate is a single
    // contiguous stretch of 1 bits, repeated across the whole word at intervals
    // given by a power of 2. To identify them quickly, we first locate the
    // lowest stretch of 1 bits, then the next 1 bit above that; that combination
    // is different for every logical immediate, so it gives us all the
    // information we need to identify the only logical immediate that our input
    // could be, and then we simply check if that's the value we actually have.
    //
    // (The rotation parameter does give the possibility of the stretch of 1 bits
    // going 'round the end' of the word. To deal with that, we observe that in
    // any situation where that happens the bitwise NOT of the value is also a
    // valid logical immediate. So we simply invert the input whenever its low bit
    // is set, and then we know that the rotated case can't arise.)

    if (value & 1) {
        // If the low bit is 1, negate the value, and set a flag to remember that we
        // did (so that we can adjust the return values appropriately).
        negate = true;
        value = ~value;
    }

    if (width == kWRegSizeInBits) {
        // To handle 32-bit logical immediates, the very easiest thing is to repeat
        // the input value twice to make a 64-bit word. The correct encoding of that
        // as a logical immediate will also be the correct encoding of the 32-bit
        // value.

        // The most-significant 32 bits may not be zero (i.e. negate is true) so
        // shift the value left before duplicating it.
        value <<= kWRegSizeInBits;
        value |= value >> kWRegSizeInBits;
    }

    // The basic analysis idea: imagine our input word looks like this.
    //
    //    0011111000111110001111100011111000111110001111100011111000111110
    //                                                          c  b    a
    //                                                          |<--d-->|
    //
    // We find the lowest set bit (as an actual power-of-2 value, not its index)
    // and call it a. Then we add a to our original number, which wipes out the
    // bottommost stretch of set bits and replaces it with a 1 carried into the
    // next zero bit. Then we look for the new lowest set bit, which is in
    // position b, and subtract it, so now our number is just like the original
    // but with the lowest stretch of set bits completely gone. Now we find the
    // lowest set bit again, which is position c in the diagram above. Then we'll
    // measure the distance d between bit positions a and c (using CLZ), and that
    // tells us that the only valid logical immediate that could possibly be equal
    // to this number is the one in which a stretch of bits running from a to just
    // below b is replicated every d bits.
    uint64_t a = LargestPowerOf2Divisor(value);
    uint64_t value_plus_a = value + a;
    uint64_t b = LargestPowerOf2Divisor(value_plus_a);
    uint64_t value_plus_a_minus_b = value_plus_a - b;
    uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);

    int d, clz_a, out_n;
    uint64_t mask;

    if (c != 0) {
        // The general case, in which there is more than one stretch of set bits.
        // Compute the repeat distance d, and set up a bitmask covering the basic
        // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
        // of these cases the N bit of the output will be zero.
        clz_a = CountLeadingZeros(a, kXRegSizeInBits);
        int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
        d = clz_a - clz_c;
        mask = ((UINT64_C(1) << d) - 1);
        out_n = 0;
    } else {
        // Handle degenerate cases.
        //
        // If any of those 'find lowest set bit' operations didn't find a set bit at
        // all, then the word will have been zero thereafter, so in particular the
        // last lowest_set_bit operation will have returned zero. So we can test for
        // all the special case conditions in one go by seeing if c is zero.
        if (a == 0) {
            // The input was zero (or all 1 bits, which will come to here too after we
            // inverted it at the start of the function), for which we just return
            // false.
            return false;
        } else {
            // Otherwise, if c was zero but a was not, then there's just one stretch
            // of set bits in our word, meaning that we have the trivial case of
            // d == 64 and only one 'repetition'. Set up all the same variables as in
            // the general case above, and set the N bit in the output.
            clz_a = CountLeadingZeros(a, kXRegSizeInBits);
            d = 64;
            mask = ~UINT64_C(0);
            out_n = 1;
        }
    }

    // If the repeat period d is not a power of two, it can't be encoded.
    if (!Dynarmic::Common::IsPow2<u64>(d))
        return false;

    // If the bit stretch (b - a) does not fit within the mask derived from the
    // repeat period, then fail.
    if (((b - a) & ~mask) != 0)
        return false;

    // The only possible option is b - a repeated every d bits. Now we're going to
    // actually construct the valid logical immediate derived from that
    // specification, and see if it equals our original input.
    //
    // To repeat a value every d bits, we multiply it by a number of the form
    // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
    // be derived using a table lookup on CLZ(d).
    static const std::array<uint64_t, 6> multipliers = {{
        0x0000000000000001UL,
        0x0000000100000001UL,
        0x0001000100010001UL,
        0x0101010101010101UL,
        0x1111111111111111UL,
        0x5555555555555555UL,
    }};

    int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;

    // Ensure that the index to the multipliers array is within bounds.
    DEBUG_ASSERT((multiplier_idx >= 0) &&
                 (static_cast<size_t>(multiplier_idx) < multipliers.size()));

    uint64_t multiplier = multipliers[multiplier_idx];
    uint64_t candidate = (b - a) * multiplier;

    // The candidate pattern doesn't match our input value, so fail.
    if (value != candidate)
        return false;

    // We have a match! This is a valid logical immediate, so now we have to
    // construct the bits and pieces of the instruction encoding that generates
    // it.

    // Count the set bits in our basic stretch. The special case of clz(0) == -1
    // makes the answer come out right for stretches that reach the very top of
    // the word (e.g. numbers like 0xffffc00000000000).
    int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
    int s = clz_a - clz_b;

    // Decide how many bits to rotate right by, to put the low bit of that basic
    // stretch in position a.
    int r;
    if (negate) {
        // If we inverted the input right at the start of this function, here's
        // where we compensate: the number of set bits becomes the number of clear
        // bits, and the rotation count is based on position b rather than position
        // a (since b is the location of the 'lowest' 1 bit after inversion).
        s = d - s;
        r = (clz_b + 1) & (d - 1);
    } else {
        r = (clz_a + 1) & (d - 1);
    }

    // Now we're done, except for having to encode the S output in such a way that
    // it gives both the number of set bits and the length of the repeated
    // segment. The s field is encoded like this:
    //
    //     imms    size        S
    //    ssssss    64    UInt(ssssss)
    //    0sssss    32    UInt(sssss)
    //    10ssss    16    UInt(ssss)
    //    110sss     8    UInt(sss)
    //    1110ss     4    UInt(ss)
    //    11110s     2    UInt(s)
    //
    // So we 'or' (-d << 1) with our computed s to form imms.
    *n = out_n;
    *imm_s = ((-d << 1) | (s - 1)) & 0x3f;
    *imm_r = r;

    return true;
}
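
// Worked example (illustrative): IsImmLogical(0x00FF00FF00FF00FF, 64, &n, &s, &r).
// The low bit is set, so the value is inverted to 0xFF00FF00FF00FF00. Then
// a = 0x100, b = 0x10000, c = 0x1000000, so d = 16 and b - a = 0xFF00; the
// candidate 0xFF00 * 0x0001000100010001 reproduces the inverted input, so it is
// encodable. After the negate compensation (s = 16 - 8 = 8 set bits, r = 0),
// the outputs are n = 0, imm_s = 0b100111 ('10ssss' with ssss = 7), imm_r = 0.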

float FPImm8ToFloat(u8 bits) {
    const u32 sign = bits >> 7;
    const u32 bit6 = (bits >> 6) & 1;
    const u32 exp = ((!bit6) << 7) | (0x7C * bit6) | ((bits >> 4) & 3);
    const u32 mantissa = (bits & 0xF) << 19;
    const u32 f = (sign << 31) | (exp << 23) | mantissa;

    return Dynarmic::Common::BitCast<float>(f);
}

bool FPImm8FromFloat(float value, u8* imm_out) {
    const u32 f = Dynarmic::Common::BitCast<u32>(value);
    const u32 mantissa4 = (f & 0x7FFFFF) >> 19;
    const u32 exponent = (f >> 23) & 0xFF;
    const u32 sign = f >> 31;

    if ((exponent >> 7) == ((exponent >> 6) & 1))
        return false;

    const u8 imm8 = static_cast<u8>((sign << 7) | ((!(exponent >> 7)) << 6) |
                                    ((exponent & 3) << 4) | mantissa4);
    const float new_float = FPImm8ToFloat(imm8);
    if (new_float == value)
        *imm_out = imm8;
    else
        return false;

    return true;
}
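
// Illustrative sketch: only values with a 4-bit mantissa and a small exponent
// survive the round-trip check, e.g. FPImm8FromFloat(1.0f, &imm8) succeeds with
// imm8 == 0x70 and FPImm8FromFloat(-2.0f, &imm8) with imm8 == 0x80, while
// FPImm8FromFloat(0.1f, &imm8) fails since 0.1f is not exactly representable in
// this 8-bit format.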

static constexpr bool IsInRangeImm19(s64 distance) {
    return (distance >= -0x40000 && distance <= 0x3FFFF);
}

static constexpr bool IsInRangeImm14(s64 distance) {
    return (distance >= -0x2000 && distance <= 0x1FFF);
}

static constexpr bool IsInRangeImm26(s64 distance) {
    return (distance >= -0x2000000 && distance <= 0x1FFFFFF);
}

static constexpr u32 MaskImm19(s64 distance) {
    return distance & 0x7FFFF;
}

static constexpr u32 MaskImm14(s64 distance) {
    return distance & 0x3FFF;
}

static constexpr u32 MaskImm26(s64 distance) {
    return distance & 0x3FFFFFF;
}
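
// Illustrative note: these fields hold distances in instruction words (byte
// distance >> 2), so imm19 gives B.cond/CBZ a reach of about +/-1 MiB, imm14
// gives TBZ/TBNZ about +/-32 KiB, and imm26 gives B/BL about +/-128 MiB.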

} // Anonymous namespace

void ARM64XEmitter::SetCodePtrUnsafe(u8* ptr) {
    m_code = ptr;
}

void ARM64XEmitter::SetCodePtr(u8* ptr) {
    SetCodePtrUnsafe(ptr);
    m_lastCacheFlushEnd = ptr;
}

const u8* ARM64XEmitter::GetCodePtr() const {
    return m_code;
}

u8* ARM64XEmitter::GetWritableCodePtr() {
    return m_code;
}

void ARM64XEmitter::ReserveCodeSpace(u32 bytes) {
    for (u32 i = 0; i < bytes / 4; i++)
        BRK(0);
}

const u8* ARM64XEmitter::AlignCode16() {
    int c = int((u64)m_code & 15);
    if (c)
        ReserveCodeSpace(16 - c);
    return m_code;
}

const u8* ARM64XEmitter::AlignCodePage() {
    int c = int((u64)m_code & 4095);
    if (c)
        ReserveCodeSpace(4096 - c);
    return m_code;
}

void ARM64XEmitter::Write32(u32 value) {
    std::memcpy(m_code, &value, sizeof(u32));
    m_code += sizeof(u32);
}

void ARM64XEmitter::FlushIcache() {
    FlushIcacheSection(m_lastCacheFlushEnd, m_code);
    m_lastCacheFlushEnd = m_code;
}

void ARM64XEmitter::FlushIcacheSection(const u8* start, const u8* end) {
    if (start == end)
        return;

#if defined(__APPLE__)
    // Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
    sys_cache_control(kCacheFunctionPrepareForExecution, const_cast<u8*>(start), end - start);
#else
    // Don't rely on GCC's __clear_cache implementation, as it caches
    // icache/dcache cache line sizes, which can vary between cores on
    // big.LITTLE architectures.
    u64 addr, ctr_el0;
    static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
    size_t isize, dsize;

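    // CTR_EL0 reports IminLine in bits [3:0] and DminLine in bits [19:16], each
    // as log2 of the line size in 4-byte words, hence the "4 << field"
    // conversions below.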
    __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
    isize = 4 << ((ctr_el0 >> 0) & 0xf);
    dsize = 4 << ((ctr_el0 >> 16) & 0xf);

    // use the global minimum cache line size
    icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
    dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;

    addr = reinterpret_cast<u64>(start) & ~static_cast<u64>(dsize - 1);
    for (; addr < reinterpret_cast<u64>(end); addr += dsize)
        // use "civac" instead of "cvau", as this is the suggested workaround for
        // Cortex-A53 errata 819472, 826319, 827319 and 824069.
        __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
    __asm__ volatile("dsb ish" : : : "memory");

    addr = reinterpret_cast<u64>(start) & ~static_cast<u64>(isize - 1);
    for (; addr < reinterpret_cast<u64>(end); addr += isize)
        __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");

    __asm__ volatile("dsb ish" : : : "memory");
    __asm__ volatile("isb" : : : "memory");
#endif
}

// Exception generation
static const u32 ExcEnc[][3] = {
    {0, 0, 1}, // SVC
    {0, 0, 2}, // HVC
    {0, 0, 3}, // SMC
    {1, 0, 0}, // BRK
    {2, 0, 0}, // HLT
    {5, 0, 1}, // DCPS1
    {5, 0, 2}, // DCPS2
    {5, 0, 3}, // DCPS3
};

// Arithmetic generation
static const u32 ArithEnc[] = {
    0x058, // ADD
    0x258, // SUB
};

// Conditional Select
static const u32 CondSelectEnc[][2] = {
    {0, 0}, // CSEL
    {0, 1}, // CSINC
    {1, 0}, // CSINV
    {1, 1}, // CSNEG
};

// Data-Processing (1 source)
static const u32 Data1SrcEnc[][2] = {
    {0, 0}, // RBIT
    {0, 1}, // REV16
    {0, 2}, // REV32
    {0, 3}, // REV64
    {0, 4}, // CLZ
    {0, 5}, // CLS
};

// Data-Processing (2 source)
static const u32 Data2SrcEnc[] = {
    0x02, // UDIV
    0x03, // SDIV
    0x08, // LSLV
    0x09, // LSRV
    0x0A, // ASRV
    0x0B, // RORV
    0x10, // CRC32B
    0x11, // CRC32H
    0x12, // CRC32W
    0x14, // CRC32CB
    0x15, // CRC32CH
    0x16, // CRC32CW
    0x13, // CRC32X (64bit Only)
    0x17, // CRC32CX (64bit Only)
};

// Data-Processing (3 source)
static const u32 Data3SrcEnc[][2] = {
    {0, 0}, // MADD
    {0, 1}, // MSUB
    {1, 0}, // SMADDL (64Bit Only)
    {1, 1}, // SMSUBL (64Bit Only)
    {2, 0}, // SMULH (64Bit Only)
    {5, 0}, // UMADDL (64Bit Only)
    {5, 1}, // UMSUBL (64Bit Only)
    {6, 0}, // UMULH (64Bit Only)
};

// Logical (shifted register)
static const u32 LogicalEnc[][2] = {
    {0, 0}, // AND
    {0, 1}, // BIC
    {1, 0}, // ORR
    {1, 1}, // ORN
    {2, 0}, // EOR
    {2, 1}, // EON
    {3, 0}, // ANDS
    {3, 1}, // BICS
};

// Load/Store Exclusive
static const u32 LoadStoreExcEnc[][5] = {
    {0, 0, 0, 0, 0}, // STXRB
    {0, 0, 0, 0, 1}, // STLXRB
    {0, 0, 1, 0, 0}, // LDXRB
    {0, 0, 1, 0, 1}, // LDAXRB
    {0, 1, 0, 0, 1}, // STLRB
    {0, 1, 1, 0, 1}, // LDARB
    {1, 0, 0, 0, 0}, // STXRH
    {1, 0, 0, 0, 1}, // STLXRH
    {1, 0, 1, 0, 0}, // LDXRH
    {1, 0, 1, 0, 1}, // LDAXRH
    {1, 1, 0, 0, 1}, // STLRH
    {1, 1, 1, 0, 1}, // LDARH
    {2, 0, 0, 0, 0}, // STXR
    {3, 0, 0, 0, 0}, // (64bit) STXR
    {2, 0, 0, 0, 1}, // STLXR
    {3, 0, 0, 0, 1}, // (64bit) STLXR
    {2, 0, 0, 1, 0}, // STXP
    {3, 0, 0, 1, 0}, // (64bit) STXP
    {2, 0, 0, 1, 1}, // STLXP
    {3, 0, 0, 1, 1}, // (64bit) STLXP
    {2, 0, 1, 0, 0}, // LDXR
    {3, 0, 1, 0, 0}, // (64bit) LDXR
    {2, 0, 1, 0, 1}, // LDAXR
    {3, 0, 1, 0, 1}, // (64bit) LDAXR
    {2, 0, 1, 1, 0}, // LDXP
    {3, 0, 1, 1, 0}, // (64bit) LDXP
    {2, 0, 1, 1, 1}, // LDAXP
    {3, 0, 1, 1, 1}, // (64bit) LDAXP
    {2, 1, 0, 0, 1}, // STLR
    {3, 1, 0, 0, 1}, // (64bit) STLR
    {2, 1, 1, 0, 1}, // LDAR
    {3, 1, 1, 0, 1}, // (64bit) LDAR
};

void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr) {
    bool b64Bit = Is64Bit(Rt);
    s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(m_code);

    ASSERT_MSG(!(distance & 0x3), "%s: distance must be a multiple of 4: %" PRIx64, __func__,
               distance);

    distance >>= 2;

    ASSERT_MSG(IsInRangeImm19(distance), "%s: Received too large distance: %" PRIx64, __func__,
               distance);

    Rt = DecodeReg(Rt);
    Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) |
            ((static_cast<u32>(distance) << 5) & 0xFFFFE0) | Rt);
}

void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr) {
    bool b64Bit = Is64Bit(Rt);
    s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(m_code);

    ASSERT_MSG(!(distance & 0x3), "%s: distance must be a multiple of 4: %" PRIx64, __func__,
               distance);

    distance >>= 2;

    ASSERT_MSG(IsInRangeImm14(distance), "%s: Received too large distance: %" PRIx64, __func__,
               distance);

    Rt = DecodeReg(Rt);
    Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | (bits << 19) |
            ((static_cast<u32>(distance) << 5) & 0x7FFE0) | Rt);
}

void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) {
    s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(m_code);

    ASSERT_MSG(!(distance & 0x3), "%s: distance must be a multiple of 4: %" PRIx64, __func__,
               distance);

    distance >>= 2;

    ASSERT_MSG(IsInRangeImm26(distance), "%s: Received too large distance: %" PRIx64, __func__,
               distance);

    Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
}

void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn) {
    Rn = DecodeReg(Rn);
    Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (Rn << 5) | op4);
}

void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm) {
    ASSERT_MSG(!(imm & ~0xFFFF), "%s: Exception instruction too large immediate: %d", __func__,
               imm);

    Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) |
            ExcEnc[instenc][2]);
}

void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt) {
    Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt);
}

void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn,
                                         ARM64Reg Rm, ArithOption Option) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);
    Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) |
            (Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) |
            Option.GetData() | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn,
                                              ARM64Reg Rm) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rm = DecodeReg(Rm);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0xD0 << 21) | (Rm << 16) | (Rn << 5) |
            Rd);
}

void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) {
    bool b64Bit = Is64Bit(Rn);

    ASSERT_MSG(!(imm & ~0x1F), "%s: too large immediate: %d", __func__, imm);
    ASSERT_MSG(!(nzcv & ~0xF), "%s: Flags out of range: %d", __func__, nzcv);

    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (imm << 16) | (cond << 12) |
            (1 << 11) | (Rn << 5) | nzcv);
}

void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv,
                                             CCFlags cond) {
    bool b64Bit = Is64Bit(Rm);

    ASSERT_MSG(!(nzcv & ~0xF), "%s: Flags out of range: %d", __func__, nzcv);

    Rm = DecodeReg(Rm);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (Rm << 16) | (cond << 12) |
            (Rn << 5) | nzcv);
}

void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
                                         CCFlags cond) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rm = DecodeReg(Rm);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | (0xD4 << 21) | (Rm << 16) |
            (cond << 12) | (CondSelectEnc[instenc][1] << 10) | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (0x2D6 << 21) | (Data1SrcEnc[instenc][0] << 16) |
            (Data1SrcEnc[instenc][1] << 10) | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rm = DecodeReg(Rm);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (0x0D6 << 21) | (Rm << 16) | (Data2SrcEnc[instenc] << 10) | (Rn << 5) |
            Rd);
}

void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
                                       ARM64Reg Ra) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rm = DecodeReg(Rm);
    Rn = DecodeReg(Rn);
    Ra = DecodeReg(Ra);
    Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | (Rm << 16) |
            (Data3SrcEnc[instenc][1] << 15) | (Ra << 10) | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
                                      ArithOption Shift) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rm = DecodeReg(Rm);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) |
            (LogicalEnc[instenc][1] << 21) | Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, s32 imm) {
    bool b64Bit = Is64Bit(Rt);
    bool bVec = IsVector(Rt);

    ASSERT_MSG(IsInRangeImm19(imm), "%s: offset too large %d", __func__, imm);

    Rt = DecodeReg(Rt);
    if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set
        bitop |= 0x1;
    Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (MaskImm19(imm) << 5) | Rt);
}

void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn,
                                           ARM64Reg Rt) {
    Rs = DecodeReg(Rs);
    Rt2 = DecodeReg(Rt2);
    Rn = DecodeReg(Rn);
    Rt = DecodeReg(Rt);
    Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) |
            (LoadStoreExcEnc[instenc][1] << 23) | (LoadStoreExcEnc[instenc][2] << 22) |
            (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | (LoadStoreExcEnc[instenc][4] << 15) |
            (Rt2 << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
                                              u32 imm) {
    bool b64Bit = Is64Bit(Rt);
    bool b128Bit = IsQuad(Rt);
    bool bVec = IsVector(Rt);

    if (b128Bit)
        imm >>= 4;
    else if (b64Bit)
        imm >>= 3;
    else
        imm >>= 2;

    ASSERT_MSG(!(imm & ~0xF), "%s: offset too large %d", __func__, imm);

    u32 opc = 0;
    if (b128Bit)
        opc = 2;
    else if (b64Bit && bVec)
        opc = 1;
    else if (b64Bit && !bVec)
        opc = 2;

    Rt = DecodeReg(Rt);
    Rt2 = DecodeReg(Rt2);
    Rn = DecodeReg(Rn);
    Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    bool b64Bit = Is64Bit(Rt);
    bool bVec = IsVector(Rt);

    u32 offset = imm & 0x1FF;

    ASSERT_MSG(!(imm < -256 || imm > 255), "%s: offset too large %d", __func__, imm);

    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) |
            Rt);
}

void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size) {
    bool b64Bit = Is64Bit(Rt);
    bool bVec = IsVector(Rt);

    if (size == 64)
        imm >>= 3;
    else if (size == 32)
        imm >>= 2;
    else if (size == 16)
        imm >>= 1;

    ASSERT_MSG(imm >= 0, "%s(INDEX_UNSIGNED): offset must be positive %d", __func__, imm);
    ASSERT_MSG(!(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __func__, imm);

    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos) {
    bool b64Bit = Is64Bit(Rd);

    ASSERT_MSG(!(imm & ~0xFFFF), "%s: immediate out of range: %d", __func__, imm);

    Rd = DecodeReg(Rd);
    Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (pos << 21) | (imm << 5) | Rd);
}

void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) {
    bool b64Bit = Is64Bit(Rd);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | (immr << 16) |
            (imms << 10) | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn,
                                                  ArithOption Rm) {
    ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG,
               "Shifted registers are not supported here; use an extended register for Rm");

    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());

    Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | Rm.GetData() |
            (1 << 11) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn,
                                        ARM64Reg Rd) {
    bool b64Bit = Is64Bit(Rd);

    ASSERT_MSG(!(imm & ~0xFFF), "%s: immediate too large: %x", __func__, imm);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) |
            (imm << 10) | (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms,
                                         int n) {
    // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
    // Use Rn to determine bitness here.
    bool b64Bit = Is64Bit(Rn);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | (immr << 16) | (imms << 10) |
            (Rn << 5) | Rd);
}

void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
                                        ARM64Reg Rn, s32 imm) {
    bool b64Bit = Is64Bit(Rt);
    u32 type_encode = 0;

    switch (type) {
    case INDEX_SIGNED:
        type_encode = 0b010;
        break;
    case INDEX_POST:
        type_encode = 0b001;
        break;
    case INDEX_PRE:
        type_encode = 0b011;
        break;
    case INDEX_UNSIGNED:
        ASSERT_MSG(false, "%s doesn't support INDEX_UNSIGNED!", __func__);
        break;
    }

    if (b64Bit) {
        op |= 0b10;
        imm >>= 3;
    } else {
        imm >>= 2;
    }

    Rt = DecodeReg(Rt);
    Rt2 = DecodeReg(Rt2);
    Rn = DecodeReg(Rn);

    Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) |
            (Rt2 << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm) {
    Rd = DecodeReg(Rd);

    Write32((op << 31) | ((imm & 0x3) << 29) | (0x10 << 24) | ((imm & 0x1FFFFC) << 3) | Rd);
}

void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    ASSERT_MSG(!(imm < -256 || imm > 255), "%s received too large offset: %d", __func__, imm);
    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);

    Write32((size << 30) | (0b111 << 27) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
}

// FixupBranch branching
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch, u8* target) {
    if (!target)
        target = m_code;
    bool Not = false;
    u32 inst = 0;
    s64 distance = static_cast<s64>(target - branch.ptr);
    distance >>= 2;

    switch (branch.type) {
    case 1: // CBNZ
        Not = true;
        [[fallthrough]];
    case 0: // CBZ
    {
        ASSERT_MSG(IsInRangeImm19(distance), "%s(%d): Received too large distance: %" PRIx64,
                   __func__, branch.type, distance);
        bool b64Bit = Is64Bit(branch.reg);
        ARM64Reg reg = DecodeReg(branch.reg);
        inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) | reg;
    } break;
    case 2: // B (conditional)
        ASSERT_MSG(IsInRangeImm19(distance), "%s(%d): Received too large distance: %" PRIx64,
                   __func__, branch.type, distance);
        inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond;
        break;
    case 4: // TBNZ
        Not = true;
        [[fallthrough]];
    case 3: // TBZ
    {
        ASSERT_MSG(IsInRangeImm14(distance), "%s(%d): Received too large distance: %" PRIx64,
                   __func__, branch.type, distance);
        ARM64Reg reg = DecodeReg(branch.reg);
        inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) |
               ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
    } break;
    case 5: // B (unconditional)
        ASSERT_MSG(IsInRangeImm26(distance), "%s(%d): Received too large distance: %" PRIx64,
                   __func__, branch.type, distance);
        inst = (0x5 << 26) | MaskImm26(distance);
        break;
    case 6: // BL (unconditional)
        ASSERT_MSG(IsInRangeImm26(distance), "%s(%d): Received too large distance: %" PRIx64,
                   __func__, branch.type, distance);
        inst = (0x25 << 26) | MaskImm26(distance);
        break;
    }
    std::memcpy(branch.ptr, &inst, sizeof(inst));
}

FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt) {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 0;
    branch.reg = Rt;
    HINT(HINT_NOP);
    return branch;
}
FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt) {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 1;
    branch.reg = Rt;
    HINT(HINT_NOP);
    return branch;
}
FixupBranch ARM64XEmitter::B(CCFlags cond) {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 2;
    branch.cond = cond;
    HINT(HINT_NOP);
    return branch;
}
FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit) {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 3;
    branch.reg = Rt;
    branch.bit = bit;
    HINT(HINT_NOP);
    return branch;
}
FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit) {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 4;
    branch.reg = Rt;
    branch.bit = bit;
    HINT(HINT_NOP);
    return branch;
}
FixupBranch ARM64XEmitter::B() {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 5;
    HINT(HINT_NOP);
    return branch;
}
FixupBranch ARM64XEmitter::BL() {
    FixupBranch branch;
    branch.ptr = m_code;
    branch.type = 6;
    HINT(HINT_NOP);
    return branch;
}
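
// Illustrative usage of the fixup API (a sketch; assumes the declaration gives
// SetJumpTarget's target parameter a default of nullptr, meaning "the current
// code pointer"):
//
//     FixupBranch skip = CBZ(X0); // emits a placeholder NOP and records the spot
//     ...                         // emit the code to be skipped
//     SetJumpTarget(skip);        // rewrites the placeholder into CBZ X0, <here>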

// Compare and Branch
void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr) {
    EncodeCompareBranchInst(0, Rt, ptr);
}
void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr) {
    EncodeCompareBranchInst(1, Rt, ptr);
}

// Conditional Branch
void ARM64XEmitter::B(CCFlags cond, const void* ptr) {
    s64 distance = reinterpret_cast<s64>(ptr) - reinterpret_cast<s64>(m_code);

    distance >>= 2;

    ASSERT_MSG(IsInRangeImm19(distance),
               "%s: Received too large distance: %p->%p %" PRIi64 " %" PRIx64, __func__, m_code,
               ptr, distance, distance);
    Write32((0x54 << 24) | (MaskImm19(distance) << 5) | cond);
}

// Test and Branch
void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr) {
    EncodeTestBranchInst(0, Rt, bits, ptr);
}
void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr) {
    EncodeTestBranchInst(1, Rt, bits, ptr);
}

// Unconditional Branch
void ARM64XEmitter::B(const void* ptr) {
    EncodeUnconditionalBranchInst(0, ptr);
}
void ARM64XEmitter::BL(const void* ptr) {
    EncodeUnconditionalBranchInst(1, ptr);
}

void ARM64XEmitter::QuickCallFunction(const void* func, ARM64Reg scratchreg) {
    s64 distance = reinterpret_cast<s64>(func) - reinterpret_cast<s64>(m_code);
    distance >>= 2; // Can only branch to opcode-aligned (4) addresses
    if (!IsInRangeImm26(distance)) {
        // WARN_LOG("Distance too far in function call (%p to %p)! Using scratch.",
        // m_code, func);
        MOVI2R(scratchreg, reinterpret_cast<uintptr_t>(func));
        BLR(scratchreg);
    } else {
        BL(func);
    }
}
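
// Illustrative usage: QuickCallFunction(reinterpret_cast<const void*>(&SomeHelper), X8)
// (SomeHelper being any hypothetical C function) emits a single BL when the
// callee is within +/-128 MiB of the emitted code, and otherwise materialises
// the address with MOVI2R and calls through the scratch register with BLR.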
1028 
1029 // Unconditional Branch (register)
BR(ARM64Reg Rn)1030 void ARM64XEmitter::BR(ARM64Reg Rn) {
1031     EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn);
1032 }
BLR(ARM64Reg Rn)1033 void ARM64XEmitter::BLR(ARM64Reg Rn) {
1034     EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn);
1035 }
RET(ARM64Reg Rn)1036 void ARM64XEmitter::RET(ARM64Reg Rn) {
1037     EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn);
1038 }
ERET()1039 void ARM64XEmitter::ERET() {
1040     EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, SP);
1041 }
DRPS()1042 void ARM64XEmitter::DRPS() {
1043     EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, SP);
1044 }
1045 
1046 // Exception generation
SVC(u32 imm)1047 void ARM64XEmitter::SVC(u32 imm) {
1048     EncodeExceptionInst(0, imm);
1049 }
1050 
HVC(u32 imm)1051 void ARM64XEmitter::HVC(u32 imm) {
1052     EncodeExceptionInst(1, imm);
1053 }
1054 
SMC(u32 imm)1055 void ARM64XEmitter::SMC(u32 imm) {
1056     EncodeExceptionInst(2, imm);
1057 }
1058 
BRK(u32 imm)1059 void ARM64XEmitter::BRK(u32 imm) {
1060     EncodeExceptionInst(3, imm);
1061 }
1062 
HLT(u32 imm)1063 void ARM64XEmitter::HLT(u32 imm) {
1064     EncodeExceptionInst(4, imm);
1065 }
1066 
DCPS1(u32 imm)1067 void ARM64XEmitter::DCPS1(u32 imm) {
1068     EncodeExceptionInst(5, imm);
1069 }
1070 
DCPS2(u32 imm)1071 void ARM64XEmitter::DCPS2(u32 imm) {
1072     EncodeExceptionInst(6, imm);
1073 }
1074 
DCPS3(u32 imm)1075 void ARM64XEmitter::DCPS3(u32 imm) {
1076     EncodeExceptionInst(7, imm);
1077 }
1078 
1079 // System
_MSR(PStateField field,u8 imm)1080 void ARM64XEmitter::_MSR(PStateField field, u8 imm) {
1081     u32 op1 = 0, op2 = 0;
1082     switch (field) {
1083     case FIELD_SPSel:
1084         op1 = 0;
1085         op2 = 5;
1086         break;
1087     case FIELD_DAIFSet:
1088         op1 = 3;
1089         op2 = 6;
1090         break;
1091     case FIELD_DAIFClr:
1092         op1 = 3;
1093         op2 = 7;
1094         break;
1095     default:
1096         ASSERT_MSG(false, "Invalid PStateField to do a imm move to");
1097         break;
1098     }
1099     EncodeSystemInst(0, op1, 4, imm, op2, WSP);
1100 }
1101 
GetSystemReg(PStateField field,int & o0,int & op1,int & CRn,int & CRm,int & op2)1102 static void GetSystemReg(PStateField field, int& o0, int& op1, int& CRn, int& CRm, int& op2) {
1103     switch (field) {
1104     case FIELD_NZCV:
1105         o0 = 3;
1106         op1 = 3;
1107         CRn = 4;
1108         CRm = 2;
1109         op2 = 0;
1110         break;
1111     case FIELD_FPCR:
1112         o0 = 3;
1113         op1 = 3;
1114         CRn = 4;
1115         CRm = 4;
1116         op2 = 0;
1117         break;
1118     case FIELD_FPSR:
1119         o0 = 3;
1120         op1 = 3;
1121         CRn = 4;
1122         CRm = 4;
1123         op2 = 1;
1124         break;
1125     case FIELD_PMCR_EL0:
1126         o0 = 3;
1127         op1 = 3;
1128         CRn = 9;
1129         CRm = 6;
1130         op2 = 0;
1131         break;
1132     case FIELD_PMCCNTR_EL0:
1133         o0 = 3;
1134         op1 = 3;
1135         CRn = 9;
1136         CRm = 7;
1137         op2 = 0;
1138         break;
1139     default:
1140         ASSERT_MSG(false, "Invalid PStateField to do a register move from/to");
1141         break;
1142     }
1143 }
1144 
_MSR(PStateField field,ARM64Reg Rt)1145 void ARM64XEmitter::_MSR(PStateField field, ARM64Reg Rt) {
1146     int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
1147     ASSERT_MSG(Is64Bit(Rt), "MSR: Rt must be 64-bit");
1148     GetSystemReg(field, o0, op1, CRn, CRm, op2);
1149     EncodeSystemInst(o0, op1, CRn, CRm, op2, DecodeReg(Rt));
1150 }
1151 
MRS(ARM64Reg Rt,PStateField field)1152 void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field) {
1153     int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
1154     ASSERT_MSG(Is64Bit(Rt), "MRS: Rt must be 64-bit");
1155     GetSystemReg(field, o0, op1, CRn, CRm, op2);
1156     EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
1157 }
1158 
CNTVCT(ARM64Reg Rt)1159 void ARM64XEmitter::CNTVCT(ARM64Reg Rt) {
1160     ASSERT_MSG(Is64Bit(Rt), "CNTVCT: Rt must be 64-bit");
1161 
1162     // MRS <Xt>, CNTVCT_EL0 ; Read CNTVCT_EL0 into Xt
1163     EncodeSystemInst(3 | 4, 3, 0xe, 0, 2, DecodeReg(Rt));
1164 }
1165 
HINT(SystemHint op)1166 void ARM64XEmitter::HINT(SystemHint op) {
1167     EncodeSystemInst(0, 3, 2, 0, op, WSP);
1168 }
CLREX()1169 void ARM64XEmitter::CLREX() {
1170     EncodeSystemInst(0, 3, 3, 0, 2, WSP);
1171 }
DSB(BarrierType type)1172 void ARM64XEmitter::DSB(BarrierType type) {
1173     EncodeSystemInst(0, 3, 3, type, 4, WSP);
1174 }
DMB(BarrierType type)1175 void ARM64XEmitter::DMB(BarrierType type) {
1176     EncodeSystemInst(0, 3, 3, type, 5, WSP);
1177 }
ISB(BarrierType type)1178 void ARM64XEmitter::ISB(BarrierType type) {
1179     EncodeSystemInst(0, 3, 3, type, 6, WSP);
1180 }
1181 
1182 // Add/Subtract (extended register)
ADD(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1183 void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1184     ADD(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1185 }
1186 
ADD(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)1187 void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
1188     EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option);
1189 }
1190 
ADDS(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1191 void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1192     EncodeArithmeticInst(0, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1193 }
1194 
ADDS(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)1195 void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
1196     EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option);
1197 }
1198 
SUB(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1199 void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1200     SUB(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1201 }
1202 
SUB(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)1203 void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
1204     EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option);
1205 }
1206 
SUBS(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1207 void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1208     EncodeArithmeticInst(1, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1209 }
1210 
SUBS(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)1211 void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
1212     EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option);
1213 }
1214 
CMN(ARM64Reg Rn,ARM64Reg Rm)1215 void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm) {
1216     CMN(Rn, Rm, ArithOption(Rn, ST_LSL, 0));
1217 }
1218 
CMN(ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)1219 void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
1220     EncodeArithmeticInst(0, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);
1221 }
1222 
CMP(ARM64Reg Rn,ARM64Reg Rm)1223 void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm) {
1224     CMP(Rn, Rm, ArithOption(Rn, ST_LSL, 0));
1225 }
1226 
CMP(ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)1227 void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) {
1228     EncodeArithmeticInst(1, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);
1229 }
1230 
1231 // Add/Subtract (with carry)
ADC(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1232 void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1233     EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm);
1234 }
ADCS(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1235 void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1236     EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm);
1237 }
SBC(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1238 void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1239     EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm);
1240 }
SBCS(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1241 void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1242     EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm);
1243 }
1244 
1245 // Conditional Compare (immediate)
CCMN(ARM64Reg Rn,u32 imm,u32 nzcv,CCFlags cond)1246 void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) {
1247     EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond);
1248 }
CCMP(ARM64Reg Rn,u32 imm,u32 nzcv,CCFlags cond)1249 void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) {
1250     EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond);
1251 }
1252 
1253 // Conditiona Compare (register)
CCMN(ARM64Reg Rn,ARM64Reg Rm,u32 nzcv,CCFlags cond)1254 void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) {
1255     EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond);
1256 }
CCMP(ARM64Reg Rn,ARM64Reg Rm,u32 nzcv,CCFlags cond)1257 void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) {
1258     EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond);
1259 }
1260 
1261 // Conditional Select
CSEL(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,CCFlags cond)1262 void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) {
1263     EncodeCondSelectInst(0, Rd, Rn, Rm, cond);
1264 }
CSINC(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,CCFlags cond)1265 void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) {
1266     EncodeCondSelectInst(1, Rd, Rn, Rm, cond);
1267 }
CSINV(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,CCFlags cond)1268 void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) {
1269     EncodeCondSelectInst(2, Rd, Rn, Rm, cond);
1270 }
CSNEG(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,CCFlags cond)1271 void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) {
1272     EncodeCondSelectInst(3, Rd, Rn, Rm, cond);
1273 }
1274 
1275 // Data-Processing 1 source
RBIT(ARM64Reg Rd,ARM64Reg Rn)1276 void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn) {
1277     EncodeData1SrcInst(0, Rd, Rn);
1278 }
REV16(ARM64Reg Rd,ARM64Reg Rn)1279 void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn) {
1280     EncodeData1SrcInst(1, Rd, Rn);
1281 }
REV32(ARM64Reg Rd,ARM64Reg Rn)1282 void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn) {
1283     EncodeData1SrcInst(2, Rd, Rn);
1284 }
REV64(ARM64Reg Rd,ARM64Reg Rn)1285 void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn) {
1286     EncodeData1SrcInst(3, Rd, Rn);
1287 }
CLZ(ARM64Reg Rd,ARM64Reg Rn)1288 void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn) {
1289     EncodeData1SrcInst(4, Rd, Rn);
1290 }
CLS(ARM64Reg Rd,ARM64Reg Rn)1291 void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn) {
1292     EncodeData1SrcInst(5, Rd, Rn);
1293 }
1294 
1295 // Data-Processing 2 source
UDIV(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1296 void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1297     EncodeData2SrcInst(0, Rd, Rn, Rm);
1298 }
SDIV(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1299 void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1300     EncodeData2SrcInst(1, Rd, Rn, Rm);
1301 }
LSLV(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1302 void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1303     EncodeData2SrcInst(2, Rd, Rn, Rm);
1304 }
LSRV(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1305 void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1306     EncodeData2SrcInst(3, Rd, Rn, Rm);
1307 }
ASRV(ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)1308 void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
1309     EncodeData2SrcInst(4, Rd, Rn, Rm);
1310 }
void ARM64XEmitter::RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(5, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(6, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(7, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(8, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(9, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(10, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(11, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(12, Rd, Rn, Rm);
}
void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData2SrcInst(13, Rd, Rn, Rm);
}

// Data-Processing 3 source
void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EncodeData3SrcInst(0, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EncodeData3SrcInst(1, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EncodeData3SrcInst(2, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    SMADDL(Rd, Rn, Rm, SP);
}
void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData3SrcInst(4, Rd, Rn, Rm, SP);
}
void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    UMADDL(Rd, Rn, Rm, SP);
}
void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
}
void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData3SrcInst(7, Rd, Rn, Rm, SP);
}
void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData3SrcInst(0, Rd, Rn, Rm, SP);
}
void ARM64XEmitter::MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EncodeData3SrcInst(1, Rd, Rn, Rm, SP);
}
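// Note: in this emitter's register enum SP and ZR share encoding 31 (cf. the
// "ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP" idiom in MOVI2R below), so the multiply
// aliases above pass SP for Ra to select the zero register.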

// Logical (shifted register)
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(0, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(1, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(2, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(3, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(4, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(5, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(6, Rd, Rn, Rm, Shift);
}
void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
    EncodeLogicalInst(7, Rd, Rn, Rm, Shift);
}

void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift) {
    ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift);
}

void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm) {
    if (IsGPR(Rd) && IsGPR(Rm))
        ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, 0));
    else
        ASSERT_MSG(false, "Non-GPRs not supported in MOV");
}
void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm) {
    ORN(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, 0));
}
void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift) {
    int bits = Is64Bit(Rd) ? 64 : 32;
    UBFM(Rd, Rm, (bits - shift) & (bits - 1), bits - shift - 1);
}
void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift) {
    int bits = Is64Bit(Rd) ? 64 : 32;
    UBFM(Rd, Rm, shift, bits - 1);
}
void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift) {
    int bits = Is64Bit(Rd) ? 64 : 32;
    SBFM(Rd, Rm, shift, bits - 1);
}
void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift) {
    EXTR(Rd, Rm, Rm, shift);
}
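// The immediate shifts above are the standard bitfield/extract aliases: on an
// n-bit register, LSL #s is UBFM Rd, Rm, #((n - s) % n), #(n - 1 - s), so e.g.
// LSL(X0, X1, 4) emits UBFM x0, x1, #60, #59; ROR #s is EXTR with both source
// operands set to the same register.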

// Logical (immediate)
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert) {
    EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert) {
    EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert) {
    EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert) {
    EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);
}
void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert) {
    EncodeLogicalImmInst(3, Is64Bit(Rn) ? ZR : WZR, Rn, immr, imms, invert);
}

// Add/subtract (immediate)
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) {
    EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd);
}
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) {
    EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd);
}
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) {
    EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd);
}
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) {
    EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd);
}
void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift) {
    EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? ZR : WZR);
}
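// The arithmetic immediate is 12 bits wide and may optionally be shifted left by
// 12 via the 'shift' flag (cf. IsImmArithmetic, which splits values that way):
// assuming the standard sh-bit encoding, ADD(X0, X1, 0x123) encodes
// add x0, x1, #0x123, while ADD(X0, X1, 0x123, true) encodes
// add x0, x1, #0x123, lsl #12.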

// Data Processing (Immediate)
void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos) {
    EncodeMOVWideInst(2, Rd, imm, pos);
}
void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos) {
    EncodeMOVWideInst(0, Rd, imm, pos);
}
void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos) {
    EncodeMOVWideInst(3, Rd, imm, pos);
}

// Bitfield move
void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) {
    EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms);
}
void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) {
    EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms);
}
void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) {
    EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);
}

void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width) {
    u32 size = Is64Bit(Rn) ? 64 : 32;
    ASSERT_MSG((lsb + width) <= size,
               "%s passed lsb %d and width %d whose sum exceeds the register size!", __func__,
               lsb, width);
    EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
}
void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width) {
    u32 size = Is64Bit(Rn) ? 64 : 32;
    ASSERT_MSG((lsb + width) <= size,
               "%s passed lsb %d and width %d whose sum exceeds the register size!", __func__,
               lsb, width);
    EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);
}
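// For example, BFI(X0, X1, 8, 16) becomes BFM x0, x1, #56, #15
// (immr = (64 - 8) % 64, imms = 16 - 1), inserting bits X1<15:0> into X0<23:8>
// while leaving the rest of the destination intact.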
void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift) {
    bool sf = Is64Bit(Rd);
    bool N = sf;
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);
    Write32((sf << 31) | (0x27 << 23) | (N << 22) | (Rm << 16) | (shift << 10) | (Rn << 5) | Rd);
}
void ARM64XEmitter::SXTB(ARM64Reg Rd, ARM64Reg Rn) {
    SBFM(Rd, Rn, 0, 7);
}
void ARM64XEmitter::SXTH(ARM64Reg Rd, ARM64Reg Rn) {
    SBFM(Rd, Rn, 0, 15);
}
void ARM64XEmitter::SXTW(ARM64Reg Rd, ARM64Reg Rn) {
    ASSERT_MSG(Is64Bit(Rd), "%s requires a 64bit register as destination", __func__);
    SBFM(Rd, Rn, 0, 31);
}
void ARM64XEmitter::UXTB(ARM64Reg Rd, ARM64Reg Rn) {
    UBFM(Rd, Rn, 0, 7);
}
void ARM64XEmitter::UXTH(ARM64Reg Rd, ARM64Reg Rn) {
    UBFM(Rd, Rn, 0, 15);
}

// Load Register (Literal)
void ARM64XEmitter::LDR(ARM64Reg Rt, s32 imm) {
    EncodeLoadRegisterInst(0, Rt, imm);
}
void ARM64XEmitter::LDRSW(ARM64Reg Rt, s32 imm) {
    EncodeLoadRegisterInst(2, Rt, imm);
}
void ARM64XEmitter::PRFM(ARM64Reg Rt, s32 imm) {
    EncodeLoadRegisterInst(3, Rt, imm);
}

// Load/Store pair
void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
    EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm);
}
void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
    EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm);
}
void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
    EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm);
}

// Load/Store Exclusive
void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(0, Rs, SP, Rt, Rn);
}
void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(1, Rs, SP, Rt, Rn);
}
void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(2, SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(3, SP, SP, Rt, Rn);
}
void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(4, SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(5, SP, SP, Rt, Rn);
}
void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(6, Rs, SP, Rt, Rn);
}
void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(7, Rs, SP, Rt, Rn);
}
void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(8, SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(9, SP, SP, Rt, Rn);
}
void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(10, SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(11, SP, SP, Rt, Rn);
}
void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(12 + Is64Bit(Rt), Rs, SP, Rt, Rn);
}
void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(14 + Is64Bit(Rt), Rs, SP, Rt, Rn);
}
void ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(16 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
}
void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(18 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
}
void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(20 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(22 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(24 + Is64Bit(Rt), SP, Rt2, Rt, Rn);
}
void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(26 + Is64Bit(Rt), SP, Rt2, Rt, Rn);
}
void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(28 + Is64Bit(Rt), SP, SP, Rt, Rn);
}
void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn) {
    EncodeLoadStoreExcInst(30 + Is64Bit(Rt), SP, SP, Rt, Rn);
}

// Load/Store no-allocate pair (offset)
void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) {
    EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm);
}
void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) {
    EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm);
}

// Load/Store register (immediate post-indexed)
// XXX: Most of these support vectors
void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm, 8);
    else
        EncodeLoadStoreIndexedInst(0x0E0, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm, 8);
    else
        EncodeLoadStoreIndexedInst(0x0E1, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm, 8);
    else
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3, type == INDEX_POST ? 1 : 3, Rt, Rn,
                                   imm);
}
void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm, 16);
    else
        EncodeLoadStoreIndexedInst(0x1E0, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm, 16);
    else
        EncodeLoadStoreIndexedInst(0x1E1, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm, 16);
    else
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3, type == INDEX_POST ? 1 : 3, Rt, Rn,
                                   imm);
}
void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
    else
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0, type == INDEX_POST ? 1 : 3, Rt, Rn,
                                   imm);
}
void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
    else
        EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1, type == INDEX_POST ? 1 : 3, Rt, Rn,
                                   imm);
}
void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    if (type == INDEX_UNSIGNED)
        EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm, 32);
    else
        EncodeLoadStoreIndexedInst(0x2E2, type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}

// Load/Store register (register offset)
void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm);
}
void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm);
}
void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    bool b64Bit = Is64Bit(Rt);
    EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm);
}
void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm);
}
void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm);
}
void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    bool b64Bit = Is64Bit(Rt);
    EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm);
}
void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    bool b64Bit = Is64Bit(Rt);
    EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm);
}
void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    bool b64Bit = Is64Bit(Rt);
    EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm);
}
void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm);
}
void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm);
}

// Load/Store register (unscaled offset)
void ARM64XEmitter::STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(0, 0, Rt, Rn, imm);
}
void ARM64XEmitter::LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(0, 1, Rt, Rn, imm);
}
void ARM64XEmitter::LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(0, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(1, 0, Rt, Rn, imm);
}
void ARM64XEmitter::LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(1, 1, Rt, Rn, imm);
}
void ARM64XEmitter::LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(1, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 0, Rt, Rn, imm);
}
void ARM64XEmitter::LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 1, Rt, Rn, imm);
}
void ARM64XEmitter::LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    ASSERT_MSG(!Is64Bit(Rt), "%s must have a 32bit destination register!", __func__);
    EncodeLoadStoreUnscaled(2, 2, Rt, Rn, imm);
}

// Address of label/page PC-relative
void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm) {
    EncodeAddressInst(0, Rd, imm);
}
void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm) {
    EncodeAddressInst(1, Rd, imm >> 12);
}

// Wrapper around MOVZ+MOVK (and later MOVN)
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) {
    unsigned int parts = Is64Bit(Rd) ? 4 : 2;
    std::bitset<32> upload_part(0);

    // Always start with a MOVZ! It kills the dependency on the register.
    bool use_movz = true;

    if (!imm) {
        // Zero immediate, just clear the register. EOR is pointless when we have
        // MOVZ, which looks clearer in disasm too.
        MOVZ(Rd, 0, SHIFT_0);
        return;
    }

    if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) ||
        (!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max())) {
        // Max unsigned value (or -1 if signed): set the register to ~ZR.
        ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
        ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0));
        return;
    }

    // TODO: Make more systematic use of MOVN; this only handles the most common
    // case, a small negative integer that fits in a single MOVN.
    if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm) {
        MOVN(Rd, static_cast<u32>(~imm), SHIFT_0);
        return;
    }

    // XXX: Use MOVN when possible.
    // XXX: Optimize more
    // XXX: Support rotating immediates to save instructions
    if (optimize) {
        for (unsigned int i = 0; i < parts; ++i) {
            if ((imm >> (i * 16)) & 0xFFFF)
                upload_part[i] = 1;
        }
    }

    u64 aligned_pc = reinterpret_cast<u64>(GetCodePtr()) & ~0xFFF;
    s64 aligned_offset = static_cast<s64>(imm) - static_cast<s64>(aligned_pc);
    // The offset for ADR/ADRP is an s32, so make sure it can be represented as one.
    if (upload_part.count() > 1 && std::abs(aligned_offset) < 0x7FFFFFFFLL) {
        // The immediate we are loading is within 4GB of our aligned range;
        // most likely an address that we can load in one or two instructions.
        if (!(std::abs(aligned_offset) & 0xFFF)) {
            // Page-aligned, so a single ADRP suffices.
            ADRP(Rd, static_cast<s32>(aligned_offset));
            return;
        } else {
            // If the address is within 1MB of PC we can still load it in a single
            // instruction.
            s64 offset = static_cast<s64>(imm) - reinterpret_cast<s64>(GetCodePtr());
            if (offset >= -0xFFFFF && offset <= 0xFFFFF) {
                ADR(Rd, static_cast<s32>(offset));
                return;
            } else {
                ADRP(Rd, static_cast<s32>(aligned_offset & ~0xFFF));
                ADD(Rd, Rd, imm & 0xFFF);
                return;
            }
        }
    }

    for (unsigned i = 0; i < parts; ++i) {
        if (use_movz && upload_part[i]) {
            MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, static_cast<ShiftAmount>(i));
            use_movz = false;
        } else {
            if (upload_part[i] || !optimize)
                MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, static_cast<ShiftAmount>(i));
        }
    }
}
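// Example: assuming the value is out of ADR/ADRP range of the code pointer,
// MOVI2R(X0, 0x12345678ABCD) falls through to the MOVZ/MOVK loop and emits
//     movz x0, #0xABCD
//     movk x0, #0x5678, lsl #16
//     movk x0, #0x1234, lsl #32
// (the all-zero top part is skipped when optimize is true).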

bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2) {
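    // Trial-emit both immediates, keep whichever sequence is shorter, and
    // return true when imm2 was the one actually loaded.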
    // TODO: Also optimize for performance, not just for code size.
    u8* start_pointer = GetWritableCodePtr();

    MOVI2R(Rd, imm1);
    u64 size1 = GetCodePtr() - start_pointer;

    SetCodePtrUnsafe(start_pointer);

    MOVI2R(Rd, imm2);
    u64 size2 = GetCodePtr() - start_pointer;

    SetCodePtrUnsafe(start_pointer);

    bool element = size1 > size2;

    MOVI2R(Rd, element ? imm2 : imm1);

    return element;
}

void ARM64XEmitter::ABI_PushRegisters(u32 registers) {
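    // Sketch of what a hypothetical ABI_PushRegisters(0b111) call emits
    // (X0-X2, padded to a 16-byte-aligned 32-byte frame):
    //     str x0, [sp, #-32]!
    //     stp x1, x2, [sp, #16]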
    int num_regs = Common::BitCount(registers);
    int stack_size = (num_regs + (num_regs & 1)) * 8;
    int it = 0;

    std::array<ARM64Reg, 32> gpr{};

    if (!num_regs)
        return;

    for (int i = 0; i < 32; ++i) {
        if (Common::Bit(i, registers)) {
            gpr[it++] = static_cast<ARM64Reg>(X0 + i);
        }
    }

    // 8 bytes per register, but 16-byte stack alignment, so we may have to pad
    // with one extra slot. Only update the SP once to avoid dependencies between
    // the stores.

    // That single update must be the first push, else a context switch may
    // invalidate everything below SP.

    it = 0;
    if (num_regs & 1) {
        STR(INDEX_PRE, gpr[0], SP, -stack_size);
        it++;
    } else {
        STP(INDEX_PRE, gpr[0], gpr[1], SP, -stack_size);
        it += 2;
    }

    // Fast store for all other registers; this is always an even number.
    for (int i = 0; i < (num_regs - 1) / 2; i++) {
        STP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1));
        it += 2;
    }

    ASSERT_MSG(it == num_regs, "%s registers don't match.", __func__);
}

void ARM64XEmitter::ABI_PopRegisters(u32 registers) {
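    // Mirror of ABI_PushRegisters: a hypothetical ABI_PopRegisters(0b111) emits
    //     ldp x1, x2, [sp, #16]
    //     ldr x0, [sp], #32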
    u8 num_regs = static_cast<u8>(Common::BitCount(registers));
    int stack_size = (num_regs + (num_regs & 1)) * 8;
    int it = 0;

    std::array<ARM64Reg, 32> gpr{};

    if (!num_regs)
        return;

    for (int i = 0; i < 32; ++i) {
        if (Common::Bit(i, registers)) {
            gpr[it++] = static_cast<ARM64Reg>(X0 + i);
        }
    }
    it = 0;
    // We must adjust the SP at the end, so note the first (two) registers now and
    // load them last.
    ARM64Reg first = gpr[it++];
    ARM64Reg second = INVALID_REG;
    if (!(num_regs & 1))
        second = gpr[it++];

    // 8 bytes per register, but 16-byte stack alignment, so we may have to pad
    // with one extra slot. Only update the SP on the last load to avoid
    // dependencies between the loads.

    // Fast load for all but the first (two) registers; this is always an even number.

    for (int i = 0; i < (num_regs - 1) / 2; ++i) {
        LDP(INDEX_SIGNED, gpr[it], gpr[it + 1], SP, 16 * (i + 1));
        it += 2;
    }

    // Post-index load of the first (two) registers, releasing the stack frame.
    if (num_regs & 1)
        LDR(INDEX_POST, first, SP, stack_size);
    else
        LDP(INDEX_POST, first, second, SP, stack_size);

    ASSERT_MSG(it == num_regs, "%s registers don't match.", __func__);
}

// Float Emitter
void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt,
                                               ARM64Reg Rn, s32 imm) {
    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    u32 encoded_size = 0;
    u32 encoded_imm = 0;

    if (size == 8)
        encoded_size = 0;
    else if (size == 16)
        encoded_size = 1;
    else if (size == 32)
        encoded_size = 2;
    else if (size == 64)
        encoded_size = 3;
    else if (size == 128)
        encoded_size = 0;

    if (type == INDEX_UNSIGNED) {
        ASSERT_MSG(!(imm & ((size - 1) >> 3)),
                   "%s(INDEX_UNSIGNED) immediate offset must be aligned to size! "
                   "(%d) (%p)",
                   __func__, imm, m_emit->GetCodePtr());
        ASSERT_MSG(imm >= 0, "%s(INDEX_UNSIGNED) immediate offset must be non-negative!", __func__);
        if (size == 16)
            imm >>= 1;
        else if (size == 32)
            imm >>= 2;
        else if (size == 64)
            imm >>= 3;
        else if (size == 128)
            imm >>= 4;
        encoded_imm = (imm & 0xFFF);
    } else {
        ASSERT_MSG(!(imm < -256 || imm > 255),
                   "%s immediate offset must be within range of -256 to 255!", __func__);
        encoded_imm = (imm & 0x1FF) << 2;
        if (type == INDEX_POST)
            encoded_imm |= 1;
        else
            encoded_imm |= 3;
    }

    Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) |
            (size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
}
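// Note on the INDEX_UNSIGNED path above: the 12-bit immediate is stored scaled
// by the access size, so e.g. a 64-bit access can encode byte offsets 0..32760
// in steps of 8.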

void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd,
                                          ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT_MSG(!IsQuad(Rd), "%s only supports double and single registers!", __func__);
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (Rm << 16) |
            (opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
                                      ARM64Reg Rm) {
    ASSERT_MSG(!IsSingle(Rd), "%s doesn't support singles!", __func__);
    bool quad = IsQuad(Rd);
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (Rm << 16) |
            (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
                                            ARM64Reg Rm) {
    ASSERT_MSG(!IsQuad(Rd), "%s doesn't support quads!", __func__);
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);
    int esize = 0;
    switch (size) {
    case 8:
        esize = 0;
        break;
    case 16:
        esize = 1;
        break;
    case 32:
        esize = 2;
        break;
    case 64:
        esize = 3;
        break;
    default:
        ASSERT_MSG(false, "Size must be 8, 16, 32, or 64");
        break;
    }

    Write32((U << 29) | (0b1011110001 << 21) | (esize << 22) | (Rm << 16) |
            (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn) {
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | (1 << 10) |
            (Rn << 5) | Rd);
}

void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd,
                                     ARM64Reg Rn) {
    ASSERT_MSG(!IsSingle(Rd), "%s doesn't support singles!", __func__);
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((Q << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) |
            (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size,
                                                     ARM64Reg Rt, ARM64Reg Rn) {
    ASSERT_MSG(!IsSingle(Rt), "%s doesn't support singles!", __func__);
    bool quad = IsQuad(Rt);
    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);

    Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | (S << 12) |
            (size << 10) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size,
                                                     ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT_MSG(!IsSingle(Rt), "%s doesn't support singles!", __func__);
    bool quad = IsQuad(Rt);
    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | (opcode << 13) |
            (S << 12) | (size << 10) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd,
                                    ARM64Reg Rn) {
    ASSERT_MSG(!IsQuad(Rd), "%s doesn't support vector!", __func__);
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) |
            (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode,
                                       ARM64Reg Rd, ARM64Reg Rn) {
    ASSERT_MSG(Rn <= SP, "%s only supports GPR as source!", __func__);
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | (opcode << 16) |
            (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round,
                                               bool sign) {
    DEBUG_ASSERT_MSG(IsScalar(Rn), "fcvts: Rn must be floating point");
    if (IsGPR(Rd)) {
        // Use the encoding that transfers the result to a GPR.
        bool sf = Is64Bit(Rd);
        int type = IsDouble(Rn) ? 1 : 0;
        Rd = DecodeReg(Rd);
        Rn = DecodeReg(Rn);
        int opcode = (sign ? 1 : 0);
        int rmode = 0;
        switch (round) {
        case ROUND_A:
            rmode = 0;
            opcode |= 4;
            break;
        case ROUND_P:
            rmode = 1;
            break;
        case ROUND_M:
            rmode = 2;
            break;
        case ROUND_Z:
            rmode = 3;
            break;
        case ROUND_N:
            rmode = 0;
            break;
        }
        EmitConversion2(sf, 0, true, type, rmode, opcode, 0, Rd, Rn);
    } else {
        // Use the encoding (vector, single) that keeps the result in the fp
        // register.
        int sz = IsDouble(Rn);
        Rd = DecodeReg(Rd);
        Rn = DecodeReg(Rn);
        int opcode = 0;
        switch (round) {
        case ROUND_A:
            opcode = 0x1C;
            break;
        case ROUND_N:
            opcode = 0x1A;
            break;
        case ROUND_M:
            opcode = 0x1B;
            break;
        case ROUND_P:
            opcode = 0x1A;
            sz |= 2;
            break;
        case ROUND_Z:
            opcode = 0x1B;
            sz |= 2;
            break;
        }
        Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) |
                (Rn << 5) | Rd);
    }
}
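// Note: as the code above shows, the 'sign' argument of EmitConvertScalarToInt
// actually selects the unsigned variant (it feeds the U/opcode bit), hence
// FCVTS passes false and FCVTU passes true below.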

void ARM64FloatEmitter::FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
    EmitConvertScalarToInt(Rd, Rn, round, false);
}

void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
    EmitConvertScalarToInt(Rd, Rn, round, true);
}

void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode,
                                        u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn) {
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) |
            (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT_MSG(!IsQuad(Rn), "%s doesn't support vector!", __func__);
    bool is_double = IsDouble(Rn);

    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (op << 14) |
            (1 << 13) | (Rn << 5) | opcode2);
}

void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn,
                                       ARM64Reg Rm) {
    ASSERT_MSG(!IsQuad(Rd), "%s doesn't support vector!", __func__);
    bool is_double = IsDouble(Rd);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | (cond << 12) |
            (3 << 10) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT_MSG(!IsSingle(Rd), "%s doesn't support singles!", __func__);

    bool quad = IsQuad(Rd);

    u32 encoded_size = 0;
    if (size == 16)
        encoded_size = 1;
    else if (size == 32)
        encoded_size = 2;
    else if (size == 64)
        encoded_size = 3;

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | (1 << 11) |
            (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8) {
    ASSERT_MSG(!IsQuad(Rd), "%s doesn't support vector!", __func__);

    bool is_double = !IsSingle(Rd);

    Rd = DecodeReg(Rd);

    Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | (imm8 << 13) |
            (1 << 12) | (imm5 << 5) | Rd);
}

void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd,
                                     ARM64Reg Rn) {
    ASSERT_MSG(immh, "%s bad encoding! Can't have zero immh", __func__);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | (opcode << 11) |
            (1 << 10) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd,
                                           ARM64Reg Rn) {
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) |
            (1 << 10) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt,
                                                       ARM64Reg Rn) {
    bool quad = IsQuad(Rt);
    u32 encoded_size = 0;

    if (size == 16)
        encoded_size = 1;
    else if (size == 32)
        encoded_size = 2;
    else if (size == 64)
        encoded_size = 3;

    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);

    Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | (encoded_size << 10) |
            (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode,
                                                           ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
    bool quad = IsQuad(Rt);
    u32 encoded_size = 0;

    if (size == 16)
        encoded_size = 1;
    else if (size == 32)
        encoded_size = 2;
    else if (size == 64)
        encoded_size = 3;

    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((quad << 30) | (0b11001 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) |
            (encoded_size << 10) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd,
                                          ARM64Reg Rn) {
    ASSERT_MSG(!IsQuad(Rd), "%s doesn't support vector!", __func__);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);

    Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) |
            (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H,
                                           ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    bool quad = IsQuad(Rd);

    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);

    Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | (Rm << 16) |
            (opcode << 12) | (H << 11) | (Rn << 5) | Rd);
}

void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    ASSERT_MSG(!(imm < -256 || imm > 255), "%s received too large an offset: %d", __func__, imm);
    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);

    Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt,
                                            ARM64Reg Rt2, ARM64Reg Rn, s32 imm) {
    u32 type_encode = 0;
    u32 opc = 0;

    switch (type) {
    case INDEX_SIGNED:
        type_encode = 0b010;
        break;
    case INDEX_POST:
        type_encode = 0b001;
        break;
    case INDEX_PRE:
        type_encode = 0b011;
        break;
    case INDEX_UNSIGNED:
        ASSERT_MSG(false, "%s doesn't support INDEX_UNSIGNED!", __func__);
        break;
    }

    if (size == 128) {
        ASSERT_MSG(!(imm & 0xF), "%s received invalid offset 0x%x!", __func__, imm);
        opc = 2;
        imm >>= 4;
    } else if (size == 64) {
        ASSERT_MSG(!(imm & 0x7), "%s received invalid offset 0x%x!", __func__, imm);
        opc = 1;
        imm >>= 3;
    } else if (size == 32) {
        ASSERT_MSG(!(imm & 0x3), "%s received invalid offset 0x%x!", __func__, imm);
        opc = 0;
        imm >>= 2;
    }

    Rt = DecodeReg(Rt);
    Rt2 = DecodeReg(Rt2);
    Rn = DecodeReg(Rn);

    Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) |
            ((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn,
                                                      ArithOption Rm) {
    ASSERT_MSG(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG,
               "%s must contain an extended reg as Rm!", __func__);

    u32 encoded_size = 0;
    u32 encoded_op = 0;

    if (size == 8) {
        encoded_size = 0;
        encoded_op = 0;
    } else if (size == 16) {
        encoded_size = 1;
        encoded_op = 0;
    } else if (size == 32) {
        encoded_size = 2;
        encoded_op = 0;
    } else if (size == 64) {
        encoded_size = 3;
        encoded_op = 0;
    } else if (size == 128) {
        encoded_size = 0;
        encoded_op = 2;
    }

    if (load)
        encoded_op |= 1;

    Rt = DecodeReg(Rt);
    Rn = DecodeReg(Rn);
    ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());

    Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) |
            Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
}

void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) {
    union V {
        u8 hex;
        struct InV {
            unsigned defgh : 5;
            unsigned abc : 3;
        } in;
    } v;
    v.hex = abcdefgh;
    Rd = DecodeReg(Rd);
    Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.in.abc << 16) | (cmode << 12) | (o2 << 11) |
            (1 << 10) | (v.in.defgh << 5) | Rd);
}

void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
}
void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    EmitLoadStoreImmediate(size, 0, type, Rt, Rn, imm);
}

// Loadstore unscaled
void ARM64FloatEmitter::LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    u32 encoded_size = 0;
    u32 encoded_op = 0;

    if (size == 8) {
        encoded_size = 0;
        encoded_op = 1;
    } else if (size == 16) {
        encoded_size = 1;
        encoded_op = 1;
    } else if (size == 32) {
        encoded_size = 2;
        encoded_op = 1;
    } else if (size == 64) {
        encoded_size = 3;
        encoded_op = 1;
    } else if (size == 128) {
        encoded_size = 0;
        encoded_op = 3;
    }

    EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
}
void ARM64FloatEmitter::STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm) {
    u32 encoded_size = 0;
    u32 encoded_op = 0;

    if (size == 8) {
        encoded_size = 0;
        encoded_op = 0;
    } else if (size == 16) {
        encoded_size = 1;
        encoded_op = 0;
    } else if (size == 32) {
        encoded_size = 2;
        encoded_op = 0;
    } else if (size == 64) {
        encoded_size = 3;
        encoded_op = 0;
    } else if (size == 128) {
        encoded_size = 0;
        encoded_op = 2;
    }

    EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
}

// Loadstore single structure
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn) {
    bool S = 0;
    u32 opcode = 0;
    u32 encoded_size = 0;
    ARM64Reg encoded_reg = INVALID_REG;

    if (size == 8) {
        S = (index & 4) != 0;
        opcode = 0;
        encoded_size = index & 3;
        if (index & 8)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 16) {
        S = (index & 2) != 0;
        opcode = 2;
        encoded_size = (index & 1) << 1;
        if (index & 4)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 32) {
        S = (index & 1) != 0;
        opcode = 4;
        encoded_size = 0;
        if (index & 2)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 64) {
        S = 0;
        opcode = 4;
        encoded_size = 1;
        if (index == 1)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    }

    EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn);
}
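// For example, LD1(32, Q0, 3, X0) emits ld1 { v0.s }[3], [x0]: for 32-bit lanes
// bit 0 of the index becomes the S field and bit 1 selects the Q-register form,
// so together they address all four lanes.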

void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm) {
    bool S = 0;
    u32 opcode = 0;
    u32 encoded_size = 0;
    ARM64Reg encoded_reg = INVALID_REG;

    if (size == 8) {
        S = (index & 4) != 0;
        opcode = 0;
        encoded_size = index & 3;
        if (index & 8)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 16) {
        S = (index & 2) != 0;
        opcode = 2;
        encoded_size = (index & 1) << 1;
        if (index & 4)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 32) {
        S = (index & 1) != 0;
        opcode = 4;
        encoded_size = 0;
        if (index & 2)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 64) {
        S = 0;
        opcode = 4;
        encoded_size = 1;
        if (index == 1)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    }

    EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
}

void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn) {
    EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn);
}
void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn) {
    EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn);
}
void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
    EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn, Rm);
}
void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm) {
    EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn, Rm);
}

void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn) {
    bool S = 0;
    u32 opcode = 0;
    u32 encoded_size = 0;
    ARM64Reg encoded_reg = INVALID_REG;

    if (size == 8) {
        S = (index & 4) != 0;
        opcode = 0;
        encoded_size = index & 3;
        if (index & 8)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 16) {
        S = (index & 2) != 0;
        opcode = 2;
        encoded_size = (index & 1) << 1;
        if (index & 4)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 32) {
        S = (index & 1) != 0;
        opcode = 4;
        encoded_size = 0;
        if (index & 2)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 64) {
        S = 0;
        opcode = 4;
        encoded_size = 1;
        if (index == 1)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    }

    EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn);
}

void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm) {
    bool S = 0;
    u32 opcode = 0;
    u32 encoded_size = 0;
    ARM64Reg encoded_reg = INVALID_REG;

    if (size == 8) {
        S = (index & 4) != 0;
        opcode = 0;
        encoded_size = index & 3;
        if (index & 8)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 16) {
        S = (index & 2) != 0;
        opcode = 2;
        encoded_size = (index & 1) << 1;
        if (index & 4)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 32) {
        S = (index & 1) != 0;
        opcode = 4;
        encoded_size = 0;
        if (index & 2)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    } else if (size == 64) {
        S = 0;
        opcode = 4;
        encoded_size = 1;
        if (index == 1)
            encoded_reg = EncodeRegToQuad(Rt);
        else
            encoded_reg = EncodeRegToDouble(Rt);
    }

    EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
}
2657 
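// Illustrative use of the single-lane ST1 forms above (a sketch; nothing in
// this file depends on it): the lane index is folded into the Q/S/size fields,
// so for 32-bit elements lanes 0-1 use the D-register form and lanes 2-3 the
// Q form, e.g.
//     ST1(32, Q0, 3, X0);   // mem[X0] = V0.S[3]
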
// Loadstore multiple structure
void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) {
    ASSERT_MSG(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __func__);
    u32 opcode = 0;
    if (count == 1)
        opcode = 0b111;
    else if (count == 2)
        opcode = 0b1010;
    else if (count == 3)
        opcode = 0b0110;
    else if (count == 4)
        opcode = 0b0010;
    EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
}
void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn,
                            ARM64Reg Rm) {
    ASSERT_MSG(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __func__);
    ASSERT_MSG(type == INDEX_POST, "%s only supports post indexing!", __func__);

    u32 opcode = 0;
    if (count == 1)
        opcode = 0b111;
    else if (count == 2)
        opcode = 0b1010;
    else if (count == 3)
        opcode = 0b0110;
    else if (count == 4)
        opcode = 0b0010;
    EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);
}
void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn) {
    ASSERT_MSG(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __func__);
    u32 opcode = 0;
    if (count == 1)
        opcode = 0b111;
    else if (count == 2)
        opcode = 0b1010;
    else if (count == 3)
        opcode = 0b0110;
    else if (count == 4)
        opcode = 0b0010;
    EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
}
void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn,
                            ARM64Reg Rm) {
    ASSERT_MSG(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __func__);
    ASSERT_MSG(type == INDEX_POST, "%s only supports post indexing!", __func__);

    u32 opcode = 0;
    if (count == 1)
        opcode = 0b111;
    else if (count == 2)
        opcode = 0b1010;
    else if (count == 3)
        opcode = 0b0110;
    else if (count == 4)
        opcode = 0b0010;
    EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);
}

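// The count-to-opcode mapping above follows the A64 LD1/ST1 (multiple
// structures) encodings: 1 reg -> 0b0111, 2 -> 0b1010, 3 -> 0b0110,
// 4 -> 0b0010. Illustrative post-indexed use (a sketch; in the A64 encoding an
// Rm of 0b11111, i.e. the SP slot, means "post-increment Rn by the transfer
// size"):
//     LD1(64, 4, INDEX_POST, Q0, X0, SP);   // loads Q0..Q3, then X0 += 64
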
// Scalar - 1 Source
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top) {
    if (IsScalar(Rd) && IsScalar(Rn)) {
        EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);
    } else {
        ASSERT_MSG(!IsQuad(Rd) && !IsQuad(Rn), "FMOV can't move to/from quads");
        int rmode = 0;
        int opcode = 6;
        int encoded_size = 0;
        int sf = 0;
        if (IsSingle(Rd) && !Is64Bit(Rn) && !top) {
            // GPR to scalar single
            opcode |= 1;
        } else if (!Is64Bit(Rd) && IsSingle(Rn) && !top) {
            // Scalar single to GPR - defaults are correct
        } else if (Is64Bit(Rd) && IsDouble(Rn) && !top) {
            // Scalar double to GPR
            sf = 1;
            encoded_size = 1;
        } else if (IsDouble(Rd) && Is64Bit(Rn) && !top) {
            // GPR to scalar double
            sf = 1;
            encoded_size = 1;
            opcode |= 1;
        } else {
            // TODO: handle the remaining combinations (e.g. the top == true forms)
            ASSERT_MSG(0, "FMOV: Unhandled case");
        }
        Rd = DecodeReg(Rd);
        Rn = DecodeReg(Rn);
        Write32((sf << 31) | (encoded_size << 22) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) |
                (Rn << 5) | Rd);
    }
}

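// Illustrative GPR<->FP bit moves using the forms handled above (sketch only,
// assuming the default top == false):
//     FMOV(S0, W1);   // W1 bits into S0
//     FMOV(X2, D3);   // D3 bits into X2
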
// Loadstore paired
void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
                            s32 imm) {
    EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);
}
void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
                            s32 imm) {
    EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
}

// Loadstore register offset
void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);
}
void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm) {
    EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);
}

void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn) {
    EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);
}
void ARM64FloatEmitter::FNEG(ARM64Reg Rd, ARM64Reg Rn) {
    EmitScalar1Source(0, 0, IsDouble(Rd), 2, Rd, Rn);
}
void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn) {
    EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
}

// Scalar - 2 Source
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
}

void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
}
void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
}
void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
}
void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
    EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
}

void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
                                          ARM64Reg Ra, int opcode) {
    int type = isDouble ? 1 : 0;
    Rd = DecodeReg(Rd);
    Rn = DecodeReg(Rn);
    Rm = DecodeReg(Rm);
    Ra = DecodeReg(Ra);
    int o1 = opcode >> 1;
    int o0 = opcode & 1;
    m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) |
                    (Ra << 10) | (Rn << 5) | Rd);
}

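// For reference, opcodes 0..3 above map to the A64 fused multiply-add family:
// FMADD: Rd = Ra + Rn*Rm; FMSUB: Rd = Ra - Rn*Rm;
// FNMADD: Rd = -Ra - Rn*Rm; FNMSUB: Rd = -Ra + Rn*Rm.
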
// Scalar three same
void ARM64FloatEmitter::SQADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalarThreeSame(0, size, 0b00001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SQSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalarThreeSame(0, size, 0b00101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UQADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalarThreeSame(1, size, 0b00001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UQSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitScalarThreeSame(1, size, 0b00101, Rd, Rn, Rm);
}

// Scalar floating point immediate
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8) {
    EmitScalarImm(0, 0, 0, 0, Rd, imm8);
}

// Vector
void ARM64FloatEmitter::ADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ADDV(ESize esize, ARM64Reg Rd, ARM64Reg Rn) {
    ASSERT(esize != D);
    Emit2RegMisc(IsQuad(Rd), 0, static_cast<u32>(esize), 0b100011011, Rd, Rn);
}
void ARM64FloatEmitter::SUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b10000, Rd, Rn, Rm);
}
void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMGE(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b00111, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMGE_zero(ESize esize, ARM64Reg Rd, ARM64Reg Rn) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    Emit2RegMisc(IsQuad(Rd), 1, static_cast<u32>(esize), 0b1000, Rd, Rn);
}
void ARM64FloatEmitter::CMGT(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMHI(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b00110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::CMHS(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b00111, Rd, Rn, Rm);
}
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index) {
    u32 imm5 = 0;

    if (size == 8) {
        imm5 = 1;
        imm5 |= index << 1;
    } else if (size == 16) {
        imm5 = 2;
        imm5 |= index << 2;
    } else if (size == 32) {
        imm5 = 4;
        imm5 |= index << 3;
    } else if (size == 64) {
        imm5 = 8;
        imm5 |= index << 4;
    }

    EmitCopy(IsQuad(Rd), 0, imm5, 0, Rd, Rn);
}
void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xF, Rd, Rn);
}
void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, size >> 6, 0b11110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, size >> 6, 0x19, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, 2 | (size >> 6), 0b11110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(false, 0, size >> 6, 0x17, Rd, Rn);
}
void ARM64FloatEmitter::FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(true, 0, size >> 6, 0x17, Rd, Rn);
}
void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 5, 0x16, Rd, Rn);
}
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1B, Rd, Rn);
}
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
}
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);
}
void ARM64FloatEmitter::FRECPE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1D, Rd, Rn);
}
void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1D, Rd, Rn);
}
void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, 2 | (size >> 6), 0x1A, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, 2 | (size >> 6), 0x19, Rd, Rn, Rm);
}
void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, 0, 5, Rd, Rn);
}
void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, 2, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);
}
void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, size >> 4, 0, Rd, Rn);
}
void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
}
void ARM64FloatEmitter::SABD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(esize != D);
    EmitThreeSame(0, static_cast<u32>(esize), 0b01110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UABD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(esize != D);
    EmitThreeSame(1, static_cast<u32>(esize), 0b01110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SADDLV(ESize esize, ARM64Reg Rd, ARM64Reg Rn) {
    ASSERT(esize != D);
    Emit2RegMisc(IsQuad(Rd), 0, static_cast<u32>(esize), 0b100000011, Rd, Rn);
}
void ARM64FloatEmitter::UADDLV(ESize esize, ARM64Reg Rd, ARM64Reg Rn) {
    ASSERT(esize != D);
    Emit2RegMisc(IsQuad(Rd), 1, static_cast<u32>(esize), 0b100000011, Rd, Rn);
}
void ARM64FloatEmitter::SHADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b0, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UHADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b0, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SHSUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b00100, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UHSUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b00100, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UMIN(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b01101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SQADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b00001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SQSUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(0, static_cast<u32>(esize), 0b00101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UQADD(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b00001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UQSUB(ESize esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    ASSERT(!(IsDouble(Rd) && esize == D));
    EmitThreeSame(1, static_cast<u32>(esize), 0b00101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
}
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, size >> 6, 0x1D, Rd, Rn);
}
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
    int imm = size * 2 - scale;
    EmitShiftImm(IsQuad(Rd), 0, imm >> 3, imm & 7, 0x1C, Rd, Rn);
}
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
    int imm = size * 2 - scale;
    EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
}
void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(false, 0, dest_size >> 4, 0b10100, Rd, Rn);
}
void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(true, 0, dest_size >> 4, 0b10100, Rd, Rn);
}
void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(false, 1, dest_size >> 4, 0b10100, Rd, Rn);
}
void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(true, 1, dest_size >> 4, 0b10100, Rd, Rn);
}
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(false, 0, dest_size >> 4, 0b10010, Rd, Rn);
}
void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(true, 0, dest_size >> 4, 0b10010, Rd, Rn);
}

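// A note on the size parameters above: for the floating-point vector forms,
// size is the element width in bits, so (size >> 6) gives the sz bit (32 -> 0,
// 64 -> 1), and the ops whose encodings need size<1> set (FSUB, FMIN, FNEG,
// FCMGT, ...) OR in 2. For the narrowing integer ops, (size >> 4) maps
// 8/16/32-bit destination elements to size fields 0/1/2.
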
// Move
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    u32 imm5 = 0;

    if (size == 8)
        imm5 = 1;
    else if (size == 16)
        imm5 = 2;
    else if (size == 32)
        imm5 = 4;
    else if (size == 64)
        imm5 = 8;

    EmitCopy(IsQuad(Rd), 0, imm5, 1, Rd, Rn);
}
void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn) {
    u32 imm5 = 0;

    if (size == 8) {
        imm5 = 1;
        imm5 |= index << 1;
    } else if (size == 16) {
        imm5 = 2;
        imm5 |= index << 2;
    } else if (size == 32) {
        imm5 = 4;
        imm5 |= index << 3;
    } else if (size == 64) {
        imm5 = 8;
        imm5 |= index << 4;
    }

    EmitCopy(1, 0, imm5, 3, Rd, Rn);
}
void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2) {
    u32 imm5 = 0, imm4 = 0;

    if (size == 8) {
        imm5 = 1;
        imm5 |= index1 << 1;
        imm4 = index2;
    } else if (size == 16) {
        imm5 = 2;
        imm5 |= index1 << 2;
        imm4 = index2 << 1;
    } else if (size == 32) {
        imm5 = 4;
        imm5 |= index1 << 3;
        imm4 = index2 << 2;
    } else if (size == 64) {
        imm5 = 8;
        imm5 |= index1 << 4;
        imm4 = index2 << 3;
    }

    EmitCopy(1, 1, imm5, imm4, Rd, Rn);
}

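// The imm5 values built above follow the A64 SIMD copy encoding: the lowest
// set bit picks the element width (1 = byte, 2 = half, 4 = word, 8 = double)
// and the bits above it carry the lane index. Sketches (illustrative only):
//     INS(32, Q0, 2, W1);     // V0.S[2] = W1
//     INS(8, Q2, 0, Q3, 7);   // V2.B[0] = V3.B[7]
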
void ARM64FloatEmitter::UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index) {
    bool b64Bit = Is64Bit(Rd);
    ASSERT_MSG(Rd < SP, "%s destination must be a GPR!", __func__);
    ASSERT_MSG(!(b64Bit && size != 64), "%s must have a size of 64 when destination is 64bit!",
               __func__);
    u32 imm5 = 0;

    if (size == 8) {
        imm5 = 1;
        imm5 |= index << 1;
    } else if (size == 16) {
        imm5 = 2;
        imm5 |= index << 2;
    } else if (size == 32) {
        imm5 = 4;
        imm5 |= index << 3;
    } else if (size == 64) {
        imm5 = 8;
        imm5 |= index << 4;
    }

    EmitCopy(b64Bit, 0, imm5, 7, Rd, Rn);
}
void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index) {
    bool b64Bit = Is64Bit(Rd);
    ASSERT_MSG(Rd < SP, "%s destination must be a GPR!", __func__);
    ASSERT_MSG(size != 64, "%s doesn't support 64bit destination. Use UMOV!", __func__);
    u32 imm5 = 0;

    if (size == 8) {
        imm5 = 1;
        imm5 |= index << 1;
    } else if (size == 16) {
        imm5 = 2;
        imm5 |= index << 2;
    } else if (size == 32) {
        imm5 = 4;
        imm5 |= index << 3;
    }

    EmitCopy(b64Bit, 0, imm5, 5, Rd, Rn);
}

// One source
void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn) {
    u32 dst_encoding = 0;
    u32 src_encoding = 0;

    if (size_to == 16)
        dst_encoding = 3;
    else if (size_to == 32)
        dst_encoding = 0;
    else if (size_to == 64)
        dst_encoding = 1;

    if (size_from == 16)
        src_encoding = 3;
    else if (size_from == 32)
        src_encoding = 0;
    else if (size_from == 64)
        src_encoding = 1;

    Emit1Source(0, 0, src_encoding, 4 | dst_encoding, Rd, Rn);
}

void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn) {
    if (IsScalar(Rn)) {
        // The source is in an FP register (like the destination), so we must
        // use a vector encoding.
        bool sign = false;
        Rd = DecodeReg(Rd);
        Rn = DecodeReg(Rn);
        int sz = IsDouble(Rn);
        Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);
    } else {
        bool sf = Is64Bit(Rn);
        u32 type = 0;
        if (IsDouble(Rd))
            type = 1;
        EmitConversion(sf, 0, type, 0, 2, Rd, Rn);
    }
}

void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn) {
    if (IsScalar(Rn)) {
        // The source is in an FP register (like the destination), so we must
        // use a vector encoding.
        bool sign = true;
        Rd = DecodeReg(Rd);
        Rn = DecodeReg(Rn);
        int sz = IsDouble(Rn);
        Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);
    } else {
        bool sf = Is64Bit(Rn);
        u32 type = 0;
        if (IsDouble(Rd))
            type = 1;

        EmitConversion(sf, 0, type, 0, 3, Rd, Rn);
    }
}

void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale) {
    bool sf = Is64Bit(Rn);
    u32 type = 0;
    if (IsDouble(Rd))
        type = 1;

    EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
}

void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale) {
    bool sf = Is64Bit(Rn);
    u32 type = 0;
    if (IsDouble(Rd))
        type = 1;

    EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
}

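// In the fixed-point conversions above, `scale` is the number of fractional
// bits; the instruction field is encoded as 64 - scale. Sketch (illustrative
// only): SCVTF(S0, W1, 8) would read W1 as a signed value with 8 fractional
// bits, i.e. divide by 256.0 after the integer conversion.
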
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm) {
    EmitCompare(0, 0, 0, 0, Rn, Rm);
}
void ARM64FloatEmitter::FCMP(ARM64Reg Rn) {
    EmitCompare(0, 0, 0, 8, Rn, static_cast<ARM64Reg>(0));
}
void ARM64FloatEmitter::FCMPE(ARM64Reg Rn, ARM64Reg Rm) {
    EmitCompare(0, 0, 0, 0x10, Rn, Rm);
}
void ARM64FloatEmitter::FCMPE(ARM64Reg Rn) {
    EmitCompare(0, 0, 0, 0x18, Rn, static_cast<ARM64Reg>(0));
}
void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(0, size >> 6, 0x1C, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xD, Rd, Rn);
}
void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(1, size >> 6, 0x1C, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x0C, Rd, Rn);
}
void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitThreeSame(1, 2 | (size >> 6), 0x1C, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x0C, Rd, Rn);
}
void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xD, Rd, Rn);
}
void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
    Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);
}

void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) {
    EmitCondSelect(0, 0, cond, Rd, Rn, Rm);
}

// Permute
void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitPermute(size, 0b001, Rd, Rn, Rm);
}
void ARM64FloatEmitter::TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitPermute(size, 0b010, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitPermute(size, 0b011, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitPermute(size, 0b101, Rd, Rn, Rm);
}
void ARM64FloatEmitter::TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitPermute(size, 0b110, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
    EmitPermute(size, 0b111, Rd, Rn, Rm);
}

// Shift by immediate
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
    SSHLL(src_size, Rd, Rn, shift, false);
}
void ARM64FloatEmitter::SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
    SSHLL(src_size, Rd, Rn, shift, true);
}
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
    SHRN(dest_size, Rd, Rn, shift, false);
}
void ARM64FloatEmitter::SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
    SHRN(dest_size, Rd, Rn, shift, true);
}
void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
    USHLL(src_size, Rd, Rn, shift, false);
}
void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
    USHLL(src_size, Rd, Rn, shift, true);
}
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
    SXTL(src_size, Rd, Rn, false);
}
void ARM64FloatEmitter::SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
    SXTL(src_size, Rd, Rn, true);
}
void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
    UXTL(src_size, Rd, Rn, false);
}
void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
    UXTL(src_size, Rd, Rn, true);
}

void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper) {
    ASSERT_MSG(shift < src_size, "%s shift amount must be less than the element size!", __func__);
    u32 immh = 0;
    u32 immb = shift & 7; // immb holds only the low three bits of the shift amount

    if (src_size == 8) {
        immh = 1;
    } else if (src_size == 16) {
        immh = 2 | ((shift >> 3) & 1);
    } else if (src_size == 32) {
        immh = 4 | ((shift >> 3) & 3);
    }
    EmitShiftImm(upper, 0, immh, immb, 0b10100, Rd, Rn);
}

void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper) {
    ASSERT_MSG(shift < src_size, "%s shift amount must be less than the element size!", __func__);
    u32 immh = 0;
    u32 immb = shift & 7; // immb holds only the low three bits of the shift amount

    if (src_size == 8) {
        immh = 1;
    } else if (src_size == 16) {
        immh = 2 | ((shift >> 3) & 1);
    } else if (src_size == 32) {
        immh = 4 | ((shift >> 3) & 3);
    }
    EmitShiftImm(upper, 1, immh, immb, 0b10100, Rd, Rn);
}

void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper) {
    ASSERT_MSG(shift < dest_size, "%s shift amount must be less than the element size!", __func__);
    u32 immh = 0;
    u32 immb = shift & 7; // immb holds only the low three bits of the shift amount

    if (dest_size == 8) {
        immh = 1;
    } else if (dest_size == 16) {
        immh = 2 | ((shift >> 3) & 1);
    } else if (dest_size == 32) {
        immh = 4 | ((shift >> 3) & 3);
    }
    EmitShiftImm(upper, 1, immh, immb, 0b10000, Rd, Rn);
}

void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
    SSHLL(src_size, Rd, Rn, 0, upper);
}

void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
    USHLL(src_size, Rd, Rn, 0, upper);
}

// vector x indexed element
void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index) {
    ASSERT_MSG(size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __func__);

    bool L = false;
    bool H = false;
    if (size == 32) {
        L = index & 1;
        H = (index >> 1) & 1;
    } else if (size == 64) {
        H = index == 1;
    }

    EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
}

void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index) {
    ASSERT_MSG(size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __func__);

    bool L = false;
    bool H = false;
    if (size == 32) {
        L = index & 1;
        H = (index >> 1) & 1;
    } else if (size == 64) {
        H = index == 1;
    }

    EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
}

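// In the by-element forms above, H and L carry the lane index of Rm
// (H alone for 64-bit elements). Sketch (illustrative only):
//     FMUL(32, Q0, Q1, Q2, 3);   // V0.4S = V1.4S * V2.S[3]
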
// Modified Immediate
void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift) {
    bool Q = IsQuad(Rd);
    u8 cmode = 0;
    u8 op = 0;
    u8 abcdefgh = imm & 0xFF;
    if (size == 8) {
        ASSERT_MSG(shift == 0, "%s(size8) doesn't support shift!", __func__);
        ASSERT_MSG(!(imm & ~0xFFULL), "%s(size8) only supports 8bit values!", __func__);
    } else if (size == 16) {
        ASSERT_MSG(shift == 0 || shift == 8, "%s(size16) only supports shift of {0, 8}!", __func__);
        ASSERT_MSG(!(imm & ~0xFFULL), "%s(size16) only supports 8bit values!", __func__);

        if (shift == 8)
            cmode |= 2;
    } else if (size == 32) {
        ASSERT_MSG(shift == 0 || shift == 8 || shift == 16 || shift == 24,
                   "%s(size32) only supports shift of {0, 8, 16, 24}!", __func__);
        // XXX: Implement support for the shifting-ones (MSL) variant of MOVI
        ASSERT_MSG(!(imm & ~0xFFULL), "%s(size32) only supports 8bit values!", __func__);
        switch (shift) {
        case 8:
            cmode |= 2;
            break;
        case 16:
            cmode |= 4;
            break;
        case 24:
            cmode |= 6;
            break;
        default:
            break;
        }
    } else { // size == 64
        ASSERT_MSG(shift == 0, "%s(size64) doesn't support shift!", __func__);

        op = 1;
        cmode = 0xE;
        abcdefgh = 0;
        for (int i = 0; i < 8; ++i) {
            u8 tmp = (imm >> (i << 3)) & 0xFF;
            ASSERT_MSG(tmp == 0xFF || tmp == 0, "%s(size64) invalid immediate!", __func__);
            if (tmp == 0xFF)
                abcdefgh |= (1 << i);
        }
    }
    EncodeModImm(Q, op, cmode, 0, Rd, abcdefgh);
}

void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift) {
    bool Q = IsQuad(Rd);
    u8 cmode = 1;
    u8 op = 1;
    if (size == 16) {
        ASSERT_MSG(shift == 0 || shift == 8, "%s(size16) only supports shift of {0, 8}!", __func__);

        if (shift == 8)
            cmode |= 2;
    } else if (size == 32) {
        ASSERT_MSG(shift == 0 || shift == 8 || shift == 16 || shift == 24,
                   "%s(size32) only supports shift of {0, 8, 16, 24}!", __func__);
        switch (shift) {
        case 8:
            cmode |= 2;
            break;
        case 16:
            cmode |= 4;
            break;
        case 24:
            cmode |= 6;
            break;
        default:
            break;
        }
    } else {
        ASSERT_MSG(false, "%s only supports size of {16, 32}!", __func__);
    }
    EncodeModImm(Q, op, cmode, 0, Rd, imm);
}

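// Illustrative modified-immediate uses (sketches only):
//     MOVI(32, Q0, 0xFF, 8);                // every 32-bit lane = 0x0000FF00
//     MOVI(64, Q1, 0x00FF00FF00FF00FFULL);  // 64-bit lanes via the per-byte mask form
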
void ARM64FloatEmitter::ABI_PushRegisters(u32 registers, ARM64Reg tmp) {
    bool bundled_loadstore = false;

    for (int i = 0; i < 32; ++i) {
        if (!Common::Bit(i, registers))
            continue;

        int count = 0;
        while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
        }
        if (count > 1) {
            bundled_loadstore = true;
            break;
        }
    }

    if (bundled_loadstore && tmp != INVALID_REG) {
        int num_regs = Common::BitCount(registers);
        m_emit->SUB(SP, SP, num_regs * 16);
        m_emit->ADD(tmp, SP, 0);
        std::vector<ARM64Reg> island_regs;
        for (int i = 0; i < 32; ++i) {
            if (!Common::Bit(i, registers))
                continue;

            // Count the run of consecutively set registers starting at i,
            // capped at 4 (the ST1 multiple-structure limit).
            int count = 0;
            while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
            }

            if (count == 1)
                island_regs.push_back(static_cast<ARM64Reg>(Q0 + i));
            else
                ST1(64, count, INDEX_POST, static_cast<ARM64Reg>(Q0 + i), tmp);

            i += count - 1;
        }

        // Handle island registers
        std::vector<ARM64Reg> pair_regs;
        for (auto& it : island_regs) {
            pair_regs.push_back(it);
            if (pair_regs.size() == 2) {
                STP(128, INDEX_POST, pair_regs[0], pair_regs[1], tmp, 32);
                pair_regs.clear();
            }
        }
        if (!pair_regs.empty())
            STR(128, INDEX_POST, pair_regs[0], tmp, 16);
    } else {
        std::vector<ARM64Reg> pair_regs;
        for (int i = 0; i < 32; ++i) {
            if (Common::Bit(i, registers)) {
                pair_regs.push_back(static_cast<ARM64Reg>(Q0 + i));
                if (pair_regs.size() == 2) {
                    STP(128, INDEX_PRE, pair_regs[0], pair_regs[1], SP, -32);
                    pair_regs.clear();
                }
            }
        }
        if (!pair_regs.empty())
            STR(128, INDEX_PRE, pair_regs[0], SP, -16);
    }
}
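
// Sketch of the bundled path in ABI_PushRegisters above, assuming `registers`
// selects Q0-Q3 and Q8: the frame is pre-sized with a single SUB, Q0-Q3 leave
// as one ST1 bundle through tmp, and the lone Q8 "island" falls through to the
// STR tail, with every register taking a 16-byte slot.
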
void ARM64FloatEmitter::ABI_PopRegisters(u32 registers, ARM64Reg tmp) {
    bool bundled_loadstore = false;
    int num_regs = Common::BitCount(registers);

    for (int i = 0; i < 32; ++i) {
        if (!Common::Bit(i, registers))
            continue;

        int count = 0;
        while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
        }
        if (count > 1) {
            bundled_loadstore = true;
            break;
        }
    }

    if (bundled_loadstore && tmp != INVALID_REG) {
        // The temporary register is only used to indicate that we can use this code path
        std::vector<ARM64Reg> island_regs;
        for (int i = 0; i < 32; ++i) {
            if (!Common::Bit(i, registers))
                continue;

            int count = 0;
            while (++count < 4 && (i + count) < 32 && Common::Bit(i + count, registers)) {
            }

            if (count == 1)
                island_regs.push_back(static_cast<ARM64Reg>(Q0 + i));
            else
                LD1(64, count, INDEX_POST, static_cast<ARM64Reg>(Q0 + i), SP);

            i += count - 1;
        }

        // Handle island registers
        std::vector<ARM64Reg> pair_regs;
        for (auto& it : island_regs) {
            pair_regs.push_back(it);
            if (pair_regs.size() == 2) {
                LDP(128, INDEX_POST, pair_regs[0], pair_regs[1], SP, 32);
                pair_regs.clear();
            }
        }
        if (!pair_regs.empty())
            LDR(128, INDEX_POST, pair_regs[0], SP, 16);
    } else {
        bool odd = num_regs % 2;
        std::vector<ARM64Reg> pair_regs;
        for (int i = 31; i >= 0; --i) {
            if (!Common::Bit(i, registers))
                continue;

            if (odd) {
                // First load must be a regular LDR if odd
                odd = false;
                LDR(128, INDEX_POST, static_cast<ARM64Reg>(Q0 + i), SP, 16);
            } else {
                pair_regs.push_back(static_cast<ARM64Reg>(Q0 + i));
                if (pair_regs.size() == 2) {
                    LDP(128, INDEX_POST, pair_regs[1], pair_regs[0], SP, 32);
                    pair_regs.clear();
                }
            }
        }
    }
}

void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    unsigned int n, imm_s, imm_r;
    if (!Is64Bit(Rn))
        imm &= 0xFFFFFFFF;
    if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
        AND(Rd, Rn, imm_r, imm_s, n != 0);
    } else {
        ASSERT_MSG(scratch != INVALID_REG,
                   "ANDI2R - failed to construct logical immediate value from "
                   "%08x, need scratch",
                   static_cast<u32>(imm));
        MOVI2R(scratch, imm);
        AND(Rd, Rn, scratch);
    }
}

void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    unsigned int n, imm_s, imm_r;
    if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
        ORR(Rd, Rn, imm_r, imm_s, n != 0);
    } else {
        ASSERT_MSG(scratch != INVALID_REG,
                   "ORRI2R - failed to construct logical immediate value from "
                   "%08x, need scratch",
                   static_cast<u32>(imm));
        MOVI2R(scratch, imm);
        ORR(Rd, Rn, scratch);
    }
}

void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    unsigned int n, imm_s, imm_r;
    if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
        EOR(Rd, Rn, imm_r, imm_s, n != 0);
    } else {
        ASSERT_MSG(scratch != INVALID_REG,
                   "EORI2R - failed to construct logical immediate value from "
                   "%08x, need scratch",
                   static_cast<u32>(imm));
        MOVI2R(scratch, imm);
        EOR(Rd, Rn, scratch);
    }
}

void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    unsigned int n, imm_s, imm_r;
    if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
        ANDS(Rd, Rn, imm_r, imm_s, n != 0);
    } else {
        ASSERT_MSG(scratch != INVALID_REG,
                   "ANDSI2R - failed to construct logical immediate value from "
                   "%08x, need scratch",
                   static_cast<u32>(imm));
        MOVI2R(scratch, imm);
        ANDS(Rd, Rn, scratch);
    }
}

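// Typical use of the logical-immediate helpers above (sketch only): masks
// that IsImmLogical can encode cost one instruction, anything else spills
// through the scratch GPR:
//     ANDI2R(X0, X0, 0xFF);             // encodable mask: a single AND
//     ANDI2R(X0, X0, 0xDEADBEEF, X1);   // not encodable: MOVI2R(X1, ...) + AND
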
void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative,
                                 bool flags) {
    switch ((negative << 1) | static_cast<unsigned int>(flags)) {
    case 0:
        ADD(Rd, Rn, static_cast<u32>(imm), shift);
        break;
    case 1:
        ADDS(Rd, Rn, static_cast<u32>(imm), shift);
        break;
    case 2:
        SUB(Rd, Rn, static_cast<u32>(imm), shift);
        break;
    case 3:
        SUBS(Rd, Rn, static_cast<u32>(imm), shift);
        break;
    }
}

void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
                                    ARM64Reg scratch) {
    bool has_scratch = scratch != INVALID_REG;
    u64 imm_neg = Is64Bit(Rd) ? ~imm + 1 : (~imm + 1) & 0xFFFFFFFFuLL;
    bool neg_neg = !negative;

    // Fast paths: try all of the AArch64 arithmetic-immediate forms first.
    if (imm <= 0xFFF) {
        AddImmediate(Rd, Rn, imm, false, negative, flags);
        return;
    }
    if (imm <= 0xFFFFFF && (imm & 0xFFF) == 0) {
        AddImmediate(Rd, Rn, imm >> 12, true, negative, flags);
        return;
    }
    if (imm_neg <= 0xFFF) {
        AddImmediate(Rd, Rn, imm_neg, false, neg_neg, flags);
        return;
    }
    if (imm_neg <= 0xFFFFFF && (imm_neg & 0xFFF) == 0) {
        AddImmediate(Rd, Rn, imm_neg >> 12, true, neg_neg, flags);
        return;
    }

    // ADD+ADD is slower than MOVK+ADD, but works in place.
    // It also supports a few more bits, so use it to avoid MOVK+MOVK+ADD.
    // As this splits the addition into two parts, it must not be used when
    // setting flags.
    if (!flags && (imm >= 0x10000u || !has_scratch) && imm < 0x1000000u) {
        AddImmediate(Rd, Rn, imm & 0xFFF, false, negative, false);
        AddImmediate(Rd, Rd, imm >> 12, true, negative, false);
        return;
    }
    if (!flags && (imm_neg >= 0x10000u || !has_scratch) && imm_neg < 0x1000000u) {
        AddImmediate(Rd, Rn, imm_neg & 0xFFF, false, neg_neg, false);
        AddImmediate(Rd, Rd, imm_neg >> 12, true, neg_neg, false);
        return;
    }

    ASSERT_MSG(has_scratch,
               "ADDI2R - failed to construct arithmetic immediate value from "
               "%08x, need scratch",
               static_cast<u32>(imm));

    negative ^= MOVI2R2(scratch, imm, imm_neg);
    switch ((negative << 1) | static_cast<unsigned int>(flags)) {
    case 0:
        ADD(Rd, Rn, scratch);
        break;
    case 1:
        ADDS(Rd, Rn, scratch);
        break;
    case 2:
        SUB(Rd, Rn, scratch);
        break;
    case 3:
        SUBS(Rd, Rn, scratch);
        break;
    }
}

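// How the fast paths above play out (illustrative): imm = 0x123 emits one
// ADD; imm = 0x123000 emits one shifted ADD; imm = 0x123456 (flags not
// requested) emits ADD #0x456 followed by ADD #0x123, LSL #12; anything wider
// falls back to MOVI2R into the scratch register plus a register-form op.
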
void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    ADDI2R_internal(Rd, Rn, imm, false, false, scratch);
}

void ARM64XEmitter::ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    ADDI2R_internal(Rd, Rn, imm, false, true, scratch);
}

void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    ADDI2R_internal(Rd, Rn, imm, true, false, scratch);
}

void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    ADDI2R_internal(Rd, Rn, imm, true, true, scratch);
}

void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
    ADDI2R_internal(Is64Bit(Rn) ? ZR : WZR, Rn, imm, true, true, scratch);
}

bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
    u32 val;
    bool shift;
    if (IsImmArithmetic(imm, &val, &shift))
        ADD(Rd, Rn, val, shift);
    else
        return false;

    return true;
}

bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
    u32 val;
    bool shift;
    if (IsImmArithmetic(imm, &val, &shift))
        SUB(Rd, Rn, val, shift);
    else
        return false;

    return true;
}

bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u32 imm) {
    u32 val;
    bool shift;
    if (IsImmArithmetic(imm, &val, &shift))
        CMP(Rn, val, shift);
    else
        return false;

    return true;
}

bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
    u32 n, imm_r, imm_s;
    if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r))
        AND(Rd, Rn, imm_r, imm_s, n != 0);
    else
        return false;

    return true;
}
bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
    u32 n, imm_r, imm_s;
    if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r))
        ORR(Rd, Rn, imm_r, imm_s, n != 0);
    else
        return false;

    return true;
}
bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u32 imm) {
    u32 n, imm_r, imm_s;
    if (IsImmLogical(imm, 32, &n, &imm_s, &imm_r))
        EOR(Rd, Rn, imm_r, imm_s, n != 0);
    else
        return false;

    return true;
}

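// The Try* helpers above are the no-scratch counterparts of the *I2R routines:
// they emit nothing and return false when the immediate is not encodable, so
// callers can fall back explicitly, e.g. (sketch; the scratch register is the
// caller's choice):
//     if (!TryCMPI2R(X0, imm))
//         CMPI2R(X0, imm, X8);
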
void ARM64FloatEmitter::MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
    ASSERT_MSG(!IsDouble(Rd), "MOVI2F does not yet support double precision");
    uint8_t imm8;
    if (value == 0.0) {
        FMOV(Rd, IsDouble(Rd) ? ZR : WZR);
        if (negate)
            FNEG(Rd, Rd);
        // TODO: There are some other values we could generate with the float-imm
        // instruction, like 1.0...
    } else if (FPImm8FromFloat(negate ? -value : value, &imm8)) {
        // Fold the requested negation into the value before encoding, so the
        // immediate path honours `negate` as well.
        FMOV(Rd, imm8);
    } else {
        ASSERT_MSG(scratch != INVALID_REG,
                   "Failed to find a way to generate FP immediate %f without scratch", value);
        if (negate)
            value = -value;

        const u32 ival = Dynarmic::Common::BitCast<u32>(value);
        m_emit->MOVI2R(scratch, ival);
        FMOV(Rd, scratch);
    }
}

// TODO: Quite a few values could be generated easily using the MOVI instruction
// and friends.
void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch) {
    // TODO: Make it work with more element sizes
    // TODO: Optimize - there are shorter solutions for many values
    ARM64Reg s = static_cast<ARM64Reg>(S0 + DecodeReg(Rd));
    MOVI2F(s, value, scratch);
    DUP(32, Rd, Rd, 0);
}

} // namespace Dynarmic::BackendA64::Arm64Gen