1 // Copyright 2015 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4
5 #pragma once
6
7 #include <functional>
8
9 #include "Common/ArmCommon.h"
10 #include "Common/BitSet.h"
11 #include "Common/CodeBlock.h"
12 #include "Common/Common.h"
13 #include "Common/Log.h"
14
15 #define DYNA_REC JIT
16
17 #ifdef FMAX
18 #undef FMAX
19 #endif
20 #ifdef FMIN
21 #undef FMIN
22 #endif
23
24 namespace Arm64Gen
25 {
26
// X30 serves a dual purpose as a link register
// Encoded as <u3:type><u5:reg>
// Types:
// 000 - 32bit GPR
// 001 - 64bit GPR
// 010 - VFP single precision
// 100 - VFP double precision
// 110 - VFP quad precision
enum ARM64Reg
{
	// 32bit registers
	W0 = 0, W1, W2, W3, W4, W5, W6,
	W7, W8, W9, W10, W11, W12, W13, W14,
	W15, W16, W17, W18, W19, W20, W21, W22,
	W23, W24, W25, W26, W27, W28, W29, W30,

	WSP, // 32bit stack pointer

	// 64bit registers
	X0 = 0x20, X1, X2, X3, X4, X5, X6,
	X7, X8, X9, X10, X11, X12, X13, X14,
	X15, X16, X17, X18, X19, X20, X21, X22,
	X23, X24, X25, X26, X27, X28, X29, X30,

	SP, // 64bit stack pointer

	// VFP single precision registers
	S0 = 0x40, S1, S2, S3, S4, S5, S6,
	S7, S8, S9, S10, S11, S12, S13,
	S14, S15, S16, S17, S18, S19, S20,
	S21, S22, S23, S24, S25, S26, S27,
	S28, S29, S30, S31,

	// VFP Double Precision registers
	D0 = 0x80, D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD Quad-Word registers
	Q0 = 0xC0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
	Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
	Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,

	// For PRFM(prefetch memory) encoding
	// This is encoded in the Rt register
	// NOTE: these values overlap the W-register encodings on purpose —
	// a prefetch hint is passed where a register normally goes.
	// Data preload
	PLDL1KEEP = 0, PLDL1STRM,
	PLDL2KEEP, PLDL2STRM,
	PLDL3KEEP, PLDL3STRM,
	// Instruction preload
	PLIL1KEEP = 8, PLIL1STRM,
	PLIL2KEEP, PLIL2STRM,
	PLIL3KEEP, PLIL3STRM,
	// Prepare for store
	PLTL1KEEP = 16, PLTL1STRM,
	PLTL2KEEP, PLTL2STRM,
	PLTL3KEEP, PLTL3STRM,

	// Aliases: zero register shares the encoding slot of the stack pointer;
	// which one an instruction means depends on the instruction class.
	WZR = WSP,
	ZR = SP,
	FP = X29,
	LR = X30,

	INVALID_REG = 0xFFFFFFFF
};
94
// Bitmasks over register indices 0-31 (bit n == register n).
// R19-R28. R29 (FP), R30 (LR) are always saved and FP updated appropriately.
const u32 ALL_CALLEE_SAVED = 0x1FF80000; // bits 19-28
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
98
99 inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
100 inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
101 inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
102 inline bool IsScalar(ARM64Reg reg) { return IsSingle(reg) || IsDouble(reg); }
103 inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
104 inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
105 inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }
106
107 int CountLeadingZeros(uint64_t value, int width);
108
109 inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
110 inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
111 inline ARM64Reg EncodeRegToSingle(ARM64Reg reg) { return (ARM64Reg)(DecodeReg(reg) + S0); }
112 inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
113 inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }
114
// For AND/TST/ORR/EOR etc
// Returns true if value is encodable as an A64 logical immediate of the given
// width (32 or 64); on success writes the N:immr:imms encoding fields.
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
// For ADD/SUB
// Returns true if input fits the 12-bit (optionally LSL #12 shifted) form.
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);

// Conversion between the 8-bit FMOV (immediate) encoding and a float value.
float FPImm8ToFloat(uint8_t bits);
bool FPImm8FromFloat(float value, uint8_t *immOut);
122
// Kind of operand an instruction form takes.
enum OpType
{
	TYPE_IMM = 0,
	TYPE_REG,
	TYPE_IMMSREG,
	TYPE_RSR,
	TYPE_MEM
};
131
// Shift kinds for shifted-register operands; values match the instruction encoding.
enum ShiftType
{
	ST_LSL = 0, // logical shift left
	ST_LSR = 1, // logical shift right
	ST_ASR = 2, // arithmetic shift right
	ST_ROR = 3, // rotate right
};
139
// Addressing mode for load/store immediate forms.
enum IndexType
{
	INDEX_UNSIGNED = 0, // unsigned scaled offset, no writeback
	INDEX_POST = 1,     // post-indexed: access [Rn], then Rn += imm
	INDEX_PRE = 2,      // pre-indexed: Rn += imm, then access [Rn]
	INDEX_SIGNED = 3, // used in LDP/STP
};
147
// 16-bit left-shift selector for MOVZ/MOVN/MOVK (hw field).
enum ShiftAmount
{
	SHIFT_0 = 0,
	SHIFT_16 = 1,
	SHIFT_32 = 2,
	SHIFT_48 = 3,
};
155
// FP rounding mode selector for convert/round instructions.
enum RoundingMode {
	ROUND_A,  // round to nearest, ties to away
	ROUND_M,  // round towards -inf
	ROUND_N,  // round to nearest, ties to even
	ROUND_P,  // round towards +inf
	ROUND_Z,  // round towards zero
};
163
// Records an emitted branch whose target is not yet known; resolved later by
// ARM64XEmitter::SetJumpTarget.
struct FixupBranch
{
	// Pointer to executable code address.
	const u8 *ptr;
	// Type defines
	// 0 = CBZ (32bit)
	// 1 = CBNZ (32bit)
	// 2 = B (conditional)
	// 3 = TBZ
	// 4 = TBNZ
	// 5 = B (unconditional)
	// 6 = BL (unconditional)
	u32 type;

	// Used with B.cond
	CCFlags cond;

	// Used with TBZ/TBNZ
	u8 bit;

	// Used with Test/Compare and Branch
	ARM64Reg reg;
};
187
// System-register / PSTATE-field selectors for _MSR / MRS.
enum PStateField
{
	FIELD_SPSel = 0,
	FIELD_DAIFSet,
	FIELD_DAIFClr,
	FIELD_NZCV, // The only system registers accessible from EL0 (user space)
	FIELD_FPCR = 0x340,
	FIELD_FPSR = 0x341,
};
197
// Hint-instruction selectors (HINT #imm); values match the CRm:op2 encoding.
enum SystemHint
{
	HINT_NOP = 0,
	HINT_YIELD,
	HINT_WFE,  // wait for event
	HINT_WFI,  // wait for interrupt
	HINT_SEV,  // send event
	HINT_SEVL, // send event local
};
207
// Barrier option field for DMB/DSB/ISB (CRm value).
// Prefix: OSH = outer shareable, NSH = non-shareable, ISH = inner shareable,
// no prefix = full system; LD = loads only, ST = stores only.
enum BarrierType
{
	OSHLD = 1,
	OSHST = 2,
	OSH = 3,
	NSHLD = 5,
	NSHST = 6,
	NSH = 7,
	ISHLD = 9,
	ISHST = 10,
	ISH = 11,
	LD = 13,
	ST = 14,
	SY = 15,
};
223
224 class ArithOption
225 {
226 public:
227 enum WidthSpecifier
228 {
229 WIDTH_DEFAULT,
230 WIDTH_32BIT,
231 WIDTH_64BIT,
232 };
233
234 enum ExtendSpecifier
235 {
236 EXTEND_UXTB = 0x0,
237 EXTEND_UXTH = 0x1,
238 EXTEND_UXTW = 0x2, /* Also LSL on 32bit width */
239 EXTEND_UXTX = 0x3, /* Also LSL on 64bit width */
240 EXTEND_SXTB = 0x4,
241 EXTEND_SXTH = 0x5,
242 EXTEND_SXTW = 0x6,
243 EXTEND_SXTX = 0x7,
244 };
245
246 enum TypeSpecifier
247 {
248 TYPE_EXTENDEDREG,
249 TYPE_IMM,
250 TYPE_SHIFTEDREG,
251 };
252
253 private:
254 ARM64Reg m_destReg;
255 WidthSpecifier m_width;
256 ExtendSpecifier m_extend;
257 TypeSpecifier m_type;
258 ShiftType m_shifttype;
259 u32 m_shift;
260
261 public:
262 ArithOption(ARM64Reg Rd, bool index = false)
263 {
EncodeSize(int size)264 // Indexed registers are a certain feature of AARch64
265 // On Loadstore instructions that use a register offset
266 // We can have the register as an index
267 // If we are indexing then the offset register will
268 // be shifted to the left so we are indexing at intervals
269 // of the size of what we are loading
270 // 8-bit: Index does nothing
271 // 16-bit: Index LSL 1
272 // 32-bit: Index LSL 2
273 // 64-bit: Index LSL 3
274 if (index)
275 m_shift = 4;
276 else
277 m_shift = 0;
SetCodePointer(const u8 * ptr,u8 * writePtr)278
279 m_destReg = Rd;
280 m_type = TYPE_EXTENDEDREG;
281 if (Is64Bit(Rd))
282 {
283 m_width = WIDTH_64BIT;
284 m_extend = EXTEND_UXTX;
GetCodePointer() const285 }
286 else
287 {
288 m_width = WIDTH_32BIT;
289 m_extend = EXTEND_UXTW;
290 }
291 m_shifttype = ST_LSL;
292 }
293 ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
294 {
ReserveCodeSpace(u32 bytes)295 m_destReg = Rd;
296 m_shift = shift;
297 m_shifttype = shift_type;
298 m_type = TYPE_SHIFTEDREG;
299 if (Is64Bit(Rd))
300 {
AlignCode16()301 m_width = WIDTH_64BIT;
302 if (shift == 64)
303 m_shift = 0;
304 }
305 else
306 {
307 m_width = WIDTH_32BIT;
308 if (shift == 32)
AlignCodePage()309 m_shift = 0;
310 }
311 }
312 TypeSpecifier GetType() const
313 {
314 return m_type;
315 }
316 ARM64Reg GetReg() const
317 {
FlushIcache()318 return m_destReg;
319 }
320 u32 GetData() const
321 {
322 switch (m_type)
323 {
FlushIcacheSection(const u8 * start,const u8 * end)324 case TYPE_EXTENDEDREG:
325 return (m_extend << 13) |
326 (m_shift << 10);
327 break;
328 case TYPE_SHIFTEDREG:
329 return (m_shifttype << 22) |
330 (m_shift << 10);
331 break;
332 default:
333 _dbg_assert_msg_(false, "Invalid type in GetData");
334 break;
335 }
336 return 0;
337 }
338 };
339
340 class ARM64XEmitter
341 {
342 friend class ARM64FloatEmitter;
343 friend class ARM64CodeBlock;
344
345 private:
346 const u8 *m_code = nullptr;
347 u8 *m_writable = nullptr;
348 const u8 *m_lastCacheFlushEnd = nullptr;
349
350 void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
351 void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr);
352 void EncodeUnconditionalBranchInst(u32 op, const void* ptr);
353 void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn);
354 void EncodeExceptionInst(u32 instenc, u32 imm);
355 void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt);
356 void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
357 void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
358 void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
359 void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
360 void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
361 void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn);
362 void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
363 void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
364 void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
365 void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm);
366 void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
367 void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
368 void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
369 void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
370 void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
371 void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
372 void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
373 void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
374 void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
375 void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
376 void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
377 void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
378
379 protected:
380 inline void Write32(u32 value)
381 {
382 *(u32 *)m_writable = value;
383 m_code += 4;
384 m_writable += 4;
385 }
386
387 public:
388 ARM64XEmitter()
389 {
390 }
391
392 ARM64XEmitter(const u8 *codePtr, u8 *writablePtr);
393
394 virtual ~ARM64XEmitter()
395 {
396 }
397
398 void SetCodePointer(const u8 *ptr, u8 *writePtr);
399 const u8* GetCodePointer() const;
400
401 void ReserveCodeSpace(u32 bytes);
402 const u8* AlignCode16();
403 const u8* AlignCodePage();
404 void FlushIcache();
405 void FlushIcacheSection(const u8* start, const u8* end);
406 u8* GetWritableCodePtr();
407
408 // FixupBranch branching
409 void SetJumpTarget(FixupBranch const& branch);
410 FixupBranch CBZ(ARM64Reg Rt);
411 FixupBranch CBNZ(ARM64Reg Rt);
412 FixupBranch B(CCFlags cond);
413 FixupBranch TBZ(ARM64Reg Rt, u8 bit);
414 FixupBranch TBNZ(ARM64Reg Rt, u8 bit);
415 FixupBranch B();
416 FixupBranch BL();
417
418 // Compare and Branch
419 void CBZ(ARM64Reg Rt, const void* ptr);
420 void CBNZ(ARM64Reg Rt, const void* ptr);
421
422 // Conditional Branch
423 void B(CCFlags cond, const void* ptr);
424
425 // Test and Branch
426 void TBZ(ARM64Reg Rt, u8 bits, const void* ptr);
427 void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr);
428
429 // Unconditional Branch
430 void B(const void* ptr);
431 void BL(const void* ptr);
432
433 // Unconditional Branch (register)
434 void BR(ARM64Reg Rn);
435 void BLR(ARM64Reg Rn);
436 void RET(ARM64Reg Rn = X30);
437 void ERET();
438 void DRPS();
439
440 // Exception generation
441 void SVC(u32 imm);
442 void HVC(u32 imm);
443 void SMC(u32 imm);
444 void BRK(u32 imm);
445 void HLT(u32 imm);
446 void DCPS1(u32 imm);
447 void DCPS2(u32 imm);
448 void DCPS3(u32 imm);
449
450 // System
451 void _MSR(PStateField field, u8 imm);
452
453 void _MSR(PStateField field, ARM64Reg Rt);
454 void MRS(ARM64Reg Rt, PStateField field);
455
456 void HINT(SystemHint op);
457 void CLREX();
458 void DSB(BarrierType type);
459 void DMB(BarrierType type);
460 void ISB(BarrierType type);
461
462 // Add/Subtract (Extended/Shifted register)
463 void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
464 void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
465 void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
466 void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
467 void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
468 void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
469 void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
470 void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
471 void CMN(ARM64Reg Rn, ARM64Reg Rm);
472 void CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
473 void CMP(ARM64Reg Rn, ARM64Reg Rm);
474 void CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
475
476 // Add/Subtract (with carry)
477 void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
478 void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
479 void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
480 void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
481
482 // Conditional Compare (immediate)
483 void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
EncodeCompareBranchInst(u32 op,ARM64Reg Rt,const void * ptr)484 void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
485
486 // Conditional Compare (register)
487 void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
488 void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
489
490 // Conditional Select
491 void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
492 void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
493 void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
494 void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
495
496 // Aliases
497 void CSET(ARM64Reg Rd, CCFlags cond) {
498 ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
499 CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
EncodeTestBranchInst(u32 op,ARM64Reg Rt,u8 bits,const void * ptr)500 }
501 void NEG(ARM64Reg Rd, ARM64Reg Rs) {
502 SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs);
503 }
504
505 // Data-Processing 1 source
506 void RBIT(ARM64Reg Rd, ARM64Reg Rn);
507 void REV16(ARM64Reg Rd, ARM64Reg Rn);
508 void REV32(ARM64Reg Rd, ARM64Reg Rn);
509 void REV64(ARM64Reg Rd, ARM64Reg Rn);
510 void CLZ(ARM64Reg Rd, ARM64Reg Rn);
511 void CLS(ARM64Reg Rd, ARM64Reg Rn);
512
513 // Data-Processing 2 source
514 void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
515 void SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
EncodeUnconditionalBranchInst(u32 op,const void * ptr)516 void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
517 void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
518 void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
519 void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
520 void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
521 void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
522 void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
523 void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
524 void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
525 void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
526 void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
527 void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
528
EncodeUnconditionalBranchInst(u32 opc,u32 op2,u32 op3,u32 op4,ARM64Reg Rn)529 // Data-Processing 3 source
530 void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
531 void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
532 void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
533 void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
534 void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
EncodeExceptionInst(u32 instenc,u32 imm)535 void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
536 void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
537 void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
538 void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
539 void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
540 void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
541 void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
EncodeSystemInst(u32 op0,u32 op1,u32 CRn,u32 CRm,u32 op2,ARM64Reg Rt)542
543 // Logical (shifted register)
544 void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
545 void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
546 void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
EncodeArithmeticInst(u32 instenc,bool flags,ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ArithOption Option)547 void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
548 void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
549 void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
550 void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
551 void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
552 void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
553
554 // Wrap the above for saner syntax
555 void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
556 void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
557 void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
EncodeArithmeticCarryInst(u32 op,bool flags,ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)558 void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
559 void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
560 void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
561 void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
562 void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
563 void TST(ARM64Reg Rn, ARM64Reg Rm) { TST(Rn, Rm, ArithOption(Is64Bit(Rn) ? ZR : WZR, ST_LSL, 0)); }
564
565 // Convenience wrappers around ORR. These match the official convenience syntax.
566 void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
567 void MOV(ARM64Reg Rd, ARM64Reg Rm);
568 void MVN(ARM64Reg Rd, ARM64Reg Rm);
EncodeCondCompareImmInst(u32 op,ARM64Reg Rn,u32 imm,u32 nzcv,CCFlags cond)569
570 // Wrapper around ADD reg, reg, imm.
571 void MOVfromSP(ARM64Reg Rd);
572 void MOVtoSP(ARM64Reg Rn);
573
574 // TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
575 void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
576 void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
577 void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
578 void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
579
580 // Logical (immediate)
EncodeCondCompareRegInst(u32 op,ARM64Reg Rn,ARM64Reg Rm,u32 nzcv,CCFlags cond)581 void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
582 void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
583 void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
584 void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
585 void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
586
587 // Add/subtract (immediate)
588 void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
589 void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
590 void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
591 void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
592 void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
EncodeCondSelectInst(u32 instenc,ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,CCFlags cond)593 void CMN(ARM64Reg Rn, u32 imm, bool shift = false);
594
595 // Data Processing (Immediate)
596 void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
597 void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
598 void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
599
600 // Bitfield move
601 void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
602 void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
603 void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
604 void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
EncodeData1SrcInst(u32 instenc,ARM64Reg Rd,ARM64Reg Rn)605 void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
606
607 // Extract register (ROR with two inputs, if same then faster on A67)
608 void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
609
610 // Aliases
611 void SXTB(ARM64Reg Rd, ARM64Reg Rn);
612 void SXTH(ARM64Reg Rd, ARM64Reg Rn);
613 void SXTW(ARM64Reg Rd, ARM64Reg Rn);
614 void UXTB(ARM64Reg Rd, ARM64Reg Rn);
615 void UXTH(ARM64Reg Rd, ARM64Reg Rn);
EncodeData2SrcInst(u32 instenc,ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm)616
617 void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width) {
618 UBFM(Rd, Rn, lsb, lsb + width - 1);
619 }
620
621 // Load Register (Literal)
622 void LDR(ARM64Reg Rt, u32 imm);
623 void LDRSW(ARM64Reg Rt, u32 imm);
624 void PRFM(ARM64Reg Rt, u32 imm);
625
626 // Load/Store Exclusive
627 void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
EncodeData3SrcInst(u32 instenc,ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ARM64Reg Ra)628 void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
629 void LDXRB(ARM64Reg Rt, ARM64Reg Rn);
630 void LDAXRB(ARM64Reg Rt, ARM64Reg Rn);
631 void STLRB(ARM64Reg Rt, ARM64Reg Rn);
632 void LDARB(ARM64Reg Rt, ARM64Reg Rn);
633 void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
634 void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
635 void LDXRH(ARM64Reg Rt, ARM64Reg Rn);
636 void LDAXRH(ARM64Reg Rt, ARM64Reg Rn);
637 void STLRH(ARM64Reg Rt, ARM64Reg Rn);
638 void LDARH(ARM64Reg Rt, ARM64Reg Rn);
639 void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
640 void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
EncodeLogicalInst(u32 instenc,ARM64Reg Rd,ARM64Reg Rn,ARM64Reg Rm,ArithOption Shift)641 void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
642 void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
643 void LDXR(ARM64Reg Rt, ARM64Reg Rn);
644 void LDAXR(ARM64Reg Rt, ARM64Reg Rn);
645 void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
646 void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
647 void STLR(ARM64Reg Rt, ARM64Reg Rn);
648 void LDAR(ARM64Reg Rt, ARM64Reg Rn);
649
650 // Load/Store no-allocate pair (offset)
651 void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
EncodeLoadRegisterInst(u32 bitop,ARM64Reg Rt,u32 imm)652 void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
653
654 // Load/Store register (immediate indexed)
655 void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
656 void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
657 void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
658 void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
659 void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
660 void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
661 void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
662 void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
663 void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
664
EncodeLoadStoreExcInst(u32 instenc,ARM64Reg Rs,ARM64Reg Rt2,ARM64Reg Rn,ARM64Reg Rt)665 // Load/Store register (register offset)
666 void STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
667 void LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
668 void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
669 void STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
670 void LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
671 void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
672 void STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
673 void LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
674 void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
675 void PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
676
EncodeLoadStorePairedInst(u32 op,ARM64Reg Rt,ARM64Reg Rt2,ARM64Reg Rn,u32 imm)677 // Load/Store register (unscaled offset)
678 void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
679 void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
680 void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
681 void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
682 void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
683 void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
684 void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
685 void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
686 void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
687
688 // Load/Store pair
689 void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
690 void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
691 void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
692
693 // Address of label/page PC-relative
694 void ADR(ARM64Reg Rd, s32 imm);
695 void ADRP(ARM64Reg Rd, s32 imm);
696
697 // Wrapper around MOVZ+MOVK
698 void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
699 template <class P>
700 void MOVP2R(ARM64Reg Rd, P *ptr) {
701 _assert_msg_(Is64Bit(Rd), "Can't store pointers in 32-bit registers");
702 MOVI2R(Rd, (uintptr_t)ptr);
703 }
704
705 // Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
EncodeLoadStoreIndexedInst(u32 op,u32 op2,ARM64Reg Rt,ARM64Reg Rn,s32 imm)706 void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
707 void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
708 void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG) { ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch); }
709 void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
710 void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
711 void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
712
713 void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
714 void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
715 void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
716
717 bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
718 bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
719 bool TryCMPI2R(ARM64Reg Rn, u64 imm);
EncodeLoadStoreIndexedInst(u32 op,ARM64Reg Rt,ARM64Reg Rn,s32 imm,u8 size)720
721 bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
722 bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
723 bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
724
725 // Pseudo-instruction for convenience. PUSH pushes 16 bytes even though we only push a single register.
726 // This is so the stack pointer is always 16-byte aligned, which is checked by hardware!
727 void PUSH(ARM64Reg Rd);
728 void POP(ARM64Reg Rd);
729 void PUSH2(ARM64Reg Rd, ARM64Reg Rn);
730 void POP2(ARM64Reg Rd, ARM64Reg Rn);
731
732
733 // Utility to generate a call to a std::function object.
734 //
735 // Unfortunately, calling operator() directly is undefined behavior in C++
736 // (this method might be a thunk in the case of multi-inheritance) so we
737 // have to go through a trampoline function.
738 template <typename T, typename... Args>
739 static void CallLambdaTrampoline(const std::function<T(Args...)>* f,
740 Args... args)
741 {
742 (*f)(args...);
743 }
744
745 // This function expects you to have set up the state.
EncodeMOVWideInst(u32 op,ARM64Reg Rd,u32 imm,ShiftAmount pos)746 // Overwrites X0 and X30
747 template <typename T, typename... Args>
748 ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
749 {
750 auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
751 MOVI2R(X30, (uintptr_t)trampoline);
752 MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
753 return X30;
754 }
755
EncodeBitfieldMOVInst(u32 op,ARM64Reg Rd,ARM64Reg Rn,u32 immr,u32 imms)756 // Plain function call
757 void QuickCallFunction(ARM64Reg scratchreg, const void *func);
758 template <typename T> void QuickCallFunction(ARM64Reg scratchreg, T func) {
759 QuickCallFunction(scratchreg, (const void *)func);
760 }
761 };
762
763 class ARM64FloatEmitter
764 {
765 public:
EncodeLoadStoreRegisterOffset(u32 size,u32 opc,ARM64Reg Rt,ARM64Reg Rn,ArithOption Rm)766 ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}
767
768 void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
769 void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
770
771 // Loadstore unscaled
772 void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
773 void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
774
775 // Loadstore single structure
EncodeAddSubImmInst(u32 op,bool flags,u32 shift,u32 imm,ARM64Reg Rn,ARM64Reg Rd)776 void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
777 void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
778 void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
779 void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
780 void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
781 void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
782 void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
783 void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
784
785 // Loadstore multiple structure
786 void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
787 void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
EncodeLogicalImmInst(u32 op,ARM64Reg Rd,ARM64Reg Rn,u32 immr,u32 imms,int n)788 void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
789 void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
790
791 // Loadstore paired
792 void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
793 void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
794
795 // Loadstore register offset
796 void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
797 void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
798
799 // Scalar - 1 Source
800 void FABS(ARM64Reg Rd, ARM64Reg Rn);
EncodeLoadStorePair(u32 op,u32 load,IndexType type,ARM64Reg Rt,ARM64Reg Rt2,ARM64Reg Rn,s32 imm)801 void FNEG(ARM64Reg Rd, ARM64Reg Rn);
802 void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
803 void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
804
805 // Scalar - 2 Source
806 void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
807 void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
808 void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
809 void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
810 void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
811 void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
812 void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
813 void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
814 void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
815
816 // Scalar - 3 Source. Note - the accumulator is last on ARM!
817 void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
818 void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
819 void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
820 void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
821
822 // Scalar floating point immediate
823 void FMOV(ARM64Reg Rd, uint8_t imm8);
824
825 // Vector
826 void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
827 void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
828 void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
829 void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
830 void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
831 void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
832 void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
833 void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
834 void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
835 void FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
836 void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
837 void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
838 void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
EncodeAddressInst(u32 op,ARM64Reg Rd,s32 imm)839 void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
840 void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
841 void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
842 void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
843 void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
844 void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
845 void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
846 void NOT(ARM64Reg Rd, ARM64Reg Rn);
EncodeLoadStoreUnscaled(u32 size,u32 op,ARM64Reg Rt,ARM64Reg Rn,s32 imm)847 void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
848 void MOV(ARM64Reg Rd, ARM64Reg Rn) {
849 ORR(Rd, Rn, Rn);
850 }
851
852 void UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
853 void UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
854 void SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
855 void SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
IsInRangeImm19(s64 distance)856
857 void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
858 void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
859 void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
IsInRangeImm14(s64 distance)860 void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
861 void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
862 void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
863 void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
IsInRangeImm26(s64 distance)864 void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
865 void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
866 void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
867 void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
MaskImm19(s64 distance)868 void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
869 void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
870
871 // Move
MaskImm14(s64 distance)872 void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
873 void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
874 void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
875 void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
MaskImm26(s64 distance)876 void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
877
878 // One source
879 void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
880
SetJumpTarget(FixupBranch const & branch)881 // Scalar convert float to int, in a lot of variants.
882 // Note that the scalar version of this operation has two encodings, one that goes to an integer register
883 // and one that outputs to a scalar fp register.
884 void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
885 void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
886
887 // Scalar convert int to float. No rounding mode specifier necessary.
888 void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
889 void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
890
891 // Scalar fixed point to float. scale is the number of fractional bits.
892 void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
893 void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
894
895 // Float comparison
896 void FCMP(ARM64Reg Rn, ARM64Reg Rm);
897 void FCMP(ARM64Reg Rn);
898 void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
899 void FCMPE(ARM64Reg Rn);
900 void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
901 void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
902 void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
903 void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
904 void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
905 void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
906 void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
907 void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
908
909 // Conditional select
910 void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
911
912 // Permute
913 void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
914 void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
915 void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
916 void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
917 void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
918 void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
919
920 // Shift by immediate
921 void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
922 void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
923 void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
924 void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
925 void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
926 void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
CBZ(ARM64Reg Rt)927 void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
928 void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
929 void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
930 void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
931
932 void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
933 void USHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
934 void SSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
935
CBNZ(ARM64Reg Rt)936 // vector x indexed element
937 void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
938 void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
939
940 void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
941 void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG);
942
943 // ABI related
944 void ABI_PushRegisters(uint32_t gpr_registers, uint32_t fp_registers);
B(CCFlags cond)945 void ABI_PopRegisters(uint32_t gpr_registers, uint32_t fp_registers);
946
947 private:
948 ARM64XEmitter* m_emit;
949 inline void Write32(u32 value) { m_emit->Write32(value); }
950
951 // Emitting functions
952 void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
953 void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
TBZ(ARM64Reg Rt,u8 bit)954 void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
955 void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
956 void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
957 void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
958 void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
959 void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
960 void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
961 void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
962 void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
963 void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
TBNZ(ARM64Reg Rt,u8 bit)964 void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
965 void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
966 void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
967 void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
968 void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
969 void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
970 void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
971 void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
972 void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
973 void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
B()974 void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
975 void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
976 void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
977
978 void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
979 void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
980 void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
981 void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
BL()982 void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
983 };
984
985 class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
986 {
987 private:
988 void PoisonMemory(int offset) override;
989 };
990 }
991