1 //===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains small standalone helper functions and enum definitions for
11 // the AArch64 target useful for the compiler back-end and the MC libraries.
12 // As such, it deliberately does not include references to LLVM core
13 // code gen types, passes, etc..
14 //
15 //===----------------------------------------------------------------------===//
16
17 /* Capstone Disassembly Engine */
18 /* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2019 */
19
20 #ifndef CS_LLVM_AARCH64_BASEINFO_H
21 #define CS_LLVM_AARCH64_BASEINFO_H
22
23 #include <ctype.h>
24 #include <string.h>
25 #include "AArch64Mapping.h"
26
27 #ifndef __cplusplus
28 #if defined (WIN32) || defined (WIN64) || defined (_WIN32) || defined (_WIN64)
29 #define inline /* inline */
30 #endif
31 #endif
32
getWRegFromXReg(unsigned Reg)33 inline static unsigned getWRegFromXReg(unsigned Reg)
34 {
35 switch (Reg) {
36 default: break;
37 case ARM64_REG_X0: return ARM64_REG_W0;
38 case ARM64_REG_X1: return ARM64_REG_W1;
39 case ARM64_REG_X2: return ARM64_REG_W2;
40 case ARM64_REG_X3: return ARM64_REG_W3;
41 case ARM64_REG_X4: return ARM64_REG_W4;
42 case ARM64_REG_X5: return ARM64_REG_W5;
43 case ARM64_REG_X6: return ARM64_REG_W6;
44 case ARM64_REG_X7: return ARM64_REG_W7;
45 case ARM64_REG_X8: return ARM64_REG_W8;
46 case ARM64_REG_X9: return ARM64_REG_W9;
47 case ARM64_REG_X10: return ARM64_REG_W10;
48 case ARM64_REG_X11: return ARM64_REG_W11;
49 case ARM64_REG_X12: return ARM64_REG_W12;
50 case ARM64_REG_X13: return ARM64_REG_W13;
51 case ARM64_REG_X14: return ARM64_REG_W14;
52 case ARM64_REG_X15: return ARM64_REG_W15;
53 case ARM64_REG_X16: return ARM64_REG_W16;
54 case ARM64_REG_X17: return ARM64_REG_W17;
55 case ARM64_REG_X18: return ARM64_REG_W18;
56 case ARM64_REG_X19: return ARM64_REG_W19;
57 case ARM64_REG_X20: return ARM64_REG_W20;
58 case ARM64_REG_X21: return ARM64_REG_W21;
59 case ARM64_REG_X22: return ARM64_REG_W22;
60 case ARM64_REG_X23: return ARM64_REG_W23;
61 case ARM64_REG_X24: return ARM64_REG_W24;
62 case ARM64_REG_X25: return ARM64_REG_W25;
63 case ARM64_REG_X26: return ARM64_REG_W26;
64 case ARM64_REG_X27: return ARM64_REG_W27;
65 case ARM64_REG_X28: return ARM64_REG_W28;
66 case ARM64_REG_FP: return ARM64_REG_W29;
67 case ARM64_REG_LR: return ARM64_REG_W30;
68 case ARM64_REG_SP: return ARM64_REG_WSP;
69 case ARM64_REG_XZR: return ARM64_REG_WZR;
70 }
71
72 // For anything else, return it unchanged.
73 return Reg;
74 }
75
getXRegFromWReg(unsigned Reg)76 inline static unsigned getXRegFromWReg(unsigned Reg)
77 {
78 switch (Reg) {
79 case ARM64_REG_W0: return ARM64_REG_X0;
80 case ARM64_REG_W1: return ARM64_REG_X1;
81 case ARM64_REG_W2: return ARM64_REG_X2;
82 case ARM64_REG_W3: return ARM64_REG_X3;
83 case ARM64_REG_W4: return ARM64_REG_X4;
84 case ARM64_REG_W5: return ARM64_REG_X5;
85 case ARM64_REG_W6: return ARM64_REG_X6;
86 case ARM64_REG_W7: return ARM64_REG_X7;
87 case ARM64_REG_W8: return ARM64_REG_X8;
88 case ARM64_REG_W9: return ARM64_REG_X9;
89 case ARM64_REG_W10: return ARM64_REG_X10;
90 case ARM64_REG_W11: return ARM64_REG_X11;
91 case ARM64_REG_W12: return ARM64_REG_X12;
92 case ARM64_REG_W13: return ARM64_REG_X13;
93 case ARM64_REG_W14: return ARM64_REG_X14;
94 case ARM64_REG_W15: return ARM64_REG_X15;
95 case ARM64_REG_W16: return ARM64_REG_X16;
96 case ARM64_REG_W17: return ARM64_REG_X17;
97 case ARM64_REG_W18: return ARM64_REG_X18;
98 case ARM64_REG_W19: return ARM64_REG_X19;
99 case ARM64_REG_W20: return ARM64_REG_X20;
100 case ARM64_REG_W21: return ARM64_REG_X21;
101 case ARM64_REG_W22: return ARM64_REG_X22;
102 case ARM64_REG_W23: return ARM64_REG_X23;
103 case ARM64_REG_W24: return ARM64_REG_X24;
104 case ARM64_REG_W25: return ARM64_REG_X25;
105 case ARM64_REG_W26: return ARM64_REG_X26;
106 case ARM64_REG_W27: return ARM64_REG_X27;
107 case ARM64_REG_W28: return ARM64_REG_X28;
108 case ARM64_REG_W29: return ARM64_REG_FP;
109 case ARM64_REG_W30: return ARM64_REG_LR;
110 case ARM64_REG_WSP: return ARM64_REG_SP;
111 case ARM64_REG_WZR: return ARM64_REG_XZR;
112 }
113
114 // For anything else, return it unchanged.
115 return Reg;
116 }
117
getBRegFromDReg(unsigned Reg)118 inline static unsigned getBRegFromDReg(unsigned Reg)
119 {
120 switch (Reg) {
121 case ARM64_REG_D0: return ARM64_REG_B0;
122 case ARM64_REG_D1: return ARM64_REG_B1;
123 case ARM64_REG_D2: return ARM64_REG_B2;
124 case ARM64_REG_D3: return ARM64_REG_B3;
125 case ARM64_REG_D4: return ARM64_REG_B4;
126 case ARM64_REG_D5: return ARM64_REG_B5;
127 case ARM64_REG_D6: return ARM64_REG_B6;
128 case ARM64_REG_D7: return ARM64_REG_B7;
129 case ARM64_REG_D8: return ARM64_REG_B8;
130 case ARM64_REG_D9: return ARM64_REG_B9;
131 case ARM64_REG_D10: return ARM64_REG_B10;
132 case ARM64_REG_D11: return ARM64_REG_B11;
133 case ARM64_REG_D12: return ARM64_REG_B12;
134 case ARM64_REG_D13: return ARM64_REG_B13;
135 case ARM64_REG_D14: return ARM64_REG_B14;
136 case ARM64_REG_D15: return ARM64_REG_B15;
137 case ARM64_REG_D16: return ARM64_REG_B16;
138 case ARM64_REG_D17: return ARM64_REG_B17;
139 case ARM64_REG_D18: return ARM64_REG_B18;
140 case ARM64_REG_D19: return ARM64_REG_B19;
141 case ARM64_REG_D20: return ARM64_REG_B20;
142 case ARM64_REG_D21: return ARM64_REG_B21;
143 case ARM64_REG_D22: return ARM64_REG_B22;
144 case ARM64_REG_D23: return ARM64_REG_B23;
145 case ARM64_REG_D24: return ARM64_REG_B24;
146 case ARM64_REG_D25: return ARM64_REG_B25;
147 case ARM64_REG_D26: return ARM64_REG_B26;
148 case ARM64_REG_D27: return ARM64_REG_B27;
149 case ARM64_REG_D28: return ARM64_REG_B28;
150 case ARM64_REG_D29: return ARM64_REG_B29;
151 case ARM64_REG_D30: return ARM64_REG_B30;
152 case ARM64_REG_D31: return ARM64_REG_B31;
153 }
154
155 // For anything else, return it unchanged.
156 return Reg;
157 }
158
getDRegFromBReg(unsigned Reg)159 inline static unsigned getDRegFromBReg(unsigned Reg)
160 {
161 switch (Reg) {
162 case ARM64_REG_B0: return ARM64_REG_D0;
163 case ARM64_REG_B1: return ARM64_REG_D1;
164 case ARM64_REG_B2: return ARM64_REG_D2;
165 case ARM64_REG_B3: return ARM64_REG_D3;
166 case ARM64_REG_B4: return ARM64_REG_D4;
167 case ARM64_REG_B5: return ARM64_REG_D5;
168 case ARM64_REG_B6: return ARM64_REG_D6;
169 case ARM64_REG_B7: return ARM64_REG_D7;
170 case ARM64_REG_B8: return ARM64_REG_D8;
171 case ARM64_REG_B9: return ARM64_REG_D9;
172 case ARM64_REG_B10: return ARM64_REG_D10;
173 case ARM64_REG_B11: return ARM64_REG_D11;
174 case ARM64_REG_B12: return ARM64_REG_D12;
175 case ARM64_REG_B13: return ARM64_REG_D13;
176 case ARM64_REG_B14: return ARM64_REG_D14;
177 case ARM64_REG_B15: return ARM64_REG_D15;
178 case ARM64_REG_B16: return ARM64_REG_D16;
179 case ARM64_REG_B17: return ARM64_REG_D17;
180 case ARM64_REG_B18: return ARM64_REG_D18;
181 case ARM64_REG_B19: return ARM64_REG_D19;
182 case ARM64_REG_B20: return ARM64_REG_D20;
183 case ARM64_REG_B21: return ARM64_REG_D21;
184 case ARM64_REG_B22: return ARM64_REG_D22;
185 case ARM64_REG_B23: return ARM64_REG_D23;
186 case ARM64_REG_B24: return ARM64_REG_D24;
187 case ARM64_REG_B25: return ARM64_REG_D25;
188 case ARM64_REG_B26: return ARM64_REG_D26;
189 case ARM64_REG_B27: return ARM64_REG_D27;
190 case ARM64_REG_B28: return ARM64_REG_D28;
191 case ARM64_REG_B29: return ARM64_REG_D29;
192 case ARM64_REG_B30: return ARM64_REG_D30;
193 case ARM64_REG_B31: return ARM64_REG_D31;
194 }
195
196 // For anything else, return it unchanged.
197 return Reg;
198 }
199
// Enums corresponding to AArch64 condition codes.
// The CondCodes constants map directly to the 4-bit encoding of the
// condition field for predicated instructions.
// Each enumerator is annotated as: meaning (integer) | meaning (floating-point).
typedef enum AArch64CC_CondCode {
	AArch64CC_EQ = 0x0, // Equal                   | Equal
	AArch64CC_NE = 0x1, // Not equal               | Not equal, or unordered
	AArch64CC_HS = 0x2, // Unsigned higher or same | >, ==, or unordered
	AArch64CC_LO = 0x3, // Unsigned lower          | Less than
	AArch64CC_MI = 0x4, // Minus, negative         | Less than
	AArch64CC_PL = 0x5, // Plus, positive or zero  | >, ==, or unordered
	AArch64CC_VS = 0x6, // Overflow                | Unordered
	AArch64CC_VC = 0x7, // No overflow             | Not unordered
	AArch64CC_HI = 0x8, // Unsigned higher         | Greater than, or unordered
	AArch64CC_LS = 0x9, // Unsigned lower or same  | Less than or equal
	AArch64CC_GE = 0xa, // Greater than or equal   | Greater than or equal
	AArch64CC_LT = 0xb, // Less than               | Less than, or unordered
	AArch64CC_GT = 0xc, // Greater than            | Greater than
	AArch64CC_LE = 0xd, // Less than or equal      | <, ==, or unordered
	AArch64CC_AL = 0xe, // Always (unconditional)  | Always (unconditional)
	AArch64CC_NV = 0xf, // Always (unconditional)  | Always (unconditional)
	// Note the NV exists purely to disassemble 0b1111. Execution is "always".
	AArch64CC_Invalid
} AArch64CC_CondCode;

/// Return the condition code with the opposite sense of Code.
/// Codes are laid out in complementary pairs (EQ/NE, HS/LO, ...), so the
/// inverse is obtained by toggling bit 0 of the encoding.
inline static AArch64CC_CondCode getInvertedCondCode(AArch64CC_CondCode Code)
{
	unsigned Flipped = (unsigned)Code ^ 1u;

	return (AArch64CC_CondCode)Flipped;
}

/// Return the lower-case mnemonic ("eq", "ne", ..., "nv") for CC, or NULL when
/// CC is not one of the sixteen encodable condition codes.
inline static const char *getCondCodeName(AArch64CC_CondCode CC)
{
	// Indexed by the 4-bit condition encoding (EQ = 0x0 ... NV = 0xf).
	static const char *const Mnemonics[] = {
		"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
		"hi", "ls", "ge", "lt", "gt", "le", "al", "nv",
	};
	unsigned Index = (unsigned)CC;

	if (Index >= sizeof(Mnemonics) / sizeof(Mnemonics[0]))
		return NULL; // never reach

	return Mnemonics[Index];
}

/// Given a condition code, return NZCV flags that would satisfy that condition.
/// The flag bits are in the format expected by the ccmp instructions.
/// Note that many different flag settings can satisfy a given condition code,
/// this function just returns one of them.
/// Codes without an explicit entry (AL, NV, anything out of range) yield the
/// same value as EQ.
inline static unsigned getNZCVToSatisfyCondCode(AArch64CC_CondCode Code)
{
	// NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7.
	enum { FlagN = 8, FlagZ = 4, FlagC = 2, FlagV = 1 };

	switch (Code) {
	case AArch64CC_HS:	// C == 1
	case AArch64CC_HI:	// C == 1 && Z == 0
		return FlagC;

	case AArch64CC_MI:	// N == 1
	case AArch64CC_LT:	// N != V
		return FlagN;

	case AArch64CC_VS:	// V == 1
		return FlagV;

	case AArch64CC_NE:	// Z == 0
	case AArch64CC_LO:	// C == 0
	case AArch64CC_PL:	// N == 0
	case AArch64CC_VC:	// V == 0
	case AArch64CC_LS:	// C == 0 || Z == 1
	case AArch64CC_GE:	// N == V
	case AArch64CC_GT:	// Z == 0 && N == V
		return 0;

	case AArch64CC_EQ:	// Z == 1
	case AArch64CC_LE:	// Z == 1 || N != V
	default:		// llvm_unreachable("Unknown condition code");
		return FlagZ;
	}
}
279
/// Instances of this struct can perform bidirectional mapping from random
/// identifier strings to operand encodings. For example "MSR" takes a named
/// system-register which must be encoded somehow and decoded for printing. This
/// central location means that the information for those transformations is not
/// duplicated and remains in sync.
///
/// FIXME: currently the algorithm is a completely unoptimised linear
/// search. Obviously this could be improved, but we would probably want to work
/// out just how often these instructions are emitted before working on it. It
/// might even be optimal to just reorder the tables for the common instructions
/// rather than changing the algorithm.

/// One table entry: an identifier string paired with its numeric encoding.
typedef struct A64NamedImmMapper_Mapping {
	const char *Name;	// identifier string
	uint32_t Value;		// encoding associated with Name
} A64NamedImmMapper_Mapping;

/// A table of name/value pairs plus its element count.
typedef struct A64NamedImmMapper {
	const A64NamedImmMapper_Mapping *Pairs;	// table of mappings (not owned here as far as this header shows)
	size_t NumPairs;			// number of entries in Pairs
	uint32_t TooBigImm;			// NOTE(review): presumably the first immediate value that is out of range - confirm against the lookup code
} A64NamedImmMapper;
301
/// Mapper for system registers: two tables of name/value pairs.
/// Only InstPairs has an explicit element count in this struct; how the length
/// of SysRegPairs is determined is not visible here - confirm in the users.
typedef struct A64SysRegMapper {
	const A64NamedImmMapper_Mapping *SysRegPairs;	// table of system-register mappings
	const A64NamedImmMapper_Mapping *InstPairs;	// second, instruction-specific table (presumably - verify against callers)
	size_t NumInstPairs;				// number of entries in InstPairs
} A64SysRegMapper;
307
/// Identifiers for shift and extend specifiers. A64SE_Invalid is -1; the
/// remaining enumerators are numbered consecutively starting from 0.
typedef enum A64SE_ShiftExtSpecifiers {
	A64SE_Invalid = -1,
	// Shift specifiers.
	A64SE_LSL,
	A64SE_MSL,
	A64SE_LSR,
	A64SE_ASR,
	A64SE_ROR,

	// UXT* extend specifiers (byte, halfword, word, doubleword suffixes).
	A64SE_UXTB,
	A64SE_UXTH,
	A64SE_UXTW,
	A64SE_UXTX,

	// SXT* extend specifiers (byte, halfword, word, doubleword suffixes).
	A64SE_SXTB,
	A64SE_SXTH,
	A64SE_SXTW,
	A64SE_SXTX
} A64SE_ShiftExtSpecifiers;
326
/// Vector arrangement identifiers. A64Layout_Invalid is -1; the valid layouts
/// are numbered consecutively from A64Layout_VL_8B (0) to A64Layout_VL_D.
/// AArch64StringToVectorLayout() relies on that contiguous numbering.
typedef enum A64Layout_VectorLayout {
	A64Layout_Invalid = -1,
	A64Layout_VL_8B,
	A64Layout_VL_4H,
	A64Layout_VL_2S,
	A64Layout_VL_1D,

	A64Layout_VL_16B,
	A64Layout_VL_8H,
	A64Layout_VL_4S,
	A64Layout_VL_2D,

	// Bare layout for the 128-bit vector
	// (only show ".b", ".h", ".s", ".d" without vector number)
	A64Layout_VL_B,
	A64Layout_VL_H,
	A64Layout_VL_S,
	A64Layout_VL_D
} A64Layout_VectorLayout;

/// Return the assembly suffix (".8b", ".4h", ..., ".d") for Layout, or NULL
/// when Layout is not a valid layout (e.g. A64Layout_Invalid).
inline static const char *AArch64VectorLayoutToString(A64Layout_VectorLayout Layout)
{
	switch (Layout) {
		default: return NULL;	// never reach
		case A64Layout_VL_8B:  return ".8b";
		case A64Layout_VL_4H:  return ".4h";
		case A64Layout_VL_2S:  return ".2s";
		case A64Layout_VL_1D:  return ".1d";
		case A64Layout_VL_16B:  return ".16b";
		case A64Layout_VL_8H:  return ".8h";
		case A64Layout_VL_4S:  return ".4s";
		case A64Layout_VL_2D:  return ".2d";
		case A64Layout_VL_B:  return ".b";
		case A64Layout_VL_H:  return ".h";
		case A64Layout_VL_S:  return ".s";
		case A64Layout_VL_D:  return ".d";
	}
}

/// Parse an assembly layout suffix back into its A64Layout_VectorLayout value.
///
/// The match is exact and case-sensitive. Returns A64Layout_Invalid when
/// LayoutStr is NULL or is not one of the suffixes produced by
/// AArch64VectorLayoutToString().
///
/// The lookup is derived from AArch64VectorLayoutToString() so that the two
/// directions cannot drift apart; the previous hand-written chain of
/// comparisons had no ".h" entry, so A64Layout_VL_H could not be parsed.
inline static A64Layout_VectorLayout AArch64StringToVectorLayout(const char *LayoutStr)
{
	int Candidate;

	// strcmp() on a null pointer is undefined behaviour; treat it as "no match".
	if (LayoutStr == NULL)
		return A64Layout_Invalid;

	for (Candidate = A64Layout_VL_8B; Candidate <= A64Layout_VL_D; Candidate++) {
		const char *Suffix =
			AArch64VectorLayoutToString((A64Layout_VectorLayout)Candidate);

		if (Suffix != NULL && strcmp(LayoutStr, Suffix) == 0)
			return (A64Layout_VectorLayout)Candidate;
	}

	return A64Layout_Invalid;
}
403
/// Target Operand Flag enum.
///
/// Values 1-7 (MO_PAGE .. MO_HI12) all fit inside the low four bits covered by
/// MO_FRAGMENT (0xf); the remaining flags (MO_GOT and up) are individual bits
/// starting at 0x10.
enum TOF {
	//===------------------------------------------------------------------===//
	// AArch64 Specific MachineOperand flags.

	/// MO_NO_FLAG - No flag set (implicitly 0 as the first enumerator).
	MO_NO_FLAG,

	/// MO_FRAGMENT - Low four bits; presumably the mask used to extract the
	/// MO_PAGE .. MO_HI12 selector from a flag word (verify against users).
	MO_FRAGMENT = 0xf,

	/// MO_PAGE - A symbol operand with this flag represents the pc-relative
	/// offset of the 4K page containing the symbol.  This is used with the
	/// ADRP instruction.
	MO_PAGE = 1,

	/// MO_PAGEOFF - A symbol operand with this flag represents the offset of
	/// that symbol within a 4K page.  This offset is added to the page address
	/// to produce the complete address.
	MO_PAGEOFF = 2,

	/// MO_G3 - A symbol operand with this flag (granule 3) represents the high
	/// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
	MO_G3 = 3,

	/// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
	/// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
	MO_G2 = 4,

	/// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
	/// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
	MO_G1 = 5,

	/// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
	/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
	MO_G0 = 6,

	/// MO_HI12 - This flag indicates that a symbol operand represents the bits
	/// 13-24 of a 64-bit address, used in an arithmetic immediate-shifted-left-
	/// by-12-bits instruction.
	MO_HI12 = 7,

	/// MO_GOT - This flag indicates that a symbol operand represents the
	/// address of the GOT entry for the symbol, rather than the address of
	/// the symbol itself.
	MO_GOT = 0x10,

	/// MO_NC - Indicates whether the linker is expected to check the symbol
	/// reference for overflow. For example in an ADRP/ADD pair of relocations
	/// the ADRP usually does check, but not the ADD.
	MO_NC = 0x20,

	/// MO_TLS - Indicates that the operand being accessed is some kind of
	/// thread-local symbol. On Darwin, only one type of thread-local access
	/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
	/// referee will affect interpretation.
	MO_TLS = 0x40,

	/// MO_DLLIMPORT - On a symbol operand, this represents that the reference
	/// to the symbol is for an import stub.  This is used for DLL import
	/// storage class indication on Windows.
	MO_DLLIMPORT = 0x80,
};
465
/// Generic system-operand table entry: a name paired with a 16-bit encoding.
typedef struct SysAlias {
	const char *Name;	// operand name
	uint16_t Encoding;	// 16-bit encoding associated with Name
} SysAlias;

// The operand families below carry no extra fields, so each family name is
// simply an alias for SysAlias. These are preprocessor macros, not typedefs:
// the short names (AT, DB, DC, ...) are replaced textually wherever they
// appear after this point.
#define AT SysAlias
#define DB SysAlias
#define DC SysAlias
#define SVEPRFM SysAlias
#define PRFM SysAlias
#define PSB SysAlias
#define ISB SysAlias
#define TSB SysAlias
#define PState SysAlias
#define SVEPREDPAT SysAlias
481
/// System-operand table entry that additionally records a NeedsReg flag.
typedef struct SysAliasReg {
	const char *Name;	// operand name
	uint16_t Encoding;	// 16-bit encoding associated with Name
	bool NeedsReg;		// presumably true when the operation takes a register operand - confirm against users
} SysAliasReg;

// IC and TLBI entries use the SysAliasReg layout (textual macro aliases).
#define IC SysAliasReg
#define TLBI SysAliasReg
490
/// System-register table entry: name, 16-bit encoding and access flags.
typedef struct SysAliasSysReg {
	const char *Name;	// register name
	uint16_t Encoding;	// 16-bit encoding associated with Name
	bool Readable;		// access flag; presumably "may be read" (e.g. via MRS) - confirm
	bool Writeable;		// access flag; presumably "may be written" (e.g. via MSR) - confirm
} SysAliasSysReg;

// SysReg entries use the SysAliasSysReg layout (textual macro alias).
#define SysReg SysAliasSysReg
499
/// Table entry for an exact floating-point immediate: a name, an integer
/// enum value and a textual representation.
typedef struct ExactFPImm {
	const char *Name;	// entry name
	int Enum;		// enum value; presumably the key matched by lookupExactFPImmByEnum() - confirm
	const char *Repr;	// textual representation of the immediate
} ExactFPImm;
505
// Lookup functions for the system-operand tables, keyed by 16-bit encoding.
// They are only declared here; the definitions live elsewhere.
// NOTE(review): presumably each returns NULL when no entry matches - confirm
// in the implementation before relying on it.
// The return types AT, DB, DC, ... are the macro aliases defined above, so
// these all return pointers to SysAlias / SysAliasReg / SysAliasSysReg.
const AT *lookupATByEncoding(uint16_t Encoding);
const DB *lookupDBByEncoding(uint16_t Encoding);
const DC *lookupDCByEncoding(uint16_t Encoding);
const IC *lookupICByEncoding(uint16_t Encoding);
const TLBI *lookupTLBIByEncoding(uint16_t Encoding);
const SVEPRFM *lookupSVEPRFMByEncoding(uint16_t Encoding);
const PRFM *lookupPRFMByEncoding(uint16_t Encoding);
// Unlike its siblings, the PSB lookup carries an AArch64PSBHint_ prefix.
const PSB *AArch64PSBHint_lookupPSBByEncoding(uint16_t Encoding);
const ISB *lookupISBByEncoding(uint16_t Encoding);
const TSB *lookupTSBByEncoding(uint16_t Encoding);
const SysReg *lookupSysRegByEncoding(uint16_t Encoding);
const PState *lookupPStateByEncoding(uint16_t Encoding);
const SVEPREDPAT *lookupSVEPREDPATByEncoding(uint16_t Encoding);
// The parameter is named Encoding and typed uint16_t, whereas ExactFPImm.Enum
// is an int; presumably the argument is compared against Enum - confirm.
const ExactFPImm *lookupExactFPImmByEnum(uint16_t Encoding);

// NOTE: result must be 128 bytes to contain the result
// Writes a string derived from Bits into the caller-provided buffer `result`;
// no buffer length is passed, so the caller must honour the size above.
void AArch64SysReg_genericRegisterString(uint32_t Bits, char *result);
523
524 #include "AArch64GenSystemOperands_enum.inc"
525
526 #endif
527