1 /*
2 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef CPU_AARCH64_ASSEMBLER_AARCH64_HPP
27 #define CPU_AARCH64_ASSEMBLER_AARCH64_HPP
28
29 #include "asm/register.hpp"
30
#ifdef __GNUC__

// __nop needs volatile so that compiler doesn't optimize it away
#define NOP() asm volatile ("nop");

#elif defined(_MSC_VER)

// Use MSVC intrinsic: https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019#I
#define NOP() __nop();

#endif
42
43
44 // definitions of various symbolic names for machine registers
45
46 // First intercalls between C and Java which use 8 general registers
47 // and 8 floating registers
48
49 // we also have to copy between x86 and ARM registers but that's a
50 // secondary complication -- not all code employing C call convention
51 // executes as x86 code though -- we generate some of it
52
// Argument-passing limits shared by the C and Java calling
// conventions: both pass the first eight integer arguments and the
// first eight floating-point arguments in registers.
class Argument {
 public:
  enum {
    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ...)

    n_int_register_parameters_j   = 8,  // r1, ... r7, r0 (j_rarg0, j_rarg1, ...)
    n_float_register_parameters_j = 8   // v0, v1, ... v7 (j_farg0, j_farg1, ...)
  };
};
63
// C calling convention integer argument registers: c_rargN is rN.
REGISTER_DECLARATION(Register, c_rarg0, r0);
REGISTER_DECLARATION(Register, c_rarg1, r1);
REGISTER_DECLARATION(Register, c_rarg2, r2);
REGISTER_DECLARATION(Register, c_rarg3, r3);
REGISTER_DECLARATION(Register, c_rarg4, r4);
REGISTER_DECLARATION(Register, c_rarg5, r5);
REGISTER_DECLARATION(Register, c_rarg6, r6);
REGISTER_DECLARATION(Register, c_rarg7, r7);

// C calling convention floating-point argument registers: c_fargN is vN.
REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
81
82 // Symbolically name the register arguments used by the Java calling convention.
83 // We have control over the convention for java so we can do what we please.
84 // What pleases us is to offset the java calling convention so that when
85 // we call a suitable jni method the arguments are lined up and we don't
// have to do much shuffling. A suitable jni method is non-static and has a
// small number of arguments.
88 //
89 // |--------------------------------------------------------------------|
90 // | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 |
91 // |--------------------------------------------------------------------|
92 // | r0 r1 r2 r3 r4 r5 r6 r7 |
93 // |--------------------------------------------------------------------|
94 // | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 |
95 // |--------------------------------------------------------------------|
96
97
// Java integer argument registers: rotated by one relative to the C
// convention (j_rarg0 == c_rarg1, ..., j_rarg7 == c_rarg0); see the
// table above.
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);

// Java floating args are passed as per C

REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
REGISTER_DECLARATION(FloatRegister, j_farg7, v7);
117
// registers used to hold VM data either temporarily within a method
// or across method calls

// volatile (caller-save) registers

// r8 is used for indirect result location return
// we use it and r9 as scratch registers
REGISTER_DECLARATION(Register, rscratch1, r8);
REGISTER_DECLARATION(Register, rscratch2, r9);

// current method -- must be in a call-clobbered register
REGISTER_DECLARATION(Register, rmethod, r12);

// non-volatile (callee-save) registers are r16-29
// of which the following are dedicated global state

// link register
REGISTER_DECLARATION(Register, lr, r30);
// frame pointer
REGISTER_DECLARATION(Register, rfp, r29);
// current thread
REGISTER_DECLARATION(Register, rthread, r28);
// base of heap
REGISTER_DECLARATION(Register, rheapbase, r27);
// constant pool cache
REGISTER_DECLARATION(Register, rcpool, r26);
// monitors allocated on stack
REGISTER_DECLARATION(Register, rmonitors, r25);
// locals on stack
REGISTER_DECLARATION(Register, rlocals, r24);
// bytecode pointer
REGISTER_DECLARATION(Register, rbcp, r22);
// Dispatch table base
REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp, r20);

// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);
157
// Assert ARG1 holds, reusing its source text as the failure message.
#define assert_cond(ARG1) assert(ARG1, #ARG1)

namespace asm_util {
  // Encode imm as an AArch64 logical-instruction bitmask immediate
  // (13-bit N:immr:imms field); 32- or 64-bit form selected by is32.
  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
};

using namespace asm_util;
165
166
167 class Assembler;
168
// Builder for a single 32-bit AArch64 instruction word.  Fields are
// ORed in via f()/sf()/rf() and friends; in debug builds the 'bits'
// member records which bit positions have been assigned so that
// emit() can assert every bit was set exactly once.
class Instruction_aarch64 {
  unsigned insn;       // the instruction word being assembled
#ifdef ASSERT
  unsigned bits;       // mask of bit positions already assigned
#endif
  Assembler *assem;    // owning assembler; used when this is destroyed

public:

  Instruction_aarch64(class Assembler *as) {
#ifdef ASSERT
    bits = 0;
#endif
    insn = 0;
    assem = as;
  }

  // Defined after Assembler is complete (emits the finished word).
  inline ~Instruction_aarch64();

  unsigned &get_insn() { return insn; }
#ifdef ASSERT
  unsigned &get_bits() { return bits; }
#endif

  // Sign-extend the field val[hi:lo] to a full 32-bit value.
  static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
    union {
      unsigned u;
      int n;
    };

    u = val << (31 - hi);     // left-justify the field
    n = n >> (31 - hi + lo);  // arithmetic shift back: sign-extends
    return n;
  }

  // Extract the unsigned bitfield val[msb:lsb].
  static inline uint32_t extract(uint32_t val, int msb, int lsb) {
    int nbits = msb - lsb + 1;
    assert_cond(msb >= lsb);
    uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
    uint32_t result = val >> lsb;
    result &= mask;
    return result;
  }

  // Extract the bitfield val[msb:lsb] and sign-extend it.
  static inline int32_t sextract(uint32_t val, int msb, int lsb) {
    uint32_t uval = extract(val, msb, lsb);
    return extend(uval, msb - lsb);
  }

  // Patch the unsigned field [msb:lsb] of the instruction at address a.
  static void patch(address a, int msb, int lsb, uint64_t val) {
    int nbits = msb - lsb + 1;
    guarantee(val < (1ULL << nbits), "Field too big for insn");
    assert_cond(msb >= lsb);
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    val <<= lsb;
    mask <<= lsb;
    unsigned target = *(unsigned *)a;
    target &= ~mask;
    target |= val;
    *(unsigned *)a = target;
  }

  // Patch the signed field [msb:lsb] of the instruction at address a.
  static void spatch(address a, int msb, int lsb, int64_t val) {
    int nbits = msb - lsb + 1;
    int64_t chk = val >> (nbits - 1);   // remaining high bits must all be sign
    guarantee (chk == -1 || chk == 0, "Field too big for insn");
    unsigned uval = val;
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    uval &= mask;
    uval <<= lsb;
    mask <<= lsb;
    unsigned target = *(unsigned *)a;
    target &= ~mask;
    target |= uval;
    *(unsigned *)a = target;
  }

  // Set the unsigned field [msb:lsb] of the instruction being built.
  void f(unsigned val, int msb, int lsb) {
    int nbits = msb - lsb + 1;
    guarantee(val < (1ULL << nbits), "Field too big for insn");
    assert_cond(msb >= lsb);
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    val <<= lsb;
    mask <<= lsb;
    insn |= val;
    assert_cond((bits & mask) == 0);   // field must not already be assigned
#ifdef ASSERT
    bits |= mask;
#endif
  }

  // Set the single bit [bit].
  void f(unsigned val, int bit) {
    f(val, bit, bit);
  }

  // Set the signed field [msb:lsb]; val must fit in the field.
  void sf(int64_t val, int msb, int lsb) {
    int nbits = msb - lsb + 1;
    int64_t chk = val >> (nbits - 1);
    guarantee (chk == -1 || chk == 0, "Field too big for insn");
    unsigned uval = val;
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    uval &= mask;
    f(uval, lsb + nbits - 1, lsb);
  }

  // Encode a general-purpose register in the 5-bit field at lsb.
  void rf(Register r, int lsb) {
    f(r->encoding_nocheck(), lsb + 4, lsb);
  }

  // reg|ZR: adjust so that zr encodes as 31 in the 5-bit field.
  void zrf(Register r, int lsb) {
    f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
  }

  // reg|SP: sp encodes as 31 in the 5-bit field.
  void srf(Register r, int lsb) {
    f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
  }

  // Encode a floating-point/SIMD register in the 5-bit field at lsb.
  void rf(FloatRegister r, int lsb) {
    f(r->encoding_nocheck(), lsb + 4, lsb);
  }

  // Encode an SVE predicate register in the 4-bit field at lsb.
  void prf(PRegister r, int lsb) {
    f(r->encoding_nocheck(), lsb + 3, lsb);
  }

  // Encode an SVE governing predicate in the 3-bit field at lsb (p0-p7).
  void pgrf(PRegister r, int lsb) {
    f(r->encoding_nocheck(), lsb + 2, lsb);
  }

  // Read back the field [msb:lsb]; in debug builds every bit of the
  // field must already have been assigned.
  unsigned get(int msb = 31, int lsb = 0) {
    int nbits = msb - lsb + 1;
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
    assert_cond((bits & mask) == mask);
    return (insn & mask) >> lsb;
  }

  // OR in a fixed opcode pattern; mask marks the bits it occupies.
  void fixed(unsigned value, unsigned mask) {
    assert_cond ((mask & bits) == 0);
#ifdef ASSERT
    bits |= mask;
#endif
    insn |= value;
  }
};
315
// Begin building an instruction: declare a stack-local
// Instruction_aarch64 and make it the assembler's current instruction.
#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use)

// Base class for pre/post-indexed address expressions: a base register
// plus an immediate offset.
class PrePost {
  int _offset;
  Register _r;
public:
  PrePost(Register reg, int o) : _offset(o), _r(reg) { }
  int offset() { return _offset; }
  Register reg() { return _r; }
};
326
327 class Pre : public PrePost {
328 public:
Pre(Register reg,int o)329 Pre(Register reg, int o) : PrePost(reg, o) { }
330 };
331 class Post : public PrePost {
332 Register _idx;
333 bool _is_postreg;
334 public:
Post(Register reg,int o)335 Post(Register reg, int o) : PrePost(reg, o) { _idx = NULL; _is_postreg = false; }
Post(Register reg,Register idx)336 Post(Register reg, Register idx) : PrePost(reg, 0) { _idx = idx; _is_postreg = true; }
idx_reg()337 Register idx_reg() { return _idx; }
is_postreg()338 bool is_postreg() {return _is_postreg; }
339 };
340
// Extend operations used by register-offset addressing.
namespace ext
{
  enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
};
345
346 // Addressing modes
347 class Address {
348 public:
349
350 enum mode { no_mode, base_plus_offset, pre, post, post_reg, pcrel,
351 base_plus_offset_reg, literal };
352
353 // Shift and extend for base reg + reg offset addressing
354 class extend {
355 int _option, _shift;
356 ext::operation _op;
357 public:
extend()358 extend() { }
extend(int s,int o,ext::operation op)359 extend(int s, int o, ext::operation op) : _option(o), _shift(s), _op(op) { }
option() const360 int option() const{ return _option; }
shift() const361 int shift() const { return _shift; }
op() const362 ext::operation op() const { return _op; }
363 };
364 class uxtw : public extend {
365 public:
uxtw(int shift=-1)366 uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
367 };
368 class lsl : public extend {
369 public:
lsl(int shift=-1)370 lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
371 };
372 class sxtw : public extend {
373 public:
sxtw(int shift=-1)374 sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
375 };
376 class sxtx : public extend {
377 public:
sxtx(int shift=-1)378 sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
379 };
380
381 private:
382 Register _base;
383 Register _index;
384 int64_t _offset;
385 enum mode _mode;
386 extend _ext;
387
388 RelocationHolder _rspec;
389
390 // Typically we use AddressLiterals we want to use their rval
391 // However in some situations we want the lval (effect address) of
392 // the item. We provide a special factory for making those lvals.
393 bool _is_lval;
394
395 // If the target is far we'll need to load the ea of this to a
396 // register to reach it. Otherwise if near we can do PC-relative
397 // addressing.
398 address _target;
399
400 public:
Address()401 Address()
402 : _mode(no_mode) { }
Address(Register r)403 Address(Register r)
404 : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(0) { }
Address(Register r,int o)405 Address(Register r, int o)
406 : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
Address(Register r,long o)407 Address(Register r, long o)
408 : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
Address(Register r,long long o)409 Address(Register r, long long o)
410 : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
Address(Register r,unsigned int o)411 Address(Register r, unsigned int o)
412 : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
Address(Register r,unsigned long o)413 Address(Register r, unsigned long o)
414 : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
Address(Register r,unsigned long long o)415 Address(Register r, unsigned long long o)
416 : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
Address(Register r,ByteSize disp)417 Address(Register r, ByteSize disp)
418 : Address(r, in_bytes(disp)) { }
Address(Register r,Register r1,extend ext=lsl ())419 Address(Register r, Register r1, extend ext = lsl())
420 : _base(r), _index(r1), _offset(0), _mode(base_plus_offset_reg),
421 _ext(ext), _target(0) { }
Address(Pre p)422 Address(Pre p)
423 : _base(p.reg()), _offset(p.offset()), _mode(pre) { }
Address(Post p)424 Address(Post p)
425 : _base(p.reg()), _index(p.idx_reg()), _offset(p.offset()),
426 _mode(p.is_postreg() ? post_reg : post), _target(0) { }
Address(address target,RelocationHolder const & rspec)427 Address(address target, RelocationHolder const& rspec)
428 : _mode(literal),
429 _rspec(rspec),
430 _is_lval(false),
431 _target(target) { }
432 Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
Address(Register base,RegisterOrConstant index,extend ext=lsl ())433 Address(Register base, RegisterOrConstant index, extend ext = lsl())
434 : _base (base),
435 _offset(0), _ext(ext), _target(0) {
436 if (index.is_register()) {
437 _mode = base_plus_offset_reg;
438 _index = index.as_register();
439 } else {
440 guarantee(ext.option() == ext::uxtx, "should be");
441 assert(index.is_constant(), "should be");
442 _mode = base_plus_offset;
443 _offset = index.as_constant() << ext.shift();
444 }
445 }
446
base() const447 Register base() const {
448 guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
449 || _mode == post || _mode == post_reg),
450 "wrong mode");
451 return _base;
452 }
offset() const453 int64_t offset() const {
454 return _offset;
455 }
index() const456 Register index() const {
457 return _index;
458 }
getMode() const459 mode getMode() const {
460 return _mode;
461 }
uses(Register reg) const462 bool uses(Register reg) const { return _base == reg || _index == reg; }
target() const463 address target() const { return _target; }
rspec() const464 const RelocationHolder& rspec() const { return _rspec; }
465
encode(Instruction_aarch64 * i) const466 void encode(Instruction_aarch64 *i) const {
467 i->f(0b111, 29, 27);
468 i->srf(_base, 5);
469
470 switch(_mode) {
471 case base_plus_offset:
472 {
473 unsigned size = i->get(31, 30);
474 if (i->get(26, 26) && i->get(23, 23)) {
475 // SIMD Q Type - Size = 128 bits
476 assert(size == 0, "bad size");
477 size = 0b100;
478 }
479 unsigned mask = (1 << size) - 1;
480 if (_offset < 0 || _offset & mask)
481 {
482 i->f(0b00, 25, 24);
483 i->f(0, 21), i->f(0b00, 11, 10);
484 i->sf(_offset, 20, 12);
485 } else {
486 i->f(0b01, 25, 24);
487 i->f(_offset >> size, 21, 10);
488 }
489 }
490 break;
491
492 case base_plus_offset_reg:
493 {
494 i->f(0b00, 25, 24);
495 i->f(1, 21);
496 i->rf(_index, 16);
497 i->f(_ext.option(), 15, 13);
498 unsigned size = i->get(31, 30);
499 if (i->get(26, 26) && i->get(23, 23)) {
500 // SIMD Q Type - Size = 128 bits
501 assert(size == 0, "bad size");
502 size = 0b100;
503 }
504 if (size == 0) // It's a byte
505 i->f(_ext.shift() >= 0, 12);
506 else {
507 assert(_ext.shift() <= 0 || _ext.shift() == (int)size, "bad shift");
508 i->f(_ext.shift() > 0, 12);
509 }
510 i->f(0b10, 11, 10);
511 }
512 break;
513
514 case pre:
515 i->f(0b00, 25, 24);
516 i->f(0, 21), i->f(0b11, 11, 10);
517 i->sf(_offset, 20, 12);
518 break;
519
520 case post:
521 i->f(0b00, 25, 24);
522 i->f(0, 21), i->f(0b01, 11, 10);
523 i->sf(_offset, 20, 12);
524 break;
525
526 default:
527 ShouldNotReachHere();
528 }
529 }
530
encode_pair(Instruction_aarch64 * i) const531 void encode_pair(Instruction_aarch64 *i) const {
532 switch(_mode) {
533 case base_plus_offset:
534 i->f(0b010, 25, 23);
535 break;
536 case pre:
537 i->f(0b011, 25, 23);
538 break;
539 case post:
540 i->f(0b001, 25, 23);
541 break;
542 default:
543 ShouldNotReachHere();
544 }
545
546 unsigned size; // Operand shift in 32-bit words
547
548 if (i->get(26, 26)) { // float
549 switch(i->get(31, 30)) {
550 case 0b10:
551 size = 2; break;
552 case 0b01:
553 size = 1; break;
554 case 0b00:
555 size = 0; break;
556 default:
557 ShouldNotReachHere();
558 size = 0; // unreachable
559 }
560 } else {
561 size = i->get(31, 31);
562 }
563
564 size = 4 << size;
565 guarantee(_offset % size == 0, "bad offset");
566 i->sf(_offset / size, 21, 15);
567 i->srf(_base, 5);
568 }
569
encode_nontemporal_pair(Instruction_aarch64 * i) const570 void encode_nontemporal_pair(Instruction_aarch64 *i) const {
571 // Only base + offset is allowed
572 i->f(0b000, 25, 23);
573 unsigned size = i->get(31, 31);
574 size = 4 << size;
575 guarantee(_offset % size == 0, "bad offset");
576 i->sf(_offset / size, 21, 15);
577 i->srf(_base, 5);
578 guarantee(_mode == Address::base_plus_offset,
579 "Bad addressing mode for non-temporal op");
580 }
581
582 void lea(MacroAssembler *, Register) const;
583
584 static bool offset_ok_for_immed(int64_t offset, uint shift);
585
offset_ok_for_sve_immed(long offset,int shift,int vl)586 static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
587 if (offset % vl == 0) {
588 // Convert address offset into sve imm offset (MUL VL).
589 int sve_offset = offset / vl;
590 if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
591 // sve_offset can be encoded
592 return true;
593 }
594 }
595 return false;
596 }
597 };
598
// Convenience classes
// Literal Address carrying a runtime-call relocation.
class RuntimeAddress: public Address {

  public:

  RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}

};
607
// Literal Address carrying an oop relocation.
class OopAddress: public Address {

  public:

  OopAddress(address target) : Address(target, relocInfo::oop_type){}

};
615
// Literal Address referring to storage outside the generated code.
class ExternalAddress: public Address {
 private:
  static relocInfo::relocType reloc_for_target(address target) {
    // Sometimes ExternalAddress is used for values which aren't
    // exactly addresses, like the card table base.
    // external_word_type can't be used for values in the first page
    // so just skip the reloc in that case.
    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
  }

 public:

  ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}

};
631
// Literal Address pointing within the generated code itself.
class InternalAddress: public Address {

  public:

  InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
};
638
// Words needed to save the full FP/SIMD register state.
const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers *
                                FloatRegisterImpl::save_slots_per_register;

// Prefetch-operation encodings (PLD = load, PST = store, PLI =
// instruction; L1-L3 target cache level; KEEP/STRM retention policy).
typedef enum {
  PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
  PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
  PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM
} prfop;
647
648 class Assembler : public AbstractAssembler {
649
#ifndef PRODUCT
  // Code address at which to emit a NOP, so a debugger breakpoint can
  // be planted on the emission of a particular instruction.
  static const uintptr_t asm_bp;

  void emit_long(jint x) {
    if ((uintptr_t)pc() == asm_bp)
      NOP();                      // debugger landing pad (see asm_bp)
    AbstractAssembler::emit_int32(x);
  }
#else
  void emit_long(jint x) {
    AbstractAssembler::emit_int32(x);
  }
#endif
663
public:

  enum { instruction_size = 4 };

  //---<  calculate length of instruction  >---
  // We just use the values set above.
  // instruction must start at passed address
  static unsigned int instr_len(unsigned char *instr) { return instruction_size; }

  //---<  longest instructions  >---
  static unsigned int instr_maxlen() { return instruction_size; }

  // Build a pre- or post-indexed Address for base +/- offset.
  Address adjust(Register base, int offset, bool preIncrement) {
    if (preIncrement)
      return Address(Pre(base, offset));
    else
      return Address(Post(base, offset));
  }

  // Pre-indexed address: base is updated before the access.
  Address pre(Register base, int offset) {
    return adjust(base, offset, true);
  }

  // Post-indexed address: base is updated after the access.
  Address post(Register base, int offset) {
    return adjust(base, offset, false);
  }

  // Post-indexed address with a register increment.
  Address post(Register base, Register idx) {
    return Address(Post(base, idx));
  }

  static address locate_next_instruction(address inst);
696
  // The instruction currently being assembled (set up by 'starti');
  // the field-setters below all forward to it.
  Instruction_aarch64* current;

  void set_current(Instruction_aarch64* i) { current = i; }

  void f(unsigned val, int msb, int lsb) {
    current->f(val, msb, lsb);
  }
  void f(unsigned val, int msb) {
    current->f(val, msb, msb);
  }
  void sf(int64_t val, int msb, int lsb) {
    current->sf(val, msb, lsb);
  }
  void rf(Register reg, int lsb) {
    current->rf(reg, lsb);
  }
  void srf(Register reg, int lsb) {
    current->srf(reg, lsb);
  }
  void zrf(Register reg, int lsb) {
    current->zrf(reg, lsb);
  }
  void rf(FloatRegister reg, int lsb) {
    current->rf(reg, lsb);
  }
  void prf(PRegister reg, int lsb) {
    current->prf(reg, lsb);
  }
  void pgrf(PRegister reg, int lsb) {
    current->pgrf(reg, lsb);
  }
  void fixed(unsigned value, unsigned mask) {
    current->fixed(value, mask);
  }

  // Emit the completed word; asserts every bit was assigned exactly once.
  void emit() {
    emit_long(current->get_insn());
    assert_cond(current->get_bits() == 0xffffffff);
    current = NULL;
  }
737
  // Branch-emitting member-function types; wrap_label uses them to
  // defer emission of branches whose Label target is not yet bound.
  typedef void (Assembler::* uncond_branch_insn)(address dest);
  typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
  typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest);
  typedef void (Assembler::* prefetch_insn)(address target, prfop);

  void wrap_label(Label &L, uncond_branch_insn insn);
  void wrap_label(Register r, Label &L, compare_and_branch_insn insn);
  void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn);
  void wrap_label(Label &L, prfop, prefetch_insn insn);
747
748 // PC-rel. addressing
749
750 void adr(Register Rd, address dest);
751 void _adrp(Register Rd, address dest);
752
753 void adr(Register Rd, const Address &dest);
754 void _adrp(Register Rd, const Address &dest);
755
adr(Register Rd,Label & L)756 void adr(Register Rd, Label &L) {
757 wrap_label(Rd, L, &Assembler::Assembler::adr);
758 }
_adrp(Register Rd,Label & L)759 void _adrp(Register Rd, Label &L) {
760 wrap_label(Rd, L, &Assembler::_adrp);
761 }
762
763 void adrp(Register Rd, const Address &dest, uint64_t &offset);
764
#undef INSN

  // Shared encoder for add/sub with an unsigned immediate;
  // 'negated_op' is presumably the opposite operation's opcode, used
  // when the immediate is better encoded negated -- see the definition.
  void add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op,
                         int negated_op);

  // Add/subtract (immediate)
#define INSN(NAME, decode, negated)                                     \
  void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) {   \
    starti;                                                             \
    f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \
    zrf(Rd, 0), srf(Rn, 5);                                             \
  }                                                                     \
                                                                        \
  void NAME(Register Rd, Register Rn, unsigned imm) {                   \
    starti;                                                             \
    add_sub_immediate(Rd, Rn, imm, decode, negated);                    \
  }

  INSN(addsw, 0b001, 0b011);
  INSN(subsw, 0b011, 0b001);
  INSN(adds,  0b101, 0b111);
  INSN(subs,  0b111, 0b101);

#undef INSN

  // Add/subtract (immediate) -- forms without the explicit-shift overload
#define INSN(NAME, decode, negated)                     \
  void NAME(Register Rd, Register Rn, unsigned imm) {   \
    starti;                                             \
    add_sub_immediate(Rd, Rn, imm, decode, negated);    \
  }

  INSN(addw, 0b000, 0b010);
  INSN(subw, 0b010, 0b000);
  INSN(add,  0b100, 0b110);
  INSN(sub,  0b110, 0b100);

#undef INSN

  // Logical (immediate) -- Rd uses the reg|SP encoding here
#define INSN(NAME, decode, is32)                                \
  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
    starti;                                                     \
    uint32_t val = encode_logical_immediate(is32, imm);         \
    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
    srf(Rd, 0), zrf(Rn, 5);                                     \
  }

  INSN(andw, 0b000, true);
  INSN(orrw, 0b001, true);
  INSN(eorw, 0b010, true);
  INSN(andr, 0b100, false);
  INSN(orr,  0b101, false);
  INSN(eor,  0b110, false);

#undef INSN

  // Logical (immediate) -- variant with Rd using the reg|ZR encoding
#define INSN(NAME, decode, is32)                                \
  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
    starti;                                                     \
    uint32_t val = encode_logical_immediate(is32, imm);         \
    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
    zrf(Rd, 0), zrf(Rn, 5);                                     \
  }

  INSN(ands,  0b111, false);
  INSN(andsw, 0b011, true);

#undef INSN

  // Move wide (immediate)
#define INSN(NAME, opcode)                                      \
  void NAME(Register Rd, unsigned imm, unsigned shift = 0) {    \
    assert_cond((shift/16)*16 == shift);                        \
    starti;                                                     \
    f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21),\
      f(imm, 20, 5);                                            \
    rf(Rd, 0);                                                  \
  }

  INSN(movnw, 0b000);
  INSN(movzw, 0b010);
  INSN(movkw, 0b011);
  INSN(movn,  0b100);
  INSN(movz,  0b110);
  INSN(movk,  0b111);

#undef INSN

  // Bitfield
#define INSN(NAME, opcode, size)                                        \
  void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
    starti;                                                             \
    guarantee(size == 1 || (immr < 32 && imms < 32), "incorrect immr/imms");\
    f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
    zrf(Rn, 5), rf(Rd, 0);                                              \
  }

  INSN(sbfmw, 0b0001001100, 0);
  INSN(bfmw,  0b0011001100, 0);
  INSN(ubfmw, 0b0101001100, 0);
  INSN(sbfm,  0b1001001101, 1);
  INSN(bfm,   0b1011001101, 1);
  INSN(ubfm,  0b1101001101, 1);

#undef INSN

  // Extract
#define INSN(NAME, opcode, size)                                        \
  void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) {     \
    starti;                                                             \
    guarantee(size == 1 || imms < 32, "incorrect imms");                \
    f(opcode, 31, 21), f(imms, 15, 10);                                 \
    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
  }

  INSN(extrw, 0b00010011100, 0);
  INSN(extr,  0b10010011110, 1);

#undef INSN
884
  // The maximum range of a branch is fixed for the AArch64
  // architecture.  In debug mode we shrink it in order to test
  // trampolines, but not so small that branches in the interpreter
  // are out of range.
  static const uint64_t branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);

  static bool reachable_from_branch_at(address branch, address target) {
    return uabs(target - branch) < branch_range;
  }

  // Unconditional branch (immediate)
#define INSN(NAME, opcode)                                              \
  void NAME(address dest) {                                             \
    starti;                                                             \
    int64_t offset = (dest - pc()) >> 2;                                \
    DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
  }                                                                     \
  void NAME(Label &L) {                                                 \
    wrap_label(L, &Assembler::NAME);                                    \
  }                                                                     \
  void NAME(const Address &dest);

  INSN(b, 0);
  INSN(bl, 1);

#undef INSN

  // Compare & branch (immediate)
#define INSN(NAME, opcode)                              \
  void NAME(Register Rt, address dest) {                \
    int64_t offset = (dest - pc()) >> 2;                \
    starti;                                             \
    f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0);    \
  }                                                     \
  void NAME(Register Rt, Label &L) {                    \
    wrap_label(Rt, L, &Assembler::NAME);                \
  }

  INSN(cbzw,  0b00110100);
  INSN(cbnzw, 0b00110101);
  INSN(cbz,   0b10110100);
  INSN(cbnz,  0b10110101);

#undef INSN

  // Test & branch (immediate)
#define INSN(NAME, opcode)                                              \
  void NAME(Register Rt, int bitpos, address dest) {                    \
    int64_t offset = (dest - pc()) >> 2;                                \
    int b5 = bitpos >> 5;                                               \
    bitpos &= 0x1f;                                                     \
    starti;                                                             \
    f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \
    rf(Rt, 0);                                                          \
  }                                                                     \
  void NAME(Register Rt, int bitpos, Label &L) {                        \
    wrap_label(Rt, bitpos, L, &Assembler::NAME);                        \
  }

  INSN(tbz,  0b0110110);
  INSN(tbnz, 0b0110111);

#undef INSN

  // Conditional branch (immediate)
  enum Condition
    {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV};

  void br(Condition cond, address dest) {
    int64_t offset = (dest - pc()) >> 2;
    starti;
    f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
  }

  // Mnemonic aliases: bXX(dest) == br(XX, dest).
#define INSN(NAME, cond)                        \
  void NAME(address dest) {                     \
    br(cond, dest);                             \
  }

  INSN(beq, EQ);
  INSN(bne, NE);
  INSN(bhs, HS);
  INSN(bcs, CS);
  INSN(blo, LO);
  INSN(bcc, CC);
  INSN(bmi, MI);
  INSN(bpl, PL);
  INSN(bvs, VS);
  INSN(bvc, VC);
  INSN(bhi, HI);
  INSN(bls, LS);
  INSN(bge, GE);
  INSN(blt, LT);
  INSN(bgt, GT);
  INSN(ble, LE);
  INSN(bal, AL);
  INSN(bnv, NV);

  void br(Condition cc, Label &L);

#undef INSN

  // Exception generation
  void generate_exception(int opc, int op2, int LL, unsigned imm) {
    starti;
    f(0b11010100, 31, 24);
    f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0);
  }

#define INSN(NAME, opc, op2, LL)                \
  void NAME(unsigned imm) {                     \
    generate_exception(opc, op2, LL, imm);      \
  }

  INSN(svc,   0b000, 0, 0b01);
  INSN(hvc,   0b000, 0, 0b10);
  INSN(smc,   0b000, 0, 0b11);
  INSN(brk,   0b001, 0, 0b00);
  INSN(hlt,   0b010, 0, 0b00);
  INSN(dcps1, 0b101, 0, 0b01);
  INSN(dcps2, 0b101, 0, 0b10);
  INSN(dcps3, 0b101, 0, 0b11);

#undef INSN
1010
1011 // System
system(int op0,int op1,int CRn,int CRm,int op2,Register rt=dummy_reg)1012 void system(int op0, int op1, int CRn, int CRm, int op2,
1013 Register rt = dummy_reg)
1014 {
1015 starti;
1016 f(0b11010101000, 31, 21);
1017 f(op0, 20, 19);
1018 f(op1, 18, 16);
1019 f(CRn, 15, 12);
1020 f(CRm, 11, 8);
1021 f(op2, 7, 5);
1022 rf(rt, 0);
1023 }
1024
hint(int imm)1025 void hint(int imm) {
1026 system(0b00, 0b011, 0b0010, 0b0000, imm);
1027 }
1028
nop()1029 void nop() {
1030 hint(0);
1031 }
1032
yield()1033 void yield() {
1034 hint(1);
1035 }
1036
wfe()1037 void wfe() {
1038 hint(2);
1039 }
1040
wfi()1041 void wfi() {
1042 hint(3);
1043 }
1044
sev()1045 void sev() {
1046 hint(4);
1047 }
1048
sevl()1049 void sevl() {
1050 hint(5);
1051 }
1052
1053 // we only provide mrs and msr for the special purpose system
1054 // registers where op1 (instr[20:19]) == 11 and, (currently) only
1055 // use it for FPSR n.b msr has L (instr[21]) == 0 mrs has L == 1
1056
msr(int op1,int CRn,int CRm,int op2,Register rt)1057 void msr(int op1, int CRn, int CRm, int op2, Register rt) {
1058 starti;
1059 f(0b1101010100011, 31, 19);
1060 f(op1, 18, 16);
1061 f(CRn, 15, 12);
1062 f(CRm, 11, 8);
1063 f(op2, 7, 5);
1064 // writing zr is ok
1065 zrf(rt, 0);
1066 }
1067
mrs(int op1,int CRn,int CRm,int op2,Register rt)1068 void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
1069 starti;
1070 f(0b1101010100111, 31, 19);
1071 f(op1, 18, 16);
1072 f(CRn, 15, 12);
1073 f(CRm, 11, 8);
1074 f(op2, 7, 5);
1075 // reading to zr is a mistake
1076 rf(rt, 0);
1077 }
1078
1079 enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH,
1080 ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY};
1081
dsb(barrier imm)1082 void dsb(barrier imm) {
1083 system(0b00, 0b011, 0b00011, imm, 0b100);
1084 }
1085
dmb(barrier imm)1086 void dmb(barrier imm) {
1087 system(0b00, 0b011, 0b00011, imm, 0b101);
1088 }
1089
isb()1090 void isb() {
1091 system(0b00, 0b011, 0b00011, SY, 0b110);
1092 }
1093
sys(int op1,int CRn,int CRm,int op2,Register rt=(Register)0b11111)1094 void sys(int op1, int CRn, int CRm, int op2,
1095 Register rt = (Register)0b11111) {
1096 system(0b01, op1, CRn, CRm, op2, rt);
1097 }
1098
1099 // Only implement operations accessible from EL0 or higher, i.e.,
1100 // op1 CRn CRm op2
1101 // IC IVAU 3 7 5 1
1102 // DC CVAC 3 7 10 1
1103 // DC CVAP 3 7 12 1
1104 // DC CVAU 3 7 11 1
1105 // DC CIVAC 3 7 14 1
1106 // DC ZVA 3 7 4 1
1107 // So only deal with the CRm field.
1108 enum icache_maintenance {IVAU = 0b0101};
1109 enum dcache_maintenance {CVAC = 0b1010, CVAP = 0b1100, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100};
1110
dc(dcache_maintenance cm,Register Rt)1111 void dc(dcache_maintenance cm, Register Rt) {
1112 sys(0b011, 0b0111, cm, 0b001, Rt);
1113 }
1114
ic(icache_maintenance cm,Register Rt)1115 void ic(icache_maintenance cm, Register Rt) {
1116 sys(0b011, 0b0111, cm, 0b001, Rt);
1117 }
1118
1119 // A more convenient access to dmb for our purposes
1120 enum Membar_mask_bits {
1121 // We can use ISH for a barrier because the ARM ARM says "This
1122 // architecture assumes that all Processing Elements that use the
1123 // same operating system or hypervisor are in the same Inner
1124 // Shareable shareability domain."
1125 StoreStore = ISHST,
1126 LoadStore = ISHLD,
1127 LoadLoad = ISHLD,
1128 StoreLoad = ISH,
1129 AnyAny = ISH
1130 };
1131
membar(Membar_mask_bits order_constraint)1132 void membar(Membar_mask_bits order_constraint) {
1133 dmb(Assembler::barrier(order_constraint));
1134 }
1135
1136 // Unconditional branch (register)
branch_reg(Register R,int opc)1137 void branch_reg(Register R, int opc) {
1138 starti;
1139 f(0b1101011, 31, 25);
1140 f(opc, 24, 21);
1141 f(0b11111000000, 20, 10);
1142 rf(R, 5);
1143 f(0b00000, 4, 0);
1144 }
1145
1146 #define INSN(NAME, opc) \
1147 void NAME(Register R) { \
1148 branch_reg(R, opc); \
1149 }
1150
1151 INSN(br, 0b0000);
1152 INSN(blr, 0b0001);
1153 INSN(ret, 0b0010);
1154
1155 void ret(void *p); // This forces a compile-time error for ret(0)
1156
1157 #undef INSN
1158
1159 #define INSN(NAME, opc) \
1160 void NAME() { \
1161 branch_reg(dummy_reg, opc); \
1162 }
1163
1164 INSN(eret, 0b0100);
1165 INSN(drps, 0b0101);
1166
1167 #undef INSN
1168
1169 // Load/store exclusive
1170 enum operand_size { byte, halfword, word, xword };
1171
load_store_exclusive(Register Rs,Register Rt1,Register Rt2,Register Rn,enum operand_size sz,int op,bool ordered)1172 void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
1173 Register Rn, enum operand_size sz, int op, bool ordered) {
1174 starti;
1175 f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
1176 rf(Rs, 16), f(ordered, 15), zrf(Rt2, 10), srf(Rn, 5), zrf(Rt1, 0);
1177 }
1178
load_exclusive(Register dst,Register addr,enum operand_size sz,bool ordered)1179 void load_exclusive(Register dst, Register addr,
1180 enum operand_size sz, bool ordered) {
1181 load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
1182 sz, 0b010, ordered);
1183 }
1184
store_exclusive(Register status,Register new_val,Register addr,enum operand_size sz,bool ordered)1185 void store_exclusive(Register status, Register new_val, Register addr,
1186 enum operand_size sz, bool ordered) {
1187 load_store_exclusive(status, new_val, dummy_reg, addr,
1188 sz, 0b000, ordered);
1189 }
1190
1191 #define INSN4(NAME, sz, op, o0) /* Four registers */ \
1192 void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) { \
1193 guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
1194 load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0); \
1195 }
1196
1197 #define INSN3(NAME, sz, op, o0) /* Three registers */ \
1198 void NAME(Register Rs, Register Rt, Register Rn) { \
1199 guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
1200 load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
1201 }
1202
1203 #define INSN2(NAME, sz, op, o0) /* Two registers */ \
1204 void NAME(Register Rt, Register Rn) { \
1205 load_store_exclusive(dummy_reg, Rt, dummy_reg, \
1206 Rn, sz, op, o0); \
1207 }
1208
1209 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
1210 void NAME(Register Rt1, Register Rt2, Register Rn) { \
1211 guarantee(Rt1 != Rt2, "unpredictable instruction"); \
1212 load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0); \
1213 }
1214
1215 // bytes
1216 INSN3(stxrb, byte, 0b000, 0);
1217 INSN3(stlxrb, byte, 0b000, 1);
1218 INSN2(ldxrb, byte, 0b010, 0);
1219 INSN2(ldaxrb, byte, 0b010, 1);
1220 INSN2(stlrb, byte, 0b100, 1);
1221 INSN2(ldarb, byte, 0b110, 1);
1222
1223 // halfwords
1224 INSN3(stxrh, halfword, 0b000, 0);
1225 INSN3(stlxrh, halfword, 0b000, 1);
1226 INSN2(ldxrh, halfword, 0b010, 0);
1227 INSN2(ldaxrh, halfword, 0b010, 1);
1228 INSN2(stlrh, halfword, 0b100, 1);
1229 INSN2(ldarh, halfword, 0b110, 1);
1230
1231 // words
1232 INSN3(stxrw, word, 0b000, 0);
1233 INSN3(stlxrw, word, 0b000, 1);
1234 INSN4(stxpw, word, 0b001, 0);
1235 INSN4(stlxpw, word, 0b001, 1);
1236 INSN2(ldxrw, word, 0b010, 0);
1237 INSN2(ldaxrw, word, 0b010, 1);
1238 INSN_FOO(ldxpw, word, 0b011, 0);
1239 INSN_FOO(ldaxpw, word, 0b011, 1);
1240 INSN2(stlrw, word, 0b100, 1);
1241 INSN2(ldarw, word, 0b110, 1);
1242
1243 // xwords
1244 INSN3(stxr, xword, 0b000, 0);
1245 INSN3(stlxr, xword, 0b000, 1);
1246 INSN4(stxp, xword, 0b001, 0);
1247 INSN4(stlxp, xword, 0b001, 1);
1248 INSN2(ldxr, xword, 0b010, 0);
1249 INSN2(ldaxr, xword, 0b010, 1);
1250 INSN_FOO(ldxp, xword, 0b011, 0);
1251 INSN_FOO(ldaxp, xword, 0b011, 1);
1252 INSN2(stlr, xword, 0b100, 1);
1253 INSN2(ldar, xword, 0b110, 1);
1254
1255 #undef INSN2
1256 #undef INSN3
1257 #undef INSN4
1258 #undef INSN_FOO
1259
1260 // 8.1 Compare and swap extensions
lse_cas(Register Rs,Register Rt,Register Rn,enum operand_size sz,bool a,bool r,bool not_pair)1261 void lse_cas(Register Rs, Register Rt, Register Rn,
1262 enum operand_size sz, bool a, bool r, bool not_pair) {
1263 starti;
1264 if (! not_pair) { // Pair
1265 assert(sz == word || sz == xword, "invalid size");
1266 /* The size bit is in bit 30, not 31 */
1267 sz = (operand_size)(sz == word ? 0b00:0b01);
1268 }
1269 f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
1270 zrf(Rs, 16), f(r, 15), f(0b11111, 14, 10), srf(Rn, 5), zrf(Rt, 0);
1271 }
1272
1273 // CAS
1274 #define INSN(NAME, a, r) \
1275 void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1276 assert(Rs != Rn && Rs != Rt, "unpredictable instruction"); \
1277 lse_cas(Rs, Rt, Rn, sz, a, r, true); \
1278 }
INSN(cas,false,false)1279 INSN(cas, false, false)
1280 INSN(casa, true, false)
1281 INSN(casl, false, true)
1282 INSN(casal, true, true)
1283 #undef INSN
1284
1285 // CASP
1286 #define INSN(NAME, a, r) \
1287 void NAME(operand_size sz, Register Rs, Register Rs1, \
1288 Register Rt, Register Rt1, Register Rn) { \
1289 assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 && \
1290 Rs->successor() == Rs1 && Rt->successor() == Rt1 && \
1291 Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers"); \
1292 lse_cas(Rs, Rt, Rn, sz, a, r, false); \
1293 }
1294 INSN(casp, false, false)
1295 INSN(caspa, true, false)
1296 INSN(caspl, false, true)
1297 INSN(caspal, true, true)
1298 #undef INSN
1299
1300 // 8.1 Atomic operations
1301 void lse_atomic(Register Rs, Register Rt, Register Rn,
1302 enum operand_size sz, int op1, int op2, bool a, bool r) {
1303 starti;
1304 f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
1305 zrf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), srf(Rn, 5), zrf(Rt, 0);
1306 }
1307
1308 #define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2) \
1309 void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1310 lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false); \
1311 } \
1312 void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1313 lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false); \
1314 } \
1315 void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1316 lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true); \
1317 } \
1318 void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
1319 lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true); \
1320 }
1321 INSN(ldadd, ldadda, ldaddl, ldaddal, 0, 0b000);
1322 INSN(ldbic, ldbica, ldbicl, ldbical, 0, 0b001);
1323 INSN(ldeor, ldeora, ldeorl, ldeoral, 0, 0b010);
1324 INSN(ldorr, ldorra, ldorrl, ldorral, 0, 0b011);
1325 INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
1326 INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
1327 INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
1328 INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
1329 INSN(swp, swpa, swpl, swpal, 1, 0b000);
1330 #undef INSN
1331
1332 // Load register (literal)
1333 #define INSN(NAME, opc, V) \
1334 void NAME(Register Rt, address dest) { \
1335 int64_t offset = (dest - pc()) >> 2; \
1336 starti; \
1337 f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \
1338 sf(offset, 23, 5); \
1339 rf(Rt, 0); \
1340 } \
1341 void NAME(Register Rt, address dest, relocInfo::relocType rtype) { \
1342 InstructionMark im(this); \
1343 guarantee(rtype == relocInfo::internal_word_type, \
1344 "only internal_word_type relocs make sense here"); \
1345 code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \
1346 NAME(Rt, dest); \
1347 } \
1348 void NAME(Register Rt, Label &L) { \
1349 wrap_label(Rt, L, &Assembler::NAME); \
1350 }
1351
1352 INSN(ldrw, 0b00, 0);
1353 INSN(ldr, 0b01, 0);
1354 INSN(ldrsw, 0b10, 0);
1355
1356 #undef INSN
1357
1358 #define INSN(NAME, opc, V) \
1359 void NAME(FloatRegister Rt, address dest) { \
1360 int64_t offset = (dest - pc()) >> 2; \
1361 starti; \
1362 f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \
1363 sf(offset, 23, 5); \
1364 rf((Register)Rt, 0); \
1365 }
1366
1367 INSN(ldrs, 0b00, 1);
1368 INSN(ldrd, 0b01, 1);
1369 INSN(ldrq, 0b10, 1);
1370
1371 #undef INSN
1372
1373 #define INSN(NAME, size, opc) \
1374 void NAME(FloatRegister Rt, Register Rn) { \
1375 starti; \
1376 f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21); \
1377 f(0, 20, 12), f(0b01, 11, 10); \
1378 rf(Rn, 5), rf((Register)Rt, 0); \
1379 }
1380
1381 INSN(ldrs, 0b10, 0b01);
1382 INSN(ldrd, 0b11, 0b01);
1383 INSN(ldrq, 0b00, 0b11);
1384
1385 #undef INSN
1386
1387
1388 #define INSN(NAME, opc, V) \
1389 void NAME(address dest, prfop op = PLDL1KEEP) { \
1390 int64_t offset = (dest - pc()) >> 2; \
1391 starti; \
1392 f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24), \
1393 sf(offset, 23, 5); \
1394 f(op, 4, 0); \
1395 } \
1396 void NAME(Label &L, prfop op = PLDL1KEEP) { \
1397 wrap_label(L, op, &Assembler::NAME); \
1398 }
1399
1400 INSN(prfm, 0b11, 0);
1401
1402 #undef INSN
1403
1404 // Load/store
ld_st1(int opc,int p1,int V,int L,Register Rt1,Register Rt2,Address adr,bool no_allocate)1405 void ld_st1(int opc, int p1, int V, int L,
1406 Register Rt1, Register Rt2, Address adr, bool no_allocate) {
1407 starti;
1408 f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
1409 zrf(Rt2, 10), zrf(Rt1, 0);
1410 if (no_allocate) {
1411 adr.encode_nontemporal_pair(current);
1412 } else {
1413 adr.encode_pair(current);
1414 }
1415 }
1416
1417 // Load/store register pair (offset)
1418 #define INSN(NAME, size, p1, V, L, no_allocate) \
1419 void NAME(Register Rt1, Register Rt2, Address adr) { \
1420 ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
1421 }
1422
1423 INSN(stpw, 0b00, 0b101, 0, 0, false);
1424 INSN(ldpw, 0b00, 0b101, 0, 1, false);
1425 INSN(ldpsw, 0b01, 0b101, 0, 1, false);
1426 INSN(stp, 0b10, 0b101, 0, 0, false);
1427 INSN(ldp, 0b10, 0b101, 0, 1, false);
1428
1429 // Load/store no-allocate pair (offset)
1430 INSN(stnpw, 0b00, 0b101, 0, 0, true);
1431 INSN(ldnpw, 0b00, 0b101, 0, 1, true);
1432 INSN(stnp, 0b10, 0b101, 0, 0, true);
1433 INSN(ldnp, 0b10, 0b101, 0, 1, true);
1434
1435 #undef INSN
1436
1437 #define INSN(NAME, size, p1, V, L, no_allocate) \
1438 void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) { \
1439 ld_st1(size, p1, V, L, (Register)Rt1, (Register)Rt2, adr, no_allocate); \
1440 }
1441
1442 INSN(stps, 0b00, 0b101, 1, 0, false);
1443 INSN(ldps, 0b00, 0b101, 1, 1, false);
1444 INSN(stpd, 0b01, 0b101, 1, 0, false);
1445 INSN(ldpd, 0b01, 0b101, 1, 1, false);
1446 INSN(stpq, 0b10, 0b101, 1, 0, false);
1447 INSN(ldpq, 0b10, 0b101, 1, 1, false);
1448
1449 #undef INSN
1450
1451 // Load/store register (all modes)
ld_st2(Register Rt,const Address & adr,int size,int op,int V=0)1452 void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
1453 starti;
1454
1455 f(V, 26); // general reg?
1456 zrf(Rt, 0);
1457
1458 // Encoding for literal loads is done here (rather than pushed
1459 // down into Address::encode) because the encoding of this
1460 // instruction is too different from all of the other forms to
1461 // make it worth sharing.
1462 if (adr.getMode() == Address::literal) {
1463 assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
1464 assert(op == 0b01, "literal form can only be used with loads");
1465 f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
1466 int64_t offset = (adr.target() - pc()) >> 2;
1467 sf(offset, 23, 5);
1468 code_section()->relocate(pc(), adr.rspec());
1469 return;
1470 }
1471
1472 f(size, 31, 30);
1473 f(op, 23, 22); // str
1474 adr.encode(current);
1475 }
1476
1477 #define INSN(NAME, size, op) \
1478 void NAME(Register Rt, const Address &adr) { \
1479 ld_st2(Rt, adr, size, op); \
1480 } \
1481
1482 INSN(str, 0b11, 0b00);
1483 INSN(strw, 0b10, 0b00);
1484 INSN(strb, 0b00, 0b00);
1485 INSN(strh, 0b01, 0b00);
1486
1487 INSN(ldr, 0b11, 0b01);
1488 INSN(ldrw, 0b10, 0b01);
1489 INSN(ldrb, 0b00, 0b01);
1490 INSN(ldrh, 0b01, 0b01);
1491
1492 INSN(ldrsb, 0b00, 0b10);
1493 INSN(ldrsbw, 0b00, 0b11);
1494 INSN(ldrsh, 0b01, 0b10);
1495 INSN(ldrshw, 0b01, 0b11);
1496 INSN(ldrsw, 0b10, 0b10);
1497
1498 #undef INSN
1499
1500 #define INSN(NAME, size, op) \
1501 void NAME(const Address &adr, prfop pfop = PLDL1KEEP) { \
1502 ld_st2((Register)pfop, adr, size, op); \
1503 }
1504
1505 INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
1506 // writeback modes, but the assembler
1507 // doesn't enfore that.
1508
1509 #undef INSN
1510
1511 #define INSN(NAME, size, op) \
1512 void NAME(FloatRegister Rt, const Address &adr) { \
1513 ld_st2((Register)Rt, adr, size, op, 1); \
1514 }
1515
1516 INSN(strd, 0b11, 0b00);
1517 INSN(strs, 0b10, 0b00);
1518 INSN(ldrd, 0b11, 0b01);
1519 INSN(ldrs, 0b10, 0b01);
1520 INSN(strq, 0b00, 0b10);
1521 INSN(ldrq, 0x00, 0b11);
1522
1523 #undef INSN
1524
1525 /* SIMD extensions
1526 *
1527 * We just use FloatRegister in the following. They are exactly the same
1528 * as SIMD registers.
1529 */
1530 public:
1531
1532 enum SIMD_Arrangement {
1533 T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q, INVALID_ARRANGEMENT
1534 };
1535
1536 private:
1537
1538 static SIMD_Arrangement _esize2arrangement_table[9][2];
1539
1540 public:
1541
1542 static SIMD_Arrangement esize2arrangement(int esize, bool isQ);
1543
1544 enum SIMD_RegVariant {
1545 B, H, S, D, Q, INVALID
1546 };
1547
1548 enum shift_kind { LSL, LSR, ASR, ROR };
1549
op_shifted_reg(unsigned decode,enum shift_kind kind,unsigned shift,unsigned size,unsigned op)1550 void op_shifted_reg(unsigned decode,
1551 enum shift_kind kind, unsigned shift,
1552 unsigned size, unsigned op) {
1553 f(size, 31);
1554 f(op, 30, 29);
1555 f(decode, 28, 24);
1556 f(shift, 15, 10);
1557 f(kind, 23, 22);
1558 }
1559
1560 // Logical (shifted register)
1561 #define INSN(NAME, size, op, N) \
1562 void NAME(Register Rd, Register Rn, Register Rm, \
1563 enum shift_kind kind = LSL, unsigned shift = 0) { \
1564 starti; \
1565 guarantee(size == 1 || shift < 32, "incorrect shift"); \
1566 f(N, 21); \
1567 zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \
1568 op_shifted_reg(0b01010, kind, shift, size, op); \
1569 }
1570
1571 INSN(andr, 1, 0b00, 0);
1572 INSN(orr, 1, 0b01, 0);
1573 INSN(eor, 1, 0b10, 0);
1574 INSN(ands, 1, 0b11, 0);
1575 INSN(andw, 0, 0b00, 0);
1576 INSN(orrw, 0, 0b01, 0);
1577 INSN(eorw, 0, 0b10, 0);
1578 INSN(andsw, 0, 0b11, 0);
1579
1580 #undef INSN
1581
1582 #define INSN(NAME, size, op, N) \
1583 void NAME(Register Rd, Register Rn, Register Rm, \
1584 enum shift_kind kind = LSL, unsigned shift = 0) { \
1585 starti; \
1586 f(N, 21); \
1587 zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \
1588 op_shifted_reg(0b01010, kind, shift, size, op); \
1589 } \
1590 \
1591 /* These instructions have no immediate form. Provide an overload so \
1592 that if anyone does try to use an immediate operand -- this has \
1593 happened! -- we'll get a compile-time error. */ \
1594 void NAME(Register Rd, Register Rn, unsigned imm, \
1595 enum shift_kind kind = LSL, unsigned shift = 0) { \
1596 assert(false, " can't be used with immediate operand"); \
1597 }
1598
1599 INSN(bic, 1, 0b00, 1);
1600 INSN(orn, 1, 0b01, 1);
1601 INSN(eon, 1, 0b10, 1);
1602 INSN(bics, 1, 0b11, 1);
1603 INSN(bicw, 0, 0b00, 1);
1604 INSN(ornw, 0, 0b01, 1);
1605 INSN(eonw, 0, 0b10, 1);
1606 INSN(bicsw, 0, 0b11, 1);
1607
1608 #undef INSN
1609
1610 #ifdef _WIN64
1611 // In MSVC, `mvn` is defined as a macro and it affects compilation
1612 #undef mvn
1613 #endif
1614
1615 // Aliases for short forms of orn
mvn(Register Rd,Register Rm,enum shift_kind kind=LSL,unsigned shift=0)1616 void mvn(Register Rd, Register Rm,
1617 enum shift_kind kind = LSL, unsigned shift = 0) {
1618 orn(Rd, zr, Rm, kind, shift);
1619 }
1620
mvnw(Register Rd,Register Rm,enum shift_kind kind=LSL,unsigned shift=0)1621 void mvnw(Register Rd, Register Rm,
1622 enum shift_kind kind = LSL, unsigned shift = 0) {
1623 ornw(Rd, zr, Rm, kind, shift);
1624 }
1625
1626 // Add/subtract (shifted register)
1627 #define INSN(NAME, size, op) \
1628 void NAME(Register Rd, Register Rn, Register Rm, \
1629 enum shift_kind kind, unsigned shift = 0) { \
1630 starti; \
1631 f(0, 21); \
1632 assert_cond(kind != ROR); \
1633 guarantee(size == 1 || shift < 32, "incorrect shift");\
1634 zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16); \
1635 op_shifted_reg(0b01011, kind, shift, size, op); \
1636 }
1637
1638 INSN(add, 1, 0b000);
1639 INSN(sub, 1, 0b10);
1640 INSN(addw, 0, 0b000);
1641 INSN(subw, 0, 0b10);
1642
1643 INSN(adds, 1, 0b001);
1644 INSN(subs, 1, 0b11);
1645 INSN(addsw, 0, 0b001);
1646 INSN(subsw, 0, 0b11);
1647
1648 #undef INSN
1649
1650 // Add/subtract (extended register)
1651 #define INSN(NAME, op) \
1652 void NAME(Register Rd, Register Rn, Register Rm, \
1653 ext::operation option, int amount = 0) { \
1654 starti; \
1655 zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0); \
1656 add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1657 }
1658
add_sub_extended_reg(unsigned op,unsigned decode,Register Rd,Register Rn,Register Rm,unsigned opt,ext::operation option,unsigned imm)1659 void add_sub_extended_reg(unsigned op, unsigned decode,
1660 Register Rd, Register Rn, Register Rm,
1661 unsigned opt, ext::operation option, unsigned imm) {
1662 guarantee(imm <= 4, "shift amount must be <= 4");
1663 f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
1664 f(option, 15, 13), f(imm, 12, 10);
1665 }
1666
1667 INSN(addw, 0b000);
1668 INSN(subw, 0b010);
1669 INSN(add, 0b100);
1670 INSN(sub, 0b110);
1671
1672 #undef INSN
1673
1674 #define INSN(NAME, op) \
1675 void NAME(Register Rd, Register Rn, Register Rm, \
1676 ext::operation option, int amount = 0) { \
1677 starti; \
1678 zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0); \
1679 add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1680 }
1681
1682 INSN(addsw, 0b001);
1683 INSN(subsw, 0b011);
1684 INSN(adds, 0b101);
1685 INSN(subs, 0b111);
1686
1687 #undef INSN
1688
1689 // Aliases for short forms of add and sub
1690 #define INSN(NAME) \
1691 void NAME(Register Rd, Register Rn, Register Rm) { \
1692 if (Rd == sp || Rn == sp) \
1693 NAME(Rd, Rn, Rm, ext::uxtx); \
1694 else \
1695 NAME(Rd, Rn, Rm, LSL); \
1696 }
1697
1698 INSN(addw);
1699 INSN(subw);
1700 INSN(add);
1701 INSN(sub);
1702
1703 INSN(addsw);
1704 INSN(subsw);
1705 INSN(adds);
1706 INSN(subs);
1707
1708 #undef INSN
1709
1710 // Add/subtract (with carry)
add_sub_carry(unsigned op,Register Rd,Register Rn,Register Rm)1711 void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
1712 starti;
1713 f(op, 31, 29);
1714 f(0b11010000, 28, 21);
1715 f(0b000000, 15, 10);
1716 zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
1717 }
1718
1719 #define INSN(NAME, op) \
1720 void NAME(Register Rd, Register Rn, Register Rm) { \
1721 add_sub_carry(op, Rd, Rn, Rm); \
1722 }
1723
1724 INSN(adcw, 0b000);
1725 INSN(adcsw, 0b001);
1726 INSN(sbcw, 0b010);
1727 INSN(sbcsw, 0b011);
1728 INSN(adc, 0b100);
1729 INSN(adcs, 0b101);
1730 INSN(sbc,0b110);
1731 INSN(sbcs, 0b111);
1732
1733 #undef INSN
1734
1735 // Conditional compare (both kinds)
conditional_compare(unsigned op,int o1,int o2,int o3,Register Rn,unsigned imm5,unsigned nzcv,unsigned cond)1736 void conditional_compare(unsigned op, int o1, int o2, int o3,
1737 Register Rn, unsigned imm5, unsigned nzcv,
1738 unsigned cond) {
1739 starti;
1740 f(op, 31, 29);
1741 f(0b11010010, 28, 21);
1742 f(cond, 15, 12);
1743 f(o1, 11);
1744 f(o2, 10);
1745 f(o3, 4);
1746 f(nzcv, 3, 0);
1747 f(imm5, 20, 16), zrf(Rn, 5);
1748 }
1749
1750 #define INSN(NAME, op) \
1751 void NAME(Register Rn, Register Rm, int imm, Condition cond) { \
1752 int regNumber = (Rm == zr ? 31 : (uintptr_t)Rm); \
1753 conditional_compare(op, 0, 0, 0, Rn, regNumber, imm, cond); \
1754 } \
1755 \
1756 void NAME(Register Rn, int imm5, int imm, Condition cond) { \
1757 conditional_compare(op, 1, 0, 0, Rn, imm5, imm, cond); \
1758 }
1759
1760 INSN(ccmnw, 0b001);
1761 INSN(ccmpw, 0b011);
1762 INSN(ccmn, 0b101);
1763 INSN(ccmp, 0b111);
1764
1765 #undef INSN
1766
1767 // Conditional select
conditional_select(unsigned op,unsigned op2,Register Rd,Register Rn,Register Rm,unsigned cond)1768 void conditional_select(unsigned op, unsigned op2,
1769 Register Rd, Register Rn, Register Rm,
1770 unsigned cond) {
1771 starti;
1772 f(op, 31, 29);
1773 f(0b11010100, 28, 21);
1774 f(cond, 15, 12);
1775 f(op2, 11, 10);
1776 zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
1777 }
1778
1779 #define INSN(NAME, op, op2) \
1780 void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \
1781 conditional_select(op, op2, Rd, Rn, Rm, cond); \
1782 }
1783
1784 INSN(cselw, 0b000, 0b00);
1785 INSN(csincw, 0b000, 0b01);
1786 INSN(csinvw, 0b010, 0b00);
1787 INSN(csnegw, 0b010, 0b01);
1788 INSN(csel, 0b100, 0b00);
1789 INSN(csinc, 0b100, 0b01);
1790 INSN(csinv, 0b110, 0b00);
1791 INSN(csneg, 0b110, 0b01);
1792
1793 #undef INSN
1794
1795 // Data processing
data_processing(unsigned op29,unsigned opcode,Register Rd,Register Rn)1796 void data_processing(unsigned op29, unsigned opcode,
1797 Register Rd, Register Rn) {
1798 f(op29, 31, 29), f(0b11010110, 28, 21);
1799 f(opcode, 15, 10);
1800 rf(Rn, 5), rf(Rd, 0);
1801 }
1802
1803 // (1 source)
1804 #define INSN(NAME, op29, opcode2, opcode) \
1805 void NAME(Register Rd, Register Rn) { \
1806 starti; \
1807 f(opcode2, 20, 16); \
1808 data_processing(op29, opcode, Rd, Rn); \
1809 }
1810
1811 INSN(rbitw, 0b010, 0b00000, 0b00000);
1812 INSN(rev16w, 0b010, 0b00000, 0b00001);
1813 INSN(revw, 0b010, 0b00000, 0b00010);
1814 INSN(clzw, 0b010, 0b00000, 0b00100);
1815 INSN(clsw, 0b010, 0b00000, 0b00101);
1816
1817 INSN(rbit, 0b110, 0b00000, 0b00000);
1818 INSN(rev16, 0b110, 0b00000, 0b00001);
1819 INSN(rev32, 0b110, 0b00000, 0b00010);
1820 INSN(rev, 0b110, 0b00000, 0b00011);
1821 INSN(clz, 0b110, 0b00000, 0b00100);
1822 INSN(cls, 0b110, 0b00000, 0b00101);
1823
1824 #undef INSN
1825
1826 // (2 sources)
1827 #define INSN(NAME, op29, opcode) \
1828 void NAME(Register Rd, Register Rn, Register Rm) { \
1829 starti; \
1830 rf(Rm, 16); \
1831 data_processing(op29, opcode, Rd, Rn); \
1832 }
1833
1834 INSN(udivw, 0b000, 0b000010);
1835 INSN(sdivw, 0b000, 0b000011);
1836 INSN(lslvw, 0b000, 0b001000);
1837 INSN(lsrvw, 0b000, 0b001001);
1838 INSN(asrvw, 0b000, 0b001010);
1839 INSN(rorvw, 0b000, 0b001011);
1840
1841 INSN(udiv, 0b100, 0b000010);
1842 INSN(sdiv, 0b100, 0b000011);
1843 INSN(lslv, 0b100, 0b001000);
1844 INSN(lsrv, 0b100, 0b001001);
1845 INSN(asrv, 0b100, 0b001010);
1846 INSN(rorv, 0b100, 0b001011);
1847
1848 #undef INSN
1849
1850 // (3 sources)
data_processing(unsigned op54,unsigned op31,unsigned o0,Register Rd,Register Rn,Register Rm,Register Ra)1851 void data_processing(unsigned op54, unsigned op31, unsigned o0,
1852 Register Rd, Register Rn, Register Rm,
1853 Register Ra) {
1854 starti;
1855 f(op54, 31, 29), f(0b11011, 28, 24);
1856 f(op31, 23, 21), f(o0, 15);
1857 zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
1858 }
1859
1860 #define INSN(NAME, op54, op31, o0) \
1861 void NAME(Register Rd, Register Rn, Register Rm, Register Ra) { \
1862 data_processing(op54, op31, o0, Rd, Rn, Rm, Ra); \
1863 }
1864
1865 INSN(maddw, 0b000, 0b000, 0);
1866 INSN(msubw, 0b000, 0b000, 1);
1867 INSN(madd, 0b100, 0b000, 0);
1868 INSN(msub, 0b100, 0b000, 1);
1869 INSN(smaddl, 0b100, 0b001, 0);
1870 INSN(smsubl, 0b100, 0b001, 1);
1871 INSN(umaddl, 0b100, 0b101, 0);
1872 INSN(umsubl, 0b100, 0b101, 1);
1873
1874 #undef INSN
1875
1876 #define INSN(NAME, op54, op31, o0) \
1877 void NAME(Register Rd, Register Rn, Register Rm) { \
1878 data_processing(op54, op31, o0, Rd, Rn, Rm, (Register)31); \
1879 }
1880
1881 INSN(smulh, 0b100, 0b010, 0);
1882 INSN(umulh, 0b100, 0b110, 0);
1883
1884 #undef INSN
1885
1886 // Floating-point data-processing (1 source)
data_processing(unsigned op31,unsigned type,unsigned opcode,FloatRegister Vd,FloatRegister Vn)1887 void data_processing(unsigned op31, unsigned type, unsigned opcode,
1888 FloatRegister Vd, FloatRegister Vn) {
1889 starti;
1890 f(op31, 31, 29);
1891 f(0b11110, 28, 24);
1892 f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
1893 rf(Vn, 5), rf(Vd, 0);
1894 }
1895
1896 #define INSN(NAME, op31, type, opcode) \
1897 void NAME(FloatRegister Vd, FloatRegister Vn) { \
1898 data_processing(op31, type, opcode, Vd, Vn); \
1899 }
1900
1901 private:
1902 INSN(i_fmovs, 0b000, 0b00, 0b000000);
1903 public:
1904 INSN(fabss, 0b000, 0b00, 0b000001);
1905 INSN(fnegs, 0b000, 0b00, 0b000010);
1906 INSN(fsqrts, 0b000, 0b00, 0b000011);
1907 INSN(fcvts, 0b000, 0b00, 0b000101); // Single-precision to double-precision
1908
1909 private:
1910 INSN(i_fmovd, 0b000, 0b01, 0b000000);
1911 public:
1912 INSN(fabsd, 0b000, 0b01, 0b000001);
1913 INSN(fnegd, 0b000, 0b01, 0b000010);
1914 INSN(fsqrtd, 0b000, 0b01, 0b000011);
1915 INSN(fcvtd, 0b000, 0b01, 0b000100); // Double-precision to single-precision
1916
fmovd(FloatRegister Vd,FloatRegister Vn)1917 void fmovd(FloatRegister Vd, FloatRegister Vn) {
1918 assert(Vd != Vn, "should be");
1919 i_fmovd(Vd, Vn);
1920 }
1921
fmovs(FloatRegister Vd,FloatRegister Vn)1922 void fmovs(FloatRegister Vd, FloatRegister Vn) {
1923 assert(Vd != Vn, "should be");
1924 i_fmovs(Vd, Vn);
1925 }
1926
1927 private:
_fcvt_narrow_extend(FloatRegister Vd,SIMD_Arrangement Ta,FloatRegister Vn,SIMD_Arrangement Tb,bool do_extend)1928 void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
1929 FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
1930 assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
1931 || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
1932 starti;
1933 int op30 = (do_extend ? Tb : Ta) & 1;
1934 int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
1935 f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
1936 f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
1937 rf(Vn, 5), rf(Vd, 0);
1938 }
1939
1940 public:
fcvtl(FloatRegister Vd,SIMD_Arrangement Ta,FloatRegister Vn,SIMD_Arrangement Tb)1941 void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
1942 assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement");
1943 _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
1944 }
1945
fcvtn(FloatRegister Vd,SIMD_Arrangement Ta,FloatRegister Vn,SIMD_Arrangement Tb)1946 void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
1947 assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement");
1948 _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
1949 }
1950
1951 #undef INSN
1952
1953 // Floating-point data-processing (2 source)
data_processing(unsigned op31,unsigned type,unsigned opcode,FloatRegister Vd,FloatRegister Vn,FloatRegister Vm)1954 void data_processing(unsigned op31, unsigned type, unsigned opcode,
1955 FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
1956 starti;
1957 f(op31, 31, 29);
1958 f(0b11110, 28, 24);
1959 f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
1960 rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
1961 }
1962
1963 #define INSN(NAME, op31, type, opcode) \
1964 void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \
1965 data_processing(op31, type, opcode, Vd, Vn, Vm); \
1966 }
1967
1968 INSN(fabds, 0b011, 0b10, 0b110101);
1969 INSN(fmuls, 0b000, 0b00, 0b000010);
1970 INSN(fdivs, 0b000, 0b00, 0b000110);
1971 INSN(fadds, 0b000, 0b00, 0b001010);
1972 INSN(fsubs, 0b000, 0b00, 0b001110);
1973 INSN(fmaxs, 0b000, 0b00, 0b010010);
1974 INSN(fmins, 0b000, 0b00, 0b010110);
1975 INSN(fnmuls, 0b000, 0b00, 0b100010);
1976
1977 INSN(fabdd, 0b011, 0b11, 0b110101);
1978 INSN(fmuld, 0b000, 0b01, 0b000010);
1979 INSN(fdivd, 0b000, 0b01, 0b000110);
1980 INSN(faddd, 0b000, 0b01, 0b001010);
1981 INSN(fsubd, 0b000, 0b01, 0b001110);
1982 INSN(fmaxd, 0b000, 0b01, 0b010010);
1983 INSN(fmind, 0b000, 0b01, 0b010110);
1984 INSN(fnmuld, 0b000, 0b01, 0b100010);
1985
1986 #undef INSN
1987
  // Floating-point data-processing (3 source)
  // Shared encoder for the fused multiply-add family; o1 (bit 21) and
  // o0 (bit 15) select the negate/subtract variants, Va is the addend.
  void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
                       FloatRegister Va) {
    starti;
    f(op31, 31, 29);
    f(0b11111, 28, 24);
    f(type, 23, 22), f(o1, 21), f(o0, 15);
    rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
  }

  // NAME Vd, Vn, Vm, Va -- e.g. fmadds: Vd = Va + Vn * Vm.
#define INSN(NAME, op31, type, o1, o0)                                  \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
            FloatRegister Va) {                                         \
    data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
  }

  INSN(fmadds, 0b000, 0b00, 0, 0);
  INSN(fmsubs, 0b000, 0b00, 0, 1);
  INSN(fnmadds, 0b000, 0b00, 1, 0);
  INSN(fnmsubs, 0b000, 0b00, 1, 1);

  INSN(fmaddd, 0b000, 0b01, 0, 0);
  INSN(fmsubd, 0b000, 0b01, 0, 1);
  INSN(fnmaddd, 0b000, 0b01, 1, 0);
  // NOTE(review): name lacks the usual 'd' suffix (cf. fnmsubs); kept
  // as-is because callers elsewhere use `fnmsub`.
  INSN(fnmsub, 0b000, 0b01, 1, 1);

#undef INSN
2016
  // Floating-point conditional select: Vd = cond ? Vn : Vm.
  void fp_conditional_select(unsigned op31, unsigned type,
                             unsigned op1, unsigned op2,
                             Condition cond, FloatRegister Vd,
                             FloatRegister Vn, FloatRegister Vm) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22);
    f(op1, 21, 21);
    f(op2, 11, 10);
    // The condition field sits at 15:12, between Vm (20:16) and op2.
    f(cond, 15, 12);
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
  }

#define INSN(NAME, op31, type, op1, op2)                                \
  void NAME(FloatRegister Vd, FloatRegister Vn,                         \
            FloatRegister Vm, Condition cond) {                         \
    fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
  }

  INSN(fcsels, 0b000, 0b00, 0b1, 0b11); // single precision
  INSN(fcseld, 0b000, 0b01, 0b1, 0b11); // double precision

#undef INSN
2042
  // Floating-point<->integer conversions
  // Shared encoder; rmode selects rounding, opcode the operation.  Both
  // operands are taken as Register: the wrappers below cast FP registers,
  // which is safe because GP and FP register numbers share the same
  // 5-bit encoding fields.
  void float_int_convert(unsigned op31, unsigned type,
                         unsigned rmode, unsigned opcode,
                         Register Rd, Register Rn) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22), f(1, 21), f(rmode, 20, 19);
    f(opcode, 18, 16), f(0b000000, 15, 10);
    zrf(Rn, 5), zrf(Rd, 0);
  }

  // FP -> general-purpose register direction.
#define INSN(NAME, op31, type, rmode, opcode)                           \
  void NAME(Register Rd, FloatRegister Vn) {                            \
    float_int_convert(op31, type, rmode, opcode, Rd, (Register)Vn);     \
  }

  INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000); // S -> W, round toward zero
  INSN(fcvtzs, 0b100, 0b00, 0b11, 0b000);  // S -> X, round toward zero
  INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000); // D -> W, round toward zero
  INSN(fcvtzd, 0b100, 0b01, 0b11, 0b000);  // D -> X, round toward zero

  INSN(fmovs, 0b000, 0b00, 0b00, 0b110);   // raw bit move S -> W
  INSN(fmovd, 0b100, 0b01, 0b00, 0b110);   // raw bit move D -> X

  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b110);

#undef INSN

  // General-purpose -> FP register direction.
#define INSN(NAME, op31, type, rmode, opcode)                           \
  void NAME(FloatRegister Vd, Register Rn) {                            \
    float_int_convert(op31, type, rmode, opcode, (Register)Vd, Rn);     \
  }

  INSN(fmovs, 0b000, 0b00, 0b00, 0b111);   // raw bit move W -> S
  INSN(fmovd, 0b100, 0b01, 0b00, 0b111);   // raw bit move X -> D

  INSN(scvtfws, 0b000, 0b00, 0b00, 0b010); // signed W -> S
  INSN(scvtfs, 0b100, 0b00, 0b00, 0b010);  // signed X -> S
  INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010); // signed W -> D
  INSN(scvtfd, 0b100, 0b01, 0b00, 0b010);  // signed X -> D

  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);

#undef INSN
2088
2089 enum sign_kind { SIGNED, UNSIGNED };
2090
2091 private:
_xcvtf_scalar_integer(sign_kind sign,unsigned sz,FloatRegister Rd,FloatRegister Rn)2092 void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
2093 FloatRegister Rd, FloatRegister Rn) {
2094 starti;
2095 f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
2096 f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
2097 rf(Rn, 5), rf(Rd, 0);
2098 }
2099
2100 public:
2101 #define INSN(NAME, sign, sz) \
2102 void NAME(FloatRegister Rd, FloatRegister Rn) { \
2103 _xcvtf_scalar_integer(sign, sz, Rd, Rn); \
2104 }
2105
2106 INSN(scvtfs, SIGNED, 0);
2107 INSN(scvtfd, SIGNED, 1);
2108
2109 #undef INSN
2110
2111 private:
_xcvtf_vector_integer(sign_kind sign,SIMD_Arrangement T,FloatRegister Rd,FloatRegister Rn)2112 void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
2113 FloatRegister Rd, FloatRegister Rn) {
2114 assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
2115 starti;
2116 f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
2117 f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
2118 rf(Rn, 5), rf(Rd, 0);
2119 }
2120
2121 public:
scvtfv(SIMD_Arrangement T,FloatRegister Rd,FloatRegister Rn)2122 void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
2123 _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
2124 }
2125
  // Floating-point compare
  // op2 distinguishes the register-register form from the
  // compare-with-zero form; in the latter case Vm is unused and
  // defaults to register 0.
  void float_compare(unsigned op31, unsigned type,
                     unsigned op, unsigned op2,
                     FloatRegister Vn, FloatRegister Vm = (FloatRegister)0) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22), f(1, 21);
    f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
    rf(Vn, 5), rf(Vm, 16);
  }


  // Register-register compare: NAME Vn, Vm.
#define INSN(NAME, op31, type, op, op2)           \
  void NAME(FloatRegister Vn, FloatRegister Vm) { \
    float_compare(op31, type, op, op2, Vn, Vm);   \
  }

  // Compare against literal zero -- the only immediate the ISA allows.
#define INSN1(NAME, op31, type, op, op2)          \
  void NAME(FloatRegister Vn, double d) {         \
    assert_cond(d == 0.0);                        \
    float_compare(op31, type, op, op2, Vn);       \
  }

  INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
  INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
  // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
  // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);

  INSN(fcmpd, 0b000, 0b01, 0b00, 0b00000);
  INSN1(fcmpd, 0b000, 0b01, 0b00, 0b01000);
  // INSN(fcmped, 0b000, 0b01, 0b00, 0b10000);
  // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);

#undef INSN
#undef INSN1
2162
  // Floating-point compare. 3-registers versions (scalar).
  // FACGE/FACGT: absolute-value compare, all-ones/all-zeros mask to Vd.
  // sz selects precision (0 = single, 1 = double); e selects GE vs GT.
#define INSN(NAME, sz, e)                                                \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {      \
    starti;                                                              \
    f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16);    \
    f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0);                           \
  }                                                                      \

  INSN(facged, 1, 0); // facge-double
  INSN(facges, 0, 0); // facge-single
  INSN(facgtd, 1, 1); // facgt-double
  INSN(facgts, 0, 1); // facgt-single

#undef INSN
2177
  // Floating-point Move (immediate)
private:
  // Packs an FP value into the 8-bit FMOV immediate encoding; declared
  // here, implemented out of line.
  unsigned pack(double value);

  // FMOV (scalar, immediate): size selects precision (0b00 = S, 0b01 = D).
  void fmov_imm(FloatRegister Vn, double value, unsigned size) {
    starti;
    f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
    f(pack(value), 20, 13), f(0b10000000, 12, 5);
    rf(Vn, 0);
  }

public:

  // fmov Sn, #imm.  Zero is not representable as an FP8 immediate, so it
  // is materialized with MOVI instead.
  // NOTE(review): `if (value)` treats -0.0 as zero, so a -0.0 request
  // would emit +0.0 -- presumably callers never pass -0.0; confirm.
  void fmovs(FloatRegister Vn, double value) {
    if (value)
      fmov_imm(Vn, value, 0b00);
    else
      movi(Vn, T2S, 0);
  }
  // fmov Dn, #imm; see fmovs for the zero special case.
  void fmovd(FloatRegister Vn, double value) {
    if (value)
      fmov_imm(Vn, value, 0b01);
    else
      movi(Vn, T1D, 0);
  }
2203
  // Floating-point rounding (FRINT<rmode>): round Vn to an integral
  // value in FP format, result in Vd.
  // type: half-precision = 11
  //       single = 00
  //       double = 01
  // rmode: A = Away = 100
  //        I = current = 111
  //        M = MinusInf = 010
  //        N = eveN = 000
  //        P = PlusInf = 001
  //        X = eXact = 110
  //        Z = Zero = 011
  void float_round(unsigned type, unsigned rmode, FloatRegister Rd, FloatRegister Rn) {
    starti;
    f(0b00011110, 31, 24);
    f(type, 23, 22);
    f(0b1001, 21, 18);
    f(rmode, 17, 15);
    f(0b10000, 14, 10);
    rf(Rn, 5), rf(Rd, 0);
  }
  // frint<r><precision> Vd, Vn; suffix h/s/d is the precision.
#define INSN(NAME, type, rmode)                   \
  void NAME(FloatRegister Vd, FloatRegister Vn) { \
    float_round(type, rmode, Vd, Vn);             \
  }

public:
  INSN(frintah, 0b11, 0b100);
  INSN(frintih, 0b11, 0b111);
  INSN(frintmh, 0b11, 0b010);
  INSN(frintnh, 0b11, 0b000);
  INSN(frintph, 0b11, 0b001);
  INSN(frintxh, 0b11, 0b110);
  INSN(frintzh, 0b11, 0b011);

  INSN(frintas, 0b00, 0b100);
  INSN(frintis, 0b00, 0b111);
  INSN(frintms, 0b00, 0b010);
  INSN(frintns, 0b00, 0b000);
  INSN(frintps, 0b00, 0b001);
  INSN(frintxs, 0b00, 0b110);
  INSN(frintzs, 0b00, 0b011);

  INSN(frintad, 0b01, 0b100);
  INSN(frintid, 0b01, 0b111);
  INSN(frintmd, 0b01, 0b010);
  INSN(frintnd, 0b01, 0b000);
  INSN(frintpd, 0b01, 0b001);
  INSN(frintxd, 0b01, 0b110);
  INSN(frintzd, 0b01, 0b011);
#undef INSN
2254
2255 private:
2256 static short SIMD_Size_in_bytes[];
2257
2258 public:
  // ldr/str of a single SIMD&FP register (B/H/S/D/Q variant).  Reuses
  // the scalar ld_st2 encoder; the Q variant needs the extra size bit.
#define INSN(NAME, op)                                                  \
  void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) {  \
    ld_st2((Register)Rt, adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
  }                                                                     \

  INSN(ldr, 1);
  INSN(str, 0);

#undef INSN
2268
2269 private:
2270
  // AdvSIMD load/store multiple structures -- no-writeback form: [Xn].
  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
    starti;
    f(0,31), f((int)T & 1, 30);
    f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
    f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
  }
  // Post-indexed immediate form: [Xn], #imm.  The immediate is not
  // encoded in the instruction -- the architecture fixes it to the
  // transfer size -- so we only verify the caller passed the value the
  // hardware will add to Xn.
  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
             int imm, int op1, int op2, int regs) {

    bool replicate = op2 >> 2 == 3;
    // post-index value (imm) is formed differently for replicate/non-replicate ld* instructions
    int expectedImmediate = replicate ? regs * (1 << (T >> 1)) : SIMD_Size_in_bytes[T] * regs;
    guarantee(T < T1Q , "incorrect arrangement");
    guarantee(imm == expectedImmediate, "bad offset");
    starti;
    f(0,31), f((int)T & 1, 30);
    f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
    f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
  }
  // Post-indexed register form: [Xn], Xm.
  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
             Register Xm, int op1, int op2) {
    starti;
    f(0,31), f((int)T & 1, 30);
    f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
    f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
  }

  // Dispatch on the addressing mode of `a` to one of the encoders above.
  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2, int regs) {
    switch (a.getMode()) {
    case Address::base_plus_offset:
      guarantee(a.offset() == 0, "no offset allowed here");
      ld_st(Vt, T, a.base(), op1, op2);
      break;
    case Address::post:
      ld_st(Vt, T, a.base(), a.offset(), op1, op2, regs);
      break;
    case Address::post_reg:
      ld_st(Vt, T, a.base(), a.index(), op1, op2);
      break;
    default:
      ShouldNotReachHere();
    }
  }
2314
2315 public:
2316
  // ld1..ld4 / st1..st4 and replicating ld1r..ld4r.  INSNk takes k
  // register operands, which must be consecutive (the list length is
  // implied by the opcode, only the first register is encoded).
#define INSN1(NAME, op1, op2)                                           \
  void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
    ld_st(Vt, T, a, op1, op2, 1);                                       \
  }

#define INSN2(NAME, op1, op2)                                           \
  void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
    assert(Vt->successor() == Vt2, "Registers must be ordered");        \
    ld_st(Vt, T, a, op1, op2, 2);                                       \
  }

#define INSN3(NAME, op1, op2)                                           \
  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
            SIMD_Arrangement T, const Address &a) {                     \
    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
           "Registers must be ordered");                                \
    ld_st(Vt, T, a, op1, op2, 3);                                       \
  }

#define INSN4(NAME, op1, op2)                                           \
  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
            FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
           Vt3->successor() == Vt4, "Registers must be ordered");       \
    ld_st(Vt, T, a, op1, op2, 4);                                       \
  }

  INSN1(ld1, 0b001100010, 0b0111);
  INSN2(ld1, 0b001100010, 0b1010);
  INSN3(ld1, 0b001100010, 0b0110);
  INSN4(ld1, 0b001100010, 0b0010);

  INSN2(ld2, 0b001100010, 0b1000);
  INSN3(ld3, 0b001100010, 0b0100);
  INSN4(ld4, 0b001100010, 0b0000);

  INSN1(st1, 0b001100000, 0b0111);
  INSN2(st1, 0b001100000, 0b1010);
  INSN3(st1, 0b001100000, 0b0110);
  INSN4(st1, 0b001100000, 0b0010);

  INSN2(st2, 0b001100000, 0b1000);
  INSN3(st3, 0b001100000, 0b0100);
  INSN4(st4, 0b001100000, 0b0000);

  // Load one element and replicate it into every lane of each register.
  INSN1(ld1r, 0b001101010, 0b1100);
  INSN2(ld2r, 0b001101011, 0b1100);
  INSN3(ld3r, 0b001101010, 0b1110);
  INSN4(ld4r, 0b001101011, 0b1110);

#undef INSN1
#undef INSN2
#undef INSN3
#undef INSN4
2371
  // Vector bitwise ops (three-same): NAME Vd.T, Vn.T, Vm.T.
  // Byte arrangements only; T picks the 64-bit (T8B) or 128-bit (T16B)
  // datapath via the Q bit.
#define INSN(NAME, opc)                                                                 \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti;                                                                             \
    assert(T == T8B || T == T16B, "must be T8B or T16B");                               \
    f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                                        \
    rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);                              \
  }

  INSN(eor, 0b101110001);
  INSN(orr, 0b001110101);
  INSN(andr, 0b001110001); // 'andr' avoids clashing with the GP 'and'
  INSN(bic, 0b001110011);
  INSN(bif, 0b101110111);
  INSN(bit, 0b101110101);
  INSN(bsl, 0b101110011);
  INSN(orn, 0b001110111);

#undef INSN
2390
  // Vector integer three-same ops: NAME Vd.T, Vn.T, Vm.T.
  // acceptT2D widens the legal arrangements to include T2D; T1Q/T1D are
  // never valid here.
#define INSN(NAME, opc, opc2, acceptT2D)                                                \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
    if (!acceptT2D) guarantee(T != T2D, "incorrect arrangement");                       \
    starti;                                                                             \
    f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
    f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10);                      \
    rf(Vn, 5), rf(Vd, 0);                                                               \
  }

  INSN(addv, 0, 0b100001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(subv, 1, 0b100001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(uqsubv, 1, 0b001011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(mulv, 0, 0b100111, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(mlav, 0, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(mlsv, 1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(sshl, 0, 0b010001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(ushl, 1, 0b010001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(addpv, 0, 0b101111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(smaxp, 0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(sminp, 0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmhi, 1, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmhs, 1, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D

#undef INSN
2424
  // Vector two-register ops: NAME Vd.T, Vn.T.  `accepted` caps the legal
  // arrangements (0: bytes only; 1: also H and T4S; 2: also T2S; 3: also
  // T2D) -- see the per-INSN comments for the resulting sets.
#define INSN(NAME, opc, opc2, accepted)                                         \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {           \
    guarantee(T != T1Q && T != T1D, "incorrect arrangement");                   \
    if (accepted < 3) guarantee(T != T2D, "incorrect arrangement");             \
    if (accepted < 2) guarantee(T != T2S, "incorrect arrangement");             \
    if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement"); \
    starti;                                                                     \
    f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                \
    f((int)T >> 1, 23, 22), f(opc2, 21, 10);                                    \
    rf(Vn, 5), rf(Vd, 0);                                                       \
  }

  INSN(absr, 0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
  INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
  INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
  INSN(umaxv, 1, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
  INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
  INSN(uminv, 1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
  INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
  INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
  INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S

#undef INSN
2452
  // FP across-lanes max/min: NAME Vd (scalar S result), Vn.T4S.
  // Only T4S is legal, so the sz conditional below always writes 0; it is
  // kept to mirror the encoding field layout.
#define INSN(NAME, opc)                                                               \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                 \
    starti;                                                                           \
    assert(T == T4S, "arrangement must be T4S");                                      \
    f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24), f(opc, 23),                     \
    f(T == T4S ? 0 : 1, 22), f(0b110000111110, 21, 10); rf(Vn, 5), rf(Vd, 0);         \
  }

  INSN(fmaxv, 0);
  INSN(fminv, 1);

#undef INSN
2465
  // Vector modified-immediate ops: NAME Vd.T, #imm8 [, LSL #lsl].
  // cmode is derived from the arrangement and shift; for byte/doubleword
  // arrangements only plain MOVI (op0 == 0, cmode0 == 0) is supported.
#define INSN(NAME, op0, cmode0)                                                           \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) {      \
    unsigned cmode = cmode0;                                                              \
    unsigned op = op0;                                                                    \
    starti;                                                                               \
    assert(lsl == 0 ||                                                                    \
           ((T == T4H || T == T8H) && lsl == 8) ||                                        \
           ((T == T2S || T == T4S) && ((lsl >> 3) < 4) && ((lsl & 7) == 0)), "invalid shift");\
    cmode |= lsl >> 2;                                                                    \
    if (T == T4H || T == T8H) cmode |= 0b1000;                                            \
    if (!(T == T4H || T == T8H || T == T2S || T == T4S)) {                                \
      assert(op == 0 && cmode0 == 0, "must be MOVI");                                     \
      cmode = 0b1110;                                                                     \
      if (T == T1D || T == T2D) op = 1;                                                   \
    }                                                                                     \
    f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19);                      \
    f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5);     \
    rf(Vd, 0);                                                                            \
  }

  INSN(movi, 0, 0);
  INSN(orri, 0, 1);
  INSN(mvni, 1, 0);
  INSN(bici, 1, 1);

#undef INSN
2492
  // FP vector three-same ops: NAME Vd.T, Vn.T, Vm.T
  // (T2S, T4S or T2D; bit 22 is the double-precision sz bit).
#define INSN(NAME, op1, op2, op3)                                                       \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti;                                                                             \
    assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                    \
    f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23);            \
    f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0);    \
  }

  INSN(fabd, 1, 1, 0b110101);
  INSN(fadd, 0, 0, 0b110101);
  INSN(fdiv, 1, 0, 0b111111);
  INSN(fmul, 1, 0, 0b110111);
  INSN(fsub, 0, 1, 0b110101);
  INSN(fmla, 0, 0, 0b110011);
  INSN(fmls, 0, 1, 0b110011);
  INSN(fmax, 0, 0, 0b111101);
  INSN(fmin, 0, 1, 0b111101);
  INSN(fcmeq, 0, 0, 0b111001);
  INSN(fcmgt, 1, 1, 0b111001);
  INSN(fcmge, 1, 0, 0b111001);

#undef INSN
2515
  // SHA-1/SHA-256 three-register crypto ops: NAME Vd.T4S, Vn, Vm.
  // (Crypto extension; availability is gated elsewhere by feature flags.)
#define INSN(NAME, opc)                                                                 \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti;                                                                             \
    assert(T == T4S, "arrangement must be T4S");                                        \
    f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
  }

  INSN(sha1c, 0b000000);
  INSN(sha1m, 0b001000);
  INSN(sha1p, 0b000100);
  INSN(sha1su0, 0b001100);
  INSN(sha256h2, 0b010100);
  INSN(sha256h, 0b010000);
  INSN(sha256su1, 0b011000);

#undef INSN
2532
  // SHA-1/SHA-256 two-register crypto ops: NAME Vd.T4S, Vn.
#define INSN(NAME, opc)                                                       \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {         \
    starti;                                                                   \
    assert(T == T4S, "arrangement must be T4S");                              \
    f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);      \
  }

  INSN(sha1h, 0b000010);
  INSN(sha1su1, 0b000110);
  INSN(sha256su0, 0b001010);

#undef INSN
2545
  // SHA-512 three-register crypto ops: NAME Vd.T2D, Vn, Vm.
#define INSN(NAME, opc)                                                                 \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    starti;                                                                             \
    assert(T == T2D, "arrangement must be T2D");                                        \
    f(0b11001110011, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
  }

  INSN(sha512h, 0b100000);
  INSN(sha512h2, 0b100001);
  INSN(sha512su1, 0b100010);

#undef INSN
2558
  // SHA-512 two-register op: sha512su0 Vd.T2D, Vn.T2D (fully fixed
  // opcode apart from the register fields).
#define INSN(NAME, opc)                                               \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
    starti;                                                           \
    assert(T == T2D, "arrangement must be T2D");                      \
    f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);                             \
  }

  INSN(sha512su0, 0b1100111011000000100000);

#undef INSN
2569
  // SHA-3 four-register bitwise ops: NAME Vd.T16B, Vn, Vm, Va
  // (eor3: Vd = Vn ^ Vm ^ Va; bcax: Vd = Vn ^ (Vm & ~Va)).
#define INSN(NAME, opc)                                                                                  \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \
    starti;                                                                                              \
    assert(T == T16B, "arrangement must be T16B");                                                       \
    f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); \
  }

  INSN(eor3, 0b000);
  INSN(bcax, 0b001);

#undef INSN
2581
  // SHA-3 XAR: Vd.T2D = rotate-right(Vn ^ Vm, #imm); the 6-bit rotate
  // amount occupies bits 15:10.
#define INSN(NAME, opc)                                                                               \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \
    starti;                                                                                           \
    assert(T == T2D, "arrangement must be T2D");                                                      \
    f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0);          \
  }

  INSN(xar, 0b100);

#undef INSN
2592
  // SHA-3 RAX1: Vd.T2D = Vn ^ rotate-left(Vm, 1).
#define INSN(NAME, opc)                                                                       \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {       \
    starti;                                                                                   \
    assert(T == T2D, "arrangement must be T2D");                                              \
    f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \
  }

  INSN(rax1, 0b011);

#undef INSN
2603
  // AES crypto ops (implicit 16B arrangement): encrypt/decrypt round and
  // mix-columns helpers; fully fixed opcodes apart from the registers.
#define INSN(NAME, opc)                          \
  void NAME(FloatRegister Vd, FloatRegister Vn) { \
    starti;                                      \
    f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);        \
  }

  INSN(aese, 0b0100111000101000010010);
  INSN(aesd, 0b0100111000101000010110);
  INSN(aesmc, 0b0100111000101000011010);
  INSN(aesimc, 0b0100111000101000011110);

#undef INSN
2616
  // FP vector-by-element ops: NAME Vd.T, Vn.T, Vm.<elt>[index].
  // The element index is split across bits 21 (L) and 11 (H) for S
  // elements; D elements use bit 11 only.
#define INSN(NAME, op1, op2)                                                                            \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index = 0) {  \
    starti;                                                                                             \
    assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                                    \
    assert(index >= 0 && ((T == T2D && index <= 1) || (T != T2D && index <= 3)), "invalid index");      \
    f(0, 31), f((int)T & 1, 30), f(op1, 29); f(0b011111, 28, 23);                                       \
    f(T == T2D ? 1 : 0, 22), f(T == T2D ? 0 : index & 1, 21), rf(Vm, 16);                               \
    f(op2, 15, 12), f(T == T2D ? index : (index >> 1), 11), f(0, 10);                                   \
    rf(Vn, 5), rf(Vd, 0);                                                                               \
  }

  // FMLA/FMLS - Vector - Scalar
  INSN(fmlavs, 0, 0b0001);
  INSN(fmlsvs, 0, 0b0101);
  // FMULX - Vector - Scalar
  INSN(fmulxvs, 1, 0b1001);

#undef INSN
2635
  // Floating-point Reciprocal Estimate (scalar): frecpe Sd/Dd, Sn/Dn.
  // type selects precision via bit 22 (S = 0, D = 1).
  void frecpe(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
    assert(type == D || type == S, "Wrong type for frecpe");
    starti;
    f(0b010111101, 31, 23);
    f(type == D ? 1 : 0, 22);
    f(0b100001110110, 21, 10);
    rf(Vn, 5), rf(Vd, 0);
  }
2645
  // (long) {a, b} -> (a + b)
  // Scalar pairwise add: sums the two 64-bit lanes of Vn into Dd
  // (fully fixed opcode apart from the register fields).
  void addpd(FloatRegister Vd, FloatRegister Vn) {
    starti;
    f(0b0101111011110001101110, 31, 10);
    rf(Vn, 5), rf(Vd, 0);
  }
2652
  // Floating-point AdvSIMD scalar pairwise
  // NAME Sd/Dd, Vn.2S/.2D -- reduces the two lanes of Vn into a scalar.
#define INSN(NAME, op1, op2)                                                                 \
  void NAME(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {                      \
    starti;                                                                                  \
    assert(type == D || type == S, "Wrong type for faddp/fmaxp/fminp");                      \
    f(0b0111111, 31, 25), f(op1, 24, 23),                                                    \
    f(type == S ? 0 : 1, 22), f(0b11000, 21, 17), f(op2, 16, 10), rf(Vn, 5), rf(Vd, 0);      \
  }

  INSN(faddp, 0b00, 0b0110110);
  INSN(fmaxp, 0b00, 0b0111110);
  INSN(fminp, 0b01, 0b0111110);

#undef INSN
2667
  // INS (element): Vd.T[didx] = Vn.T[sidx].  The element size is encoded
  // by the position of the low set bit in imm5, hence the shifts by T.
  void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) {
    starti;
    assert(T != Q, "invalid register variant");
    f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15);
    f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
  }
2674
  // UMOV/SMOV: move vector element Vn.T[idx] to GP register Rd with
  // zero (umov) or sign (smov) extension.  `cond` restricts the legal
  // element sizes; op1 is the X/W width bit.
#define INSN(NAME, cond, op1, op2)                                      \
  void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \
    starti;                                                             \
    assert(cond, "invalid register variant");                           \
    f(0, 31), f(op1, 30), f(0b001110000, 29, 21);                       \
    f(((idx << 1) | 1) << (int)T, 20, 16), f(op2, 15, 10);              \
    rf(Vn, 5), rf(Rd, 0);                                               \
  }

  INSN(umov, (T != Q), (T == D ? 1 : 0), 0b001111);
  INSN(smov, (T < D), 1, 0b001011);

#undef INSN
2688
  // Vector shift by immediate: NAME Vd.T, Vn.T, #shift.
#define INSN(NAME, opc, opc2, isSHR)                                    \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
    starti;                                                             \
    /* The encodings for the immh:immb fields (bits 22:16) in *SHR are  \
     *   0001 xxx       8B/16B, shift = 16  - UInt(immh:immb)           \
     *   001x xxx       4H/8H,  shift = 32  - UInt(immh:immb)           \
     *   01xx xxx       2S/4S,  shift = 64  - UInt(immh:immb)           \
     *   1xxx xxx       1D/2D,  shift = 128 - UInt(immh:immb)           \
     *   (1D is RESERVED)                                               \
     * for SHL shift is calculated as:                                  \
     *   0001 xxx       8B/16B, shift = UInt(immh:immb) - 8             \
     *   001x xxx       4H/8H,  shift = UInt(immh:immb) - 16            \
     *   01xx xxx       2S/4S,  shift = UInt(immh:immb) - 32            \
     *   1xxx xxx       1D/2D,  shift = UInt(immh:immb) - 64            \
     *   (1D is RESERVED)                                               \
     */                                                                 \
    guarantee(!isSHR || (isSHR && (shift != 0)), "impossible encoding");\
    assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");           \
    int cVal = (1 << (((T >> 1) + 3) + (isSHR ? 1 : 0)));               \
    int encodedShift = isSHR ? cVal - shift : cVal + shift;             \
    f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),            \
    f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
  }

  INSN(shl,  0, 0b010101, /* isSHR = */ false);
  INSN(sshr, 0, 0b000001, /* isSHR = */ true);
  INSN(ushr, 1, 0b000001, /* isSHR = */ true);
  INSN(usra, 1, 0b000101, /* isSHR = */ true); // shift-right and accumulate
  INSN(ssra, 0, 0b000101, /* isSHR = */ true); // shift-right and accumulate

#undef INSN
2720
  // Scalar D-register shift by immediate: NAME Dd, Dn, #shift.
  // immh:immb = 64 + shift for SHL, 128 - shift for *SHR (the 64-bit
  // element row of the table above).
#define INSN(NAME, opc, opc2, isSHR)                                    \
  void NAME(FloatRegister Vd, FloatRegister Vn, int shift){             \
    starti;                                                             \
    int encodedShift = isSHR ? 128 - shift : 64 + shift;                \
    f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23),                   \
    f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
  }

  INSN(shld,  0, 0b010101, /* isSHR = */ false);
  INSN(sshrd, 0, 0b000001, /* isSHR = */ true);
  INSN(ushrd, 1, 0b000001, /* isSHR = */ true);

#undef INSN
2734
private:
  // [US]SHLL(2): widen each element of Vn (arrangement Tb) to the
  // next-larger element size (arrangement Ta) and shift left.
  void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    starti;
    /* The encodings for the immh:immb fields (bits 22:16) are
     *   0001 xxx       8H, 8B/16B shift = xxx
     *   001x xxx       4S, 4H/8H  shift = xxxx
     *   01xx xxx       2D, 2S/4S  shift = xxxxx
     *   1xxx xxx       RESERVED
     */
    assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
    assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
    f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
    f((1 << ((Tb>>1)+3))|shift, 22, 16);
    f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }

public:
  // ushll Vd.Ta, Vn.Tb, #shift -- widen the low half, unsigned.
  void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
    _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
  }

  // ushll2 -- widen the high half, unsigned (Q-form arrangements).
  void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
    _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
  }

  // uxtl is ushll with a zero shift (pure unsigned widen).
  void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
    ushll(Vd, Ta, Vn, Tb, 0);
  }

  // sshll Vd.Ta, Vn.Tb, #shift -- widen the low half, signed.
  void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
    _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
  }

  // sshll2 -- widen the high half, signed (Q-form arrangements).
  void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
    assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
    _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
  }

  // sxtl is sshll with a zero shift (pure signed widen).
  void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
    sshll(Vd, Ta, Vn, Tb, 0);
  }
2779
  // Move from general purpose register
  //   mov Vd.T[index], Rn
  // The element size is encoded by the low set bit of imm5 (20:16),
  // with the index packed above it.
  void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
    starti;
    f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
    f(0b000111, 15, 10), zrf(Xn, 5), rf(Vd, 0);
  }

  // Move to general purpose register
  //   mov Rd, Vn.T[index]
  void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
    guarantee(T >= T2S && T < T1Q, "only D and S arrangements are supported");
    starti;
    // Bit 30 selects a 64-bit (D-element) vs 32-bit (S-element) move.
    f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
    f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
    f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
  }
2797
2798 private:
_pmull(FloatRegister Vd,SIMD_Arrangement Ta,FloatRegister Vn,FloatRegister Vm,SIMD_Arrangement Tb)2799 void _pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2800 starti;
2801 assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
2802 (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
2803 int size = (Ta == T1Q) ? 0b11 : 0b00;
2804 f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
2805 f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
2806 }
2807
2808 public:
  // pmull Vd.Ta, Vn.Tb, Vm.Tb — lower-half source variant.
  void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
    assert(Tb == T1D || Tb == T8B, "pmull assumes T1D or T8B as the second size specifier");
    _pmull(Vd, Ta, Vn, Vm, Tb);
  }
2813
  // pmull2 Vd.Ta, Vn.Tb, Vm.Tb — upper-half source variant.
  void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
    assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
    _pmull(Vd, Ta, Vn, Vm, Tb);
  }
2818
  // uqxtn Vd.Tb, Vn.Ta
  // Unsigned saturating extract narrow: Tb must be exactly one size step
  // narrower than Ta (checked by the assert below).
  void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
    starti;
    int size_b = (int)Tb >> 1;
    int size_a = (int)Ta >> 1;
    assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
    f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22);
    f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
  }
2827
  // xtn Vd.Tb, Vn.Ta
  // Extract narrow (truncating): same size relationship as uqxtn — Tb one
  // size step narrower than Ta.
  void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
    starti;
    int size_b = (int)Tb >> 1;
    int size_a = (int)Ta >> 1;
    assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
    f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
    f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
  }
2836
  // dup Vd.T, Xs
  // Duplicate a general-purpose register value to all vector elements.
  // T1D is a reserved encoding for this form (checked below).
  void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
  {
    starti;
    assert(T != T1D, "reserved encoding");
    f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
    f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), zrf(Xs, 5), rf(Vd, 0);
  }
2844
  // dup Vd.T, Vn.T[index]
  // Duplicate one vector element to all elements; imm5 (bits 20..16)
  // encodes the element size and the source index.
  void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
  {
    starti;
    assert(T != T1D, "reserved encoding");
    f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
    f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
    f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }
2853
  // AdvSIMD ZIP/UZP/TRN (vector permute) instructions.
  // The 3-bit opcode (bits 14..12) selects the permute operation.
  // Note: the f()/rf() calls are not in descending bit order; each call
  // writes an explicit bit range, so emission order does not matter.
#define INSN(NAME, opcode)                                                              \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    guarantee(T != T1D && T != T1Q, "invalid arrangement");                             \
    starti;                                                                             \
    f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15);                                  \
    f(opcode, 14, 12), f(0b10, 11, 10);                                                 \
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                                   \
    f(T & 1, 30), f(T >> 1, 23, 22);                                                    \
  }

  INSN(uzp1, 0b001);
  INSN(trn1, 0b010);
  INSN(zip1, 0b011);
  INSN(uzp2, 0b101);
  INSN(trn2, 0b110);
  INSN(zip2, 0b111);

#undef INSN
2873
  // CRC32 instructions
  //   c  (bit 12)      : 1 selects the CRC32C variants
  //   sf (bit 31)      : 1 only for the 64-bit-source forms (crc32x/crc32cx)
  //   sz (bits 11..10) : source operand size
#define INSN(NAME, c, sf, sz)                                             \
  void NAME(Register Rd, Register Rn, Register Rm) {                      \
    starti;                                                               \
    f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
    f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
  }

  INSN(crc32b,  0, 0, 0b00);
  INSN(crc32h,  0, 0, 0b01);
  INSN(crc32w,  0, 0, 0b10);
  INSN(crc32x,  0, 1, 0b11);
  INSN(crc32cb, 1, 0, 0b00);
  INSN(crc32ch, 1, 0, 0b01);
  INSN(crc32cw, 1, 0, 0b10);
  INSN(crc32cx, 1, 1, 0b11);

#undef INSN
2892
  // Table vector lookup
  // Vn is the first of 'registers' (1..4) consecutive table registers,
  // encoded as registers-1 in bits 14..13; op selects tbl (0) vs tbx (1).
#define INSN(NAME, op)                                                                                  \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
    starti;                                                                                             \
    assert(T == T8B || T == T16B, "invalid arrangement");                                               \
    assert(0 < registers && registers <= 4, "invalid number of registers");                             \
    f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15);                          \
    f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0);                          \
  }

  INSN(tbl, 0);
  INSN(tbx, 1);

#undef INSN
2907
  // AdvSIMD two-reg misc
  // In this instruction group, the 2 bits in the size field ([23:22]) may be
  // fixed or determined by the "SIMD_Arrangement T", or both. The additional
  // parameter "tmask" is a 2-bit mask used to indicate which bits in the size
  // field are determined by the SIMD_Arrangement. A bit in "tmask" should be
  // set to 1 if the corresponding size bit is marked as "x" in the Arm ARM.
#define INSN(NAME, U, size, tmask, opcode)                      \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
    starti;                                                     \
    assert((ASSERTION), MSG);                                   \
    f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24);  \
    f(size | ((int)(T >> 1) & tmask), 23, 22), f(0b10000, 21, 17); \
    f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);   \
  }

#define MSG "invalid arrangement"

  // Floating-point unary ops: only S and D element arrangements are valid.
#define ASSERTION (T == T2S || T == T4S || T == T2D)
  INSN(fsqrt,  1, 0b10, 0b01, 0b11111);
  INSN(fabs,   0, 0b10, 0b01, 0b01111);
  INSN(fneg,   1, 0b10, 0b01, 0b01111);
  INSN(frintn, 0, 0b00, 0b01, 0b11000);
  INSN(frintm, 0, 0b00, 0b01, 0b11001);
  INSN(frintp, 0, 0b10, 0b01, 0b11000);
#undef ASSERTION

#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
  INSN(rev64, 0, 0b00, 0b11, 0b00000);
#undef ASSERTION

#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
  INSN(rev32, 1, 0b00, 0b11, 0b00000);
#undef ASSERTION

  // Byte-granular ops: only B element arrangements are valid.
#define ASSERTION (T == T8B || T == T16B)
  INSN(rev16, 0, 0b00, 0b11, 0b00001);
  INSN(rbit,  1, 0b01, 0b00, 0b00101);
#undef ASSERTION

#undef MSG

#undef INSN
2950
  // ext Vd.T, Vn.T, Vm.T, #index
  // Extract a vector from a pair of vectors, starting at byte 'index'.
  // The valid index range depends on the vector width (8B: 0..7, 16B: 0..15).
  void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
  {
    starti;
    assert(T == T8B || T == T16B, "invalid arrangement");
    assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
    f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
    rf(Vm, 16), f(0, 15), f(index, 14, 11);
    f(0, 10), rf(Vn, 5), rf(Vd, 0);
  }
2960
  // SVE arithmetics - unpredicated
  // T is the element size variant (B/H/S/D); Q is not a valid SVE element size.
#define INSN(NAME, opcode)                                                             \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                                            \
    assert(T != Q, "invalid register variant");                                        \
    f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                                     \
    rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
  }
  INSN(sve_add, 0b000);
  INSN(sve_sub, 0b001);
#undef INSN
2972
  // SVE floating-point arithmetic - unpredicated
  // Only S and D element sizes are valid for these FP forms.
#define INSN(NAME, opcode)                                                             \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                                            \
    assert(T == S || T == D, "invalid register variant");                              \
    f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21),                                     \
    rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
  }

  INSN(sve_fadd, 0b000);
  INSN(sve_fmul, 0b010);
  INSN(sve_fsub, 0b001);
#undef INSN
2986
2987 private:
  // Common encoder for predicated SVE register-to-register forms:
  // op24 fills bits 31..24, T the size field (23..22), op13 bits 21..13;
  // the governing predicate Pg sits at bit 10 and the registers at 5/0.
  void sve_predicate_reg_insn(unsigned op24, unsigned op13,
                              FloatRegister Zd_or_Vd, SIMD_RegVariant T,
                              PRegister Pg, FloatRegister Zn_or_Vn) {
    starti;
    f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
    pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
  }
2995
2996 public:
2997
2998 // SVE integer arithmetics - predicate
2999 #define INSN(NAME, op1, op2) \
3000 void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \
3001 assert(T != Q, "invalid register variant"); \
3002 sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \
3003 }
3004
3005 INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
3006 INSN(sve_add, 0b00000100, 0b000000000); // vector add
3007 INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
3008 INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
3009 INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits
3010 INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
3011 INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
3012 INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
3013 INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
3014 INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
3015 INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
3016 INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
3017 INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
3018 INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
3019 INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
3020 INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
3021 INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
3022 INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
3023 INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
3024 #undef INSN
3025
  // SVE floating-point arithmetics - predicate
  // Only S and D element sizes are valid; emission goes through
  // sve_predicate_reg_insn like the integer predicated forms.
#define INSN(NAME, op1, op2)                                                                          \
  void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
    assert(T == S || T == D, "invalid register variant");                                             \
    sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm);                               \
  }

  INSN(sve_fabs,   0b00000100, 0b011100101);
  INSN(sve_fadd,   0b01100101, 0b000000100);
  INSN(sve_fadda,  0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
  INSN(sve_fdiv,   0b01100101, 0b001101100);
  INSN(sve_fmax,   0b01100101, 0b000110100); // floating-point maximum
  INSN(sve_fmaxv,  0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
  INSN(sve_fmin,   0b01100101, 0b000111100); // floating-point minimum
  INSN(sve_fminv,  0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
  INSN(sve_fmul,   0b01100101, 0b000010100);
  INSN(sve_fneg,   0b00000100, 0b011101101);
  INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
  INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
  INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
  INSN(sve_fsqrt,  0b01100101, 0b001101101);
  INSN(sve_fsub,   0b01100101, 0b000001100);
#undef INSN
3049
  // SVE multiple-add/sub - predicated
  // Zda is an accumulator (read and written); op1 selects integer vs FP
  // encodings, op2 (bits 15..13) selects the add/sub/negated variant.
#define INSN(NAME, op0, op1, op2)                                                                     \
  void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                                                           \
    assert(T != Q, "invalid size");                                                                   \
    f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16);                                             \
    f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0);                                              \
  }

  INSN(sve_fmla,  0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
  INSN(sve_fmls,  0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
  INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
  INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
  INSN(sve_mla,   0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
  INSN(sve_mls,   0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
#undef INSN
3066
  // SVE bitwise logical - unpredicated
  // These operate bitwise on the whole vector, so no element-size parameter
  // is needed; opc (bits 23..22) selects the operation.
#define INSN(NAME, opc)                                          \
  void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                      \
    f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21),             \
    rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);       \
  }
  INSN(sve_and, 0b00);
  INSN(sve_eor, 0b10);
  INSN(sve_orr, 0b01);
  INSN(sve_bic, 0b11);
#undef INSN
3079
  // SVE shift immediate - unpredicated
  // The immediate shift amount and the element size are packed together into
  // the tszh:tszl:imm3 fields as described in the comment below; isSHR
  // flips both the valid shift range and the encoding direction.
#define INSN(NAME, opc, isSHR)                                                  \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
    starti;                                                                     \
    /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
     * for shift right is calculated as:                                    \
     *   0001 xxx  B, shift = 16  - UInt(tszh:tszl:imm3)                    \
     *   001x xxx  H, shift = 32  - UInt(tszh:tszl:imm3)                    \
     *   01xx xxx  S, shift = 64  - UInt(tszh:tszl:imm3)                    \
     *   1xxx xxx  D, shift = 128 - UInt(tszh:tszl:imm3)                    \
     * for shift left is calculated as:                                     \
     *   0001 xxx  B, shift = UInt(tszh:tszl:imm3) - 8                      \
     *   001x xxx  H, shift = UInt(tszh:tszl:imm3) - 16                     \
     *   01xx xxx  S, shift = UInt(tszh:tszl:imm3) - 32                     \
     *   1xxx xxx  D, shift = UInt(tszh:tszl:imm3) - 64                     \
     */                                                                         \
    assert(T != Q, "Invalid register variant");                                 \
    if (isSHR) {                                                                \
      /* right shifts are 1..esize */                                           \
      assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
    } else {                                                                    \
      /* left shifts are 0..esize-1 */                                          \
      assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
    }                                                                           \
    int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0)));                              \
    int encodedShift = isSHR ? cVal - shift : cVal + shift;                     \
    int tszh = encodedShift >> 5;                                               \
    int tszl_imm = encodedShift & 0x1f;                                         \
    f(0b00000100, 31, 24);                                                      \
    f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16);                              \
    f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0);                     \
  }

  INSN(sve_asr, 0b100, /* isSHR = */ true);
  INSN(sve_lsl, 0b111, /* isSHR = */ false);
  INSN(sve_lsr, 0b101, /* isSHR = */ true);
#undef INSN
3115
3116 private:
3117
3118 // Scalar base + immediate index
sve_ld_st1(FloatRegister Zt,Register Xn,int imm,PRegister Pg,SIMD_RegVariant T,int op1,int type,int op2)3119 void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
3120 SIMD_RegVariant T, int op1, int type, int op2) {
3121 starti;
3122 assert_cond(T >= type);
3123 f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
3124 f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
3125 pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
3126 }
3127
3128 // Scalar base + scalar index
sve_ld_st1(FloatRegister Zt,Register Xn,Register Xm,PRegister Pg,SIMD_RegVariant T,int op1,int type,int op2)3129 void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
3130 SIMD_RegVariant T, int op1, int type, int op2) {
3131 starti;
3132 assert_cond(T >= type);
3133 f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
3134 rf(Xm, 16), f(op2, 15, 13);
3135 pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
3136 }
3137
  // Address-based front end: dispatch to the immediate-offset or
  // register-index encoder depending on the addressing mode. Any other
  // mode is a programming error.
  void sve_ld_st1(FloatRegister Zt, PRegister Pg,
              SIMD_RegVariant T, const Address &a,
              int op1, int type, int imm_op2, int scalar_op2) {
    switch (a.getMode()) {
    case Address::base_plus_offset:
      sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
      break;
    case Address::base_plus_offset_reg:
      sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
      break;
    default:
      ShouldNotReachHere();
    }
  }
3152
3153 public:
3154
  // SVE load/store - predicated
  // 'type' fixes the memory element size (B/H/W/D); imm_op2 and scalar_op2
  // are the op2 fields for the immediate-offset and register-index forms.
#define INSN(NAME, op1, type, imm_op2, scalar_op2)                                    \
  void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) {    \
    assert(T != Q, "invalid register variant");                                       \
    sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2);                         \
  }

  INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
  INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
  INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
  INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
  INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
  INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
  INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
  INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
#undef INSN
3171
  // SVE load/store - unpredicated
  // Whole-register LDR/STR (vector). The 9-bit vector-length-scaled
  // immediate is split: high 6 bits at 21..16, low 3 bits at 12..10.
  // Only base-plus-immediate addressing is supported (no index register).
#define INSN(NAME, op1)                                                         \
  void NAME(FloatRegister Zt, const Address &a)  {                              \
    starti;                                                                     \
    assert(a.index() == noreg, "invalid address variant");                      \
    f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),          \
    f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
  }

  INSN(sve_ldr, 0b100); // LDR (vector)
  INSN(sve_str, 0b111); // STR (vector)
#undef INSN
3184
  // SVE ADDVL/ADDPL: add a multiple of the vector/predicate register length
  // (imm6, signed) to scalar Xn, writing Xd.
#define INSN(NAME, op)                                                  \
  void NAME(Register Xd, Register Xn, int imm6) {                       \
    starti;                                                             \
    f(0b000001000, 31, 23), f(op, 22, 21);                              \
    srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0);       \
  }

  INSN(sve_addvl, 0b01);
  INSN(sve_addpl, 0b11);
#undef INSN
3195
  // SVE inc/dec register by element count
  // imm4 is the multiplier 1..16, encoded as imm4-1; pattern defaults to
  // 0b11111 (the ALL pattern in the SVE predicate-pattern encoding).
#define INSN(NAME, op)                                                                 \
  void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
    starti;                                                                            \
    assert(T != Q, "invalid size");                                                    \
    f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20);                               \
    f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0);  \
  }

  INSN(sve_inc, 0);
  INSN(sve_dec, 1);
#undef INSN
3208
  // SVE predicate count
  // cntp Xd, Pg, Pn.T — count the active elements of Pn under governing
  // predicate Pg into scalar Xd.
  void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
    prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
  }
3216
  // SVE dup scalar
  // dup Zd.T, Rn — broadcast general-purpose register Rn to every element.
  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
    srf(Rn, 5), rf(Zd, 0);
  }
3224
  // SVE dup imm
  // dup Zd.T, #imm — broadcast an immediate. The hardware immediate is a
  // signed 8-bit value optionally shifted left by 8 (the sh flag, bit 13);
  // wider multiples of 256 are folded into the shifted form below. The
  // shifted form is not available for the B element size.
  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
    starti;
    assert(T != Q, "invalid size");
    int sh = 0;
    if (imm8 <= 127 && imm8 >= -128) {
      // fits directly in the 8-bit field, no shift
      sh = 0;
    } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
      // multiple of 256 in range: encode imm8 >> 8 with sh = 1
      sh = 1;
      imm8 = (imm8 >> 8);
    } else {
      guarantee(false, "invalid immediate");
    }
    f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
    f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
  }
3241
  // SVE ptrue: set predicate pd according to 'pattern' at element size
  // 'esize'; pattern defaults to 0b11111 (the ALL pattern).
  void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
    starti;
    f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
    f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
  }
3247
  // Construct an assembler emitting into the given code buffer.
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
  }
3250
  // Stack overflow checking
  virtual void bang_stack_with_offset(int offset);

  // Immediate legality checks for the corresponding instruction classes
  // (declared here, defined elsewhere).
  static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
  static bool operand_valid_for_add_sub_immediate(int64_t imm);
  static bool operand_valid_for_float_immediate(double imm);

  // Emit 64-bit data with relocation information.
  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
3260 };
3261
operator |(Assembler::Membar_mask_bits a,Assembler::Membar_mask_bits b)3262 inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
3263 Assembler::Membar_mask_bits b) {
3264 return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b));
3265 }
3266
// Destructor triggers emission of the accumulated instruction through the
// owning assembler. NOTE(review): instances appear to be created by the
// 'starti' macro used throughout the encoders above — confirm against the
// macro definition earlier in this file.
Instruction_aarch64::~Instruction_aarch64() {
  assem->emit();
}
3270
3271 #undef starti
3272
3273 // Invert a condition
operator ~(const Assembler::Condition cond)3274 inline const Assembler::Condition operator~(const Assembler::Condition cond) {
3275 return Assembler::Condition(int(cond) ^ 1);
3276 }
3277
3278 class BiasedLockingCounters;
3279
3280 extern "C" void das(uint64_t start, int len);
3281
3282 #endif // CPU_AARCH64_ASSEMBLER_AARCH64_HPP
3283