1 // Copyright 2013 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_ 6 #define V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_ 7 8 #include <deque> 9 #include <list> 10 #include <map> 11 #include <memory> 12 #include <vector> 13 14 #include "src/base/optional.h" 15 #include "src/codegen/arm64/constants-arm64.h" 16 #include "src/codegen/arm64/instructions-arm64.h" 17 #include "src/codegen/arm64/register-arm64.h" 18 #include "src/codegen/assembler.h" 19 #include "src/codegen/constant-pool.h" 20 #include "src/common/globals.h" 21 #include "src/utils/utils.h" 22 23 // Windows arm64 SDK defines mvn to NEON intrinsic neon_not which will not 24 // be used here. 25 #if defined(V8_OS_WIN) && defined(mvn) 26 #undef mvn 27 #endif 28 29 #if defined(V8_OS_WIN) 30 #include "src/base/platform/wrappers.h" 31 #include "src/diagnostics/unwinding-info-win64.h" 32 #endif // V8_OS_WIN 33 34 namespace v8 { 35 namespace internal { 36 37 class SafepointTableBuilder; 38 39 // ----------------------------------------------------------------------------- 40 // Immediates. 41 class Immediate { 42 public: 43 template <typename T> 44 inline explicit Immediate( 45 Handle<T> handle, RelocInfo::Mode mode = RelocInfo::FULL_EMBEDDED_OBJECT); 46 47 // This is allowed to be an implicit constructor because Immediate is 48 // a wrapper class that doesn't normally perform any type conversion. 49 template <typename T> 50 inline Immediate(T value); // NOLINT(runtime/explicit) 51 52 template <typename T> 53 inline Immediate(T value, RelocInfo::Mode rmode); 54 value()55 int64_t value() const { return value_; } rmode()56 RelocInfo::Mode rmode() const { return rmode_; } 57 58 private: 59 int64_t value_; 60 RelocInfo::Mode rmode_; 61 }; 62 63 // ----------------------------------------------------------------------------- 64 // Operands. 
65 constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize; 66 constexpr uint64_t kSmiShiftMask = (1ULL << kSmiShift) - 1; 67 68 // Represents an operand in a machine instruction. 69 class Operand { 70 // TODO(all): If necessary, study more in details which methods 71 // TODO(all): should be inlined or not. 72 public: 73 // rm, {<shift> {#<shift_amount>}} 74 // where <shift> is one of {LSL, LSR, ASR, ROR}. 75 // <shift_amount> is uint6_t. 76 // This is allowed to be an implicit constructor because Operand is 77 // a wrapper class that doesn't normally perform any type conversion. 78 inline Operand(Register reg, Shift shift = LSL, 79 unsigned shift_amount = 0); // NOLINT(runtime/explicit) 80 81 // rm, <extend> {#<shift_amount>} 82 // where <extend> is one of {UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX}. 83 // <shift_amount> is uint2_t. 84 inline Operand(Register reg, Extend extend, unsigned shift_amount = 0); 85 86 static Operand EmbeddedNumber(double number); // Smi or HeapNumber. 87 static Operand EmbeddedStringConstant(const StringConstantBase* str); 88 89 inline bool IsHeapObjectRequest() const; 90 inline HeapObjectRequest heap_object_request() const; 91 inline Immediate immediate_for_heap_object_request() const; 92 93 // Implicit constructor for all int types, ExternalReference, and Smi. 94 template <typename T> 95 inline Operand(T t); // NOLINT(runtime/explicit) 96 97 // Implicit constructor for int types. 98 template <typename T> 99 inline Operand(T t, RelocInfo::Mode rmode); 100 101 inline bool IsImmediate() const; 102 inline bool IsShiftedRegister() const; 103 inline bool IsExtendedRegister() const; 104 inline bool IsZero() const; 105 106 // This returns an LSL shift (<= 4) operand as an equivalent extend operand, 107 // which helps in the encoding of instructions that use the stack pointer. 108 inline Operand ToExtendedRegister() const; 109 110 // Returns new Operand adapted for using with W registers. 
111 inline Operand ToW() const; 112 113 inline Immediate immediate() const; 114 inline int64_t ImmediateValue() const; 115 inline RelocInfo::Mode ImmediateRMode() const; 116 inline Register reg() const; 117 inline Shift shift() const; 118 inline Extend extend() const; 119 inline unsigned shift_amount() const; 120 121 // Relocation information. 122 bool NeedsRelocation(const Assembler* assembler) const; 123 124 private: 125 base::Optional<HeapObjectRequest> heap_object_request_; 126 Immediate immediate_; 127 Register reg_; 128 Shift shift_; 129 Extend extend_; 130 unsigned shift_amount_; 131 }; 132 133 // MemOperand represents a memory operand in a load or store instruction. 134 class MemOperand { 135 public: 136 inline MemOperand(); 137 inline explicit MemOperand(Register base, int64_t offset = 0, 138 AddrMode addrmode = Offset); 139 inline explicit MemOperand(Register base, Register regoffset, 140 Shift shift = LSL, unsigned shift_amount = 0); 141 inline explicit MemOperand(Register base, Register regoffset, Extend extend, 142 unsigned shift_amount = 0); 143 inline explicit MemOperand(Register base, const Operand& offset, 144 AddrMode addrmode = Offset); 145 base()146 const Register& base() const { return base_; } regoffset()147 const Register& regoffset() const { return regoffset_; } offset()148 int64_t offset() const { return offset_; } addrmode()149 AddrMode addrmode() const { return addrmode_; } shift()150 Shift shift() const { return shift_; } extend()151 Extend extend() const { return extend_; } shift_amount()152 unsigned shift_amount() const { return shift_amount_; } 153 inline bool IsImmediateOffset() const; 154 inline bool IsRegisterOffset() const; 155 inline bool IsPreIndex() const; 156 inline bool IsPostIndex() const; 157 158 private: 159 Register base_; 160 Register regoffset_; 161 int64_t offset_; 162 AddrMode addrmode_; 163 Shift shift_; 164 Extend extend_; 165 unsigned shift_amount_; 166 }; 167 168 // 
----------------------------------------------------------------------------- 169 // Assembler. 170 171 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { 172 public: 173 // Create an assembler. Instructions and relocation information are emitted 174 // into a buffer, with the instructions starting from the beginning and the 175 // relocation information starting from the end of the buffer. See CodeDesc 176 // for a detailed comment on the layout (globals.h). 177 // 178 // If the provided buffer is nullptr, the assembler allocates and grows its 179 // own buffer. Otherwise it takes ownership of the provided buffer. 180 explicit Assembler(const AssemblerOptions&, 181 std::unique_ptr<AssemblerBuffer> = {}); 182 183 ~Assembler() override; 184 185 void AbortedCodeGeneration() override; 186 187 // System functions --------------------------------------------------------- 188 // Start generating code from the beginning of the buffer, discarding any code 189 // and data that has already been emitted into the buffer. 190 // 191 // In order to avoid any accidental transfer of state, Reset DCHECKs that the 192 // constant pool is not blocked. 193 void Reset(); 194 195 // GetCode emits any pending (non-emitted) code and fills the descriptor desc. 196 static constexpr int kNoHandlerTable = 0; 197 static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr; 198 void GetCode(Isolate* isolate, CodeDesc* desc, 199 SafepointTableBuilder* safepoint_table_builder, 200 int handler_table_offset); 201 202 // Convenience wrapper for code without safepoint or handler tables. GetCode(Isolate * isolate,CodeDesc * desc)203 void GetCode(Isolate* isolate, CodeDesc* desc) { 204 GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable); 205 } 206 207 // This function is called when on-heap-compilation invariants are 208 // invalidated. For instance, when the assembler buffer grows or a GC happens 209 // between Code object allocation and Code object finalization. 
210 void FixOnHeapReferences(bool update_embedded_objects = true); 211 212 // This function is called when we fallback from on-heap to off-heap 213 // compilation and patch on-heap references to handles. 214 void FixOnHeapReferencesToHandles(); 215 216 // Insert the smallest number of nop instructions 217 // possible to align the pc offset to a multiple 218 // of m. m must be a power of 2 (>= 4). 219 void Align(int m); 220 // Insert the smallest number of zero bytes possible to align the pc offset 221 // to a mulitple of m. m must be a power of 2 (>= 2). 222 void DataAlign(int m); 223 // Aligns code to something that's optimal for a jump target for the platform. 224 void CodeTargetAlign(); LoopHeaderAlign()225 void LoopHeaderAlign() { CodeTargetAlign(); } 226 227 inline void Unreachable(); 228 229 // Label -------------------------------------------------------------------- 230 // Bind a label to the current pc. Note that labels can only be bound once, 231 // and if labels are linked to other instructions, they _must_ be bound 232 // before they go out of scope. 233 void bind(Label* label); 234 235 // RelocInfo and pools ------------------------------------------------------ 236 237 // Record relocation information for current pc_. 238 enum ConstantPoolMode { NEEDS_POOL_ENTRY, NO_POOL_ENTRY }; 239 void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0, 240 ConstantPoolMode constant_pool_mode = NEEDS_POOL_ENTRY); 241 242 // Generate a B immediate instruction with the corresponding relocation info. 243 // 'offset' is the immediate to encode in the B instruction (so it is the 244 // difference between the target and the PC of the instruction, divided by 245 // the instruction size). 246 void near_jump(int offset, RelocInfo::Mode rmode); 247 // Generate a BL immediate instruction with the corresponding relocation info. 248 // As for near_jump, 'offset' is the immediate to encode in the BL 249 // instruction. 
250 void near_call(int offset, RelocInfo::Mode rmode); 251 // Generate a BL immediate instruction with the corresponding relocation info 252 // for the input HeapObjectRequest. 253 void near_call(HeapObjectRequest request); 254 255 // Return the address in the constant pool of the code target address used by 256 // the branch/call instruction at pc. 257 inline static Address target_pointer_address_at(Address pc); 258 259 // Read/Modify the code target address in the branch/call instruction at pc. 260 // The isolate argument is unused (and may be nullptr) when skipping flushing. 261 inline static Address target_address_at(Address pc, Address constant_pool); 262 263 // Read/Modify the code target address in the branch/call instruction at pc. 264 inline static Tagged_t target_compressed_address_at(Address pc, 265 Address constant_pool); 266 inline static void set_target_address_at( 267 Address pc, Address constant_pool, Address target, 268 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 269 270 inline static void set_target_compressed_address_at( 271 Address pc, Address constant_pool, Tagged_t target, 272 ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED); 273 274 // Returns the handle for the code object called at 'pc'. 275 // This might need to be temporarily encoded as an offset into code_targets_. 276 inline Handle<Code> code_target_object_handle_at(Address pc); 277 inline EmbeddedObjectIndex embedded_object_index_referenced_from(Address pc); 278 inline void set_embedded_object_index_referenced_from( 279 Address p, EmbeddedObjectIndex index); 280 // Returns the handle for the heap object referenced at 'pc'. 281 inline Handle<HeapObject> target_object_handle_at(Address pc); 282 283 // Returns the target address for a runtime function for the call encoded 284 // at 'pc'. 
285 // Runtime entries can be temporarily encoded as the offset between the 286 // runtime function entrypoint and the code range start (stored in the 287 // code_range_start field), in order to be encodable as we generate the code, 288 // before it is moved into the code space. 289 inline Address runtime_entry_at(Address pc); 290 291 // This sets the branch destination. 'location' here can be either the pc of 292 // an immediate branch or the address of an entry in the constant pool. 293 // This is for calls and branches within generated code. 294 inline static void deserialization_set_special_target_at(Address location, 295 Code code, 296 Address target); 297 298 // Get the size of the special target encoded at 'location'. 299 inline static int deserialization_special_target_size(Address location); 300 301 // This sets the internal reference at the pc. 302 inline static void deserialization_set_target_internal_reference_at( 303 Address pc, Address target, 304 RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE); 305 306 // This value is used in the serialization process and must be zero for 307 // ARM64, as the code target is split across multiple instructions and does 308 // not exist separately in the code, so the serializer should not step 309 // forwards in memory after a target is resolved and written. 310 static constexpr int kSpecialTargetSize = 0; 311 312 // Size of the generated code in bytes SizeOfGeneratedCode()313 uint64_t SizeOfGeneratedCode() const { 314 DCHECK((pc_ >= buffer_start_) && (pc_ < (buffer_start_ + buffer_->size()))); 315 return pc_ - buffer_start_; 316 } 317 318 // Return the code size generated from label to the current position. 
SizeOfCodeGeneratedSince(const Label * label)319 uint64_t SizeOfCodeGeneratedSince(const Label* label) { 320 DCHECK(label->is_bound()); 321 DCHECK_GE(pc_offset(), label->pos()); 322 DCHECK_LT(pc_offset(), buffer_->size()); 323 return pc_offset() - label->pos(); 324 } 325 326 // Return the number of instructions generated from label to the 327 // current position. InstructionsGeneratedSince(const Label * label)328 uint64_t InstructionsGeneratedSince(const Label* label) { 329 return SizeOfCodeGeneratedSince(label) / kInstrSize; 330 } 331 332 static bool IsConstantPoolAt(Instruction* instr); 333 static int ConstantPoolSizeAt(Instruction* instr); 334 // See Assembler::CheckConstPool for more info. 335 void EmitPoolGuard(); 336 337 // Prevent veneer pool emission until EndBlockVeneerPool is called. 338 // Call to this function can be nested but must be followed by an equal 339 // number of calls to EndBlockConstpool. 340 void StartBlockVeneerPool(); 341 342 // Resume constant pool emission. Need to be called as many time as 343 // StartBlockVeneerPool to have an effect. 344 void EndBlockVeneerPool(); 345 is_veneer_pool_blocked()346 bool is_veneer_pool_blocked() const { 347 return veneer_pool_blocked_nesting_ > 0; 348 } 349 350 // Record a deoptimization reason that can be used by a log or cpu profiler. 351 // Use --trace-deopt to enable. 352 void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id, 353 SourcePosition position, int id); 354 355 int buffer_space() const; 356 357 // Record the emission of a constant pool. 358 // 359 // The emission of constant and veneer pools depends on the size of the code 360 // generated and the number of RelocInfo recorded. 361 // The Debug mechanism needs to map code offsets between two versions of a 362 // function, compiled with and without debugger support (see for example 363 // Debug::PrepareForBreakPoints()). 
364 // Compiling functions with debugger support generates additional code 365 // (DebugCodegen::GenerateSlot()). This may affect the emission of the pools 366 // and cause the version of the code with debugger support to have pools 367 // generated in different places. 368 // Recording the position and size of emitted pools allows to correctly 369 // compute the offset mappings between the different versions of a function in 370 // all situations. 371 // 372 // The parameter indicates the size of the pool (in bytes), including 373 // the marker and branch over the data. 374 void RecordConstPool(int size); 375 376 // Instruction set functions ------------------------------------------------ 377 378 // Branch / Jump instructions. 379 // For branches offsets are scaled, i.e. in instructions not in bytes. 380 // Branch to register. 381 void br(const Register& xn); 382 383 // Branch-link to register. 384 void blr(const Register& xn); 385 386 // Branch to register with return hint. 387 void ret(const Register& xn = lr); 388 389 // Unconditional branch to label. 390 void b(Label* label); 391 392 // Conditional branch to label. 393 void b(Label* label, Condition cond); 394 395 // Unconditional branch to PC offset. 396 void b(int imm26); 397 398 // Conditional branch to PC offset. 399 void b(int imm19, Condition cond); 400 401 // Branch-link to label / pc offset. 402 void bl(Label* label); 403 void bl(int imm26); 404 405 // Compare and branch to label / pc offset if zero. 406 void cbz(const Register& rt, Label* label); 407 void cbz(const Register& rt, int imm19); 408 409 // Compare and branch to label / pc offset if not zero. 410 void cbnz(const Register& rt, Label* label); 411 void cbnz(const Register& rt, int imm19); 412 413 // Test bit and branch to label / pc offset if zero. 414 void tbz(const Register& rt, unsigned bit_pos, Label* label); 415 void tbz(const Register& rt, unsigned bit_pos, int imm14); 416 417 // Test bit and branch to label / pc offset if not zero. 
418 void tbnz(const Register& rt, unsigned bit_pos, Label* label); 419 void tbnz(const Register& rt, unsigned bit_pos, int imm14); 420 421 // Address calculation instructions. 422 // Calculate a PC-relative address. Unlike for branches the offset in adr is 423 // unscaled (i.e. the result can be unaligned). 424 void adr(const Register& rd, Label* label); 425 void adr(const Register& rd, int imm21); 426 427 // Data Processing instructions. 428 // Add. 429 void add(const Register& rd, const Register& rn, const Operand& operand); 430 431 // Add and update status flags. 432 void adds(const Register& rd, const Register& rn, const Operand& operand); 433 434 // Compare negative. 435 void cmn(const Register& rn, const Operand& operand); 436 437 // Subtract. 438 void sub(const Register& rd, const Register& rn, const Operand& operand); 439 440 // Subtract and update status flags. 441 void subs(const Register& rd, const Register& rn, const Operand& operand); 442 443 // Compare. 444 void cmp(const Register& rn, const Operand& operand); 445 446 // Negate. 447 void neg(const Register& rd, const Operand& operand); 448 449 // Negate and update status flags. 450 void negs(const Register& rd, const Operand& operand); 451 452 // Add with carry bit. 453 void adc(const Register& rd, const Register& rn, const Operand& operand); 454 455 // Add with carry bit and update status flags. 456 void adcs(const Register& rd, const Register& rn, const Operand& operand); 457 458 // Subtract with carry bit. 459 void sbc(const Register& rd, const Register& rn, const Operand& operand); 460 461 // Subtract with carry bit and update status flags. 462 void sbcs(const Register& rd, const Register& rn, const Operand& operand); 463 464 // Negate with carry bit. 465 void ngc(const Register& rd, const Operand& operand); 466 467 // Negate with carry bit and update status flags. 468 void ngcs(const Register& rd, const Operand& operand); 469 470 // Logical instructions. 471 // Bitwise and (A & B). 
472 void and_(const Register& rd, const Register& rn, const Operand& operand); 473 474 // Bitwise and (A & B) and update status flags. 475 void ands(const Register& rd, const Register& rn, const Operand& operand); 476 477 // Bit test, and set flags. 478 void tst(const Register& rn, const Operand& operand); 479 480 // Bit clear (A & ~B). 481 void bic(const Register& rd, const Register& rn, const Operand& operand); 482 483 // Bit clear (A & ~B) and update status flags. 484 void bics(const Register& rd, const Register& rn, const Operand& operand); 485 486 // Bitwise and. 487 void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); 488 489 // Bit clear immediate. 490 void bic(const VRegister& vd, const int imm8, const int left_shift = 0); 491 492 // Bit clear. 493 void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); 494 495 // Bitwise insert if false. 496 void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); 497 498 // Bitwise insert if true. 499 void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); 500 501 // Bitwise select. 502 void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 503 504 // Polynomial multiply. 505 void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 506 507 // Vector move immediate. 508 void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL, 509 const int shift_amount = 0); 510 511 // Bitwise not. 512 void mvn(const VRegister& vd, const VRegister& vn); 513 514 // Vector move inverted immediate. 515 void mvni(const VRegister& vd, const int imm8, Shift shift = LSL, 516 const int shift_amount = 0); 517 518 // Signed saturating accumulate of unsigned value. 519 void suqadd(const VRegister& vd, const VRegister& vn); 520 521 // Unsigned saturating accumulate of signed value. 522 void usqadd(const VRegister& vd, const VRegister& vn); 523 524 // Absolute value. 
525 void abs(const VRegister& vd, const VRegister& vn); 526 527 // Signed saturating absolute value. 528 void sqabs(const VRegister& vd, const VRegister& vn); 529 530 // Negate. 531 void neg(const VRegister& vd, const VRegister& vn); 532 533 // Signed saturating negate. 534 void sqneg(const VRegister& vd, const VRegister& vn); 535 536 // Bitwise not. 537 void not_(const VRegister& vd, const VRegister& vn); 538 539 // Extract narrow. 540 void xtn(const VRegister& vd, const VRegister& vn); 541 542 // Extract narrow (second part). 543 void xtn2(const VRegister& vd, const VRegister& vn); 544 545 // Signed saturating extract narrow. 546 void sqxtn(const VRegister& vd, const VRegister& vn); 547 548 // Signed saturating extract narrow (second part). 549 void sqxtn2(const VRegister& vd, const VRegister& vn); 550 551 // Unsigned saturating extract narrow. 552 void uqxtn(const VRegister& vd, const VRegister& vn); 553 554 // Unsigned saturating extract narrow (second part). 555 void uqxtn2(const VRegister& vd, const VRegister& vn); 556 557 // Signed saturating extract unsigned narrow. 558 void sqxtun(const VRegister& vd, const VRegister& vn); 559 560 // Signed saturating extract unsigned narrow (second part). 561 void sqxtun2(const VRegister& vd, const VRegister& vn); 562 563 // Move register to register. 564 void mov(const VRegister& vd, const VRegister& vn); 565 566 // Bitwise not or. 567 void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); 568 569 // Bitwise exclusive or. 570 void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); 571 572 // Bitwise or (A | B). 573 void orr(const Register& rd, const Register& rn, const Operand& operand); 574 575 // Bitwise or. 576 void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); 577 578 // Bitwise or immediate. 579 void orr(const VRegister& vd, const int imm8, const int left_shift = 0); 580 581 // Bitwise nor (A | ~B). 
582 void orn(const Register& rd, const Register& rn, const Operand& operand); 583 584 // Bitwise eor/xor (A ^ B). 585 void eor(const Register& rd, const Register& rn, const Operand& operand); 586 587 // Bitwise enor/xnor (A ^ ~B). 588 void eon(const Register& rd, const Register& rn, const Operand& operand); 589 590 // Logical shift left variable. 591 void lslv(const Register& rd, const Register& rn, const Register& rm); 592 593 // Logical shift right variable. 594 void lsrv(const Register& rd, const Register& rn, const Register& rm); 595 596 // Arithmetic shift right variable. 597 void asrv(const Register& rd, const Register& rn, const Register& rm); 598 599 // Rotate right variable. 600 void rorv(const Register& rd, const Register& rn, const Register& rm); 601 602 // Bitfield instructions. 603 // Bitfield move. 604 void bfm(const Register& rd, const Register& rn, int immr, int imms); 605 606 // Signed bitfield move. 607 void sbfm(const Register& rd, const Register& rn, int immr, int imms); 608 609 // Unsigned bitfield move. 610 void ubfm(const Register& rd, const Register& rn, int immr, int imms); 611 612 // Bfm aliases. 613 // Bitfield insert. bfi(const Register & rd,const Register & rn,int lsb,int width)614 void bfi(const Register& rd, const Register& rn, int lsb, int width) { 615 DCHECK_GE(width, 1); 616 DCHECK(lsb + width <= rn.SizeInBits()); 617 bfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 618 } 619 620 // Bitfield extract and insert low. bfxil(const Register & rd,const Register & rn,int lsb,int width)621 void bfxil(const Register& rd, const Register& rn, int lsb, int width) { 622 DCHECK_GE(width, 1); 623 DCHECK(lsb + width <= rn.SizeInBits()); 624 bfm(rd, rn, lsb, lsb + width - 1); 625 } 626 627 // Sbfm aliases. 628 // Arithmetic shift right. 
asr(const Register & rd,const Register & rn,int shift)629 void asr(const Register& rd, const Register& rn, int shift) { 630 DCHECK(shift < rd.SizeInBits()); 631 sbfm(rd, rn, shift, rd.SizeInBits() - 1); 632 } 633 634 // Signed bitfield insert in zero. sbfiz(const Register & rd,const Register & rn,int lsb,int width)635 void sbfiz(const Register& rd, const Register& rn, int lsb, int width) { 636 DCHECK_GE(width, 1); 637 DCHECK(lsb + width <= rn.SizeInBits()); 638 sbfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 639 } 640 641 // Signed bitfield extract. sbfx(const Register & rd,const Register & rn,int lsb,int width)642 void sbfx(const Register& rd, const Register& rn, int lsb, int width) { 643 DCHECK_GE(width, 1); 644 DCHECK(lsb + width <= rn.SizeInBits()); 645 sbfm(rd, rn, lsb, lsb + width - 1); 646 } 647 648 // Signed extend byte. sxtb(const Register & rd,const Register & rn)649 void sxtb(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 7); } 650 651 // Signed extend halfword. sxth(const Register & rd,const Register & rn)652 void sxth(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 15); } 653 654 // Signed extend word. sxtw(const Register & rd,const Register & rn)655 void sxtw(const Register& rd, const Register& rn) { sbfm(rd, rn, 0, 31); } 656 657 // Ubfm aliases. 658 // Logical shift left. lsl(const Register & rd,const Register & rn,int shift)659 void lsl(const Register& rd, const Register& rn, int shift) { 660 int reg_size = rd.SizeInBits(); 661 DCHECK(shift < reg_size); 662 ubfm(rd, rn, (reg_size - shift) % reg_size, reg_size - shift - 1); 663 } 664 665 // Logical shift right. lsr(const Register & rd,const Register & rn,int shift)666 void lsr(const Register& rd, const Register& rn, int shift) { 667 DCHECK(shift < rd.SizeInBits()); 668 ubfm(rd, rn, shift, rd.SizeInBits() - 1); 669 } 670 671 // Unsigned bitfield insert in zero. 
ubfiz(const Register & rd,const Register & rn,int lsb,int width)672 void ubfiz(const Register& rd, const Register& rn, int lsb, int width) { 673 DCHECK_GE(width, 1); 674 DCHECK(lsb + width <= rn.SizeInBits()); 675 ubfm(rd, rn, (rd.SizeInBits() - lsb) & (rd.SizeInBits() - 1), width - 1); 676 } 677 678 // Unsigned bitfield extract. ubfx(const Register & rd,const Register & rn,int lsb,int width)679 void ubfx(const Register& rd, const Register& rn, int lsb, int width) { 680 DCHECK_GE(width, 1); 681 DCHECK(lsb + width <= rn.SizeInBits()); 682 ubfm(rd, rn, lsb, lsb + width - 1); 683 } 684 685 // Unsigned extend byte. uxtb(const Register & rd,const Register & rn)686 void uxtb(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 7); } 687 688 // Unsigned extend halfword. uxth(const Register & rd,const Register & rn)689 void uxth(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 15); } 690 691 // Unsigned extend word. uxtw(const Register & rd,const Register & rn)692 void uxtw(const Register& rd, const Register& rn) { ubfm(rd, rn, 0, 31); } 693 694 // Extract. 695 void extr(const Register& rd, const Register& rn, const Register& rm, 696 int lsb); 697 698 // Conditional select: rd = cond ? rn : rm. 699 void csel(const Register& rd, const Register& rn, const Register& rm, 700 Condition cond); 701 702 // Conditional select increment: rd = cond ? rn : rm + 1. 703 void csinc(const Register& rd, const Register& rn, const Register& rm, 704 Condition cond); 705 706 // Conditional select inversion: rd = cond ? rn : ~rm. 707 void csinv(const Register& rd, const Register& rn, const Register& rm, 708 Condition cond); 709 710 // Conditional select negation: rd = cond ? rn : -rm. 711 void csneg(const Register& rd, const Register& rn, const Register& rm, 712 Condition cond); 713 714 // Conditional set: rd = cond ? 1 : 0. 715 void cset(const Register& rd, Condition cond); 716 717 // Conditional set minus: rd = cond ? -1 : 0. 
  // Conditional set mask: rd = cond ? -1 : 0.
  void csetm(const Register& rd, Condition cond);

  // Conditional increment: rd = cond ? rn + 1 : rn.
  void cinc(const Register& rd, const Register& rn, Condition cond);

  // Conditional invert: rd = cond ? ~rn : rn.
  void cinv(const Register& rd, const Register& rn, Condition cond);

  // Conditional negate: rd = cond ? -rn : rn.
  void cneg(const Register& rd, const Register& rn, Condition cond);

  // Extr aliases.
  // Rotate right: implemented as extr with both source operands equal.
  void ror(const Register& rd, const Register& rs, unsigned shift) {
    extr(rd, rs, rs, shift);
  }

  // Conditional comparison.
  // Conditional compare negative.
  void ccmn(const Register& rn, const Operand& operand, StatusFlags nzcv,
            Condition cond);

  // Conditional compare.
  void ccmp(const Register& rn, const Operand& operand, StatusFlags nzcv,
            Condition cond);

  // Multiplication.
  // 32 x 32 -> 32-bit and 64 x 64 -> 64-bit multiply.
  void mul(const Register& rd, const Register& rn, const Register& rm);

  // 32 + 32 x 32 -> 32-bit and 64 + 64 x 64 -> 64-bit multiply accumulate.
  void madd(const Register& rd, const Register& rn, const Register& rm,
            const Register& ra);

  // -(32 x 32) -> 32-bit and -(64 x 64) -> 64-bit multiply.
  void mneg(const Register& rd, const Register& rn, const Register& rm);

  // 32 - 32 x 32 -> 32-bit and 64 - 64 x 64 -> 64-bit multiply subtract.
  void msub(const Register& rd, const Register& rn, const Register& rm,
            const Register& ra);

  // 32 x 32 -> 64-bit multiply.
  void smull(const Register& rd, const Register& rn, const Register& rm);

  // Xd = bits<127:64> of Xn * Xm.
  void smulh(const Register& rd, const Register& rn, const Register& rm);

  // Signed 32 x 32 -> 64-bit multiply and accumulate.
  void smaddl(const Register& rd, const Register& rn, const Register& rm,
              const Register& ra);

  // Unsigned 32 x 32 -> 64-bit multiply and accumulate.
  void umaddl(const Register& rd, const Register& rn, const Register& rm,
              const Register& ra);

  // Signed 32 x 32 -> 64-bit multiply and subtract.
  void smsubl(const Register& rd, const Register& rn, const Register& rm,
              const Register& ra);

  // Unsigned 32 x 32 -> 64-bit multiply and subtract.
  void umsubl(const Register& rd, const Register& rn, const Register& rm,
              const Register& ra);

  // Signed integer divide.
  void sdiv(const Register& rd, const Register& rn, const Register& rm);

  // Unsigned integer divide.
  void udiv(const Register& rd, const Register& rn, const Register& rm);

  // Bit count, bit reverse and endian reverse.
  void rbit(const Register& rd, const Register& rn);
  void rev16(const Register& rd, const Register& rn);
  void rev32(const Register& rd, const Register& rn);
  void rev(const Register& rd, const Register& rn);
  void clz(const Register& rd, const Register& rn);
  void cls(const Register& rd, const Register& rn);

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in x17 and modifier in x16 [Armv8.3].
  void pacib1716();

  // Pointer Authentication Code for Instruction address, using key B, with
  // address in LR and modifier in SP [Armv8.3].
  void pacibsp();

  // Authenticate Instruction address, using key B, with address in x17 and
  // modifier in x16 [Armv8.3].
  void autib1716();

  // Authenticate Instruction address, using key B, with address in LR and
  // modifier in SP [Armv8.3].
  void autibsp();

  // Memory instructions.

  // Load integer or FP register.
  void ldr(const CPURegister& rt, const MemOperand& src);

  // Store integer or FP register.
  void str(const CPURegister& rt, const MemOperand& dst);

  // Load word with sign extension.
  void ldrsw(const Register& rt, const MemOperand& src);

  // Load byte.
  void ldrb(const Register& rt, const MemOperand& src);

  // Store byte.
  void strb(const Register& rt, const MemOperand& dst);

  // Load byte with sign extension.
  void ldrsb(const Register& rt, const MemOperand& src);

  // Load half-word.
  void ldrh(const Register& rt, const MemOperand& src);

  // Store half-word.
  void strh(const Register& rt, const MemOperand& dst);

  // Load half-word with sign extension.
  void ldrsh(const Register& rt, const MemOperand& src);

  // Load integer or FP register pair.
  void ldp(const CPURegister& rt, const CPURegister& rt2,
           const MemOperand& src);

  // Store integer or FP register pair.
  void stp(const CPURegister& rt, const CPURegister& rt2,
           const MemOperand& dst);

  // Load word pair with sign extension.
  void ldpsw(const Register& rt, const Register& rt2, const MemOperand& src);

  // Load literal to register from a pc relative address.
  void ldr_pcrel(const CPURegister& rt, int imm19);

  // Load literal to register.
  void ldr(const CPURegister& rt, const Immediate& imm);
  void ldr(const CPURegister& rt, const Operand& operand);

  // Load-acquire word.
  void ldar(const Register& rt, const Register& rn);

  // Load-acquire exclusive word.
  void ldaxr(const Register& rt, const Register& rn);

  // Store-release word.
  void stlr(const Register& rt, const Register& rn);

  // Store-release exclusive word.
  void stlxr(const Register& rs, const Register& rt, const Register& rn);

  // Load-acquire byte.
  void ldarb(const Register& rt, const Register& rn);

  // Load-acquire exclusive byte.
  void ldaxrb(const Register& rt, const Register& rn);

  // Store-release byte.
  void stlrb(const Register& rt, const Register& rn);

  // Store-release exclusive byte.
  void stlxrb(const Register& rs, const Register& rt, const Register& rn);

  // Load-acquire half-word.
  void ldarh(const Register& rt, const Register& rn);

  // Load-acquire exclusive half-word.
  void ldaxrh(const Register& rt, const Register& rn);

  // Store-release half-word.
  void stlrh(const Register& rt, const Register& rn);

  // Store-release exclusive half-word.
  void stlxrh(const Register& rs, const Register& rt, const Register& rn);

  // Move instructions. The default shift of -1 indicates that the move
  // instruction will calculate an appropriate 16-bit immediate and left shift
  // that is equal to the 64-bit immediate argument. If an explicit left shift
  // is specified (0, 16, 32 or 48), the immediate must be a 16-bit value.
  //
  // For movk, an explicit shift can be used to indicate which half word should
  // be overwritten, eg. movk(x0, 0, 0) will overwrite the least-significant
  // half word with zero, whereas movk(x0, 0, 48) will overwrite the
  // most-significant.

  // Move and keep.
  void movk(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVK);
  }

  // Move with non-zero.
  void movn(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVN);
  }

  // Move with zero.
  void movz(const Register& rd, uint64_t imm, int shift = -1) {
    MoveWide(rd, imm, shift, MOVZ);
  }

  // Misc instructions.
  // Monitor debug-mode breakpoint.
  void brk(int code);

  // Halting debug-mode breakpoint.
  void hlt(int code);

  // Move register to register.
  void mov(const Register& rd, const Register& rn);

  // Move NOT(operand) to register.
  void mvn(const Register& rd, const Operand& operand);

  // System instructions.
  // Move to register from system register.
  void mrs(const Register& rt, SystemRegister sysreg);

  // Move from register to system register.
  void msr(SystemRegister sysreg, const Register& rt);

  // System hint.
  void hint(SystemHint code);

  // Data memory barrier.
  void dmb(BarrierDomain domain, BarrierType type);

  // Data synchronization barrier.
  void dsb(BarrierDomain domain, BarrierType type);

  // Instruction synchronization barrier.
  void isb();

  // Conditional speculation barrier.
  void csdb();

  // Branch target identification.
  void bti(BranchTargetIdentifier id);

  // No-op.
  void nop() { hint(NOP); }

  // Different nop operations are used by the code generator to detect certain
  // states of the generated code.
  enum NopMarkerTypes {
    DEBUG_BREAK_NOP,
    INTERRUPT_CODE_NOP,
    ADR_FAR_NOP,
    FIRST_NOP_MARKER = DEBUG_BREAK_NOP,
    LAST_NOP_MARKER = ADR_FAR_NOP
  };

  // Emit a marker nop of the given type (encoded so the code generator can
  // later recognize it).
  void nop(NopMarkerTypes n);

  // Add.
  void add(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned halving add.
  void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract.
  void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed halving add.
  void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Multiply by scalar element.
  void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Multiply-add by scalar element.
  void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Multiply-subtract by scalar element.
  void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm,
           int vm_index);

  // Signed long multiply-add by scalar element.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Signed long multiply-add by scalar element (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-add by scalar element.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-add by scalar element (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Signed long multiply-sub by scalar element.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Signed long multiply-sub by scalar element (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Unsigned long multiply-sub by scalar element.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Unsigned long multiply-sub by scalar element (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Signed long multiply by scalar element.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Signed long multiply by scalar element (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Unsigned long multiply by scalar element.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
             int vm_index);

  // Unsigned long multiply by scalar element (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
              int vm_index);

  // Add narrow returning high half.
  void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add narrow returning high half (second part).
  void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating double long multiply by element.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating double long multiply by element (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-add by element.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-add by element (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Signed saturating doubling long multiply-sub by element.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating doubling long multiply-sub by element (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Compare bitwise to zero.
  void cmeq(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than or equal to zero.
  void cmge(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed greater than zero.
  void cmgt(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than or equal to zero.
  void cmle(const VRegister& vd, const VRegister& vn, int value);

  // Compare signed less than zero.
  void cmlt(const VRegister& vd, const VRegister& vn, int value);

  // Unsigned rounding halving add.
  void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare equal.
  void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than or equal.
  void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare signed greater than.
  void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher.
  void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare unsigned higher or same.
  void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Compare bitwise test bits nonzero.
  void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift left by register.
  void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned shift left by register.
  void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract.
  void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-subtract (second part).
  void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply.
  void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply (second part).
  void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding doubling multiply returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling multiply element returning high half.
  void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm,
               int vm_index);

  // Signed saturating rounding doubling multiply element returning high half.
  void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                int vm_index);

  // Unsigned long multiply long.
  void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply (second part).
  void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half.
  void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half.
  void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Subtract narrow returning high half (second part).
  void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding add narrow returning high half (second part).
  void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half.
  void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Rounding subtract narrow returning high half (second part).
  void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating shift left by register.
  void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating shift left by register.
  void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed rounding shift left by register.
  void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned rounding shift left by register.
  void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating rounding shift left by register.
  void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned saturating rounding shift left by register.
  void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference.
  void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate.
  void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Shift left by immediate and insert.
  void sli(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right by immediate and insert.
  void sri(const VRegister& vd, const VRegister& vn, int shift);

  // Signed maximum.
  void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed pairwise maximum.
  void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Add across vector.
  void addv(const VRegister& vd, const VRegister& vn);

  // Signed add long across vector.
  void saddlv(const VRegister& vd, const VRegister& vn);

  // Unsigned add long across vector.
  void uaddlv(const VRegister& vd, const VRegister& vn);

  // FP maximum number across vector.
  void fmaxnmv(const VRegister& vd, const VRegister& vn);

  // FP maximum across vector.
  void fmaxv(const VRegister& vd, const VRegister& vn);

  // FP minimum number across vector.
  void fminnmv(const VRegister& vd, const VRegister& vn);

  // FP minimum across vector.
  void fminv(const VRegister& vd, const VRegister& vn);

  // Signed maximum across vector.
  void smaxv(const VRegister& vd, const VRegister& vn);

  // Signed minimum.
  void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum pairwise.
  void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed minimum across vector.
  void sminv(const VRegister& vd, const VRegister& vn);

  // One-element structure store from one register.
  void st1(const VRegister& vt, const MemOperand& src);

  // One-element structure store from two registers.
  void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // One-element structure store from three registers.
  void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const MemOperand& src);

  // One-element structure store from four registers.
  void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const VRegister& vt4, const MemOperand& src);

  // One-element single structure store from one lane.
  void st1(const VRegister& vt, int lane, const MemOperand& src);

  // Two-element structure store from two registers.
  void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src);

  // Two-element single structure store from two lanes.
  void st2(const VRegister& vt, const VRegister& vt2, int lane,
           const MemOperand& src);

  // Three-element structure store from three registers.
  void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const MemOperand& src);

  // Three-element single structure store from three lanes.
  void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           int lane, const MemOperand& src);

  // Four-element structure store from four registers.
  void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const VRegister& vt4, const MemOperand& src);

  // Four-element single structure store from four lanes.
  void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3,
           const VRegister& vt4, int lane, const MemOperand& src);

  // Unsigned add long.
  void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add long (second part).
  void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide.
  void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned add wide (second part).
  void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long.
  void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add long (second part).
  void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide.
  void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed add wide (second part).
  void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long.
  void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract long (second part).
  void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide.
  void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long.
  void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed subtract long (second part).
  void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide.
  void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed integer subtract wide (second part).
  void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned subtract wide (second part).
  void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum.
  void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise maximum.
  void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned maximum across vector.
  void umaxv(const VRegister& vd, const VRegister& vn);

  // Unsigned minimum.
  void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned pairwise minimum.
  void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned minimum across vector.
  void uminv(const VRegister& vd, const VRegister& vn);

  // Transpose vectors (primary).
  void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Transpose vectors (secondary).
  void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (primary).
  void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unzip vectors (secondary).
  void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (primary).
  void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Zip vectors (secondary).
  void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed shift right by immediate.
  void sshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate.
  void ushr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate.
  void srshr(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate.
  void urshr(const VRegister& vd, const VRegister& vn, int shift);

  // Signed shift right by immediate and accumulate.
  void ssra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned shift right by immediate and accumulate.
  void usra(const VRegister& vd, const VRegister& vn, int shift);

  // Signed rounding shift right by immediate and accumulate.
  void srsra(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned rounding shift right by immediate and accumulate.
  void ursra(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate.
  void shrn(const VRegister& vd, const VRegister& vn, int shift);

  // Shift right narrow by immediate (second part).
  void shrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate.
  void rshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Rounding shift right narrow by immediate (second part).
  void rshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate.
  void uqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating shift right narrow by immediate (second part).
  void uqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate.
  void uqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Unsigned saturating rounding shift right narrow by immediate (second part).
  void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate.
  void sqshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right narrow by immediate (second part).
  void sqshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate.
  void sqrshrn(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating rounded shift right narrow by immediate (second part).
  void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate.
  void sqshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed saturating shift right unsigned narrow by immediate (second part).
  void sqshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // Signed sat rounded shift right unsigned narrow by immediate.
  void sqrshrun(const VRegister& vd, const VRegister& vn, int shift);

  // Signed sat rounded shift right unsigned narrow by immediate (second part).
  void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift);

  // FP reciprocal step.
  void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP reciprocal estimate.
  void frecpe(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root estimate.
  void frsqrte(const VRegister& vd, const VRegister& vn);

  // FP reciprocal square root step.
  void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long.
  void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate long (second part).
  void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long.
  void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference and accumulate long (second part).
  void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long.
  void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference long (second part).
  void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long.
  void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference long (second part).
  void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long.
  void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Polynomial multiply long (second part).
  void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add.
  void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-add (second part).
  void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add.
  void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-add (second part).
  void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub.
  void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply-sub (second part).
  void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub.
  void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned long multiply-sub (second part).
  void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply.
  void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed long multiply (second part).
  void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add.
  void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed saturating doubling long multiply-add (second part).
  void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Unsigned absolute difference.
  void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // Signed absolute difference and accumulate.
  void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP instructions.
  // Move immediate to FP register.
  void fmov(const VRegister& fd, double imm);
  void fmov(const VRegister& fd, float imm);

  // Move FP register to register.
  void fmov(const Register& rd, const VRegister& fn);

  // Move register to FP register.
  void fmov(const VRegister& fd, const Register& rn);

  // Move FP register to FP register.
  void fmov(const VRegister& fd, const VRegister& fn);

  // Move 64-bit register to top half of 128-bit FP register.
  void fmov(const VRegister& vd, int index, const Register& rn);

  // Move top half of 128-bit FP register to 64-bit register.
  void fmov(const Register& rd, const VRegister& vn, int index);

  // FP add.
  void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP subtract.
  void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm);

  // FP multiply.
1541 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1542 1543 // FP compare equal to zero. 1544 void fcmeq(const VRegister& vd, const VRegister& vn, double imm); 1545 1546 // FP greater than zero. 1547 void fcmgt(const VRegister& vd, const VRegister& vn, double imm); 1548 1549 // FP greater than or equal to zero. 1550 void fcmge(const VRegister& vd, const VRegister& vn, double imm); 1551 1552 // FP less than or equal to zero. 1553 void fcmle(const VRegister& vd, const VRegister& vn, double imm); 1554 1555 // FP less than to zero. 1556 void fcmlt(const VRegister& vd, const VRegister& vn, double imm); 1557 1558 // FP absolute difference. 1559 void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1560 1561 // FP pairwise add vector. 1562 void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1563 1564 // FP pairwise add scalar. 1565 void faddp(const VRegister& vd, const VRegister& vn); 1566 1567 // FP pairwise maximum scalar. 1568 void fmaxp(const VRegister& vd, const VRegister& vn); 1569 1570 // FP pairwise maximum number scalar. 1571 void fmaxnmp(const VRegister& vd, const VRegister& vn); 1572 1573 // FP pairwise minimum number scalar. 1574 void fminnmp(const VRegister& vd, const VRegister& vn); 1575 1576 // FP vector multiply accumulate. 1577 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1578 1579 // FP vector multiply subtract. 1580 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1581 1582 // FP vector multiply extended. 1583 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1584 1585 // FP absolute greater than or equal. 1586 void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1587 1588 // FP absolute greater than. 1589 void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1590 1591 // FP multiply by element. 
1592 void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1593 int vm_index); 1594 1595 // FP fused multiply-add to accumulator by element. 1596 void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1597 int vm_index); 1598 1599 // FP fused multiply-sub from accumulator by element. 1600 void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1601 int vm_index); 1602 1603 // FP multiply extended by element. 1604 void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1605 int vm_index); 1606 1607 // FP compare equal. 1608 void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1609 1610 // FP greater than. 1611 void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1612 1613 // FP greater than or equal. 1614 void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1615 1616 // FP pairwise maximum vector. 1617 void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1618 1619 // FP pairwise minimum vector. 1620 void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1621 1622 // FP pairwise minimum scalar. 1623 void fminp(const VRegister& vd, const VRegister& vn); 1624 1625 // FP pairwise maximum number vector. 1626 void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1627 1628 // FP pairwise minimum number vector. 1629 void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1630 1631 // FP fused multiply-add. 1632 void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1633 const VRegister& va); 1634 1635 // FP fused multiply-subtract. 1636 void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1637 const VRegister& va); 1638 1639 // FP fused multiply-add and negate. 
1640 void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1641 const VRegister& va);
1642
1643 // FP fused multiply-subtract and negate.
1644 void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1645 const VRegister& va);
1646
1647 // FP multiply-negate scalar.
1648 void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1649
1650 // FP reciprocal exponent scalar.
1651 void frecpx(const VRegister& vd, const VRegister& vn);
1652
1653 // FP divide.
1654 void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1655
1656 // FP maximum.
1657 void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1658
1659 // FP minimum.
1660 void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1661
1662 // FP maximum number.
1663 void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1664
1665 // FP minimum number.
1666 void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm);
1667
1668 // FP absolute.
1669 void fabs(const VRegister& vd, const VRegister& vn);
1670
1671 // FP negate.
1672 void fneg(const VRegister& vd, const VRegister& vn);
1673
1674 // FP square root.
1675 void fsqrt(const VRegister& vd, const VRegister& vn);
1676
1677 // FP round to integer nearest with ties to away.
1678 void frinta(const VRegister& vd, const VRegister& vn);
1679
1680 // FP round to integer, implicit rounding.
1681 void frinti(const VRegister& vd, const VRegister& vn);
1682
1683 // FP round to integer toward minus infinity.
1684 void frintm(const VRegister& vd, const VRegister& vn);
1685
1686 // FP round to integer nearest with ties to even.
1687 void frintn(const VRegister& vd, const VRegister& vn);
1688
1689 // FP round to integer towards plus infinity.
1690 void frintp(const VRegister& vd, const VRegister& vn);
1691
1692 // FP round to integer, exact, implicit rounding.
1693 void frintx(const VRegister& vd, const VRegister& vn);
1694
1695 // FP round to integer towards zero.
1696 void frintz(const VRegister& vd, const VRegister& vn);
1697
1698 // FP compare registers.
1699 void fcmp(const VRegister& vn, const VRegister& vm);
1700
1701 // FP compare immediate.
1702 void fcmp(const VRegister& vn, double value);
1703
1704 // FP conditional compare.
1705 void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv,
1706 Condition cond);
1707
1708 // FP conditional select.
1709 void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm,
1710 Condition cond);
1711
1712 // Common FP Convert functions.
1713 void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op);
1714 void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op);
1715
1716 // FP convert between precisions.
1717 void fcvt(const VRegister& vd, const VRegister& vn);
1718
1719 // FP convert to higher precision.
1720 void fcvtl(const VRegister& vd, const VRegister& vn);
1721
1722 // FP convert to higher precision (second part).
1723 void fcvtl2(const VRegister& vd, const VRegister& vn);
1724
1725 // FP convert to lower precision.
1726 void fcvtn(const VRegister& vd, const VRegister& vn);
1727
1728 // FP convert to lower precision (second part).
1729 void fcvtn2(const VRegister& vd, const VRegister& vn);
1730
1731 // FP convert to lower precision, rounding to odd.
1732 void fcvtxn(const VRegister& vd, const VRegister& vn);
1733
1734 // FP convert to lower precision, rounding to odd (second part).
1735 void fcvtxn2(const VRegister& vd, const VRegister& vn);
1736
1737 // FP convert to signed integer, nearest with ties to away.
1738 void fcvtas(const Register& rd, const VRegister& vn);
1739
1740 // FP convert to unsigned integer, nearest with ties to away.
1741 void fcvtau(const Register& rd, const VRegister& vn);
1742
1743 // FP convert to signed integer, nearest with ties to away.
1744 void fcvtas(const VRegister& vd, const VRegister& vn); 1745 1746 // FP convert to unsigned integer, nearest with ties to away. 1747 void fcvtau(const VRegister& vd, const VRegister& vn); 1748 1749 // FP convert to signed integer, round towards -infinity. 1750 void fcvtms(const Register& rd, const VRegister& vn); 1751 1752 // FP convert to unsigned integer, round towards -infinity. 1753 void fcvtmu(const Register& rd, const VRegister& vn); 1754 1755 // FP convert to signed integer, round towards -infinity. 1756 void fcvtms(const VRegister& vd, const VRegister& vn); 1757 1758 // FP convert to unsigned integer, round towards -infinity. 1759 void fcvtmu(const VRegister& vd, const VRegister& vn); 1760 1761 // FP convert to signed integer, nearest with ties to even. 1762 void fcvtns(const Register& rd, const VRegister& vn); 1763 1764 // FP JavaScript convert to signed integer, rounding toward zero [Armv8.3]. 1765 void fjcvtzs(const Register& rd, const VRegister& vn); 1766 1767 // FP convert to unsigned integer, nearest with ties to even. 1768 void fcvtnu(const Register& rd, const VRegister& vn); 1769 1770 // FP convert to signed integer, nearest with ties to even. 1771 void fcvtns(const VRegister& rd, const VRegister& vn); 1772 1773 // FP convert to unsigned integer, nearest with ties to even. 1774 void fcvtnu(const VRegister& rd, const VRegister& vn); 1775 1776 // FP convert to signed integer or fixed-point, round towards zero. 1777 void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); 1778 1779 // FP convert to unsigned integer or fixed-point, round towards zero. 1780 void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); 1781 1782 // FP convert to signed integer or fixed-point, round towards zero. 1783 void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); 1784 1785 // FP convert to unsigned integer or fixed-point, round towards zero. 
1786 void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); 1787 1788 // FP convert to signed integer, round towards +infinity. 1789 void fcvtps(const Register& rd, const VRegister& vn); 1790 1791 // FP convert to unsigned integer, round towards +infinity. 1792 void fcvtpu(const Register& rd, const VRegister& vn); 1793 1794 // FP convert to signed integer, round towards +infinity. 1795 void fcvtps(const VRegister& vd, const VRegister& vn); 1796 1797 // FP convert to unsigned integer, round towards +infinity. 1798 void fcvtpu(const VRegister& vd, const VRegister& vn); 1799 1800 // Convert signed integer or fixed point to FP. 1801 void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1802 1803 // Convert unsigned integer or fixed point to FP. 1804 void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); 1805 1806 // Convert signed integer or fixed-point to FP. 1807 void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1808 1809 // Convert unsigned integer or fixed-point to FP. 1810 void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); 1811 1812 // Extract vector from pair of vectors. 1813 void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm, 1814 int index); 1815 1816 // Duplicate vector element to vector or scalar. 1817 void dup(const VRegister& vd, const VRegister& vn, int vn_index); 1818 1819 // Duplicate general-purpose register to vector. 1820 void dup(const VRegister& vd, const Register& rn); 1821 1822 // Insert vector element from general-purpose register. 1823 void ins(const VRegister& vd, int vd_index, const Register& rn); 1824 1825 // Move general-purpose register to a vector element. 1826 void mov(const VRegister& vd, int vd_index, const Register& rn); 1827 1828 // Unsigned move vector element to general-purpose register. 1829 void umov(const Register& rd, const VRegister& vn, int vn_index); 1830 1831 // Move vector element to general-purpose register. 
1832 void mov(const Register& rd, const VRegister& vn, int vn_index); 1833 1834 // Move vector element to scalar. 1835 void mov(const VRegister& vd, const VRegister& vn, int vn_index); 1836 1837 // Insert vector element from another vector element. 1838 void ins(const VRegister& vd, int vd_index, const VRegister& vn, 1839 int vn_index); 1840 1841 // Move vector element to another vector element. 1842 void mov(const VRegister& vd, int vd_index, const VRegister& vn, 1843 int vn_index); 1844 1845 // Signed move vector element to general-purpose register. 1846 void smov(const Register& rd, const VRegister& vn, int vn_index); 1847 1848 // One-element structure load to one register. 1849 void ld1(const VRegister& vt, const MemOperand& src); 1850 1851 // One-element structure load to two registers. 1852 void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1853 1854 // One-element structure load to three registers. 1855 void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1856 const MemOperand& src); 1857 1858 // One-element structure load to four registers. 1859 void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1860 const VRegister& vt4, const MemOperand& src); 1861 1862 // One-element single structure load to one lane. 1863 void ld1(const VRegister& vt, int lane, const MemOperand& src); 1864 1865 // One-element single structure load to all lanes. 1866 void ld1r(const VRegister& vt, const MemOperand& src); 1867 1868 // Two-element structure load. 1869 void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1870 1871 // Two-element single structure load to one lane. 1872 void ld2(const VRegister& vt, const VRegister& vt2, int lane, 1873 const MemOperand& src); 1874 1875 // Two-element single structure load to all lanes. 1876 void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src); 1877 1878 // Three-element structure load. 
1879 void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1880 const MemOperand& src); 1881 1882 // Three-element single structure load to one lane. 1883 void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1884 int lane, const MemOperand& src); 1885 1886 // Three-element single structure load to all lanes. 1887 void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1888 const MemOperand& src); 1889 1890 // Four-element structure load. 1891 void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1892 const VRegister& vt4, const MemOperand& src); 1893 1894 // Four-element single structure load to one lane. 1895 void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1896 const VRegister& vt4, int lane, const MemOperand& src); 1897 1898 // Four-element single structure load to all lanes. 1899 void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, 1900 const VRegister& vt4, const MemOperand& src); 1901 1902 // Count leading sign bits. 1903 void cls(const VRegister& vd, const VRegister& vn); 1904 1905 // Count leading zero bits (vector). 1906 void clz(const VRegister& vd, const VRegister& vn); 1907 1908 // Population count per byte. 1909 void cnt(const VRegister& vd, const VRegister& vn); 1910 1911 // Reverse bit order. 1912 void rbit(const VRegister& vd, const VRegister& vn); 1913 1914 // Reverse elements in 16-bit halfwords. 1915 void rev16(const VRegister& vd, const VRegister& vn); 1916 1917 // Reverse elements in 32-bit words. 1918 void rev32(const VRegister& vd, const VRegister& vn); 1919 1920 // Reverse elements in 64-bit doublewords. 1921 void rev64(const VRegister& vd, const VRegister& vn); 1922 1923 // Unsigned reciprocal square root estimate. 1924 void ursqrte(const VRegister& vd, const VRegister& vn); 1925 1926 // Unsigned reciprocal estimate. 
1927 void urecpe(const VRegister& vd, const VRegister& vn); 1928 1929 // Signed pairwise long add and accumulate. 1930 void sadalp(const VRegister& vd, const VRegister& vn); 1931 1932 // Signed pairwise long add. 1933 void saddlp(const VRegister& vd, const VRegister& vn); 1934 1935 // Unsigned pairwise long add. 1936 void uaddlp(const VRegister& vd, const VRegister& vn); 1937 1938 // Unsigned pairwise long add and accumulate. 1939 void uadalp(const VRegister& vd, const VRegister& vn); 1940 1941 // Shift left by immediate. 1942 void shl(const VRegister& vd, const VRegister& vn, int shift); 1943 1944 // Signed saturating shift left by immediate. 1945 void sqshl(const VRegister& vd, const VRegister& vn, int shift); 1946 1947 // Signed saturating shift left unsigned by immediate. 1948 void sqshlu(const VRegister& vd, const VRegister& vn, int shift); 1949 1950 // Unsigned saturating shift left by immediate. 1951 void uqshl(const VRegister& vd, const VRegister& vn, int shift); 1952 1953 // Signed shift left long by immediate. 1954 void sshll(const VRegister& vd, const VRegister& vn, int shift); 1955 1956 // Signed shift left long by immediate (second part). 1957 void sshll2(const VRegister& vd, const VRegister& vn, int shift); 1958 1959 // Signed extend long. 1960 void sxtl(const VRegister& vd, const VRegister& vn); 1961 1962 // Signed extend long (second part). 1963 void sxtl2(const VRegister& vd, const VRegister& vn); 1964 1965 // Unsigned shift left long by immediate. 1966 void ushll(const VRegister& vd, const VRegister& vn, int shift); 1967 1968 // Unsigned shift left long by immediate (second part). 1969 void ushll2(const VRegister& vd, const VRegister& vn, int shift); 1970 1971 // Shift left long by element size. 1972 void shll(const VRegister& vd, const VRegister& vn, int shift); 1973 1974 // Shift left long by element size (second part). 1975 void shll2(const VRegister& vd, const VRegister& vn, int shift); 1976 1977 // Unsigned extend long. 
1978 void uxtl(const VRegister& vd, const VRegister& vn); 1979 1980 // Unsigned extend long (second part). 1981 void uxtl2(const VRegister& vd, const VRegister& vn); 1982 1983 // Signed rounding halving add. 1984 void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1985 1986 // Unsigned halving sub. 1987 void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1988 1989 // Signed halving sub. 1990 void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1991 1992 // Unsigned saturating add. 1993 void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1994 1995 // Signed saturating add. 1996 void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); 1997 1998 // Unsigned saturating subtract. 1999 void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2000 2001 // Signed saturating subtract. 2002 void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2003 2004 // Add pairwise. 2005 void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2006 2007 // Add pair of elements scalar. 2008 void addp(const VRegister& vd, const VRegister& vn); 2009 2010 // Multiply-add to accumulator. 2011 void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2012 2013 // Multiply-subtract to accumulator. 2014 void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2015 2016 // Multiply. 2017 void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2018 2019 // Table lookup from one register. 2020 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); 2021 2022 // Table lookup from two registers. 2023 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, 2024 const VRegister& vm); 2025 2026 // Table lookup from three registers. 
2027 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2028 const VRegister& vn3, const VRegister& vm);
2029
2030 // Table lookup from four registers.
2031 void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2032 const VRegister& vn3, const VRegister& vn4, const VRegister& vm);
2033
2034 // Table lookup extension from one register.
2035 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm);
2036
2037 // Table lookup extension from two registers.
2038 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2039 const VRegister& vm);
2040
2041 // Table lookup extension from three registers.
2042 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2043 const VRegister& vn3, const VRegister& vm);
2044
2045 // Table lookup extension from four registers.
2046 void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2,
2047 const VRegister& vn3, const VRegister& vn4, const VRegister& vm);
2048
2049 // Instruction functions used only for test, debug, and patching.
2050 // Emit raw instructions in the instruction stream. dci(Instr raw_inst)2051 void dci(Instr raw_inst) { Emit(raw_inst); }
2052
2053 // Emit 8 bits of data in the instruction stream.
// NOTE(review): raw data emitted here lives in the code stream; callers are
// responsible for ensuring the processor never executes it as instructions.
dc8(uint8_t data)2054 void dc8(uint8_t data) { EmitData(&data, sizeof(data)); }
2055
2056 // Emit 32 bits of data in the instruction stream. dc32(uint32_t data)2057 void dc32(uint32_t data) { EmitData(&data, sizeof(data)); }
2058
2059 // Emit 64 bits of data in the instruction stream. dc64(uint64_t data)2060 void dc64(uint64_t data) { EmitData(&data, sizeof(data)); }
2061
2062 // Emit an address in the instruction stream.
2063 void dcptr(Label* label);
2064
2065 // Copy a string into the instruction stream, including the terminating
2066 // NUL ('\0') character. The instruction pointer (pc_) is then aligned
2067 // correctly for subsequent instructions.
2068 void EmitStringData(const char* string);
2069
2070 // Pseudo-instructions ------------------------------------------------------
2071
2072 // Parameters are described in arm64/instructions-arm64.h.
2073 void debug(const char* message, uint32_t code, Instr params = BREAK);
2074
2075 // Required by V8. db(uint8_t data)2076 void db(uint8_t data) { dc8(data); }
// Emit a 32-bit data word, optionally recording relocation information.
// Pools are blocked so a pool cannot be emitted between the reloc info and
// the data it describes.
2077 void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NONE) {
2078 BlockPoolsScope no_pool_scope(this);
2079 if (!RelocInfo::IsNone(rmode)) {
2080 DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
2081 RelocInfo::IsLiteralConstant(rmode));
2082 RecordRelocInfo(rmode);
2083 }
2084 dc32(data);
2085 }
// Emit a 64-bit data word, optionally recording relocation information.
2086 void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NONE) {
2087 BlockPoolsScope no_pool_scope(this);
2088 if (!RelocInfo::IsNone(rmode)) {
2089 DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
2090 RelocInfo::IsLiteralConstant(rmode));
2091 RecordRelocInfo(rmode);
2092 }
2093 dc64(data);
2094 }
// Emit a pointer-sized data word (64 bits here, via dc64), optionally
// recording relocation information.
2095 void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NONE) {
2096 BlockPoolsScope no_pool_scope(this);
2097 if (!RelocInfo::IsNone(rmode)) {
2098 DCHECK(RelocInfo::IsDataEmbeddedObject(rmode) ||
2099 RelocInfo::IsLiteralConstant(rmode));
2100 RecordRelocInfo(rmode);
2101 }
2102 dc64(data);
2103 }
2104
2105 // Code generation helpers --------------------------------------------------
2106
// Current emission position, viewed as an Instruction*.
pc()2107 Instruction* pc() const { return Instruction::Cast(pc_); }
2108
// Instruction at the given byte offset from the start of the code buffer.
InstructionAt(ptrdiff_t offset)2109 Instruction* InstructionAt(ptrdiff_t offset) const {
2110 return reinterpret_cast<Instruction*>(buffer_start_ + offset);
2111 }
2112
// Byte offset of an instruction from the start of the code buffer.
InstructionOffset(Instruction * instr)2113 ptrdiff_t InstructionOffset(Instruction* instr) const {
2114 return reinterpret_cast<byte*>(instr) - buffer_start_;
2115 }
2116
2117 // Register encoding.
// Encode a register into the Rd field of an instruction. These encoders
// disallow the stack pointer's internal register code; instructions that
// accept sp use the RdSP/RnSP variants below instead.
Rd(CPURegister rd)2118 static Instr Rd(CPURegister rd) {
2119 DCHECK_NE(rd.code(), kSPRegInternalCode);
2120 return rd.code() << Rd_offset;
2121 }
2122
Rn(CPURegister rn)2123 static Instr Rn(CPURegister rn) {
2124 DCHECK_NE(rn.code(), kSPRegInternalCode);
2125 return rn.code() << Rn_offset;
2126 }
2127
Rm(CPURegister rm)2128 static Instr Rm(CPURegister rm) {
2129 DCHECK_NE(rm.code(), kSPRegInternalCode);
2130 return rm.code() << Rm_offset;
2131 }
2132
// As Rm, but additionally disallows the zero register.
RmNot31(CPURegister rm)2133 static Instr RmNot31(CPURegister rm) {
2134 DCHECK_NE(rm.code(), kSPRegInternalCode);
2135 DCHECK(!rm.IsZero());
2136 return Rm(rm);
2137 }
2138
Ra(CPURegister ra)2139 static Instr Ra(CPURegister ra) {
2140 DCHECK_NE(ra.code(), kSPRegInternalCode);
2141 return ra.code() << Ra_offset;
2142 }
2143
Rt(CPURegister rt)2144 static Instr Rt(CPURegister rt) {
2145 DCHECK_NE(rt.code(), kSPRegInternalCode);
2146 return rt.code() << Rt_offset;
2147 }
2148
Rt2(CPURegister rt2)2149 static Instr Rt2(CPURegister rt2) {
2150 DCHECK_NE(rt2.code(), kSPRegInternalCode);
2151 return rt2.code() << Rt2_offset;
2152 }
2153
Rs(CPURegister rs)2154 static Instr Rs(CPURegister rs) {
2155 DCHECK_NE(rs.code(), kSPRegInternalCode);
2156 return rs.code() << Rs_offset;
2157 }
2158
2159 // These encoding functions allow the stack pointer to be encoded, and
2160 // disallow the zero register.
RdSP(Register rd)2161 static Instr RdSP(Register rd) {
2162 DCHECK(!rd.IsZero());
2163 return (rd.code() & kRegCodeMask) << Rd_offset;
2164 }
2165
RnSP(Register rn)2166 static Instr RnSP(Register rn) {
2167 DCHECK(!rn.IsZero());
2168 return (rn.code() & kRegCodeMask) << Rn_offset;
2169 }
2170
2171 // Flags encoding.
2172 inline static Instr Flags(FlagsUpdate S);
2173 inline static Instr Cond(Condition cond);
2174
2175 // PC-relative address encoding.
2176 inline static Instr ImmPCRelAddress(int imm21);
2177
2178 // Branch encoding.
2179 inline static Instr ImmUncondBranch(int imm26); 2180 inline static Instr ImmCondBranch(int imm19); 2181 inline static Instr ImmCmpBranch(int imm19); 2182 inline static Instr ImmTestBranch(int imm14); 2183 inline static Instr ImmTestBranchBit(unsigned bit_pos); 2184 2185 // Data Processing encoding. 2186 inline static Instr SF(Register rd); 2187 inline static Instr ImmAddSub(int imm); 2188 inline static Instr ImmS(unsigned imms, unsigned reg_size); 2189 inline static Instr ImmR(unsigned immr, unsigned reg_size); 2190 inline static Instr ImmSetBits(unsigned imms, unsigned reg_size); 2191 inline static Instr ImmRotate(unsigned immr, unsigned reg_size); 2192 inline static Instr ImmLLiteral(int imm19); 2193 inline static Instr BitN(unsigned bitn, unsigned reg_size); 2194 inline static Instr ShiftDP(Shift shift); 2195 inline static Instr ImmDPShift(unsigned amount); 2196 inline static Instr ExtendMode(Extend extend); 2197 inline static Instr ImmExtendShift(unsigned left_shift); 2198 inline static Instr ImmCondCmp(unsigned imm); 2199 inline static Instr Nzcv(StatusFlags nzcv); 2200 2201 static bool IsImmAddSub(int64_t immediate); 2202 static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n, 2203 unsigned* imm_s, unsigned* imm_r); 2204 2205 // MemOperand offset encoding. 2206 inline static Instr ImmLSUnsigned(int imm12); 2207 inline static Instr ImmLS(int imm9); 2208 inline static Instr ImmLSPair(int imm7, unsigned size); 2209 inline static Instr ImmShiftLS(unsigned shift_amount); 2210 inline static Instr ImmException(int imm16); 2211 inline static Instr ImmSystemRegister(int imm15); 2212 inline static Instr ImmHint(int imm7); 2213 inline static Instr ImmBarrierDomain(int imm2); 2214 inline static Instr ImmBarrierType(int imm2); 2215 inline static unsigned CalcLSDataSize(LoadStoreOp op); 2216 2217 // Instruction bits for vector format in data processing operations. 
// Vector-format bits derived from the destination register's total width
// (64 or 128 bits) and its lane count; unsupported combinations are fatal.
VFormat(VRegister vd)2218 static Instr VFormat(VRegister vd) {
2219 if (vd.Is64Bits()) {
2220 switch (vd.LaneCount()) {
2221 case 2:
2222 return NEON_2S;
2223 case 4:
2224 return NEON_4H;
2225 case 8:
2226 return NEON_8B;
2227 default:
2228 UNREACHABLE();
2229 }
2230 } else {
2231 DCHECK(vd.Is128Bits());
2232 switch (vd.LaneCount()) {
2233 case 2:
2234 return NEON_2D;
2235 case 4:
2236 return NEON_4S;
2237 case 8:
2238 return NEON_8H;
2239 case 16:
2240 return NEON_16B;
2241 default:
2242 UNREACHABLE();
2243 }
2244 }
2245 }
2246
2247 // Instruction bits for vector format in floating point data processing
2248 // operations.
// Scalar S/D registers map to FP32/FP64; 2- and 4-lane vectors map to the
// corresponding NEON FP vector formats.
FPFormat(VRegister vd)2249 static Instr FPFormat(VRegister vd) {
2250 if (vd.LaneCount() == 1) {
2251 // Floating point scalar formats.
2252 DCHECK(vd.Is32Bits() || vd.Is64Bits());
2253 return vd.Is64Bits() ? FP64 : FP32;
2254 }
2255
2256 // Two lane floating point vector formats.
2257 if (vd.LaneCount() == 2) {
2258 DCHECK(vd.Is64Bits() || vd.Is128Bits());
2259 return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S;
2260 }
2261
2262 // Four lane floating point vector format.
2263 DCHECK((vd.LaneCount() == 4) && vd.Is128Bits());
2264 return NEON_FP_4S;
2265 }
2266
2267 // Instruction bits for vector format in load and store operations.
// Like VFormat, but the load/store format set additionally allows the 1D
// arrangement for 64-bit registers.
LSVFormat(VRegister vd)2268 static Instr LSVFormat(VRegister vd) {
2269 if (vd.Is64Bits()) {
2270 switch (vd.LaneCount()) {
2271 case 1:
2272 return LS_NEON_1D;
2273 case 2:
2274 return LS_NEON_2S;
2275 case 4:
2276 return LS_NEON_4H;
2277 case 8:
2278 return LS_NEON_8B;
2279 default:
2280 UNREACHABLE();
2281 }
2282 } else {
2283 DCHECK(vd.Is128Bits());
2284 switch (vd.LaneCount()) {
2285 case 2:
2286 return LS_NEON_2D;
2287 case 4:
2288 return LS_NEON_4S;
2289 case 8:
2290 return LS_NEON_8H;
2291 case 16:
2292 return LS_NEON_16B;
2293 default:
2294 UNREACHABLE();
2295 }
2296 }
2297 }
2298
2299 // Instruction bits for scalar format in data processing operations.
// Scalar-format bits chosen from the scalar register's size in bytes
// (B/H/S/D).
SFormat(VRegister vd)2300 static Instr SFormat(VRegister vd) {
2301 DCHECK(vd.IsScalar());
2302 switch (vd.SizeInBytes()) {
2303 case 1:
2304 return NEON_B;
2305 case 2:
2306 return NEON_H;
2307 case 4:
2308 return NEON_S;
2309 case 8:
2310 return NEON_D;
2311 default:
2312 UNREACHABLE();
2313 }
2314 }
2315
// Spread a lane index across the H:L:M bits. num_bits (1, 2 or 3) selects
// how many of the three bits the index occupies; unused bits are cleared.
ImmNEONHLM(int index,int num_bits)2316 static Instr ImmNEONHLM(int index, int num_bits) {
2317 int h, l, m;
2318 if (num_bits == 3) {
2319 DCHECK(is_uint3(index));
2320 h = (index >> 2) & 1;
2321 l = (index >> 1) & 1;
2322 m = (index >> 0) & 1;
2323 } else if (num_bits == 2) {
2324 DCHECK(is_uint2(index));
2325 h = (index >> 1) & 1;
2326 l = (index >> 0) & 1;
2327 m = 0;
2328 } else {
2329 DCHECK(is_uint1(index) && (num_bits == 1));
2330 h = (index >> 0) & 1;
2331 l = 0;
2332 m = 0;
2333 }
2334 return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset);
2335 }
2336
// 4-bit immediate placed in the ImmNEONExt field.
ImmNEONExt(int imm4)2337 static Instr ImmNEONExt(int imm4) {
2338 DCHECK(is_uint4(imm4));
2339 return imm4 << ImmNEONExt_offset;
2340 }
2341
// imm5 field: the lane index shifted past a set marker bit whose position
// (1 << s) encodes the lane size, with s = log2(lane size in bytes).
ImmNEON5(Instr format,int index)2342 static Instr ImmNEON5(Instr format, int index) {
2343 DCHECK(is_uint4(index));
2344 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
2345 int imm5 = (index << (s + 1)) | (1 << s);
2346 return imm5 << ImmNEON5_offset;
2347 }
2348
// imm4 field: the lane index scaled by the lane size.
ImmNEON4(Instr format,int index)2349 static Instr ImmNEON4(Instr format, int index) {
2350 DCHECK(is_uint4(index));
2351 int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format));
2352 int imm4 = index << s;
2353 return imm4 << ImmNEON4_offset;
2354 }
2355
// Split a NEON modified-immediate byte into its abc (top 3 bits) and
// defgh (low 5 bits) instruction fields.
ImmNEONabcdefgh(int imm8)2356 static Instr ImmNEONabcdefgh(int imm8) {
2357 DCHECK(is_uint8(imm8));
2358 Instr instr;
2359 instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset;
2360 instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset;
2361 return instr;
2362 }
2363
// 4-bit cmode field of a NEON modified-immediate instruction.
NEONCmode(int cmode)2364 static Instr NEONCmode(int cmode) {
2365 DCHECK(is_uint4(cmode));
2366 return cmode << NEONCmode_offset;
2367 }
2368
// Single op bit of a NEON modified-immediate instruction (0 or 1).
NEONModImmOp(int op)2369 static Instr NEONModImmOp(int op) {
2370 DCHECK(is_uint1(op));
2371 return op << NEONModImmOp_offset;
2372 }
2373
2374 static bool IsImmLSUnscaled(int64_t offset);
2375 static bool IsImmLSScaled(int64_t offset, unsigned size);
2376 static bool IsImmLLiteral(int64_t offset);
2377
2378 // Move immediates encoding.
2379 inline static Instr ImmMoveWide(int imm);
2380 inline static Instr ShiftMoveWide(int shift);
2381
2382 // FP Immediates.
2383 static Instr ImmFP(double imm);
2384 static Instr ImmNEONFP(double imm);
2385 inline static Instr FPScale(unsigned scale);
2386
2387 // FP register type.
2388 inline static Instr FPType(VRegister fd);
2389
2390 // Unused on this architecture. MaybeEmitOutOfLineConstantPool()2391 void MaybeEmitOutOfLineConstantPool() {}
2392
// Emit the constant pool unconditionally, without a branch over it.
ForceConstantPoolEmissionWithoutJump()2393 void ForceConstantPoolEmissionWithoutJump() {
2394 constpool_.Check(Emission::kForced, Jump::kOmitted);
2395 }
// Emit the constant pool unconditionally, with a branch jumping over it.
ForceConstantPoolEmissionWithJump()2396 void ForceConstantPoolEmissionWithJump() {
2397 constpool_.Check(Emission::kForced, Jump::kRequired);
2398 }
2399 // Check if the const pool needs to be emitted while pretending that {margin}
2400 // more bytes of instructions have already been emitted.
2401 void EmitConstPoolWithJumpIfNeeded(size_t margin = 0) {
2402 constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin);
2403 }
2404
2405 // Used by veneer checks below - returns the max (= overapproximated) pc
2406 // offset after the veneer pool, if the veneer pool were to be emitted
2407 // immediately.
2408 intptr_t MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin);
2409 // Returns true if we should emit a veneer as soon as possible for a branch
2410 // which can at most reach to specified pc.
ShouldEmitVeneer(int max_reachable_pc,size_t margin)2411 bool ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
2412 return max_reachable_pc < MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
2413 }
// As above, for the branch whose range limit expires first.
2414 bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) {
2415 return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
2416 }
2417
2418 // The code size generated for a veneer. Currently one branch
2419 // instruction. This is for code size checking purposes, and can be extended
2420 // in the future for example if we decide to add nops between the veneers.
2421 static constexpr int kVeneerCodeSize = 1 * kInstrSize;
2422
2423 void RecordVeneerPool(int location_offset, int size);
2424 // Emits veneers for branches that are approaching their maximum range.
2425 // If need_protection is true, the veneers are protected by a branch jumping
2426 // over the code.
2427 void EmitVeneers(bool force_emit, bool need_protection,
2428 size_t margin = kVeneerDistanceMargin);
EmitVeneersGuard()2429 void EmitVeneersGuard() { EmitPoolGuard(); }
2430 // Checks whether veneers need to be emitted at this point.
2431 // If force_emit is set, a veneer is generated for *all* unresolved branches.
2432 void CheckVeneerPool(bool force_emit, bool require_jump,
2433 size_t margin = kVeneerDistanceMargin);
2434
2435 using BlockConstPoolScope = ConstantPool::BlockScope;
2436
// RAII scope that blocks both the veneer pool and the constant pool for its
// lifetime.
2437 class V8_NODISCARD BlockPoolsScope {
2438 public:
2439 // Block veneer and constant pool. Emits pools if necessary to ensure that
2440 // {margin} more bytes can be emitted without triggering pool emission.
2441 explicit BlockPoolsScope(Assembler* assem, size_t margin = 0) assem_(assem)2442 : assem_(assem), block_const_pool_(assem, margin) {
2443 assem_->CheckVeneerPool(false, true, margin);
2444 assem_->StartBlockVeneerPool();
2445 }
2446
// Variant taking a PoolEmissionCheck; unlike the constructor above it does
// not run a veneer-pool check before blocking.
BlockPoolsScope(Assembler * assem,PoolEmissionCheck check)2447 BlockPoolsScope(Assembler* assem, PoolEmissionCheck check)
2448 : assem_(assem), block_const_pool_(assem, check) {
2449 assem_->StartBlockVeneerPool();
2450 }
// Unblocks the veneer pool here; the constant pool is unblocked by the
// block_const_pool_ member's own destructor.
~BlockPoolsScope()2451 ~BlockPoolsScope() { assem_->EndBlockVeneerPool(); }
2452
2453 private:
2454 Assembler* assem_;
2455 BlockConstPoolScope block_const_pool_;
2456 DISALLOW_IMPLICIT_CONSTRUCTORS(BlockPoolsScope);
2457 };
2458
2459 #if defined(V8_OS_WIN) GetXdataEncoder()2460 win64_unwindinfo::XdataEncoder* GetXdataEncoder() {
2461 return xdata_encoder_.get();
2462 }
2463
2464 win64_unwindinfo::BuiltinUnwindInfo GetUnwindInfo() const;
2465 #endif
2466
2467 protected:
2468 inline const Register& AppropriateZeroRegFor(const CPURegister& reg) const;
2469
2470 void LoadStore(const CPURegister& rt, const MemOperand& addr, LoadStoreOp op);
2471 void LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
2472 const MemOperand& addr, LoadStorePairOp op);
2473 void LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
2474 NEONLoadStoreMultiStructOp op);
2475 void LoadStoreStruct1(const VRegister& vt, int reg_count,
2476 const MemOperand& addr);
2477 void LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
2478 const MemOperand& addr,
2479 NEONLoadStoreSingleStructOp op);
2480 void LoadStoreStructSingleAllLanes(const VRegister& vt,
2481 const MemOperand& addr,
2482 NEONLoadStoreSingleStructOp op);
2483 void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr,
2484 Instr op);
2485
2486 static bool IsImmLSPair(int64_t offset, unsigned size);
2487
2488 void Logical(const Register& rd, const Register& rn, const Operand& operand,
2489 LogicalOp op);
2490 void LogicalImmediate(const Register&
rd, const Register& rn, unsigned n, 2491 unsigned imm_s, unsigned imm_r, LogicalOp op); 2492 2493 void ConditionalCompare(const Register& rn, const Operand& operand, 2494 StatusFlags nzcv, Condition cond, 2495 ConditionalCompareOp op); 2496 static bool IsImmConditionalCompare(int64_t immediate); 2497 2498 void AddSubWithCarry(const Register& rd, const Register& rn, 2499 const Operand& operand, FlagsUpdate S, 2500 AddSubWithCarryOp op); 2501 2502 // Functions for emulating operands not directly supported by the instruction 2503 // set. 2504 void EmitShift(const Register& rd, const Register& rn, Shift shift, 2505 unsigned amount); 2506 void EmitExtendShift(const Register& rd, const Register& rn, Extend extend, 2507 unsigned left_shift); 2508 2509 void AddSub(const Register& rd, const Register& rn, const Operand& operand, 2510 FlagsUpdate S, AddSubOp op); 2511 2512 static bool IsImmFP32(float imm); 2513 static bool IsImmFP64(double imm); 2514 2515 // Find an appropriate LoadStoreOp or LoadStorePairOp for the specified 2516 // registers. Only simple loads are supported; sign- and zero-extension (such 2517 // as in LDPSW_x or LDRB_w) are not supported. 2518 static inline LoadStoreOp LoadOpFor(const CPURegister& rt); 2519 static inline LoadStorePairOp LoadPairOpFor(const CPURegister& rt, 2520 const CPURegister& rt2); 2521 static inline LoadStoreOp StoreOpFor(const CPURegister& rt); 2522 static inline LoadStorePairOp StorePairOpFor(const CPURegister& rt, 2523 const CPURegister& rt2); 2524 static inline LoadLiteralOp LoadLiteralOpFor(const CPURegister& rt); 2525 2526 // Remove the specified branch from the unbound label link chain. 2527 // If available, a veneer for this label can be used for other branches in the 2528 // chain if the link chain cannot be fixed up without this branch. 
  void RemoveBranchFromLabelLinkChain(Instruction* branch, Label* label,
                                      Instruction* label_veneer = nullptr);

 private:
  // Encodes `imm` as an 8-bit modified FP immediate field.
  static uint32_t FPToImm8(double imm);

  // Instruction helpers. These encode and emit the instruction classes the
  // public mnemonics map onto.
  void MoveWide(const Register& rd, uint64_t imm, int shift,
                MoveWideImmediateOp mov_op);
  void DataProcShiftedRegister(const Register& rd, const Register& rn,
                               const Operand& operand, FlagsUpdate S, Instr op);
  void DataProcExtendedRegister(const Register& rd, const Register& rn,
                                const Operand& operand, FlagsUpdate S,
                                Instr op);
  void ConditionalSelect(const Register& rd, const Register& rn,
                         const Register& rm, Condition cond,
                         ConditionalSelectOp op);
  void DataProcessing1Source(const Register& rd, const Register& rn,
                             DataProcessing1SourceOp op);
  void DataProcessing3Source(const Register& rd, const Register& rn,
                             const Register& rm, const Register& ra,
                             DataProcessing3SourceOp op);
  void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn,
                               FPDataProcessing1SourceOp op);
  void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm,
                               FPDataProcessing2SourceOp op);
  void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn,
                               const VRegister& fm, const VRegister& fa,
                               FPDataProcessing3SourceOp op);
  void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                        NEONAcrossLanesOp op);
  void NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                       NEONAcrossLanesOp op);
  void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                               const int left_shift,
                               NEONModifiedImmediateOp op);
  void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                               const int shift_amount,
                               NEONModifiedImmediateOp op);
  void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEON3SameOp vop);
  void NEONFP3Same(const VRegister& vd, const VRegister& vn,
                   const VRegister& vm, Instr op);
  void NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, NEON3DifferentOp vop);
  void NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                        const VRegister& vm, NEON3DifferentOp vop);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                      NEON2RegMiscOp vop, double value = 0.0);
  void NEON2RegMisc(const VRegister& vd, const VRegister& vn,
                    NEON2RegMiscOp vop, int value = 0);
  void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op);
  void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op);
  void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                NEONPermOp op);
  void NEONFPByElement(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm, int vm_index,
                       NEONByIndexedElementOp op);
  void NEONByElement(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm, int vm_index,
                     NEONByIndexedElementOp op);
  void NEONByElementL(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm, int vm_index,
                      NEONByIndexedElementOp op);
  void NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                          NEONShiftImmediateOp op, int immh_immb);
  void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                              int shift, NEONShiftImmediateOp op);
  void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn,
                               int shift, NEONShiftImmediateOp op);
  void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift,
                           NEONShiftImmediateOp op);
  void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop);
  void NEONTable(const VRegister& vd, const VRegister& vn, const VRegister& vm,
                 NEONTableOp op);

  // Computes the addressing-mode field bits for a NEON struct load/store.
  Instr LoadStoreStructAddrModeField(const MemOperand& addr);

  // Label helpers.

  // Return an offset for a label-referencing instruction, typically a branch.
  int LinkAndGetByteOffsetTo(Label* label);

  // This is the same as LinkAndGetByteOffsetTo, but return an offset
  // suitable for fields that take instruction offsets.
  inline int LinkAndGetInstructionOffsetTo(Label* label);

  // Sentinel offset marking the start (end) of a label's link chain.
  static constexpr int kStartOfLabelLinkChain = 0;

  // Verify that a label's link chain is intact.
  void CheckLabelLinkChain(Label const* label);

  // Emit the instruction at pc_, advancing pc_ by one instruction and then
  // checking whether the buffer needs to grow.
  void Emit(Instr instruction) {
    STATIC_ASSERT(sizeof(*pc_) == 1);
    STATIC_ASSERT(sizeof(instruction) == kInstrSize);
    // Writing past this bound would overflow the code buffer.
    DCHECK_LE(pc_ + sizeof(instruction), buffer_start_ + buffer_->size());

    memcpy(pc_, &instruction, sizeof(instruction));
    pc_ += sizeof(instruction);
    CheckBuffer();
  }

  // Emit data inline in the instruction stream.
  void EmitData(void const* data, unsigned size) {
    DCHECK_EQ(sizeof(*pc_), 1);
    DCHECK_LE(pc_ + size, buffer_start_ + buffer_->size());

    // TODO(all): Somehow register we have some data here. Then we can
    // disassemble it correctly.
    memcpy(pc_, data, size);
    pc_ += size;
    CheckBuffer();
  }

  void GrowBuffer();
  V8_INLINE void CheckBufferSpace();
  void CheckBuffer();

  // Emission of the veneer pools may be blocked in some code sequences.
  int veneer_pool_blocked_nesting_;  // Block emission if this is not zero.

  // Relocation info generation.
  // Each relocation is encoded as a variable size value.
  static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
  RelocInfoWriter reloc_info_writer;

  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

 protected:
  // Code generation.
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries, and debug strings encoded in the instruction
  // stream.
  static constexpr int kGap = 64;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

 public:
#ifdef DEBUG
  // Functions used for testing.
  size_t GetConstantPoolEntriesSizeForTesting() const {
    // Do not include branch over the pool.
    return constpool_.Entry32Count() * kInt32Size +
           constpool_.Entry64Count() * kInt64Size;
  }

  static size_t GetCheckConstPoolIntervalForTesting() {
    return ConstantPool::kCheckInterval;
  }

  static size_t GetApproxMaxDistToConstPoolForTesting() {
    return ConstantPool::kApproxDistToPool64;
  }

  // Compares the 64-bit slot at pc_offset in the buffer against the embedded
  // object: the raw object pointer when generating on-heap, otherwise the
  // object's request-table index.
  bool EmbeddedObjectMatches(int pc_offset, Handle<Object> object,
                             EmbeddedObjectIndex index) {
    return *reinterpret_cast<uint64_t*>(buffer_->start() + pc_offset) ==
           (IsOnHeap() ? object->ptr() : index);
  }
#endif

  // Records an unresolved forward branch so a veneer can be emitted for it
  // if it threatens to go out of range before its label is bound.
  class FarBranchInfo {
   public:
    FarBranchInfo(int offset, Label* label)
        : pc_offset_(offset), label_(label) {}
    // Offset of the branch in the code generation buffer.
    int pc_offset_;
    // The label branched to.
    Label* label_;
  };

 protected:
  // Information about unresolved (forward) branches.
  // The Assembler is only allowed to delete out-of-date information from here
  // after a label is bound. The MacroAssembler uses this information to
  // generate veneers.
  //
  // The second member gives information about the unresolved branch. The first
  // member of the pair is the maximum offset that the branch can reach in the
  // buffer. The map is sorted according to this reachable offset, allowing to
  // easily check when veneers need to be emitted.
  // Note that the maximum reachable offset (first member of the pairs) should
  // always be positive but has the same type as the return value for
  // pc_offset() for convenience.
  std::multimap<int, FarBranchInfo> unresolved_branches_;

  // We generate a veneer for a branch if we reach within this distance of the
  // limit of the range.
  static constexpr int kVeneerDistanceMargin = 1 * KB;
  // The factor of 2 is a finger in the air guess. With a default margin of
  // 1KB, that leaves us an additional 256 instructions to avoid generating a
  // protective branch.
  static constexpr int kVeneerNoProtectionFactor = 2;
  static constexpr int kVeneerDistanceCheckMargin =
      kVeneerNoProtectionFactor * kVeneerDistanceMargin;
  // Smallest maximum-reachable offset among all unresolved branches; the
  // map is sorted by that offset, so it is the first key.
  int unresolved_branches_first_limit() const {
    DCHECK(!unresolved_branches_.empty());
    return unresolved_branches_.begin()->first;
  }
  // This PC-offset of the next veneer pool check helps reduce the overhead
  // of checking for veneer pools.
  // It is maintained to the closest unresolved branch limit minus the maximum
  // veneer margin (or kMaxInt if there are no unresolved branches).
  int next_veneer_pool_check_;

#if defined(V8_OS_WIN)
  std::unique_ptr<win64_unwindinfo::XdataEncoder> xdata_encoder_;
#endif

 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // If a veneer is emitted for a branch instruction, that instruction must be
  // removed from the associated label's link chain so that the assembler does
  // not later attempt (likely unsuccessfully) to patch it to branch directly
  // to the label.
  void DeleteUnresolvedBranchInfoForLabel(Label* label);
  // This function deletes the information related to the label by traversing
  // the label chain, and for each PC-relative instruction in the chain
  // checking if pending unresolved information exists. Its complexity is
  // proportional to the length of the label chain.
  void DeleteUnresolvedBranchInfoForLabelTraverse(Label* label);

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  // The pending constant pool.
  ConstantPool constpool_;

  friend class EnsureSpace;
  friend class ConstantPool;
};

class PatchingAssembler : public Assembler {
 public:
  // Create an Assembler with a buffer starting at 'start'.
  // The buffer size is
  //   size of instructions to patch + kGap
  // Where kGap is the distance from which the Assembler tries to grow the
  // buffer.
  // If more or fewer instructions than expected are generated or if some
  // relocation information takes space in the buffer, the PatchingAssembler
  // will crash trying to grow the buffer.
  // Note that the instruction cache will not be flushed.
  PatchingAssembler(const AssemblerOptions& options, byte* start,
                    unsigned count)
      : Assembler(options,
                  ExternalAssemblerBuffer(start, count * kInstrSize + kGap)),
        block_constant_pool_emission_scope(this) {}

  ~PatchingAssembler() {
    // Verify we have generated the number of instructions we expected.
    DCHECK_EQ(pc_offset() + kGap, buffer_->size());
  }

  // See definition of PatchAdrFar() for details.
  static constexpr int kAdrFarPatchableNNops = 2;
  static constexpr int kAdrFarPatchableNInstrs = kAdrFarPatchableNNops + 2;
  void PatchAdrFar(int64_t target_offset);
  void PatchSubSp(uint32_t immediate);

 private:
  // Keeps constant pool (and veneer) emission blocked for the whole lifetime
  // of the PatchingAssembler, so no pool can be interleaved with the patch.
  BlockPoolsScope block_constant_pool_emission_scope;
};

// RAII helper that guarantees buffer space and blocks pool emission while an
// instruction sequence is emitted.
class EnsureSpace {
 public:
  explicit V8_INLINE EnsureSpace(Assembler* assembler);

 private:
  Assembler::BlockPoolsScope block_pools_scope_;
};

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_ARM64_ASSEMBLER_ARM64_H_