// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2021 the V8 project authors. All rights reserved.

#ifndef V8_CODEGEN_RISCV64_ASSEMBLER_RISCV64_H_
#define V8_CODEGEN_RISCV64_ASSEMBLER_RISCV64_H_

#include <stdio.h>

#include <memory>
#include <set>

#include "src/codegen/assembler.h"
#include "src/codegen/constant-pool.h"
#include "src/codegen/external-reference.h"
#include "src/codegen/label.h"
#include "src/codegen/riscv64/constants-riscv64.h"
#include "src/codegen/riscv64/register-riscv64.h"
#include "src/objects/contexts.h"
#include "src/objects/smi.h"

namespace v8 {
namespace internal {

#define DEBUG_PRINTF(...)   \
  if (FLAG_riscv_debug) {   \
    printf(__VA_ARGS__);    \
  }

class SafepointTableBuilder;

// -----------------------------------------------------------------------------
// Machine instruction Operands.
constexpr int kSmiShift = kSmiTagSize + kSmiShiftSize;
constexpr uint64_t kSmiShiftMask = (1UL << kSmiShift) - 1;
// Class Operand represents a shifter operand in data processing instructions.
class Operand {
 public:
  // Immediate.
  V8_INLINE explicit Operand(int64_t immediate,
                             RelocInfo::Mode rmode = RelocInfo::NONE)
      : rm_(no_reg), rmode_(rmode) {
    value_.immediate = immediate;
  }
  V8_INLINE explicit Operand(const ExternalReference& f)
      : rm_(no_reg), rmode_(RelocInfo::EXTERNAL_REFERENCE) {
    value_.immediate = static_cast<int64_t>(f.address());
  }
  V8_INLINE explicit Operand(const char* s);
  explicit Operand(Handle<HeapObject> handle);
  V8_INLINE explicit Operand(Smi value) : rm_(no_reg), rmode_(RelocInfo::NONE) {
    value_.immediate = static_cast<intptr_t>(value.ptr());
  }

  static Operand EmbeddedNumber(double number);  // Smi or HeapNumber.
  static Operand EmbeddedStringConstant(const StringConstantBase* str);

  // Register.
  V8_INLINE explicit Operand(Register rm) : rm_(rm) {}

  // Return true if this is a register operand.
  V8_INLINE bool is_reg() const;

  inline int64_t immediate() const;

  bool IsImmediate() const { return !rm_.is_valid(); }

  HeapObjectRequest heap_object_request() const {
    DCHECK(IsHeapObjectRequest());
    return value_.heap_object_request;
  }

  bool IsHeapObjectRequest() const {
    DCHECK_IMPLIES(is_heap_object_request_, IsImmediate());
    DCHECK_IMPLIES(is_heap_object_request_,
                   rmode_ == RelocInfo::FULL_EMBEDDED_OBJECT ||
                       rmode_ == RelocInfo::CODE_TARGET);
    return is_heap_object_request_;
  }

  Register rm() const { return rm_; }

  RelocInfo::Mode rmode() const { return rmode_; }

 private:
  Register rm_;
  union Value {
    Value() {}
    HeapObjectRequest heap_object_request;  // if is_heap_object_request_
    int64_t immediate;                      // otherwise
  } value_;                                 // valid if rm_ == no_reg
  bool is_heap_object_request_ = false;
  RelocInfo::Mode rmode_;

  friend class Assembler;
  friend class MacroAssembler;
};

// On RISC-V we have only one addressing mode with base_reg + offset.
// Class MemOperand represents a memory operand in load and store instructions.
class V8_EXPORT_PRIVATE MemOperand : public Operand {
 public:
  // Immediate value attached to offset.
  enum OffsetAddend { offset_minus_one = -1, offset_zero = 0 };

  explicit MemOperand(Register rn, int32_t offset = 0);
  explicit MemOperand(Register rn, int32_t unit, int32_t multiplier,
                      OffsetAddend offset_addend = offset_zero);
  int32_t offset() const { return offset_; }

  bool OffsetIsInt12Encodable() const { return is_int12(offset_); }

 private:
  int32_t offset_;

  friend class Assembler;
};

class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is nullptr, the assembler allocates and grows its
  // own buffer. Otherwise it takes ownership of the provided buffer.
  explicit Assembler(const AssemblerOptions&,
                     std::unique_ptr<AssemblerBuffer> = {});

  virtual ~Assembler();
  void AbortedCodeGeneration();
  // GetCode emits any pending (non-emitted) code and fills the descriptor desc.
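  //
  // A minimal usage sketch (illustrative only, not part of the original
  // header; assumes an Isolate* `isolate` and options obtained via
  // AssemblerOptions::Default(isolate)):
  //
  //   Assembler assm(AssemblerOptions::Default(isolate));
  //   ... emit instructions ...
  //   CodeDesc desc;
  //   assm.GetCode(isolate, &desc);  // no safepoint or handler tables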
  static constexpr int kNoHandlerTable = 0;
  static constexpr SafepointTableBuilder* kNoSafepointTable = nullptr;
  void GetCode(Isolate* isolate, CodeDesc* desc,
               SafepointTableBuilder* safepoint_table_builder,
               int handler_table_offset);

  // Convenience wrapper for code without safepoint or handler tables.
  void GetCode(Isolate* isolate, CodeDesc* desc) {
    GetCode(isolate, desc, kNoSafepointTable, kNoHandlerTable);
  }

  // Unused on this architecture.
  void MaybeEmitOutOfLineConstantPool() {}

  // Label operations & relative jumps (PPUM Appendix D).
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.
  void bind(Label* L);  // Binds an unbound label L to current code position.

  enum OffsetSize : int {
    kOffset21 = 21,  // RISCV jal
    kOffset12 = 12,  // RISCV imm12
    kOffset20 = 20,  // RISCV imm20
    kOffset13 = 13,  // RISCV branch
    kOffset32 = 32,  // RISCV auipc + instr_I
    kOffset11 = 11,  // RISCV C_J
    kOffset8 = 8     // RISCV compressed branch
  };

  // Determines if Label is bound and near enough so that a branch instruction
  // can be used to reach it, instead of a jump instruction.
  bool is_near(Label* L);
  bool is_near(Label* L, OffsetSize bits);
  bool is_near_branch(Label* L);

  // Get offset from instr.
  int BranchOffset(Instr instr);
  static int BrachlongOffset(Instr auipc, Instr jalr);
  static int PatchBranchlongOffset(Address pc, Instr auipc, Instr instr_I,
                                   int32_t offset);
  int JumpOffset(Instr instr);
  int CJumpOffset(Instr instr);
  int CBranchOffset(Instr instr);
  static int LdOffset(Instr instr);
  static int AuipcOffset(Instr instr);
  static int JalrOffset(Instr instr);

  // Returns the branch offset to the given label from the current code
  // position. Links the label to the current position if it is still unbound.
  // Manages the jump elimination optimization if the second parameter is true.
  int32_t branch_offset_helper(Label* L, OffsetSize bits);
  inline int32_t branch_offset(Label* L) {
    return branch_offset_helper(L, OffsetSize::kOffset13);
  }
  inline int32_t jump_offset(Label* L) {
    return branch_offset_helper(L, OffsetSize::kOffset21);
  }
  inline int16_t cjump_offset(Label* L) {
    return (int16_t)branch_offset_helper(L, OffsetSize::kOffset11);
  }
  inline int32_t cbranch_offset(Label* L) {
    return branch_offset_helper(L, OffsetSize::kOffset8);
  }

  uint64_t jump_address(Label* L);
  uint64_t branch_long_offset(Label* L);

  // Puts a label's target address at the given position.
  // The high 8 bits are set to zero.
  void label_at_put(Label* L, int at_offset);

  // Read/Modify the code target address in the branch/call instruction at pc.
  // The isolate argument is unused (and may be nullptr) when skipping flushing.
  static Address target_address_at(Address pc);
  V8_INLINE static void set_target_address_at(
      Address pc, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) {
    set_target_value_at(pc, target, icache_flush_mode);
  }

  static Address target_address_at(Address pc, Address constant_pool);

  static void set_target_address_at(
      Address pc, Address constant_pool, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);

  // Read/Modify the code target address in the branch/call instruction at pc.
  inline static Tagged_t target_compressed_address_at(Address pc,
                                                      Address constant_pool);
  inline static void set_target_compressed_address_at(
      Address pc, Address constant_pool, Tagged_t target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);

  inline Handle<Object> code_target_object_handle_at(Address pc,
                                                     Address constant_pool);
  inline Handle<HeapObject> compressed_embedded_object_handle_at(
      Address pc, Address constant_pool);

  static bool IsConstantPoolAt(Instruction* instr);
  static int ConstantPoolSizeAt(Instruction* instr);
  // See Assembler::CheckConstPool for more info.
  void EmitPoolGuard();

  static void set_target_value_at(
      Address pc, uint64_t target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);

  static void JumpLabelToJumpRegister(Address pc);

  // This sets the branch destination (which gets loaded at the call address).
  // This is for calls and branches within generated code. The serializer
  // has already deserialized the lui/ori instructions etc.
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Code code, Address target);

  // Get the size of the special target encoded at 'instruction_payload'.
  inline static int deserialization_special_target_size(
      Address instruction_payload);

  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
      Address pc, Address target,
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);

  // Difference between address of current opcode and target address offset.
  static constexpr int kBranchPCOffset = kInstrSize;

  // Difference between address of current opcode and target address offset,
  // when we are generating a sequence of instructions for long relative PC
  // branches.
  static constexpr int kLongBranchPCOffset = 3 * kInstrSize;

  // Adjust ra register in branch delay slot of bal instruction so as to skip
  // instructions not needed after optimization of PIC in
  // TurboAssembler::BranchAndLink method.
  static constexpr int kOptimizedBranchAndLinkLongReturnOffset = 4 * kInstrSize;

  // Here we are patching the address in the LUI/ADDI instruction pair.
  // These values are used in the serialization process and must be zero for
  // the RISC-V platform, as Code, Embedded Object or External-reference
  // pointers are split across two consecutive instructions and don't exist
  // separately in the code, so the serializer should not step forwards in
  // memory after a target is resolved and written.
  static constexpr int kSpecialTargetSize = 0;

  // Number of consecutive instructions used to store 32bit/64bit constant.
  // This constant was used in the RelocInfo::target_address_address() function
  // to tell the serializer the address of the instruction that follows the
  // LUI/ADDI instruction pair.
  static constexpr int kInstructionsFor32BitConstant = 2;
  static constexpr int kInstructionsFor64BitConstant = 8;

  // Difference between address of current opcode and value read from pc
  // register.
  static constexpr int kPcLoadDelta = 4;

  // Bits available for offset field in branches
  static constexpr int kBranchOffsetBits = 13;

  // Bits available for offset field in jump
  static constexpr int kJumpOffsetBits = 21;

  // Bits available for offset field in compressed jump
  static constexpr int kCJalOffsetBits = 12;

  // Bits available for offset field in compressed branch
  static constexpr int kCBranchOffsetBits = 9;

  // Max offset for b instructions with 12-bit offset field (multiple of 2)
  static constexpr int kMaxBranchOffset = (1 << (13 - 1)) - 1;

  // Max offset for jal instruction with 20-bit offset field (multiple of 2)
  static constexpr int kMaxJumpOffset = (1 << (21 - 1)) - 1;

  static constexpr int kTrampolineSlotsSize = 2 * kInstrSize;

  RegList* GetScratchRegisterList() { return &scratch_register_list_; }

  // ---------------------------------------------------------------------------
  // Code generation.

  // This function is called when on-heap-compilation invariants are
  // invalidated. For instance, when the assembler buffer grows or a GC happens
  // between Code object allocation and Code object finalization.
  void FixOnHeapReferences(bool update_embedded_objects = true);
  // This function is called when we fall back from on-heap to off-heap
  // compilation and patch on-heap references to handles.
  void FixOnHeapReferencesToHandles();
  // Insert the smallest number of nop instructions possible to align the pc
  // offset to a multiple of m. m must be a power of 2 (>= 4).
  void Align(int m);
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
  // Aligns code to something that's optimal for a jump target for the
  // platform.
  void CodeTargetAlign();
  void LoopHeaderAlign() { CodeTargetAlign(); }

  // Different nop operations are used by the code generator to detect certain
  // states of the generated code.
  enum NopMarkerTypes {
    NON_MARKING_NOP = 0,
    DEBUG_BREAK_NOP,
    // IC markers.
    PROPERTY_ACCESS_INLINED,
    PROPERTY_ACCESS_INLINED_CONTEXT,
    PROPERTY_ACCESS_INLINED_CONTEXT_DONT_DELETE,
    // Helper values.
    LAST_CODE_MARKER,
    FIRST_IC_MARKER = PROPERTY_ACCESS_INLINED,
  };

  // RISC-V Instructions Emitted to a buffer

  void lui(Register rd, int32_t imm20);
  void auipc(Register rd, int32_t imm20);

  // Jumps
  void jal(Register rd, int32_t imm20);
  void jalr(Register rd, Register rs1, int16_t imm12);

  // Branches
  void beq(Register rs1, Register rs2, int16_t imm12);
  inline void beq(Register rs1, Register rs2, Label* L) {
    beq(rs1, rs2, branch_offset(L));
  }
  void bne(Register rs1, Register rs2, int16_t imm12);
  inline void bne(Register rs1, Register rs2, Label* L) {
    bne(rs1, rs2, branch_offset(L));
  }
  void blt(Register rs1, Register rs2, int16_t imm12);
  inline void blt(Register rs1, Register rs2, Label* L) {
    blt(rs1, rs2, branch_offset(L));
  }
  void bge(Register rs1, Register rs2, int16_t imm12);
  inline void bge(Register rs1, Register rs2, Label* L) {
    bge(rs1, rs2, branch_offset(L));
  }
  void bltu(Register rs1, Register rs2, int16_t imm12);
  inline void bltu(Register rs1, Register rs2, Label* L) {
    bltu(rs1, rs2, branch_offset(L));
  }
  void bgeu(Register rs1, Register rs2, int16_t imm12);
  inline void bgeu(Register rs1, Register rs2, Label* L) {
    bgeu(rs1, rs2, branch_offset(L));
  }

  // Loads
  void lb(Register rd, Register rs1, int16_t imm12);
  void lh(Register rd, Register rs1, int16_t imm12);
  void lw(Register rd, Register rs1, int16_t imm12);
  void lbu(Register rd, Register rs1, int16_t imm12);
  void lhu(Register rd, Register rs1, int16_t imm12);

  // Stores
  void sb(Register source, Register base, int16_t imm12);
  void sh(Register source, Register base, int16_t imm12);
  void sw(Register source, Register base, int16_t imm12);

  // Arithmetic with immediate
  void addi(Register rd, Register rs1, int16_t imm12);
  void slti(Register rd, Register rs1, int16_t imm12);
  void sltiu(Register rd, Register rs1, int16_t imm12);
  void xori(Register rd, Register rs1, int16_t imm12);
  void ori(Register rd, Register rs1, int16_t imm12);
  void andi(Register rd, Register rs1, int16_t imm12);
  void slli(Register rd, Register rs1, uint8_t shamt);
  void srli(Register rd, Register rs1, uint8_t shamt);
  void srai(Register rd, Register rs1, uint8_t shamt);

  // Arithmetic
  void add(Register rd, Register rs1, Register rs2);
  void sub(Register rd, Register rs1, Register rs2);
  void sll(Register rd, Register rs1, Register rs2);
  void slt(Register rd, Register rs1, Register rs2);
  void sltu(Register rd, Register rs1, Register rs2);
  void xor_(Register rd, Register rs1, Register rs2);
  void srl(Register rd, Register rs1, Register rs2);
  void sra(Register rd, Register rs1, Register rs2);
  void or_(Register rd, Register rs1, Register rs2);
  void and_(Register rd, Register rs1, Register rs2);

  // Memory fences
  void fence(uint8_t pred, uint8_t succ);
  void fence_tso();

  // Environment call / break
  void ecall();
  void ebreak();

  // This is a de facto standard (as set by GNU binutils) 32-bit unimplemented
  // instruction (i.e., it should always trap, if your implementation has
  // invalid instruction traps).
  void unimp();
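
  // Illustrative sketch (not part of the original header): the raw emitters
  // above pair with the Label machinery declared earlier. A simple
  // decrement-until-zero loop over a count held in a2 could be emitted as:
  //
  //   Label loop;
  //   assm.bind(&loop);
  //   assm.addi(a2, a2, -1);
  //   assm.bne(a2, zero_reg, &loop);  // backward branch, 13-bit offset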
  // CSR
  void csrrw(Register rd, ControlStatusReg csr, Register rs1);
  void csrrs(Register rd, ControlStatusReg csr, Register rs1);
  void csrrc(Register rd, ControlStatusReg csr, Register rs1);
  void csrrwi(Register rd, ControlStatusReg csr, uint8_t imm5);
  void csrrsi(Register rd, ControlStatusReg csr, uint8_t imm5);
  void csrrci(Register rd, ControlStatusReg csr, uint8_t imm5);

  // RV64I
  void lwu(Register rd, Register rs1, int16_t imm12);
  void ld(Register rd, Register rs1, int16_t imm12);
  void sd(Register source, Register base, int16_t imm12);
  void addiw(Register rd, Register rs1, int16_t imm12);
  void slliw(Register rd, Register rs1, uint8_t shamt);
  void srliw(Register rd, Register rs1, uint8_t shamt);
  void sraiw(Register rd, Register rs1, uint8_t shamt);
  void addw(Register rd, Register rs1, Register rs2);
  void subw(Register rd, Register rs1, Register rs2);
  void sllw(Register rd, Register rs1, Register rs2);
  void srlw(Register rd, Register rs1, Register rs2);
  void sraw(Register rd, Register rs1, Register rs2);

  // RV32M Standard Extension
  void mul(Register rd, Register rs1, Register rs2);
  void mulh(Register rd, Register rs1, Register rs2);
  void mulhsu(Register rd, Register rs1, Register rs2);
  void mulhu(Register rd, Register rs1, Register rs2);
  void div(Register rd, Register rs1, Register rs2);
  void divu(Register rd, Register rs1, Register rs2);
  void rem(Register rd, Register rs1, Register rs2);
  void remu(Register rd, Register rs1, Register rs2);

  // RV64M Standard Extension (in addition to RV32M)
  void mulw(Register rd, Register rs1, Register rs2);
  void divw(Register rd, Register rs1, Register rs2);
  void divuw(Register rd, Register rs1, Register rs2);
  void remw(Register rd, Register rs1, Register rs2);
  void remuw(Register rd, Register rs1, Register rs2);

  // RV32A Standard Extension
  void lr_w(bool aq, bool rl, Register rd, Register rs1);
  void sc_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoswap_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoadd_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoxor_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoand_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoor_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amomin_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amomax_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amominu_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amomaxu_w(bool aq, bool rl, Register rd, Register rs1, Register rs2);

  // RV64A Standard Extension (in addition to RV32A)
  void lr_d(bool aq, bool rl, Register rd, Register rs1);
  void sc_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoswap_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoadd_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoxor_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoand_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amoor_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amomin_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amomax_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amominu_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);
  void amomaxu_d(bool aq, bool rl, Register rd, Register rs1, Register rs2);

  // RV32F Standard Extension
  void flw(FPURegister rd, Register rs1, int16_t imm12);
  void fsw(FPURegister source, Register base, int16_t imm12);
  void fmadd_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
               FPURegister rs3, RoundingMode frm = RNE);
  void fmsub_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
               FPURegister rs3, RoundingMode frm = RNE);
  void fnmsub_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
                FPURegister rs3, RoundingMode frm = RNE);
  void fnmadd_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
                FPURegister rs3, RoundingMode frm = RNE);
  void fadd_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fsub_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fmul_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fdiv_s(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fsqrt_s(FPURegister rd, FPURegister rs1, RoundingMode frm = RNE);
  void fsgnj_s(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fsgnjn_s(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fsgnjx_s(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fmin_s(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fmax_s(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fcvt_w_s(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_wu_s(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fmv_x_w(Register rd, FPURegister rs1);
  void feq_s(Register rd, FPURegister rs1, FPURegister rs2);
  void flt_s(Register rd, FPURegister rs1, FPURegister rs2);
  void fle_s(Register rd, FPURegister rs1, FPURegister rs2);
  void fclass_s(Register rd, FPURegister rs1);
  void fcvt_s_w(FPURegister rd, Register rs1, RoundingMode frm = RNE);
  void fcvt_s_wu(FPURegister rd, Register rs1, RoundingMode frm = RNE);
  void fmv_w_x(FPURegister rd, Register rs1);

  // RV64F Standard Extension (in addition to RV32F)
  void fcvt_l_s(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_lu_s(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_s_l(FPURegister rd, Register rs1, RoundingMode frm = RNE);
  void fcvt_s_lu(FPURegister rd, Register rs1, RoundingMode frm = RNE);

  // RV32D Standard Extension
  void fld(FPURegister rd, Register rs1, int16_t imm12);
  void fsd(FPURegister source, Register base, int16_t imm12);
  void fmadd_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
               FPURegister rs3, RoundingMode frm = RNE);
  void fmsub_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
               FPURegister rs3, RoundingMode frm = RNE);
  void fnmsub_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
                FPURegister rs3, RoundingMode frm = RNE);
  void fnmadd_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
                FPURegister rs3, RoundingMode frm = RNE);
  void fadd_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fsub_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fmul_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fdiv_d(FPURegister rd, FPURegister rs1, FPURegister rs2,
              RoundingMode frm = RNE);
  void fsqrt_d(FPURegister rd, FPURegister rs1, RoundingMode frm = RNE);
  void fsgnj_d(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fsgnjn_d(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fsgnjx_d(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fmin_d(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fmax_d(FPURegister rd, FPURegister rs1, FPURegister rs2);
  void fcvt_s_d(FPURegister rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_d_s(FPURegister rd, FPURegister rs1, RoundingMode frm = RNE);
  void feq_d(Register rd, FPURegister rs1, FPURegister rs2);
  void flt_d(Register rd, FPURegister rs1, FPURegister rs2);
  void fle_d(Register rd, FPURegister rs1, FPURegister rs2);
  void fclass_d(Register rd, FPURegister rs1);
  void fcvt_w_d(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_wu_d(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_d_w(FPURegister rd, Register rs1, RoundingMode frm = RNE);
  void fcvt_d_wu(FPURegister rd, Register rs1, RoundingMode frm = RNE);

  // RV64D Standard Extension (in addition to RV32D)
  void fcvt_l_d(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fcvt_lu_d(Register rd, FPURegister rs1, RoundingMode frm = RNE);
  void fmv_x_d(Register rd, FPURegister rs1);
  void fcvt_d_l(FPURegister rd, Register rs1, RoundingMode frm = RNE);
  void fcvt_d_lu(FPURegister rd, Register rs1, RoundingMode frm = RNE);
  void fmv_d_x(FPURegister rd, Register rs1);

  // RV64C Standard Extension
  void c_nop();
  void c_addi(Register rd, int8_t imm6);
  void c_addiw(Register rd, int8_t imm6);
  void c_addi16sp(int16_t imm10);
  void c_addi4spn(Register rd, int16_t uimm10);
  void c_li(Register rd, int8_t imm6);
  void c_lui(Register rd, int8_t imm6);
  void c_slli(Register rd, uint8_t shamt6);
  void c_fldsp(FPURegister rd, uint16_t uimm9);
  void c_lwsp(Register rd, uint16_t uimm8);
  void c_ldsp(Register rd, uint16_t uimm9);
  void c_jr(Register rs1);
  void c_mv(Register rd, Register rs2);
  void c_ebreak();
  void c_jalr(Register rs1);
  void c_j(int16_t imm12);
  inline void c_j(Label* L) { c_j(cjump_offset(L)); }
  void c_add(Register rd, Register rs2);
  void c_sub(Register rd, Register rs2);
  void c_and(Register rd, Register rs2);
  void c_xor(Register rd, Register rs2);
  void c_or(Register rd, Register rs2);
  void c_subw(Register rd, Register rs2);
  void c_addw(Register rd, Register rs2);
  void c_swsp(Register rs2, uint16_t uimm8);
  void c_sdsp(Register rs2, uint16_t uimm9);
  void c_fsdsp(FPURegister rs2, uint16_t uimm9);
  void c_lw(Register rd, Register rs1, uint16_t uimm7);
  void c_ld(Register rd, Register rs1, uint16_t uimm8);
  void c_fld(FPURegister rd, Register rs1, uint16_t uimm8);
  void c_sw(Register rs2, Register rs1, uint16_t uimm7);
  void c_sd(Register rs2, Register rs1, uint16_t uimm8);
  void c_fsd(FPURegister rs2, Register rs1, uint16_t uimm8);
  void c_bnez(Register rs1, int16_t imm9);
  inline void c_bnez(Register rs1, Label* L) { c_bnez(rs1, branch_offset(L)); }
  void c_beqz(Register rs1, int16_t imm9);
  inline void c_beqz(Register rs1, Label* L) { c_beqz(rs1, branch_offset(L)); }
  void c_srli(Register rs1, int8_t shamt6);
  void c_srai(Register rs1, int8_t shamt6);
  void c_andi(Register rs1, int8_t imm6);
  void NOP();
  void EBREAK();

  // RVV
  static int32_t GenZimm(VSew vsew, Vlmul vlmul, TailAgnosticType tail = tu,
                         MaskAgnosticType mask = mu) {
    return (mask << 7) | (tail << 6) | ((vsew & 0x7) << 3) | (vlmul & 0x7);
  }

  void vsetvli(Register rd, Register rs1, VSew vsew, Vlmul vlmul,
               TailAgnosticType tail = tu, MaskAgnosticType mask = mu);

  void vsetivli(Register rd, uint8_t uimm, VSew vsew, Vlmul vlmul,
                TailAgnosticType tail = tu, MaskAgnosticType mask = mu);

  inline void vsetvlmax(Register rd, VSew vsew, Vlmul vlmul,
                        TailAgnosticType tail = tu,
                        MaskAgnosticType mask = mu) {
    vsetvli(rd, zero_reg, vsew, vlmul, tu, mu);
  }

  inline void vsetvl(VSew vsew, Vlmul vlmul, TailAgnosticType tail = tu,
                     MaskAgnosticType mask = mu) {
    vsetvli(zero_reg, zero_reg, vsew, vlmul, tu, mu);
  }

  void vsetvl(Register rd, Register rs1, Register rs2);

  void vl(VRegister vd, Register rs1, uint8_t lumop, VSew vsew,
          MaskType mask = NoMask);
  void vls(VRegister vd, Register rs1, Register rs2, VSew vsew,
           MaskType mask = NoMask);
  void vlx(VRegister vd, Register rs1, VRegister vs3, VSew vsew,
           MaskType mask = NoMask);

  void vs(VRegister vd, Register rs1, uint8_t sumop, VSew vsew,
          MaskType mask = NoMask);
  void vss(VRegister vd, Register rs1, Register rs2, VSew vsew,
           MaskType mask = NoMask);
  void vsx(VRegister vd, Register rs1, VRegister vs3, VSew vsew,
           MaskType mask = NoMask);

  void vsu(VRegister vd, Register rs1, VRegister vs3, VSew vsew,
           MaskType mask = NoMask);

#define SegInstr(OP)  \
  void OP##seg2(ARG); \
  void OP##seg3(ARG); \
  void OP##seg4(ARG); \
  void OP##seg5(ARG); \
  void OP##seg6(ARG); \
  void OP##seg7(ARG); \
  void OP##seg8(ARG);

#define ARG \
  VRegister vd, Register rs1, uint8_t lumop, VSew vsew, MaskType mask = NoMask

  SegInstr(vl) SegInstr(vs)
#undef ARG

#define ARG \
  VRegister vd, Register rs1, Register rs2, VSew vsew, MaskType mask = NoMask

  SegInstr(vls) SegInstr(vss)
#undef ARG

#define ARG \
  VRegister vd, Register rs1, VRegister rs2, VSew vsew, MaskType mask = NoMask

  SegInstr(vsx) SegInstr(vlx)
#undef ARG
#undef SegInstr

  // RVV Vector Arithmetic Instruction

  void vmv_vv(VRegister vd, VRegister vs1);
  void vmv_vx(VRegister vd, Register rs1);
  void vmv_vi(VRegister vd, uint8_t simm5);
  void vmv_xs(Register rd, VRegister vs2);
  void vmv_sx(VRegister vd, Register rs1);
  void vmerge_vv(VRegister vd, VRegister vs1, VRegister vs2);
  void vmerge_vx(VRegister vd, Register rs1, VRegister vs2);
  void vmerge_vi(VRegister vd, uint8_t imm5, VRegister vs2);

  void vadc_vv(VRegister vd, VRegister vs1, VRegister vs2);
  void vadc_vx(VRegister vd, Register rs1, VRegister vs2);
  void vadc_vi(VRegister vd, uint8_t imm5, VRegister vs2);

  void vmadc_vv(VRegister vd, VRegister vs1, VRegister vs2);
  void vmadc_vx(VRegister vd, Register rs1, VRegister vs2);
  void vmadc_vi(VRegister vd, uint8_t imm5, VRegister vs2);

#define DEFINE_OPIVV(name, funct6)                           \
  void name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
                 MaskType mask = NoMask);

#define DEFINE_OPIVX(name, funct6)                          \
  void name##_vx(VRegister vd, VRegister vs2, Register rs1, \
                 MaskType mask = NoMask);

#define DEFINE_OPIVI(name, funct6)                         \
  void name##_vi(VRegister vd, VRegister vs2, int8_t imm5, \
                 MaskType mask = NoMask);
#define DEFINE_OPMVV(name, funct6)                           \
  void name##_vs(VRegister vd, VRegister vs2, VRegister vs1, \
                 MaskType mask = NoMask);

#define DEFINE_OPMVX(name, funct6)                          \
  void name##_vx(VRegister vd, VRegister vs2, Register rs1, \
                 MaskType mask = NoMask);

#define DEFINE_OPFVV(name, funct6)                           \
  void name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
                 MaskType mask = NoMask);

#define DEFINE_OPFVF(name, funct6)                             \
  void name##_vf(VRegister vd, VRegister vs2, FPURegister fs1, \
                 MaskType mask = NoMask);

  DEFINE_OPIVV(vadd, VADD_FUNCT6)
  DEFINE_OPIVX(vadd, VADD_FUNCT6)
  DEFINE_OPIVI(vadd, VADD_FUNCT6)
  DEFINE_OPIVV(vsub, VSUB_FUNCT6)
  DEFINE_OPIVX(vsub, VSUB_FUNCT6)
  DEFINE_OPIVX(vsadd, VSADD_FUNCT6)
  DEFINE_OPIVV(vsadd, VSADD_FUNCT6)
  DEFINE_OPIVI(vsadd, VSADD_FUNCT6)
  DEFINE_OPIVX(vsaddu, VSADD_FUNCT6)
  DEFINE_OPIVV(vsaddu, VSADDU_FUNCT6)
  DEFINE_OPIVI(vsaddu, VSADDU_FUNCT6)
  DEFINE_OPIVX(vssub, VSSUB_FUNCT6)
  DEFINE_OPIVV(vssub, VSSUB_FUNCT6)
  DEFINE_OPIVX(vssubu, VSSUBU_FUNCT6)
  DEFINE_OPIVV(vssubu, VSSUBU_FUNCT6)
  DEFINE_OPIVX(vrsub, VRSUB_FUNCT6)
  DEFINE_OPIVI(vrsub, VRSUB_FUNCT6)
  DEFINE_OPIVV(vminu, VMINU_FUNCT6)
  DEFINE_OPIVX(vminu, VMINU_FUNCT6)
  DEFINE_OPIVV(vmin, VMIN_FUNCT6)
  DEFINE_OPIVX(vmin, VMIN_FUNCT6)
  DEFINE_OPIVV(vmaxu, VMAXU_FUNCT6)
  DEFINE_OPIVX(vmaxu, VMAXU_FUNCT6)
  DEFINE_OPIVV(vmax, VMAX_FUNCT6)
  DEFINE_OPIVX(vmax, VMAX_FUNCT6)
  DEFINE_OPIVV(vand, VAND_FUNCT6)
  DEFINE_OPIVX(vand, VAND_FUNCT6)
  DEFINE_OPIVI(vand, VAND_FUNCT6)
  DEFINE_OPIVV(vor, VOR_FUNCT6)
  DEFINE_OPIVX(vor, VOR_FUNCT6)
  DEFINE_OPIVI(vor, VOR_FUNCT6)
  DEFINE_OPIVV(vxor, VXOR_FUNCT6)
  DEFINE_OPIVX(vxor, VXOR_FUNCT6)
  DEFINE_OPIVI(vxor, VXOR_FUNCT6)
  DEFINE_OPIVV(vrgather, VRGATHER_FUNCT6)
  DEFINE_OPIVX(vrgather, VRGATHER_FUNCT6)
  DEFINE_OPIVI(vrgather, VRGATHER_FUNCT6)

  DEFINE_OPIVX(vslidedown, VSLIDEDOWN_FUNCT6)
  DEFINE_OPIVI(vslidedown, VSLIDEDOWN_FUNCT6)
  DEFINE_OPIVX(vslideup, VSLIDEUP_FUNCT6)
  DEFINE_OPIVI(vslideup, VSLIDEUP_FUNCT6)

  DEFINE_OPIVV(vmseq, VMSEQ_FUNCT6)
  DEFINE_OPIVX(vmseq, VMSEQ_FUNCT6)
  DEFINE_OPIVI(vmseq, VMSEQ_FUNCT6)

  DEFINE_OPIVV(vmsne, VMSNE_FUNCT6)
  DEFINE_OPIVX(vmsne, VMSNE_FUNCT6)
  DEFINE_OPIVI(vmsne, VMSNE_FUNCT6)

  DEFINE_OPIVV(vmsltu, VMSLTU_FUNCT6)
  DEFINE_OPIVX(vmsltu, VMSLTU_FUNCT6)

  DEFINE_OPIVV(vmslt, VMSLT_FUNCT6)
  DEFINE_OPIVX(vmslt, VMSLT_FUNCT6)

  DEFINE_OPIVV(vmsle, VMSLE_FUNCT6)
  DEFINE_OPIVX(vmsle, VMSLE_FUNCT6)
  DEFINE_OPIVI(vmsle, VMSLE_FUNCT6)

  DEFINE_OPIVV(vmsleu, VMSLEU_FUNCT6)
  DEFINE_OPIVX(vmsleu, VMSLEU_FUNCT6)
  DEFINE_OPIVI(vmsleu, VMSLEU_FUNCT6)

  DEFINE_OPIVI(vmsgt, VMSGT_FUNCT6)
  DEFINE_OPIVX(vmsgt, VMSGT_FUNCT6)

  DEFINE_OPIVI(vmsgtu, VMSGTU_FUNCT6)
  DEFINE_OPIVX(vmsgtu, VMSGTU_FUNCT6)

  DEFINE_OPIVV(vsrl, VSRL_FUNCT6)
  DEFINE_OPIVX(vsrl, VSRL_FUNCT6)
  DEFINE_OPIVI(vsrl, VSRL_FUNCT6)

  DEFINE_OPIVV(vsll, VSLL_FUNCT6)
  DEFINE_OPIVX(vsll, VSLL_FUNCT6)
  DEFINE_OPIVI(vsll, VSLL_FUNCT6)

  DEFINE_OPMVV(vredmaxu, VREDMAXU_FUNCT6)
  DEFINE_OPMVV(vredmax, VREDMAX_FUNCT6)
  DEFINE_OPMVV(vredmin, VREDMIN_FUNCT6)
  DEFINE_OPMVV(vredminu, VREDMINU_FUNCT6)

  DEFINE_OPFVV(vfadd, VFADD_FUNCT6)
  DEFINE_OPFVF(vfadd, VFADD_FUNCT6)
  DEFINE_OPFVV(vfsub, VFSUB_FUNCT6)
  DEFINE_OPFVF(vfsub, VFSUB_FUNCT6)
  DEFINE_OPFVV(vfdiv, VFDIV_FUNCT6)
  DEFINE_OPFVF(vfdiv, VFDIV_FUNCT6)
  DEFINE_OPFVV(vfmul, VFMUL_FUNCT6)
  DEFINE_OPFVF(vfmul, VFMUL_FUNCT6)

  DEFINE_OPFVV(vmfeq, VMFEQ_FUNCT6)
  DEFINE_OPFVV(vmfne, VMFNE_FUNCT6)
  DEFINE_OPFVV(vmflt, VMFLT_FUNCT6)
  DEFINE_OPFVV(vmfle, VMFLE_FUNCT6)
  DEFINE_OPFVV(vfmax, VMFMAX_FUNCT6)
  DEFINE_OPFVV(vfmin, VMFMIN_FUNCT6)

  DEFINE_OPFVV(vfsngj, VFSGNJ_FUNCT6)
  DEFINE_OPFVF(vfsngj, VFSGNJ_FUNCT6)
  DEFINE_OPFVV(vfsngjn, VFSGNJN_FUNCT6)
  DEFINE_OPFVF(vfsngjn, VFSGNJN_FUNCT6)
  DEFINE_OPFVV(vfsngjx, VFSGNJX_FUNCT6)
  DEFINE_OPFVF(vfsngjx, VFSGNJX_FUNCT6)

#undef DEFINE_OPIVI
#undef DEFINE_OPIVV
#undef DEFINE_OPIVX
#undef DEFINE_OPMVV
#undef DEFINE_OPMVX
#undef DEFINE_OPFVV
#undef DEFINE_OPFVF

#define DEFINE_VFUNARY(name, funct6, vs1)                           \
  void name(VRegister vd, VRegister vs2, MaskType mask = NoMask) {  \
    GenInstrV(funct6, OP_FVV, vd, vs1, vs2, mask);                  \
  }

  DEFINE_VFUNARY(vfcvt_xu_f_v, VFUNARY0_FUNCT6, VFCVT_XU_F_V)
  DEFINE_VFUNARY(vfcvt_x_f_v, VFUNARY0_FUNCT6, VFCVT_X_F_V)
  DEFINE_VFUNARY(vfcvt_f_x_v, VFUNARY0_FUNCT6, VFCVT_F_X_V)
  DEFINE_VFUNARY(vfcvt_f_xu_v, VFUNARY0_FUNCT6, VFCVT_F_XU_V)
  DEFINE_VFUNARY(vfncvt_f_f_w, VFUNARY0_FUNCT6, VFNCVT_F_F_W)

  DEFINE_VFUNARY(vfclass_v, VFUNARY1_FUNCT6, VFCLASS_V)
#undef DEFINE_VFUNARY

  void vnot_vv(VRegister dst, VRegister src) { vxor_vi(dst, src, -1); }

  void vneg_vv(VRegister dst, VRegister src) { vrsub_vx(dst, src, zero_reg); }

  void vfneg_vv(VRegister dst, VRegister src) { vfsngjn_vv(dst, src, src); }
  void vfabs_vv(VRegister dst, VRegister src) { vfsngjx_vv(dst, src, src); }

  // Privileged
  void uret();
  void sret();
  void mret();
  void wfi();
  void sfence_vma(Register rs1, Register rs2);

  // Assembler Pseudo Instructions (Tables 25.2, 25.3, RISC-V Unprivileged ISA)
  void nop();
  void RV_li(Register rd, int64_t imm);
  // Returns the number of instructions required to load the immediate
  static int li_estimate(int64_t imm, bool is_get_temp_reg = false);
  // Loads an immediate, always using 8 instructions, regardless of the value,
  // so that it can be modified later.
  void li_constant(Register rd, int64_t imm);
  void li_ptr(Register rd, int64_t imm);
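
  // Illustrative note (not part of the original header): RV_li() emits a
  // sequence whose length depends on the value, li_constant() always emits a
  // fixed 8-instruction sequence so the constant can be patched later, and
  // li_estimate() reports how many instructions RV_li() would need, e.g.:
  //
  //   int n = Assembler::li_estimate(imm);  // instructions RV_li() will use
  //   assm.RV_li(t0, imm);                  // materialize imm into t0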
  void mv(Register rd, Register rs) { addi(rd, rs, 0); }
  void not_(Register rd, Register rs) { xori(rd, rs, -1); }
  void neg(Register rd, Register rs) { sub(rd, zero_reg, rs); }
  void negw(Register rd, Register rs) { subw(rd, zero_reg, rs); }
  void sext_w(Register rd, Register rs) { addiw(rd, rs, 0); }
  void seqz(Register rd, Register rs) { sltiu(rd, rs, 1); }
  void snez(Register rd, Register rs) { sltu(rd, zero_reg, rs); }
  void sltz(Register rd, Register rs) { slt(rd, rs, zero_reg); }
  void sgtz(Register rd, Register rs) { slt(rd, zero_reg, rs); }

  void fmv_s(FPURegister rd, FPURegister rs) { fsgnj_s(rd, rs, rs); }
  void fabs_s(FPURegister rd, FPURegister rs) { fsgnjx_s(rd, rs, rs); }
  void fneg_s(FPURegister rd, FPURegister rs) { fsgnjn_s(rd, rs, rs); }
  void fmv_d(FPURegister rd, FPURegister rs) { fsgnj_d(rd, rs, rs); }
  void fabs_d(FPURegister rd, FPURegister rs) { fsgnjx_d(rd, rs, rs); }
  void fneg_d(FPURegister rd, FPURegister rs) { fsgnjn_d(rd, rs, rs); }

  void beqz(Register rs, int16_t imm13) { beq(rs, zero_reg, imm13); }
  inline void beqz(Register rs1, Label* L) { beqz(rs1, branch_offset(L)); }
  void bnez(Register rs, int16_t imm13) { bne(rs, zero_reg, imm13); }
  inline void bnez(Register rs1, Label* L) { bnez(rs1, branch_offset(L)); }
  void blez(Register rs, int16_t imm13) { bge(zero_reg, rs, imm13); }
  inline void blez(Register rs1, Label* L) { blez(rs1, branch_offset(L)); }
  void bgez(Register rs, int16_t imm13) { bge(rs, zero_reg, imm13); }
  inline void bgez(Register rs1, Label* L) { bgez(rs1, branch_offset(L)); }
  void bltz(Register rs, int16_t imm13) { blt(rs, zero_reg, imm13); }
  inline void bltz(Register rs1, Label* L) { bltz(rs1, branch_offset(L)); }
  void bgtz(Register rs, int16_t imm13) { blt(zero_reg, rs, imm13); }

  inline void bgtz(Register rs1, Label* L) { bgtz(rs1, branch_offset(L)); }
  void bgt(Register rs1, Register rs2, int16_t imm13) { blt(rs2, rs1, imm13); }
  inline void bgt(Register rs1, Register rs2, Label* L) {
    bgt(rs1, rs2, branch_offset(L));
  }
  void ble(Register rs1, Register rs2, int16_t imm13) { bge(rs2, rs1, imm13); }
  inline void ble(Register rs1, Register rs2, Label* L) {
    ble(rs1, rs2, branch_offset(L));
  }
  void bgtu(Register rs1, Register rs2, int16_t imm13) {
    bltu(rs2, rs1, imm13);
  }
  inline void bgtu(Register rs1, Register rs2, Label* L) {
    bgtu(rs1, rs2, branch_offset(L));
  }
  void bleu(Register rs1, Register rs2, int16_t imm13) {
    bgeu(rs2, rs1, imm13);
  }
  inline void bleu(Register rs1, Register rs2, Label* L) {
    bleu(rs1, rs2, branch_offset(L));
  }

  void j(int32_t imm21) { jal(zero_reg, imm21); }
  inline void j(Label* L) { j(jump_offset(L)); }
  inline void b(Label* L) { j(L); }
  void jal(int32_t imm21) { jal(ra, imm21); }
  inline void jal(Label* L) { jal(jump_offset(L)); }
  void jr(Register rs) { jalr(zero_reg, rs, 0); }
  void jr(Register rs, int32_t imm12) { jalr(zero_reg, rs, imm12); }
  void jalr(Register rs, int32_t imm12) { jalr(ra, rs, imm12); }
  void jalr(Register rs) { jalr(ra, rs, 0); }
  void ret() { jalr(zero_reg, ra, 0); }
  void call(int32_t offset) {
    auipc(ra, (offset >> 12) + ((offset & 0x800) >> 11));
    jalr(ra, ra, offset << 20 >> 20);
  }
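
  // Worked example (illustrative, not part of the original header) of the
  // offset split in call() above: jalr sign-extends its 12-bit immediate, so
  // the auipc immediate is rounded up whenever bit 11 of the offset is set.
  // For offset = 0x12FFF:
  //   hi20 = (0x12FFF >> 12) + ((0x12FFF & 0x800) >> 11) = 0x12 + 1 = 0x13
  //   lo12 = sign-extended low 12 bits of 0x12FFF = -1
  //   (0x13 << 12) + (-1) = 0x13000 - 1 = 0x12FFF, the original offset.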

  // Read instructions-retired counter
  void rdinstret(Register rd) { csrrs(rd, csr_instret, zero_reg); }
  void rdinstreth(Register rd) { csrrs(rd, csr_instreth, zero_reg); }
  void rdcycle(Register rd) { csrrs(rd, csr_cycle, zero_reg); }
  void rdcycleh(Register rd) { csrrs(rd, csr_cycleh, zero_reg); }
  void rdtime(Register rd) { csrrs(rd, csr_time, zero_reg); }
  void rdtimeh(Register rd) { csrrs(rd, csr_timeh, zero_reg); }

  void csrr(Register rd, ControlStatusReg csr) { csrrs(rd, csr, zero_reg); }
  void csrw(ControlStatusReg csr, Register rs) { csrrw(zero_reg, csr, rs); }
  void csrs(ControlStatusReg csr, Register rs) { csrrs(zero_reg, csr, rs); }
  void csrc(ControlStatusReg csr, Register rs) { csrrc(zero_reg, csr, rs); }

  void csrwi(ControlStatusReg csr, uint8_t imm) { csrrwi(zero_reg, csr, imm); }
  void csrsi(ControlStatusReg csr, uint8_t imm) { csrrsi(zero_reg, csr, imm); }
  void csrci(ControlStatusReg csr, uint8_t imm) { csrrci(zero_reg, csr, imm); }

  void frcsr(Register rd) { csrrs(rd, csr_fcsr, zero_reg); }
  void fscsr(Register rd, Register rs) { csrrw(rd, csr_fcsr, rs); }
  void fscsr(Register rs) { csrrw(zero_reg, csr_fcsr, rs); }

  void frrm(Register rd) { csrrs(rd, csr_frm, zero_reg); }
  void fsrm(Register rd, Register rs) { csrrw(rd, csr_frm, rs); }
  void fsrm(Register rs) { csrrw(zero_reg, csr_frm, rs); }

  void frflags(Register rd) { csrrs(rd, csr_fflags, zero_reg); }
  void fsflags(Register rd, Register rs) { csrrw(rd, csr_fflags, rs); }
  void fsflags(Register rs) { csrrw(zero_reg, csr_fflags, rs); }

  // Other pseudo instructions that are not part of RISC-V pseudo assembly
  void nor(Register rd, Register rs, Register rt) {
    or_(rd, rs, rt);
    not_(rd, rd);
  }

  void sync() { fence(0b1111, 0b1111); }
  void break_(uint32_t code, bool break_as_stop = false);
  void stop(uint32_t code = kMaxStopCode);

  // Check the code size generated from label to here.
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }

  // Check the number of instructions generated from label to here.
  int InstructionsGeneratedSince(Label* label) {
    return SizeOfCodeGeneratedSince(label) / kInstrSize;
  }

  using BlockConstPoolScope = ConstantPool::BlockScope;

  // Class for scoping postponing the trampoline pool generation.
  class BlockTrampolinePoolScope {
   public:
    explicit BlockTrampolinePoolScope(Assembler* assem, int margin = 0)
        : assem_(assem) {
      assem_->StartBlockTrampolinePool();
    }

    explicit BlockTrampolinePoolScope(Assembler* assem, PoolEmissionCheck check)
        : assem_(assem) {
      assem_->StartBlockTrampolinePool();
    }
    ~BlockTrampolinePoolScope() { assem_->EndBlockTrampolinePool(); }

   private:
    Assembler* assem_;
    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockTrampolinePoolScope);
  };

  // Class for postponing the assembly buffer growth. Typically used for
  // sequences of instructions that must be emitted as a unit, before
  // buffer growth (and relocation) can occur.
  // This blocking scope is not nestable.
  class BlockGrowBufferScope {
   public:
    explicit BlockGrowBufferScope(Assembler* assem) : assem_(assem) {
      assem_->StartBlockGrowBuffer();
    }
    ~BlockGrowBufferScope() { assem_->EndBlockGrowBuffer(); }

   private:
    Assembler* assem_;

    DISALLOW_IMPLICIT_CONSTRUCTORS(BlockGrowBufferScope);
  };

  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
  void RecordDeoptReason(DeoptimizeReason reason, uint32_t node_id,
                         SourcePosition position, int id);

  static int RelocateInternalReference(RelocInfo::Mode rmode, Address pc,
                                       intptr_t pc_delta);
  static void RelocateRelativeReference(RelocInfo::Mode rmode, Address pc,
                                        intptr_t pc_delta);

  // Writes a single byte or word of data in the code stream. Used for
  // inline tables, e.g., jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data, RelocInfo::Mode rmode = RelocInfo::NONE);
  void dq(uint64_t data, RelocInfo::Mode rmode = RelocInfo::NONE);
  void dp(uintptr_t data, RelocInfo::Mode rmode = RelocInfo::NONE) {
    dq(data, rmode);
  }
  void dd(Label* label);

  Instruction* pc() const { return reinterpret_cast<Instruction*>(pc_); }

  // Postpone the generation of the trampoline pool for the specified number of
  // instructions.
  void BlockTrampolinePoolFor(int instructions);
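
  // Illustrative usage (not part of the original header): sequences that must
  // stay contiguous are typically emitted under a BlockTrampolinePoolScope so
  // that no trampoline pool is inserted in the middle, e.g.:
  //
  //   {
  //     BlockTrampolinePoolScope block_pools(this);
  //     auipc(t6, hi20);     // hi20/lo12: hypothetical halves of a
  //     jalr(t6, t6, lo12);  // pc-relative offset
  //   }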
  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
  inline bool overflow() const { return pc_ >= reloc_info_writer.pos() - kGap; }

  // Get the number of bytes available in the buffer.
  inline intptr_t available_space() const {
    return reloc_info_writer.pos() - pc_;
  }

  // Read/patch instructions.
  static Instr instr_at(Address pc) { return *reinterpret_cast<Instr*>(pc); }
  static void instr_at_put(Address pc, Instr instr) {
    *reinterpret_cast<Instr*>(pc) = instr;
  }
  Instr instr_at(int pos) {
    return *reinterpret_cast<Instr*>(buffer_start_ + pos);
  }
  void instr_at_put(int pos, Instr instr) {
    *reinterpret_cast<Instr*>(buffer_start_ + pos) = instr;
  }

  void instr_at_put(int pos, ShortInstr instr) {
    *reinterpret_cast<ShortInstr*>(buffer_start_ + pos) = instr;
  }

  Address toAddress(int pos) {
    return reinterpret_cast<Address>(buffer_start_ + pos);
  }

  // Check if an instruction is a branch of some kind.
  static bool IsBranch(Instr instr);
  static bool IsCBranch(Instr instr);
  static bool IsNop(Instr instr);
  static bool IsJump(Instr instr);
  static bool IsJal(Instr instr);
  static bool IsCJal(Instr instr);
  static bool IsJalr(Instr instr);
  static bool IsLui(Instr instr);
  static bool IsAuipc(Instr instr);
  static bool IsAddiw(Instr instr);
  static bool IsAddi(Instr instr);
  static bool IsOri(Instr instr);
  static bool IsSlli(Instr instr);
  static bool IsLd(Instr instr);
  void CheckTrampolinePool();

  // Get the code target object for a pc-relative call or jump.
  V8_INLINE Handle<Code> relative_code_target_object_handle_at(
      Address pc_) const;

  inline int UnboundLabelsCount() { return unbound_labels_count_; }

  using BlockPoolsScope = BlockTrampolinePoolScope;

  void RecordConstPool(int size);

  void ForceConstantPoolEmissionWithoutJump() {
    constpool_.Check(Emission::kForced, Jump::kOmitted);
  }
  void ForceConstantPoolEmissionWithJump() {
    constpool_.Check(Emission::kForced, Jump::kRequired);
  }
  // Check if the const pool needs to be emitted while pretending that {margin}
  // more bytes of instructions have already been emitted.
  void EmitConstPoolWithJumpIfNeeded(size_t margin = 0) {
    constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin);
  }

  void EmitConstPoolWithoutJumpIfNeeded(size_t margin = 0) {
    constpool_.Check(Emission::kIfNeeded, Jump::kOmitted, margin);
  }

  void RecordEntry(uint32_t data, RelocInfo::Mode rmode) {
    constpool_.RecordEntry(data, rmode);
  }

  void RecordEntry(uint64_t data, RelocInfo::Mode rmode) {
    constpool_.RecordEntry(data, rmode);
  }

  class VectorUnit {
   public:
    // SEW in bits is 8 << sew_ (E8 -> 8, E16 -> 16, ...).
    inline int32_t sew() const { return 0x1 << (sew_ + 3); }

    inline int32_t vlmax() const {
      if ((lmul_ & 0b100) != 0) {
        return (kRvvVLEN / sew()) >> (lmul_ & 0b11);
      } else {
        return ((kRvvVLEN << lmul_) / sew());
      }
    }

    explicit VectorUnit(Assembler* assm) : assm_(assm) {}

    void set(Register rd, VSew sew, Vlmul lmul) {
      if (sew != sew_ || lmul != lmul_ || vl != vlmax()) {
        sew_ = sew;
        lmul_ = lmul;
        vl = vlmax();
        assm_->vsetvlmax(rd, sew_, lmul_);
      }
    }

    void set(RoundingMode mode) {
      if (mode_ != mode) {
        assm_->addi(kScratchReg, zero_reg, mode << kFcsrFrmShift);
        assm_->fscsr(kScratchReg);
        mode_ = mode;
      }
    }

    void set(Register rd, Register rs1, VSew sew, Vlmul lmul) {
      if (sew != sew_ || lmul != lmul_) {
        sew_ = sew;
        lmul_ = lmul;
        vl = 0;
        assm_->vsetvli(rd, rs1, sew_, lmul_);
      }
    }

    void set(VSew sew, Vlmul lmul) {
      if (sew != sew_ || lmul != lmul_) {
        sew_ = sew;
        lmul_ = lmul;
        assm_->vsetvl(sew_, lmul_);
      }
    }

   private:
    VSew sew_ = E8;
    Vlmul lmul_ = m1;
    int32_t vl = 0;
    Assembler* assm_;
    RoundingMode mode_ = RNE;
  };

  VectorUnit VU;

  void CheckTrampolinePoolQuick(int extra_instructions = 0) {
    DEBUG_PRINTF("\tpc_offset:%d %d\n", pc_offset(),
                 next_buffer_check_ - extra_instructions * kInstrSize);
    if (pc_offset() >= next_buffer_check_ - extra_instructions * kInstrSize) {
      CheckTrampolinePool();
    }
  }

 protected:
  // Readable constants for the base and offset adjustment helper; they
  // indicate whether, aside from the offset itself, another value like
  // offset + 4 should also fit into int16.
  enum class OffsetAccessType : bool {
    SINGLE_ACCESS = false,
    TWO_ACCESSES = true
  };

  // Determine whether the base and offset of a memory load/store need to be
  // adjusted.
  bool NeedAdjustBaseAndOffset(
      const MemOperand& src,
      OffsetAccessType = OffsetAccessType::SINGLE_ACCESS,
      int second_Access_add_to_offset = 4);

  // Helper function for memory load/store using base register and offset.
  void AdjustBaseAndOffset(
      MemOperand* src, Register scratch,
      OffsetAccessType access_type = OffsetAccessType::SINGLE_ACCESS,
      int second_access_add_to_offset = 4);

  inline static void set_target_internal_reference_encoded_at(Address pc,
                                                              Address target);

  int64_t buffer_space() const { return reloc_info_writer.pos() - pc_; }

  // Decode branch instruction at pos and return branch target pos.
  int target_at(int pos, bool is_internal);

  // Patch branch instruction at pos to branch to given branch target pos.
  void target_at_put(int pos, int target_pos, bool is_internal,
                     bool trampoline = false);

  // Say if we need to relocate with this mode.
  bool MustUseReg(RelocInfo::Mode rmode);

  // Record reloc info for current pc_.
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);

  // Block the emission of the trampoline pool before pc_offset.
  void BlockTrampolinePoolBefore(int pc_offset) {
    if (no_trampoline_pool_before_ < pc_offset)
      no_trampoline_pool_before_ = pc_offset;
  }

  void StartBlockTrampolinePool() {
    DEBUG_PRINTF("\tStartBlockTrampolinePool\n");
    trampoline_pool_blocked_nesting_++;
  }

  void EndBlockTrampolinePool() {
    trampoline_pool_blocked_nesting_--;
    DEBUG_PRINTF("\ttrampoline_pool_blocked_nesting:%d\n",
                 trampoline_pool_blocked_nesting_);
    if (trampoline_pool_blocked_nesting_ == 0) {
      CheckTrampolinePoolQuick(1);
    }
  }

  bool is_trampoline_pool_blocked() const {
    return trampoline_pool_blocked_nesting_ > 0;
  }

  bool has_exception() const { return internal_trampoline_exception_; }

  bool is_trampoline_emitted() const { return trampoline_emitted_; }

  // Temporarily block automatic assembly buffer growth.
  void StartBlockGrowBuffer() {
    DCHECK(!block_buffer_growth_);
    block_buffer_growth_ = true;
  }

  void EndBlockGrowBuffer() {
    DCHECK(block_buffer_growth_);
    block_buffer_growth_ = false;
  }

  bool is_buffer_growth_blocked() const { return block_buffer_growth_; }

#ifdef DEBUG
  bool EmbeddedObjectMatches(int pc_offset, Handle<Object> object) {
    return target_address_at(
               reinterpret_cast<Address>(buffer_->start() + pc_offset)) ==
           (IsOnHeap() ? object->ptr() : object.address());
  }
#endif

 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // Buffer size and constant pool distance are checked together at regular
  // intervals of kBufferCheckInterval emitted bytes.
  static constexpr int kBufferCheckInterval = 1 * KB / 2;

  // Code generation.
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries.
  static constexpr int kGap = 64;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

  // Repeated checking whether the trampoline pool should be emitted is rather
  // expensive. By default we only check again once a number of instructions
  // has been generated.
  static constexpr int kCheckConstIntervalInst = 32;
  static constexpr int kCheckConstInterval =
      kCheckConstIntervalInst * kInstrSize;

  int next_buffer_check_;  // pc offset of next buffer check.

  // Emission of the trampoline pool may be blocked in some code sequences.
#ifdef DEBUG
  bool EmbeddedObjectMatches(int pc_offset, Handle<Object> object) {
    return target_address_at(
               reinterpret_cast<Address>(buffer_->start() + pc_offset)) ==
           (IsOnHeap() ? object->ptr() : object.address());
  }
#endif

 private:
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512 * MB;

  // Buffer size and constant pool distance are checked together at regular
  // intervals of kBufferCheckInterval emitted bytes.
  static constexpr int kBufferCheckInterval = 1 * KB / 2;

  // Code generation.
  // The relocation writer's position is at least kGap bytes below the end of
  // the generated instructions. This is so that multi-instruction sequences do
  // not have to check for overflow. The same is true for writes of large
  // relocation info entries.
  static constexpr int kGap = 64;
  STATIC_ASSERT(AssemblerBase::kMinimalBufferSize >= 2 * kGap);

  // Repeated checking whether the trampoline pool should be emitted is rather
  // expensive. By default we only check again once a number of instructions
  // has been generated.
  static constexpr int kCheckConstIntervalInst = 32;
  static constexpr int kCheckConstInterval =
      kCheckConstIntervalInst * kInstrSize;

  int next_buffer_check_;  // pc offset of next buffer check.

  // Emission of the trampoline pool may be blocked in some code sequences.
  int trampoline_pool_blocked_nesting_;  // Block emission if this is not zero.
  int no_trampoline_pool_before_;  // Block emission before this pc offset.

  // Keep track of the last emitted pool to guarantee a maximal distance.
  int last_trampoline_pool_end_;  // pc offset of the end of the last pool.

  // Automatic growth of the assembly buffer may be blocked for some sequences.
  bool block_buffer_growth_;  // Block growth when true.

  // Relocation information generation.
  // Each relocation is encoded as a variable size value.
  static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
  RelocInfoWriter reloc_info_writer;

  // The bound position, before this we cannot do instruction elimination.
  int last_bound_pos_;

  // Code emission.
  inline void CheckBuffer();
  void GrowBuffer();
  inline void emit(Instr x);
  inline void emit(ShortInstr x);
  inline void emit(uint64_t x);
  template <typename T>
  inline void EmitHelper(T x);

  static void disassembleInstr(Instr instr);

  // Instruction generation.

  // ----- Top-level instruction formats match those in the ISA manual
  // (R, I, S, B, U, J). These match the formats defined in LLVM's
  // RISCVInstrFormats.td.
  void GenInstrR(uint8_t funct7, uint8_t funct3, Opcode opcode, Register rd,
                 Register rs1, Register rs2);
  void GenInstrR(uint8_t funct7, uint8_t funct3, Opcode opcode, FPURegister rd,
                 FPURegister rs1, FPURegister rs2);
  void GenInstrR(uint8_t funct7, uint8_t funct3, Opcode opcode, Register rd,
                 FPURegister rs1, Register rs2);
  void GenInstrR(uint8_t funct7, uint8_t funct3, Opcode opcode, FPURegister rd,
                 Register rs1, Register rs2);
  void GenInstrR(uint8_t funct7, uint8_t funct3, Opcode opcode, FPURegister rd,
                 FPURegister rs1, Register rs2);
  void GenInstrR(uint8_t funct7, uint8_t funct3, Opcode opcode, Register rd,
                 FPURegister rs1, FPURegister rs2);
  void GenInstrR4(uint8_t funct2, Opcode opcode, Register rd, Register rs1,
                  Register rs2, Register rs3, RoundingMode frm);
  void GenInstrR4(uint8_t funct2, Opcode opcode, FPURegister rd,
                  FPURegister rs1, FPURegister rs2, FPURegister rs3,
                  RoundingMode frm);
  void GenInstrRAtomic(uint8_t funct5, bool aq, bool rl, uint8_t funct3,
                       Register rd, Register rs1, Register rs2);
  void GenInstrRFrm(uint8_t funct7, Opcode opcode, Register rd, Register rs1,
                    Register rs2, RoundingMode frm);
  void GenInstrI(uint8_t funct3, Opcode opcode, Register rd, Register rs1,
                 int16_t imm12);
  void GenInstrI(uint8_t funct3, Opcode opcode, FPURegister rd, Register rs1,
                 int16_t imm12);
  void GenInstrIShift(bool arithshift, uint8_t funct3, Opcode opcode,
                      Register rd, Register rs1, uint8_t shamt);
  void GenInstrIShiftW(bool arithshift, uint8_t funct3, Opcode opcode,
                       Register rd, Register rs1, uint8_t shamt);
  void GenInstrS(uint8_t funct3, Opcode opcode, Register rs1, Register rs2,
                 int16_t imm12);
  void GenInstrS(uint8_t funct3, Opcode opcode, Register rs1, FPURegister rs2,
                 int16_t imm12);
  void GenInstrB(uint8_t funct3, Opcode opcode, Register rs1, Register rs2,
                 int16_t imm12);
  void GenInstrU(Opcode opcode, Register rd, int32_t imm20);
  void GenInstrJ(Opcode opcode, Register rd, int32_t imm20);
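  // For reference, the helpers above emit the standard RISC-V base encodings
  // (bit 31 on the left, bit 0 on the right):
  //   R:  funct7 | rs2 | rs1 | funct3 | rd | opcode
  //   I:  imm[11:0]    | rs1 | funct3 | rd | opcode
  //   S:  imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode
  //   B:  imm[12|10:5] | rs2 | rs1 | funct3 | imm[4:1|11] | opcode
  //   U:  imm[31:12] | rd | opcode
  //   J:  imm[20|10:1|11|19:12] | rd | opcode
  // As a sketch (not a quote of GenInstrI), an I-type word is assembled as
  //   (imm12 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | opcode.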
  void GenInstrCR(uint8_t funct4, Opcode opcode, Register rd, Register rs2);
  void GenInstrCA(uint8_t funct6, Opcode opcode, Register rd, uint8_t funct,
                  Register rs2);
  void GenInstrCI(uint8_t funct3, Opcode opcode, Register rd, int8_t imm6);
  void GenInstrCIU(uint8_t funct3, Opcode opcode, Register rd, uint8_t uimm6);
  void GenInstrCIU(uint8_t funct3, Opcode opcode, FPURegister rd,
                   uint8_t uimm6);
  void GenInstrCIW(uint8_t funct3, Opcode opcode, Register rd, uint8_t uimm8);
  void GenInstrCSS(uint8_t funct3, Opcode opcode, FPURegister rs2,
                   uint8_t uimm6);
  void GenInstrCSS(uint8_t funct3, Opcode opcode, Register rs2, uint8_t uimm6);
  void GenInstrCL(uint8_t funct3, Opcode opcode, Register rd, Register rs1,
                  uint8_t uimm5);
  void GenInstrCL(uint8_t funct3, Opcode opcode, FPURegister rd, Register rs1,
                  uint8_t uimm5);
  void GenInstrCS(uint8_t funct3, Opcode opcode, Register rs2, Register rs1,
                  uint8_t uimm5);
  void GenInstrCS(uint8_t funct3, Opcode opcode, FPURegister rs2, Register rs1,
                  uint8_t uimm5);
  void GenInstrCJ(uint8_t funct3, Opcode opcode, uint16_t uint11);
  void GenInstrCB(uint8_t funct3, Opcode opcode, Register rs1, uint8_t uimm8);
  void GenInstrCBA(uint8_t funct3, uint8_t funct2, Opcode opcode, Register rs1,
                   int8_t imm6);

  // ----- Instruction class templates match those in LLVM's RISCVInstrInfo.td
  void GenInstrBranchCC_rri(uint8_t funct3, Register rs1, Register rs2,
                            int16_t imm12);
  void GenInstrLoad_ri(uint8_t funct3, Register rd, Register rs1,
                       int16_t imm12);
  void GenInstrStore_rri(uint8_t funct3, Register rs1, Register rs2,
                         int16_t imm12);
  void GenInstrALU_ri(uint8_t funct3, Register rd, Register rs1,
                      int16_t imm12);
  void GenInstrShift_ri(bool arithshift, uint8_t funct3, Register rd,
                        Register rs1, uint8_t shamt);
  void GenInstrALU_rr(uint8_t funct7, uint8_t funct3, Register rd,
                      Register rs1, Register rs2);
  void GenInstrCSR_ir(uint8_t funct3, Register rd, ControlStatusReg csr,
                      Register rs1);
  void GenInstrCSR_ii(uint8_t funct3, Register rd, ControlStatusReg csr,
                      uint8_t rs1);
  void GenInstrShiftW_ri(bool arithshift, uint8_t funct3, Register rd,
                         Register rs1, uint8_t shamt);
  void GenInstrALUW_rr(uint8_t funct7, uint8_t funct3, Register rd,
                       Register rs1, Register rs2);
  void GenInstrPriv(uint8_t funct7, Register rs1, Register rs2);
  void GenInstrLoadFP_ri(uint8_t funct3, FPURegister rd, Register rs1,
                         int16_t imm12);
  void GenInstrStoreFP_rri(uint8_t funct3, Register rs1, FPURegister rs2,
                           int16_t imm12);
  void GenInstrALUFP_rr(uint8_t funct7, uint8_t funct3, FPURegister rd,
                        FPURegister rs1, FPURegister rs2);
  void GenInstrALUFP_rr(uint8_t funct7, uint8_t funct3, FPURegister rd,
                        Register rs1, Register rs2);
  void GenInstrALUFP_rr(uint8_t funct7, uint8_t funct3, FPURegister rd,
                        FPURegister rs1, Register rs2);
  void GenInstrALUFP_rr(uint8_t funct7, uint8_t funct3, Register rd,
                        FPURegister rs1, Register rs2);
  void GenInstrALUFP_rr(uint8_t funct7, uint8_t funct3, Register rd,
                        FPURegister rs1, FPURegister rs2);
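  // An illustrative mapping (a sketch under the assumption of the standard
  // funct3 selectors, not a quote of the implementation): a 32-bit load
  // lw rd, imm12(rs1) would go through the load class template as
  //   GenInstrLoad_ri(0b010, rd, rs1, imm12);   // funct3 0b010 selects LW
  // with 0b011 selecting ld and 0b100 selecting lbu.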
  // ----------------------------RVV------------------------------------------
  // vsetvl
  void GenInstrV(Register rd, Register rs1, Register rs2);
  // vsetvli
  void GenInstrV(Register rd, Register rs1, uint32_t zimm);
  // OPIVV OPFVV OPMVV
  void GenInstrV(uint8_t funct6, Opcode opcode, VRegister vd, VRegister vs1,
                 VRegister vs2, MaskType mask = NoMask);
  void GenInstrV(uint8_t funct6, Opcode opcode, VRegister vd, int8_t vs1,
                 VRegister vs2, MaskType mask = NoMask);
  // OPMVV OPFVV
  void GenInstrV(uint8_t funct6, Opcode opcode, Register rd, VRegister vs1,
                 VRegister vs2, MaskType mask = NoMask);

  // OPIVX OPMVX
  void GenInstrV(uint8_t funct6, Opcode opcode, VRegister vd, Register rs1,
                 VRegister vs2, MaskType mask = NoMask);
  // OPFVF
  void GenInstrV(uint8_t funct6, Opcode opcode, VRegister vd, FPURegister fs1,
                 VRegister vs2, MaskType mask = NoMask);
  // OPMVX
  void GenInstrV(uint8_t funct6, Register rd, Register rs1, VRegister vs2,
                 MaskType mask = NoMask);
  // OPIVI
  void GenInstrV(uint8_t funct6, VRegister vd, int8_t simm5, VRegister vs2,
                 MaskType mask = NoMask);

  // VL VS
  void GenInstrV(Opcode opcode, uint8_t width, VRegister vd, Register rs1,
                 uint8_t umop, MaskType mask, uint8_t IsMop, bool IsMew,
                 uint8_t Nf);
  void GenInstrV(Opcode opcode, uint8_t width, VRegister vd, Register rs1,
                 Register rs2, MaskType mask, uint8_t IsMop, bool IsMew,
                 uint8_t Nf);
  // VL VS AMO
  void GenInstrV(Opcode opcode, uint8_t width, VRegister vd, Register rs1,
                 VRegister vs2, MaskType mask, uint8_t IsMop, bool IsMew,
                 uint8_t Nf);

  // Labels.
  void print(const Label* L);
  void bind_to(Label* L, int pos);
  void next(Label* L, bool is_internal);

  // One trampoline consists of:
  // - space for trampoline slots,
  // - space for labels.
  //
  // Space for trampoline slots is equal to slot_count * 2 * kInstrSize.
  // Space for trampoline slots precedes space for labels. Each label is of one
  // instruction size, so the total amount for labels is equal to
  // label_count * kInstrSize.
  class Trampoline {
   public:
    Trampoline() {
      start_ = 0;
      next_slot_ = 0;
      free_slot_count_ = 0;
      end_ = 0;
    }
    Trampoline(int start, int slot_count) {
      start_ = start;
      next_slot_ = start;
      free_slot_count_ = slot_count;
      end_ = start + slot_count * kTrampolineSlotsSize;
    }
    int start() { return start_; }
    int end() { return end_; }
    int take_slot() {
      int trampoline_slot = kInvalidSlotPos;
      if (free_slot_count_ <= 0) {
        // We have run out of space in the trampoline.
        // Make sure we fail in debug mode, so we become aware of each case
        // when this happens.
        DCHECK(0);
        // Internal exception will be caught.
      } else {
        trampoline_slot = next_slot_;
        free_slot_count_--;
        next_slot_ += kTrampolineSlotsSize;
      }
      return trampoline_slot;
    }

   private:
    int start_;
    int end_;
    int next_slot_;
    int free_slot_count_;
  };
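  // A small worked example of the slot accounting above (the numbers are made
  // up): Trampoline(/*start=*/1024, /*slot_count=*/3) sets end_ to
  // 1024 + 3 * kTrampolineSlotsSize, and successive take_slot() calls return
  // 1024, 1024 + kTrampolineSlotsSize and 1024 + 2 * kTrampolineSlotsSize;
  // a fourth call finds no free slot and yields kInvalidSlotPos (hitting the
  // DCHECK in debug builds).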
  int32_t get_trampoline_entry(int32_t pos);
  int unbound_labels_count_;
  // After the trampoline is emitted, long branches are used in generated code
  // for forward branches whose target offsets could be beyond the reach of a
  // branch instruction. We use this information to switch to a different mode
  // of branch instruction generation, where jump instructions are used rather
  // than regular branch instructions.
  bool trampoline_emitted_ = false;
  static constexpr int kInvalidSlotPos = -1;

  // Internal reference positions, required for unbound internal reference
  // labels.
  std::set<int64_t> internal_reference_positions_;
  bool is_internal_reference(Label* L) {
    return internal_reference_positions_.find(L->pos()) !=
           internal_reference_positions_.end();
  }

  Trampoline trampoline_;
  bool internal_trampoline_exception_;

  RegList scratch_register_list_;

 private:
  ConstantPool constpool_;

  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);

  int WriteCodeComments();

  friend class RegExpMacroAssemblerRISCV;
  friend class RelocInfo;
  friend class BlockTrampolinePoolScope;
  friend class EnsureSpace;
  friend class ConstantPool;
};

class EnsureSpace {
 public:
  explicit inline EnsureSpace(Assembler* assembler);
};

class V8_EXPORT_PRIVATE UseScratchRegisterScope {
 public:
  explicit UseScratchRegisterScope(Assembler* assembler);
  ~UseScratchRegisterScope();

  Register Acquire();
  bool hasAvailable() const;
  void Include(const RegList& list) { *available_ |= list; }
  void Exclude(const RegList& list) { *available_ &= ~list; }
  void Include(const Register& reg1, const Register& reg2 = no_reg) {
    RegList list(reg1.bit() | reg2.bit());
    Include(list);
  }
  void Exclude(const Register& reg1, const Register& reg2 = no_reg) {
    RegList list(reg1.bit() | reg2.bit());
    Exclude(list);
  }

 private:
  RegList* available_;
  RegList old_available_;
};

}  // namespace internal
}  // namespace v8

#endif  // V8_CODEGEN_RISCV64_ASSEMBLER_RISCV64_H_