/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2020-2020. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#include <string>
#include <vector>
#include <unordered_map>
#include <map>

#ifndef ASMJIT_ASMJIT_H_INCLUDED
#    include <asmjit/asmjit.hpp>
#endif

extern "C"
{
#ifdef HAVE_CONFIG_H
#    include "config.h"
#endif

#include "sys.h"
#include "erl_vm.h"
#include "global.h"
#include "beam_catches.h"

#include "beam_asm.h"
}

#include "beam_jit_common.hpp"

using namespace asmjit;

class BeamAssembler : public ErrorHandler {
protected:
    /* Holds code and relocation information. */
    CodeHolder code;

    /* TODO: Want to change this to x86::Builder in order to be able to patch
     * the correct I into the code after code generation */
    x86::Assembler a;

    FileLogger logger;

    Section *rodata = nullptr;

    /* * * * * * * * * */

    /* Points at x_reg_array inside an ErtsSchedulerRegisters struct, allowing
     * the aux_regs field to be addressed with an 8-bit displacement. */
    const x86::Gp registers = x86::rbx;

#ifdef NATIVE_ERLANG_STACK
    /* The Erlang stack pointer, note that it uses RSP and is therefore invalid
     * when running on the runtime stack. */
    const x86::Gp E = x86::rsp;

    /* Cached copy of Erlang stack pointer used to speed up stack switches when
     * we know that the runtime doesn't read or modify the Erlang stack.
     *
     * If we find ourselves pressed for registers in the future, we could save
     * this in the same slot as `registers` as that can be trivially recomputed
     * from the top of the runtime stack. */
    const x86::Gp E_saved = x86::r12;

#else
    const x86::Gp E = x86::r12;
#endif

    const x86::Gp c_p = x86::r13;
    const x86::Gp FCALLS = x86::r14;
    const x86::Gp HTOP = x86::r15;
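    /* The registers chosen above (rbx, r12-r15, and rbp below) are
     * callee-saved in both the SysV and Win64 calling conventions, so plain
     * calls into C code do not clobber the cached VM state. */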
    /* Local copy of the active code index.
     *
     * This is set to ERTS_SAVE_CALLS_CODE_IX when save_calls is active, which
     * routes us to a common handler routine that calls save_calls before
     * jumping to the actual code. */
    const x86::Gp active_code_ix = x86::rbp;

#ifdef ERTS_MSACC_EXTENDED_STATES
    const x86::Mem erts_msacc_cache = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.erts_msacc_cache));
#endif

    /* * * * * * * * * */
#ifdef WIN32
    const x86::Gp ARG1 = x86::rcx;
    const x86::Gp ARG2 = x86::rdx;
    const x86::Gp ARG3 = x86::r8;
    const x86::Gp ARG4 = x86::r9;
    const x86::Gp ARG5 = x86::r10;
    const x86::Gp ARG6 = x86::r11;

    const x86::Gp ARG1d = x86::ecx;
    const x86::Gp ARG2d = x86::edx;
    const x86::Gp ARG3d = x86::r8d;
    const x86::Gp ARG4d = x86::r9d;
    const x86::Gp ARG5d = x86::r10d;
    const x86::Gp ARG6d = x86::r11d;
#else
    const x86::Gp ARG1 = x86::rdi;
    const x86::Gp ARG2 = x86::rsi;
    const x86::Gp ARG3 = x86::rdx;
    const x86::Gp ARG4 = x86::rcx;
    const x86::Gp ARG5 = x86::r8;
    const x86::Gp ARG6 = x86::r9;

    const x86::Gp ARG1d = x86::edi;
    const x86::Gp ARG2d = x86::esi;
    const x86::Gp ARG3d = x86::edx;
    const x86::Gp ARG4d = x86::ecx;
    const x86::Gp ARG5d = x86::r8d;
    const x86::Gp ARG6d = x86::r9d;
#endif

    const x86::Gp RET = x86::rax;
    const x86::Gp RETd = x86::eax;
    const x86::Gp RETb = x86::al;

    const x86::Mem TMP_MEM1q = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[0]));
    const x86::Mem TMP_MEM2q = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[1]));
    const x86::Mem TMP_MEM3q = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[2]));
    const x86::Mem TMP_MEM4q = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[3]));
    const x86::Mem TMP_MEM5q = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[4]));

    const x86::Mem TMP_MEM1d = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[0]),
            sizeof(Uint32));
    const x86::Mem TMP_MEM2d = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[1]),
            sizeof(Uint32));
    const x86::Mem TMP_MEM3d = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[2]),
            sizeof(Uint32));
    const x86::Mem TMP_MEM4d = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[3]),
            sizeof(Uint32));
    const x86::Mem TMP_MEM5d = getSchedulerRegRef(
            offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[4]),
            sizeof(Uint32));

    enum Distance { dShort, dLong };

public:
    static bool hasCpuFeature(uint32_t featureId);

    BeamAssembler() : code() {
        /* Setup with default code info */
        Error err = code.init(hostEnvironment());
        ERTS_ASSERT(!err && "Failed to init codeHolder");

        err = code.newSection(&rodata,
                              ".rodata",
                              SIZE_MAX,
                              Section::kFlagConst,
                              8);
        ERTS_ASSERT(!err && "Failed to create .rodata section");

        err = code.attach(&a);

        ERTS_ASSERT(!err && "Failed to attach codeHolder");
#ifdef DEBUG
        a.addValidationOptions(BaseEmitter::kValidationOptionAssembler);
#endif
        a.addEncodingOptions(BaseEmitter::kEncodingOptionOptimizeForSize);
        code.setErrorHandler(this);
    }

    BeamAssembler(const std::string &log) : BeamAssembler() {
        if (erts_jit_asm_dump) {
            setLogger(log + ".asm");
        }
    }

    ~BeamAssembler() {
        if (logger.file())
            fclose(logger.file());
    }

    void *getBaseAddress() {
        ASSERT(code.hasBaseAddress());
        return (void *)code.baseAddress();
    }

    size_t getOffset() {
        return a.offset();
    }

protected:
    void _codegen(JitAllocator *allocator,
                  const void **executable_ptr,
                  void **writable_ptr) {
        Error err = code.flatten();
        ERTS_ASSERT(!err && "Could not flatten code");
        err = code.resolveUnresolvedLinks();
        ERTS_ASSERT(!err && "Could not resolve all links");

        /* Verify that all labels are bound */
#ifdef DEBUG
        for (auto e : code.labelEntries()) {
            if (!e->isBound()) {
                erts_exit(ERTS_ABORT_EXIT, "Label %s is not bound", e->name());
            }
        }
#endif

        err = allocator->alloc(const_cast<void **>(executable_ptr),
                               writable_ptr,
                               code.codeSize() + 16);

        if (err == ErrorCode::kErrorTooManyHandles) {
            ERTS_ASSERT(!"Failed to allocate module code: "
                         "out of file descriptors");
        } else if (err) {
            ERTS_ASSERT(!"Failed to allocate module code");
        }

        code.relocateToBase((uint64_t)*executable_ptr);
        code.copyFlattenedData(*writable_ptr,
                               code.codeSize(),
                               CodeHolder::kCopyPadSectionBuffer);
#ifdef DEBUG
        if (FileLogger *l = dynamic_cast<FileLogger *>(code.logger()))
            if (FILE *f = l->file())
                fprintf(f, "; CODE_SIZE: %zd\n", code.codeSize());
#endif
    }

    void *getCode(Label label) {
        ASSERT(label.isValid());
        return (char *)getBaseAddress() + code.labelOffsetFromBase(label);
    }

    byte *getCode(char *labelName) {
        return (byte *)getCode(code.labelByName(labelName, strlen(labelName)));
    }

    void handleError(Error err, const char *message, BaseEmitter *origin) {
        comment(message);
        fflush(logger.file());
        ASSERT(0 && "Fault instruction encode");
    }

    constexpr x86::Mem getRuntimeStackRef() const {
        int base = offsetof(ErtsSchedulerRegisters, aux_regs.d.runtime_stack);

        return getSchedulerRegRef(base);
    }

#if !defined(NATIVE_ERLANG_STACK)
#    ifdef JIT_HARD_DEBUG
    constexpr x86::Mem getInitialSPRef() const {
        int base = offsetof(ErtsSchedulerRegisters, initial_sp);

        return getSchedulerRegRef(base);
    }
#    endif

    constexpr x86::Mem getCPRef() const {
        return x86::qword_ptr(E);
    }
#endif

    constexpr x86::Mem getSchedulerRegRef(int offset,
                                          size_t size = sizeof(UWord)) const {
        const int x_reg_offset =
                offsetof(ErtsSchedulerRegisters, x_reg_array.d);

        /* The entire aux_reg field should be addressable with an 8-bit
         * displacement. */
        ERTS_CT_ASSERT(x_reg_offset <= 128);

        return x86::Mem(registers, offset - x_reg_offset, size);
    }
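    /* For example, TMP_MEM1q above is defined as
     *
     *   getSchedulerRegRef(
     *       offsetof(ErtsSchedulerRegisters, aux_regs.d.TMP_MEM[0]))
     *
     * which resolves to an 8-bit displacement off the `registers` base
     * pointer (see the comment on `registers` above). */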
    constexpr x86::Mem getFRef(int index, size_t size = sizeof(UWord)) const {
        int base = offsetof(ErtsSchedulerRegisters, f_reg_array.d);
        int offset = index * sizeof(FloatDef);

        ASSERT(index >= 0 && index <= 1023);
        return getSchedulerRegRef(base + offset, size);
    }

    constexpr x86::Mem getXRef(int index, size_t size = sizeof(UWord)) const {
        int base = offsetof(ErtsSchedulerRegisters, x_reg_array.d);
        int offset = index * sizeof(Eterm);

        ASSERT(index >= 0 && index < ERTS_X_REGS_ALLOCATED);
        return getSchedulerRegRef(base + offset, size);
    }

    constexpr x86::Mem getYRef(int index, size_t size = sizeof(UWord)) const {
        ASSERT(index >= 0 && index <= 1023);

#ifdef NATIVE_ERLANG_STACK
        return x86::Mem(E, index * sizeof(Eterm), size);
#else
        return x86::Mem(E, (index + CP_SIZE) * sizeof(Eterm), size);
#endif
    }

    constexpr x86::Mem getCARRef(x86::Gp Src,
                                 size_t size = sizeof(UWord)) const {
        return x86::Mem(Src, -TAG_PRIMARY_LIST, size);
    }

    constexpr x86::Mem getCDRRef(x86::Gp Src,
                                 size_t size = sizeof(UWord)) const {
        return x86::Mem(Src, -TAG_PRIMARY_LIST + sizeof(Eterm), size);
    }

    void align_erlang_cp() {
        /* Align so that the current address forms a valid CP. */
        ERTS_CT_ASSERT(_CPMASK == 3);
        a.align(kAlignCode, 4);
        ASSERT(is_CP(a.offset()));
    }

    void load_x_reg_array(x86::Gp reg) {
        /* By definition. */
        a.mov(reg, registers);
    }

    void load_erl_bits_state(x86::Gp reg) {
        int offset =
                offsetof(ErtsSchedulerRegisters, aux_regs.d.erl_bits_state);

        a.lea(reg, getSchedulerRegRef(offset));
    }

    /* Ensure that the Erlang stack is used and the redzone is unused.
     * We combine those tests to minimize the number of instructions.
     */
    void emit_assert_redzone_unused() {
#ifdef JIT_HARD_DEBUG
        const int REDZONE_BYTES = S_REDZONE * sizeof(Eterm);
        Label ok = a.newLabel(), crash = a.newLabel();

        /* We modify the stack pointer to avoid spilling into a register,
         * TMP_MEM, or using the stack. */
        a.sub(E, imm(REDZONE_BYTES));
        a.cmp(HTOP, E);
        a.short_().ja(crash);
        a.cmp(E, x86::qword_ptr(c_p, offsetof(Process, hend)));
        a.short_().jle(ok);

        a.bind(crash);
        a.ud2();

        a.bind(ok);
        a.add(E, imm(REDZONE_BYTES));
#endif
    }

    /*
     * Calls an Erlang function.
     */
    template<typename Any>
    void erlang_call(Any Target, const x86::Gp &spill) {
#ifdef NATIVE_ERLANG_STACK
        /* We use the Erlang stack as the native stack. We can use a
         * native `call` instruction. */
        emit_assert_redzone_unused();
        aligned_call(Target);
#else
        Label next = a.newLabel();

        /* Save the return CP on the stack. */
        a.lea(spill, x86::qword_ptr(next));
        a.mov(getCPRef(), spill);

        a.jmp(Target);

        /* Need to align this label in order for it to be recognized as
         * is_CP. */
        align_erlang_cp();
        a.bind(next);
#endif
    }

    /*
     * Calls the given address in a shared fragment, ensuring that the
     * redzone is unused and that the return address forms a valid
     * CP.
     */
    template<typename Any>
    void fragment_call(Any Target) {
        emit_assert_redzone_unused();

#if defined(JIT_HARD_DEBUG) && !defined(NATIVE_ERLANG_STACK)
        /* Verify that the stack has not grown. */
        Label next = a.newLabel();
        a.cmp(x86::rsp, getInitialSPRef());
        a.short_().je(next);
        a.ud2();
        a.bind(next);
#endif

        aligned_call(Target);
    }

    /*
     * Calls the given function pointer. In a debug build with
     * JIT_HARD_DEBUG defined, it will be enforced that the redzone is
     * unused.
     *
     * The return address will NOT be aligned, and thus will not form a valid
     * CP. That means that the called code must not scan the stack in any
     * way. That means, for example, that the called code must not
     * throw an exception, do a garbage collection, or cause a context
     * switch.
     */
    void safe_fragment_call(void (*Target)()) {
        emit_assert_redzone_unused();
        a.call(imm(Target));
    }

    template<typename FuncPtr>
    void aligned_call(FuncPtr(*target)) {
        /* Calls to absolute addresses (encoded in the address table) are
         * always 6 bytes long. */
        aligned_call(imm(target), 6);
    }

    void aligned_call(Label target) {
        /* Relative calls are always 5 bytes long. */
        aligned_call(target, 5);
    }

    template<typename OperandType>
    void aligned_call(OperandType target) {
        /* Other calls are variable size. While it would be nice to use this
         * method for pointer/label calls too, `asmjit` writes relocations into
         * the code buffer itself and overwriting them causes all kinds of
         * havoc. */
        size_t call_offset, call_size;

        call_offset = a.offset();
        a.call(target);

        call_size = a.offset() - call_offset;
        a.setOffset(call_offset);

        aligned_call(target, call_size);
    }

    /* Calls the given address, ensuring that the return address forms a valid
     * CP. */
    template<typename OperandType>
    void aligned_call(OperandType target, size_t size) {
        /* The return address must be 4-byte aligned to form a valid CP, so
         * we'll align according to the size of the call instruction. */
        ssize_t next_address = (a.offset() + size);

        ERTS_CT_ASSERT(_CPMASK == 3);
        if (next_address % 4) {
            ssize_t nop_count = 4 - next_address % 4;

            a.embed(nops[nop_count - 1], nop_count);
        }

#ifdef JIT_HARD_DEBUG
        /* TODO: When frame pointers are in place, assert (at runtime) that the
         * destination has a `push rbp; mov rbp, rsp` sequence. */
#endif

        a.call(target);
        ASSERT(is_CP(a.offset()));
    }
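    /* Worked example of the padding above (illustrative): a 5-byte relative
     * call emitted at offset 0x1006 would return to 0x100b, which is not a
     * valid CP. Since 0x100b % 4 == 3, a single 1-byte NOP is emitted first,
     * moving the return address to 0x100c (0x100c & _CPMASK == 0). */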
    /* Canned instruction sequences for multi-byte NOPs */
    static const uint8_t *nops[3];
    static const uint8_t nop1[1];
    static const uint8_t nop2[2];
    static const uint8_t nop3[3];

    void runtime_call(x86::Gp func, unsigned args) {
        ASSERT(args < 5);

        emit_assert_runtime_stack();

#ifdef WIN32
        a.sub(x86::rsp, imm(4 * sizeof(UWord)));
        a.call(func);
        a.add(x86::rsp, imm(4 * sizeof(UWord)));
#else
        a.call(func);
#endif
    }

    template<typename T>
    struct function_arity;
    template<typename T, typename... Args>
    struct function_arity<T(Args...)>
            : std::integral_constant<int, sizeof...(Args)> {};

    template<int expected_arity, typename T>
    void runtime_call(T(*func)) {
        static_assert(expected_arity == function_arity<T>());

        emit_assert_runtime_stack();

#ifdef WIN32
        unsigned pushed;
        switch (expected_arity) {
        case 6:
        case 5:
            /* We push ARG6 to keep the stack aligned even when we only have 5
             * arguments. It does no harm, and is slightly more compact than
             * sub/push/sub. */
            a.push(ARG6);
            a.push(ARG5);
            a.sub(x86::rsp, imm(4 * sizeof(UWord)));
            pushed = 6;
            break;
        default:
            a.sub(x86::rsp, imm(4 * sizeof(UWord)));
            pushed = 4;
        }

#endif

        a.call(imm(func));

#ifdef WIN32
        a.add(x86::rsp, imm(pushed * sizeof(UWord)));
#endif
    }

    template<typename T>
    void abs_jmp(T(*addr)) {
        a.jmp(imm(addr));
    }

    /* Explicitly position-independent absolute jump, for use in fragments that
     * need to be memcpy'd for performance reasons (e.g. export entries) */
    template<typename T>
    void pic_jmp(T(*addr)) {
        a.mov(ARG6, imm(addr));
        a.jmp(ARG6);
    }

    constexpr x86::Mem getArgRef(const ArgVal &val,
                                 size_t size = sizeof(UWord)) const {
        switch (val.getType()) {
        case ArgVal::TYPE::l:
            return getFRef(val.getValue(), size);
        case ArgVal::TYPE::x:
            return getXRef(val.getValue(), size);
        case ArgVal::TYPE::y:
            return getYRef(val.getValue(), size);
        default:
            ERTS_ASSERT(!"NYI");
            return x86::Mem();
        }
    }

    /* Returns the current code address for the export entry in `Src`.
     *
     * Export tracing, save_calls, etc. are implemented by shared fragments
     * that assume that the export entry is in RET, so we have to copy it over
     * if it isn't already. */
    x86::Mem emit_setup_export_call(const x86::Gp &Src) {
        return emit_setup_export_call(Src, active_code_ix);
    }

    x86::Mem emit_setup_export_call(const x86::Gp &Src,
                                    const x86::Gp &CodeIndex) {
        if (RET != Src) {
            a.mov(RET, Src);
        }

        return x86::qword_ptr(RET, CodeIndex, 3, offsetof(Export, addresses));
    }
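    /* The operand returned above indexes Export::addresses by the code
     * index; the shift argument 3 scales the index by 8 bytes, selecting the
     * address slot for the currently active code index. */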
    /* Discards a continuation pointer, including the frame pointer if
     * applicable. */
    void emit_discard_cp() {
        emit_assert_erlang_stack();

        a.add(x86::rsp, imm(CP_SIZE * sizeof(Eterm)));
    }

    void emit_assert_runtime_stack() {
#ifdef JIT_HARD_DEBUG
        Label crash = a.newLabel(), next = a.newLabel();

#    ifdef NATIVE_ERLANG_STACK
        /* Ensure that we are using the runtime stack. */
        int end_offs, start_offs;

        end_offs = offsetof(ErtsSchedulerRegisters, runtime_stack_end);
        start_offs = offsetof(ErtsSchedulerRegisters, runtime_stack_start);

        a.cmp(E, getSchedulerRegRef(end_offs));
        a.short_().jbe(crash);
        a.cmp(E, getSchedulerRegRef(start_offs));
        a.short_().ja(crash);
#    endif

        /* Are we 16-byte aligned? */
        a.test(x86::rsp, (16 - 1));
        a.short_().je(next);

        a.bind(crash);
        a.ud2();

        a.bind(next);
#endif
    }

    void emit_assert_erlang_stack() {
#ifdef JIT_HARD_DEBUG
        Label crash = a.newLabel(), next = a.newLabel();

        /* Are we term-aligned? */
        a.test(E, imm(sizeof(Eterm) - 1));
        a.short_().jne(crash);

        a.cmp(E, x86::qword_ptr(c_p, offsetof(Process, heap)));
        a.short_().jl(crash);
        a.cmp(E, x86::qword_ptr(c_p, offsetof(Process, hend)));
        a.short_().jle(next);

        a.bind(crash);
        a.ud2();
        a.bind(next);
#endif
    }

    enum Update : int {
        eStack = (1 << 0),
        eHeap = (1 << 1),
        eReductions = (1 << 2),
        eCodeIndex = (1 << 3)
    };

    template<int Spec = 0>
    void emit_enter_runtime() {
        emit_assert_erlang_stack();

        ERTS_CT_ASSERT((Spec & (Update::eReductions | Update::eStack |
                                Update::eHeap)) == Spec);

#ifdef NATIVE_ERLANG_STACK
        if (!(Spec & Update::eStack)) {
            a.mov(E_saved, E);
        }
#endif

        if ((Spec & (Update::eHeap | Update::eStack)) ==
            (Update::eHeap | Update::eStack)) {
            /* To update both heap and stack we use sse instructions like gcc
               -O3 does. Basically it is this function run through gcc -O3:

               struct a { long a; long b; long c; };

               void test(long a, long b, long c, struct a *s) {
                   s->a = a;
                   s->b = b;
                   s->c = c;
               }
            */
            ERTS_CT_ASSERT(offsetof(Process, stop) - offsetof(Process, htop) ==
                           8);
            a.movq(x86::xmm0, HTOP);
            a.movq(x86::xmm1, E);
            if (Spec & Update::eReductions) {
                a.mov(x86::qword_ptr(c_p, offsetof(Process, fcalls)), FCALLS);
            }
            a.punpcklqdq(x86::xmm0, x86::xmm1);
            a.movups(x86::xmmword_ptr(c_p, offsetof(Process, htop)),
                     x86::xmm0);
        } else {
            if ((Spec & Update::eStack)) {
                a.mov(x86::qword_ptr(c_p, offsetof(Process, stop)), E);
            }

            if (Spec & Update::eHeap) {
                a.mov(x86::qword_ptr(c_p, offsetof(Process, htop)), HTOP);
            }

            if (Spec & Update::eReductions) {
                a.mov(x86::qword_ptr(c_p, offsetof(Process, fcalls)), FCALLS);
            }
        }

#ifdef NATIVE_ERLANG_STACK
        a.lea(E, getRuntimeStackRef());
#else
        /* Keeping track of stack alignment across shared fragments would be
         * too much of a maintenance burden, so we stash and align the stack
         * pointer at runtime instead. */
        a.mov(getRuntimeStackRef(), x86::rsp);

        a.sub(x86::rsp, imm(15));
        a.and_(x86::rsp, imm(-16));
#endif
    }
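    /* emit_enter_runtime() and emit_leave_runtime() below are used as a
     * matched pair around calls into the runtime system, typically with the
     * same Spec passed to both so that exactly the state flushed on entry is
     * reloaded on exit. A rough (illustrative, not verbatim) call site:
     *
     *   emit_enter_runtime<Update::eReductions | Update::eHeap>();
     *   a.mov(ARG1, c_p);
     *   runtime_call<1>(some_c_helper);  // hypothetical C function
     *   emit_leave_runtime<Update::eReductions | Update::eHeap>();
     */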
    template<int Spec = 0>
    void emit_leave_runtime() {
        emit_assert_runtime_stack();

        ERTS_CT_ASSERT((Spec & (Update::eReductions | Update::eStack |
                                Update::eHeap | Update::eCodeIndex)) == Spec);

#ifdef NATIVE_ERLANG_STACK
        if (!(Spec & Update::eStack)) {
            a.mov(E, E_saved);
        }
#endif
        if ((Spec & Update::eStack)) {
            a.mov(E, x86::qword_ptr(c_p, offsetof(Process, stop)));
        }

        if (Spec & Update::eHeap) {
            a.mov(HTOP, x86::qword_ptr(c_p, offsetof(Process, htop)));
        }

        if (Spec & Update::eReductions) {
            a.mov(FCALLS, x86::qword_ptr(c_p, offsetof(Process, fcalls)));
        }

        if (Spec & Update::eCodeIndex) {
            /* Updates the local copy of the active code index, retaining
             * save_calls if active. */
            a.mov(ARG1, imm(&the_active_code_index));
            a.mov(ARG1d, x86::dword_ptr(ARG1));

            a.cmp(active_code_ix, imm(ERTS_SAVE_CALLS_CODE_IX));
            a.cmovne(active_code_ix, ARG1);
        }

#if !defined(NATIVE_ERLANG_STACK)
        /* Restore the unaligned stack pointer we saved on enter. */
        a.mov(x86::rsp, getRuntimeStackRef());
#endif
    }

    void emit_is_boxed(Label Fail, x86::Gp Src, Distance dist = dLong) {
        /* Use the shortest possible instruction depending on the source
         * register. */
        if (Src == x86::rax || Src == x86::rdi || Src == x86::rsi ||
            Src == x86::rcx || Src == x86::rdx) {
            a.test(Src.r8(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
        } else {
            a.test(Src.r32(), imm(_TAG_PRIMARY_MASK - TAG_PRIMARY_BOXED));
        }
        if (dist == dShort) {
            a.short_().jne(Fail);
        } else {
            a.jne(Fail);
        }
    }

    x86::Gp emit_ptr_val(x86::Gp Dst, x86::Gp Src) {
#if !defined(TAG_LITERAL_PTR)
        return Src;
#else
        if (Dst != Src) {
            a.mov(Dst, Src);
        }

        /* We intentionally skip TAG_PTR_MASK__ here, as we want to use
         * plain `emit_boxed_val` when we know the argument can't be a literal,
         * such as in bit-syntax matching.
         *
         * This comes at very little cost as `emit_boxed_val` nearly always has
         * a displacement. */
        a.and_(Dst, imm(~TAG_LITERAL_PTR));
        return Dst;
#endif
    }

    constexpr x86::Mem emit_boxed_val(x86::Gp Src,
                                      int32_t bytes = 0,
                                      size_t size = sizeof(UWord)) const {
        ASSERT(bytes % sizeof(Eterm) == 0);
        return x86::Mem(Src, bytes - TAG_PRIMARY_BOXED, size);
    }

    void emit_test_the_non_value(x86::Gp Reg) {
        if (THE_NON_VALUE == 0) {
            a.test(Reg.r32(), Reg.r32());
        } else {
            a.cmp(Reg, imm(THE_NON_VALUE));
        }
    }

    /*
     * Generate the shortest instruction for setting a register to an immediate
     * value. May clear flags.
     */
    void mov_imm(x86::Gp to, Uint value) {
        if (value == 0) {
            /*
             * Generate the shortest instruction to set the register to zero.
             *
             *   48 c7 c0 00 00 00 00    mov rax, 0
             *   b8 00 00 00 00          mov eax, 0
             *   31 c0                   xor eax, eax
             *
             * Thus, "xor eax, eax" is five bytes shorter than "mov rax, 0".
             *
             * Note: xor affects the flags (it sets ZF and clears CF and OF);
             * mov does not change any flags.
             */
            a.xor_(to.r32(), to.r32());
        } else {
            a.mov(to, imm(value));
        }
    }

public:
    void embed_rodata(const char *labelName, const char *buff, size_t size);
    void embed_bss(const char *labelName, size_t size);

    void embed_zeros(size_t size);

    void setLogger(std::string log) {
        FILE *f = fopen(log.data(), "w+");

        /* FIXME: Don't crash when loading multiple modules with the same name.
         *
         * setLogger(nullptr) disables logging. */
        if (f) {
            setvbuf(f, NULL, _IONBF, 0);
        }

        setLogger(f);
    }

    void setLogger(FILE *log) {
        logger.setFile(log);
        logger.setIndentation(FormatOptions::kIndentationCode, 4);
        code.setLogger(&logger);
    }

    template<typename... Ts>
    void comment(const char *format, Ts... args) {
        if (logger.file()) {
            char buff[1024];
            erts_snprintf(buff, sizeof(buff), format, args...);
            a.commentf("# %s", buff);
        }
    }

    struct AsmRange {
        ErtsCodePtr start;
        ErtsCodePtr stop;
        std::string name;

        /* Not used yet */
        std::string file;
        unsigned line;
    };

    void update_gdb_jit_info(std::string modulename,
                             std::vector<AsmRange> &functions);

    void embed(void *data, uint32_t size) {
        a.embed((char *)data, size);
    }
};

class BeamGlobalAssembler : public BeamAssembler {
    typedef void (BeamGlobalAssembler::*emitFptr)(void);
    typedef void (*fptr)(void);

    /* Please keep this in alphabetical order. */
#define BEAM_GLOBAL_FUNCS(_) \
    _(arith_compare_shared) \
    _(arith_eq_shared) \
    _(bif_nif_epilogue) \
    _(bif_element_shared) \
    _(bif_export_trap) \
    _(bs_add_shared) \
    _(bs_size_check_shared) \
    _(bs_fixed_integer_shared) \
    _(bs_get_tail_shared) \
    _(call_bif_shared) \
    _(call_light_bif_shared) \
    _(call_nif_early) \
    _(call_nif_shared) \
    _(catch_end_shared) \
    _(dispatch_bif) \
    _(dispatch_nif) \
    _(dispatch_return) \
    _(dispatch_save_calls) \
    _(error_action_code) \
    _(export_trampoline) \
    _(garbage_collect) \
    _(generic_bp_global) \
    _(generic_bp_local) \
    _(debug_bp) \
    _(handle_error_shared_prologue) \
    _(handle_error_shared) \
    _(handle_element_error) \
    _(handle_hd_error) \
    _(i_band_body_shared) \
    _(i_band_guard_shared) \
    _(i_bif_body_shared) \
    _(i_bif_guard_shared) \
    _(i_bor_body_shared) \
    _(i_bor_guard_shared) \
    _(i_bnot_body_shared) \
    _(i_bnot_guard_shared) \
    _(i_bsl_guard_shared) \
    _(i_bsl_body_shared) \
    _(i_bsr_guard_shared) \
    _(i_bsr_body_shared) \
    _(i_bxor_body_shared) \
    _(i_bxor_guard_shared) \
    _(i_func_info_shared) \
    _(i_load_nif_shared) \
    _(i_length_guard_shared) \
    _(i_length_body_shared) \
    _(i_loop_rec_shared) \
    _(i_new_small_map_lit_shared) \
    _(i_test_yield_shared) \
    _(increment_body_shared) \
    _(int_div_rem_body_shared) \
    _(int_div_rem_guard_shared) \
    _(minus_body_shared) \
    _(minus_guard_shared) \
    _(new_map_shared) \
    _(plus_body_shared) \
    _(plus_guard_shared) \
    _(process_main) \
    _(times_body_shared) \
    _(times_guard_shared) \
    _(unary_minus_body_shared) \
    _(unary_minus_guard_shared) \
    _(update_map_assoc_shared) \
    _(update_map_exact_guard_shared) \
    _(update_map_exact_body_shared)

    /* Labels exported from within process_main */
#define PROCESS_MAIN_LABELS(_) \
    _(context_switch) \
    _(context_switch_simplified) \
    _(do_schedule)
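    /* The two lists above are X-macros: each use site below expands every
     * entry once. For example, BEAM_GLOBAL_FUNCS(DECL_ENUM) expands to
     * "arith_compare_shared, arith_eq_shared, ..." inside the GlobalLabels
     * enum, and BEAM_GLOBAL_FUNCS(DECL_FUNC) declares one emit_<name>()
     * member function per entry. */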
#define DECL_ENUM(NAME) NAME,

    enum GlobalLabels : uint32_t {
        BEAM_GLOBAL_FUNCS(DECL_ENUM) PROCESS_MAIN_LABELS(DECL_ENUM)
    };
#undef DECL_ENUM

    static const std::map<GlobalLabels, emitFptr> emitPtrs;
    static const std::map<GlobalLabels, std::string> labelNames;
    std::unordered_map<GlobalLabels, Label> labels;
    std::unordered_map<GlobalLabels, fptr> ptrs;

#define DECL_FUNC(NAME) void emit_##NAME(void);

    BEAM_GLOBAL_FUNCS(DECL_FUNC);
#undef DECL_FUNC

    template<typename T>
    void emit_bitwise_fallback_body(T(*func_ptr), const ErtsCodeMFA *mfa);

    template<typename T>
    void emit_bitwise_fallback_guard(T(*func_ptr));

    x86::Mem emit_i_length_common(Label fail, int state_size);

    void emit_handle_error();

public:
    BeamGlobalAssembler(JitAllocator *allocator);

    void (*get(GlobalLabels lbl))(void) {
        ASSERT(ptrs[lbl]);
        return ptrs[lbl];
    }

#define GET_CODE(NAME) \
    void (*get_##NAME(void))() { \
        return get(NAME); \
    }

    BEAM_GLOBAL_FUNCS(GET_CODE)
    PROCESS_MAIN_LABELS(GET_CODE)
#undef GET_CODE
};

class BeamModuleAssembler : public BeamAssembler {
    typedef unsigned BeamLabel;

    /* Map of label number to asmjit Label */
    typedef std::unordered_map<BeamLabel, Label> LabelMap;
    LabelMap labels;

    struct patch {
        Label where;
        int64_t ptr_offs;
        int64_t val_offs;
    };

    struct patch_catch {
        struct patch patch;
        Label handler;
    };
    std::vector<struct patch_catch> catches;

    /* Map of import entry to patch labels and mfa */
    struct patch_import {
        std::vector<struct patch> patches;
        ErtsCodeMFA mfa;
    };
    typedef std::unordered_map<unsigned, struct patch_import> ImportMap;
    ImportMap imports;

    /* Map of fun entry to patch labels */
    struct patch_lambda {
        std::vector<struct patch> patches;
        ErlFunEntry fe;
    };
    typedef std::unordered_map<unsigned, struct patch_lambda> LambdaMap;
    LambdaMap lambdas;

    /* Map of literals to patch labels */
    struct patch_literal {
        std::vector<struct patch> patches;
    };
    typedef std::unordered_map<unsigned, struct patch_literal> LiteralMap;
    LiteralMap literals;

    /* All string patches */
    std::vector<struct patch> strings;

    /* All functions that have been seen so far */
    std::vector<BeamLabel> functions;

    BeamGlobalAssembler *ga;

    /* Used by emit to populate the labelToMFA map */
    Label currLabel;
    unsigned prev_op = 0;
    Label codeHeader;
    Label funcInfo;
    Label funcYield;
    Label genericBPTramp;
    Label on_load;

    Label floatMax;
    Label floatSignMask;

    Eterm mod;

    /* Save the last PC for an error. */
    size_t last_error_offset = 0;
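    /* Rough lifetime of a module (a sketch inferred from the interface
     * below, not a verbatim description of the loader): emit() is called
     * once per BEAM instruction, codegen() places the finished code in
     * executable memory, and the patch*() methods then resolve catches,
     * lambdas, literals, imports and strings through the writable mapping. */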
public:
    BeamModuleAssembler(BeamGlobalAssembler *ga,
                        Eterm mod,
                        unsigned num_labels);
    BeamModuleAssembler(BeamGlobalAssembler *ga,
                        Eterm mod,
                        unsigned num_labels,
                        unsigned num_functions);

    bool emit(unsigned op, const std::vector<ArgVal> &args);

    void codegen(JitAllocator *allocator,
                 const void **executable_ptr,
                 void **writable_ptr,
                 const BeamCodeHeader *in_hdr,
                 const BeamCodeHeader **out_exec_hdr,
                 BeamCodeHeader **out_rw_hdr);

    void codegen(JitAllocator *allocator,
                 const void **executable_ptr,
                 void **writable_ptr);

    void codegen(char *buff, size_t len);

    ErtsCodePtr getCode(unsigned label);

    void *getCode(Label label) {
        return BeamAssembler::getCode(label);
    }

    byte *getCode(char *labelName) {
        return BeamAssembler::getCode(labelName);
    }

    Label embed_vararg_rodata(const std::vector<ArgVal> &args, int y_offset);

    unsigned getCodeSize() {
        ASSERT(code.hasBaseAddress());
        return code.codeSize();
    }

    void copyCodeHeader(BeamCodeHeader *hdr);
    BeamCodeHeader *getCodeHeader(void);
    const ErtsCodeInfo *getOnLoad(void);

    unsigned patchCatches(char *rw_base);
    void patchLambda(char *rw_base, unsigned index, BeamInstr I);
    void patchLiteral(char *rw_base, unsigned index, Eterm lit);
    void patchImport(char *rw_base, unsigned index, BeamInstr I);
    void patchStrings(char *rw_base, const byte *string);

protected:
    /* Helpers */
    void emit_gc_test(const ArgVal &Stack,
                      const ArgVal &Heap,
                      const ArgVal &Live);
    void emit_gc_test_preserve(const ArgVal &Need,
                               const ArgVal &Live,
                               x86::Gp term);

    x86::Mem emit_variable_apply(bool includeI);
    x86::Mem emit_fixed_apply(const ArgVal &arity, bool includeI);

    x86::Gp emit_call_fun(const ArgVal &Fun);
    x86::Gp emit_apply_fun(void);

    void emit_is_binary(Label Fail, x86::Gp Src, Label next, Label subbin);

    void emit_get_list(const x86::Gp boxed_ptr,
                       const ArgVal &Hd,
                       const ArgVal &Tl);

    void emit_div_rem(const ArgVal &Fail,
                      const ArgVal &LHS,
                      const ArgVal &RHS,
                      const ErtsCodeMFA *error_mfa);

    void emit_setup_guard_bif(const std::vector<ArgVal> &args,
                              const ArgVal &bif);

    void emit_bif_arg_error(std::vector<ArgVal> args, const ErtsCodeMFA *mfa);
    void emit_error(int code);

    x86::Mem emit_bs_get_integer_prologue(Label next,
                                          Label fail,
                                          int flags,
                                          int size);

    int emit_bs_get_field_size(const ArgVal &Size,
                               int unit,
                               Label Fail,
                               const x86::Gp &out,
                               unsigned max_size = 0);

    void emit_bs_get_utf8(const ArgVal &Ctx, const ArgVal &Fail);
    void emit_bs_get_utf16(const ArgVal &Ctx,
                           const ArgVal &Fail,
                           const ArgVal &Flags);

    void emit_handle_error();
    void emit_handle_error(const ErtsCodeMFA *exp);
    void emit_handle_error(Label I, const ErtsCodeMFA *exp);
    void emit_validate(const ArgVal &arity);
    void emit_bs_skip_bits(const ArgVal &Fail, const ArgVal &Ctx);

    void emit_linear_search(x86::Gp val,
                            const ArgVal &Fail,
                            const std::vector<ArgVal> &args);

    void emit_check_float(Label next, x86::Xmm value);

    void emit_is_small(Label fail, x86::Gp Reg);
    void emit_is_both_small(Label fail, x86::Gp A, x86::Gp B);

    void emit_validate_unicode(Label next, Label fail, x86::Gp value);

    void emit_bif_is_eq_ne_exact_immed(const ArgVal &Src,
                                       const ArgVal &Immed,
                                       const ArgVal &Dst,
                                       Eterm fail_value,
                                       Eterm succ_value);

    void emit_proc_lc_unrequire(void);
    void emit_proc_lc_require(void);

    void emit_nyi(const char *msg);
    void emit_nyi(void);

    void emit_binsearch_nodes(size_t Left,
                              size_t Right,
                              const ArgVal &Fail,
                              const std::vector<ArgVal> &args);

    bool emit_optimized_three_way_select(const ArgVal &Fail,
                                         const std::vector<ArgVal> &args);

#ifdef DEBUG
    void emit_tuple_assertion(const ArgVal &Src, x86::Gp tuple_reg);
#endif

#include "beamasm_protos.h"

    void make_move_patch(x86::Gp to,
                         std::vector<struct patch> &patches,
                         int64_t offset = 0) {
        const int MOV_IMM64_PAYLOAD_OFFSET = 2;
        Label lbl = a.newLabel();

        a.bind(lbl);
        a.long_().mov(to, imm(LLONG_MAX));

        patches.push_back({lbl, MOV_IMM64_PAYLOAD_OFFSET, offset});
    }

    void make_word_patch(std::vector<struct patch> &patches) {
        Label lbl = a.newLabel();
        UWord word = LLONG_MAX;

        a.bind(lbl);
        a.embed(reinterpret_cast<char *>(&word), sizeof(word));

        patches.push_back({lbl, 0, 0});
    }

    template<typename A, typename B>
    void mov_arg(A to, B from) {
        /* We can't move to or from Y registers when we're on the runtime
         * stack, so we'll conservatively disallow all mov_args in the hopes of
         * finding such bugs sooner. */
        emit_assert_erlang_stack();

        mov_arg(to, from, ARG1);
    }

    template<typename T>
    void cmp_arg(T oper, const ArgVal &val) {
        cmp_arg(oper, val, ARG1);
    }

    void cmp_arg(x86::Mem mem, const ArgVal &val, const x86::Gp &spill) {
        /* Note that the cast to Sint is necessary to handle negative numbers
         * such as NIL. */
        if (val.isImmed() && Support::isInt32((Sint)val.getValue())) {
            a.cmp(mem, imm(val.getValue()));
        } else {
            mov_arg(spill, val);
            a.cmp(mem, spill);
        }
    }

    void cmp_arg(x86::Gp gp, const ArgVal &val, const x86::Gp &spill) {
        if (val.isImmed() && Support::isInt32((Sint)val.getValue())) {
            a.cmp(gp, imm(val.getValue()));
        } else {
            mov_arg(spill, val);
            a.cmp(gp, spill);
        }
    }
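    /* The mov_arg()/cmp_arg() overloads that take an explicit `spill`
     * register may clobber it when the argument cannot be encoded as a
     * 32-bit immediate or moved directly between two memory operands; the
     * two-argument convenience wrappers above default it to ARG1. */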
    /* Note: May clear flags. */
    void mov_arg(x86::Gp to, const ArgVal &from, const x86::Gp &spill) {
        if (from.isMem()) {
            a.mov(to, getArgRef(from));
        } else if (from.isLiteral()) {
            make_move_patch(to, literals[from.getValue()].patches);
        } else {
            mov_imm(to, from.getValue());
        }
    }

    void mov_arg(x86::Mem to, const ArgVal &from, const x86::Gp &spill) {
        if (from.isImmed()) {
            if (Support::isInt32((Sint)from.getValue())) {
                a.mov(to, imm(from.getValue()));
            } else {
                a.mov(spill, imm(from.getValue()));
                a.mov(to, spill);
            }
        } else {
            mov_arg(spill, from);
            a.mov(to, spill);
        }
    }

    void mov_arg(const ArgVal &to, x86::Gp from, const x86::Gp &spill) {
        (void)spill;

        a.mov(getArgRef(to), from);
    }

    void mov_arg(const ArgVal &to, BeamInstr from, const x86::Gp &spill) {
        if (Support::isInt32((Sint)from)) {
            a.mov(getArgRef(to), imm(from));
        } else {
            a.mov(spill, imm(from));
            mov_arg(to, spill);
        }
    }

    void mov_arg(const ArgVal &to, const ArgVal &from, const x86::Gp &spill) {
        if (from.isMem()) {
            mov_arg(spill, from);
            mov_arg(to, spill);
        } else {
            mov_arg(getArgRef(to), from);
        }
    }
};

void beamasm_update_perf_info(std::string modulename,
                              std::vector<BeamAssembler::AsmRange> &ranges);