/*
 * %CopyrightBegin%
 *
 * Copyright Ericsson AB 2004-2016. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * %CopyrightEnd%
 */

#define ASM
#include "hipe_ppc_asm.h"
#include "hipe_literals.h"
#include "hipe_mode_switch.h"

        .text
        .p2align 2

#if defined(__powerpc64__)
/*
 * Enter Erlang from C.
 * Create a new frame on the C stack.
 * Save C callee-save registers (r14-r31) in the frame.
 * Save r0 (C return address) in the caller's LR save slot.
 * Retrieve the process pointer from the C argument registers.
 * Return to LR.
 * Do not clobber the C argument registers (r3-r10).
 *
 * Usage: mflr r0 SEMI bl .enter
 */
.enter:
        # Our PPC64 ELF ABI frame must include:
        # - 48 (6*8) bytes for AIX-like linkage area
        # - 64 (8*8) bytes for AIX-like parameter area for
        #   recursive C calls with up to 8 parameter words
        # - padding to make the frame a multiple of 16 bytes
        # - 144 (18*8) bytes for saving r14-r31
        # The final size is 256 bytes.
        # stdu is required for atomic alloc+init
        stdu    r1,-256(r1)             /* 0(r1) contains r1+256 */
        std     r14, 112(r1)
        std     r15, 120(r1)
        std     r16, 128(r1)
        std     r17, 136(r1)
        std     r18, 144(r1)
        std     r19, 152(r1)
        std     r20, 160(r1)
        std     r21, 168(r1)
        std     r22, 176(r1)
        std     r23, 184(r1)
        std     r24, 192(r1)
        std     r25, 200(r1)
        std     r26, 208(r1)
        std     r27, 216(r1)
        std     r28, 224(r1)
        std     r29, 232(r1)
        std     r30, 240(r1)
        std     r31, 248(r1)
        std     r0, 256+16(r1)          /* caller saved LR in r0 */
        mr      P, r3                   /* get the process pointer */
        blr

/*
 * Return to the calling C function.
 * The return value is in r3.
 *
 * .nosave_exit saves no state
 * .flush_exit saves NSP and other cached P state.
 * .suspend_exit also saves RA.
 */
.suspend_exit:
        /* save RA, so we can be resumed */
        mflr    r0
        std     r0, P_NRA(P)
.flush_exit:
        /* flush cached P state */
        SAVE_CACHED_STATE
.nosave_exit:
        /* restore callee-save registers, drop frame, return */
        ld      r0, 256+16(r1)
        mtlr    r0
        ld      r14, 112(r1)
        ld      r15, 120(r1)
        ld      r16, 128(r1)
        ld      r17, 136(r1)
        ld      r18, 144(r1)
        ld      r19, 152(r1)
        ld      r20, 160(r1)
        ld      r21, 168(r1)
        ld      r22, 176(r1)
        ld      r23, 184(r1)
        ld      r24, 192(r1)
        ld      r25, 200(r1)
        ld      r26, 208(r1)
        ld      r27, 216(r1)
        ld      r28, 224(r1)
        ld      r29, 232(r1)            /* kills HP */
        ld      r30, 240(r1)            /* kills NSP */
        ld      r31, 248(r1)            /* kills P */
        addi    r1, r1, 256
        blr
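
/*
 * Layout sketch of the 256-byte frame created by .enter and torn down
 * by the exit paths above, derived from the offsets used in this file
 * (offsets relative to the new r1):
 *
 *     0..47     back chain (written by the stdu) and linkage area
 *    48..111    parameter area for recursive C calls
 *   112..255    saved r14-r31
 *   256+16      LR save slot in the caller's frame, where .enter
 *               stores r0 and .nosave_exit reloads it
 */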
#else   /* !__powerpc64__ */
/*
 * Enter Erlang from C.
 * Create a new frame on the C stack.
 * Save C callee-save registers (r14-r31) in the frame.
 * Save r0 (C return address) in the frame's LR save slot.
 * Retrieve the process pointer from the C argument registers.
 * Return to LR.
 * Do not clobber the C argument registers (r3-r10).
 *
 * Usage: mflr r0 SEMI bl .enter
 */
.enter:
        # A unified Linux/OSX C frame must include:
        # - 24 bytes for AIX/OSX-like linkage area
        # - 28 bytes for AIX/OSX-like parameter area for
        #   recursive C calls with up to 7 parameter words
        # - 76 bytes for saving r14-r31 and LR
        # - padding to make it a multiple of 16 bytes
        # The final size is 128 bytes.
        # stwu is required for atomic alloc+init
        stwu    r1,-128(r1)             /* 0(r1) contains r1+128 */
        stw     r14, 52(r1)
        stw     r15, 56(r1)
        stw     r16, 60(r1)
        stw     r17, 64(r1)
        stw     r18, 68(r1)
        stw     r19, 72(r1)
        stw     r20, 76(r1)
        stw     r21, 80(r1)
        stw     r22, 84(r1)
        stw     r23, 88(r1)
        stw     r24, 92(r1)
        stw     r25, 96(r1)
        stw     r26, 100(r1)
        stw     r27, 104(r1)
        stw     r28, 108(r1)
        stw     r29, 112(r1)
        stw     r30, 116(r1)
        stw     r31, 120(r1)
        stw     r0, 124(r1)             /* caller saved LR in r0 */
        mr      P, r3                   /* get the process pointer */
        blr

/*
 * Return to the calling C function.
 * The return value is in r3.
 *
 * .nosave_exit saves no state
 * .flush_exit saves NSP and other cached P state.
 * .suspend_exit also saves RA.
 */
.suspend_exit:
        /* save RA, so we can be resumed */
        mflr    r0
        stw     r0, P_NRA(P)
.flush_exit:
        /* flush cached P state */
        SAVE_CACHED_STATE
.nosave_exit:
        /* restore callee-save registers, drop frame, return */
        lwz     r0, 124(r1)
        mtlr    r0
        lwz     r14, 52(r1)
        lwz     r15, 56(r1)
        lwz     r16, 60(r1)
        lwz     r17, 64(r1)
        lwz     r18, 68(r1)
        lwz     r19, 72(r1)
        lwz     r20, 76(r1)
        lwz     r21, 80(r1)
        lwz     r22, 84(r1)
        lwz     r23, 88(r1)
        lwz     r24, 92(r1)
        lwz     r25, 96(r1)
        lwz     r26, 100(r1)
        lwz     r27, 104(r1)
        lwz     r28, 108(r1)
        lwz     r29, 112(r1)            /* kills HP */
        lwz     r30, 116(r1)            /* kills NSP */
        lwz     r31, 120(r1)            /* kills P */
        addi    r1, r1, 128
        blr
#endif  /* !__powerpc64__ */

/*
 * int hipe_ppc_call_to_native(Process *p);
 * Emulated code recursively calls native code.
 */
        OPD(hipe_ppc_call_to_native)
        GLOBAL(CSYM(hipe_ppc_call_to_native))
CSYM(hipe_ppc_call_to_native):
        /* save C context */
        mflr    r0
        bl      .enter
        /* prepare to call the target */
        LOAD    r0, P_NCALLEE(P)
        mtctr   r0
        /* get argument registers */
        LOAD_ARG_REGS
        /* cache some P state in registers */
        RESTORE_CACHED_STATE
        /* call the target */
        bctrl                           /* defines LR (a.k.a. NRA) */
/* FALLTHROUGH
 *
 * We export this return address so that hipe_mode_switch() can discover
 * when native code tailcalls emulated code.
 *
 * This is where native code returns to emulated code.
 */
        GLOBAL(ASYM(nbif_return))
ASYM(nbif_return):
        STORE   r3, P_ARG0(P)           /* save retval */
        li      r3, HIPE_MODE_SWITCH_RES_RETURN
        b       .flush_exit
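
/*
 * Note: the value placed in r3 before branching to one of the exit paths
 * (HIPE_MODE_SWITCH_RES_RETURN above, and the other HIPE_MODE_SWITCH_RES_*
 * codes from hipe_mode_switch.h used further down) becomes the return
 * value of hipe_ppc_call_to_native() and friends; the C-level mode-switch
 * code is expected to dispatch on it.
 */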

/*
 * int hipe_ppc_return_to_native(Process *p);
 * Emulated code returns to its native code caller.
 */
        OPD(hipe_ppc_return_to_native)
        GLOBAL(CSYM(hipe_ppc_return_to_native))
CSYM(hipe_ppc_return_to_native):
        /* save C context */
        mflr    r0
        bl      .enter
        /* restore return address */
        LOAD    r0, P_NRA(P)
        mtlr    r0
        /* cache some P state in registers */
        RESTORE_CACHED_STATE
        /* get return value */
        LOAD    r3, P_ARG0(P)
        /*
         * Return using the current return address.
         * The parameters were popped at the original native-to-emulated
         * call (hipe_call_from_native_is_recursive), so a plain return
         * (blr) suffices.
         */
        blr

/*
 * int hipe_ppc_tailcall_to_native(Process *p);
 * Emulated code tailcalls native code.
 */
        OPD(hipe_ppc_tailcall_to_native)
        GLOBAL(CSYM(hipe_ppc_tailcall_to_native))
CSYM(hipe_ppc_tailcall_to_native):
        /* save C context */
        mflr    r0
        bl      .enter
        /* prepare to call the target */
        LOAD    r0, P_NCALLEE(P)
        mtctr   r0
        /* get argument registers */
        LOAD_ARG_REGS
        /* restore return address */
        LOAD    r0, P_NRA(P)
        mtlr    r0
        /* cache some P state in registers */
        RESTORE_CACHED_STATE
        /* call the target */
        bctr

/*
 * int hipe_ppc_throw_to_native(Process *p);
 * Emulated code throws an exception to its native code caller.
 */
        OPD(hipe_ppc_throw_to_native)
        GLOBAL(CSYM(hipe_ppc_throw_to_native))
CSYM(hipe_ppc_throw_to_native):
        /* save C context */
        mflr    r0
        bl      .enter
        /* prepare to invoke handler */
        LOAD    r0, P_NCALLEE(P)        /* set by hipe_find_handler() */
        mtctr   r0
        /* cache some P state in registers */
        RESTORE_CACHED_STATE
        /* invoke the handler */
        bctr

/*
 * Native code calls emulated code via a stub
 * which should look as follows:
 *
 * stub for f/N:
 *      <set r12 to f's export entry address>
 *      <set r0 to N>
 *      b nbif_callemu
 *
 * The stub may need to create &nbif_callemu as a 32-bit immediate
 * in a scratch register if the branch needs a trampoline. The code
 * for creating a 32-bit immediate in r0 is potentially slower than
 * for other registers (an add must be replaced by an or, and adds
 * are potentially faster than ors), so it is better to use r0 for
 * the arity (a small immediate), making r11 available for trampolines.
 * (See "The PowerPC Compiler Writer's Guide", section 3.2.3.1.)
 *
 * XXX: Different stubs for different number of register parameters?
 */
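/*
 * For illustration only (a sketch, not code emitted by this file): on
 * 32-bit PPC, when nbif_callemu is within direct branch range, a stub
 * for a function f with arity 5 might look like
 *
 *      lis     r12, <f's export entry>@ha
 *      addi    r12, r12, <f's export entry>@l
 *      li      r0, 5
 *      b       nbif_callemu
 */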
        GLOBAL(ASYM(nbif_callemu))
ASYM(nbif_callemu):
        STORE   r12, P_CALLEE_EXP(P)
        STORE   r0, P_ARITY(P)
        STORE_ARG_REGS
        li      r3, HIPE_MODE_SWITCH_RES_CALL_EXPORTED
        b       .suspend_exit

/*
 * nbif_apply
 */
        GLOBAL(ASYM(nbif_apply))
ASYM(nbif_apply):
        STORE_ARG_REGS
        li      r3, HIPE_MODE_SWITCH_RES_APPLY
        b       .suspend_exit

/*
 * Native code calls an emulated-mode closure via a stub defined below.
 *
 * The closure is appended as the last actual parameter, and parameters
 * beyond the first few passed in registers are pushed onto the stack in
 * left-to-right order.
 * Hence, the location of the closure parameter only depends on the number
 * of parameters in registers, not the total number of parameters.
 */
#if NR_ARG_REGS >= 6
        GLOBAL(ASYM(nbif_ccallemu6))
ASYM(nbif_ccallemu6):
        STORE   ARG5, P_ARG5(P)
#if NR_ARG_REGS > 6
        mr      ARG5, ARG6
#else
        LOAD    ARG5, 0(NSP)
#endif
        /*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 5
        GLOBAL(ASYM(nbif_ccallemu5))
ASYM(nbif_ccallemu5):
        STORE   ARG4, P_ARG4(P)
#if NR_ARG_REGS > 5
        mr      ARG4, ARG5
#else
        LOAD    ARG4, 0(NSP)
#endif
        /*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 4
        GLOBAL(ASYM(nbif_ccallemu4))
ASYM(nbif_ccallemu4):
        STORE   ARG3, P_ARG3(P)
#if NR_ARG_REGS > 4
        mr      ARG3, ARG4
#else
        LOAD    ARG3, 0(NSP)
#endif
        /*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 3
        GLOBAL(ASYM(nbif_ccallemu3))
ASYM(nbif_ccallemu3):
        STORE   ARG2, P_ARG2(P)
#if NR_ARG_REGS > 3
        mr      ARG2, ARG3
#else
        LOAD    ARG2, 0(NSP)
#endif
        /*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 2
        GLOBAL(ASYM(nbif_ccallemu2))
ASYM(nbif_ccallemu2):
        STORE   ARG1, P_ARG1(P)
#if NR_ARG_REGS > 2
        mr      ARG1, ARG2
#else
        LOAD    ARG1, 0(NSP)
#endif
        /*FALLTHROUGH*/
#endif

#if NR_ARG_REGS >= 1
        GLOBAL(ASYM(nbif_ccallemu1))
ASYM(nbif_ccallemu1):
        STORE   ARG0, P_ARG0(P)
#if NR_ARG_REGS > 1
        mr      ARG0, ARG1
#else
        LOAD    ARG0, 0(NSP)
#endif
        /*FALLTHROUGH*/
#endif

        GLOBAL(ASYM(nbif_ccallemu0))
ASYM(nbif_ccallemu0):
        /* We use r4 not ARG0 here because ARG0 is not
           defined when NR_ARG_REGS == 0. */
#if NR_ARG_REGS == 0
        LOAD    r4, 0(NSP)              /* get the closure */
#endif
        STORE   r4, P_CLOSURE(P)        /* save the closure */
        li      r3, HIPE_MODE_SWITCH_RES_CALL_CLOSURE
        b       .suspend_exit
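
/*
 * Worked example (with a hypothetical NR_ARG_REGS == 4): a call to an
 * emulated-mode closure with two ordinary parameters enters at
 * nbif_ccallemu2 with the parameters in ARG0-ARG1 and the closure,
 * being the last actual parameter, in ARG2.  nbif_ccallemu2 spills ARG1
 * to P_ARG1 and moves the closure down into ARG1, nbif_ccallemu1 spills
 * ARG0 to P_ARG0 and moves the closure into ARG0, and nbif_ccallemu0
 * finally saves it (via r4) in P_CLOSURE before suspending with
 * HIPE_MODE_SWITCH_RES_CALL_CLOSURE.
 */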

/*
 * This is where native code suspends.
 */
        GLOBAL(ASYM(nbif_suspend_0))
ASYM(nbif_suspend_0):
        li      r3, HIPE_MODE_SWITCH_RES_SUSPEND
        b       .suspend_exit

/*
 * Suspend from a receive (waiting for a message)
 */
        GLOBAL(ASYM(nbif_suspend_msg))
ASYM(nbif_suspend_msg):
        li      r3, HIPE_MODE_SWITCH_RES_WAIT
        b       .suspend_exit

/*
 * Suspend from a receive with a timeout (waiting for a message)
 * if (!(p->flags & F_TIMO)) { suspend }
 * else { return 0; }
 */
        GLOBAL(ASYM(nbif_suspend_msg_timeout))
ASYM(nbif_suspend_msg_timeout):
        LOAD    r4, P_FLAGS(P)
        li      r3, HIPE_MODE_SWITCH_RES_WAIT_TIMEOUT
        /* this relies on F_TIMO (1<<2) fitting in a uimm16 */
        andi.   r0, r4, F_TIMO
        beq-    .suspend_exit           /* sees the CR state from andi. above */
        /* timeout has occurred */
        li      r3, 0
        blr

/*
 * This is the default exception handler for native code.
 */
        GLOBAL(ASYM(nbif_fail))
ASYM(nbif_fail):
        li      r3, HIPE_MODE_SWITCH_RES_THROW
        b       .flush_exit             /* no need to save RA */

        OPD(nbif_0_gc_after_bif)
        OPD(nbif_1_gc_after_bif)
        OPD(nbif_2_gc_after_bif)
        OPD(nbif_3_gc_after_bif)
        OPD(nbif_4_gc_after_bif)
        GLOBAL(CSYM(nbif_0_gc_after_bif))
        GLOBAL(CSYM(nbif_1_gc_after_bif))
        GLOBAL(CSYM(nbif_2_gc_after_bif))
        GLOBAL(CSYM(nbif_3_gc_after_bif))
        GLOBAL(CSYM(nbif_4_gc_after_bif))
CSYM(nbif_0_gc_after_bif):
        li      r4, 0
        b       .gc_after_bif
CSYM(nbif_1_gc_after_bif):
        li      r4, 1
        b       .gc_after_bif
CSYM(nbif_2_gc_after_bif):
        li      r4, 2
        b       .gc_after_bif
CSYM(nbif_3_gc_after_bif):
        li      r4, 3
        b       .gc_after_bif
CSYM(nbif_4_gc_after_bif):
        li      r4, 4
        /*FALLTHROUGH*/
.gc_after_bif:
        stw     r4, P_NARITY(P)         /* Note: narity is a 32-bit field */
        STORE   TEMP_LR, P_NRA(P)
        STORE   NSP, P_NSP(P)
        mflr    TEMP_LR
        li      r6, 0                   /* Pass 0 in arity */
        li      r5, 0                   /* Pass NULL in regs */
        mr      r4, r3
        mr      r3, P
        bl      CSYM(erts_gc_after_bif_call)
        mtlr    TEMP_LR
        LOAD    TEMP_LR, P_NRA(P)
        li      r4, 0
        stw     r4, P_NARITY(P)         /* Note: narity is a 32-bit field */
        blr
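
/*
 * In C terms the call made by .gc_after_bif above corresponds roughly to
 *
 *      result = erts_gc_after_bif_call(p, result, NULL, 0);
 *
 * (a sketch inferred from the register setup: r3 = P, r4 = the BIF
 * result, r5 = NULL regs, r6 = 0 arity).  The possibly updated result
 * is left in r3 and returned to the native-code caller.
 */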

/*
 * We end up here when a BIF called from native signals an
 * exceptional condition.
 * The heap pointer was just read from P.
 * TEMP_LR contains a copy of LR
 */
        OPD(nbif_0_simple_exception)
        GLOBAL(CSYM(nbif_0_simple_exception))
CSYM(nbif_0_simple_exception):
        li      r4, 0
        b       .nbif_simple_exception

        OPD(nbif_1_simple_exception)
        GLOBAL(CSYM(nbif_1_simple_exception))
CSYM(nbif_1_simple_exception):
        li      r4, 1
        b       .nbif_simple_exception

        OPD(nbif_2_simple_exception)
        GLOBAL(CSYM(nbif_2_simple_exception))
CSYM(nbif_2_simple_exception):
        li      r4, 2
        b       .nbif_simple_exception

        OPD(nbif_3_simple_exception)
        GLOBAL(CSYM(nbif_3_simple_exception))
CSYM(nbif_3_simple_exception):
        li      r4, 3
        b       .nbif_simple_exception

        OPD(nbif_4_simple_exception)
        GLOBAL(CSYM(nbif_4_simple_exception))
CSYM(nbif_4_simple_exception):
        li      r4, 4
        /*FALLTHROUGH*/
.nbif_simple_exception:
        LOAD    r3, P_FREASON(P)
        CMPI    r3, FREASON_TRAP
        beq-    .handle_trap
        /*
         * Find and invoke catch handler (it must exist).
         * The heap pointer was just read from P.
         * TEMP_LR should contain the current call's return address.
         * r4 should contain the current call's arity.
         */
        STORE   NSP, P_NSP(P)
        STORE   TEMP_LR, P_NRA(P)
        stw     r4, P_NARITY(P)         /* Note: narity is a 32-bit field */
        /* find and prepare to invoke the handler */
        mr      r3, P
        bl      CSYM(hipe_handle_exception)     /* Note: hipe_handle_exception() conses */
        /* prepare to invoke handler */
        LOAD    r0, P_NCALLEE(P)        /* set by hipe_find_handler() */
        mtctr   r0
        RESTORE_CACHED_STATE            /* NSP updated by hipe_find_handler() */
        /* now invoke the handler */
        bctr

        /*
         * A BIF failed with freason TRAP:
         * - the BIF's arity is in r4
         * - the native RA was saved in TEMP_LR before the BIF call
         * - the native heap/stack/reds registers are saved in P
         */
.handle_trap:
        li      r3, HIPE_MODE_SWITCH_RES_TRAP
        STORE   NSP, P_NSP(P)
        stw     r4, P_NARITY(P)         /* Note: narity is a 32-bit field */
        STORE   TEMP_LR, P_NRA(P)
        b       .nosave_exit

/*
 * nbif_stack_trap_ra: trap return address for maintaining
 * the gray/white stack boundary
 */
        GLOBAL(ASYM(nbif_stack_trap_ra))
ASYM(nbif_stack_trap_ra):               /* a return address, not a function */
        # This only handles a single return value.
        # If we have more, we need to save them in the PCB.
        mr      TEMP_ARG0, r3           /* save retval */
        STORE   NSP, P_NSP(P)
        mr      r3, P
        bl      CSYM(hipe_handle_stack_trap)    /* must not cons */
        mtctr   r3                      /* original RA */
        mr      r3, TEMP_ARG0           /* restore retval */
        bctr                            /* resume at original RA */

/*
 * hipe_ppc_inc_stack
 * Caller saved its LR in TEMP_LR (== TEMP1) before calling us.
 */
        GLOBAL(ASYM(hipe_ppc_inc_stack))
ASYM(hipe_ppc_inc_stack):
        STORE_ARG_REGS
        mflr    TEMP_ARG0
        STORE   NSP, P_NSP(P)
        mr      r3, P
        # hipe_inc_nstack reads and writes NSP and NSP_LIMIT,
        # but does not access LR/RA, HP, or FCALLS.
        bl      CSYM(hipe_inc_nstack)
        mtlr    TEMP_ARG0
        LOAD    NSP, P_NSP(P)
        LOAD_ARG_REGS
        blr

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif