//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//


// Functions that implement common sequences in function prologues and epilogues
// used to save code size

// FUNCTION_BEGIN name
//   Switches to .text and opens `name` as a global symbol of type @function,
//   placing the label after a .falign directive.
    .macro FUNCTION_BEGIN name
    .text
    .globl \name
    .type \name, @function
    .falign
\name:
    .endm

// FUNCTION_END name
//   Records the size of `name` as the distance from its label to the current
//   location.
    .macro FUNCTION_END name
    .size \name, . - \name
    .endm

// FALLTHROUGH_TAIL_CALL name0 name1
//   Closes `name0` (its .size ends at the current location) and immediately
//   opens `name1` as a new global @function label.  No branch is emitted, so
//   execution that entered at `name0` simply continues into `name1`.
    .macro FALLTHROUGH_TAIL_CALL name0 name1
    .size \name0, . - \name0
    .globl \name1
    .type \name1, @function
    .falign
\name1:
    .endm




// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.




// The compiler knows that the __save_* functions clobber LR. No other
// registers should be used without informing the compiler.

// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.

// Every label below is an entry point into one straight-line run of stores.
// Entering at a given label performs that label's store and all stores that
// follow it, then returns through lr.

FUNCTION_BEGIN __save_r27_through_r16
    memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
    memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
    memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
    memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
    memd(fp+#-16) = r25:24
    {
        // Final store shares its packet with the return.
        memd(fp+#-8) = r27:26
        jumpr lr
    }
FUNCTION_END __save_r27_through_r24




// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
// with deallocframe.
// That way, the return gets the old value of lr, which is
// where these functions need to return, and at the same time, lr gets the value
// it needs going into the sibcall.

// Entry at ..._r20_...: reloads r21:20 and r23:22 in one packet, then falls
// into the ..._r24_... entry, which reloads r25:24 and branches to the shared
// tail (__restore_r27_through_r26_and_deallocframe_before_sibcall) defined
// further down.

FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
    {
        r21:20 = memd(fp+#-32)
        r23:22 = memd(fp+#-24)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
    {
        r25:24 = memd(fp+#-16)
        jump __restore_r27_through_r26_and_deallocframe_before_sibcall
    }
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall




// Second chain: entries for r16, r18, r22 and r26.  Each entry reloads the
// register pairs from its own packet onward and falls through to the final
// packet, which reloads r27:26, tears down the frame and returns.

FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
    r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
    {
        r19:18 = memd(fp+#-40)
        r21:20 = memd(fp+#-32)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
    {
        r23:22 = memd(fp+#-24)
        r25:24 = memd(fp+#-16)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
    {
        // jumpr shares the packet with deallocframe, so it branches using the
        // lr value from before the frame is torn down.
        r27:26 = memd(fp+#-8)
        deallocframe
        jumpr lr
    }
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall




// Here we use the extra load bandwidth to restore LR early, allowing the return
// to occur in parallel with the deallocframe.
//
// Chain with entries for r16, r20 and r24.  Each entry reloads the register
// pairs from its own packet onward; the last two packets reload lr from the
// frame, then reload r27:26, deallocate the frame and return together.

FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
    {
        r17:16 = memd(fp+#-48)
        r19:18 = memd(fp+#-40)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
    {
        r21:20 = memd(fp+#-32)
        r23:22 = memd(fp+#-24)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
    {
        // Load lr from fp+#4 one packet ahead of the return so that the
        // jumpr below can share a packet with deallocframe.
        lr = memw(fp+#4)
        r25:24 = memd(fp+#-16)
    }
    {
        r27:26 = memd(fp+#-8)
        deallocframe
        jumpr lr
    }
FUNCTION_END __restore_r27_through_r24_and_deallocframe




// Here the load bandwidth is maximized for all three functions.

// Chain with entries for r18, r22 and r26.  Unlike the chain above, lr is not
// preloaded: deallocframe runs in its own packet and the return follows it.

FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
    {
        r19:18 = memd(fp+#-40)
        r21:20 = memd(fp+#-32)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
    {
        r23:22 = memd(fp+#-24)
        r25:24 = memd(fp+#-16)
    }
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
    {
        r27:26 = memd(fp+#-8)
        deallocframe
    }
    // Separate packet: the return executes after deallocframe has completed.
    jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe