//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//


// Functions that implement common sequences in function prologues and epilogues
// used to save code size

	// Open a global function: place it in .text, export the symbol, mark it
	// as a function for the symbol table, and fetch-align (.falign) before
	// defining the entry label.
	.macro FUNCTION_BEGIN name
	.text
	.globl \name
	.type  \name, @function
	.falign
\name:
	.endm

	// Close a function: record its size (current location minus the label)
	// for the symbol table.
	.macro FUNCTION_END name
	.size  \name, . - \name
	.endm

	// End function \name0 and immediately open \name1 at the next
	// fetch-aligned address, so execution falls straight through from one
	// entry point into the next.
	.macro FALLTHROUGH_TAIL_CALL name0 name1
	.size  \name0, . - \name0
	.globl \name1
	.type  \name1, @function
	.falign
\name1:
	.endm




// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.




// The compiler knows that the __save_* functions clobber LR.  No other
// registers should be used without informing the compiler.
// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.

// Prologue spill chain: each entry point stores one more callee-saved
// register pair at its fixed fp-relative slot, then falls through into the
// next entry, so the compiler emits a single call to whichever label saves
// the registers a given function actually uses.
FUNCTION_BEGIN __save_r27_through_r16
	memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
	memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
	memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
	memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
	memd(fp+#-16) = r25:24
	{
	// The last store issues in the same packet as the return.
	memd(fp+#-8) = r27:26
	jumpr lr
	}
FUNCTION_END __save_r27_through_r24




// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
// with deallocframe.  That way, the return gets the old value of lr, which is
// where these functions need to return, and at the same time, lr gets the value
// it needs going into the sibcall.

FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
	{
	r21:20 = memd(fp+#-32)
	r23:22 = memd(fp+#-24)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
	{
	r25:24 = memd(fp+#-16)
	// Join the common tail below that restores r27:26, deallocates the
	// frame, and returns.
	jump __restore_r27_through_r26_and_deallocframe_before_sibcall
	}
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall




FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
	r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
	{
	r19:18 = memd(fp+#-40)
	r21:20 = memd(fp+#-32)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
	{
	r23:22 = memd(fp+#-24)
	r25:24 = memd(fp+#-16)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
	{
	// deallocframe and jumpr lr share a packet: the jump consumes the old
	// lr (this helper's return address) while deallocframe reloads lr with
	// the value the upcoming sibcall needs (see comment above).
	r27:26 = memd(fp+#-8)
	deallocframe
	jumpr lr
	}
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall




// Here we use the extra load bandwidth to
// restore LR early, allowing the return
// to occur in parallel with the deallocframe.

FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
	{
	r17:16 = memd(fp+#-48)
	r19:18 = memd(fp+#-40)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
	{
	r21:20 = memd(fp+#-32)
	r23:22 = memd(fp+#-24)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
	{
	// Reload the saved LR from the frame (fp+#4) one packet early, so the
	// final jumpr lr does not have to wait on deallocframe to restore it.
	lr = memw(fp+#4)
	r25:24 = memd(fp+#-16)
	}
	{
	r27:26 = memd(fp+#-8)
	deallocframe
	jumpr lr
	}
FUNCTION_END __restore_r27_through_r24_and_deallocframe




// Here the load bandwidth is maximized for all three functions.

// Epilogue restore chain: each entry point reloads one more callee-saved
// register pair from its fixed fp-relative slot and falls through into the
// next entry; the shared tail deallocates the frame and returns.
FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
	{
	r19:18 = memd(fp+#-40)
	r21:20 = memd(fp+#-32)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
	{
	r23:22 = memd(fp+#-24)
	r25:24 = memd(fp+#-16)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
	{
	r27:26 = memd(fp+#-8)
	deallocframe
	}
	// jumpr lr issues after the packet above, so it sees the lr value that
	// deallocframe restored from the frame.
	jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe