//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//


// Functions that implement common spill and restore sequences in function
// prologues and epilogues, factored out to reduce code size.

	.macro FUNCTION_BEGIN name
	.text
	.globl \name
	.type  \name, @function
	.falign
\name:
	.endm

	.macro FUNCTION_END name
	.size  \name, . - \name
	.endm

	.macro FALLTHROUGH_TAIL_CALL name0 name1
	.size \name0, . - \name0
	.globl \name1
	.type \name1, @function
	.falign
\name1:
	.endm
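
// For example, "FUNCTION_BEGIN foo" expands to:
//
//	.text
//	.globl foo
//	.type  foo, @function
//	.falign
// foo: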




// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.
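//
// For reference, the frame layout these routines assume, relative to fp (the
// saved fp/lr pair at non-negative offsets is stored by the caller's
// allocframe; the negative offsets are the ones used below):
//
//	fp+#4	saved lr
//	fp+#0	saved fp
//	fp+#-8	r27:26
//	fp+#-16	r25:24
//	fp+#-24	r23:22
//	fp+#-32	r21:20
//	fp+#-40	r19:18
//	fp+#-48	r17:16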




// The compiler knows that the __save_* functions clobber LR.  No other
// registers should be used without informing the compiler.

// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.
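
// As an illustration, a prologue that spills all six pairs could look like
// the following sketch (comment only; the #48 frame size is illustrative,
// covering just the spill area -- a real frame also reserves space for
// locals and outgoing arguments).  Registers read in a packet see their
// pre-packet values, so allocframe saves the caller's original lr while the
// call writes the helper's return address into lr:
//
//	{
//		call __save_r27_through_r16
//		allocframe(#48)
//	}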

FUNCTION_BEGIN __save_r27_through_r16
		memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
		memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
		memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
		memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
		memd(fp+#-16) = r25:24
	{
		memd(fp+#-8) = r27:26
		jumpr lr
	}
FUNCTION_END __save_r27_through_r24




// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
// with deallocframe.  That way, the return uses the old value of lr, which is
// where these functions need to return, while lr simultaneously receives the
// value it needs going into the sibcall.
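
// An epilogue that ends in a tail call therefore becomes a call to one of
// these helpers followed by the sibcall itself, e.g. (a sketch; tail_callee
// is a hypothetical target):
//
//	call __restore_r27_through_r20_and_deallocframe_before_sibcall
//	jump tail_callee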

FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
	{
		r21:20 = memd(fp+#-32)
		r23:22 = memd(fp+#-24)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
	{
		r25:24 = memd(fp+#-16)
		jump __restore_r27_through_r26_and_deallocframe_before_sibcall
	}
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall




FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
		r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
	{
		r19:18 = memd(fp+#-40)
		r21:20 = memd(fp+#-32)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
	{
		r23:22 = memd(fp+#-24)
		r25:24 = memd(fp+#-16)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
	{
		r27:26 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall




// Here we use the extra load bandwidth to restore LR early, allowing the
// return to occur in parallel with the deallocframe.
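
// Since these routines deallocate the frame and execute the return
// themselves, an epilogue reduces to a single tail jump, e.g. (a sketch):
//
//	jump __restore_r27_through_r16_and_deallocframe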

FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
	{
		r17:16 = memd(fp+#-48)
		r19:18 = memd(fp+#-40)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
	{
		r21:20 = memd(fp+#-32)
		r23:22 = memd(fp+#-24)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
	{
		lr = memw(fp+#4)
		r25:24 = memd(fp+#-16)
	}
	{
		r27:26 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r27_through_r24_and_deallocframe




// Here the load bandwidth is maximized for all three functions.

FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
	{
		r19:18 = memd(fp+#-40)
		r21:20 = memd(fp+#-32)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
	{
		r23:22 = memd(fp+#-24)
		r25:24 = memd(fp+#-16)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
	{
		r27:26 = memd(fp+#-8)
		deallocframe
	}
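		// Unlike the variant above, lr is not reloaded separately here;
		// deallocframe restores it, so the jump must issue in a later
		// packet to see the restored value.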
		jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe