//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//


// Functions that implement common sequences in function prologues and
// epilogues.  They are used to save code size.

// FUNCTION_BEGIN name
//   Opens the definition of the global function `name`: switches to .text,
//   exports the symbol, marks it as a function, applies .falign alignment,
//   and emits the entry label.  The definition is closed later by a .size
//   directive for the same name.
	.macro FUNCTION_BEGIN name
	.text
	.globl \name
	.type  \name, @function
	.falign
\name:
	.endm

// FUNCTION_END name
//   Closes the definition of `name` by recording its size as the distance
//   from its label to the current location.
	.macro FUNCTION_END name
	.size  \name, . - \name
	.endm

// FALLTHROUGH_TAIL_CALL name0 name1
//   Closes function `name0` without emitting a return and opens the global
//   function `name1` at the current location, so that code entered at name0
//   runs straight on into name1.
//   The size of name0 is taken before .falign, so any padding that .falign
//   inserts is not counted as part of name0.
	.macro FALLTHROUGH_TAIL_CALL name0 name1
	.size \name0, . - \name0
	.globl \name1
	.type \name1, @function
	.falign
\name1:
	.endm



// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.


// The compiler knows that the __save_* functions clobber LR.  No other
// registers should be used without informing the compiler.

// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.

// __save_r27_through_r16, with fall-through entry points
// __save_r27_through_r18, __save_r27_through_r20, __save_r27_through_r22 and
// __save_r27_through_r24.
//
// Entering at __save_r27_through_rN stores every register pair from
// r(N+1):N up to r27:26 into its slot below fp, then returns through lr.
// The code below only writes memory; it does not modify any register.
FUNCTION_BEGIN __save_r27_through_r16
		memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
		memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
		memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
		memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
		memd(fp+#-16) = r25:24
	// The last store shares its packet with the return.
	{
		memd(fp+#-8) = r27:26
		jumpr lr
	}
FUNCTION_END __save_r27_through_r24




// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
// with deallocframe.  That way, the return uses the old value of lr, which is
// where these functions need to return to, and at the same time lr receives
// the value it needs going into the sibcall.

// __restore_r27_through_r20_and_deallocframe_before_sibcall, with fall-through
// entry point __restore_r27_through_r24_and_deallocframe_before_sibcall.
//
// The r20 entry reloads r21:20 and r23:22; both entries then reload r25:24.
// The r16/r18/r22 chain pairs its loads differently (r19:18 with r21:20,
// r23:22 with r25:24), so these two entries cannot fall into it.  They finish
// by jumping to __restore_r27_through_r26_and_deallocframe_before_sibcall,
// which reloads r27:26, runs deallocframe and returns.
FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
	{
		r21:20 = memd(fp+#-32)
		r23:22 = memd(fp+#-24)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
	// The r25:24 reload shares its packet with the jump to the common tail.
	{
		r25:24 = memd(fp+#-16)
		jump __restore_r27_through_r26_and_deallocframe_before_sibcall
	}
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall



// __restore_r27_through_r16_and_deallocframe_before_sibcall, with fall-through
// entry points for r18, r22 and r26.
//
// Entering at the rN entry reloads every register pair from r(N+1):N up to
// r27:26 from its slot below fp, runs deallocframe, and returns to the
// address that was in lr on entry.
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
		r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
	{
		r19:18 = memd(fp+#-40)
		r21:20 = memd(fp+#-32)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
	{
		r23:22 = memd(fp+#-24)
		r25:24 = memd(fp+#-16)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
	// jumpr lr is in the same packet as deallocframe on purpose: the jump
	// uses the lr value from before the packet (this helper's own return
	// address), while deallocframe gives lr the value needed for the sibcall.
	{
		r27:26 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall




// Here we use the extra load bandwidth to restore LR early, allowing the return
// to occur in parallel with the deallocframe.

// __restore_r27_through_r16_and_deallocframe, with fall-through entry points
// __restore_r27_through_r20_and_deallocframe and
// __restore_r27_through_r24_and_deallocframe.
//
// Entering at the rN entry reloads every register pair from r(N+1):N up to
// r27:26 from its slot below fp, reloads lr from fp+#4, runs deallocframe,
// and returns through the reloaded lr.
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
	{
		r17:16 = memd(fp+#-48)
		r19:18 = memd(fp+#-40)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
	{
		r21:20 = memd(fp+#-32)
		r23:22 = memd(fp+#-24)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
	// Reload lr one packet early, alongside the r25:24 reload, so that the
	// jumpr in the next packet already has the return address available.
	{
		lr = memw(fp+#4)
		r25:24 = memd(fp+#-16)
	}
	// The return runs in the same packet as deallocframe, using the lr value
	// loaded by the previous packet.
	{
		r27:26 = memd(fp+#-8)
		deallocframe
		jumpr lr
	}
FUNCTION_END __restore_r27_through_r24_and_deallocframe




// Here the load bandwidth is maximized for all three entry points (r18, r22
// and r26).

// __restore_r27_through_r18_and_deallocframe, with fall-through entry points
// __restore_r27_through_r22_and_deallocframe and
// __restore_r27_through_r26_and_deallocframe.
//
// Entering at the rN entry reloads every register pair from r(N+1):N up to
// r27:26 from its slot below fp, runs deallocframe, and then returns.
FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
	{
		r19:18 = memd(fp+#-40)
		r21:20 = memd(fp+#-32)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
	{
		r23:22 = memd(fp+#-24)
		r25:24 = memd(fp+#-16)
	}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
	{
		r27:26 = memd(fp+#-8)
		deallocframe
	}
	// lr is not reloaded separately on this path, so the return has to wait
	// for the lr value that deallocframe restores; a jumpr placed inside the
	// packet above would still use the lr value from before that packet.
		jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe