; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -misched=gcn-ilp -verify-machineinstrs < %s | FileCheck %s

; We expect three-digit VGPR usage here since only one wave per EU is requested.
; CHECK: NumVgprs: {{[0-9][0-9][0-9]$}}
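; The kernel body below is 64 independent chains of three LDS loads feeding an
; fmuladd; all 64 results stay live until they are stored to consecutive global
; memory locations at the end, which keeps register pressure high when the
; scheduler may use a full wave's worth of VGPRs.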

define amdgpu_kernel void @load_fma_store(float addrspace(3)* nocapture readonly %arg, float addrspace(1)* nocapture %arg1) #1 {
bb:
  %tmp = getelementptr inbounds float, float addrspace(3)* %arg, i32 1
  %tmp2 = load float, float addrspace(3)* %tmp, align 4
  %tmp3 = getelementptr inbounds float, float addrspace(3)* %arg, i32 2
  %tmp4 = load float, float addrspace(3)* %tmp3, align 4
  %tmp5 = getelementptr inbounds float, float addrspace(3)* %arg, i32 3
  %tmp6 = load float, float addrspace(3)* %tmp5, align 4
  %tmp7 = tail call float @llvm.fmuladd.f32(float %tmp2, float %tmp4, float %tmp6)
  %tmp8 = getelementptr inbounds float, float addrspace(3)* %arg, i32 5
  %tmp9 = load float, float addrspace(3)* %tmp8, align 4
  %tmp10 = getelementptr inbounds float, float addrspace(3)* %arg, i32 6
  %tmp11 = load float, float addrspace(3)* %tmp10, align 4
  %tmp12 = getelementptr inbounds float, float addrspace(3)* %arg, i32 7
  %tmp13 = load float, float addrspace(3)* %tmp12, align 4
  %tmp14 = tail call float @llvm.fmuladd.f32(float %tmp9, float %tmp11, float %tmp13)
  %tmp15 = getelementptr inbounds float, float addrspace(3)* %arg, i32 9
  %tmp16 = load float, float addrspace(3)* %tmp15, align 4
  %tmp17 = getelementptr inbounds float, float addrspace(3)* %arg, i32 10
  %tmp18 = load float, float addrspace(3)* %tmp17, align 4
  %tmp19 = getelementptr inbounds float, float addrspace(3)* %arg, i32 11
  %tmp20 = load float, float addrspace(3)* %tmp19, align 4
  %tmp21 = tail call float @llvm.fmuladd.f32(float %tmp16, float %tmp18, float %tmp20)
  %tmp22 = getelementptr inbounds float, float addrspace(3)* %arg, i32 13
  %tmp23 = load float, float addrspace(3)* %tmp22, align 4
  %tmp24 = getelementptr inbounds float, float addrspace(3)* %arg, i32 14
  %tmp25 = load float, float addrspace(3)* %tmp24, align 4
  %tmp26 = getelementptr inbounds float, float addrspace(3)* %arg, i32 15
  %tmp27 = load float, float addrspace(3)* %tmp26, align 4
  %tmp28 = tail call float @llvm.fmuladd.f32(float %tmp23, float %tmp25, float %tmp27)
  %tmp29 = getelementptr inbounds float, float addrspace(3)* %arg, i32 17
  %tmp30 = load float, float addrspace(3)* %tmp29, align 4
  %tmp31 = getelementptr inbounds float, float addrspace(3)* %arg, i32 18
  %tmp32 = load float, float addrspace(3)* %tmp31, align 4
  %tmp33 = getelementptr inbounds float, float addrspace(3)* %arg, i32 19
  %tmp34 = load float, float addrspace(3)* %tmp33, align 4
  %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp30, float %tmp32, float %tmp34)
  %tmp36 = getelementptr inbounds float, float addrspace(3)* %arg, i32 21
  %tmp37 = load float, float addrspace(3)* %tmp36, align 4
  %tmp38 = getelementptr inbounds float, float addrspace(3)* %arg, i32 22
  %tmp39 = load float, float addrspace(3)* %tmp38, align 4
  %tmp40 = getelementptr inbounds float, float addrspace(3)* %arg, i32 23
  %tmp41 = load float, float addrspace(3)* %tmp40, align 4
  %tmp42 = tail call float @llvm.fmuladd.f32(float %tmp37, float %tmp39, float %tmp41)
  %tmp43 = getelementptr inbounds float, float addrspace(3)* %arg, i32 25
  %tmp44 = load float, float addrspace(3)* %tmp43, align 4
  %tmp45 = getelementptr inbounds float, float addrspace(3)* %arg, i32 26
  %tmp46 = load float, float addrspace(3)* %tmp45, align 4
  %tmp47 = getelementptr inbounds float, float addrspace(3)* %arg, i32 27
  %tmp48 = load float, float addrspace(3)* %tmp47, align 4
  %tmp49 = tail call float @llvm.fmuladd.f32(float %tmp44, float %tmp46, float %tmp48)
  %tmp50 = getelementptr inbounds float, float addrspace(3)* %arg, i32 29
  %tmp51 = load float, float addrspace(3)* %tmp50, align 4
  %tmp52 = getelementptr inbounds float, float addrspace(3)* %arg, i32 30
  %tmp53 = load float, float addrspace(3)* %tmp52, align 4
  %tmp54 = getelementptr inbounds float, float addrspace(3)* %arg, i32 31
  %tmp55 = load float, float addrspace(3)* %tmp54, align 4
  %tmp56 = tail call float @llvm.fmuladd.f32(float %tmp51, float %tmp53, float %tmp55)
  %tmp57 = getelementptr inbounds float, float addrspace(3)* %arg, i32 33
  %tmp58 = load float, float addrspace(3)* %tmp57, align 4
  %tmp59 = getelementptr inbounds float, float addrspace(3)* %arg, i32 34
  %tmp60 = load float, float addrspace(3)* %tmp59, align 4
  %tmp61 = getelementptr inbounds float, float addrspace(3)* %arg, i32 35
  %tmp62 = load float, float addrspace(3)* %tmp61, align 4
  %tmp63 = tail call float @llvm.fmuladd.f32(float %tmp58, float %tmp60, float %tmp62)
  %tmp64 = getelementptr inbounds float, float addrspace(3)* %arg, i32 37
  %tmp65 = load float, float addrspace(3)* %tmp64, align 4
  %tmp66 = getelementptr inbounds float, float addrspace(3)* %arg, i32 38
  %tmp67 = load float, float addrspace(3)* %tmp66, align 4
  %tmp68 = getelementptr inbounds float, float addrspace(3)* %arg, i32 39
  %tmp69 = load float, float addrspace(3)* %tmp68, align 4
  %tmp70 = tail call float @llvm.fmuladd.f32(float %tmp65, float %tmp67, float %tmp69)
  %tmp71 = getelementptr inbounds float, float addrspace(3)* %arg, i32 41
  %tmp72 = load float, float addrspace(3)* %tmp71, align 4
  %tmp73 = getelementptr inbounds float, float addrspace(3)* %arg, i32 42
  %tmp74 = load float, float addrspace(3)* %tmp73, align 4
  %tmp75 = getelementptr inbounds float, float addrspace(3)* %arg, i32 43
  %tmp76 = load float, float addrspace(3)* %tmp75, align 4
  %tmp77 = tail call float @llvm.fmuladd.f32(float %tmp72, float %tmp74, float %tmp76)
  %tmp78 = getelementptr inbounds float, float addrspace(3)* %arg, i32 45
  %tmp79 = load float, float addrspace(3)* %tmp78, align 4
  %tmp80 = getelementptr inbounds float, float addrspace(3)* %arg, i32 46
  %tmp81 = load float, float addrspace(3)* %tmp80, align 4
  %tmp82 = getelementptr inbounds float, float addrspace(3)* %arg, i32 47
  %tmp83 = load float, float addrspace(3)* %tmp82, align 4
  %tmp84 = tail call float @llvm.fmuladd.f32(float %tmp79, float %tmp81, float %tmp83)
  %tmp85 = getelementptr inbounds float, float addrspace(3)* %arg, i32 49
  %tmp86 = load float, float addrspace(3)* %tmp85, align 4
  %tmp87 = getelementptr inbounds float, float addrspace(3)* %arg, i32 50
  %tmp88 = load float, float addrspace(3)* %tmp87, align 4
  %tmp89 = getelementptr inbounds float, float addrspace(3)* %arg, i32 51
  %tmp90 = load float, float addrspace(3)* %tmp89, align 4
  %tmp91 = tail call float @llvm.fmuladd.f32(float %tmp86, float %tmp88, float %tmp90)
  %tmp92 = getelementptr inbounds float, float addrspace(3)* %arg, i32 53
  %tmp93 = load float, float addrspace(3)* %tmp92, align 4
  %tmp94 = getelementptr inbounds float, float addrspace(3)* %arg, i32 54
  %tmp95 = load float, float addrspace(3)* %tmp94, align 4
  %tmp96 = getelementptr inbounds float, float addrspace(3)* %arg, i32 55
  %tmp97 = load float, float addrspace(3)* %tmp96, align 4
  %tmp98 = tail call float @llvm.fmuladd.f32(float %tmp93, float %tmp95, float %tmp97)
  %tmp99 = getelementptr inbounds float, float addrspace(3)* %arg, i32 57
  %tmp100 = load float, float addrspace(3)* %tmp99, align 4
  %tmp101 = getelementptr inbounds float, float addrspace(3)* %arg, i32 58
  %tmp102 = load float, float addrspace(3)* %tmp101, align 4
  %tmp103 = getelementptr inbounds float, float addrspace(3)* %arg, i32 59
  %tmp104 = load float, float addrspace(3)* %tmp103, align 4
  %tmp105 = tail call float @llvm.fmuladd.f32(float %tmp100, float %tmp102, float %tmp104)
  %tmp106 = getelementptr inbounds float, float addrspace(3)* %arg, i32 61
  %tmp107 = load float, float addrspace(3)* %tmp106, align 4
  %tmp108 = getelementptr inbounds float, float addrspace(3)* %arg, i32 62
  %tmp109 = load float, float addrspace(3)* %tmp108, align 4
  %tmp110 = getelementptr inbounds float, float addrspace(3)* %arg, i32 63
  %tmp111 = load float, float addrspace(3)* %tmp110, align 4
  %tmp112 = tail call float @llvm.fmuladd.f32(float %tmp107, float %tmp109, float %tmp111)
  %tmp113 = getelementptr inbounds float, float addrspace(3)* %arg, i32 65
  %tmp114 = load float, float addrspace(3)* %tmp113, align 4
  %tmp115 = getelementptr inbounds float, float addrspace(3)* %arg, i32 66
  %tmp116 = load float, float addrspace(3)* %tmp115, align 4
  %tmp117 = getelementptr inbounds float, float addrspace(3)* %arg, i32 67
  %tmp118 = load float, float addrspace(3)* %tmp117, align 4
  %tmp119 = tail call float @llvm.fmuladd.f32(float %tmp114, float %tmp116, float %tmp118)
  %tmp120 = getelementptr inbounds float, float addrspace(3)* %arg, i32 69
  %tmp121 = load float, float addrspace(3)* %tmp120, align 4
  %tmp122 = getelementptr inbounds float, float addrspace(3)* %arg, i32 70
  %tmp123 = load float, float addrspace(3)* %tmp122, align 4
  %tmp124 = getelementptr inbounds float, float addrspace(3)* %arg, i32 71
  %tmp125 = load float, float addrspace(3)* %tmp124, align 4
  %tmp126 = tail call float @llvm.fmuladd.f32(float %tmp121, float %tmp123, float %tmp125)
  %tmp127 = getelementptr inbounds float, float addrspace(3)* %arg, i32 73
  %tmp128 = load float, float addrspace(3)* %tmp127, align 4
  %tmp129 = getelementptr inbounds float, float addrspace(3)* %arg, i32 74
  %tmp130 = load float, float addrspace(3)* %tmp129, align 4
  %tmp131 = getelementptr inbounds float, float addrspace(3)* %arg, i32 75
  %tmp132 = load float, float addrspace(3)* %tmp131, align 4
  %tmp133 = tail call float @llvm.fmuladd.f32(float %tmp128, float %tmp130, float %tmp132)
  %tmp134 = getelementptr inbounds float, float addrspace(3)* %arg, i32 77
  %tmp135 = load float, float addrspace(3)* %tmp134, align 4
  %tmp136 = getelementptr inbounds float, float addrspace(3)* %arg, i32 78
  %tmp137 = load float, float addrspace(3)* %tmp136, align 4
  %tmp138 = getelementptr inbounds float, float addrspace(3)* %arg, i32 79
  %tmp139 = load float, float addrspace(3)* %tmp138, align 4
  %tmp140 = tail call float @llvm.fmuladd.f32(float %tmp135, float %tmp137, float %tmp139)
  %tmp141 = getelementptr inbounds float, float addrspace(3)* %arg, i32 81
  %tmp142 = load float, float addrspace(3)* %tmp141, align 4
  %tmp143 = getelementptr inbounds float, float addrspace(3)* %arg, i32 82
  %tmp144 = load float, float addrspace(3)* %tmp143, align 4
  %tmp145 = getelementptr inbounds float, float addrspace(3)* %arg, i32 83
  %tmp146 = load float, float addrspace(3)* %tmp145, align 4
  %tmp147 = tail call float @llvm.fmuladd.f32(float %tmp142, float %tmp144, float %tmp146)
  %tmp148 = getelementptr inbounds float, float addrspace(3)* %arg, i32 85
  %tmp149 = load float, float addrspace(3)* %tmp148, align 4
  %tmp150 = getelementptr inbounds float, float addrspace(3)* %arg, i32 86
  %tmp151 = load float, float addrspace(3)* %tmp150, align 4
  %tmp152 = getelementptr inbounds float, float addrspace(3)* %arg, i32 87
  %tmp153 = load float, float addrspace(3)* %tmp152, align 4
  %tmp154 = tail call float @llvm.fmuladd.f32(float %tmp149, float %tmp151, float %tmp153)
  %tmp155 = getelementptr inbounds float, float addrspace(3)* %arg, i32 89
  %tmp156 = load float, float addrspace(3)* %tmp155, align 4
  %tmp157 = getelementptr inbounds float, float addrspace(3)* %arg, i32 90
  %tmp158 = load float, float addrspace(3)* %tmp157, align 4
  %tmp159 = getelementptr inbounds float, float addrspace(3)* %arg, i32 91
  %tmp160 = load float, float addrspace(3)* %tmp159, align 4
  %tmp161 = tail call float @llvm.fmuladd.f32(float %tmp156, float %tmp158, float %tmp160)
  %tmp162 = getelementptr inbounds float, float addrspace(3)* %arg, i32 93
  %tmp163 = load float, float addrspace(3)* %tmp162, align 4
  %tmp164 = getelementptr inbounds float, float addrspace(3)* %arg, i32 94
  %tmp165 = load float, float addrspace(3)* %tmp164, align 4
  %tmp166 = getelementptr inbounds float, float addrspace(3)* %arg, i32 95
  %tmp167 = load float, float addrspace(3)* %tmp166, align 4
  %tmp168 = tail call float @llvm.fmuladd.f32(float %tmp163, float %tmp165, float %tmp167)
  %tmp169 = getelementptr inbounds float, float addrspace(3)* %arg, i32 97
  %tmp170 = load float, float addrspace(3)* %tmp169, align 4
  %tmp171 = getelementptr inbounds float, float addrspace(3)* %arg, i32 98
  %tmp172 = load float, float addrspace(3)* %tmp171, align 4
  %tmp173 = getelementptr inbounds float, float addrspace(3)* %arg, i32 99
  %tmp174 = load float, float addrspace(3)* %tmp173, align 4
  %tmp175 = tail call float @llvm.fmuladd.f32(float %tmp170, float %tmp172, float %tmp174)
  %tmp176 = getelementptr inbounds float, float addrspace(3)* %arg, i32 101
  %tmp177 = load float, float addrspace(3)* %tmp176, align 4
  %tmp178 = getelementptr inbounds float, float addrspace(3)* %arg, i32 102
  %tmp179 = load float, float addrspace(3)* %tmp178, align 4
  %tmp180 = getelementptr inbounds float, float addrspace(3)* %arg, i32 103
  %tmp181 = load float, float addrspace(3)* %tmp180, align 4
  %tmp182 = tail call float @llvm.fmuladd.f32(float %tmp177, float %tmp179, float %tmp181)
  %tmp183 = getelementptr inbounds float, float addrspace(3)* %arg, i32 105
  %tmp184 = load float, float addrspace(3)* %tmp183, align 4
  %tmp185 = getelementptr inbounds float, float addrspace(3)* %arg, i32 106
  %tmp186 = load float, float addrspace(3)* %tmp185, align 4
  %tmp187 = getelementptr inbounds float, float addrspace(3)* %arg, i32 107
  %tmp188 = load float, float addrspace(3)* %tmp187, align 4
  %tmp189 = tail call float @llvm.fmuladd.f32(float %tmp184, float %tmp186, float %tmp188)
  %tmp190 = getelementptr inbounds float, float addrspace(3)* %arg, i32 109
  %tmp191 = load float, float addrspace(3)* %tmp190, align 4
  %tmp192 = getelementptr inbounds float, float addrspace(3)* %arg, i32 110
  %tmp193 = load float, float addrspace(3)* %tmp192, align 4
  %tmp194 = getelementptr inbounds float, float addrspace(3)* %arg, i32 111
  %tmp195 = load float, float addrspace(3)* %tmp194, align 4
  %tmp196 = tail call float @llvm.fmuladd.f32(float %tmp191, float %tmp193, float %tmp195)
  %tmp197 = getelementptr inbounds float, float addrspace(3)* %arg, i32 113
  %tmp198 = load float, float addrspace(3)* %tmp197, align 4
  %tmp199 = getelementptr inbounds float, float addrspace(3)* %arg, i32 114
  %tmp200 = load float, float addrspace(3)* %tmp199, align 4
  %tmp201 = getelementptr inbounds float, float addrspace(3)* %arg, i32 115
  %tmp202 = load float, float addrspace(3)* %tmp201, align 4
  %tmp203 = tail call float @llvm.fmuladd.f32(float %tmp198, float %tmp200, float %tmp202)
  %tmp204 = getelementptr inbounds float, float addrspace(3)* %arg, i32 117
  %tmp205 = load float, float addrspace(3)* %tmp204, align 4
  %tmp206 = getelementptr inbounds float, float addrspace(3)* %arg, i32 118
  %tmp207 = load float, float addrspace(3)* %tmp206, align 4
  %tmp208 = getelementptr inbounds float, float addrspace(3)* %arg, i32 119
  %tmp209 = load float, float addrspace(3)* %tmp208, align 4
  %tmp210 = tail call float @llvm.fmuladd.f32(float %tmp205, float %tmp207, float %tmp209)
  %tmp211 = getelementptr inbounds float, float addrspace(3)* %arg, i32 121
  %tmp212 = load float, float addrspace(3)* %tmp211, align 4
  %tmp213 = getelementptr inbounds float, float addrspace(3)* %arg, i32 122
  %tmp214 = load float, float addrspace(3)* %tmp213, align 4
  %tmp215 = getelementptr inbounds float, float addrspace(3)* %arg, i32 123
  %tmp216 = load float, float addrspace(3)* %tmp215, align 4
  %tmp217 = tail call float @llvm.fmuladd.f32(float %tmp212, float %tmp214, float %tmp216)
  %tmp218 = getelementptr inbounds float, float addrspace(3)* %arg, i32 125
  %tmp219 = load float, float addrspace(3)* %tmp218, align 4
  %tmp220 = getelementptr inbounds float, float addrspace(3)* %arg, i32 126
  %tmp221 = load float, float addrspace(3)* %tmp220, align 4
  %tmp222 = getelementptr inbounds float, float addrspace(3)* %arg, i32 127
  %tmp223 = load float, float addrspace(3)* %tmp222, align 4
  %tmp224 = tail call float @llvm.fmuladd.f32(float %tmp219, float %tmp221, float %tmp223)
  %tmp225 = getelementptr inbounds float, float addrspace(3)* %arg, i32 129
  %tmp226 = load float, float addrspace(3)* %tmp225, align 4
  %tmp227 = getelementptr inbounds float, float addrspace(3)* %arg, i32 130
  %tmp228 = load float, float addrspace(3)* %tmp227, align 4
  %tmp229 = getelementptr inbounds float, float addrspace(3)* %arg, i32 131
  %tmp230 = load float, float addrspace(3)* %tmp229, align 4
  %tmp231 = tail call float @llvm.fmuladd.f32(float %tmp226, float %tmp228, float %tmp230)
  %tmp232 = getelementptr inbounds float, float addrspace(3)* %arg, i32 133
  %tmp233 = load float, float addrspace(3)* %tmp232, align 4
  %tmp234 = getelementptr inbounds float, float addrspace(3)* %arg, i32 134
  %tmp235 = load float, float addrspace(3)* %tmp234, align 4
  %tmp236 = getelementptr inbounds float, float addrspace(3)* %arg, i32 135
  %tmp237 = load float, float addrspace(3)* %tmp236, align 4
  %tmp238 = tail call float @llvm.fmuladd.f32(float %tmp233, float %tmp235, float %tmp237)
  %tmp239 = getelementptr inbounds float, float addrspace(3)* %arg, i32 137
  %tmp240 = load float, float addrspace(3)* %tmp239, align 4
  %tmp241 = getelementptr inbounds float, float addrspace(3)* %arg, i32 138
  %tmp242 = load float, float addrspace(3)* %tmp241, align 4
  %tmp243 = getelementptr inbounds float, float addrspace(3)* %arg, i32 139
  %tmp244 = load float, float addrspace(3)* %tmp243, align 4
  %tmp245 = tail call float @llvm.fmuladd.f32(float %tmp240, float %tmp242, float %tmp244)
  %tmp246 = getelementptr inbounds float, float addrspace(3)* %arg, i32 141
  %tmp247 = load float, float addrspace(3)* %tmp246, align 4
  %tmp248 = getelementptr inbounds float, float addrspace(3)* %arg, i32 142
  %tmp249 = load float, float addrspace(3)* %tmp248, align 4
  %tmp250 = getelementptr inbounds float, float addrspace(3)* %arg, i32 143
  %tmp251 = load float, float addrspace(3)* %tmp250, align 4
  %tmp252 = tail call float @llvm.fmuladd.f32(float %tmp247, float %tmp249, float %tmp251)
  %tmp253 = getelementptr inbounds float, float addrspace(3)* %arg, i32 145
  %tmp254 = load float, float addrspace(3)* %tmp253, align 4
  %tmp255 = getelementptr inbounds float, float addrspace(3)* %arg, i32 146
  %tmp256 = load float, float addrspace(3)* %tmp255, align 4
  %tmp257 = getelementptr inbounds float, float addrspace(3)* %arg, i32 147
  %tmp258 = load float, float addrspace(3)* %tmp257, align 4
  %tmp259 = tail call float @llvm.fmuladd.f32(float %tmp254, float %tmp256, float %tmp258)
  %tmp260 = getelementptr inbounds float, float addrspace(3)* %arg, i32 149
  %tmp261 = load float, float addrspace(3)* %tmp260, align 4
  %tmp262 = getelementptr inbounds float, float addrspace(3)* %arg, i32 150
  %tmp263 = load float, float addrspace(3)* %tmp262, align 4
  %tmp264 = getelementptr inbounds float, float addrspace(3)* %arg, i32 151
  %tmp265 = load float, float addrspace(3)* %tmp264, align 4
  %tmp266 = tail call float @llvm.fmuladd.f32(float %tmp261, float %tmp263, float %tmp265)
  %tmp267 = getelementptr inbounds float, float addrspace(3)* %arg, i32 153
  %tmp268 = load float, float addrspace(3)* %tmp267, align 4
  %tmp269 = getelementptr inbounds float, float addrspace(3)* %arg, i32 154
  %tmp270 = load float, float addrspace(3)* %tmp269, align 4
  %tmp271 = getelementptr inbounds float, float addrspace(3)* %arg, i32 155
  %tmp272 = load float, float addrspace(3)* %tmp271, align 4
  %tmp273 = tail call float @llvm.fmuladd.f32(float %tmp268, float %tmp270, float %tmp272)
  %tmp274 = getelementptr inbounds float, float addrspace(3)* %arg, i32 157
  %tmp275 = load float, float addrspace(3)* %tmp274, align 4
  %tmp276 = getelementptr inbounds float, float addrspace(3)* %arg, i32 158
  %tmp277 = load float, float addrspace(3)* %tmp276, align 4
  %tmp278 = getelementptr inbounds float, float addrspace(3)* %arg, i32 159
  %tmp279 = load float, float addrspace(3)* %tmp278, align 4
  %tmp280 = tail call float @llvm.fmuladd.f32(float %tmp275, float %tmp277, float %tmp279)
  %tmp281 = getelementptr inbounds float, float addrspace(3)* %arg, i32 161
  %tmp282 = load float, float addrspace(3)* %tmp281, align 4
  %tmp283 = getelementptr inbounds float, float addrspace(3)* %arg, i32 162
  %tmp284 = load float, float addrspace(3)* %tmp283, align 4
  %tmp285 = getelementptr inbounds float, float addrspace(3)* %arg, i32 163
  %tmp286 = load float, float addrspace(3)* %tmp285, align 4
  %tmp287 = tail call float @llvm.fmuladd.f32(float %tmp282, float %tmp284, float %tmp286)
  %tmp288 = getelementptr inbounds float, float addrspace(3)* %arg, i32 165
  %tmp289 = load float, float addrspace(3)* %tmp288, align 4
  %tmp290 = getelementptr inbounds float, float addrspace(3)* %arg, i32 166
  %tmp291 = load float, float addrspace(3)* %tmp290, align 4
  %tmp292 = getelementptr inbounds float, float addrspace(3)* %arg, i32 167
  %tmp293 = load float, float addrspace(3)* %tmp292, align 4
  %tmp294 = tail call float @llvm.fmuladd.f32(float %tmp289, float %tmp291, float %tmp293)
  %tmp295 = getelementptr inbounds float, float addrspace(3)* %arg, i32 169
  %tmp296 = load float, float addrspace(3)* %tmp295, align 4
  %tmp297 = getelementptr inbounds float, float addrspace(3)* %arg, i32 170
  %tmp298 = load float, float addrspace(3)* %tmp297, align 4
  %tmp299 = getelementptr inbounds float, float addrspace(3)* %arg, i32 171
  %tmp300 = load float, float addrspace(3)* %tmp299, align 4
  %tmp301 = tail call float @llvm.fmuladd.f32(float %tmp296, float %tmp298, float %tmp300)
  %tmp302 = getelementptr inbounds float, float addrspace(3)* %arg, i32 173
  %tmp303 = load float, float addrspace(3)* %tmp302, align 4
  %tmp304 = getelementptr inbounds float, float addrspace(3)* %arg, i32 174
  %tmp305 = load float, float addrspace(3)* %tmp304, align 4
  %tmp306 = getelementptr inbounds float, float addrspace(3)* %arg, i32 175
  %tmp307 = load float, float addrspace(3)* %tmp306, align 4
  %tmp308 = tail call float @llvm.fmuladd.f32(float %tmp303, float %tmp305, float %tmp307)
  %tmp309 = getelementptr inbounds float, float addrspace(3)* %arg, i32 177
  %tmp310 = load float, float addrspace(3)* %tmp309, align 4
  %tmp311 = getelementptr inbounds float, float addrspace(3)* %arg, i32 178
  %tmp312 = load float, float addrspace(3)* %tmp311, align 4
  %tmp313 = getelementptr inbounds float, float addrspace(3)* %arg, i32 179
  %tmp314 = load float, float addrspace(3)* %tmp313, align 4
  %tmp315 = tail call float @llvm.fmuladd.f32(float %tmp310, float %tmp312, float %tmp314)
  %tmp316 = getelementptr inbounds float, float addrspace(3)* %arg, i32 181
  %tmp317 = load float, float addrspace(3)* %tmp316, align 4
  %tmp318 = getelementptr inbounds float, float addrspace(3)* %arg, i32 182
  %tmp319 = load float, float addrspace(3)* %tmp318, align 4
  %tmp320 = getelementptr inbounds float, float addrspace(3)* %arg, i32 183
  %tmp321 = load float, float addrspace(3)* %tmp320, align 4
  %tmp322 = tail call float @llvm.fmuladd.f32(float %tmp317, float %tmp319, float %tmp321)
  %tmp323 = getelementptr inbounds float, float addrspace(3)* %arg, i32 185
  %tmp324 = load float, float addrspace(3)* %tmp323, align 4
  %tmp325 = getelementptr inbounds float, float addrspace(3)* %arg, i32 186
  %tmp326 = load float, float addrspace(3)* %tmp325, align 4
  %tmp327 = getelementptr inbounds float, float addrspace(3)* %arg, i32 187
  %tmp328 = load float, float addrspace(3)* %tmp327, align 4
  %tmp329 = tail call float @llvm.fmuladd.f32(float %tmp324, float %tmp326, float %tmp328)
  %tmp330 = getelementptr inbounds float, float addrspace(3)* %arg, i32 189
  %tmp331 = load float, float addrspace(3)* %tmp330, align 4
  %tmp332 = getelementptr inbounds float, float addrspace(3)* %arg, i32 190
  %tmp333 = load float, float addrspace(3)* %tmp332, align 4
  %tmp334 = getelementptr inbounds float, float addrspace(3)* %arg, i32 191
  %tmp335 = load float, float addrspace(3)* %tmp334, align 4
  %tmp336 = tail call float @llvm.fmuladd.f32(float %tmp331, float %tmp333, float %tmp335)
  %tmp337 = getelementptr inbounds float, float addrspace(3)* %arg, i32 193
  %tmp338 = load float, float addrspace(3)* %tmp337, align 4
  %tmp339 = getelementptr inbounds float, float addrspace(3)* %arg, i32 194
  %tmp340 = load float, float addrspace(3)* %tmp339, align 4
  %tmp341 = getelementptr inbounds float, float addrspace(3)* %arg, i32 195
  %tmp342 = load float, float addrspace(3)* %tmp341, align 4
  %tmp343 = tail call float @llvm.fmuladd.f32(float %tmp338, float %tmp340, float %tmp342)
  %tmp344 = getelementptr inbounds float, float addrspace(3)* %arg, i32 197
  %tmp345 = load float, float addrspace(3)* %tmp344, align 4
  %tmp346 = getelementptr inbounds float, float addrspace(3)* %arg, i32 198
  %tmp347 = load float, float addrspace(3)* %tmp346, align 4
  %tmp348 = getelementptr inbounds float, float addrspace(3)* %arg, i32 199
  %tmp349 = load float, float addrspace(3)* %tmp348, align 4
  %tmp350 = tail call float @llvm.fmuladd.f32(float %tmp345, float %tmp347, float %tmp349)
  %tmp351 = getelementptr inbounds float, float addrspace(3)* %arg, i32 201
  %tmp352 = load float, float addrspace(3)* %tmp351, align 4
  %tmp353 = getelementptr inbounds float, float addrspace(3)* %arg, i32 202
  %tmp354 = load float, float addrspace(3)* %tmp353, align 4
  %tmp355 = getelementptr inbounds float, float addrspace(3)* %arg, i32 203
  %tmp356 = load float, float addrspace(3)* %tmp355, align 4
  %tmp357 = tail call float @llvm.fmuladd.f32(float %tmp352, float %tmp354, float %tmp356)
  %tmp358 = getelementptr inbounds float, float addrspace(3)* %arg, i32 205
  %tmp359 = load float, float addrspace(3)* %tmp358, align 4
  %tmp360 = getelementptr inbounds float, float addrspace(3)* %arg, i32 206
  %tmp361 = load float, float addrspace(3)* %tmp360, align 4
  %tmp362 = getelementptr inbounds float, float addrspace(3)* %arg, i32 207
  %tmp363 = load float, float addrspace(3)* %tmp362, align 4
  %tmp364 = tail call float @llvm.fmuladd.f32(float %tmp359, float %tmp361, float %tmp363)
  %tmp365 = getelementptr inbounds float, float addrspace(3)* %arg, i32 209
  %tmp366 = load float, float addrspace(3)* %tmp365, align 4
  %tmp367 = getelementptr inbounds float, float addrspace(3)* %arg, i32 210
  %tmp368 = load float, float addrspace(3)* %tmp367, align 4
  %tmp369 = getelementptr inbounds float, float addrspace(3)* %arg, i32 211
  %tmp370 = load float, float addrspace(3)* %tmp369, align 4
  %tmp371 = tail call float @llvm.fmuladd.f32(float %tmp366, float %tmp368, float %tmp370)
  %tmp372 = getelementptr inbounds float, float addrspace(3)* %arg, i32 213
  %tmp373 = load float, float addrspace(3)* %tmp372, align 4
  %tmp374 = getelementptr inbounds float, float addrspace(3)* %arg, i32 214
  %tmp375 = load float, float addrspace(3)* %tmp374, align 4
  %tmp376 = getelementptr inbounds float, float addrspace(3)* %arg, i32 215
  %tmp377 = load float, float addrspace(3)* %tmp376, align 4
  %tmp378 = tail call float @llvm.fmuladd.f32(float %tmp373, float %tmp375, float %tmp377)
  %tmp379 = getelementptr inbounds float, float addrspace(3)* %arg, i32 217
  %tmp380 = load float, float addrspace(3)* %tmp379, align 4
  %tmp381 = getelementptr inbounds float, float addrspace(3)* %arg, i32 218
  %tmp382 = load float, float addrspace(3)* %tmp381, align 4
  %tmp383 = getelementptr inbounds float, float addrspace(3)* %arg, i32 219
  %tmp384 = load float, float addrspace(3)* %tmp383, align 4
  %tmp385 = tail call float @llvm.fmuladd.f32(float %tmp380, float %tmp382, float %tmp384)
  %tmp386 = getelementptr inbounds float, float addrspace(3)* %arg, i32 221
  %tmp387 = load float, float addrspace(3)* %tmp386, align 4
  %tmp388 = getelementptr inbounds float, float addrspace(3)* %arg, i32 222
  %tmp389 = load float, float addrspace(3)* %tmp388, align 4
  %tmp390 = getelementptr inbounds float, float addrspace(3)* %arg, i32 223
  %tmp391 = load float, float addrspace(3)* %tmp390, align 4
  %tmp392 = tail call float @llvm.fmuladd.f32(float %tmp387, float %tmp389, float %tmp391)
  %tmp393 = getelementptr inbounds float, float addrspace(3)* %arg, i32 225
  %tmp394 = load float, float addrspace(3)* %tmp393, align 4
  %tmp395 = getelementptr inbounds float, float addrspace(3)* %arg, i32 226
  %tmp396 = load float, float addrspace(3)* %tmp395, align 4
  %tmp397 = getelementptr inbounds float, float addrspace(3)* %arg, i32 227
  %tmp398 = load float, float addrspace(3)* %tmp397, align 4
  %tmp399 = tail call float @llvm.fmuladd.f32(float %tmp394, float %tmp396, float %tmp398)
  %tmp400 = getelementptr inbounds float, float addrspace(3)* %arg, i32 229
  %tmp401 = load float, float addrspace(3)* %tmp400, align 4
  %tmp402 = getelementptr inbounds float, float addrspace(3)* %arg, i32 230
  %tmp403 = load float, float addrspace(3)* %tmp402, align 4
  %tmp404 = getelementptr inbounds float, float addrspace(3)* %arg, i32 231
  %tmp405 = load float, float addrspace(3)* %tmp404, align 4
  %tmp406 = tail call float @llvm.fmuladd.f32(float %tmp401, float %tmp403, float %tmp405)
  %tmp407 = getelementptr inbounds float, float addrspace(3)* %arg, i32 233
  %tmp408 = load float, float addrspace(3)* %tmp407, align 4
  %tmp409 = getelementptr inbounds float, float addrspace(3)* %arg, i32 234
  %tmp410 = load float, float addrspace(3)* %tmp409, align 4
  %tmp411 = getelementptr inbounds float, float addrspace(3)* %arg, i32 235
  %tmp412 = load float, float addrspace(3)* %tmp411, align 4
  %tmp413 = tail call float @llvm.fmuladd.f32(float %tmp408, float %tmp410, float %tmp412)
  %tmp414 = getelementptr inbounds float, float addrspace(3)* %arg, i32 237
  %tmp415 = load float, float addrspace(3)* %tmp414, align 4
  %tmp416 = getelementptr inbounds float, float addrspace(3)* %arg, i32 238
  %tmp417 = load float, float addrspace(3)* %tmp416, align 4
  %tmp418 = getelementptr inbounds float, float addrspace(3)* %arg, i32 239
  %tmp419 = load float, float addrspace(3)* %tmp418, align 4
  %tmp420 = tail call float @llvm.fmuladd.f32(float %tmp415, float %tmp417, float %tmp419)
  %tmp421 = getelementptr inbounds float, float addrspace(3)* %arg, i32 241
  %tmp422 = load float, float addrspace(3)* %tmp421, align 4
  %tmp423 = getelementptr inbounds float, float addrspace(3)* %arg, i32 242
  %tmp424 = load float, float addrspace(3)* %tmp423, align 4
  %tmp425 = getelementptr inbounds float, float addrspace(3)* %arg, i32 243
  %tmp426 = load float, float addrspace(3)* %tmp425, align 4
  %tmp427 = tail call float @llvm.fmuladd.f32(float %tmp422, float %tmp424, float %tmp426)
  %tmp428 = getelementptr inbounds float, float addrspace(3)* %arg, i32 245
  %tmp429 = load float, float addrspace(3)* %tmp428, align 4
  %tmp430 = getelementptr inbounds float, float addrspace(3)* %arg, i32 246
  %tmp431 = load float, float addrspace(3)* %tmp430, align 4
  %tmp432 = getelementptr inbounds float, float addrspace(3)* %arg, i32 247
  %tmp433 = load float, float addrspace(3)* %tmp432, align 4
  %tmp434 = tail call float @llvm.fmuladd.f32(float %tmp429, float %tmp431, float %tmp433)
  %tmp435 = getelementptr inbounds float, float addrspace(3)* %arg, i32 249
  %tmp436 = load float, float addrspace(3)* %tmp435, align 4
  %tmp437 = getelementptr inbounds float, float addrspace(3)* %arg, i32 250
  %tmp438 = load float, float addrspace(3)* %tmp437, align 4
  %tmp439 = getelementptr inbounds float, float addrspace(3)* %arg, i32 251
  %tmp440 = load float, float addrspace(3)* %tmp439, align 4
  %tmp441 = tail call float @llvm.fmuladd.f32(float %tmp436, float %tmp438, float %tmp440)
  %tmp442 = getelementptr inbounds float, float addrspace(3)* %arg, i32 253
  %tmp443 = load float, float addrspace(3)* %tmp442, align 4
  %tmp444 = getelementptr inbounds float, float addrspace(3)* %arg, i32 254
  %tmp445 = load float, float addrspace(3)* %tmp444, align 4
  %tmp446 = getelementptr inbounds float, float addrspace(3)* %arg, i32 255
  %tmp447 = load float, float addrspace(3)* %tmp446, align 4
  %tmp448 = tail call float @llvm.fmuladd.f32(float %tmp443, float %tmp445, float %tmp447)
  store float %tmp7, float addrspace(1)* %arg1, align 4
  %tmp449 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 1
  store float %tmp14, float addrspace(1)* %tmp449, align 4
  %tmp450 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 2
  store float %tmp21, float addrspace(1)* %tmp450, align 4
  %tmp451 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 3
  store float %tmp28, float addrspace(1)* %tmp451, align 4
  %tmp452 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 4
  store float %tmp35, float addrspace(1)* %tmp452, align 4
  %tmp453 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 5
  store float %tmp42, float addrspace(1)* %tmp453, align 4
  %tmp454 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 6
  store float %tmp49, float addrspace(1)* %tmp454, align 4
  %tmp455 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 7
  store float %tmp56, float addrspace(1)* %tmp455, align 4
  %tmp456 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 8
  store float %tmp63, float addrspace(1)* %tmp456, align 4
  %tmp457 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 9
  store float %tmp70, float addrspace(1)* %tmp457, align 4
  %tmp458 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 10
  store float %tmp77, float addrspace(1)* %tmp458, align 4
  %tmp459 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 11
  store float %tmp84, float addrspace(1)* %tmp459, align 4
  %tmp460 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 12
  store float %tmp91, float addrspace(1)* %tmp460, align 4
  %tmp461 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 13
  store float %tmp98, float addrspace(1)* %tmp461, align 4
  %tmp462 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 14
  store float %tmp105, float addrspace(1)* %tmp462, align 4
  %tmp463 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 15
  store float %tmp112, float addrspace(1)* %tmp463, align 4
  %tmp464 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 16
  store float %tmp119, float addrspace(1)* %tmp464, align 4
  %tmp465 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 17
  store float %tmp126, float addrspace(1)* %tmp465, align 4
  %tmp466 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 18
  store float %tmp133, float addrspace(1)* %tmp466, align 4
  %tmp467 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 19
  store float %tmp140, float addrspace(1)* %tmp467, align 4
  %tmp468 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 20
  store float %tmp147, float addrspace(1)* %tmp468, align 4
  %tmp469 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 21
  store float %tmp154, float addrspace(1)* %tmp469, align 4
  %tmp470 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 22
  store float %tmp161, float addrspace(1)* %tmp470, align 4
  %tmp471 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 23
  store float %tmp168, float addrspace(1)* %tmp471, align 4
  %tmp472 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 24
  store float %tmp175, float addrspace(1)* %tmp472, align 4
  %tmp473 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 25
  store float %tmp182, float addrspace(1)* %tmp473, align 4
  %tmp474 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 26
  store float %tmp189, float addrspace(1)* %tmp474, align 4
  %tmp475 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 27
  store float %tmp196, float addrspace(1)* %tmp475, align 4
  %tmp476 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 28
  store float %tmp203, float addrspace(1)* %tmp476, align 4
  %tmp477 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 29
  store float %tmp210, float addrspace(1)* %tmp477, align 4
  %tmp478 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 30
  store float %tmp217, float addrspace(1)* %tmp478, align 4
  %tmp479 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 31
  store float %tmp224, float addrspace(1)* %tmp479, align 4
  %tmp480 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 32
  store float %tmp231, float addrspace(1)* %tmp480, align 4
  %tmp481 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 33
  store float %tmp238, float addrspace(1)* %tmp481, align 4
  %tmp482 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 34
  store float %tmp245, float addrspace(1)* %tmp482, align 4
  %tmp483 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 35
  store float %tmp252, float addrspace(1)* %tmp483, align 4
  %tmp484 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 36
  store float %tmp259, float addrspace(1)* %tmp484, align 4
  %tmp485 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 37
  store float %tmp266, float addrspace(1)* %tmp485, align 4
  %tmp486 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 38
  store float %tmp273, float addrspace(1)* %tmp486, align 4
  %tmp487 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 39
  store float %tmp280, float addrspace(1)* %tmp487, align 4
  %tmp488 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 40
  store float %tmp287, float addrspace(1)* %tmp488, align 4
  %tmp489 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 41
  store float %tmp294, float addrspace(1)* %tmp489, align 4
  %tmp490 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 42
  store float %tmp301, float addrspace(1)* %tmp490, align 4
  %tmp491 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 43
  store float %tmp308, float addrspace(1)* %tmp491, align 4
  %tmp492 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 44
  store float %tmp315, float addrspace(1)* %tmp492, align 4
  %tmp493 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 45
  store float %tmp322, float addrspace(1)* %tmp493, align 4
  %tmp494 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 46
  store float %tmp329, float addrspace(1)* %tmp494, align 4
  %tmp495 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 47
  store float %tmp336, float addrspace(1)* %tmp495, align 4
  %tmp496 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 48
  store float %tmp343, float addrspace(1)* %tmp496, align 4
  %tmp497 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 49
  store float %tmp350, float addrspace(1)* %tmp497, align 4
  %tmp498 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 50
  store float %tmp357, float addrspace(1)* %tmp498, align 4
  %tmp499 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 51
  store float %tmp364, float addrspace(1)* %tmp499, align 4
  %tmp500 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 52
  store float %tmp371, float addrspace(1)* %tmp500, align 4
  %tmp501 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 53
  store float %tmp378, float addrspace(1)* %tmp501, align 4
  %tmp502 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 54
  store float %tmp385, float addrspace(1)* %tmp502, align 4
  %tmp503 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 55
  store float %tmp392, float addrspace(1)* %tmp503, align 4
  %tmp504 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 56
  store float %tmp399, float addrspace(1)* %tmp504, align 4
  %tmp505 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 57
  store float %tmp406, float addrspace(1)* %tmp505, align 4
  %tmp506 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 58
  store float %tmp413, float addrspace(1)* %tmp506, align 4
  %tmp507 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 59
  store float %tmp420, float addrspace(1)* %tmp507, align 4
  %tmp508 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 60
  store float %tmp427, float addrspace(1)* %tmp508, align 4
  %tmp509 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 61
  store float %tmp434, float addrspace(1)* %tmp509, align 4
  %tmp510 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 62
  store float %tmp441, float addrspace(1)* %tmp510, align 4
  %tmp511 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 63
  store float %tmp448, float addrspace(1)* %tmp511, align 4
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.fmuladd.f32(float, float, float) #0

attributes #0 = { nounwind readnone }
attributes #1 = { "amdgpu-waves-per-eu"="1,1" }