1; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
2; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
3
; <4 x float> fadd: expects both operands to be loaded with ld.w from the
; addresses in $5 (%a) and $6 (%b), a single fadd.w on the loaded registers,
; and the sum stored with st.w to the address in $4 (%c).
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: add_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}
19
; <2 x double> fadd: expects both operands to be loaded with ld.d from the
; addresses in $5 (%a) and $6 (%b), a single fadd.d on the loaded registers,
; and the sum stored with st.d to the address in $4 (%c).
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: add_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}
35
; <4 x float> fsub: expects ld.w loads of %a ($5) and %b ($6), a single
; fsub.w with the %a register as the minuend, and an st.w of the difference
; to the address in $4 (%c).
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: sub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}
51
; <2 x double> fsub: expects ld.d loads of %a ($5) and %b ($6), a single
; fsub.d with the %a register as the minuend, and an st.d of the difference
; to the address in $4 (%c).
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: sub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}
67
; <4 x float> fmul: expects ld.w loads of %a ($5) and %b ($6), a single
; fmul.w on the loaded registers, and an st.w of the product to the address
; in $4 (%c).
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: mul_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}
83
; <2 x double> fmul: expects ld.d loads of %a ($5) and %b ($6), a single
; fmul.d on the loaded registers, and an st.d of the product to the address
; in $4 (%c).
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: mul_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}
99
; llvm.fma on <4 x float>: expects the three operands to be loaded with ld.w
; from $5 (%a), $6 (%b) and $7 (%c), a single fmadd.w that accumulates into
; the register holding %a's value, and that register stored to $4 (%d).
;
; NOTE(review): llvm.fma(%1, %2, %3) computes %1 * %2 + %3, whereas MSA
; fmadd.w is documented as wd = wd + ws * wt. The operand order expected
; below makes the %a vector the accumulator; confirm against the MSA manual
; and the backend selection pattern that this is the intended mapping.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK-LABEL: fma_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}
119
; llvm.fma on <2 x double>: expects the three operands to be loaded with ld.d
; from $5 (%a), $6 (%b) and $7 (%c), a single fmadd.d that accumulates into
; the register holding %a's value, and that register stored to $4 (%d).
;
; NOTE(review): llvm.fma(%1, %2, %3) computes %1 * %2 + %3, whereas MSA
; fmadd.d is documented as wd = wd + ws * wt. The operand order expected
; below makes the %a vector the accumulator; confirm against the MSA manual
; and the backend selection pattern that this is the intended mapping.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK-LABEL: fma_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}
139
; %a - (%b * %c) on <4 x float>, written as separate fmul and fsub: expects
; the pair to be selected as one fmsub.w that updates the register holding
; %a's value in place, with that register then stored to $4 (%d).
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                         <4 x float>* %c) nounwind {
  ; CHECK-LABEL: fmsub_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <4 x float> %2, %3
  %5 = fsub <4 x float> %1, %4
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}
159
; %a - (%b * %c) on <2 x double>, written as separate fmul and fsub: expects
; the pair to be selected as one fmsub.d that updates the register holding
; %a's value in place, with that register then stored to $4 (%d).
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                         <2 x double>* %c) nounwind {
  ; CHECK-LABEL: fmsub_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <2 x double> %2, %3
  %5 = fsub <2 x double> %1, %4
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}
179
; <4 x float> fdiv: expects ld.w loads of %a ($5) and %b ($6), a single
; fdiv.w with the %a register as the dividend, and an st.w of the quotient
; to the address in $4 (%c).
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK-LABEL: fdiv_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}
195
; <2 x double> fdiv: expects ld.d loads of %a ($5) and %b ($6), a single
; fdiv.d with the %a register as the dividend, and an st.d of the quotient
; to the address in $4 (%c).
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK-LABEL: fdiv_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}
211
; llvm.fabs on <4 x float>: expects the absolute value to be produced by
; fmax_a.w with the loaded register supplied as both source operands, and
; the result stored with st.w to the address in $4 (%c).
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: fabs_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}
225
; llvm.fabs on <2 x double>: expects the absolute value to be produced by
; fmax_a.d with the loaded register supplied as both source operands, and
; the result stored with st.d to the address in $4 (%c).
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: fabs_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}
239
; llvm.exp2 on <4 x float> with no surrounding multiply: expects a splat of
; integer 1 (ldi.w) converted to floating point by ffint_u.w, and that
; converted vector used as the multiplicand operand of fexp2.w alongside the
; loaded input.
;
; Each instruction result gets its own FileCheck variable so that fexp2.w is
; tied to the ffint_u.w result rather than to the raw ldi.w register; the
; expectations therefore hold whether or not ffint_u.w writes in place.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: fexp2_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}
255
; llvm.exp2 on <2 x double> with no surrounding multiply: expects a splat of
; integer 1 (ldi.d) converted to floating point by ffint_u.d, and that
; converted vector used as the multiplicand operand of fexp2.d alongside the
; loaded input.
;
; Each instruction result gets its own FileCheck variable so that fexp2.d is
; tied to the ffint_u.d result rather than to the raw ldi.d register; the
; expectations therefore hold whether or not ffint_u.d writes in place.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: fexp2_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}
271
; 2.0 * llvm.exp2(x) on <4 x float>: expects the multiply to be folded into
; fexp2.w, with the 2.0 splat as its multiplicand operand. The splat is
; expected to be built in a GPR by lui with immediate 16384 (0x4000, i.e. the
; upper half of 0x40000000, the bit pattern of 2.0f) and broadcast by fill.w.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: fexp2_v4f32_2:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}
288
; 2.0 * llvm.exp2(x) on <2 x double>: expects the multiply to be folded into
; fexp2.d, with the 2.0 splat as its multiplicand operand. The splat is
; expected to come from two consecutive .8byte entries of
; 4611686018427387904 (0x4000000000000000, the bit pattern of 2.0 as a
; double) emitted ahead of the function label and loaded with ld.d through a
; %lo() relocation.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK:      .8byte 4611686018427387904
  ; CHECK-NEXT: .8byte 4611686018427387904
  ; CHECK-LABEL: fexp2_v2f64_2:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], %lo(
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}
306
; llvm.sqrt on <4 x float>: expects an ld.w of %a ($5), a single fsqrt.w on
; the loaded register, and an st.w of the result to the address in $4 (%c).
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: fsqrt_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}
320
; llvm.sqrt on <2 x double>: expects an ld.d of %a ($5), a single fsqrt.d on
; the loaded register, and an st.d of the result to the address in $4 (%c).
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: fsqrt_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}
334
; uitofp <4 x i32> to <4 x float>: expects an ld.w of the integer vector at
; $5 (%a), a single ffint_u.w, and an st.w of the converted vector to the
; address in $4 (%c).
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ffint_u_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}
348
; uitofp <2 x i64> to <2 x double>: expects an ld.d of the integer vector at
; $5 (%a), a single ffint_u.d, and an st.d of the converted vector to the
; address in $4 (%c).
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ffint_u_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}
362
; sitofp <4 x i32> to <4 x float>: expects an ld.w of the integer vector at
; $5 (%a), a single ffint_s.w, and an st.w of the converted vector to the
; address in $4 (%c).
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK-LABEL: ffint_s_v4f32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}
376
; sitofp <2 x i64> to <2 x double>: expects an ld.d of the integer vector at
; $5 (%a), a single ffint_s.d, and an st.d of the converted vector to the
; address in $4 (%c).
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK-LABEL: ffint_s_v2f64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}
390
; fptoui <4 x float> to <4 x i32>: expects an ld.w of the float vector at
; $5 (%a), a single ftrunc_u.w, and an st.w of the integer result to the
; address in $4 (%c).
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: ftrunc_u_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}
404
; fptoui <2 x double> to <2 x i64>: expects an ld.d of the double vector at
; $5 (%a), a single ftrunc_u.d, and an st.d of the integer result to the
; address in $4 (%c).
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: ftrunc_u_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}
418
; fptosi <4 x float> to <4 x i32>: expects an ld.w of the float vector at
; $5 (%a), a single ftrunc_s.w, and an st.w of the integer result to the
; address in $4 (%c).
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK-LABEL: ftrunc_s_v4f32:

  %1 = load <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}
432
; fptosi <2 x double> to <2 x i64>: expects an ld.d of the double vector at
; $5 (%a), a single ftrunc_s.d, and an st.d of the integer result to the
; address in $4 (%c).
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK-LABEL: ftrunc_s_v2f64:

  %1 = load <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}
446
; Intrinsics called by the tests in this file, one v4f32 and one v2f64
; overload each.

; Unary intrinsics.
declare <4 x float>  @llvm.fabs.v4f32(<4 x float>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x float>  @llvm.exp2.v4f32(<4 x float>)
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
declare <4 x float>  @llvm.sqrt.v4f32(<4 x float>)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)

; Three-operand fused multiply-add.
declare <4 x float>  @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
457