; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+bf16 | FileCheck %s

; bfloat16x4_t test_vcreate_bf16(uint64_t a) { return vcreate_bf16(a); }
;
; Reinterprets the 64-bit integer argument %a as a <4 x bfloat> vector with a
; single bitcast. The assertions expect this to lower to one move from the
; general-purpose register into the FP/SIMD register (fmov d0, x0) and nothing
; else before the return.
define <4 x bfloat> @test_vcreate_bf16(i64 %a) nounwind {
; CHECK-LABEL: test_vcreate_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
entry:
  %0 = bitcast i64 %a to <4 x bfloat>
  ret <4 x bfloat> %0
}

; bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) { return vdup_n_bf16(v); }
;
; Broadcasts the scalar %v into all four lanes: %v is inserted into lane 0 of
; an undef vector, then a shufflevector with an all-zero mask replicates lane 0.
; The assertions expect a single "dup v0.4h, v0.h[0]" (preceded only by a
; register-kill annotation that widens h0 to q0).
define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdup_n_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit.i = insertelement <4 x bfloat> undef, bfloat %v, i32 0
  %vecinit3.i = shufflevector <4 x bfloat> %vecinit.i, <4 x bfloat> undef, <4 x i32> zeroinitializer
  ret <4 x bfloat> %vecinit3.i
}

; bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) { return vdupq_n_bf16(v); }
;
; 128-bit variant of the scalar broadcast: %v is inserted into lane 0 of an
; undef <8 x bfloat>, then an all-zero shuffle mask replicates lane 0 into all
; eight lanes. The assertions expect a single "dup v0.8h, v0.h[0]".
define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdupq_n_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit.i = insertelement <8 x bfloat> undef, bfloat %v, i32 0
  %vecinit7.i = shufflevector <8 x bfloat> %vecinit.i, <8 x bfloat> undef, <8 x i32> zeroinitializer
  ret <8 x bfloat> %vecinit7.i
}

; bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) { return vdup_lane_bf16(v, 1); }
;
; Replicates lane 1 of the 4-lane input %v into every lane of a 4-lane result
; using a shufflevector whose mask is <1, 1, 1, 1>. The assertions expect a
; single "dup v0.4h, v0.h[1]".
define <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %lane
}

; bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) { return vdupq_lane_bf16(v, 1); }
;
; Replicates lane 1 of the 4-lane input %v into every lane of an 8-lane result
; (the shuffle mask has eight entries, all equal to 1). The assertions expect a
; single "dup v0.8h, v0.h[1]".
define <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x bfloat> %lane
}

; bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) { return vdup_laneq_bf16(v, 7); }
;
; Replicates lane 7 of the 8-lane input %v into every lane of a 4-lane result
; (shuffle mask <7, 7, 7, 7>). The assertions expect a single
; "dup v0.4h, v0.h[7]".
define <4 x bfloat> @test_vdup_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_laneq_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v0.4h, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  ret <4 x bfloat> %lane
}

; bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) { return vdupq_laneq_bf16(v, 7); }
;
; Replicates lane 7 of the 8-lane input %v into every lane of an 8-lane result
; (the shuffle mask has eight entries, all equal to 7). The assertions expect a
; single "dup v0.8h, v0.h[7]".
define <8 x bfloat> @test_vdupq_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_laneq_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %lane = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x bfloat> %lane
}

; bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) { return vcombine_bf16(low, high); }
;
; Concatenates two 4-lane vectors into one 8-lane vector: the identity mask
; <0..7> selects all of %low (indices 0-3) followed by all of %high
; (indices 4-7). The assertions expect a single 64-bit element insert,
; "mov v0.d[1], v1.d[0]".
define <8 x bfloat> @test_vcombine_bf16(<4 x bfloat> %low, <4 x bfloat> %high) nounwind {
; CHECK-LABEL: test_vcombine_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x bfloat> %low, <4 x bfloat> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %shuffle.i
}

; bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) { return vget_high_bf16(a); }
;
; Extracts the upper four lanes (indices 4-7) of the 8-lane input %a into a
; 4-lane result. The assertions expect a byte-wise extract that rotates the
; register by 8 bytes, "ext v0.16b, v0.16b, v0.16b, #8", followed only by a
; sub-register kill annotation.
define <4 x bfloat> @test_vget_high_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_high_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x bfloat> %shuffle.i
}

; bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) { return vget_low_bf16(a); }
;
; Extracts the lower four lanes (indices 0-3) of the 8-lane input %a into a
; 4-lane result. The assertions expect no data-movement instruction at all:
; only a sub-register kill annotation followed by the return.
define <4 x bfloat> @test_vget_low_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_low_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x bfloat> %shuffle.i
}

; bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) { return vget_lane_bf16(v, 1); }
;
; Extracts lane 1 of the 4-lane input %v as a scalar bfloat. The assertions
; expect a single element-to-scalar move, "mov h0, v0.h[1]".
define bfloat @test_vget_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vget_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %vget_lane = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %vget_lane
}

; bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) { return vgetq_lane_bf16(v, 7); }
;
; Extracts lane 7 (the last lane) of the 8-lane input %v as a scalar bfloat.
; The assertions expect a single element-to-scalar move, "mov h0, v0.h[7]".
define bfloat @test_vgetq_lane_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vgetq_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %vgetq_lane = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %vgetq_lane
}

; bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) { return vset_lane_bf16(a, v, 1); }
;
; Writes the scalar %a into lane 1 of the 4-lane vector %v and returns the
; updated vector. The assertions expect an element insert into the register
; holding %v ("mov v1.h[1], v0.h[0]") followed by a copy of the result into the
; return register ("fmov d0, d1").
define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vset_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    mov v1.h[1], v0.h[0]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
entry:
  %vset_lane = insertelement <4 x bfloat> %v, bfloat %a, i32 1
  ret <4 x bfloat> %vset_lane
}

; bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) { return vsetq_lane_bf16(a, v, 7); }
;
; Writes the scalar %a into lane 7 of the 8-lane vector %v and returns the
; updated vector. The assertions expect an element insert into the register
; holding %v ("mov v1.h[7], v0.h[0]") followed by a full 128-bit copy into the
; return register ("mov v0.16b, v1.16b").
define <8 x bfloat> @test_vsetq_lane_bf16(bfloat %a, <8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vsetq_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    mov v1.h[7], v0.h[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %vset_lane = insertelement <8 x bfloat> %v, bfloat %a, i32 7
  ret <8 x bfloat> %vset_lane
}

; bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { return vduph_lane_bf16(v, 1); }
;
; Extracts lane 1 of the 4-lane input %v as a scalar bfloat. The IR body is the
; same extractelement used by test_vget_lane_bf16, and the assertions expect
; the same single instruction, "mov h0, v0.h[1]".
define bfloat @test_vduph_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %vget_lane = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %vget_lane
}

; bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { return vduph_laneq_bf16(v, 7); }
;
; Extracts lane 7 of the 8-lane input %v as a scalar bfloat. The IR body is the
; same extractelement used by test_vgetq_lane_bf16, and the assertions expect
; the same single instruction, "mov h0, v0.h[7]".
define bfloat @test_vduph_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_laneq_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
entry:
  %vgetq_lane = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %vgetq_lane
}

; vcopy_lane_bf16(a, 1, b, 3);
;
; Copies lane 3 of %b into lane 1 of %a, leaving the other lanes of %a intact.
; In the two-operand shuffle mask <0, 7, 2, 3>, indices 0-3 select from %a and
; indices 4-7 select from %b, so index 7 is lane 3 of %b. The assertions expect
; a single element-to-element move, "mov v0.h[1], v1.h[3]".
define <4 x bfloat> @test_vcopy_lane_bf16_v1(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[1], v1.h[3]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ret <4 x bfloat> %vset_lane
}

; vcopy_lane_bf16(a, 2, b, 0);
;
; Copies lane 0 of %b into lane 2 of %a, leaving the other lanes of %a intact.
; In the two-operand shuffle mask <0, 1, 4, 3>, index 4 is the first lane of
; the second operand (%b). The assertions expect a single element-to-element
; move, "mov v0.h[2], v1.h[0]".
define <4 x bfloat> @test_vcopy_lane_bf16_v2(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[2], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ret <4 x bfloat> %vset_lane
}

; vcopyq_lane_bf16(a, 0, b, 2);
;
; Copies lane 2 of the 4-lane vector %b into lane 0 of the 8-lane vector %a.
; This takes two shuffles: the first widens %b to eight lanes, keeping only its
; lane 2 (at position 2) and leaving every other position undef. The second
; selects index 10 (lane 2 of the widened %b, since indices 8-15 address the
; second operand) for lane 0 and keeps lanes 1-7 of %a. The assertions expect
; both shuffles to fold into a single "mov v0.h[0], v1.h[2]".
define <8 x bfloat> @test_vcopyq_lane_bf16_v1(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[0], v1.h[2]
; CHECK-NEXT:    ret
entry:
  %0 = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %0, <8 x i32> <i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %vset_lane
}

; vcopyq_lane_bf16(a, 6, b, 0);
;
; Copies lane 0 of the 4-lane vector %b into lane 6 of the 8-lane vector %a.
; The first shuffle widens %b to eight lanes, keeping only its lane 0 (at
; position 0) and leaving the rest undef. The second selects index 8 (lane 0 of
; the widened %b, since indices 8-15 address the second operand) for lane 6 and
; keeps the remaining lanes of %a. The assertions expect both shuffles to fold
; into a single "mov v0.h[6], v1.h[0]".
define <8 x bfloat> @test_vcopyq_lane_bf16_v2(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[6], v1.h[0]
; CHECK-NEXT:    ret
entry:
  %0 = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7>
  ret <8 x bfloat> %vset_lane
}

; vcopy_laneq_bf16(a, 0, b, 7);
;
; Copies lane 7 of the 8-lane vector %b into lane 0 of the 4-lane vector %a.
; The copy is expressed as an extractelement from %b followed by an
; insertelement into %a. The assertions expect the pair to fold into a single
; element-to-element move, "mov v0.h[0], v1.h[7]".
define <4 x bfloat> @test_vcopy_laneq_bf16_v1(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.h[0], v1.h[7]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %vgetq_lane = extractelement <8 x bfloat> %b, i32 7
  %vset_lane = insertelement <4 x bfloat> %a, bfloat %vgetq_lane, i32 0
  ret <4 x bfloat> %vset_lane
}

; vcopy_laneq_bf16(a, 3, b, 4);
;
; Copies lane 4 of the 8-lane vector %b into lane 3 of the 4-lane vector %a.
; The copy is expressed as an extractelement from %b followed by an
; insertelement into %a. The assertions expect the pair to fold into a single
; element-to-element move, "mov v0.h[3], v1.h[4]".
define <4 x bfloat> @test_vcopy_laneq_bf16_v2(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.h[3], v1.h[4]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %vgetq_lane = extractelement <8 x bfloat> %b, i32 4
  %vset_lane = insertelement <4 x bfloat> %a, bfloat %vgetq_lane, i32 3
  ret <4 x bfloat> %vset_lane
}

; vcopyq_laneq_bf16(a, 3, b, 7);
;
; Copies lane 7 of %b into lane 3 of %a (both 8-lane vectors). In the shuffle
; mask, indices 0-7 select from %a and 8-15 select from %b, so index 15 at
; position 3 is lane 7 of %b. The assertions expect a single element-to-element
; move, "mov v0.h[3], v1.h[7]".
define <8 x bfloat> @test_vcopyq_laneq_bf16_v1(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.h[3], v1.h[7]
; CHECK-NEXT:    ret
entry:
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %vset_lane
}

; vcopyq_laneq_bf16(a, 6, b, 2);
;
; Copies lane 2 of %b into lane 6 of %a (both 8-lane vectors). In the shuffle
; mask, indices 0-7 select from %a and 8-15 select from %b, so index 10 at
; position 6 is lane 2 of %b. The assertions expect a single element-to-element
; move, "mov v0.h[6], v1.h[2]".
define <8 x bfloat> @test_vcopyq_laneq_bf16_v2(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.h[6], v1.h[2]
; CHECK-NEXT:    ret
entry:
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 10, i32 7>
  ret <8 x bfloat> %vset_lane
}
