; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Splat of a scalar i32 argument across all 16 lanes: the value is inserted
; into lane 0 and then replicated with an all-zero shuffle mask. The expected
; lowering is one vpbroadcastd whose source is the integer register %edi.
define <16 x i32> @_inreg16xi32(i32 %a) {
; CHECK-LABEL: _inreg16xi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
; CHECK-NEXT:    retq
  %b = insertelement <16 x i32> undef, i32 %a, i32 0
  %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %c
}

; Splat of a scalar i64 argument across all 8 lanes (insert into lane 0, then
; an all-zero shuffle mask). The expected lowering is one vpbroadcastq whose
; source is the integer register %rdi.
define <8 x i64> @_inreg8xi64(i64 %a) {
; CHECK-LABEL: _inreg8xi64:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
; CHECK-NEXT:    retq
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %c
}

; Widening splat: lane 0 of a <4 x float> input is replicated into a
; <16 x float> result by a single shufflevector with an all-zero mask.
; The expected lowering is a register-to-register vbroadcastss.
define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
; CHECK-LABEL: _ss16xfloat_v4:
; CHECK:    vbroadcastss %xmm0, %zmm0
; CHECK:    ret
  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %b
}

; Splat of a scalar float argument across all 16 lanes (insert into lane 0,
; then an all-zero shuffle mask). The expected lowering is one vbroadcastss
; from %xmm0 into %zmm0.
define <16 x float> @_inreg16xfloat(float %a) {
; CHECK-LABEL: _inreg16xfloat:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
; CHECK-NEXT:    retq
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %c
}

; Merge-masked float splat: lanes where %mask1 is non-zero take the broadcast
; of %a, all other lanes keep the corresponding lane of %i. The expected
; lowering folds the select into the broadcast as a {%k1} write mask with no
; zeroing.
define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_mask:
; CHECK:    vbroadcastss %xmm0, %zmm1 {%k1}
; CHECK:    ret
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
  ret <16 x float> %r
}

; Zero-masked float splat: lanes where %mask1 is non-zero take the broadcast
; of %a, all other lanes are zero. The expected lowering folds the select into
; the broadcast as a {%k1} {z} zeroing mask.
define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_maskz:
; CHECK:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
; CHECK:    ret
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %r
}

; Float splat whose scalar comes from memory. The load is expected to fold
; into the broadcast, so the instruction takes a memory operand addressed by
; the pointer argument (%rdi) and writes the return register %zmm0.
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
; CHECK-LABEL: _ss16xfloat_load:
; CHECK:    vbroadcastss (%rdi), %zmm0
; CHECK:    ret
  %a = load float* %a.ptr
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %c
}

; Merge-masked float splat from memory: lanes where %mask1 is non-zero take
; the loaded scalar, the rest keep %i. Both the load and the select are
; expected to fold into one vbroadcastss with a memory source and a {%k1}
; write mask.
define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_mask_load:
; CHECK:    vbroadcastss (%rdi), %zmm0 {%k1}
; CHECK:    ret
  %a = load float* %a.ptr
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
  ret <16 x float> %r
}

; Zero-masked float splat from memory: lanes where %mask1 is non-zero take the
; loaded scalar, the rest are zero. Both the load and the select are expected
; to fold into one vbroadcastss with a memory source and a {%k1} {z} mask.
define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
; CHECK-LABEL: _ss16xfloat_maskz_load:
; CHECK:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
; CHECK:    ret
  %a = load float* %a.ptr
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %b = insertelement <16 x float> undef, float %a, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
  ret <16 x float> %r
}

; Splat of a scalar double argument across all 8 lanes (insert into lane 0,
; then an all-zero shuffle mask). The expected lowering is one vbroadcastsd
; from %xmm0 into %zmm0.
define <8 x double> @_inreg8xdouble(double %a) {
; CHECK-LABEL: _inreg8xdouble:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %c
}

; Merge-masked double splat: lanes where %mask1 is non-zero take the broadcast
; of %a, all other lanes keep the corresponding lane of %i. The expected
; lowering folds the select into the broadcast as a {%k1} write mask with no
; zeroing.
define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_mask:
; CHECK:    vbroadcastsd %xmm0, %zmm1 {%k1}
; CHECK:    ret
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
  ret <8 x double> %r
}

; Zero-masked double splat: lanes where %mask1 is non-zero take the broadcast
; of %a, all other lanes are zero. The expected lowering folds the select into
; the broadcast as a {%k1} {z} zeroing mask.
define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_maskz:
; CHECK:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; CHECK:    ret
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Double splat whose scalar comes from memory. The load is expected to fold
; into the broadcast, so the instruction takes a memory operand addressed by
; the pointer argument (%rdi) and writes the return register %zmm0.
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
; CHECK-LABEL: _sd8xdouble_load:
; CHECK:    vbroadcastsd (%rdi), %zmm0
; CHECK:    ret
  %a = load double* %a.ptr
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  ret <8 x double> %c
}

; Merge-masked double splat from memory: lanes where %mask1 is non-zero take
; the loaded scalar, the rest keep %i. Both the load and the select are
; expected to fold into one vbroadcastsd with a memory source and a {%k1}
; write mask.
define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_mask_load:
; CHECK:    vbroadcastsd (%rdi), %zmm0 {%k1}
; CHECK:    ret
  %a = load double* %a.ptr
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
  ret <8 x double> %r
}

; Zero-masked double splat from memory: lanes where %mask1 is non-zero take
; the loaded scalar, the rest are zero. Both the load and the select are
; expected to fold into one vbroadcastsd with a memory source and a
; {%k1} {z} mask.
define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
; CHECK-LABEL: _sd8xdouble_maskz_load:
; CHECK:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK:    ret
  %a = load double* %a.ptr
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %b = insertelement <8 x double> undef, double %a, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Lane-0 splat of an existing <16 x i32> vector (all-zero shuffle mask, no
; scalar insert). The expected lowering is one vpbroadcastd reading the low
; element through %xmm0.
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
; CHECK-LABEL: _xmm16xi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %b
}

; Lane-0 splat of an existing <16 x float> vector (all-zero shuffle mask, no
; scalar insert). The expected lowering is one vbroadcastss reading the low
; element through %xmm0.
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
; CHECK-LABEL: _xmm16xfloat:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
; CHECK-NEXT:    retq
  %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
  ret <16 x float> %b
}

; Select between a sign-extended all-false mask and the sign extension of an
; unordered compare on an undef vector, keyed on that same compare result.
; The expected sequence is: zero %zmm0, vcmpunordps into %k1, a zero-masked
; vpbroadcastd from a RIP-relative constant, invert %k1 with knotw, then a
; zero-masked vmovdqu32.
define <16 x i32> @test_vbroadcast() {
; CHECK-LABEL: test_vbroadcast:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vcmpunordps %zmm0, %zmm0, %k1
; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; CHECK-NEXT:    knotw %k1, %k1
; CHECK-NEXT:    vmovdqu32 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
entry:
  %0 = sext <16 x i1> zeroinitializer to <16 x i32>
  %1 = fcmp uno <16 x float> undef, zeroinitializer
  %2 = sext <16 x i1> %1 to <16 x i32>
  %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
  ret <16 x i32> %3
}

; We implement the set1 intrinsics with vector initializers.  Verify that the
; IR generated will produce broadcasts at the end.

; All 8 double lanes are filled with the same scalar by a chain of
; insertelement instructions. The expected lowering is one vbroadcastsd.
; NOTE(review): attribute group #2 is referenced here but not defined in the
; visible part of this file - confirm it is defined elsewhere or drop it.
define <8 x double> @test_set1_pd(double %d) #2 {
; CHECK-LABEL: test_set1_pd:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
  ret <8 x double> %vecinit7.i
}

; All 8 i64 lanes are filled with the same scalar by a chain of insertelement
; instructions. The expected lowering is one vpbroadcastq sourced from %rdi.
; NOTE(review): attribute group #2 is referenced here but not defined in the
; visible part of this file - confirm it is defined elsewhere or drop it.
define <8 x i64> @test_set1_epi64(i64 %d) #2 {
; CHECK-LABEL: test_set1_epi64:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
  %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
  %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
  %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
  %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
  %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
  %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
  %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
  ret <8 x i64> %vecinit7.i
}

; All 16 float lanes are filled with the same scalar by a chain of
; insertelement instructions. The expected lowering is one vbroadcastss.
; NOTE(review): attribute group #2 is referenced here but not defined in the
; visible part of this file - confirm it is defined elsewhere or drop it.
define <16 x float> @test_set1_ps(float %f) #2 {
; CHECK-LABEL: test_set1_ps:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
  ret <16 x float> %vecinit15.i
}

; All 16 i32 lanes are filled with the same scalar by a chain of insertelement
; instructions. The expected lowering is one vpbroadcastd sourced from %edi.
; NOTE(review): attribute group #2 is referenced here but not defined in the
; visible part of this file - confirm it is defined elsewhere or drop it.
define <16 x i32> @test_set1_epi32(i32 %f) #2 {
; CHECK-LABEL: test_set1_epi32:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
  %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
  %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
  %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
  %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
  %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
  %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
  %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
  %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
  %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
  %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
  %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
  %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
  %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
  %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
  %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
  ret <16 x i32> %vecinit15.i
}

; We implement the scalar broadcast intrinsics with vector initializers.
; Verify that the IR generated will produce the broadcast at the end.

; Lane 0 of a <2 x double> input is extracted and written into every lane of
; an <8 x double> result by a chain of insertelement instructions. The
; expected lowering is one vbroadcastsd from %xmm0.
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
; CHECK-LABEL: test_mm512_broadcastsd_pd:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %0 = extractelement <2 x double> %a, i32 0
  %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
  ret <8 x double> %vecinit7.i
}
