1; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
2
; 256-bit integer case: one i64 is loaded from %ptr and written into all four
; lanes of a <4 x i64>. The expected output is a vbroadcastsd whose source is
; a memory operand.
; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
  %elt = load i64* %ptr, align 8
  %lane0 = insertelement <4 x i64> undef, i64 %elt, i32 0
  %lane1 = insertelement <4 x i64> %lane0, i64 %elt, i32 1
  %lane2 = insertelement <4 x i64> %lane1, i64 %elt, i32 2
  %splat = insertelement <4 x i64> %lane2, i64 %elt, i32 3
  ret <4 x i64> %splat
}
13
; 256-bit integer case with 32-bit elements: the loaded i32 is written into
; lanes 0-3 of an <8 x i32>; lanes 4-7 are never written and stay undef.
; The expected output is a vbroadcastss with a memory source operand.
; CHECK: vbroadcastss (%
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
  %elt = load i32* %ptr, align 4
  %lane0 = insertelement <8 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <8 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <8 x i32> %lane1, i32 %elt, i32 2
  %lane3 = insertelement <8 x i32> %lane2, i32 %elt, i32 3
  ret <8 x i32> %lane3
}
24
; 256-bit floating-point case: one double is loaded from %ptr and written into
; all four lanes of a <4 x double>. The expected output is a vbroadcastsd with
; a memory source operand.
; CHECK: vbroadcastsd (%
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
  %elt = load double* %ptr, align 8
  %lane0 = insertelement <4 x double> undef, double %elt, i32 0
  %lane1 = insertelement <4 x double> %lane0, double %elt, i32 1
  %lane2 = insertelement <4 x double> %lane1, double %elt, i32 2
  %splat = insertelement <4 x double> %lane2, double %elt, i32 3
  ret <4 x double> %splat
}
35
; 256-bit single-precision case: the loaded float is written into lanes 0-3 of
; an <8 x float>; lanes 4-7 are never written and stay undef.
; The expected output is a vbroadcastss with a memory source operand.
; CHECK: vbroadcastss (%
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
  %elt = load float* %ptr, align 4
  %lane0 = insertelement <8 x float> undef, float %elt, i32 0
  %lane1 = insertelement <8 x float> %lane0, float %elt, i32 1
  %lane2 = insertelement <8 x float> %lane1, float %elt, i32 2
  %lane3 = insertelement <8 x float> %lane2, float %elt, i32 3
  ret <8 x float> %lane3
}
46
47;;;; 128-bit versions
48
; 128-bit single-precision case: one float is loaded from %ptr and written into
; all four lanes of a <4 x float>. The expected output is a vbroadcastss with a
; memory source operand.
; CHECK: vbroadcastss (%
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
  %elt = load float* %ptr, align 4
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %elt, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %elt, i32 2
  %splat = insertelement <4 x float> %lane2, float %elt, i32 3
  ret <4 x float> %splat
}
59
60
; Constant splat: the same float literal is placed in all four lanes and no
; load is involved (%ptr is unused). No vbroadcastss may appear between the
; function's name and its return.
; CHECK: _e2
; CHECK-NOT: vbroadcastss
; CHECK: ret
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
  %lane0 = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
  %lane1 = insertelement <4 x float> %lane0, float 0xbf80000000000000, i32 1
  %lane2 = insertelement <4 x float> %lane1, float 0xbf80000000000000, i32 2
  %splat = insertelement <4 x float> %lane2, float 0xbf80000000000000, i32 3
  ret <4 x float> %splat
}
71
72
; 128-bit integer case with 32-bit elements: one i32 is loaded from %ptr and
; written into all four lanes of a <4 x i32>. The expected output is a
; vbroadcastss with a memory source operand.
; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
  %elt = load i32* %ptr, align 4
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %elt, i32 2
  %splat = insertelement <4 x i32> %lane2, i32 %elt, i32 3
  ret <4 x i32> %splat
}
83
84; Unsupported vbroadcasts
85
; Two-lane i64 splat of a loaded value. This is one of the cases where no
; broadcast instruction may be emitted between the function's name and its
; return.
;
; The negative pattern is the bare stem "broadcast". The previous pattern,
; "broadcast (%", could never match: the AVX broadcast mnemonics
; (vbroadcastss, vbroadcastsd, vbroadcastf128) all carry a suffix between
; "broadcast" and the operand list, so the test passed vacuously.
; CHECK: _G
; CHECK-NOT: broadcast
; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load i64* %ptr, align 8
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
  ret <2 x i64> %vecinit2.i
}
96
; Register-only shuffle: lane 0 of the result takes element 1 of %a and the
; remaining three mask entries are undef. No load is involved, and no
; broadcast may appear between the function's name and its return.
; CHECK: _H
; CHECK-NOT: broadcast
; CHECK: ret
define <4 x i32> @H(<4 x i32> %a) {
  %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i32> %shuf
}
104
; Two-lane double splat of a loaded value. This is one of the cases where no
; broadcast instruction may be emitted between the function's name and its
; return.
;
; The negative pattern is the bare stem "broadcast". The previous pattern,
; "broadcast (%", could never match: the AVX broadcast mnemonics
; (vbroadcastss, vbroadcastsd, vbroadcastf128) all carry a suffix between
; "broadcast" and the operand list, so the test passed vacuously.
;
; NOTE(review): the double load uses align 4, while the other 64-bit loads in
; this file use align 8 - confirm the reduced alignment is intentional.
; CHECK: _I
; CHECK-NOT: broadcast
; CHECK: ret
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
entry:
  %q = load double* %ptr, align 4
  %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
  ret <2 x double> %vecinit2.i
}
115
; Four-lane float splat of a loaded value, followed by an unrelated i32 load
; from %k and a store of that value. The extra memory operations are there to
; force a chain; the splat is still expected to become a vbroadcastss with a
; memory source operand.
; CHECK: _RR
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
  %elt = load float* %ptr, align 4
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %elt, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %elt, i32 2
  %splat = insertelement <4 x float> %lane2, float %elt, i32 3
  ; Additional load and store whose only purpose is to force a chain.
  %side = load i32* %k, align 4
  store i32 %side, i32* undef
  ret <4 x float> %splat
}
131
132
; Four-lane float splat written as insert-then-shuffle: the loaded float goes
; into lane 0 and an all-zero shuffle mask replicates that lane everywhere.
; %k is unused. The expected output is a vbroadcastss with a memory source
; operand.
; CHECK: _RR2
; CHECK: vbroadcastss (%
; CHECK: ret
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
  %elt = load float* %ptr, align 4
  %lane0 = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %lane0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
143
144