1; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
2
; test1: write a float loaded from %br into lane 1 and the scalar %y into
; lane 14 of a <16 x float>. The directives expect a vinsertps followed by a
; vinsertf32x4 ahead of the return.
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
  %loaded = load float* %br
  %with.lane1 = insertelement <16 x float> %x, float %loaded, i32 1
  %with.lane14 = insertelement <16 x float> %with.lane1, float %y, i32 14
  ret <16 x float> %with.lane14
}
13
; test2: write a double loaded from %br into lane 1 and the scalar %y into
; lane 6 of an <8 x double>. The directives expect the sequence
; vinsertf32x4, vextractf32x4, vinsertf32x4 ahead of the return.
;CHECK-LABEL: test2:
;CHECK: vinsertf32x4
;CHECK: vextractf32x4
;CHECK: vinsertf32x4
;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
  %loaded = load double* %br
  %with.lane1 = insertelement <8 x double> %x, double %loaded, i32 1
  %with.lane6 = insertelement <8 x double> %with.lane1, double %y, i32 6
  ret <8 x double> %with.lane6
}
25
; test3: copy lane 4 of a <16 x float> into lane 1 of the same vector.
; The directives expect a vextractf32x4 followed by a vinsertf32x4.
;CHECK-LABEL: test3:
;CHECK: vextractf32x4
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
  %lane4 = extractelement <16 x float> %x, i32 4
  %result = insertelement <16 x float> %x, float %lane4, i32 1
  ret <16 x float> %result
}
35
; test4: integer counterpart of the lane copy: move lane 4 of an <8 x i64>
; into lane 1 of the same vector. The directives expect a vextracti32x4
; followed by a vinserti32x4.
;CHECK-LABEL: test4:
;CHECK: vextracti32x4
;CHECK: vinserti32x4
;CHECK: ret
define <8 x i64> @test4(<8 x i64> %x) nounwind {
  %lane4 = extractelement <8 x i64> %x, i32 4
  %result = insertelement <8 x i64> %x, i64 %lane4, i32 1
  ret <8 x i64> %result
}
45
; test5: take lane 3 of a <4 x float> and return its bit pattern as an i32.
; The directives expect a single vextractpsz ahead of the return.
;CHECK-LABEL: test5:
;CHECK: vextractpsz
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
  %lane3 = extractelement <4 x float> %x, i32 3
  %bits = bitcast float %lane3 to i32
  ret i32 %bits
}
54
55;CHECK-LABEL: test6:
56;CHECK: vextractpsz {{.*}}, (%rdi)
57;CHECK: ret
58define void @test6(<4 x float> %x, float* %out) nounwind {
59  %ef = extractelement <4 x float> %x, i32 3
60  store float %ef, float* %out, align 4
61  ret void
62}
63
; test7: extract a float from a <16 x float> at a run-time index %ind.
; The directives expect a vmovdz followed by a vpermps on a zmm register.
; The label directive carries the trailing colon, like test1-test6, so it
; anchors on the symbol's label line rather than on any line that merely
; mentions the name.
;CHECK-LABEL: test7:
;CHECK: vmovdz
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}
72
; test8: extract a double from an <8 x double> at a run-time index %ind.
; The directives expect a vmovqz followed by a vpermpd on a zmm register.
; The label directive carries the trailing colon, like test1-test6, so it
; anchors on the symbol's label line rather than on any line that merely
; mentions the name.
;CHECK-LABEL: test8:
;CHECK: vmovqz
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}
81
; test9: extract a float from an <8 x float> at a run-time index %ind.
; The directives expect a vmovd followed by a vpermps on a ymm register.
; The label directive carries the trailing colon, like test1-test6, so it
; anchors on the symbol's label line rather than on any line that merely
; mentions the name.
;CHECK-LABEL: test9:
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}
90
; test10: extract an i32 from a <16 x i32> at a run-time index %ind.
; The directives expect a vmovdz, a vpermd on a zmm register, and then the
; move of the selected element from %xmm0 into %eax. That last directive was
; previously misspelled, so FileCheck skipped it without any diagnostic.
; The label directive carries the trailing colon, like test1-test6, so it
; anchors on the symbol's label line rather than on any line that merely
; mentions the name.
;CHECK-LABEL: test10:
;CHECK: vmovdz
;CHECK: vpermd %zmm
;CHECK: vmovdz  %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}
100
; test11: compare two <16 x i32> vectors with unsigned less-than, pull out
; lanes 4 and 12 of the i1 result, and branch on each of them.
; The expected immediates are 260 (0x104) and 268 (0x10C). Read as BEXTR
; control words (start in the low byte, length in the next byte) they select
; one bit at position 4 and one bit at position 12, matching the two lanes.
; The label directive carries the trailing colon, like test1-test6, so it
; anchors on the symbol's label line rather than on any line that merely
; mentions the name.
;CHECK-LABEL: test11:
;CHECK: movl    $260
;CHECK: bextrl
;CHECK: movl    $268
;CHECK: bextrl
;CHECK: ret
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  %ib = extractelement <16 x i1> %cmp_res, i32 12
  br i1 %ia, label %A, label %B

A:                                      ; lane 4 true: return %b unchanged
  ret <16 x i32> %b

B:                                      ; lane 4 false: form %b + %a, then test lane 12
  %c = add <16 x i32> %b, %a
  br i1 %ib, label %C, label %D

C:                                      ; lane 12 true: return (%b + %a) - %a
  %c1 = sub <16 x i32> %c, %a
  ret <16 x i32> %c1

D:                                      ; lane 12 false: return (%b + %a) * %a
  %c2 = mul <16 x i32> %c, %a
  ret <16 x i32> %c2
}
126