; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; mask16: reinterpret the i16 argument as a <16 x i1> mask, invert every lane
; (xor with all-ones) and return the result as an i16 again.
; Expected lowering: move into a mask register, knotw, move back out.
define i16 @mask16(i16 %x) {
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
; The trailing colon keeps the label from also matching "mask16_mem".
; CHECK-LABEL: mask16:
; CHECK: kmovw
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw
; CHECK: ret
}

; mask8: same as mask16 but for an 8-lane mask: bitcast i8 -> <8 x i1>,
; invert all lanes, bitcast back to i8.
; The expected sequence still uses the 16-bit mask instructions
; (kmovw / knotw / kmovw).
define i8 @mask8(i8 %x) {
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
; The trailing colon keeps the label from also matching "mask8_mem".
; CHECK-LABEL: mask8:
; CHECK: kmovw
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw
; CHECK: ret
}

; mask16_mem: load an i16 from %ptr, treat it as a <16 x i1> mask, invert all
; lanes and store the result back to the same address.
; Expected lowering: kmovw straight from memory into a mask register, knotw,
; then kmovw from the mask register back to the same pointer register.
define void @mask16_mem(i16* %ptr) {
  %x = load i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
; ARG1 captures whichever register holds %ptr so the store can be checked
; against the same register as the load.
; CHECK-LABEL: mask16_mem:
; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret
}

; mask8_mem: load an i8 from %ptr, treat it as an <8 x i1> mask, invert all
; lanes and store the result back to the same address.
; Expected lowering mirrors mask16_mem: kmovw from memory, knotw, kmovw back.
define void @mask8_mem(i8* %ptr) {
  %x = load i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
; ARG1 is (re)captured here rather than reused from an earlier function, so
; this block does not depend on pattern variables leaking across labels.
; CHECK-LABEL: mask8_mem:
; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret
}

; mand16: combine two 16-lane masks with and, xor and or:
;   ret = (x & y) | (x ^ y)
; Each logical operation is expected to be selected as the corresponding
; mask-register instruction, in this order: kandw, kxorw, korw.
define i16 @mand16(i16 %x, i16 %y) {
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
; The label scopes the instruction checks to this function's output.
; CHECK-LABEL: mand16:
; CHECK: kandw
; CHECK: kxorw
; CHECK: korw
; CHECK: ret
}

; shuf_test1: extract the upper eight lanes (indices 8..15) of a 16-lane mask
; and return them as an i8. Selecting the high half is expected to become a
; right shift of the mask register by 8.
; CHECK-LABEL: shuf_test1:
; CHECK: kshiftrw        $8
; CHECK: ret
define i8 @shuf_test1(i16 %v) nounwind {
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}

; zext_test1: unsigned-greater-than compare of two <16 x i32> vectors, then
; extract lane 5 of the resulting mask and zero-extend that single bit to i32.
; The expected code isolates the bit with a left/right mask-shift pair before
; moving it out with kmovw.
; CHECK-LABEL: zext_test1:
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; zext_test2: same as zext_test1 but the extracted mask bit (lane 5 of an
; unsigned-greater-than compare) is zero-extended to i16.
; Expected instructions: kshiftlw, kshiftrw, then kmovw.
; CHECK-LABEL: zext_test2:
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; zext_test3: same as zext_test1 but the extracted mask bit (lane 5 of an
; unsigned-greater-than compare) is zero-extended to i8.
; Expected instructions: kshiftlw, kshiftrw, then kmovw.
; CHECK-LABEL: zext_test3:
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}
