; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; Codegen tests for vectors of i1 (<8 x i1> / <16 x i1>) when compiling for
; -mcpu=knl. Each function bitcasts an integer to an i1 vector (or produces one
; from a vector compare), applies one operation, and the directives below it
; name the mask-register instructions expected in the llc output.
;
; Every function is anchored with a label directive so that its instruction
; checks can only match inside that function's own output.

; Inverts all 16 bits of an i16 through a <16 x i1> xor with all-ones.
define i16 @mask16(i16 %x) {
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
; CHECK-LABEL: mask16
; CHECK: kmovw
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw
; CHECK: ret
}

; Same inversion on an i8 / <8 x i1>; the expected instructions are still the
; 16-bit (w-suffixed) mask forms.
define i8 @mask8(i8 %x) {
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
; CHECK-LABEL: mask8
; CHECK: kmovw
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw
; CHECK: ret
}

; Load / invert / store of an i16 mask through %ptr. The load and the store are
; expected to be mask moves that use the incoming pointer register directly,
; and the same register (captured as ARG1) for both.
define void @mask16_mem(i16* %ptr) {
  %x = load i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
; CHECK-LABEL: mask16_mem
; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret
}

; Load / invert / store of an i8 mask through %ptr. ARG1 is captured again here
; rather than reused from mask16_mem, so this function's checks do not depend
; on a variable defined under a different label.
define void @mask8_mem(i8* %ptr) {
  %x = load i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
; CHECK-LABEL: mask8_mem
; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret
}

; Computes (x & y) | (x ^ y) on <16 x i1>; and, xor and or are each expected to
; appear as a mask-register instruction, in that order.
define i16 @mand16(i16 %x, i16 %y) {
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
; CHECK-LABEL: mand16
; CHECK: kandw
; CHECK: kxorw
; CHECK: korw
}

; Extracts elements 8..15 of a <16 x i1> with a shufflevector; expected to be
; done as a mask shift right by 8.
; CHECK-LABEL: shuf_test1
; CHECK: kshiftrw $8
; CHECK: ret
define i8 @shuf_test1(i16 %v) nounwind {
  %v1 = bitcast i16 %v to <16 x i1>
  %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %mask1 = bitcast <8 x i1> %mask to i8
  ret i8 %mask1
}

; zext_test1..3: extract element 5 of a <16 x i32> unsigned-greater-than
; compare result and zero-extend that single i1 to i32 / i16 / i8. All three
; expect a mask shift left, a mask shift right, then a mask move.

; CHECK-LABEL: zext_test1
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; CHECK-LABEL: zext_test2
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; CHECK-LABEL: zext_test3
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
; CHECK: ret
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}