; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefixes=AVX512VLDQBW

; This test makes sure we don't use movmsk instructions when masked compares
; would be better. The use of the getmant intrinsic introduces a conversion
; from scalar to vXi1 late, after movmsk has been formed, requiring it to be
; reversed.

declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <2 x double> @movmsk2(<2 x double> %x0, <2 x double> %x2, <2 x i64> %mask) {
; AVX512VL-LABEL: movmsk2:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk2:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovq2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <2 x i64> %mask, zeroinitializer
  %b = bitcast <2 x i1> %a to i2
  %c = zext i2 %b to i8
  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %c)
  ret <2 x double> %res
}

define <4 x double> @movmsk4(<4 x double> %x0, <4 x double> %x2, <4 x i32> %mask) {
; AVX512VL-LABEL: movmsk4:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk4:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <4 x i32> %mask, zeroinitializer
  %b = bitcast <4 x i1> %a to i4
  %c = zext i4 %b to i8
  %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %c)
  ret <4 x double> %res
}

define <8 x double> @movmsk8(<8 x double> %x0, <8 x double> %x2, <8 x i32> %mask) {
; AVX512VL-LABEL: movmsk8:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk8:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %ymm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <8 x i32> %mask, zeroinitializer
  %b = bitcast <8 x i1> %a to i8
  %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %b, i32 4)
  ret <8 x double> %res
}

define <16 x float> @movmsk16(<16 x float> %x0, <16 x float> %x2, <16 x i8> %mask) {
; AVX512VL-LABEL: movmsk16:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpmovmskb %xmm2, %eax
; AVX512VL-NEXT:    kmovw %eax, %k1
; AVX512VL-NEXT:    vgetmantps $11, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT:    vmovaps %zmm1, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk16:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovb2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantps $11, %zmm0, %zmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovaps %zmm1, %zmm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <16 x i8> %mask, zeroinitializer
  %b = bitcast <16 x i1> %a to i16
  %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %b, i32 4)
  ret <16 x float> %res
}

; Similar to above but with fp types bitcasted to int for the slt.
define <2 x double> @movmsk2_fp(<2 x double> %x0, <2 x double> %x2, <2 x double> %mask) {
; AVX512VL-LABEL: movmsk2_fp:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk2_fp:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovq2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VLDQBW-NEXT:    retq
  %q = bitcast <2 x double> %mask to <2 x i64>
  %a = icmp slt <2 x i64> %q, zeroinitializer
  %b = bitcast <2 x i1> %a to i2
  %c = zext i2 %b to i8
  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %c)
  ret <2 x double> %res
}

define <4 x double> @movmsk4_fp(<4 x double> %x0, <4 x double> %x2, <4 x float> %mask) {
; AVX512VL-LABEL: movmsk4_fp:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk4_fp:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VLDQBW-NEXT:    retq
  %q = bitcast <4 x float> %mask to <4 x i32>
  %a = icmp slt <4 x i32> %q, zeroinitializer
  %b = bitcast <4 x i1> %a to i4
  %c = zext i4 %b to i8
  %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %c)
  ret <4 x double> %res
}

define <8 x double> @movmsk8_fp(<8 x double> %x0, <8 x double> %x2, <8 x float> %mask) {
; AVX512VL-LABEL: movmsk8_fp:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk8_fp:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %ymm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VLDQBW-NEXT:    retq
  %q = bitcast <8 x float> %mask to <8 x i32>
  %a = icmp slt <8 x i32> %q, zeroinitializer
  %b = bitcast <8 x i1> %a to i8
  %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %b, i32 4)
  ret <8 x double> %res
}