; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Strict-FP (constrained-semantics) integer-to-floating-point conversion tests
; for 256-bit result vectors. Each RUN line compiles this file for a 32-bit
; (i686) or 64-bit (x86_64) x86 target with a different AVX feature level and
; FileChecks the exact instruction sequence llc emits at -O3. Do not edit the
; CHECK lines by hand; regenerate them with update_llc_test_checks.py.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=CHECK,AVX1,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,AVX2,AVX-32,AVX2-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 | FileCheck %s --check-prefixes=CHECK,AVX2,AVX2-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX-32,AVX512F-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,AVX512F,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQVL,AVX512DQVL-64

; Declarations of the constrained conversion intrinsics under test. The two
; metadata operands carry the rounding mode and FP-exception behavior.
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata)

; Signed <8 x i1> -> <8 x float>: the i1 lanes are sign-extended in-register
; (shift left then arithmetic shift right by 31) before vcvtdq2ps.
define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX1-LABEL: sitofp_v8i1_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i1_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i1_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512F-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i1_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i1_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i1_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT: vpslld $31, %ymm0, %ymm0
; AVX512DQVL-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX512DQVL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Unsigned <8 x i1> -> <8 x float>: the lanes are masked to their low bit
; (vpand with a constant-pool mask) and then zero-extended before vcvtdq2ps.
define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 {
; AVX1-32-LABEL: uitofp_v8i1_v8f32:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-32-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-32-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v8i1_v8f32:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-64-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-64-NEXT: retq
;
; AVX2-32-LABEL: uitofp_v8i1_v8f32:
; AVX2-32: # %bb.0:
; AVX2-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-32-NEXT: retl
;
; AVX2-64-LABEL: uitofp_v8i1_v8f32:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-64-NEXT: retq
;
; AVX512F-32-LABEL: uitofp_v8i1_v8f32:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512F-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-32-NEXT: retl
;
; AVX512F-64-LABEL: uitofp_v8i1_v8f32:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512VL-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-64-NEXT: retq
;
; AVX512DQVL-32-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v8i1_v8f32:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-64-NEXT: retq
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Signed <8 x i8> -> <8 x float>: bytes are sign-extended to dwords
; (vpmovsxbd, split across two xmm halves on AVX1) then converted.
define <8 x float> @sitofp_v8i8_v8f32(<8 x i8> %x) #0 {
; AVX1-LABEL: sitofp_v8i8_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i8_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i8_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i8_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i8_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i8_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512DQVL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Unsigned <8 x i8> -> <8 x float>: bytes are zero-extended to dwords
; (vpmovzxbd) then converted with vcvtdq2ps.
define <8 x float> @uitofp_v8i8_v8f32(<8 x i8> %x) #0 {
; AVX1-LABEL: uitofp_v8i8_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v8i8_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i8_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i8_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i8_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i8_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQVL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8> %x,
                                                                              metadata !"round.dynamic",
                                                                              metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Signed <8 x i16> -> <8 x float>: words are sign-extended to dwords
; (vpmovsxwd) then converted.
define <8 x float> @sitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: sitofp_v8i16_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: sitofp_v8i16_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: sitofp_v8i16_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: sitofp_v8i16_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: sitofp_v8i16_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v8i16_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512DQVL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Unsigned <8 x i16> -> <8 x float>: words are zero-extended to dwords
; (vpmovzxwd / unpack-with-zero on AVX1) then converted.
define <8 x float> @uitofp_v8i16_v8f32(<8 x i16> %x) #0 {
; AVX1-LABEL: uitofp_v8i16_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v8i16_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i16_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i16_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512VL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i16_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i16_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512DQVL-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Signed <8 x i32> -> <8 x float>: a single vcvtdq2ps on every target.
define <8 x float> @sitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v8i32_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Unsigned <8 x i32> -> <8 x float>: AVX1/AVX2 split each element into high
; and low halves, convert separately, and recombine with mul/sub/add against
; constant-pool values; AVX512 targets use the native vcvtudq2ps.
define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 {
; AVX1-32-LABEL: uitofp_v8i32_v8f32:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-32-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX1-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-32-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX1-32-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; AVX1-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX1-32-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-32-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v8i32_v8f32:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-64-NEXT: vpsrld $16, %xmm2, %xmm2
; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-64-NEXT: vcvtdq2ps %ymm1, %ymm1
; AVX1-64-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX1-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-64-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-64-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX1-64-NEXT: retq
;
; AVX2-LABEL: uitofp_v8i32_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200]
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11]
; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v8i32_v8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v8i32_v8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v8i32_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v8i32_v8f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2ps %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <8 x float> %result
}

; Signed <4 x i1> -> <4 x double>: sign-extend the lanes (shl/sar by 31) then
; widen with vcvtdq2pd; identical on all targets.
define <4 x double> @sitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v4i1_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Unsigned <4 x i1> -> <4 x double>: mask to the low bit (and with 1) then
; vcvtdq2pd; AVX512VL/DQVL fold the mask as a broadcast {1to4} operand.
define <4 x double> @uitofp_v4i1_v4f64(<4 x i1> %x) #0 {
; AVX1-32-LABEL: uitofp_v4i1_v4f64:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v4i1_v4f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-64-NEXT: retq
;
; AVX2-LABEL: uitofp_v4i1_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i1_v4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_v4i1_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f64:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQVL-64-NEXT: retq
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Signed <4 x i8> -> <4 x double>: vpmovsxbd then vcvtdq2pd on all targets.
define <4 x double> @sitofp_v4i8_v4f64(<4 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v4i8_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Unsigned <4 x i8> -> <4 x double>: vpmovzxbd then vcvtdq2pd on all targets.
define <4 x double> @uitofp_v4i8_v4f64(<4 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v4i8_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8> %x,
                                                                               metadata !"round.dynamic",
                                                                               metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Signed <4 x i16> -> <4 x double>: vpmovsxwd then vcvtdq2pd on all targets.
define <4 x double> @sitofp_v4i16_v4f64(<4 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v4i16_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Unsigned <4 x i16> -> <4 x double>: vpmovzxwd then vcvtdq2pd on all targets.
define <4 x double> @uitofp_v4i16_v4f64(<4 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v4i16_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Signed <4 x i32> -> <4 x double>: a single vcvtdq2pd on every target.
define <4 x double> @sitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v4i32_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Unsigned <4 x i32> -> <4 x double>: AVX1/AVX2 zero-extend to i64 lanes and
; use the exponent-bias trick (or with 2^52 = 4.503599627370496E+15, then
; subtract the same constant); AVX512 targets use the native vcvtudq2pd.
define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 {
; AVX1-LABEL: uitofp_v4i32_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: uitofp_v4i32_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15]
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v4i32_v4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps %xmm0, %xmm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v4i32_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2pd %xmm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Signed <4 x i64> -> <4 x double>: 32-bit targets spill to the stack and
; convert each element via x87 fildll/fstpl; 64-bit targets without DQ use
; four scalar vcvtsi2sd conversions; DQ targets use vcvtqq2pd directly.
define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-LABEL: sitofp_v4i64_v4f64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $64, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX1-64-LABEL: sitofp_v4i64_v4f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT: retq
;
; AVX2-64-LABEL: sitofp_v4i64_v4f64:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT: vmovq %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-64-NEXT: vmovq %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT: retq
;
; AVX512F-64-LABEL: sitofp_v4i64_v4f64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT: vmovq %xmm1, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
;
; AVX512VL-64-LABEL: sitofp_v4i64_v4f64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT: vmovq %xmm1, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_v4i64_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v4i64_v4f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtqq2pd %ymm0, %ymm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x,
                                                                                metadata !"round.dynamic",
                                                                                metadata !"fpexcept.strict") #0
  ret <4 x double> %result
}

; Unsigned <4 x i64> -> <4 x double>: the 32-bit path converts via x87 fildll
; and adds a constant-pool bias selected by the sign bit (shrl $31 indexing
; fadds). NOTE: this function continues past the end of the visible chunk;
; the text below is reproduced only up to the truncation point.
define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX-32-LABEL: uitofp_v4i64_v4f64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $64, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstpl (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $1, %xmm1, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm1, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v4i64_v4f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT: vpextrd $2, %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT: vmovd %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-64-NEXT: vextractps $2, %xmm0, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: movl %eax, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-64-NEXT: vpextrd $3, %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT: vpextrd $1, %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-64-NEXT: vpextrd $3, %xmm0, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
;
AVX1-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 829; AVX1-64-NEXT: retq 830; 831; AVX2-64-LABEL: uitofp_v4i64_v4f64: 832; AVX2-64: # %bb.0: 833; AVX2-64-NEXT: vextractf128 $1, %ymm0, %xmm1 834; AVX2-64-NEXT: vextractps $3, %xmm1, %eax 835; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 836; AVX2-64-NEXT: vextractps $1, %xmm1, %eax 837; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 838; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0] 839; AVX2-64-NEXT: vextractps $3, %xmm0, %eax 840; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 841; AVX2-64-NEXT: vextractps $1, %xmm0, %eax 842; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4 843; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0] 844; AVX2-64-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 845; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm3 = [4.294967296E+9,4.294967296E+9,4.294967296E+9,4.294967296E+9] 846; AVX2-64-NEXT: vmulpd %ymm3, %ymm2, %ymm2 847; AVX2-64-NEXT: vextractps $2, %xmm1, %eax 848; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 849; AVX2-64-NEXT: vmovd %xmm1, %eax 850; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1 851; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] 852; AVX2-64-NEXT: vextractps $2, %xmm0, %eax 853; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 854; AVX2-64-NEXT: vmovq %xmm0, %rax 855; AVX2-64-NEXT: movl %eax, %eax 856; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0 857; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 858; AVX2-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 859; AVX2-64-NEXT: vaddpd %ymm0, %ymm2, %ymm0 860; AVX2-64-NEXT: retq 861; 862; AVX512F-64-LABEL: uitofp_v4i64_v4f64: 863; AVX512F-64: # %bb.0: 864; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm1 865; AVX512F-64-NEXT: vpextrq $1, %xmm1, %rax 866; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2 867; AVX512F-64-NEXT: vmovq %xmm1, %rax 868; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1 869; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 870; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax 871; AVX512F-64-NEXT: vcvtusi2sd %rax, 
%xmm3, %xmm2 872; AVX512F-64-NEXT: vmovq %xmm0, %rax 873; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0 874; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 875; AVX512F-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 876; AVX512F-64-NEXT: retq 877; 878; AVX512VL-64-LABEL: uitofp_v4i64_v4f64: 879; AVX512VL-64: # %bb.0: 880; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm1 881; AVX512VL-64-NEXT: vpextrq $1, %xmm1, %rax 882; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2 883; AVX512VL-64-NEXT: vmovq %xmm1, %rax 884; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1 885; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 886; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax 887; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2 888; AVX512VL-64-NEXT: vmovq %xmm0, %rax 889; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0 890; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 891; AVX512VL-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 892; AVX512VL-64-NEXT: retq 893; 894; AVX512DQ-LABEL: uitofp_v4i64_v4f64: 895; AVX512DQ: # %bb.0: 896; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 897; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 898; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 899; AVX512DQ-NEXT: ret{{[l|q]}} 900; 901; AVX512DQVL-LABEL: uitofp_v4i64_v4f64: 902; AVX512DQVL: # %bb.0: 903; AVX512DQVL-NEXT: vcvtuqq2pd %ymm0, %ymm0 904; AVX512DQVL-NEXT: ret{{[l|q]}} 905 %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x, 906 metadata !"round.dynamic", 907 metadata !"fpexcept.strict") #0 908 ret <4 x double> %result 909} 910 911define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 { 912; AVX-32-LABEL: sitofp_v4i64_v4f32: 913; AVX-32: # %bb.0: 914; AVX-32-NEXT: pushl %ebp 915; AVX-32-NEXT: .cfi_def_cfa_offset 8 916; AVX-32-NEXT: .cfi_offset %ebp, -8 917; AVX-32-NEXT: movl %esp, %ebp 918; AVX-32-NEXT: .cfi_def_cfa_register %ebp 919; AVX-32-NEXT: andl $-8, %esp 920; AVX-32-NEXT: subl $48, %esp 921; AVX-32-NEXT: vmovlps 
%xmm0, {{[0-9]+}}(%esp) 922; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 923; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) 924; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0 925; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 926; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3] 927; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 928; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 929; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) 930; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 931; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) 932; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 933; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) 934; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 935; AVX-32-NEXT: fstps (%esp) 936; AVX-32-NEXT: wait 937; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 938; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 939; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 940; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 941; AVX-32-NEXT: movl %ebp, %esp 942; AVX-32-NEXT: popl %ebp 943; AVX-32-NEXT: .cfi_def_cfa %esp, 4 944; AVX-32-NEXT: vzeroupper 945; AVX-32-NEXT: retl 946; 947; AVX1-64-LABEL: sitofp_v4i64_v4f32: 948; AVX1-64: # %bb.0: 949; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax 950; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 951; AVX1-64-NEXT: vmovq %xmm0, %rax 952; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 953; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 954; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm0 955; AVX1-64-NEXT: vmovq %xmm0, %rax 956; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 957; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 958; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax 959; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 960; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 961; AVX1-64-NEXT: vzeroupper 962; AVX1-64-NEXT: retq 963; 964; AVX2-64-LABEL: sitofp_v4i64_v4f32: 965; AVX2-64: # %bb.0: 966; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax 967; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 968; 
AVX2-64-NEXT: vmovq %xmm0, %rax 969; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 970; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 971; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm0 972; AVX2-64-NEXT: vmovq %xmm0, %rax 973; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 974; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 975; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax 976; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 977; AVX2-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 978; AVX2-64-NEXT: vzeroupper 979; AVX2-64-NEXT: retq 980; 981; AVX512F-64-LABEL: sitofp_v4i64_v4f32: 982; AVX512F-64: # %bb.0: 983; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax 984; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 985; AVX512F-64-NEXT: vmovq %xmm0, %rax 986; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 987; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 988; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm0 989; AVX512F-64-NEXT: vmovq %xmm0, %rax 990; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 991; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 992; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax 993; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 994; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 995; AVX512F-64-NEXT: vzeroupper 996; AVX512F-64-NEXT: retq 997; 998; AVX512VL-64-LABEL: sitofp_v4i64_v4f32: 999; AVX512VL-64: # %bb.0: 1000; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax 1001; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1002; AVX512VL-64-NEXT: vmovq %xmm0, %rax 1003; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1004; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1005; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm0 1006; AVX512VL-64-NEXT: vmovq %xmm0, %rax 1007; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1008; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1009; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax 1010; AVX512VL-64-NEXT: vcvtsi2ss 
%rax, %xmm3, %xmm0 1011; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1012; AVX512VL-64-NEXT: vzeroupper 1013; AVX512VL-64-NEXT: retq 1014; 1015; AVX512DQ-LABEL: sitofp_v4i64_v4f32: 1016; AVX512DQ: # %bb.0: 1017; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 1018; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1019; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1020; AVX512DQ-NEXT: vzeroupper 1021; AVX512DQ-NEXT: ret{{[l|q]}} 1022; 1023; AVX512DQVL-LABEL: sitofp_v4i64_v4f32: 1024; AVX512DQVL: # %bb.0: 1025; AVX512DQVL-NEXT: vcvtqq2ps %ymm0, %xmm0 1026; AVX512DQVL-NEXT: vzeroupper 1027; AVX512DQVL-NEXT: ret{{[l|q]}} 1028 %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, 1029 metadata !"round.dynamic", 1030 metadata !"fpexcept.strict") #0 1031 ret <4 x float> %result 1032} 1033 1034define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 { 1035; AVX-32-LABEL: uitofp_v4i64_v4f32: 1036; AVX-32: # %bb.0: 1037; AVX-32-NEXT: pushl %ebp 1038; AVX-32-NEXT: .cfi_def_cfa_offset 8 1039; AVX-32-NEXT: .cfi_offset %ebp, -8 1040; AVX-32-NEXT: movl %esp, %ebp 1041; AVX-32-NEXT: .cfi_def_cfa_register %ebp 1042; AVX-32-NEXT: andl $-8, %esp 1043; AVX-32-NEXT: subl $48, %esp 1044; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 1045; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1046; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) 1047; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm1 1048; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) 1049; AVX-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[2,3,2,3] 1050; AVX-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) 1051; AVX-32-NEXT: vextractps $1, %xmm0, %eax 1052; AVX-32-NEXT: shrl $31, %eax 1053; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 1054; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) 1055; AVX-32-NEXT: fstps (%esp) 1056; AVX-32-NEXT: wait 1057; AVX-32-NEXT: vextractps $3, %xmm0, %eax 1058; AVX-32-NEXT: shrl $31, %eax 1059; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 1060; AVX-32-NEXT: fadds 
{{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) 1061; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) 1062; AVX-32-NEXT: wait 1063; AVX-32-NEXT: vextractps $1, %xmm1, %eax 1064; AVX-32-NEXT: shrl $31, %eax 1065; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 1066; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) 1067; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) 1068; AVX-32-NEXT: wait 1069; AVX-32-NEXT: vextractps $3, %xmm1, %eax 1070; AVX-32-NEXT: shrl $31, %eax 1071; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) 1072; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) 1073; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) 1074; AVX-32-NEXT: wait 1075; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1076; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 1077; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 1078; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 1079; AVX-32-NEXT: movl %ebp, %esp 1080; AVX-32-NEXT: popl %ebp 1081; AVX-32-NEXT: .cfi_def_cfa %esp, 4 1082; AVX-32-NEXT: vzeroupper 1083; AVX-32-NEXT: retl 1084; 1085; AVX1-64-LABEL: uitofp_v4i64_v4f32: 1086; AVX1-64: # %bb.0: 1087; AVX1-64-NEXT: vpsrlq $1, %xmm0, %xmm1 1088; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1089; AVX1-64-NEXT: vpsrlq $1, %xmm2, %xmm3 1090; AVX1-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1091; AVX1-64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 1092; AVX1-64-NEXT: vorpd %ymm3, %ymm1, %ymm1 1093; AVX1-64-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 1094; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax 1095; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 1096; AVX1-64-NEXT: vmovq %xmm1, %rax 1097; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 1098; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1099; AVX1-64-NEXT: vextractf128 $1, %ymm1, %xmm1 1100; AVX1-64-NEXT: vmovq %xmm1, %rax 1101; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 1102; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1103; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax 1104; AVX1-64-NEXT: vcvtsi2ss 
%rax, %xmm5, %xmm1 1105; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 1106; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm3 1107; AVX1-64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 1108; AVX1-64-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0 1109; AVX1-64-NEXT: vzeroupper 1110; AVX1-64-NEXT: retq 1111; 1112; AVX2-64-LABEL: uitofp_v4i64_v4f32: 1113; AVX2-64: # %bb.0: 1114; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 1115; AVX2-64-NEXT: vpand %ymm1, %ymm0, %ymm1 1116; AVX2-64-NEXT: vpsrlq $1, %ymm0, %ymm2 1117; AVX2-64-NEXT: vpor %ymm1, %ymm2, %ymm1 1118; AVX2-64-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 1119; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax 1120; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1121; AVX2-64-NEXT: vmovq %xmm1, %rax 1122; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 1123; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1124; AVX2-64-NEXT: vextracti128 $1, %ymm1, %xmm1 1125; AVX2-64-NEXT: vmovq %xmm1, %rax 1126; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 1127; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 1128; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax 1129; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 1130; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 1131; AVX2-64-NEXT: vaddps %xmm1, %xmm1, %xmm2 1132; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm3 1133; AVX2-64-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 1134; AVX2-64-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 1135; AVX2-64-NEXT: vzeroupper 1136; AVX2-64-NEXT: retq 1137; 1138; AVX512F-64-LABEL: uitofp_v4i64_v4f32: 1139; AVX512F-64: # %bb.0: 1140; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax 1141; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 1142; AVX512F-64-NEXT: vmovq %xmm0, %rax 1143; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 1144; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1145; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm0 1146; AVX512F-64-NEXT: vmovq %xmm0, %rax 1147; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 1148; 
AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1149; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax 1150; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 1151; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1152; AVX512F-64-NEXT: vzeroupper 1153; AVX512F-64-NEXT: retq 1154; 1155; AVX512VL-64-LABEL: uitofp_v4i64_v4f32: 1156; AVX512VL-64: # %bb.0: 1157; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax 1158; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 1159; AVX512VL-64-NEXT: vmovq %xmm0, %rax 1160; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 1161; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1162; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm0 1163; AVX512VL-64-NEXT: vmovq %xmm0, %rax 1164; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 1165; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1166; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax 1167; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 1168; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1169; AVX512VL-64-NEXT: vzeroupper 1170; AVX512VL-64-NEXT: retq 1171; 1172; AVX512DQ-LABEL: uitofp_v4i64_v4f32: 1173; AVX512DQ: # %bb.0: 1174; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 1175; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1176; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1177; AVX512DQ-NEXT: vzeroupper 1178; AVX512DQ-NEXT: ret{{[l|q]}} 1179; 1180; AVX512DQVL-LABEL: uitofp_v4i64_v4f32: 1181; AVX512DQVL: # %bb.0: 1182; AVX512DQVL-NEXT: vcvtuqq2ps %ymm0, %xmm0 1183; AVX512DQVL-NEXT: vzeroupper 1184; AVX512DQVL-NEXT: ret{{[l|q]}} 1185 %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x, 1186 metadata !"round.dynamic", 1187 metadata !"fpexcept.strict") #0 1188 ret <4 x float> %result 1189} 1190 1191attributes #0 = { strictfp } 1192