; Codegen test: x86-64 lowering of integer<->floating-point conversions,
; fpext/fptrunc and scalar bitcasts under several AVX-512 feature mixes.
; Every RUN line invokes llc with -disable-peephole and a different -mattr
; set; the FileCheck prefixes group those runs as follows:
;   ALL         - every run
;   NOVL / VL   - runs without / with avx512vl
;   NODQ        - runs without avx512dq
;   VLDQ        - runs with both avx512vl and avx512dq
;   DQNOVL      - the avx512dq-only run
;   NOVLDQ      - runs with neither avx512vl nor avx512dq
;   VLNODQ      - runs with avx512vl but without avx512dq
;   VLBW/VLNOBW - runs with avx512vl and with / without avx512bw
;   KNL         - the plain +avx512f run
;   AVX512BW    - the avx512bw-only run
; The assertions are generated by utils/update_llc_test_checks.py (see the
; NOTE line); regenerate them with that script instead of editing by hand.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL,NOVL,NODQ,NOVLDQ,KNL 3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=ALL,VL,VLDQ,VLBW 4; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefixes=ALL,NODQ,VL,VLNODQ,VLNOBW 5; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefixes=ALL,NOVL,DQNOVL 6; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefixes=ALL,NOVL,NODQ,NOVLDQ,AVX512BW 7; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefixes=ALL,VL,VLDQ,VLNOBW 8; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefixes=ALL,NODQ,VL,VLNODQ,VLBW 9 10 11define <16 x float> @sitof32(<16 x i32> %a) nounwind { 12; ALL-LABEL: sitof32: 13; ALL: # %bb.0: 14; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 15; ALL-NEXT: retq 16 %b = sitofp <16 x i32> %a to <16 x float> 17 ret <16 x float> %b 18} 19 20define <8 x double> @sltof864(<8 x i64> %a) { 21; NODQ-LABEL: sltof864: 22; NODQ: # %bb.0: 23; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1 24; NODQ-NEXT: vpextrq $1, %xmm1, %rax 25; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 26; NODQ-NEXT: vmovq %xmm1, %rax 27; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 28; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 29; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 30; NODQ-NEXT: vpextrq $1, %xmm2, %rax 31; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 32; NODQ-NEXT: vmovq %xmm2, %rax 33; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 34; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 35; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 36; 
NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 37; NODQ-NEXT: vpextrq $1, %xmm2, %rax 38; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 39; NODQ-NEXT: vmovq %xmm2, %rax 40; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 41; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 42; NODQ-NEXT: vpextrq $1, %xmm0, %rax 43; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 44; NODQ-NEXT: vmovq %xmm0, %rax 45; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 46; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 47; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 48; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 49; NODQ-NEXT: retq 50; 51; VLDQ-LABEL: sltof864: 52; VLDQ: # %bb.0: 53; VLDQ-NEXT: vcvtqq2pd %zmm0, %zmm0 54; VLDQ-NEXT: retq 55; 56; DQNOVL-LABEL: sltof864: 57; DQNOVL: # %bb.0: 58; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 59; DQNOVL-NEXT: retq 60 %b = sitofp <8 x i64> %a to <8 x double> 61 ret <8 x double> %b 62} 63 64define <4 x double> @slto4f64(<4 x i64> %a) { 65; NODQ-LABEL: slto4f64: 66; NODQ: # %bb.0: 67; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm1 68; NODQ-NEXT: vpextrq $1, %xmm1, %rax 69; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 70; NODQ-NEXT: vmovq %xmm1, %rax 71; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 72; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 73; NODQ-NEXT: vpextrq $1, %xmm0, %rax 74; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 75; NODQ-NEXT: vmovq %xmm0, %rax 76; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 77; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 78; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 79; NODQ-NEXT: retq 80; 81; VLDQ-LABEL: slto4f64: 82; VLDQ: # %bb.0: 83; VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0 84; VLDQ-NEXT: retq 85; 86; DQNOVL-LABEL: slto4f64: 87; DQNOVL: # %bb.0: 88; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 89; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 90; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 91; DQNOVL-NEXT: retq 92 %b = sitofp <4 x i64> %a to <4 x double> 93 ret <4 x double> %b 94} 95 96define <2 x double> @slto2f64(<2 x i64> %a) { 
97; NODQ-LABEL: slto2f64: 98; NODQ: # %bb.0: 99; NODQ-NEXT: vpextrq $1, %xmm0, %rax 100; NODQ-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 101; NODQ-NEXT: vmovq %xmm0, %rax 102; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 103; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 104; NODQ-NEXT: retq 105; 106; VLDQ-LABEL: slto2f64: 107; VLDQ: # %bb.0: 108; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0 109; VLDQ-NEXT: retq 110; 111; DQNOVL-LABEL: slto2f64: 112; DQNOVL: # %bb.0: 113; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 114; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 115; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 116; DQNOVL-NEXT: vzeroupper 117; DQNOVL-NEXT: retq 118 %b = sitofp <2 x i64> %a to <2 x double> 119 ret <2 x double> %b 120} 121 122define <2 x float> @sltof2f32(<2 x i64> %a) { 123; NODQ-LABEL: sltof2f32: 124; NODQ: # %bb.0: 125; NODQ-NEXT: vpextrq $1, %xmm0, %rax 126; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 127; NODQ-NEXT: vmovq %xmm0, %rax 128; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 129; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 130; NODQ-NEXT: retq 131; 132; VLDQ-LABEL: sltof2f32: 133; VLDQ: # %bb.0: 134; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 135; VLDQ-NEXT: retq 136; 137; DQNOVL-LABEL: sltof2f32: 138; DQNOVL: # %bb.0: 139; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 140; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 141; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 142; DQNOVL-NEXT: vzeroupper 143; DQNOVL-NEXT: retq 144 %b = sitofp <2 x i64> %a to <2 x float> 145 ret <2 x float>%b 146} 147 148define <4 x float> @slto4f32_mem(<4 x i64>* %a) { 149; NODQ-LABEL: slto4f32_mem: 150; NODQ: # %bb.0: 151; NODQ-NEXT: vmovdqu (%rdi), %xmm0 152; NODQ-NEXT: vmovdqu 16(%rdi), %xmm1 153; NODQ-NEXT: vpextrq $1, %xmm0, %rax 154; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 155; NODQ-NEXT: vmovq %xmm0, %rax 156; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 157; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 158; NODQ-NEXT: vmovq %xmm1, 
%rax 159; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 160; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 161; NODQ-NEXT: vpextrq $1, %xmm1, %rax 162; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 163; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 164; NODQ-NEXT: retq 165; 166; VLDQ-LABEL: slto4f32_mem: 167; VLDQ: # %bb.0: 168; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0 169; VLDQ-NEXT: retq 170; 171; DQNOVL-LABEL: slto4f32_mem: 172; DQNOVL: # %bb.0: 173; DQNOVL-NEXT: vmovups (%rdi), %ymm0 174; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 175; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 176; DQNOVL-NEXT: vzeroupper 177; DQNOVL-NEXT: retq 178 %a1 = load <4 x i64>, <4 x i64>* %a, align 8 179 %b = sitofp <4 x i64> %a1 to <4 x float> 180 ret <4 x float>%b 181} 182 183define <4 x i64> @f64to4sl(<4 x double> %a) { 184; NODQ-LABEL: f64to4sl: 185; NODQ: # %bb.0: 186; NODQ-NEXT: vextractf128 $1, %ymm0, %xmm1 187; NODQ-NEXT: vcvttsd2si %xmm1, %rax 188; NODQ-NEXT: vmovq %rax, %xmm2 189; NODQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 190; NODQ-NEXT: vcvttsd2si %xmm1, %rax 191; NODQ-NEXT: vmovq %rax, %xmm1 192; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 193; NODQ-NEXT: vcvttsd2si %xmm0, %rax 194; NODQ-NEXT: vmovq %rax, %xmm2 195; NODQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 196; NODQ-NEXT: vcvttsd2si %xmm0, %rax 197; NODQ-NEXT: vmovq %rax, %xmm0 198; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 199; NODQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 200; NODQ-NEXT: retq 201; 202; VLDQ-LABEL: f64to4sl: 203; VLDQ: # %bb.0: 204; VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0 205; VLDQ-NEXT: retq 206; 207; DQNOVL-LABEL: f64to4sl: 208; DQNOVL: # %bb.0: 209; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 210; DQNOVL-NEXT: vcvttpd2qq %zmm0, %zmm0 211; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 212; DQNOVL-NEXT: retq 213 %b = fptosi <4 x double> %a to <4 x i64> 214 ret <4 x i64> %b 215} 216 217define <4 x i64> @f32to4sl(<4 x float> %a) { 218; 
NODQ-LABEL: f32to4sl: 219; NODQ: # %bb.0: 220; NODQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,3,3,3] 221; NODQ-NEXT: vcvttss2si %xmm1, %rax 222; NODQ-NEXT: vmovq %rax, %xmm1 223; NODQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 224; NODQ-NEXT: vcvttss2si %xmm2, %rax 225; NODQ-NEXT: vmovq %rax, %xmm2 226; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 227; NODQ-NEXT: vcvttss2si %xmm0, %rax 228; NODQ-NEXT: vmovq %rax, %xmm2 229; NODQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 230; NODQ-NEXT: vcvttss2si %xmm0, %rax 231; NODQ-NEXT: vmovq %rax, %xmm0 232; NODQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 233; NODQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 234; NODQ-NEXT: retq 235; 236; VLDQ-LABEL: f32to4sl: 237; VLDQ: # %bb.0: 238; VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 239; VLDQ-NEXT: retq 240; 241; DQNOVL-LABEL: f32to4sl: 242; DQNOVL: # %bb.0: 243; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 244; DQNOVL-NEXT: vcvttps2qq %ymm0, %zmm0 245; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 246; DQNOVL-NEXT: retq 247 %b = fptosi <4 x float> %a to <4 x i64> 248 ret <4 x i64> %b 249} 250 251define <4 x float> @slto4f32(<4 x i64> %a) { 252; NODQ-LABEL: slto4f32: 253; NODQ: # %bb.0: 254; NODQ-NEXT: vpextrq $1, %xmm0, %rax 255; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 256; NODQ-NEXT: vmovq %xmm0, %rax 257; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 258; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 259; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 260; NODQ-NEXT: vmovq %xmm0, %rax 261; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 262; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 263; NODQ-NEXT: vpextrq $1, %xmm0, %rax 264; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 265; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 266; NODQ-NEXT: vzeroupper 267; NODQ-NEXT: retq 268; 269; VLDQ-LABEL: slto4f32: 270; VLDQ: # %bb.0: 271; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 272; VLDQ-NEXT: vzeroupper 273; VLDQ-NEXT: retq 274; 275; DQNOVL-LABEL: 
slto4f32: 276; DQNOVL: # %bb.0: 277; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 278; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 279; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 280; DQNOVL-NEXT: vzeroupper 281; DQNOVL-NEXT: retq 282 %b = sitofp <4 x i64> %a to <4 x float> 283 ret <4 x float> %b 284} 285 286define <4 x float> @ulto4f32(<4 x i64> %a) { 287; NODQ-LABEL: ulto4f32: 288; NODQ: # %bb.0: 289; NODQ-NEXT: vpextrq $1, %xmm0, %rax 290; NODQ-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 291; NODQ-NEXT: vmovq %xmm0, %rax 292; NODQ-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 293; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 294; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 295; NODQ-NEXT: vmovq %xmm0, %rax 296; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 297; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 298; NODQ-NEXT: vpextrq $1, %xmm0, %rax 299; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 300; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 301; NODQ-NEXT: vzeroupper 302; NODQ-NEXT: retq 303; 304; VLDQ-LABEL: ulto4f32: 305; VLDQ: # %bb.0: 306; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 307; VLDQ-NEXT: vzeroupper 308; VLDQ-NEXT: retq 309; 310; DQNOVL-LABEL: ulto4f32: 311; DQNOVL: # %bb.0: 312; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 313; DQNOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0 314; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 315; DQNOVL-NEXT: vzeroupper 316; DQNOVL-NEXT: retq 317 %b = uitofp <4 x i64> %a to <4 x float> 318 ret <4 x float> %b 319} 320 321define <8 x double> @ulto8f64(<8 x i64> %a) { 322; NODQ-LABEL: ulto8f64: 323; NODQ: # %bb.0: 324; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 325; NODQ-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1 326; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0 327; NODQ-NEXT: vporq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 328; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 329; NODQ-NEXT: vaddpd %zmm0, %zmm1, %zmm0 330; NODQ-NEXT: retq 331; 332; VLDQ-LABEL: ulto8f64: 333; VLDQ: # %bb.0: 334; VLDQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 335; VLDQ-NEXT: retq 336; 337; DQNOVL-LABEL: ulto8f64: 338; DQNOVL: # %bb.0: 339; DQNOVL-NEXT: vcvtuqq2pd %zmm0, %zmm0 340; DQNOVL-NEXT: retq 341 %b = uitofp <8 x i64> %a to <8 x double> 342 ret <8 x double> %b 343} 344 345define <16 x double> @ulto16f64(<16 x i64> %a) { 346; NODQ-LABEL: ulto16f64: 347; NODQ: # %bb.0: 348; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295] 349; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 350; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4 351; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4 352; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0 353; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 354; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0 355; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 356; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0 357; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0 358; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3 359; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1 360; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1 361; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1 362; NODQ-NEXT: vaddpd %zmm1, %zmm3, %zmm1 363; NODQ-NEXT: retq 364; 365; VLDQ-LABEL: ulto16f64: 366; VLDQ: # %bb.0: 367; VLDQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 368; VLDQ-NEXT: 
vcvtuqq2pd %zmm1, %zmm1 369; VLDQ-NEXT: retq 370; 371; DQNOVL-LABEL: ulto16f64: 372; DQNOVL: # %bb.0: 373; DQNOVL-NEXT: vcvtuqq2pd %zmm0, %zmm0 374; DQNOVL-NEXT: vcvtuqq2pd %zmm1, %zmm1 375; DQNOVL-NEXT: retq 376 %b = uitofp <16 x i64> %a to <16 x double> 377 ret <16 x double> %b 378} 379 380define <16 x i32> @f64to16si(<16 x float> %a) nounwind { 381; ALL-LABEL: f64to16si: 382; ALL: # %bb.0: 383; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 384; ALL-NEXT: retq 385 %b = fptosi <16 x float> %a to <16 x i32> 386 ret <16 x i32> %b 387} 388 389define <16 x i8> @f32to16sc(<16 x float> %f) { 390; ALL-LABEL: f32to16sc: 391; ALL: # %bb.0: 392; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 393; ALL-NEXT: vpmovdb %zmm0, %xmm0 394; ALL-NEXT: vzeroupper 395; ALL-NEXT: retq 396 %res = fptosi <16 x float> %f to <16 x i8> 397 ret <16 x i8> %res 398} 399 400define <16 x i16> @f32to16ss(<16 x float> %f) { 401; ALL-LABEL: f32to16ss: 402; ALL: # %bb.0: 403; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 404; ALL-NEXT: vpmovdw %zmm0, %ymm0 405; ALL-NEXT: retq 406 %res = fptosi <16 x float> %f to <16 x i16> 407 ret <16 x i16> %res 408} 409 410define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { 411; ALL-LABEL: f32to16ui: 412; ALL: # %bb.0: 413; ALL-NEXT: vcvttps2udq %zmm0, %zmm0 414; ALL-NEXT: retq 415 %b = fptoui <16 x float> %a to <16 x i32> 416 ret <16 x i32> %b 417} 418 419define <16 x i8> @f32to16uc(<16 x float> %f) { 420; ALL-LABEL: f32to16uc: 421; ALL: # %bb.0: 422; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 423; ALL-NEXT: vpmovdb %zmm0, %xmm0 424; ALL-NEXT: vzeroupper 425; ALL-NEXT: retq 426 %res = fptoui <16 x float> %f to <16 x i8> 427 ret <16 x i8> %res 428} 429 430define <16 x i16> @f32to16us(<16 x float> %f) { 431; ALL-LABEL: f32to16us: 432; ALL: # %bb.0: 433; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 434; ALL-NEXT: vpmovdw %zmm0, %ymm0 435; ALL-NEXT: retq 436 %res = fptoui <16 x float> %f to <16 x i16> 437 ret <16 x i16> %res 438} 439 440define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { 441; NOVL-LABEL: f32to8ui: 
442; NOVL: # %bb.0: 443; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 444; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0 445; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 446; NOVL-NEXT: retq 447; 448; VL-LABEL: f32to8ui: 449; VL: # %bb.0: 450; VL-NEXT: vcvttps2udq %ymm0, %ymm0 451; VL-NEXT: retq 452 %b = fptoui <8 x float> %a to <8 x i32> 453 ret <8 x i32> %b 454} 455 456define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { 457; NOVL-LABEL: f32to4ui: 458; NOVL: # %bb.0: 459; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 460; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0 461; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 462; NOVL-NEXT: vzeroupper 463; NOVL-NEXT: retq 464; 465; VL-LABEL: f32to4ui: 466; VL: # %bb.0: 467; VL-NEXT: vcvttps2udq %xmm0, %xmm0 468; VL-NEXT: retq 469 %b = fptoui <4 x float> %a to <4 x i32> 470 ret <4 x i32> %b 471} 472 473define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { 474; ALL-LABEL: f64to8ui: 475; ALL: # %bb.0: 476; ALL-NEXT: vcvttpd2udq %zmm0, %ymm0 477; ALL-NEXT: retq 478 %b = fptoui <8 x double> %a to <8 x i32> 479 ret <8 x i32> %b 480} 481 482define <8 x i16> @f64to8us(<8 x double> %f) { 483; NOVL-LABEL: f64to8us: 484; NOVL: # %bb.0: 485; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 486; NOVL-NEXT: vpmovdw %zmm0, %ymm0 487; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 488; NOVL-NEXT: vzeroupper 489; NOVL-NEXT: retq 490; 491; VL-LABEL: f64to8us: 492; VL: # %bb.0: 493; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 494; VL-NEXT: vpmovdw %ymm0, %xmm0 495; VL-NEXT: vzeroupper 496; VL-NEXT: retq 497 %res = fptoui <8 x double> %f to <8 x i16> 498 ret <8 x i16> %res 499} 500 501define <8 x i8> @f64to8uc(<8 x double> %f) { 502; NOVL-LABEL: f64to8uc: 503; NOVL: # %bb.0: 504; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 505; NOVL-NEXT: vpmovdb %zmm0, %xmm0 506; NOVL-NEXT: vzeroupper 507; NOVL-NEXT: retq 508; 509; VL-LABEL: f64to8uc: 510; VL: # %bb.0: 511; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 512; VL-NEXT: vpmovdb %ymm0, %xmm0 513; VL-NEXT: vzeroupper 
514; VL-NEXT: retq 515 %res = fptoui <8 x double> %f to <8 x i8> 516 ret <8 x i8> %res 517} 518 519define <4 x i32> @f64to4ui(<4 x double> %a) nounwind { 520; NOVL-LABEL: f64to4ui: 521; NOVL: # %bb.0: 522; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 523; NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0 524; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 525; NOVL-NEXT: vzeroupper 526; NOVL-NEXT: retq 527; 528; VL-LABEL: f64to4ui: 529; VL: # %bb.0: 530; VL-NEXT: vcvttpd2udq %ymm0, %xmm0 531; VL-NEXT: vzeroupper 532; VL-NEXT: retq 533 %b = fptoui <4 x double> %a to <4 x i32> 534 ret <4 x i32> %b 535} 536 537define <8 x double> @sito8f64(<8 x i32> %a) { 538; ALL-LABEL: sito8f64: 539; ALL: # %bb.0: 540; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 541; ALL-NEXT: retq 542 %b = sitofp <8 x i32> %a to <8 x double> 543 ret <8 x double> %b 544} 545define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { 546; KNL-LABEL: i32to8f64_mask: 547; KNL: # %bb.0: 548; KNL-NEXT: kmovw %edi, %k1 549; KNL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} 550; KNL-NEXT: retq 551; 552; VLBW-LABEL: i32to8f64_mask: 553; VLBW: # %bb.0: 554; VLBW-NEXT: kmovd %edi, %k1 555; VLBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} 556; VLBW-NEXT: retq 557; 558; VLNOBW-LABEL: i32to8f64_mask: 559; VLNOBW: # %bb.0: 560; VLNOBW-NEXT: kmovw %edi, %k1 561; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} 562; VLNOBW-NEXT: retq 563; 564; DQNOVL-LABEL: i32to8f64_mask: 565; DQNOVL: # %bb.0: 566; DQNOVL-NEXT: kmovw %edi, %k1 567; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} 568; DQNOVL-NEXT: retq 569; 570; AVX512BW-LABEL: i32to8f64_mask: 571; AVX512BW: # %bb.0: 572; AVX512BW-NEXT: kmovd %edi, %k1 573; AVX512BW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} 574; AVX512BW-NEXT: retq 575 %1 = bitcast i8 %c to <8 x i1> 576 %2 = sitofp <8 x i32> %b to <8 x double> 577 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a 578 ret <8 x double> %3 579} 580define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { 581; KNL-LABEL: 
sito8f64_maskz: 582; KNL: # %bb.0: 583; KNL-NEXT: kmovw %edi, %k1 584; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} 585; KNL-NEXT: retq 586; 587; VLBW-LABEL: sito8f64_maskz: 588; VLBW: # %bb.0: 589; VLBW-NEXT: kmovd %edi, %k1 590; VLBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} 591; VLBW-NEXT: retq 592; 593; VLNOBW-LABEL: sito8f64_maskz: 594; VLNOBW: # %bb.0: 595; VLNOBW-NEXT: kmovw %edi, %k1 596; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} 597; VLNOBW-NEXT: retq 598; 599; DQNOVL-LABEL: sito8f64_maskz: 600; DQNOVL: # %bb.0: 601; DQNOVL-NEXT: kmovw %edi, %k1 602; DQNOVL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} 603; DQNOVL-NEXT: retq 604; 605; AVX512BW-LABEL: sito8f64_maskz: 606; AVX512BW: # %bb.0: 607; AVX512BW-NEXT: kmovd %edi, %k1 608; AVX512BW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} 609; AVX512BW-NEXT: retq 610 %1 = bitcast i8 %b to <8 x i1> 611 %2 = sitofp <8 x i32> %a to <8 x double> 612 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer 613 ret <8 x double> %3 614} 615 616define <8 x i32> @f64to8si(<8 x double> %a) { 617; ALL-LABEL: f64to8si: 618; ALL: # %bb.0: 619; ALL-NEXT: vcvttpd2dq %zmm0, %ymm0 620; ALL-NEXT: retq 621 %b = fptosi <8 x double> %a to <8 x i32> 622 ret <8 x i32> %b 623} 624 625define <8 x i16> @f64to8ss(<8 x double> %f) { 626; NOVL-LABEL: f64to8ss: 627; NOVL: # %bb.0: 628; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 629; NOVL-NEXT: vpmovdw %zmm0, %ymm0 630; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 631; NOVL-NEXT: vzeroupper 632; NOVL-NEXT: retq 633; 634; VL-LABEL: f64to8ss: 635; VL: # %bb.0: 636; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 637; VL-NEXT: vpmovdw %ymm0, %xmm0 638; VL-NEXT: vzeroupper 639; VL-NEXT: retq 640 %res = fptosi <8 x double> %f to <8 x i16> 641 ret <8 x i16> %res 642} 643 644define <8 x i8> @f64to8sc(<8 x double> %f) { 645; NOVL-LABEL: f64to8sc: 646; NOVL: # %bb.0: 647; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 648; NOVL-NEXT: vpmovdb %zmm0, %xmm0 649; NOVL-NEXT: vzeroupper 650; NOVL-NEXT: retq 651; 652; VL-LABEL: 
f64to8sc: 653; VL: # %bb.0: 654; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 655; VL-NEXT: vpmovdb %ymm0, %xmm0 656; VL-NEXT: vzeroupper 657; VL-NEXT: retq 658 %res = fptosi <8 x double> %f to <8 x i8> 659 ret <8 x i8> %res 660} 661 662define <4 x i32> @f64to4si(<4 x double> %a) { 663; ALL-LABEL: f64to4si: 664; ALL: # %bb.0: 665; ALL-NEXT: vcvttpd2dq %ymm0, %xmm0 666; ALL-NEXT: vzeroupper 667; ALL-NEXT: retq 668 %b = fptosi <4 x double> %a to <4 x i32> 669 ret <4 x i32> %b 670} 671 672define <16 x float> @f64to16f32(<16 x double> %b) nounwind { 673; ALL-LABEL: f64to16f32: 674; ALL: # %bb.0: 675; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0 676; ALL-NEXT: vcvtpd2ps %zmm1, %ymm1 677; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 678; ALL-NEXT: retq 679 %a = fptrunc <16 x double> %b to <16 x float> 680 ret <16 x float> %a 681} 682 683define <4 x float> @f64to4f32(<4 x double> %b) { 684; ALL-LABEL: f64to4f32: 685; ALL: # %bb.0: 686; ALL-NEXT: vcvtpd2ps %ymm0, %xmm0 687; ALL-NEXT: vzeroupper 688; ALL-NEXT: retq 689 %a = fptrunc <4 x double> %b to <4 x float> 690 ret <4 x float> %a 691} 692 693define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { 694; NOVLDQ-LABEL: f64to4f32_mask: 695; NOVLDQ: # %bb.0: 696; NOVLDQ-NEXT: vpslld $31, %xmm1, %xmm1 697; NOVLDQ-NEXT: vptestmd %zmm1, %zmm1, %k1 698; NOVLDQ-NEXT: vcvtpd2ps %ymm0, %xmm0 699; NOVLDQ-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} 700; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 701; NOVLDQ-NEXT: vzeroupper 702; NOVLDQ-NEXT: retq 703; 704; VLDQ-LABEL: f64to4f32_mask: 705; VLDQ: # %bb.0: 706; VLDQ-NEXT: vpslld $31, %xmm1, %xmm1 707; VLDQ-NEXT: vpmovd2m %xmm1, %k1 708; VLDQ-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} 709; VLDQ-NEXT: vzeroupper 710; VLDQ-NEXT: retq 711; 712; VLNODQ-LABEL: f64to4f32_mask: 713; VLNODQ: # %bb.0: 714; VLNODQ-NEXT: vpslld $31, %xmm1, %xmm1 715; VLNODQ-NEXT: vptestmd %xmm1, %xmm1, %k1 716; VLNODQ-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} 717; VLNODQ-NEXT: vzeroupper 718; VLNODQ-NEXT: retq 719; 720; 
DQNOVL-LABEL: f64to4f32_mask: 721; DQNOVL: # %bb.0: 722; DQNOVL-NEXT: vpslld $31, %xmm1, %xmm1 723; DQNOVL-NEXT: vpmovd2m %zmm1, %k1 724; DQNOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 725; DQNOVL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} 726; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 727; DQNOVL-NEXT: vzeroupper 728; DQNOVL-NEXT: retq 729 %a = fptrunc <4 x double> %b to <4 x float> 730 %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer 731 ret <4 x float> %c 732} 733 734define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { 735; ALL-LABEL: f64tof32_inreg: 736; ALL: # %bb.0: 737; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 738; ALL-NEXT: retq 739 %ext = extractelement <2 x double> %a0, i32 0 740 %cvt = fptrunc double %ext to float 741 %res = insertelement <4 x float> %a1, float %cvt, i32 0 742 ret <4 x float> %res 743} 744 745define <8 x double> @f32to8f64(<8 x float> %b) nounwind { 746; ALL-LABEL: f32to8f64: 747; ALL: # %bb.0: 748; ALL-NEXT: vcvtps2pd %ymm0, %zmm0 749; ALL-NEXT: retq 750 %a = fpext <8 x float> %b to <8 x double> 751 ret <8 x double> %a 752} 753 754define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { 755; NOVL-LABEL: f32to4f64_mask: 756; NOVL: # %bb.0: 757; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 758; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 759; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0 760; NOVL-NEXT: vcmpltpd %zmm2, %zmm1, %k1 761; NOVL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} 762; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 763; NOVL-NEXT: retq 764; 765; VL-LABEL: f32to4f64_mask: 766; VL: # %bb.0: 767; VL-NEXT: vcmpltpd %ymm2, %ymm1, %k1 768; VL-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} 769; VL-NEXT: retq 770 %a = fpext <4 x float> %b to <4 x double> 771 %mask = fcmp ogt <4 x double> %a1, %b1 772 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer 773 ret <4 x double> %c 774} 775 776define <4 x double> @f32to4f64_mask_load(<4 x 
float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) { 777; NOVL-LABEL: f32to4f64_mask_load: 778; NOVL: # %bb.0: 779; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 780; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 781; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 782; NOVL-NEXT: vcvtps2pd (%rdi), %ymm3 783; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1 784; NOVL-NEXT: vblendmpd %zmm3, %zmm2, %zmm0 {%k1} 785; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 786; NOVL-NEXT: retq 787; 788; VL-LABEL: f32to4f64_mask_load: 789; VL: # %bb.0: 790; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 791; VL-NEXT: vcvtps2pd (%rdi), %ymm2 {%k1} 792; VL-NEXT: vmovaps %ymm2, %ymm0 793; VL-NEXT: retq 794 %b = load <4 x float>, <4 x float>* %p 795 %a = fpext <4 x float> %b to <4 x double> 796 %mask = fcmp ogt <4 x double> %a1, %b1 797 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru 798 ret <4 x double> %c 799} 800 801define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { 802; NOVL-LABEL: f32to4f64_maskz_load: 803; NOVL: # %bb.0: 804; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 805; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 806; NOVL-NEXT: vcvtps2pd (%rdi), %ymm2 807; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1 808; NOVL-NEXT: vmovapd %zmm2, %zmm0 {%k1} {z} 809; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 810; NOVL-NEXT: retq 811; 812; VL-LABEL: f32to4f64_maskz_load: 813; VL: # %bb.0: 814; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 815; VL-NEXT: vcvtps2pd (%rdi), %ymm0 {%k1} {z} 816; VL-NEXT: retq 817 %b = load <4 x float>, <4 x float>* %p 818 %a = fpext <4 x float> %b to <4 x double> 819 %mask = fcmp ogt <4 x double> %a1, %b1 820 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer 821 ret <4 x double> %c 822} 823 824define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind { 825; ALL-LABEL: f32tof64_inreg: 826; ALL: # %bb.0: 827; ALL-NEXT: 
vcvtss2sd %xmm1, %xmm0, %xmm0 828; ALL-NEXT: retq 829 %ext = extractelement <4 x float> %a1, i32 0 830 %cvt = fpext float %ext to double 831 %res = insertelement <2 x double> %a0, double %cvt, i32 0 832 ret <2 x double> %res 833} 834 835define double @sltof64_load(i64* nocapture %e) { 836; ALL-LABEL: sltof64_load: 837; ALL: # %bb.0: # %entry 838; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 839; ALL-NEXT: retq 840entry: 841 %tmp1 = load i64, i64* %e, align 8 842 %conv = sitofp i64 %tmp1 to double 843 ret double %conv 844} 845 846define double @sitof64_load(i32* %e) { 847; ALL-LABEL: sitof64_load: 848; ALL: # %bb.0: # %entry 849; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 850; ALL-NEXT: retq 851entry: 852 %tmp1 = load i32, i32* %e, align 4 853 %conv = sitofp i32 %tmp1 to double 854 ret double %conv 855} 856 857define float @sitof32_load(i32* %e) { 858; ALL-LABEL: sitof32_load: 859; ALL: # %bb.0: # %entry 860; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 861; ALL-NEXT: retq 862entry: 863 %tmp1 = load i32, i32* %e, align 4 864 %conv = sitofp i32 %tmp1 to float 865 ret float %conv 866} 867 868define float @sltof32_load(i64* %e) { 869; ALL-LABEL: sltof32_load: 870; ALL: # %bb.0: # %entry 871; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 872; ALL-NEXT: retq 873entry: 874 %tmp1 = load i64, i64* %e, align 8 875 %conv = sitofp i64 %tmp1 to float 876 ret float %conv 877} 878 879define void @f32tof64_loadstore() { 880; ALL-LABEL: f32tof64_loadstore: 881; ALL: # %bb.0: # %entry 882; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 883; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 884; ALL-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) 885; ALL-NEXT: retq 886entry: 887 %f = alloca float, align 4 888 %d = alloca double, align 8 889 %tmp = load float, float* %f, align 4 890 %conv = fpext float %tmp to double 891 store double %conv, double* %d, align 8 892 ret void 893} 894 895define void @f64tof32_loadstore() nounwind uwtable { 896; ALL-LABEL: f64tof32_loadstore: 897; ALL: # %bb.0: # %entry 898; 
ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 899; ALL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 900; ALL-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) 901; ALL-NEXT: retq 902entry: 903 %f = alloca float, align 4 904 %d = alloca double, align 8 905 %tmp = load double, double* %d, align 8 906 %conv = fptrunc double %tmp to float 907 store float %conv, float* %f, align 4 908 ret void 909} 910 911define double @long_to_double(i64 %x) { 912; ALL-LABEL: long_to_double: 913; ALL: # %bb.0: 914; ALL-NEXT: vmovq %rdi, %xmm0 915; ALL-NEXT: retq 916 %res = bitcast i64 %x to double 917 ret double %res 918} 919 920define i64 @double_to_long(double %x) { 921; ALL-LABEL: double_to_long: 922; ALL: # %bb.0: 923; ALL-NEXT: vmovq %xmm0, %rax 924; ALL-NEXT: retq 925 %res = bitcast double %x to i64 926 ret i64 %res 927} 928 929define float @int_to_float(i32 %x) { 930; ALL-LABEL: int_to_float: 931; ALL: # %bb.0: 932; ALL-NEXT: vmovd %edi, %xmm0 933; ALL-NEXT: retq 934 %res = bitcast i32 %x to float 935 ret float %res 936} 937 938define i32 @float_to_int(float %x) { 939; ALL-LABEL: float_to_int: 940; ALL: # %bb.0: 941; ALL-NEXT: vmovd %xmm0, %eax 942; ALL-NEXT: retq 943 %res = bitcast float %x to i32 944 ret i32 %res 945} 946 947define <16 x double> @uito16f64(<16 x i32> %a) nounwind { 948; ALL-LABEL: uito16f64: 949; ALL: # %bb.0: 950; ALL-NEXT: vcvtudq2pd %ymm0, %zmm2 951; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 952; ALL-NEXT: vcvtudq2pd %ymm0, %zmm1 953; ALL-NEXT: vmovaps %zmm2, %zmm0 954; ALL-NEXT: retq 955 %b = uitofp <16 x i32> %a to <16 x double> 956 ret <16 x double> %b 957} 958 959define <8 x float> @slto8f32(<8 x i64> %a) { 960; NODQ-LABEL: slto8f32: 961; NODQ: # %bb.0: 962; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm1 963; NODQ-NEXT: vpextrq $1, %xmm1, %rax 964; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 965; NODQ-NEXT: vmovq %xmm1, %rax 966; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 967; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 968; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2 
969; NODQ-NEXT: vmovq %xmm2, %rax 970; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 971; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] 972; NODQ-NEXT: vpextrq $1, %xmm2, %rax 973; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2 974; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 975; NODQ-NEXT: vpextrq $1, %xmm0, %rax 976; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2 977; NODQ-NEXT: vmovq %xmm0, %rax 978; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 979; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 980; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 981; NODQ-NEXT: vmovq %xmm0, %rax 982; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 983; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 984; NODQ-NEXT: vpextrq $1, %xmm0, %rax 985; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0 986; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] 987; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 988; NODQ-NEXT: retq 989; 990; VLDQ-LABEL: slto8f32: 991; VLDQ: # %bb.0: 992; VLDQ-NEXT: vcvtqq2ps %zmm0, %ymm0 993; VLDQ-NEXT: retq 994; 995; DQNOVL-LABEL: slto8f32: 996; DQNOVL: # %bb.0: 997; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 998; DQNOVL-NEXT: retq 999 %b = sitofp <8 x i64> %a to <8 x float> 1000 ret <8 x float> %b 1001} 1002 1003define <16 x float> @slto16f32(<16 x i64> %a) { 1004; NODQ-LABEL: slto16f32: 1005; NODQ: # %bb.0: 1006; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm2 1007; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1008; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 1009; NODQ-NEXT: vmovq %xmm2, %rax 1010; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2 1011; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1012; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm3 1013; NODQ-NEXT: vmovq %xmm3, %rax 1014; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 1015; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 1016; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1017; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 1018; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 1019; 
NODQ-NEXT: vpextrq $1, %xmm1, %rax 1020; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 1021; NODQ-NEXT: vmovq %xmm1, %rax 1022; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 1023; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1024; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm1 1025; NODQ-NEXT: vmovq %xmm1, %rax 1026; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 1027; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1028; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1029; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 1030; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 1031; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1032; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 1033; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1034; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 1035; NODQ-NEXT: vmovq %xmm2, %rax 1036; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 1037; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1038; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3 1039; NODQ-NEXT: vmovq %xmm3, %rax 1040; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 1041; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 1042; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1043; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 1044; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 1045; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1046; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3 1047; NODQ-NEXT: vmovq %xmm0, %rax 1048; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 1049; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1050; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1051; NODQ-NEXT: vmovq %xmm0, %rax 1052; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 1053; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1054; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1055; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0 1056; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] 1057; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1058; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1059; NODQ-NEXT: retq 1060; 1061; 
VLDQ-LABEL: slto16f32: 1062; VLDQ: # %bb.0: 1063; VLDQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1064; VLDQ-NEXT: vcvtqq2ps %zmm1, %ymm1 1065; VLDQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1066; VLDQ-NEXT: retq 1067; 1068; DQNOVL-LABEL: slto16f32: 1069; DQNOVL: # %bb.0: 1070; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0 1071; DQNOVL-NEXT: vcvtqq2ps %zmm1, %ymm1 1072; DQNOVL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1073; DQNOVL-NEXT: retq 1074 %b = sitofp <16 x i64> %a to <16 x float> 1075 ret <16 x float> %b 1076} 1077 1078define <8 x double> @slto8f64(<8 x i64> %a) { 1079; NODQ-LABEL: slto8f64: 1080; NODQ: # %bb.0: 1081; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1 1082; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1083; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 1084; NODQ-NEXT: vmovq %xmm1, %rax 1085; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 1086; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1087; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1088; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1089; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 1090; NODQ-NEXT: vmovq %xmm2, %rax 1091; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 1092; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1093; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1094; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 1095; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1096; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 1097; NODQ-NEXT: vmovq %xmm2, %rax 1098; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 1099; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1100; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1101; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 1102; NODQ-NEXT: vmovq %xmm0, %rax 1103; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 1104; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 1105; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1106; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1107; NODQ-NEXT: retq 1108; 1109; VLDQ-LABEL: slto8f64: 1110; VLDQ: # %bb.0: 1111; VLDQ-NEXT: vcvtqq2pd %zmm0, %zmm0 1112; VLDQ-NEXT: retq 1113; 1114; DQNOVL-LABEL: slto8f64: 1115; 
DQNOVL: # %bb.0: 1116; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 1117; DQNOVL-NEXT: retq 1118 %b = sitofp <8 x i64> %a to <8 x double> 1119 ret <8 x double> %b 1120} 1121 1122define <16 x double> @slto16f64(<16 x i64> %a) { 1123; NODQ-LABEL: slto16f64: 1124; NODQ: # %bb.0: 1125; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2 1126; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1127; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 1128; NODQ-NEXT: vmovq %xmm2, %rax 1129; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 1130; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1131; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 1132; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1133; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4 1134; NODQ-NEXT: vmovq %xmm3, %rax 1135; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 1136; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1137; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1138; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm3 1139; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1140; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 1141; NODQ-NEXT: vmovq %xmm3, %rax 1142; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 1143; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1144; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1145; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 1146; NODQ-NEXT: vmovq %xmm0, %rax 1147; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0 1148; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] 1149; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1150; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 1151; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2 1152; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1153; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 1154; NODQ-NEXT: vmovq %xmm2, %rax 1155; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm2 1156; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1157; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3 1158; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1159; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 1160; NODQ-NEXT: vmovq %xmm3, %rax 1161; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 1162; NODQ-NEXT: vunpcklpd 
{{.*#+}} xmm3 = xmm3[0],xmm4[0] 1163; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1164; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm3 1165; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1166; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 1167; NODQ-NEXT: vmovq %xmm3, %rax 1168; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 1169; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1170; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1171; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4 1172; NODQ-NEXT: vmovq %xmm1, %rax 1173; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1 1174; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] 1175; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1176; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 1177; NODQ-NEXT: retq 1178; 1179; VLDQ-LABEL: slto16f64: 1180; VLDQ: # %bb.0: 1181; VLDQ-NEXT: vcvtqq2pd %zmm0, %zmm0 1182; VLDQ-NEXT: vcvtqq2pd %zmm1, %zmm1 1183; VLDQ-NEXT: retq 1184; 1185; DQNOVL-LABEL: slto16f64: 1186; DQNOVL: # %bb.0: 1187; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 1188; DQNOVL-NEXT: vcvtqq2pd %zmm1, %zmm1 1189; DQNOVL-NEXT: retq 1190 %b = sitofp <16 x i64> %a to <16 x double> 1191 ret <16 x double> %b 1192} 1193 1194define <8 x float> @ulto8f32(<8 x i64> %a) { 1195; NODQ-LABEL: ulto8f32: 1196; NODQ: # %bb.0: 1197; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm1 1198; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1199; NODQ-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 1200; NODQ-NEXT: vmovq %xmm1, %rax 1201; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1 1202; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 1203; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2 1204; NODQ-NEXT: vmovq %xmm2, %rax 1205; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3 1206; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] 1207; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1208; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2 1209; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 1210; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1211; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2 1212; NODQ-NEXT: vmovq %xmm0, %rax 1213; 
NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3 1214; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1215; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1216; NODQ-NEXT: vmovq %xmm0, %rax 1217; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3 1218; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 1219; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1220; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm0 1221; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] 1222; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1223; NODQ-NEXT: retq 1224; 1225; VLDQ-LABEL: ulto8f32: 1226; VLDQ: # %bb.0: 1227; VLDQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1228; VLDQ-NEXT: retq 1229; 1230; DQNOVL-LABEL: ulto8f32: 1231; DQNOVL: # %bb.0: 1232; DQNOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0 1233; DQNOVL-NEXT: retq 1234 %b = uitofp <8 x i64> %a to <8 x float> 1235 ret <8 x float> %b 1236} 1237 1238define <16 x float> @ulto16f32(<16 x i64> %a) { 1239; NODQ-LABEL: ulto16f32: 1240; NODQ: # %bb.0: 1241; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm2 1242; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1243; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3 1244; NODQ-NEXT: vmovq %xmm2, %rax 1245; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2 1246; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1247; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm3 1248; NODQ-NEXT: vmovq %xmm3, %rax 1249; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm4 1250; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 1251; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1252; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 1253; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 1254; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1255; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 1256; NODQ-NEXT: vmovq %xmm1, %rax 1257; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 1258; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1259; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm1 1260; NODQ-NEXT: vmovq %xmm1, %rax 1261; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 1262; NODQ-NEXT: vinsertps 
{{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1263; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1264; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1 1265; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 1266; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1267; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 1268; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1269; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 1270; NODQ-NEXT: vmovq %xmm2, %rax 1271; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2 1272; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1273; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3 1274; NODQ-NEXT: vmovq %xmm3, %rax 1275; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 1276; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 1277; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1278; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 1279; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 1280; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1281; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3 1282; NODQ-NEXT: vmovq %xmm0, %rax 1283; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 1284; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1285; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1286; NODQ-NEXT: vmovq %xmm0, %rax 1287; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4 1288; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1289; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1290; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm0 1291; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] 1292; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1293; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1294; NODQ-NEXT: retq 1295; 1296; VLDQ-LABEL: ulto16f32: 1297; VLDQ: # %bb.0: 1298; VLDQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1299; VLDQ-NEXT: vcvtuqq2ps %zmm1, %ymm1 1300; VLDQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1301; VLDQ-NEXT: retq 1302; 1303; DQNOVL-LABEL: ulto16f32: 1304; DQNOVL: # %bb.0: 1305; DQNOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0 1306; DQNOVL-NEXT: vcvtuqq2ps %zmm1, %ymm1 1307; DQNOVL-NEXT: vinsertf64x4 $1, 
%ymm1, %zmm0, %zmm0 1308; DQNOVL-NEXT: retq 1309 %b = uitofp <16 x i64> %a to <16 x float> 1310 ret <16 x float> %b 1311} 1312 1313define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { 1314; KNL-LABEL: uito8f64_mask: 1315; KNL: # %bb.0: 1316; KNL-NEXT: kmovw %edi, %k1 1317; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1318; KNL-NEXT: retq 1319; 1320; VLBW-LABEL: uito8f64_mask: 1321; VLBW: # %bb.0: 1322; VLBW-NEXT: kmovd %edi, %k1 1323; VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1324; VLBW-NEXT: retq 1325; 1326; VLNOBW-LABEL: uito8f64_mask: 1327; VLNOBW: # %bb.0: 1328; VLNOBW-NEXT: kmovw %edi, %k1 1329; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1330; VLNOBW-NEXT: retq 1331; 1332; DQNOVL-LABEL: uito8f64_mask: 1333; DQNOVL: # %bb.0: 1334; DQNOVL-NEXT: kmovw %edi, %k1 1335; DQNOVL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1336; DQNOVL-NEXT: retq 1337; 1338; AVX512BW-LABEL: uito8f64_mask: 1339; AVX512BW: # %bb.0: 1340; AVX512BW-NEXT: kmovd %edi, %k1 1341; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1342; AVX512BW-NEXT: retq 1343 %1 = bitcast i8 %c to <8 x i1> 1344 %2 = uitofp <8 x i32> %b to <8 x double> 1345 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a 1346 ret <8 x double> %3 1347} 1348define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { 1349; KNL-LABEL: uito8f64_maskz: 1350; KNL: # %bb.0: 1351; KNL-NEXT: kmovw %edi, %k1 1352; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1353; KNL-NEXT: retq 1354; 1355; VLBW-LABEL: uito8f64_maskz: 1356; VLBW: # %bb.0: 1357; VLBW-NEXT: kmovd %edi, %k1 1358; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1359; VLBW-NEXT: retq 1360; 1361; VLNOBW-LABEL: uito8f64_maskz: 1362; VLNOBW: # %bb.0: 1363; VLNOBW-NEXT: kmovw %edi, %k1 1364; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1365; VLNOBW-NEXT: retq 1366; 1367; DQNOVL-LABEL: uito8f64_maskz: 1368; DQNOVL: # %bb.0: 1369; DQNOVL-NEXT: kmovw %edi, %k1 1370; DQNOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1371; DQNOVL-NEXT: retq 1372; 1373; 
AVX512BW-LABEL: uito8f64_maskz: 1374; AVX512BW: # %bb.0: 1375; AVX512BW-NEXT: kmovd %edi, %k1 1376; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1377; AVX512BW-NEXT: retq 1378 %1 = bitcast i8 %b to <8 x i1> 1379 %2 = uitofp <8 x i32> %a to <8 x double> 1380 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer 1381 ret <8 x double> %3 1382} 1383 1384define <4 x double> @uito4f64(<4 x i32> %a) nounwind { 1385; NOVL-LABEL: uito4f64: 1386; NOVL: # %bb.0: 1387; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1388; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 1389; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1390; NOVL-NEXT: retq 1391; 1392; VL-LABEL: uito4f64: 1393; VL: # %bb.0: 1394; VL-NEXT: vcvtudq2pd %xmm0, %ymm0 1395; VL-NEXT: retq 1396 %b = uitofp <4 x i32> %a to <4 x double> 1397 ret <4 x double> %b 1398} 1399 1400define <16 x float> @uito16f32(<16 x i32> %a) nounwind { 1401; ALL-LABEL: uito16f32: 1402; ALL: # %bb.0: 1403; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0 1404; ALL-NEXT: retq 1405 %b = uitofp <16 x i32> %a to <16 x float> 1406 ret <16 x float> %b 1407} 1408 1409define <8 x double> @uito8f64(<8 x i32> %a) { 1410; ALL-LABEL: uito8f64: 1411; ALL: # %bb.0: 1412; ALL-NEXT: vcvtudq2pd %ymm0, %zmm0 1413; ALL-NEXT: retq 1414 %b = uitofp <8 x i32> %a to <8 x double> 1415 ret <8 x double> %b 1416} 1417 1418define <8 x float> @uito8f32(<8 x i32> %a) nounwind { 1419; NOVL-LABEL: uito8f32: 1420; NOVL: # %bb.0: 1421; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1422; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 1423; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1424; NOVL-NEXT: retq 1425; 1426; VL-LABEL: uito8f32: 1427; VL: # %bb.0: 1428; VL-NEXT: vcvtudq2ps %ymm0, %ymm0 1429; VL-NEXT: retq 1430 %b = uitofp <8 x i32> %a to <8 x float> 1431 ret <8 x float> %b 1432} 1433 1434define <4 x float> @uito4f32(<4 x i32> %a) nounwind { 1435; NOVL-LABEL: uito4f32: 1436; NOVL: # %bb.0: 1437; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1438; 
NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 1439; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1440; NOVL-NEXT: vzeroupper 1441; NOVL-NEXT: retq 1442; 1443; VL-LABEL: uito4f32: 1444; VL: # %bb.0: 1445; VL-NEXT: vcvtudq2ps %xmm0, %xmm0 1446; VL-NEXT: retq 1447 %b = uitofp <4 x i32> %a to <4 x float> 1448 ret <4 x float> %b 1449} 1450 1451define i32 @fptosi(float %a) nounwind { 1452; ALL-LABEL: fptosi: 1453; ALL: # %bb.0: 1454; ALL-NEXT: vcvttss2si %xmm0, %eax 1455; ALL-NEXT: retq 1456 %b = fptosi float %a to i32 1457 ret i32 %b 1458} 1459 1460define i32 @fptoui(float %a) nounwind { 1461; ALL-LABEL: fptoui: 1462; ALL: # %bb.0: 1463; ALL-NEXT: vcvttss2usi %xmm0, %eax 1464; ALL-NEXT: retq 1465 %b = fptoui float %a to i32 1466 ret i32 %b 1467} 1468 1469define float @uitof32(i32 %a) nounwind { 1470; ALL-LABEL: uitof32: 1471; ALL: # %bb.0: 1472; ALL-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 1473; ALL-NEXT: retq 1474 %b = uitofp i32 %a to float 1475 ret float %b 1476} 1477 1478define double @uitof64(i32 %a) nounwind { 1479; ALL-LABEL: uitof64: 1480; ALL: # %bb.0: 1481; ALL-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 1482; ALL-NEXT: retq 1483 %b = uitofp i32 %a to double 1484 ret double %b 1485} 1486 1487define <16 x float> @sbto16f32(<16 x i32> %a) { 1488; NODQ-LABEL: sbto16f32: 1489; NODQ: # %bb.0: 1490; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 1491; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1492; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1493; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0 1494; NODQ-NEXT: retq 1495; 1496; VLDQ-LABEL: sbto16f32: 1497; VLDQ: # %bb.0: 1498; VLDQ-NEXT: vpmovd2m %zmm0, %k0 1499; VLDQ-NEXT: vpmovm2d %k0, %zmm0 1500; VLDQ-NEXT: vcvtdq2ps %zmm0, %zmm0 1501; VLDQ-NEXT: retq 1502; 1503; DQNOVL-LABEL: sbto16f32: 1504; DQNOVL: # %bb.0: 1505; DQNOVL-NEXT: vpmovd2m %zmm0, %k0 1506; DQNOVL-NEXT: vpmovm2d %k0, %zmm0 1507; DQNOVL-NEXT: vcvtdq2ps %zmm0, %zmm0 1508; DQNOVL-NEXT: retq 1509 %mask = icmp slt <16 x i32> %a, zeroinitializer 1510 %1 = sitofp <16 x i1> %mask to 
<16 x float> 1511 ret <16 x float> %1 1512} 1513 1514define <16 x float> @scto16f32(<16 x i8> %a) { 1515; ALL-LABEL: scto16f32: 1516; ALL: # %bb.0: 1517; ALL-NEXT: vpmovsxbd %xmm0, %zmm0 1518; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1519; ALL-NEXT: retq 1520 %1 = sitofp <16 x i8> %a to <16 x float> 1521 ret <16 x float> %1 1522} 1523 1524define <16 x float> @ssto16f32(<16 x i16> %a) { 1525; ALL-LABEL: ssto16f32: 1526; ALL: # %bb.0: 1527; ALL-NEXT: vpmovsxwd %ymm0, %zmm0 1528; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1529; ALL-NEXT: retq 1530 %1 = sitofp <16 x i16> %a to <16 x float> 1531 ret <16 x float> %1 1532} 1533 1534define <8 x double> @ssto16f64(<8 x i16> %a) { 1535; ALL-LABEL: ssto16f64: 1536; ALL: # %bb.0: 1537; ALL-NEXT: vpmovsxwd %xmm0, %ymm0 1538; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1539; ALL-NEXT: retq 1540 %1 = sitofp <8 x i16> %a to <8 x double> 1541 ret <8 x double> %1 1542} 1543 1544define <8 x double> @scto8f64(<8 x i8> %a) { 1545; ALL-LABEL: scto8f64: 1546; ALL: # %bb.0: 1547; ALL-NEXT: vpmovsxbd %xmm0, %ymm0 1548; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1549; ALL-NEXT: retq 1550 %1 = sitofp <8 x i8> %a to <8 x double> 1551 ret <8 x double> %1 1552} 1553 1554define <16 x double> @scto16f64(<16 x i8> %a) { 1555; ALL-LABEL: scto16f64: 1556; ALL: # %bb.0: 1557; ALL-NEXT: vpmovsxbd %xmm0, %zmm1 1558; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1559; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1560; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1561; ALL-NEXT: retq 1562 %b = sitofp <16 x i8> %a to <16 x double> 1563 ret <16 x double> %b 1564} 1565 1566define <16 x double> @sbto16f64(<16 x double> %a) { 1567; NODQ-LABEL: sbto16f64: 1568; NODQ: # %bb.0: 1569; NODQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1570; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0 1571; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1 1572; NODQ-NEXT: kunpckbw %k0, %k1, %k1 1573; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 1574; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 1575; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1576; NODQ-NEXT: vcvtdq2pd %ymm1, 
%zmm1 1577; NODQ-NEXT: retq 1578; 1579; VLDQ-LABEL: sbto16f64: 1580; VLDQ: # %bb.0: 1581; VLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1582; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0 1583; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1 1584; VLDQ-NEXT: kunpckbw %k0, %k1, %k0 1585; VLDQ-NEXT: vpmovm2d %k0, %zmm1 1586; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0 1587; VLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1588; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1 1589; VLDQ-NEXT: retq 1590; 1591; DQNOVL-LABEL: sbto16f64: 1592; DQNOVL: # %bb.0: 1593; DQNOVL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1594; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm2, %k0 1595; DQNOVL-NEXT: vcmpltpd %zmm1, %zmm2, %k1 1596; DQNOVL-NEXT: kunpckbw %k0, %k1, %k0 1597; DQNOVL-NEXT: vpmovm2d %k0, %zmm1 1598; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm0 1599; DQNOVL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1600; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm1 1601; DQNOVL-NEXT: retq 1602 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer 1603 %1 = sitofp <16 x i1> %cmpres to <16 x double> 1604 ret <16 x double> %1 1605} 1606 1607define <8 x double> @sbto8f64(<8 x double> %a) { 1608; NOVLDQ-LABEL: sbto8f64: 1609; NOVLDQ: # %bb.0: 1610; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1611; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1612; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1613; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0 1614; NOVLDQ-NEXT: retq 1615; 1616; VLDQ-LABEL: sbto8f64: 1617; VLDQ: # %bb.0: 1618; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1619; VLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 1620; VLDQ-NEXT: vpmovm2d %k0, %ymm0 1621; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0 1622; VLDQ-NEXT: retq 1623; 1624; VLNODQ-LABEL: sbto8f64: 1625; VLNODQ: # %bb.0: 1626; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1627; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1628; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 1629; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 1630; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0 1631; VLNODQ-NEXT: retq 1632; 1633; DQNOVL-LABEL: sbto8f64: 1634; DQNOVL: # %bb.0: 1635; DQNOVL-NEXT: 
vxorpd %xmm1, %xmm1, %xmm1 1636; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm1, %k0 1637; DQNOVL-NEXT: vpmovm2d %k0, %zmm0 1638; DQNOVL-NEXT: vcvtdq2pd %ymm0, %zmm0 1639; DQNOVL-NEXT: retq 1640 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer 1641 %1 = sitofp <8 x i1> %cmpres to <8 x double> 1642 ret <8 x double> %1 1643} 1644 1645define <8 x float> @sbto8f32(<8 x float> %a) { 1646; ALL-LABEL: sbto8f32: 1647; ALL: # %bb.0: 1648; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 1649; ALL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 1650; ALL-NEXT: vcvtdq2ps %ymm0, %ymm0 1651; ALL-NEXT: retq 1652 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer 1653 %1 = sitofp <8 x i1> %cmpres to <8 x float> 1654 ret <8 x float> %1 1655} 1656 1657define <4 x float> @sbto4f32(<4 x float> %a) { 1658; ALL-LABEL: sbto4f32: 1659; ALL: # %bb.0: 1660; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 1661; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 1662; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0 1663; ALL-NEXT: retq 1664 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer 1665 %1 = sitofp <4 x i1> %cmpres to <4 x float> 1666 ret <4 x float> %1 1667} 1668 1669define <4 x double> @sbto4f64(<4 x double> %a) { 1670; NOVL-LABEL: sbto4f64: 1671; NOVL: # %bb.0: 1672; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1673; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 1674; NOVL-NEXT: vpmovqd %zmm0, %ymm0 1675; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 1676; NOVL-NEXT: retq 1677; 1678; VLDQ-LABEL: sbto4f64: 1679; VLDQ: # %bb.0: 1680; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1681; VLDQ-NEXT: vcmpltpd %ymm0, %ymm1, %k0 1682; VLDQ-NEXT: vpmovm2d %k0, %xmm0 1683; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0 1684; VLDQ-NEXT: retq 1685; 1686; VLNODQ-LABEL: sbto4f64: 1687; VLNODQ: # %bb.0: 1688; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1689; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1 1690; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 1691; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1692; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0 1693; VLNODQ-NEXT: retq 1694 %cmpres = fcmp ogt <4 x double> %a, 
zeroinitializer 1695 %1 = sitofp <4 x i1> %cmpres to <4 x double> 1696 ret <4 x double> %1 1697} 1698 1699define <2 x float> @sbto2f32(<2 x float> %a) { 1700; ALL-LABEL: sbto2f32: 1701; ALL: # %bb.0: 1702; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 1703; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 1704; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0 1705; ALL-NEXT: retq 1706 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer 1707 %1 = sitofp <2 x i1> %cmpres to <2 x float> 1708 ret <2 x float> %1 1709} 1710 1711define <2 x double> @sbto2f64(<2 x double> %a) { 1712; NOVL-LABEL: sbto2f64: 1713; NOVL: # %bb.0: 1714; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1715; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 1716; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1717; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0 1718; NOVL-NEXT: retq 1719; 1720; VLDQ-LABEL: sbto2f64: 1721; VLDQ: # %bb.0: 1722; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1723; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0 1724; VLDQ-NEXT: vpmovm2d %k0, %xmm0 1725; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 1726; VLDQ-NEXT: retq 1727; 1728; VLNODQ-LABEL: sbto2f64: 1729; VLNODQ: # %bb.0: 1730; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1731; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1 1732; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 1733; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1734; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 1735; VLNODQ-NEXT: retq 1736 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer 1737 %1 = sitofp <2 x i1> %cmpres to <2 x double> 1738 ret <2 x double> %1 1739} 1740 1741define <16 x float> @ucto16f32(<16 x i8> %a) { 1742; ALL-LABEL: ucto16f32: 1743; ALL: # %bb.0: 1744; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1745; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1746; ALL-NEXT: retq 1747 %b = uitofp <16 x i8> %a to <16 x float> 1748 ret <16 x float>%b 1749} 1750 1751define <8 x double> @ucto8f64(<8 x i8> %a) { 1752; ALL-LABEL: ucto8f64: 1753; ALL: # %bb.0: 1754; ALL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1755; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1756; ALL-NEXT: retq 1757 %b = uitofp <8 x i8> %a to <8 x double> 1758 ret <8 x double> %b 1759} 1760 1761define <16 x float> @swto16f32(<16 x i16> %a) { 1762; ALL-LABEL: swto16f32: 1763; ALL: # %bb.0: 1764; ALL-NEXT: vpmovsxwd %ymm0, %zmm0 1765; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1766; ALL-NEXT: retq 1767 %b = sitofp <16 x i16> %a to <16 x float> 1768 ret <16 x float> %b 1769} 1770 1771define <8 x double> @swto8f64(<8 x i16> %a) { 1772; ALL-LABEL: swto8f64: 1773; ALL: # %bb.0: 1774; ALL-NEXT: vpmovsxwd %xmm0, %ymm0 1775; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1776; ALL-NEXT: retq 1777 %b = sitofp <8 x i16> %a to <8 x double> 1778 ret <8 x double> %b 1779} 1780 1781define <16 x double> @swto16f64(<16 x i16> %a) { 1782; ALL-LABEL: swto16f64: 1783; ALL: # %bb.0: 1784; ALL-NEXT: vpmovsxwd %ymm0, %zmm1 1785; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1786; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1787; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1788; ALL-NEXT: retq 1789 %b = sitofp <16 x i16> %a to <16 x double> 1790 ret <16 x double> %b 1791} 1792 1793define <16 x double> @ucto16f64(<16 x i8> %a) { 1794; ALL-LABEL: ucto16f64: 1795; ALL: 
# %bb.0: 1796; ALL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1797; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1798; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1799; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1800; ALL-NEXT: retq 1801 %b = uitofp <16 x i8> %a to <16 x double> 1802 ret <16 x double> %b 1803} 1804 1805define <16 x float> @uwto16f32(<16 x i16> %a) { 1806; ALL-LABEL: uwto16f32: 1807; ALL: # %bb.0: 1808; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1809; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1810; ALL-NEXT: retq 1811 %b = uitofp <16 x i16> %a to <16 x float> 1812 ret <16 x float> %b 1813} 1814 1815define <8 x double> @uwto8f64(<8 x i16> %a) { 1816; ALL-LABEL: uwto8f64: 1817; ALL: # %bb.0: 1818; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1819; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1820; ALL-NEXT: retq 1821 %b = uitofp <8 x i16> %a to <8 x double> 1822 ret <8 x double> %b 1823} 1824 1825define <16 x double> @uwto16f64(<16 x i16> %a) { 1826; ALL-LABEL: uwto16f64: 1827; ALL: # %bb.0: 1828; ALL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1829; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1830; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1831; ALL-NEXT: 
vcvtdq2pd %ymm1, %zmm1
; ALL-NEXT:    retq
  %b = uitofp <16 x i16> %a to <16 x double>
  ret <16 x double> %b
}

; sitofp <16 x i32> to <16 x float>.
define <16 x float> @sito16f32(<16 x i32> %a) {
; ALL-LABEL: sito16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}

; sitofp <16 x i32> to <16 x double>; the result occupies two zmm registers.
define <16 x double> @sito16f64(<16 x i32> %a) {
; ALL-LABEL: sito16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm2
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm1
; ALL-NEXT:    vmovaps %zmm2, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %b
}

; uitofp <16 x i16> to <16 x float>; expected as zero-extend followed by a signed convert.
define <16 x float> @usto16f32(<16 x i16> %a) {
; ALL-LABEL: usto16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %b
}

; uitofp of the <16 x i1> result of (icmp slt %a, 0) to <16 x float>.
define <16 x float> @ubto16f32(<16 x i32> %a) {
; NODQ-LABEL: ubto16f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT:    vpsrld $31, %zmm0, %zmm0
; NODQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ubto16f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
; VLDQ-NEXT:    vpsrld $31, %zmm0, %zmm0
; VLDQ-NEXT:    vcvtdq2ps %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ubto16f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
; DQNOVL-NEXT:    vpsrld $31, %zmm0, %zmm0
; DQNOVL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = uitofp <16 x i1> %mask to <16 x float>
  ret <16 x float> %1
}

; uitofp of the <16 x i1> result of (icmp slt %a, 0) to <16 x double>.
define <16 x double> @ubto16f64(<16 x i32> %a) {
; NODQ-LABEL: ubto16f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT:    vpsrld $31, %zmm0, %zmm1
; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; NODQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ubto16f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovd2m %zmm0, %k0
; VLDQ-NEXT:    vpmovm2d %k0, %zmm0
; VLDQ-NEXT:    vpsrld $31, %zmm0, %zmm1
; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm0
; VLDQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; VLDQ-NEXT:    vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ubto16f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k0
; DQNOVL-NEXT:    vpmovm2d %k0, %zmm0
; DQNOVL-NEXT:    vpsrld $31, %zmm0, %zmm1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0
; DQNOVL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm1
; DQNOVL-NEXT:    retq
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = uitofp <16 x i1> %mask to <16 x double>
  ret <16 x double> %1
}

; uitofp of the <8 x i1> result of (icmp slt %a, 0) to <8 x float>.
define <8 x float> @ubto8f32(<8 x i32> %a) {
; NOVL-LABEL: ubto8f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; NOVL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; NOVL-NEXT:    vpand %ymm1, %ymm0, %ymm0
; NOVL-NEXT:    retq
;
; VL-LABEL: ubto8f32:
; VL:       # %bb.0:
; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0
; VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; VL-NEXT:    retq
  %mask = icmp slt <8 x i32> %a, zeroinitializer
  %1 = uitofp <8 x i1> %mask to <8 x float>
  ret <8 x float> %1
}

; uitofp of the <8 x i1> result of (icmp slt %a, 0) to <8 x double>.
define <8 x double> @ubto8f64(<8 x i32> %a) {
; ALL-LABEL: ubto8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $31, %ymm0, %ymm0
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %mask = icmp slt <8 x i32> %a, zeroinitializer
  %1 = uitofp <8 x i1> %mask to <8 x double>
  ret <8 x double> %1
}

; uitofp of the <4 x i1> result of (icmp slt %a, 0) to <4 x float>.
define <4 x float> @ubto4f32(<4 x i32> %a) {
; NOVL-LABEL: ubto4f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
; NOVL-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: ubto4f32:
; VL:       # %bb.0:
; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
; VL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; VL-NEXT:    retq
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %1 = uitofp <4 x i1> %mask to <4 x float>
  ret <4 x float> %1
}

; uitofp of the <4 x i1> result of (icmp slt %a, 0) to <4 x double>.
define <4 x double> @ubto4f64(<4 x i32> %a) {
; ALL-LABEL: ubto4f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsrld $31, %xmm0, %xmm0
; ALL-NEXT:    vcvtdq2pd %xmm0, %ymm0
; ALL-NEXT:    retq
  %mask = icmp slt <4 x i32> %a, zeroinitializer
  %1 = uitofp <4 x i1> %mask to <4 x double>
  ret <4 x double> %1
}

; uitofp of the <2 x i1> result of (icmp ne %a, 0) to <2 x float>.
define <2 x float> @ubto2f32(<2 x i32> %a) {
; NOVL-LABEL: ubto2f32:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
; NOVL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: ubto2f32:
; VL:       # %bb.0:
; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; VL-NEXT:    retq
  %mask = icmp ne <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x float>
  ret <2 x float> %1
}

; uitofp of the <2 x i1> result of (icmp ne %a, 0) to <2 x double>.
define <2 x double> @ubto2f64(<2 x i32> %a) {
; NOVL-LABEL: ubto2f64:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; NOVL-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; NOVL-NEXT:    vcvtdq2pd %xmm0, %xmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: ubto2f64:
; VL:       # %bb.0:
; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; VL-NEXT:    vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; VL-NEXT:    vcvtdq2pd %xmm0, %xmm0
; VL-NEXT:    retq
  %mask = icmp ne <2 x i32> %a, zeroinitializer
  %1 = uitofp <2 x i1> %mask to <2 x double>
  ret <2 x double> %1
}

; fptoui <2 x double> to <2 x i1>; the result selects %passthru or zero per lane.
define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f64toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT:    vcvttpd2udq %zmm0, %ymm0
; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f64toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f64toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2udq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f64toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptoui <2 x double> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}

; fptoui <4 x double> to <4 x i1>; the result selects %passthru or zero per lane.
define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f64toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f64toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f64toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f64toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptoui <4 x double> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}

; fptoui <8 x double> to <8 x i1>; the result selects %passthru or zero per lane.
define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f64toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f64toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f64toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f64toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptoui <8 x double> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}

; fptoui <2 x float> to <2 x i1>; the result selects %passthru or zero per lane.
define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f32toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f32toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptoui <2 x float> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}

; fptoui <4 x float> to <4 x i1>; the result selects %passthru or zero per lane.
define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f32toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f32toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptoui <4 x float> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}

; fptoui <8 x float> to <8 x i1>; the result selects %passthru or zero per lane.
define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f32toub:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; NOVLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLDQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f32toub:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLNODQ-NEXT:    vpslld $31, %ymm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
; DQNOVL-NEXT:    vpslld $31, %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptoui <8 x float> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}

; fptoui <16 x float> to <16 x i1>; the result selects %passthru or zero per lane.
define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) {
; NODQ-LABEL: test_16f32toub:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; NODQ-NEXT:    vpslld $31, %zmm0, %zmm0
; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: test_16f32toub:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; VLDQ-NEXT:    vpslld $31, %zmm0, %zmm0
; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: test_16f32toub:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
; DQNOVL-NEXT:    vpslld $31, %zmm0, %zmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptoui <16 x float> %a to <16 x i1>
  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
  ret <16 x i32> %select
}

; fptosi <2 x double> to <2 x i1>; the result selects %passthru or zero per lane.
define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f64tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f64tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f64tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vpslld $31, %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f64tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    vcvttpd2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpslld $31, %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptosi <2 x double> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}

; fptosi <4 x double> to <4 x i1>; the result selects %passthru or zero per lane.
define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f64tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f64tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f64tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %ymm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f64tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttpd2dq %ymm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptosi <4 x double> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}

; fptosi <8 x double> to <8 x i1>; the result selects %passthru or zero per lane.
define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f64tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f64tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f64tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f64tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptosi <8 x double> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}

; fptosi <2 x float> to <2 x i1>; the result selects %passthru or zero per lane.
define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) {
; NOVLDQ-LABEL: test_2f32tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_2f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_2f32tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_2f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = fptosi <2 x float> %a to <2 x i1>
  %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer
  ret <2 x i64> %select
}

; fptosi <4 x float> to <4 x i1>; the result selects %passthru or zero per lane.
define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) {
; NOVLDQ-LABEL: test_4f32tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_4f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLDQ-NEXT:    vpmovd2m %xmm0, %k1
; VLDQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_4f32tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; VLNODQ-NEXT:    vptestmd %xmm0, %xmm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_4f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; DQNOVL-NEXT:    vcvttps2dq %xmm0, %xmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %mask = fptosi <4 x float> %a to <4 x i1>
  %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer
  ret <4 x i64> %select
}

; fptosi <8 x float> to <8 x i1>; the result selects %passthru or zero per lane.
define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) {
; NOVLDQ-LABEL: test_8f32tosb:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; NOVLDQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NOVLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_8f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLDQ-NEXT:    vpmovd2m %ymm0, %k1
; VLDQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_8f32tosb:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vcvttps2dq %ymm0, %ymm0
; VLNODQ-NEXT:    vptestmd %ymm0, %ymm0, %k1
; VLNODQ-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_8f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %ymm0, %ymm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptosi <8 x float> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}

; fptosi <16 x float> to <16 x i1>; the result selects %passthru or zero per lane.
define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
; NODQ-LABEL: test_16f32tosb:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; NODQ-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NODQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: test_16f32tosb:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; VLDQ-NEXT:    vpmovd2m %zmm0, %k1
; VLDQ-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: test_16f32tosb:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvttps2dq %zmm0, %zmm0
; DQNOVL-NEXT:    vpmovd2m %zmm0, %k1
; DQNOVL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
  %mask = fptosi <16 x float> %a to <16 x i1>
  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
  ret <16 x i32> %select
}

; Loads <2 x i32> from %a, converts it with sitofp to <2 x double>, and zeroes
; the lanes where (icmp slt %c, 0) is false.
define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
; NOVLDQ-LABEL: test_sito2f64_mask_load:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; NOVLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0
; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_sito2f64_mask_load:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
; VLDQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_sito2f64_mask_load:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
; VLNODQ-NEXT:    vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_sito2f64_mask_load:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
; DQNOVL-NEXT:    vcvtdq2pd (%rdi), %xmm0
; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = icmp slt <2 x i64> %c, zeroinitializer
  %ld = load <2 x i32>, <2 x i32> *%a
  %cvt = sitofp <2 x i32> %ld to <2 x double>
  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
  ret <2 x double> %sel
}

; Loads <2 x i32> from %a, converts it with uitofp to <2 x double>, and zeroes
; the lanes where (icmp slt %c, 0) is false.
define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
; NOVLDQ-LABEL: test_uito2f64_mask_load:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; NOVLDQ-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOVLDQ-NEXT:    vcvtudq2pd %ymm0, %zmm0
; NOVLDQ-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: test_uito2f64_mask_load:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpmovq2m %xmm0, %k1
; VLDQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: test_uito2f64_mask_load:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
; VLNODQ-NEXT:    vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: test_uito2f64_mask_load:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vpmovq2m %zmm0, %k1
; DQNOVL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; DQNOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
; DQNOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %mask = icmp slt <2 x i64> %c, zeroinitializer
  %ld = load <2 x i32>, <2 x i32> *%a
  %cvt = uitofp <2 x i32> %ld to <2 x double>
  %sel = select <2 x i1> %mask, <2 x double> %cvt, <2 x double> zeroinitializer
  ret <2 x double> %sel
}