; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c

define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_ps:
; SSE: # %bb.0:
; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvt_ss2si:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvt_ss2si:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2ssl %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtss_f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtss_f32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtss_f32:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax #
encoding: [0xf3,0x0f,0x2d,0xc0] 816; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 817; 818; AVX1-LABEL: test_mm_cvtss_si32: 819; AVX1: # %bb.0: 820; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0] 821; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 822; 823; AVX512-LABEL: test_mm_cvtss_si32: 824; AVX512: # %bb.0: 825; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] 826; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 827 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 828 ret i32 %res 829} 830 831define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind { 832; SSE-LABEL: test_mm_cvttss_si: 833; SSE: # %bb.0: 834; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] 835; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 836; 837; AVX1-LABEL: test_mm_cvttss_si: 838; AVX1: # %bb.0: 839; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] 840; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 841; 842; AVX512-LABEL: test_mm_cvttss_si: 843; AVX512: # %bb.0: 844; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] 845; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 846 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 847 ret i32 %res 848} 849declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 850 851define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { 852; SSE-LABEL: test_mm_cvttss_si32: 853; SSE: # %bb.0: 854; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] 855; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 856; 857; AVX1-LABEL: test_mm_cvttss_si32: 858; AVX1: # %bb.0: 859; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] 860; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 861; 862; AVX512-LABEL: test_mm_cvttss_si32: 863; AVX512: # %bb.0: 864; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] 865; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 866 %res = call 
i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 867 ret i32 %res 868} 869 870define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 871; SSE-LABEL: test_mm_div_ps: 872; SSE: # %bb.0: 873; SSE-NEXT: divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1] 874; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 875; 876; AVX1-LABEL: test_mm_div_ps: 877; AVX1: # %bb.0: 878; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1] 879; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 880; 881; AVX512-LABEL: test_mm_div_ps: 882; AVX512: # %bb.0: 883; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] 884; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 885 %res = fdiv <4 x float> %a0, %a1 886 ret <4 x float> %res 887} 888 889define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 890; SSE-LABEL: test_mm_div_ss: 891; SSE: # %bb.0: 892; SSE-NEXT: divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1] 893; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 894; 895; AVX1-LABEL: test_mm_div_ss: 896; AVX1: # %bb.0: 897; AVX1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1] 898; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 899; 900; AVX512-LABEL: test_mm_div_ss: 901; AVX512: # %bb.0: 902; AVX512-NEXT: vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1] 903; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 904 %ext0 = extractelement <4 x float> %a0, i32 0 905 %ext1 = extractelement <4 x float> %a1, i32 0 906 %fdiv = fdiv float %ext0, %ext1 907 %res = insertelement <4 x float> %a0, float %fdiv, i32 0 908 ret <4 x float> %res 909} 910 911define i32 @test_MM_GET_EXCEPTION_MASK() nounwind { 912; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 913; X86-SSE: # %bb.0: 914; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 915; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 916; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 917; X86-SSE-NEXT: movl (%esp), %eax # 
encoding: [0x8b,0x04,0x24] 918; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 919; X86-SSE-NEXT: # imm = 0x1F80 920; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 921; X86-SSE-NEXT: retl # encoding: [0xc3] 922; 923; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 924; X86-AVX: # %bb.0: 925; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 926; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 927; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 928; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 929; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 930; X86-AVX-NEXT: # imm = 0x1F80 931; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 932; X86-AVX-NEXT: retl # encoding: [0xc3] 933; 934; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 935; X64-SSE: # %bb.0: 936; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 937; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 938; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 939; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 940; X64-SSE-NEXT: # imm = 0x1F80 941; X64-SSE-NEXT: retq # encoding: [0xc3] 942; 943; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 944; X64-AVX: # %bb.0: 945; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 946; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 947; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 948; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 949; X64-AVX-NEXT: # imm = 0x1F80 950; X64-AVX-NEXT: retq # encoding: [0xc3] 951 %1 = alloca i32, align 4 952 %2 = bitcast i32* %1 to i8* 953 call void @llvm.x86.sse.stmxcsr(i8* %2) 954 %3 = load i32, i32* %1, align 4 955 %4 = and i32 %3, 8064 956 ret i32 %4 957} 958declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone 959 960define i32 @test_MM_GET_EXCEPTION_STATE() nounwind { 961; X86-SSE-LABEL: 
test_MM_GET_EXCEPTION_STATE: 962; X86-SSE: # %bb.0: 963; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 964; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 965; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 966; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 967; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 968; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 969; X86-SSE-NEXT: retl # encoding: [0xc3] 970; 971; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 972; X86-AVX: # %bb.0: 973; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 974; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 975; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 976; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 977; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 978; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 979; X86-AVX-NEXT: retl # encoding: [0xc3] 980; 981; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: 982; X64-SSE: # %bb.0: 983; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 984; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 985; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 986; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 987; X64-SSE-NEXT: retq # encoding: [0xc3] 988; 989; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 990; X64-AVX: # %bb.0: 991; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 992; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 993; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 994; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 995; X64-AVX-NEXT: retq # encoding: [0xc3] 996 %1 = alloca i32, align 4 997 %2 = bitcast i32* %1 to i8* 998 call void @llvm.x86.sse.stmxcsr(i8* %2) 999 %3 = load i32, i32* %1, align 4 1000 %4 = and i32 %3, 63 1001 ret i32 %4 1002} 1003 1004define i32 @test_MM_GET_FLUSH_ZERO_MODE() 
nounwind { 1005; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1006; X86-SSE: # %bb.0: 1007; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1008; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1009; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1010; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1011; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1012; X86-SSE-NEXT: # imm = 0x8000 1013; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1014; X86-SSE-NEXT: retl # encoding: [0xc3] 1015; 1016; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1017; X86-AVX: # %bb.0: 1018; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1019; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1020; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1021; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1022; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1023; X86-AVX-NEXT: # imm = 0x8000 1024; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1025; X86-AVX-NEXT: retl # encoding: [0xc3] 1026; 1027; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1028; X64-SSE: # %bb.0: 1029; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1030; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1031; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1032; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1033; X64-SSE-NEXT: # imm = 0x8000 1034; X64-SSE-NEXT: retq # encoding: [0xc3] 1035; 1036; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1037; X64-AVX: # %bb.0: 1038; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1039; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1040; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1041; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1042; X64-AVX-NEXT: # imm = 0x8000 1043; X64-AVX-NEXT: retq # 
encoding: [0xc3] 1044 %1 = alloca i32, align 4 1045 %2 = bitcast i32* %1 to i8* 1046 call void @llvm.x86.sse.stmxcsr(i8* %2) 1047 %3 = load i32, i32* %1, align 4 1048 %4 = and i32 %3, 32768 1049 ret i32 %4 1050} 1051 1052define i32 @test_MM_GET_ROUNDING_MODE() nounwind { 1053; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1054; X86-SSE: # %bb.0: 1055; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1056; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1057; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1058; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1059; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1060; X86-SSE-NEXT: # imm = 0x6000 1061; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1062; X86-SSE-NEXT: retl # encoding: [0xc3] 1063; 1064; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1065; X86-AVX: # %bb.0: 1066; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1067; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1068; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1069; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1070; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1071; X86-AVX-NEXT: # imm = 0x6000 1072; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1073; X86-AVX-NEXT: retl # encoding: [0xc3] 1074; 1075; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1076; X64-SSE: # %bb.0: 1077; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1078; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1079; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1080; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1081; X64-SSE-NEXT: # imm = 0x6000 1082; X64-SSE-NEXT: retq # encoding: [0xc3] 1083; 1084; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1085; X64-AVX: # %bb.0: 1086; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1087; X64-AVX-NEXT: vstmxcsr (%rax) # 
encoding: [0xc5,0xf8,0xae,0x18] 1088; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1089; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1090; X64-AVX-NEXT: # imm = 0x6000 1091; X64-AVX-NEXT: retq # encoding: [0xc3] 1092 %1 = alloca i32, align 4 1093 %2 = bitcast i32* %1 to i8* 1094 call void @llvm.x86.sse.stmxcsr(i8* %2) 1095 %3 = load i32, i32* %1, align 4 1096 %4 = and i32 %3, 24576 1097 ret i32 %4 1098} 1099 1100define i32 @test_mm_getcsr() nounwind { 1101; X86-SSE-LABEL: test_mm_getcsr: 1102; X86-SSE: # %bb.0: 1103; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1104; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1105; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1106; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1107; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1108; X86-SSE-NEXT: retl # encoding: [0xc3] 1109; 1110; X86-AVX-LABEL: test_mm_getcsr: 1111; X86-AVX: # %bb.0: 1112; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1113; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1114; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1115; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1116; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1117; X86-AVX-NEXT: retl # encoding: [0xc3] 1118; 1119; X64-SSE-LABEL: test_mm_getcsr: 1120; X64-SSE: # %bb.0: 1121; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1122; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1123; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1124; X64-SSE-NEXT: retq # encoding: [0xc3] 1125; 1126; X64-AVX-LABEL: test_mm_getcsr: 1127; X64-AVX: # %bb.0: 1128; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1129; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1130; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1131; X64-AVX-NEXT: retq # 
encoding: [0xc3] 1132 %1 = alloca i32, align 4 1133 %2 = bitcast i32* %1 to i8* 1134 call void @llvm.x86.sse.stmxcsr(i8* %2) 1135 %3 = load i32, i32* %1, align 4 1136 ret i32 %3 1137} 1138 1139define <4 x float> @test_mm_load_ps(float* %a0) nounwind { 1140; X86-SSE-LABEL: test_mm_load_ps: 1141; X86-SSE: # %bb.0: 1142; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1143; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1144; X86-SSE-NEXT: retl # encoding: [0xc3] 1145; 1146; X86-AVX1-LABEL: test_mm_load_ps: 1147; X86-AVX1: # %bb.0: 1148; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1149; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] 1150; X86-AVX1-NEXT: retl # encoding: [0xc3] 1151; 1152; X86-AVX512-LABEL: test_mm_load_ps: 1153; X86-AVX512: # %bb.0: 1154; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1155; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1156; X86-AVX512-NEXT: retl # encoding: [0xc3] 1157; 1158; X64-SSE-LABEL: test_mm_load_ps: 1159; X64-SSE: # %bb.0: 1160; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1161; X64-SSE-NEXT: retq # encoding: [0xc3] 1162; 1163; X64-AVX1-LABEL: test_mm_load_ps: 1164; X64-AVX1: # %bb.0: 1165; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 1166; X64-AVX1-NEXT: retq # encoding: [0xc3] 1167; 1168; X64-AVX512-LABEL: test_mm_load_ps: 1169; X64-AVX512: # %bb.0: 1170; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1171; X64-AVX512-NEXT: retq # encoding: [0xc3] 1172 %arg0 = bitcast float* %a0 to <4 x float>* 1173 %res = load <4 x float>, <4 x float>* %arg0, align 16 1174 ret <4 x float> %res 1175} 1176 1177define <4 x float> @test_mm_load_ps1(float* %a0) nounwind { 1178; X86-SSE-LABEL: test_mm_load_ps1: 1179; X86-SSE: # %bb.0: 1180; X86-SSE-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1181; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1182; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1183; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1184; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1185; X86-SSE-NEXT: retl # encoding: [0xc3] 1186; 1187; X86-AVX1-LABEL: test_mm_load_ps1: 1188; X86-AVX1: # %bb.0: 1189; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1190; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1191; X86-AVX1-NEXT: retl # encoding: [0xc3] 1192; 1193; X86-AVX512-LABEL: test_mm_load_ps1: 1194; X86-AVX512: # %bb.0: 1195; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1196; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1197; X86-AVX512-NEXT: retl # encoding: [0xc3] 1198; 1199; X64-SSE-LABEL: test_mm_load_ps1: 1200; X64-SSE: # %bb.0: 1201; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1202; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1203; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1204; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1205; X64-SSE-NEXT: retq # encoding: [0xc3] 1206; 1207; X64-AVX1-LABEL: test_mm_load_ps1: 1208; X64-AVX1: # %bb.0: 1209; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1210; X64-AVX1-NEXT: retq # encoding: [0xc3] 1211; 1212; X64-AVX512-LABEL: test_mm_load_ps1: 1213; X64-AVX512: # %bb.0: 1214; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1215; X64-AVX512-NEXT: retq # encoding: [0xc3] 1216 %ld = load float, float* %a0, align 4 1217 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1218 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1219 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1220 %res3 = 
insertelement <4 x float> %res2, float %ld, i32 3 1221 ret <4 x float> %res3 1222} 1223 1224define <4 x float> @test_mm_load_ss(float* %a0) nounwind { 1225; X86-SSE-LABEL: test_mm_load_ss: 1226; X86-SSE: # %bb.0: 1227; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1228; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1229; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1230; X86-SSE-NEXT: retl # encoding: [0xc3] 1231; 1232; X86-AVX1-LABEL: test_mm_load_ss: 1233; X86-AVX1: # %bb.0: 1234; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1235; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00] 1236; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1237; X86-AVX1-NEXT: retl # encoding: [0xc3] 1238; 1239; X86-AVX512-LABEL: test_mm_load_ss: 1240; X86-AVX512: # %bb.0: 1241; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1242; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] 1243; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1244; X86-AVX512-NEXT: retl # encoding: [0xc3] 1245; 1246; X64-SSE-LABEL: test_mm_load_ss: 1247; X64-SSE: # %bb.0: 1248; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1249; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1250; X64-SSE-NEXT: retq # encoding: [0xc3] 1251; 1252; X64-AVX1-LABEL: test_mm_load_ss: 1253; X64-AVX1: # %bb.0: 1254; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07] 1255; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1256; X64-AVX1-NEXT: retq # encoding: [0xc3] 1257; 1258; X64-AVX512-LABEL: test_mm_load_ss: 1259; X64-AVX512: # %bb.0: 1260; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] 1261; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1262; X64-AVX512-NEXT: retq # encoding: [0xc3] 1263 %ld = load float, float* %a0, align 1 1264 %res0 = insertelement <4 x float> undef, 
float %ld, i32 0 1265 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 1266 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 1267 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 1268 ret <4 x float> %res3 1269} 1270 1271define <4 x float> @test_mm_load1_ps(float* %a0) nounwind { 1272; X86-SSE-LABEL: test_mm_load1_ps: 1273; X86-SSE: # %bb.0: 1274; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1275; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1276; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1277; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1278; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1279; X86-SSE-NEXT: retl # encoding: [0xc3] 1280; 1281; X86-AVX1-LABEL: test_mm_load1_ps: 1282; X86-AVX1: # %bb.0: 1283; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1284; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1285; X86-AVX1-NEXT: retl # encoding: [0xc3] 1286; 1287; X86-AVX512-LABEL: test_mm_load1_ps: 1288; X86-AVX512: # %bb.0: 1289; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1290; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1291; X86-AVX512-NEXT: retl # encoding: [0xc3] 1292; 1293; X64-SSE-LABEL: test_mm_load1_ps: 1294; X64-SSE: # %bb.0: 1295; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1296; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1297; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1298; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1299; X64-SSE-NEXT: retq # encoding: [0xc3] 1300; 1301; X64-AVX1-LABEL: test_mm_load1_ps: 1302; X64-AVX1: # %bb.0: 1303; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1304; X64-AVX1-NEXT: retq # encoding: [0xc3] 1305; 1306; X64-AVX512-LABEL: test_mm_load1_ps: 1307; X64-AVX512: # %bb.0: 1308; 
X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1309; X64-AVX512-NEXT: retq # encoding: [0xc3] 1310 %ld = load float, float* %a0, align 4 1311 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1312 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1313 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1314 %res3 = insertelement <4 x float> %res2, float %ld, i32 3 1315 ret <4 x float> %res3 1316} 1317 1318define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { 1319; X86-SSE-LABEL: test_mm_loadh_pi: 1320; X86-SSE: # %bb.0: 1321; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1322; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] 1323; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1324; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] 1325; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1326; X86-SSE-NEXT: shufps $0, %xmm1, %xmm2 # encoding: [0x0f,0xc6,0xd1,0x00] 1327; X86-SSE-NEXT: # xmm2 = xmm2[0,0],xmm1[0,0] 1328; X86-SSE-NEXT: shufps $36, %xmm2, %xmm0 # encoding: [0x0f,0xc6,0xc2,0x24] 1329; X86-SSE-NEXT: # xmm0 = xmm0[0,1],xmm2[2,0] 1330; X86-SSE-NEXT: retl # encoding: [0xc3] 1331; 1332; X86-AVX1-LABEL: test_mm_loadh_pi: 1333; X86-AVX1: # %bb.0: 1334; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1335; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00] 1336; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] 1337; X86-AVX1-NEXT: retl # encoding: [0xc3] 1338; 1339; X86-AVX512-LABEL: test_mm_loadh_pi: 1340; X86-AVX512: # %bb.0: 1341; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1342; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00] 1343; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] 1344; X86-AVX512-NEXT: retl # encoding: [0xc3] 1345; 1346; X64-SSE-LABEL: test_mm_loadh_pi: 
1347; X64-SSE: # %bb.0: 1348; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] 1349; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] 1350; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] 1351; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] 1352; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] 1353; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1354; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] 1355; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1356; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1357; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1358; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1359; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1360; X64-SSE-NEXT: retq # encoding: [0xc3] 1361; 1362; X64-AVX1-LABEL: test_mm_loadh_pi: 1363; X64-AVX1: # %bb.0: 1364; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] 1365; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] 1366; X64-AVX1-NEXT: retq # encoding: [0xc3] 1367; 1368; X64-AVX512-LABEL: test_mm_loadh_pi: 1369; X64-AVX512: # %bb.0: 1370; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] 1371; X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] 1372; X64-AVX512-NEXT: retq # encoding: [0xc3] 1373 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1374 %ld = load <2 x float>, <2 x float>* %ptr 1375 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1376 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1377 ret <4 x float> %res 1378} 1379 1380define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { 1381; X86-SSE-LABEL: test_mm_loadl_pi: 1382; X86-SSE: # %bb.0: 1383; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1384; 
X86-SSE-NEXT: movss (%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x10] 1385; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1386; X86-SSE-NEXT: movss 4(%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x48,0x04] 1387; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1388; X86-SSE-NEXT: shufps $0, %xmm2, %xmm1 # encoding: [0x0f,0xc6,0xca,0x00] 1389; X86-SSE-NEXT: # xmm1 = xmm1[0,0],xmm2[0,0] 1390; X86-SSE-NEXT: shufps $226, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe2] 1391; X86-SSE-NEXT: # xmm1 = xmm1[2,0],xmm0[2,3] 1392; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 1393; X86-SSE-NEXT: retl # encoding: [0xc3] 1394; 1395; X86-AVX1-LABEL: test_mm_loadl_pi: 1396; X86-AVX1: # %bb.0: 1397; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1398; X86-AVX1-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00] 1399; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] 1400; X86-AVX1-NEXT: retl # encoding: [0xc3] 1401; 1402; X86-AVX512-LABEL: test_mm_loadl_pi: 1403; X86-AVX512: # %bb.0: 1404; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1405; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00] 1406; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] 1407; X86-AVX512-NEXT: retl # encoding: [0xc3] 1408; 1409; X64-SSE-LABEL: test_mm_loadl_pi: 1410; X64-SSE: # %bb.0: 1411; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] 1412; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] 1413; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] 1414; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] 1415; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] 1416; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1417; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] 1418; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1419; X64-SSE-NEXT: 
unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1420; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1421; X64-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] 1422; X64-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] 1423; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 1424; X64-SSE-NEXT: retq # encoding: [0xc3] 1425; 1426; X64-AVX1-LABEL: test_mm_loadl_pi: 1427; X64-AVX1: # %bb.0: 1428; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] 1429; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] 1430; X64-AVX1-NEXT: retq # encoding: [0xc3] 1431; 1432; X64-AVX512-LABEL: test_mm_loadl_pi: 1433; X64-AVX512: # %bb.0: 1434; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] 1435; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] 1436; X64-AVX512-NEXT: retq # encoding: [0xc3] 1437 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1438 %ld = load <2 x float>, <2 x float>* %ptr 1439 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1440 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1441 ret <4 x float> %res 1442} 1443 1444define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind { 1445; X86-SSE-LABEL: test_mm_loadr_ps: 1446; X86-SSE: # %bb.0: 1447; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1448; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1449; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1450; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1451; X86-SSE-NEXT: retl # encoding: [0xc3] 1452; 1453; X86-AVX1-LABEL: test_mm_loadr_ps: 1454; X86-AVX1: # %bb.0: 1455; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1456; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1457; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1458; X86-AVX1-NEXT: retl # encoding: 
[0xc3] 1459; 1460; X86-AVX512-LABEL: test_mm_loadr_ps: 1461; X86-AVX512: # %bb.0: 1462; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1463; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1464; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1465; X86-AVX512-NEXT: retl # encoding: [0xc3] 1466; 1467; X64-SSE-LABEL: test_mm_loadr_ps: 1468; X64-SSE: # %bb.0: 1469; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1470; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1471; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1472; X64-SSE-NEXT: retq # encoding: [0xc3] 1473; 1474; X64-AVX1-LABEL: test_mm_loadr_ps: 1475; X64-AVX1: # %bb.0: 1476; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1477; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1478; X64-AVX1-NEXT: retq # encoding: [0xc3] 1479; 1480; X64-AVX512-LABEL: test_mm_loadr_ps: 1481; X64-AVX512: # %bb.0: 1482; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1483; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1484; X64-AVX512-NEXT: retq # encoding: [0xc3] 1485 %arg0 = bitcast float* %a0 to <4 x float>* 1486 %ld = load <4 x float>, <4 x float>* %arg0, align 16 1487 %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1488 ret <4 x float> %res 1489} 1490 1491define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind { 1492; X86-SSE-LABEL: test_mm_loadu_ps: 1493; X86-SSE: # %bb.0: 1494; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1495; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 1496; X86-SSE-NEXT: retl # encoding: [0xc3] 1497; 1498; X86-AVX1-LABEL: test_mm_loadu_ps: 1499; X86-AVX1: # %bb.0: 1500; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1501; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: 
[0xc5,0xf8,0x10,0x00] 1502; X86-AVX1-NEXT: retl # encoding: [0xc3] 1503; 1504; X86-AVX512-LABEL: test_mm_loadu_ps: 1505; X86-AVX512: # %bb.0: 1506; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1507; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 1508; X86-AVX512-NEXT: retl # encoding: [0xc3] 1509; 1510; X64-SSE-LABEL: test_mm_loadu_ps: 1511; X64-SSE: # %bb.0: 1512; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 1513; X64-SSE-NEXT: retq # encoding: [0xc3] 1514; 1515; X64-AVX1-LABEL: test_mm_loadu_ps: 1516; X64-AVX1: # %bb.0: 1517; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] 1518; X64-AVX1-NEXT: retq # encoding: [0xc3] 1519; 1520; X64-AVX512-LABEL: test_mm_loadu_ps: 1521; X64-AVX512: # %bb.0: 1522; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 1523; X64-AVX512-NEXT: retq # encoding: [0xc3] 1524 %arg0 = bitcast float* %a0 to <4 x float>* 1525 %res = load <4 x float>, <4 x float>* %arg0, align 1 1526 ret <4 x float> %res 1527} 1528 1529define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) { 1530; SSE-LABEL: test_mm_max_ps: 1531; SSE: # %bb.0: 1532; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1] 1533; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1534; 1535; AVX1-LABEL: test_mm_max_ps: 1536; AVX1: # %bb.0: 1537; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1] 1538; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1539; 1540; AVX512-LABEL: test_mm_max_ps: 1541; AVX512: # %bb.0: 1542; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] 1543; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1544 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1545 ret <4 x float> %res 1546} 1547declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1548 1549define <4 x 
float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) { 1550; SSE-LABEL: test_mm_max_ss: 1551; SSE: # %bb.0: 1552; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1] 1553; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1554; 1555; AVX1-LABEL: test_mm_max_ss: 1556; AVX1: # %bb.0: 1557; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5f,0xc1] 1558; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1559; 1560; AVX512-LABEL: test_mm_max_ss: 1561; AVX512: # %bb.0: 1562; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1] 1563; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1564 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1565 ret <4 x float> %res 1566} 1567declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1568 1569define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) { 1570; SSE-LABEL: test_mm_min_ps: 1571; SSE: # %bb.0: 1572; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1] 1573; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1574; 1575; AVX1-LABEL: test_mm_min_ps: 1576; AVX1: # %bb.0: 1577; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1] 1578; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1579; 1580; AVX512-LABEL: test_mm_min_ps: 1581; AVX512: # %bb.0: 1582; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] 1583; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1584 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1585 ret <4 x float> %res 1586} 1587declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1588 1589define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) { 1590; SSE-LABEL: test_mm_min_ss: 1591; SSE: # %bb.0: 1592; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1] 1593; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1594; 1595; AVX1-LABEL: test_mm_min_ss: 1596; 
AVX1: # %bb.0: 1597; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1] 1598; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1599; 1600; AVX512-LABEL: test_mm_min_ss: 1601; AVX512: # %bb.0: 1602; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1] 1603; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1604 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1605 ret <4 x float> %res 1606} 1607declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1608 1609define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) { 1610; SSE-LABEL: test_mm_move_ss: 1611; SSE: # %bb.0: 1612; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 1613; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1614; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1615; 1616; AVX-LABEL: test_mm_move_ss: 1617; AVX: # %bb.0: 1618; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 1619; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1620; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1621 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1622 ret <4 x float> %res 1623} 1624 1625define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) { 1626; SSE-LABEL: test_mm_movehl_ps: 1627; SSE: # %bb.0: 1628; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1] 1629; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1] 1630; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1631; 1632; AVX1-LABEL: test_mm_movehl_ps: 1633; AVX1: # %bb.0: 1634; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0] 1635; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1] 1636; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1637; 1638; AVX512-LABEL: test_mm_movehl_ps: 1639; AVX512: # %bb.0: 1640; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0] 1641; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1] 1642; 
AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1643 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 1644 ret <4 x float> %res 1645} 1646 1647define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) { 1648; SSE-LABEL: test_mm_movelh_ps: 1649; SSE: # %bb.0: 1650; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1651; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1652; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1653; 1654; AVX1-LABEL: test_mm_movelh_ps: 1655; AVX1: # %bb.0: 1656; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1657; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1658; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1659; 1660; AVX512-LABEL: test_mm_movelh_ps: 1661; AVX512: # %bb.0: 1662; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1663; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1664; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1665 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1666 ret <4 x float> %res 1667} 1668 1669define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind { 1670; SSE-LABEL: test_mm_movemask_ps: 1671; SSE: # %bb.0: 1672; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0] 1673; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1674; 1675; AVX-LABEL: test_mm_movemask_ps: 1676; AVX: # %bb.0: 1677; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0] 1678; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1679 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) 1680 ret i32 %res 1681} 1682declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 1683 1684define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1685; SSE-LABEL: test_mm_mul_ps: 1686; SSE: # %bb.0: 1687; SSE-NEXT: mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1] 1688; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1689; 1690; AVX1-LABEL: test_mm_mul_ps: 1691; AVX1: # 
%bb.0: 1692; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1] 1693; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1694; 1695; AVX512-LABEL: test_mm_mul_ps: 1696; AVX512: # %bb.0: 1697; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] 1698; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1699 %res = fmul <4 x float> %a0, %a1 1700 ret <4 x float> %res 1701} 1702 1703define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 1704; SSE-LABEL: test_mm_mul_ss: 1705; SSE: # %bb.0: 1706; SSE-NEXT: mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1] 1707; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1708; 1709; AVX1-LABEL: test_mm_mul_ss: 1710; AVX1: # %bb.0: 1711; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1] 1712; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1713; 1714; AVX512-LABEL: test_mm_mul_ss: 1715; AVX512: # %bb.0: 1716; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1] 1717; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1718 %ext0 = extractelement <4 x float> %a0, i32 0 1719 %ext1 = extractelement <4 x float> %a1, i32 0 1720 %fmul = fmul float %ext0, %ext1 1721 %res = insertelement <4 x float> %a0, float %fmul, i32 0 1722 ret <4 x float> %res 1723} 1724 1725define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1726; SSE-LABEL: test_mm_or_ps: 1727; SSE: # %bb.0: 1728; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 1729; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1730; 1731; AVX1-LABEL: test_mm_or_ps: 1732; AVX1: # %bb.0: 1733; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 1734; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1735; 1736; AVX512-LABEL: test_mm_or_ps: 1737; AVX512: # %bb.0: 1738; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 1739; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1740 %arg0 = bitcast 
<4 x float> %a0 to <4 x i32> 1741 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 1742 %res = or <4 x i32> %arg0, %arg1 1743 %bc = bitcast <4 x i32> %res to <4 x float> 1744 ret <4 x float> %bc 1745} 1746 1747define void @test_mm_prefetch(i8* %a0) { 1748; X86-LABEL: test_mm_prefetch: 1749; X86: # %bb.0: 1750; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1751; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00] 1752; X86-NEXT: retl # encoding: [0xc3] 1753; 1754; X64-LABEL: test_mm_prefetch: 1755; X64: # %bb.0: 1756; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07] 1757; X64-NEXT: retq # encoding: [0xc3] 1758 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1) 1759 ret void 1760} 1761declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone 1762 1763define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) { 1764; SSE-LABEL: test_mm_rcp_ps: 1765; SSE: # %bb.0: 1766; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0] 1767; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1768; 1769; AVX-LABEL: test_mm_rcp_ps: 1770; AVX: # %bb.0: 1771; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0] 1772; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1773 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1774 ret <4 x float> %res 1775} 1776declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1777 1778define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) { 1779; SSE-LABEL: test_mm_rcp_ss: 1780; SSE: # %bb.0: 1781; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0] 1782; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1783; 1784; AVX-LABEL: test_mm_rcp_ss: 1785; AVX: # %bb.0: 1786; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0] 1787; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1788 %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) 1789 ret <4 x float> %rcp 1790} 1791declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1792 1793define <4 
x float> @test_mm_rsqrt_ps(<4 x float> %a0) { 1794; SSE-LABEL: test_mm_rsqrt_ps: 1795; SSE: # %bb.0: 1796; SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0] 1797; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1798; 1799; AVX-LABEL: test_mm_rsqrt_ps: 1800; AVX: # %bb.0: 1801; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x52,0xc0] 1802; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1803 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 1804 ret <4 x float> %res 1805} 1806declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1807 1808define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) { 1809; SSE-LABEL: test_mm_rsqrt_ss: 1810; SSE: # %bb.0: 1811; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0] 1812; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1813; 1814; AVX-LABEL: test_mm_rsqrt_ss: 1815; AVX: # %bb.0: 1816; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0] 1817; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1818 %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) 1819 ret <4 x float> %rsqrt 1820} 1821declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1822 1823define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind { 1824; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1825; X86-SSE: # %bb.0: 1826; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1827; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1828; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1829; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1830; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1831; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1832; X86-SSE-NEXT: # imm = 0xE07F 1833; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1834; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1835; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1836; X86-SSE-NEXT: popl %eax # encoding: 
[0x58] 1837; X86-SSE-NEXT: retl # encoding: [0xc3] 1838; 1839; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1840; X86-AVX: # %bb.0: 1841; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1842; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1843; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1844; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1845; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1846; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1847; X86-AVX-NEXT: # imm = 0xE07F 1848; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1849; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1850; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1851; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1852; X86-AVX-NEXT: retl # encoding: [0xc3] 1853; 1854; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1855; X64-SSE: # %bb.0: 1856; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1857; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1858; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1859; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1860; X64-SSE-NEXT: # imm = 0xE07F 1861; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1862; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1863; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1864; X64-SSE-NEXT: retq # encoding: [0xc3] 1865; 1866; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1867; X64-AVX: # %bb.0: 1868; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1869; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1870; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1871; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1872; X64-AVX-NEXT: # imm = 0xE07F 1873; 
X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1874; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1875; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1876; X64-AVX-NEXT: retq # encoding: [0xc3] 1877 %1 = alloca i32, align 4 1878 %2 = bitcast i32* %1 to i8* 1879 call void @llvm.x86.sse.stmxcsr(i8* %2) 1880 %3 = load i32, i32* %1 1881 %4 = and i32 %3, -8065 1882 %5 = or i32 %4, %a0 1883 store i32 %5, i32* %1 1884 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1885 ret void 1886} 1887declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone 1888 1889define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind { 1890; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1891; X86-SSE: # %bb.0: 1892; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1893; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1894; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1895; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1896; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1897; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1898; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1899; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1900; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1901; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1902; X86-SSE-NEXT: retl # encoding: [0xc3] 1903; 1904; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1905; X86-AVX: # %bb.0: 1906; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1907; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1908; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1909; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1910; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1911; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1912; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1913; X86-AVX-NEXT: movl %edx, (%esp) # encoding: 
[0x89,0x14,0x24] 1914; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1915; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1916; X86-AVX-NEXT: retl # encoding: [0xc3] 1917; 1918; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1919; X64-SSE: # %bb.0: 1920; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1921; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1922; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1923; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1924; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1925; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1926; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1927; X64-SSE-NEXT: retq # encoding: [0xc3] 1928; 1929; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1930; X64-AVX: # %bb.0: 1931; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1932; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1933; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1934; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1935; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1936; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1937; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1938; X64-AVX-NEXT: retq # encoding: [0xc3] 1939 %1 = alloca i32, align 4 1940 %2 = bitcast i32* %1 to i8* 1941 call void @llvm.x86.sse.stmxcsr(i8* %2) 1942 %3 = load i32, i32* %1 1943 %4 = and i32 %3, -64 1944 %5 = or i32 %4, %a0 1945 store i32 %5, i32* %1 1946 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1947 ret void 1948} 1949 1950define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind { 1951; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1952; X86-SSE: # %bb.0: 1953; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1954; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x08] 1955; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1956; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1957; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1958; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1959; X86-SSE-NEXT: # imm = 0xFFFF7FFF 1960; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1961; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1962; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1963; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1964; X86-SSE-NEXT: retl # encoding: [0xc3] 1965; 1966; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1967; X86-AVX: # %bb.0: 1968; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1969; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1970; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1971; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1972; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1973; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1974; X86-AVX-NEXT: # imm = 0xFFFF7FFF 1975; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1976; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1977; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1978; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1979; X86-AVX-NEXT: retl # encoding: [0xc3] 1980; 1981; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1982; X64-SSE: # %bb.0: 1983; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1984; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1985; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1986; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 1987; X64-SSE-NEXT: # imm = 0xFFFF7FFF 1988; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1989; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: 
[0x89,0x4c,0x24,0xfc] 1990; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1991; X64-SSE-NEXT: retq # encoding: [0xc3] 1992; 1993; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1994; X64-AVX: # %bb.0: 1995; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1996; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1997; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1998; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 1999; X64-AVX-NEXT: # imm = 0xFFFF7FFF 2000; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2001; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2002; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2003; X64-AVX-NEXT: retq # encoding: [0xc3] 2004 %1 = alloca i32, align 4 2005 %2 = bitcast i32* %1 to i8* 2006 call void @llvm.x86.sse.stmxcsr(i8* %2) 2007 %3 = load i32, i32* %1 2008 %4 = and i32 %3, -32769 2009 %5 = or i32 %4, %a0 2010 store i32 %5, i32* %1 2011 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2012 ret void 2013} 2014 2015define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2016; X86-SSE-LABEL: test_mm_set_ps: 2017; X86-SSE: # %bb.0: 2018; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2019; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2020; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2021; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2022; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2023; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2024; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] 2025; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2026; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04] 2027; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2028; 
X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 2029; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2030; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2031; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2032; X86-SSE-NEXT: retl # encoding: [0xc3] 2033; 2034; X86-AVX1-LABEL: test_mm_set_ps: 2035; X86-AVX1: # %bb.0: 2036; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2037; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2038; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2039; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2040; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2041; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2042; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] 2043; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2044; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20] 2045; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2046; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] 2047; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2048; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] 2049; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0] 2050; X86-AVX1-NEXT: retl # encoding: [0xc3] 2051; 2052; X86-AVX512-LABEL: test_mm_set_ps: 2053; X86-AVX512: # %bb.0: 2054; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2055; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2056; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2057; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2058; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x21,0xc1,0x10] 2059; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2060; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] 2061; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2062; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20] 2063; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2064; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] 2065; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2066; X86-AVX512-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] 2067; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0] 2068; X86-AVX512-NEXT: retl # encoding: [0xc3] 2069; 2070; X64-SSE-LABEL: test_mm_set_ps: 2071; X64-SSE: # %bb.0: 2072; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2073; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2074; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda] 2075; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 2076; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9] 2077; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0] 2078; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] 2079; X64-SSE-NEXT: retq # encoding: [0xc3] 2080; 2081; X64-AVX1-LABEL: test_mm_set_ps: 2082; X64-AVX1: # %bb.0: 2083; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2084; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2085; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2086; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2087; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2088; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2089; X64-AVX1-NEXT: retq # encoding: [0xc3] 2090; 2091; 
X64-AVX512-LABEL: test_mm_set_ps: 2092; X64-AVX512: # %bb.0: 2093; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2094; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2095; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2096; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2097; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2098; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2099; X64-AVX512-NEXT: retq # encoding: [0xc3] 2100 %res0 = insertelement <4 x float> undef, float %a3, i32 0 2101 %res1 = insertelement <4 x float> %res0, float %a2, i32 1 2102 %res2 = insertelement <4 x float> %res1, float %a1, i32 2 2103 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2104 ret <4 x float> %res3 2105} 2106 2107define <4 x float> @test_mm_set_ps1(float %a0) nounwind { 2108; X86-SSE-LABEL: test_mm_set_ps1: 2109; X86-SSE: # %bb.0: 2110; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2111; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2112; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2113; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2114; X86-SSE-NEXT: retl # encoding: [0xc3] 2115; 2116; X86-AVX1-LABEL: test_mm_set_ps1: 2117; X86-AVX1: # %bb.0: 2118; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2119; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2120; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2121; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2122; X86-AVX1-NEXT: retl # encoding: [0xc3] 2123; 2124; X86-AVX512-LABEL: test_mm_set_ps1: 2125; X86-AVX512: # %bb.0: 2126; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2127; X86-AVX512-NEXT: # 
xmm0 = mem[0],zero,zero,zero 2128; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2129; X86-AVX512-NEXT: retl # encoding: [0xc3] 2130; 2131; X64-SSE-LABEL: test_mm_set_ps1: 2132; X64-SSE: # %bb.0: 2133; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2134; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2135; X64-SSE-NEXT: retq # encoding: [0xc3] 2136; 2137; X64-AVX1-LABEL: test_mm_set_ps1: 2138; X64-AVX1: # %bb.0: 2139; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2140; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2141; X64-AVX1-NEXT: retq # encoding: [0xc3] 2142; 2143; X64-AVX512-LABEL: test_mm_set_ps1: 2144; X64-AVX512: # %bb.0: 2145; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2146; X64-AVX512-NEXT: retq # encoding: [0xc3] 2147 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2148 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2149 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2150 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2151 ret <4 x float> %res3 2152} 2153 2154define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { 2155; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2156; X86-SSE: # %bb.0: 2157; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2158; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2159; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2160; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 2161; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2162; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2163; X86-SSE-NEXT: # imm = 0x9FFF 2164; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2165; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2166; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 2167; X86-SSE-NEXT: popl %eax # 
encoding: [0x58] 2168; X86-SSE-NEXT: retl # encoding: [0xc3] 2169; 2170; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2171; X86-AVX: # %bb.0: 2172; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 2173; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2174; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2175; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 2176; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2177; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2178; X86-AVX-NEXT: # imm = 0x9FFF 2179; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2180; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2181; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 2182; X86-AVX-NEXT: popl %eax # encoding: [0x58] 2183; X86-AVX-NEXT: retl # encoding: [0xc3] 2184; 2185; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2186; X64-SSE: # %bb.0: 2187; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2188; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 2189; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2190; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2191; X64-SSE-NEXT: # imm = 0x9FFF 2192; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2193; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2194; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2195; X64-SSE-NEXT: retq # encoding: [0xc3] 2196; 2197; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2198; X64-AVX: # %bb.0: 2199; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2200; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 2201; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2202; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2203; X64-AVX-NEXT: # imm = 0x9FFF 2204; 
X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2205; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2206; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2207; X64-AVX-NEXT: retq # encoding: [0xc3] 2208 %1 = alloca i32, align 4 2209 %2 = bitcast i32* %1 to i8* 2210 call void @llvm.x86.sse.stmxcsr(i8* %2) 2211 %3 = load i32, i32* %1 2212 %4 = and i32 %3, -24577 2213 %5 = or i32 %4, %a0 2214 store i32 %5, i32* %1 2215 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2216 ret void 2217} 2218 2219define <4 x float> @test_mm_set_ss(float %a0) nounwind { 2220; X86-SSE-LABEL: test_mm_set_ss: 2221; X86-SSE: # %bb.0: 2222; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] 2223; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2224; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2225; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 2226; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 2227; X86-SSE-NEXT: retl # encoding: [0xc3] 2228; 2229; X86-AVX1-LABEL: test_mm_set_ss: 2230; X86-AVX1: # %bb.0: 2231; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2232; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2233; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2234; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2235; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2236; X86-AVX1-NEXT: retl # encoding: [0xc3] 2237; 2238; X86-AVX512-LABEL: test_mm_set_ss: 2239; X86-AVX512: # %bb.0: 2240; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2241; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2242; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2243; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2244; X86-AVX512-NEXT: # xmm0 = 
xmm0[0],xmm1[1,2,3] 2245; X86-AVX512-NEXT: retl # encoding: [0xc3] 2246; 2247; X64-SSE-LABEL: test_mm_set_ss: 2248; X64-SSE: # %bb.0: 2249; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] 2250; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8] 2251; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3] 2252; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 2253; X64-SSE-NEXT: retq # encoding: [0xc3] 2254; 2255; X64-AVX-LABEL: test_mm_set_ss: 2256; X64-AVX: # %bb.0: 2257; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2258; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2259; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2260; X64-AVX-NEXT: retq # encoding: [0xc3] 2261 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2262 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 2263 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 2264 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 2265 ret <4 x float> %res3 2266} 2267 2268define <4 x float> @test_mm_set1_ps(float %a0) nounwind { 2269; X86-SSE-LABEL: test_mm_set1_ps: 2270; X86-SSE: # %bb.0: 2271; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2272; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2273; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2274; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2275; X86-SSE-NEXT: retl # encoding: [0xc3] 2276; 2277; X86-AVX1-LABEL: test_mm_set1_ps: 2278; X86-AVX1: # %bb.0: 2279; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2280; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2281; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2282; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2283; X86-AVX1-NEXT: retl # encoding: [0xc3] 2284; 2285; X86-AVX512-LABEL: test_mm_set1_ps: 2286; X86-AVX512: # %bb.0: 2287; X86-AVX512-NEXT: 
vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2288; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2289; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2290; X86-AVX512-NEXT: retl # encoding: [0xc3] 2291; 2292; X64-SSE-LABEL: test_mm_set1_ps: 2293; X64-SSE: # %bb.0: 2294; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2295; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2296; X64-SSE-NEXT: retq # encoding: [0xc3] 2297; 2298; X64-AVX1-LABEL: test_mm_set1_ps: 2299; X64-AVX1: # %bb.0: 2300; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2301; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2302; X64-AVX1-NEXT: retq # encoding: [0xc3] 2303; 2304; X64-AVX512-LABEL: test_mm_set1_ps: 2305; X64-AVX512: # %bb.0: 2306; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2307; X64-AVX512-NEXT: retq # encoding: [0xc3] 2308 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2309 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2310 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2311 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2312 ret <4 x float> %res3 2313} 2314 2315define void @test_mm_setcsr(i32 %a0) nounwind { 2316; X86-SSE-LABEL: test_mm_setcsr: 2317; X86-SSE: # %bb.0: 2318; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2319; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10] 2320; X86-SSE-NEXT: retl # encoding: [0xc3] 2321; 2322; X86-AVX-LABEL: test_mm_setcsr: 2323; X86-AVX: # %bb.0: 2324; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2325; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10] 2326; X86-AVX-NEXT: retl # encoding: [0xc3] 2327; 2328; X64-SSE-LABEL: test_mm_setcsr: 2329; X64-SSE: # %bb.0: 2330; X64-SSE-NEXT: movl %edi, 
-{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2331; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2332; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2333; X64-SSE-NEXT: retq # encoding: [0xc3] 2334; 2335; X64-AVX-LABEL: test_mm_setcsr: 2336; X64-AVX: # %bb.0: 2337; X64-AVX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2338; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2339; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2340; X64-AVX-NEXT: retq # encoding: [0xc3] 2341 %st = alloca i32, align 4 2342 store i32 %a0, i32* %st, align 4 2343 %bc = bitcast i32* %st to i8* 2344 call void @llvm.x86.sse.ldmxcsr(i8* %bc) 2345 ret void 2346} 2347 2348define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2349; X86-SSE-LABEL: test_mm_setr_ps: 2350; X86-SSE: # %bb.0: 2351; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2352; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2353; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2354; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2355; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2356; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2357; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] 2358; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2359; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2360; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2361; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 2362; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2363; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2364; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2365; X86-SSE-NEXT: retl # encoding: [0xc3] 2366; 2367; X86-AVX1-LABEL: test_mm_setr_ps: 2368; X86-AVX1: # 
%bb.0: 2369; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2370; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2371; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2372; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2373; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2374; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero 2375; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2376; X86-AVX1-NEXT: # xmm3 = mem[0],zero,zero,zero 2377; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2378; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2379; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2380; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2381; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2382; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2383; X86-AVX1-NEXT: retl # encoding: [0xc3] 2384; 2385; X86-AVX512-LABEL: test_mm_setr_ps: 2386; X86-AVX512: # %bb.0: 2387; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2388; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2389; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2390; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2391; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2392; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero 2393; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2394; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero 2395; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x61,0x21,0xd2,0x10] 2396; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2397; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2398; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2399; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2400; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2401; X86-AVX512-NEXT: retl # encoding: [0xc3] 2402; 2403; X64-SSE-LABEL: test_mm_setr_ps: 2404; X64-SSE: # %bb.0: 2405; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3] 2406; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2407; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2408; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2409; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2] 2410; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 2411; X64-SSE-NEXT: retq # encoding: [0xc3] 2412; 2413; X64-AVX1-LABEL: test_mm_setr_ps: 2414; X64-AVX1: # %bb.0: 2415; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2416; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2417; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2418; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2419; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2420; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2421; X64-AVX1-NEXT: retq # encoding: [0xc3] 2422; 2423; X64-AVX512-LABEL: test_mm_setr_ps: 2424; X64-AVX512: # %bb.0: 2425; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2426; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2427; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2428; X64-AVX512-NEXT: # xmm0 = 
xmm0[0,1],xmm2[0],xmm0[3] 2429; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2430; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2431; X64-AVX512-NEXT: retq # encoding: [0xc3] 2432 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2433 %res1 = insertelement <4 x float> %res0, float %a1, i32 1 2434 %res2 = insertelement <4 x float> %res1, float %a2, i32 2 2435 %res3 = insertelement <4 x float> %res2, float %a3, i32 3 2436 ret <4 x float> %res3 2437} 2438 2439define <4 x float> @test_mm_setzero_ps() { 2440; SSE-LABEL: test_mm_setzero_ps: 2441; SSE: # %bb.0: 2442; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2443; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2444; 2445; AVX1-LABEL: test_mm_setzero_ps: 2446; AVX1: # %bb.0: 2447; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 2448; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2449; 2450; AVX512-LABEL: test_mm_setzero_ps: 2451; AVX512: # %bb.0: 2452; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 2453; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2454 ret <4 x float> zeroinitializer 2455} 2456 2457define void @test_mm_sfence() nounwind { 2458; CHECK-LABEL: test_mm_sfence: 2459; CHECK: # %bb.0: 2460; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8] 2461; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2462 call void @llvm.x86.sse.sfence() 2463 ret void 2464} 2465declare void @llvm.x86.sse.sfence() nounwind readnone 2466 2467define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2468; SSE-LABEL: test_mm_shuffle_ps: 2469; SSE: # %bb.0: 2470; SSE-NEXT: shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00] 2471; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2472; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2473; 2474; AVX1-LABEL: test_mm_shuffle_ps: 2475; AVX1: # %bb.0: 2476; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: 
[0xc5,0xf8,0xc6,0xc1,0x00] 2477; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2478; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2479; 2480; AVX512-LABEL: test_mm_shuffle_ps: 2481; AVX512: # %bb.0: 2482; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2483; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2484; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2485 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> 2486 ret <4 x float> %res 2487} 2488 2489define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) { 2490; SSE-LABEL: test_mm_sqrt_ps: 2491; SSE: # %bb.0: 2492; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0] 2493; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2494; 2495; AVX1-LABEL: test_mm_sqrt_ps: 2496; AVX1: # %bb.0: 2497; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0] 2498; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2499; 2500; AVX512-LABEL: test_mm_sqrt_ps: 2501; AVX512: # %bb.0: 2502; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] 2503; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2504 %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) 2505 ret <4 x float> %res 2506} 2507declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone 2508 2509define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { 2510; SSE-LABEL: test_mm_sqrt_ss: 2511; SSE: # %bb.0: 2512; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2513; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2514; 2515; AVX1-LABEL: test_mm_sqrt_ss: 2516; AVX1: # %bb.0: 2517; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2518; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2519; 2520; AVX512-LABEL: test_mm_sqrt_ss: 2521; AVX512: # %bb.0: 2522; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2523; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2524 %ext = 
extractelement <4 x float> %a0, i32 0 2525 %sqrt = call float @llvm.sqrt.f32(float %ext) 2526 %ins = insertelement <4 x float> %a0, float %sqrt, i32 0 2527 ret <4 x float> %ins 2528} 2529declare float @llvm.sqrt.f32(float) nounwind readnone 2530 2531define float @test_mm_sqrt_ss_scalar(float %a0) { 2532; X86-SSE-LABEL: test_mm_sqrt_ss_scalar: 2533; X86-SSE: # %bb.0: 2534; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2535; X86-SSE-NEXT: .cfi_def_cfa_offset 8 2536; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] 2537; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2538; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2539; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] 2540; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2541; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2542; X86-SSE-NEXT: .cfi_def_cfa_offset 4 2543; X86-SSE-NEXT: retl # encoding: [0xc3] 2544; 2545; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2546; X86-AVX1: # %bb.0: 2547; X86-AVX1-NEXT: pushl %eax # encoding: [0x50] 2548; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 2549; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2550; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2551; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2552; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] 2553; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2554; X86-AVX1-NEXT: popl %eax # encoding: [0x58] 2555; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 2556; X86-AVX1-NEXT: retl # encoding: [0xc3] 2557; 2558; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2559; X86-AVX512: # %bb.0: 2560; X86-AVX512-NEXT: pushl %eax # encoding: [0x50] 2561; X86-AVX512-NEXT: .cfi_def_cfa_offset 8 2562; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2563; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2564; 
X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2565; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] 2566; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2567; X86-AVX512-NEXT: popl %eax # encoding: [0x58] 2568; X86-AVX512-NEXT: .cfi_def_cfa_offset 4 2569; X86-AVX512-NEXT: retl # encoding: [0xc3] 2570; 2571; X64-SSE-LABEL: test_mm_sqrt_ss_scalar: 2572; X64-SSE: # %bb.0: 2573; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2574; X64-SSE-NEXT: retq # encoding: [0xc3] 2575; 2576; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2577; X64-AVX1: # %bb.0: 2578; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2579; X64-AVX1-NEXT: retq # encoding: [0xc3] 2580; 2581; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2582; X64-AVX512: # %bb.0: 2583; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2584; X64-AVX512-NEXT: retq # encoding: [0xc3] 2585 %sqrt = call float @llvm.sqrt.f32(float %a0) 2586 ret float %sqrt 2587} 2588 2589define void @test_mm_store_ps(float *%a0, <4 x float> %a1) { 2590; X86-SSE-LABEL: test_mm_store_ps: 2591; X86-SSE: # %bb.0: 2592; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2593; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2594; X86-SSE-NEXT: retl # encoding: [0xc3] 2595; 2596; X86-AVX1-LABEL: test_mm_store_ps: 2597; X86-AVX1: # %bb.0: 2598; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2599; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2600; X86-AVX1-NEXT: retl # encoding: [0xc3] 2601; 2602; X86-AVX512-LABEL: test_mm_store_ps: 2603; X86-AVX512: # %bb.0: 2604; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2605; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 
2606; X86-AVX512-NEXT: retl # encoding: [0xc3] 2607; 2608; X64-SSE-LABEL: test_mm_store_ps: 2609; X64-SSE: # %bb.0: 2610; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2611; X64-SSE-NEXT: retq # encoding: [0xc3] 2612; 2613; X64-AVX1-LABEL: test_mm_store_ps: 2614; X64-AVX1: # %bb.0: 2615; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2616; X64-AVX1-NEXT: retq # encoding: [0xc3] 2617; 2618; X64-AVX512-LABEL: test_mm_store_ps: 2619; X64-AVX512: # %bb.0: 2620; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2621; X64-AVX512-NEXT: retq # encoding: [0xc3] 2622 %arg0 = bitcast float* %a0 to <4 x float>* 2623 store <4 x float> %a1, <4 x float>* %arg0, align 16 2624 ret void 2625} 2626 2627define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) { 2628; X86-SSE-LABEL: test_mm_store_ps1: 2629; X86-SSE: # %bb.0: 2630; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2631; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2632; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2633; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2634; X86-SSE-NEXT: retl # encoding: [0xc3] 2635; 2636; X86-AVX1-LABEL: test_mm_store_ps1: 2637; X86-AVX1: # %bb.0: 2638; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2639; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2640; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2641; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2642; X86-AVX1-NEXT: retl # encoding: [0xc3] 2643; 2644; X86-AVX512-LABEL: test_mm_store_ps1: 2645; X86-AVX512: # %bb.0: 2646; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2647; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2648; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: 
[0xc5,0xf8,0x29,0x00] 2649; X86-AVX512-NEXT: retl # encoding: [0xc3] 2650; 2651; X64-SSE-LABEL: test_mm_store_ps1: 2652; X64-SSE: # %bb.0: 2653; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2654; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2655; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2656; X64-SSE-NEXT: retq # encoding: [0xc3] 2657; 2658; X64-AVX1-LABEL: test_mm_store_ps1: 2659; X64-AVX1: # %bb.0: 2660; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2661; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2662; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2663; X64-AVX1-NEXT: retq # encoding: [0xc3] 2664; 2665; X64-AVX512-LABEL: test_mm_store_ps1: 2666; X64-AVX512: # %bb.0: 2667; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2668; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2669; X64-AVX512-NEXT: retq # encoding: [0xc3] 2670 %arg0 = bitcast float* %a0 to <4 x float>* 2671 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2672 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2673 ret void 2674} 2675 2676define void @test_mm_store_ss(float *%a0, <4 x float> %a1) { 2677; X86-SSE-LABEL: test_mm_store_ss: 2678; X86-SSE: # %bb.0: 2679; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2680; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00] 2681; X86-SSE-NEXT: retl # encoding: [0xc3] 2682; 2683; X86-AVX1-LABEL: test_mm_store_ss: 2684; X86-AVX1: # %bb.0: 2685; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2686; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00] 2687; X86-AVX1-NEXT: retl # encoding: [0xc3] 2688; 2689; X86-AVX512-LABEL: test_mm_store_ss: 2690; X86-AVX512: # %bb.0: 2691; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] 2692; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00] 2693; X86-AVX512-NEXT: retl # encoding: [0xc3] 2694; 2695; X64-SSE-LABEL: test_mm_store_ss: 2696; X64-SSE: # %bb.0: 2697; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07] 2698; X64-SSE-NEXT: retq # encoding: [0xc3] 2699; 2700; X64-AVX1-LABEL: test_mm_store_ss: 2701; X64-AVX1: # %bb.0: 2702; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07] 2703; X64-AVX1-NEXT: retq # encoding: [0xc3] 2704; 2705; X64-AVX512-LABEL: test_mm_store_ss: 2706; X64-AVX512: # %bb.0: 2707; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07] 2708; X64-AVX512-NEXT: retq # encoding: [0xc3] 2709 %ext = extractelement <4 x float> %a1, i32 0 2710 store float %ext, float* %a0, align 1 2711 ret void 2712} 2713 2714define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) { 2715; X86-SSE-LABEL: test_mm_store1_ps: 2716; X86-SSE: # %bb.0: 2717; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2718; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2719; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2720; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2721; X86-SSE-NEXT: retl # encoding: [0xc3] 2722; 2723; X86-AVX1-LABEL: test_mm_store1_ps: 2724; X86-AVX1: # %bb.0: 2725; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2726; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2727; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2728; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2729; X86-AVX1-NEXT: retl # encoding: [0xc3] 2730; 2731; X86-AVX512-LABEL: test_mm_store1_ps: 2732; X86-AVX512: # %bb.0: 2733; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2734; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2735; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2736; X86-AVX512-NEXT: retl # encoding: [0xc3] 2737; 2738; X64-SSE-LABEL: test_mm_store1_ps: 2739; X64-SSE: # %bb.0: 2740; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2741; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2742; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2743; X64-SSE-NEXT: retq # encoding: [0xc3] 2744; 2745; X64-AVX1-LABEL: test_mm_store1_ps: 2746; X64-AVX1: # %bb.0: 2747; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2748; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2749; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2750; X64-AVX1-NEXT: retq # encoding: [0xc3] 2751; 2752; X64-AVX512-LABEL: test_mm_store1_ps: 2753; X64-AVX512: # %bb.0: 2754; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2755; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2756; X64-AVX512-NEXT: retq # encoding: [0xc3] 2757 %arg0 = bitcast float* %a0 to <4 x float>* 2758 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2759 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2760 ret void 2761} 2762 2763define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { 2764; X86-SSE-LABEL: test_mm_storeh_ps: 2765; X86-SSE: # %bb.0: 2766; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2767; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2768; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2769; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2770; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2771; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2772; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2773; 
X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2774; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2775; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2776; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2777; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2778; X86-SSE-NEXT: retl # encoding: [0xc3] 2779; 2780; X86-AVX1-LABEL: test_mm_storeh_ps: 2781; X86-AVX1: # %bb.0: 2782; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2783; X86-AVX1-NEXT: vmovhpd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x17,0x00] 2784; X86-AVX1-NEXT: retl # encoding: [0xc3] 2785; 2786; X86-AVX512-LABEL: test_mm_storeh_ps: 2787; X86-AVX512: # %bb.0: 2788; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2789; X86-AVX512-NEXT: vmovhpd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x17,0x00] 2790; X86-AVX512-NEXT: retl # encoding: [0xc3] 2791; 2792; X64-SSE-LABEL: test_mm_storeh_ps: 2793; X64-SSE: # %bb.0: 2794; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2795; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] 2796; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2797; X64-SSE-NEXT: retq # encoding: [0xc3] 2798; 2799; X64-AVX1-LABEL: test_mm_storeh_ps: 2800; X64-AVX1: # %bb.0: 2801; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2802; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2803; X64-AVX1-NEXT: retq # encoding: [0xc3] 2804; 2805; X64-AVX512-LABEL: test_mm_storeh_ps: 2806; X64-AVX512: # %bb.0: 2807; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2808; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2809; X64-AVX512-NEXT: retq # encoding: [0xc3] 2810 %ptr = bitcast x86_mmx* %a0 to i64* 2811 %bc = bitcast <4 x float> %a1 to <2 x i64> 2812 %ext = 
extractelement <2 x i64> %bc, i32 1 2813 store i64 %ext, i64* %ptr 2814 ret void 2815} 2816 2817define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind { 2818; X86-SSE-LABEL: test_mm_storel_ps: 2819; X86-SSE: # %bb.0: 2820; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2821; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2822; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2823; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2824; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2825; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2826; X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] 2827; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] 2828; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2829; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2830; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2831; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2832; X86-SSE-NEXT: retl # encoding: [0xc3] 2833; 2834; X86-AVX1-LABEL: test_mm_storel_ps: 2835; X86-AVX1: # %bb.0: 2836; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2837; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 2838; X86-AVX1-NEXT: retl # encoding: [0xc3] 2839; 2840; X86-AVX512-LABEL: test_mm_storel_ps: 2841; X86-AVX512: # %bb.0: 2842; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2843; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2844; X86-AVX512-NEXT: retl # encoding: [0xc3] 2845; 2846; X64-SSE-LABEL: test_mm_storel_ps: 2847; X64-SSE: # %bb.0: 2848; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2849; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] 2850; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2851; X64-SSE-NEXT: retq # encoding: [0xc3] 
2852; 2853; X64-AVX1-LABEL: test_mm_storel_ps: 2854; X64-AVX1: # %bb.0: 2855; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2856; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2857; X64-AVX1-NEXT: retq # encoding: [0xc3] 2858; 2859; X64-AVX512-LABEL: test_mm_storel_ps: 2860; X64-AVX512: # %bb.0: 2861; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2862; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2863; X64-AVX512-NEXT: retq # encoding: [0xc3] 2864 %ptr = bitcast x86_mmx* %a0 to i64* 2865 %bc = bitcast <4 x float> %a1 to <2 x i64> 2866 %ext = extractelement <2 x i64> %bc, i32 0 2867 store i64 %ext, i64* %ptr 2868 ret void 2869} 2870 2871define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { 2872; X86-SSE-LABEL: test_mm_storer_ps: 2873; X86-SSE: # %bb.0: 2874; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2875; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2876; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2877; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2878; X86-SSE-NEXT: retl # encoding: [0xc3] 2879; 2880; X86-AVX1-LABEL: test_mm_storer_ps: 2881; X86-AVX1: # %bb.0: 2882; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2883; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2884; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2885; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2886; X86-AVX1-NEXT: retl # encoding: [0xc3] 2887; 2888; X86-AVX512-LABEL: test_mm_storer_ps: 2889; X86-AVX512: # %bb.0: 2890; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2891; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2892; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2893; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX 
Compression encoding: [0xc5,0xf8,0x29,0x00] 2894; X86-AVX512-NEXT: retl # encoding: [0xc3] 2895; 2896; X64-SSE-LABEL: test_mm_storer_ps: 2897; X64-SSE: # %bb.0: 2898; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2899; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2900; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2901; X64-SSE-NEXT: retq # encoding: [0xc3] 2902; 2903; X64-AVX1-LABEL: test_mm_storer_ps: 2904; X64-AVX1: # %bb.0: 2905; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2906; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2907; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2908; X64-AVX1-NEXT: retq # encoding: [0xc3] 2909; 2910; X64-AVX512-LABEL: test_mm_storer_ps: 2911; X64-AVX512: # %bb.0: 2912; X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2913; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2914; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2915; X64-AVX512-NEXT: retq # encoding: [0xc3] 2916 %arg0 = bitcast float* %a0 to <4 x float>* 2917 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2918 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2919 ret void 2920} 2921 2922define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { 2923; X86-SSE-LABEL: test_mm_storeu_ps: 2924; X86-SSE: # %bb.0: 2925; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2926; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 2927; X86-SSE-NEXT: retl # encoding: [0xc3] 2928; 2929; X86-AVX1-LABEL: test_mm_storeu_ps: 2930; X86-AVX1: # %bb.0: 2931; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2932; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 2933; X86-AVX1-NEXT: retl # encoding: [0xc3] 2934; 2935; X86-AVX512-LABEL: 
test_mm_storeu_ps: 2936; X86-AVX512: # %bb.0: 2937; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2938; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 2939; X86-AVX512-NEXT: retl # encoding: [0xc3] 2940; 2941; X64-SSE-LABEL: test_mm_storeu_ps: 2942; X64-SSE: # %bb.0: 2943; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 2944; X64-SSE-NEXT: retq # encoding: [0xc3] 2945; 2946; X64-AVX1-LABEL: test_mm_storeu_ps: 2947; X64-AVX1: # %bb.0: 2948; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 2949; X64-AVX1-NEXT: retq # encoding: [0xc3] 2950; 2951; X64-AVX512-LABEL: test_mm_storeu_ps: 2952; X64-AVX512: # %bb.0: 2953; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] 2954; X64-AVX512-NEXT: retq # encoding: [0xc3] 2955 %arg0 = bitcast float* %a0 to <4 x float>* 2956 store <4 x float> %a1, <4 x float>* %arg0, align 1 2957 ret void 2958} 2959 2960define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) { 2961; X86-SSE-LABEL: test_mm_stream_ps: 2962; X86-SSE: # %bb.0: 2963; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2964; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 2965; X86-SSE-NEXT: retl # encoding: [0xc3] 2966; 2967; X86-AVX1-LABEL: test_mm_stream_ps: 2968; X86-AVX1: # %bb.0: 2969; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2970; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 2971; X86-AVX1-NEXT: retl # encoding: [0xc3] 2972; 2973; X86-AVX512-LABEL: test_mm_stream_ps: 2974; X86-AVX512: # %bb.0: 2975; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2976; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 2977; X86-AVX512-NEXT: retl # encoding: [0xc3] 2978; 2979; X64-SSE-LABEL: test_mm_stream_ps: 2980; X64-SSE: # 
%bb.0: 2981; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 2982; X64-SSE-NEXT: retq # encoding: [0xc3] 2983; 2984; X64-AVX1-LABEL: test_mm_stream_ps: 2985; X64-AVX1: # %bb.0: 2986; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 2987; X64-AVX1-NEXT: retq # encoding: [0xc3] 2988; 2989; X64-AVX512-LABEL: test_mm_stream_ps: 2990; X64-AVX512: # %bb.0: 2991; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 2992; X64-AVX512-NEXT: retq # encoding: [0xc3] 2993 %arg0 = bitcast float* %a0 to <4 x float>* 2994 store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 2995 ret void 2996} 2997 2998define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2999; SSE-LABEL: test_mm_sub_ps: 3000; SSE: # %bb.0: 3001; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1] 3002; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3003; 3004; AVX1-LABEL: test_mm_sub_ps: 3005; AVX1: # %bb.0: 3006; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1] 3007; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3008; 3009; AVX512-LABEL: test_mm_sub_ps: 3010; AVX512: # %bb.0: 3011; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 3012; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3013 %res = fsub <4 x float> %a0, %a1 3014 ret <4 x float> %res 3015} 3016 3017define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3018; SSE-LABEL: test_mm_sub_ss: 3019; SSE: # %bb.0: 3020; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1] 3021; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3022; 3023; AVX1-LABEL: test_mm_sub_ss: 3024; AVX1: # %bb.0: 3025; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1] 3026; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3027; 3028; AVX512-LABEL: test_mm_sub_ss: 3029; AVX512: # %bb.0: 3030; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfa,0x5c,0xc1] 3031; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3032 %ext0 = extractelement <4 x float> %a0, i32 0 3033 %ext1 = extractelement <4 x float> %a1, i32 0 3034 %fsub = fsub float %ext0, %ext1 3035 %res = insertelement <4 x float> %a0, float %fsub, i32 0 3036 ret <4 x float> %res 3037} 3038 3039define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { 3040; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3041; X86-SSE: # %bb.0: 3042; X86-SSE-NEXT: pushl %esi # encoding: [0x56] 3043; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3044; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3045; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3046; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3047; X86-SSE-NEXT: movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06] 3048; X86-SSE-NEXT: movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a] 3049; X86-SSE-NEXT: movaps (%ecx), %xmm2 # encoding: [0x0f,0x28,0x11] 3050; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18] 3051; X86-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3052; X86-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3053; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3054; X86-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3055; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3056; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3057; X86-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3058; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3059; X86-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3060; X86-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3061; X86-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3062; X86-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3063; 
X86-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3064; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3065; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3066; X86-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3067; X86-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3068; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3069; X86-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3070; X86-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3071; X86-SSE-NEXT: movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e] 3072; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a] 3073; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19] 3074; X86-SSE-NEXT: movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10] 3075; X86-SSE-NEXT: popl %esi # encoding: [0x5e] 3076; X86-SSE-NEXT: retl # encoding: [0xc3] 3077; 3078; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3079; X86-AVX1: # %bb.0: 3080; X86-AVX1-NEXT: pushl %esi # encoding: [0x56] 3081; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3082; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3083; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3084; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3085; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06] 3086; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] 3087; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11] 3088; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18] 3089; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3090; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3091; X86-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3092; X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3093; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3094; X86-AVX1-NEXT: # xmm0 
= xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3095; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3096; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3097; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3098; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3099; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3100; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3101; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3102; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3103; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3104; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3105; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16] 3106; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # encoding: [0xc5,0xf8,0x29,0x1a] 3107; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21] 3108; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 3109; X86-AVX1-NEXT: popl %esi # encoding: [0x5e] 3110; X86-AVX1-NEXT: retl # encoding: [0xc3] 3111; 3112; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3113; X86-AVX512: # %bb.0: 3114; X86-AVX512-NEXT: pushl %esi # encoding: [0x56] 3115; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3116; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3117; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3118; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3119; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] 3120; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a] 3121; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11] 3122; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18] 3123; 
X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3124; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3125; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3126; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3127; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3128; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3129; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3130; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3131; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3132; X86-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3133; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3134; X86-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3135; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3136; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3137; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3138; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3139; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16] 3140; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a] 3141; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21] 3142; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 3143; X86-AVX512-NEXT: popl %esi # encoding: [0x5e] 3144; X86-AVX512-NEXT: retl # encoding: [0xc3] 3145; 3146; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3147; X64-SSE: # %bb.0: 3148; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 3149; 
X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e] 3150; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12] 3151; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19] 3152; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3153; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3154; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3155; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3156; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3157; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3158; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3159; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3160; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3161; X64-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3162; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3163; X64-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3164; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3165; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3166; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3167; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3168; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3169; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3170; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3171; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3172; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f] 3173; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e] 3174; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a] 3175; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11] 3176; X64-SSE-NEXT: retq # encoding: [0xc3] 3177; 3178; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3179; X64-AVX1: # %bb.0: 3180; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 3181; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: 
[0xc5,0xf8,0x28,0x0e] 3182; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12] 3183; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19] 3184; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3185; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3186; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3187; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3188; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3189; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3190; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3191; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3192; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3193; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3194; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3195; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3196; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3197; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3198; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3199; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3200; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17] 3201; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e] 3202; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22] 3203; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01] 3204; X64-AVX1-NEXT: retq # encoding: [0xc3] 3205; 3206; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3207; X64-AVX512: # %bb.0: 3208; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 3209; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e] 3210; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression 
encoding: [0xc5,0xf8,0x28,0x12] 3211; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19] 3212; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3213; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3214; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3215; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3216; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3217; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3218; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3219; X64-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3220; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3221; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3222; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3223; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3224; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3225; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3226; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3227; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3228; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17] 3229; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e] 3230; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22] 3231; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01] 3232; X64-AVX512-NEXT: retq # encoding: [0xc3] 3233 %row0 = load <4 x float>, <4 x float>* %a0, align 16 3234 %row1 = 
load <4 x float>, <4 x float>* %a1, align 16 3235 %row2 = load <4 x float>, <4 x float>* %a2, align 16 3236 %row3 = load <4 x float>, <4 x float>* %a3, align 16 3237 %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3238 %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3239 %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3240 %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3241 %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3242 %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3243 %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3244 %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3245 store <4 x float> %res0, <4 x float>* %a0, align 16 3246 store <4 x float> %res1, <4 x float>* %a1, align 16 3247 store <4 x float> %res2, <4 x float>* %a2, align 16 3248 store <4 x float> %res3, <4 x float>* %a3, align 16 3249 ret void 3250} 3251 3252define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3253; SSE-LABEL: test_mm_ucomieq_ss: 3254; SSE: # %bb.0: 3255; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3256; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3257; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3258; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3259; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3260; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3261; 3262; AVX1-LABEL: test_mm_ucomieq_ss: 3263; AVX1: # %bb.0: 3264; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3265; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3266; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3267; AVX1-NEXT: andb %al, 
%cl # encoding: [0x20,0xc1] 3268; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3269; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3270; 3271; AVX512-LABEL: test_mm_ucomieq_ss: 3272; AVX512: # %bb.0: 3273; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3274; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3275; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3276; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3277; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3278; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3279 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 3280 ret i32 %res 3281} 3282declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 3283 3284define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3285; SSE-LABEL: test_mm_ucomige_ss: 3286; SSE: # %bb.0: 3287; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3288; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3289; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3290; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3291; 3292; AVX1-LABEL: test_mm_ucomige_ss: 3293; AVX1: # %bb.0: 3294; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3295; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3296; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3297; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3298; 3299; AVX512-LABEL: test_mm_ucomige_ss: 3300; AVX512: # %bb.0: 3301; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3302; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3303; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3304; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3305 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) 3306 ret i32 %res 3307} 3308declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 3309 3310define i32 
@test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3311; SSE-LABEL: test_mm_ucomigt_ss: 3312; SSE: # %bb.0: 3313; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3314; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3315; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3316; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3317; 3318; AVX1-LABEL: test_mm_ucomigt_ss: 3319; AVX1: # %bb.0: 3320; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3321; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3322; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3323; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3324; 3325; AVX512-LABEL: test_mm_ucomigt_ss: 3326; AVX512: # %bb.0: 3327; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3328; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3329; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3330; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3331 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) 3332 ret i32 %res 3333} 3334declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 3335 3336define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3337; SSE-LABEL: test_mm_ucomile_ss: 3338; SSE: # %bb.0: 3339; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3340; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3341; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3342; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3343; 3344; AVX1-LABEL: test_mm_ucomile_ss: 3345; AVX1: # %bb.0: 3346; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3347; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3348; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3349; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3350; 3351; AVX512-LABEL: test_mm_ucomile_ss: 3352; AVX512: # %bb.0: 3353; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3354; AVX512-NEXT: vucomiss %xmm0, 
%xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3355; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3356; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3357 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) 3358 ret i32 %res 3359} 3360declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 3361 3362define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3363; SSE-LABEL: test_mm_ucomilt_ss: 3364; SSE: # %bb.0: 3365; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3366; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3367; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3368; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3369; 3370; AVX1-LABEL: test_mm_ucomilt_ss: 3371; AVX1: # %bb.0: 3372; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3373; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3374; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3375; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3376; 3377; AVX512-LABEL: test_mm_ucomilt_ss: 3378; AVX512: # %bb.0: 3379; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3380; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3381; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3382; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3383 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) 3384 ret i32 %res 3385} 3386declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 3387 3388define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3389; SSE-LABEL: test_mm_ucomineq_ss: 3390; SSE: # %bb.0: 3391; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3392; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3393; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3394; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3395; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3396; 
SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3397; 3398; AVX1-LABEL: test_mm_ucomineq_ss: 3399; AVX1: # %bb.0: 3400; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3401; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3402; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3403; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3404; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3405; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3406; 3407; AVX512-LABEL: test_mm_ucomineq_ss: 3408; AVX512: # %bb.0: 3409; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3410; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3411; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3412; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3413; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3414; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3415 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) 3416 ret i32 %res 3417} 3418declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 3419 3420define <4 x float> @test_mm_undefined_ps() { 3421; CHECK-LABEL: test_mm_undefined_ps: 3422; CHECK: # %bb.0: 3423; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3424 ret <4 x float> undef 3425} 3426 3427define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3428; SSE-LABEL: test_mm_unpackhi_ps: 3429; SSE: # %bb.0: 3430; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3431; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3432; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3433; 3434; AVX1-LABEL: test_mm_unpackhi_ps: 3435; AVX1: # %bb.0: 3436; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3437; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3438; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3439; 3440; AVX512-LABEL: test_mm_unpackhi_ps: 3441; AVX512: # %bb.0: 3442; AVX512-NEXT: vunpckhps %xmm1, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3443; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3444; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3445 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3446 ret <4 x float> %res 3447} 3448 3449define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3450; SSE-LABEL: test_mm_unpacklo_ps: 3451; SSE: # %bb.0: 3452; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 3453; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3454; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3455; 3456; AVX1-LABEL: test_mm_unpacklo_ps: 3457; AVX1: # %bb.0: 3458; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] 3459; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3460; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3461; 3462; AVX512-LABEL: test_mm_unpacklo_ps: 3463; AVX512: # %bb.0: 3464; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 3465; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3466; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3467 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3468 ret <4 x float> %res 3469} 3470 3471define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3472; SSE-LABEL: test_mm_xor_ps: 3473; SSE: # %bb.0: 3474; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 3475; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3476; 3477; AVX1-LABEL: test_mm_xor_ps: 3478; AVX1: # %bb.0: 3479; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 3480; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3481; 3482; AVX512-LABEL: test_mm_xor_ps: 3483; AVX512: # %bb.0: 3484; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 3485; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3486 %arg0 = bitcast <4 x float> 
%a0 to <4 x i32> 3487 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 3488 %res = xor <4 x i32> %arg0, %arg1 3489 %bc = bitcast <4 x i32> %res to <4 x float> 3490 ret <4 x float> %bc 3491} 3492 3493!0 = !{i32 1} 3494