; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc -show-mc-encoding -fast-isel-sink-local-values < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c
; NOTE(review): The "; PREFIX:" / "; PREFIX-NEXT:" lines below are FileCheck
; assertions generated by update_llc_test_checks.py — do not edit them by hand;
; rerun the script after changing the IR or the backend.

define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ps:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0 # encoding: [0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = fadd <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_add_ss:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x58,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_add_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x58,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_add_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x58,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fadd = fadd float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fadd, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_and_ps:
; SSE: # %bb.0:
; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_and_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_and_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = and <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_andnot_ps:
; SSE: # %bb.0:
; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_andnot_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_andnot_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %res = and <4 x i32> %not, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpeq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x00]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpeq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x00]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp oeq <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpeq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpeqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x00]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpeq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x00]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x02]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x02]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpgt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpgt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpgt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x01]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpgt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x01]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmple_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x02]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmple_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ole <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmple_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x02]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmple_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x02]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmplt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x01]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmplt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp olt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmplt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x01]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmplt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x01]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpneq_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x04]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpneq_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x04]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp une <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpneq_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpneqss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x04]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpneq_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x04]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnge_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnge_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnge_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x06]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnge_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x06]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm0, %xmm1 # encoding: [0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpngt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0xc2,0xc0,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpngt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x74,0x08,0xc2,0xc0,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a1, %a0
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpngt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0xc2,0xc8,0x05]
; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1]
; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpngt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf2,0xc2,0xc8,0x05]
; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5)
  %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnleps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnle_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnleps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x06]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnle_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x06]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ugt <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnle_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnless %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x06]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnle_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnless %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x06]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpnlt_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpnltps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x05]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpnlt_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpnltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x05]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uge <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpnlt_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpnltss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x05]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpnlt_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpnltss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x05]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x07]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x07]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp ord <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x07]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x07]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7)
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ps:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordps %xmm1, %xmm0 # encoding: [0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cmpunord_ps:
; AVX1: # %bb.0:
; AVX1-NEXT: vcmpunordps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc2,0xc1,0x03]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cmpunord_ps:
; AVX512: # %bb.0:
; AVX512-NEXT: vcmpunordps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x03]
; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %cmp = fcmp uno <4 x float> %a0, %a1
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %res = bitcast <4 x i32> %sext to <4 x float>
  ret <4 x float> %res
}

define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_cmpunord_ss:
; SSE: # %bb.0:
; SSE-NEXT: cmpunordss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0xc2,0xc1,0x03]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX-LABEL: test_mm_cmpunord_ss:
; AVX: # %bb.0:
; AVX-NEXT: vcmpunordss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xc2,0xc1,0x03]
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3)
  ret <4 x float> %res
}

define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comieq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comieq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comieq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comige_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comige_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comige_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comigt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comigt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comigt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comile_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comile_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comile_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comilt_ss:
; SSE: # %bb.0:
; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; SSE-NEXT: comiss %xmm0, %xmm1 # encoding: [0x0f,0x2f,0xc8]
; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comilt_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX1-NEXT: vcomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comilt_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; AVX512-NEXT: vcomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc8]
; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; SSE-LABEL: test_mm_comineq_ss:
; SSE: # %bb.0:
; SSE-NEXT: comiss %xmm1, %xmm0 # encoding: [0x0f,0x2f,0xc1]
; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_comineq_ss:
; AVX1: # %bb.0:
; AVX1-NEXT: vcomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_comineq_ss:
; AVX512: # %bb.0:
; AVX512-NEXT: vcomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2f,0xc1]
; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
  ret i32 %res
}
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvt_ss2si:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2d,0xc0]
; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX1-LABEL: test_mm_cvt_ss2si:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512-LABEL: test_mm_cvt_ss2si:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0]
; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone

define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind {
; X86-SSE-LABEL: test_mm_cvtsi32_ss:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtsi32_ss:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtsi32_ss:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_cvtsi32_ss:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: cvtsi2ss %edi, %xmm0 # encoding: [0xf3,0x0f,0x2a,0xc7]
; X64-SSE-NEXT: retq # encoding: [0xc3]
;
; X64-AVX1-LABEL: test_mm_cvtsi32_ss:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX1-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512-LABEL: test_mm_cvtsi32_ss:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2a,0xc7]
; X64-AVX512-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone

define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind {
; X86-SSE-LABEL: test_mm_cvtss_f32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: pushl %eax # encoding: [0x50]
; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24]
; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-SSE-NEXT: popl %eax # encoding: [0x58]
; X86-SSE-NEXT: retl # encoding: [0xc3]
;
; X86-AVX1-LABEL: test_mm_cvtss_f32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX1-NEXT: popl %eax # encoding: [0x58]
; X86-AVX1-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512-LABEL: test_mm_cvtss_f32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: pushl %eax # encoding: [0x50]
; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24]
; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24]
; X86-AVX512-NEXT: popl %eax # encoding: [0x58]
; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_cvtss_f32:
; X64: # %bb.0:
; X64-NEXT: retq # encoding: [0xc3]
  %res = extractelement <4 x float> %a0, i32 0
  ret float %res
}

define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind {
; SSE-LABEL: test_mm_cvtss_si32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %eax # encoding:
[0xf3,0x0f,0x2d,0xc0] 816; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 817; 818; AVX1-LABEL: test_mm_cvtss_si32: 819; AVX1: # %bb.0: 820; AVX1-NEXT: vcvtss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2d,0xc0] 821; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 822; 823; AVX512-LABEL: test_mm_cvtss_si32: 824; AVX512: # %bb.0: 825; AVX512-NEXT: vcvtss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2d,0xc0] 826; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 827 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 828 ret i32 %res 829} 830 831define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind { 832; SSE-LABEL: test_mm_cvttss_si: 833; SSE: # %bb.0: 834; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] 835; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 836; 837; AVX1-LABEL: test_mm_cvttss_si: 838; AVX1: # %bb.0: 839; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] 840; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 841; 842; AVX512-LABEL: test_mm_cvttss_si: 843; AVX512: # %bb.0: 844; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] 845; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 846 %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) 847 ret i32 %res 848} 849declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone 850 851define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { 852; SSE-LABEL: test_mm_cvttss_si32: 853; SSE: # %bb.0: 854; SSE-NEXT: cvttss2si %xmm0, %eax # encoding: [0xf3,0x0f,0x2c,0xc0] 855; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 856; 857; AVX1-LABEL: test_mm_cvttss_si32: 858; AVX1: # %bb.0: 859; AVX1-NEXT: vcvttss2si %xmm0, %eax # encoding: [0xc5,0xfa,0x2c,0xc0] 860; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 861; 862; AVX512-LABEL: test_mm_cvttss_si32: 863; AVX512: # %bb.0: 864; AVX512-NEXT: vcvttss2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x2c,0xc0] 865; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 866 %res = call i32 
@llvm.x86.sse.cvttss2si(<4 x float> %a0) 867 ret i32 %res 868} 869 870define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 871; SSE-LABEL: test_mm_div_ps: 872; SSE: # %bb.0: 873; SSE-NEXT: divps %xmm1, %xmm0 # encoding: [0x0f,0x5e,0xc1] 874; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 875; 876; AVX1-LABEL: test_mm_div_ps: 877; AVX1: # %bb.0: 878; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5e,0xc1] 879; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 880; 881; AVX512-LABEL: test_mm_div_ps: 882; AVX512: # %bb.0: 883; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1] 884; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 885 %res = fdiv <4 x float> %a0, %a1 886 ret <4 x float> %res 887} 888 889define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 890; SSE-LABEL: test_mm_div_ss: 891; SSE: # %bb.0: 892; SSE-NEXT: divss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5e,0xc1] 893; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 894; 895; AVX1-LABEL: test_mm_div_ss: 896; AVX1: # %bb.0: 897; AVX1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5e,0xc1] 898; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 899; 900; AVX512-LABEL: test_mm_div_ss: 901; AVX512: # %bb.0: 902; AVX512-NEXT: vdivss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5e,0xc1] 903; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 904 %ext0 = extractelement <4 x float> %a0, i32 0 905 %ext1 = extractelement <4 x float> %a1, i32 0 906 %fdiv = fdiv float %ext0, %ext1 907 %res = insertelement <4 x float> %a0, float %fdiv, i32 0 908 ret <4 x float> %res 909} 910 911define i32 @test_MM_GET_EXCEPTION_MASK() nounwind { 912; X86-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 913; X86-SSE: # %bb.0: 914; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 915; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 916; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 917; X86-SSE-NEXT: movl (%esp), %eax # 
encoding: [0x8b,0x04,0x24] 918; X86-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 919; X86-SSE-NEXT: # imm = 0x1F80 920; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 921; X86-SSE-NEXT: retl # encoding: [0xc3] 922; 923; X86-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 924; X86-AVX: # %bb.0: 925; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 926; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 927; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 928; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 929; X86-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 930; X86-AVX-NEXT: # imm = 0x1F80 931; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 932; X86-AVX-NEXT: retl # encoding: [0xc3] 933; 934; X64-SSE-LABEL: test_MM_GET_EXCEPTION_MASK: 935; X64-SSE: # %bb.0: 936; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 937; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 938; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 939; X64-SSE-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 940; X64-SSE-NEXT: # imm = 0x1F80 941; X64-SSE-NEXT: retq # encoding: [0xc3] 942; 943; X64-AVX-LABEL: test_MM_GET_EXCEPTION_MASK: 944; X64-AVX: # %bb.0: 945; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 946; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 947; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 948; X64-AVX-NEXT: andl $8064, %eax # encoding: [0x25,0x80,0x1f,0x00,0x00] 949; X64-AVX-NEXT: # imm = 0x1F80 950; X64-AVX-NEXT: retq # encoding: [0xc3] 951 %1 = alloca i32, align 4 952 %2 = bitcast i32* %1 to i8* 953 call void @llvm.x86.sse.stmxcsr(i8* %2) 954 %3 = load i32, i32* %1, align 4 955 %4 = and i32 %3, 8064 956 ret i32 %4 957} 958declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone 959 960define i32 @test_MM_GET_EXCEPTION_STATE() nounwind { 961; X86-SSE-LABEL: 
test_MM_GET_EXCEPTION_STATE: 962; X86-SSE: # %bb.0: 963; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 964; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 965; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 966; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 967; X86-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 968; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 969; X86-SSE-NEXT: retl # encoding: [0xc3] 970; 971; X86-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 972; X86-AVX: # %bb.0: 973; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 974; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 975; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 976; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 977; X86-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 978; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 979; X86-AVX-NEXT: retl # encoding: [0xc3] 980; 981; X64-SSE-LABEL: test_MM_GET_EXCEPTION_STATE: 982; X64-SSE: # %bb.0: 983; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 984; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 985; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 986; X64-SSE-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 987; X64-SSE-NEXT: retq # encoding: [0xc3] 988; 989; X64-AVX-LABEL: test_MM_GET_EXCEPTION_STATE: 990; X64-AVX: # %bb.0: 991; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 992; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 993; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 994; X64-AVX-NEXT: andl $63, %eax # encoding: [0x83,0xe0,0x3f] 995; X64-AVX-NEXT: retq # encoding: [0xc3] 996 %1 = alloca i32, align 4 997 %2 = bitcast i32* %1 to i8* 998 call void @llvm.x86.sse.stmxcsr(i8* %2) 999 %3 = load i32, i32* %1, align 4 1000 %4 = and i32 %3, 63 1001 ret i32 %4 1002} 1003 1004define i32 @test_MM_GET_FLUSH_ZERO_MODE() 
nounwind { 1005; X86-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1006; X86-SSE: # %bb.0: 1007; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1008; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1009; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1010; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1011; X86-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1012; X86-SSE-NEXT: # imm = 0x8000 1013; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1014; X86-SSE-NEXT: retl # encoding: [0xc3] 1015; 1016; X86-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1017; X86-AVX: # %bb.0: 1018; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1019; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1020; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1021; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1022; X86-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1023; X86-AVX-NEXT: # imm = 0x8000 1024; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1025; X86-AVX-NEXT: retl # encoding: [0xc3] 1026; 1027; X64-SSE-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1028; X64-SSE: # %bb.0: 1029; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1030; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1031; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1032; X64-SSE-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1033; X64-SSE-NEXT: # imm = 0x8000 1034; X64-SSE-NEXT: retq # encoding: [0xc3] 1035; 1036; X64-AVX-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 1037; X64-AVX: # %bb.0: 1038; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1039; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1040; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1041; X64-AVX-NEXT: andl $32768, %eax # encoding: [0x25,0x00,0x80,0x00,0x00] 1042; X64-AVX-NEXT: # imm = 0x8000 1043; X64-AVX-NEXT: retq # 
encoding: [0xc3] 1044 %1 = alloca i32, align 4 1045 %2 = bitcast i32* %1 to i8* 1046 call void @llvm.x86.sse.stmxcsr(i8* %2) 1047 %3 = load i32, i32* %1, align 4 1048 %4 = and i32 %3, 32768 1049 ret i32 %4 1050} 1051 1052define i32 @test_MM_GET_ROUNDING_MODE() nounwind { 1053; X86-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1054; X86-SSE: # %bb.0: 1055; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1056; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1057; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1058; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1059; X86-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1060; X86-SSE-NEXT: # imm = 0x6000 1061; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1062; X86-SSE-NEXT: retl # encoding: [0xc3] 1063; 1064; X86-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1065; X86-AVX: # %bb.0: 1066; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1067; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1068; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1069; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1070; X86-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1071; X86-AVX-NEXT: # imm = 0x6000 1072; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1073; X86-AVX-NEXT: retl # encoding: [0xc3] 1074; 1075; X64-SSE-LABEL: test_MM_GET_ROUNDING_MODE: 1076; X64-SSE: # %bb.0: 1077; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1078; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1079; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1080; X64-SSE-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1081; X64-SSE-NEXT: # imm = 0x6000 1082; X64-SSE-NEXT: retq # encoding: [0xc3] 1083; 1084; X64-AVX-LABEL: test_MM_GET_ROUNDING_MODE: 1085; X64-AVX: # %bb.0: 1086; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1087; X64-AVX-NEXT: vstmxcsr (%rax) # 
encoding: [0xc5,0xf8,0xae,0x18] 1088; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1089; X64-AVX-NEXT: andl $24576, %eax # encoding: [0x25,0x00,0x60,0x00,0x00] 1090; X64-AVX-NEXT: # imm = 0x6000 1091; X64-AVX-NEXT: retq # encoding: [0xc3] 1092 %1 = alloca i32, align 4 1093 %2 = bitcast i32* %1 to i8* 1094 call void @llvm.x86.sse.stmxcsr(i8* %2) 1095 %3 = load i32, i32* %1, align 4 1096 %4 = and i32 %3, 24576 1097 ret i32 %4 1098} 1099 1100define i32 @test_mm_getcsr() nounwind { 1101; X86-SSE-LABEL: test_mm_getcsr: 1102; X86-SSE: # %bb.0: 1103; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1104; X86-SSE-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1105; X86-SSE-NEXT: stmxcsr (%eax) # encoding: [0x0f,0xae,0x18] 1106; X86-SSE-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1107; X86-SSE-NEXT: popl %ecx # encoding: [0x59] 1108; X86-SSE-NEXT: retl # encoding: [0xc3] 1109; 1110; X86-AVX-LABEL: test_mm_getcsr: 1111; X86-AVX: # %bb.0: 1112; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1113; X86-AVX-NEXT: movl %esp, %eax # encoding: [0x89,0xe0] 1114; X86-AVX-NEXT: vstmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x18] 1115; X86-AVX-NEXT: movl (%esp), %eax # encoding: [0x8b,0x04,0x24] 1116; X86-AVX-NEXT: popl %ecx # encoding: [0x59] 1117; X86-AVX-NEXT: retl # encoding: [0xc3] 1118; 1119; X64-SSE-LABEL: test_mm_getcsr: 1120; X64-SSE: # %bb.0: 1121; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1122; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1123; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1124; X64-SSE-NEXT: retq # encoding: [0xc3] 1125; 1126; X64-AVX-LABEL: test_mm_getcsr: 1127; X64-AVX: # %bb.0: 1128; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1129; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1130; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # encoding: [0x8b,0x44,0x24,0xfc] 1131; X64-AVX-NEXT: retq # 
encoding: [0xc3] 1132 %1 = alloca i32, align 4 1133 %2 = bitcast i32* %1 to i8* 1134 call void @llvm.x86.sse.stmxcsr(i8* %2) 1135 %3 = load i32, i32* %1, align 4 1136 ret i32 %3 1137} 1138 1139define <4 x float> @test_mm_load_ps(float* %a0) nounwind { 1140; X86-SSE-LABEL: test_mm_load_ps: 1141; X86-SSE: # %bb.0: 1142; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1143; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1144; X86-SSE-NEXT: retl # encoding: [0xc3] 1145; 1146; X86-AVX1-LABEL: test_mm_load_ps: 1147; X86-AVX1: # %bb.0: 1148; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1149; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] 1150; X86-AVX1-NEXT: retl # encoding: [0xc3] 1151; 1152; X86-AVX512-LABEL: test_mm_load_ps: 1153; X86-AVX512: # %bb.0: 1154; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1155; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] 1156; X86-AVX512-NEXT: retl # encoding: [0xc3] 1157; 1158; X64-SSE-LABEL: test_mm_load_ps: 1159; X64-SSE: # %bb.0: 1160; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1161; X64-SSE-NEXT: retq # encoding: [0xc3] 1162; 1163; X64-AVX1-LABEL: test_mm_load_ps: 1164; X64-AVX1: # %bb.0: 1165; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 1166; X64-AVX1-NEXT: retq # encoding: [0xc3] 1167; 1168; X64-AVX512-LABEL: test_mm_load_ps: 1169; X64-AVX512: # %bb.0: 1170; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 1171; X64-AVX512-NEXT: retq # encoding: [0xc3] 1172 %arg0 = bitcast float* %a0 to <4 x float>* 1173 %res = load <4 x float>, <4 x float>* %arg0, align 16 1174 ret <4 x float> %res 1175} 1176 1177define <4 x float> @test_mm_load_ps1(float* %a0) nounwind { 1178; X86-SSE-LABEL: test_mm_load_ps1: 1179; X86-SSE: # %bb.0: 1180; X86-SSE-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1181; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1182; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1183; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1184; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1185; X86-SSE-NEXT: retl # encoding: [0xc3] 1186; 1187; X86-AVX1-LABEL: test_mm_load_ps1: 1188; X86-AVX1: # %bb.0: 1189; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1190; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1191; X86-AVX1-NEXT: retl # encoding: [0xc3] 1192; 1193; X86-AVX512-LABEL: test_mm_load_ps1: 1194; X86-AVX512: # %bb.0: 1195; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1196; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1197; X86-AVX512-NEXT: retl # encoding: [0xc3] 1198; 1199; X64-SSE-LABEL: test_mm_load_ps1: 1200; X64-SSE: # %bb.0: 1201; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1202; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1203; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1204; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1205; X64-SSE-NEXT: retq # encoding: [0xc3] 1206; 1207; X64-AVX1-LABEL: test_mm_load_ps1: 1208; X64-AVX1: # %bb.0: 1209; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1210; X64-AVX1-NEXT: retq # encoding: [0xc3] 1211; 1212; X64-AVX512-LABEL: test_mm_load_ps1: 1213; X64-AVX512: # %bb.0: 1214; X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1215; X64-AVX512-NEXT: retq # encoding: [0xc3] 1216 %ld = load float, float* %a0, align 4 1217 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1218 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1219 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1220 %res3 = 
insertelement <4 x float> %res2, float %ld, i32 3 1221 ret <4 x float> %res3 1222} 1223 1224define <4 x float> @test_mm_load_ss(float* %a0) nounwind { 1225; X86-SSE-LABEL: test_mm_load_ss: 1226; X86-SSE: # %bb.0: 1227; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1228; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1229; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1230; X86-SSE-NEXT: retl # encoding: [0xc3] 1231; 1232; X86-AVX1-LABEL: test_mm_load_ss: 1233; X86-AVX1: # %bb.0: 1234; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1235; X86-AVX1-NEXT: vmovss (%eax), %xmm0 # encoding: [0xc5,0xfa,0x10,0x00] 1236; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1237; X86-AVX1-NEXT: retl # encoding: [0xc3] 1238; 1239; X86-AVX512-LABEL: test_mm_load_ss: 1240; X86-AVX512: # %bb.0: 1241; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1242; X86-AVX512-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00] 1243; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1244; X86-AVX512-NEXT: retl # encoding: [0xc3] 1245; 1246; X64-SSE-LABEL: test_mm_load_ss: 1247; X64-SSE: # %bb.0: 1248; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1249; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1250; X64-SSE-NEXT: retq # encoding: [0xc3] 1251; 1252; X64-AVX1-LABEL: test_mm_load_ss: 1253; X64-AVX1: # %bb.0: 1254; X64-AVX1-NEXT: vmovss (%rdi), %xmm0 # encoding: [0xc5,0xfa,0x10,0x07] 1255; X64-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 1256; X64-AVX1-NEXT: retq # encoding: [0xc3] 1257; 1258; X64-AVX512-LABEL: test_mm_load_ss: 1259; X64-AVX512: # %bb.0: 1260; X64-AVX512-NEXT: vmovss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07] 1261; X64-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 1262; X64-AVX512-NEXT: retq # encoding: [0xc3] 1263 %ld = load float, float* %a0, align 1 1264 %res0 = insertelement <4 x float> undef, 
float %ld, i32 0 1265 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 1266 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 1267 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 1268 ret <4 x float> %res3 1269} 1270 1271define <4 x float> @test_mm_load1_ps(float* %a0) nounwind { 1272; X86-SSE-LABEL: test_mm_load1_ps: 1273; X86-SSE: # %bb.0: 1274; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1275; X86-SSE-NEXT: movss (%eax), %xmm0 # encoding: [0xf3,0x0f,0x10,0x00] 1276; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1277; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1278; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1279; X86-SSE-NEXT: retl # encoding: [0xc3] 1280; 1281; X86-AVX1-LABEL: test_mm_load1_ps: 1282; X86-AVX1: # %bb.0: 1283; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1284; X86-AVX1-NEXT: vbroadcastss (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x00] 1285; X86-AVX1-NEXT: retl # encoding: [0xc3] 1286; 1287; X86-AVX512-LABEL: test_mm_load1_ps: 1288; X86-AVX512: # %bb.0: 1289; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1290; X86-AVX512-NEXT: vbroadcastss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x00] 1291; X86-AVX512-NEXT: retl # encoding: [0xc3] 1292; 1293; X64-SSE-LABEL: test_mm_load1_ps: 1294; X64-SSE: # %bb.0: 1295; X64-SSE-NEXT: movss (%rdi), %xmm0 # encoding: [0xf3,0x0f,0x10,0x07] 1296; X64-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1297; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 1298; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 1299; X64-SSE-NEXT: retq # encoding: [0xc3] 1300; 1301; X64-AVX1-LABEL: test_mm_load1_ps: 1302; X64-AVX1: # %bb.0: 1303; X64-AVX1-NEXT: vbroadcastss (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x18,0x07] 1304; X64-AVX1-NEXT: retq # encoding: [0xc3] 1305; 1306; X64-AVX512-LABEL: test_mm_load1_ps: 1307; X64-AVX512: # %bb.0: 1308; 
X64-AVX512-NEXT: vbroadcastss (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0x07] 1309; X64-AVX512-NEXT: retq # encoding: [0xc3] 1310 %ld = load float, float* %a0, align 4 1311 %res0 = insertelement <4 x float> undef, float %ld, i32 0 1312 %res1 = insertelement <4 x float> %res0, float %ld, i32 1 1313 %res2 = insertelement <4 x float> %res1, float %ld, i32 2 1314 %res3 = insertelement <4 x float> %res2, float %ld, i32 3 1315 ret <4 x float> %res3 1316} 1317 1318define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { 1319; X86-SSE-LABEL: test_mm_loadh_pi: 1320; X86-SSE: # %bb.0: 1321; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1322; X86-SSE-NEXT: movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00] 1323; X86-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 1324; X86-SSE-NEXT: retl # encoding: [0xc3] 1325; 1326; X86-AVX1-LABEL: test_mm_loadh_pi: 1327; X86-AVX1: # %bb.0: 1328; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1329; X86-AVX1-NEXT: vmovhps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x00] 1330; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 1331; X86-AVX1-NEXT: retl # encoding: [0xc3] 1332; 1333; X86-AVX512-LABEL: test_mm_loadh_pi: 1334; X86-AVX512: # %bb.0: 1335; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1336; X86-AVX512-NEXT: vmovhps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x00] 1337; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 1338; X86-AVX512-NEXT: retl # encoding: [0xc3] 1339; 1340; X64-SSE-LABEL: test_mm_loadh_pi: 1341; X64-SSE: # %bb.0: 1342; X64-SSE-NEXT: movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07] 1343; X64-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 1344; X64-SSE-NEXT: retq # encoding: [0xc3] 1345; 1346; X64-AVX1-LABEL: test_mm_loadh_pi: 1347; X64-AVX1: # %bb.0: 1348; X64-AVX1-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0x07] 1349; X64-AVX1-NEXT: # xmm0 = 
xmm0[0,1],mem[0,1] 1350; X64-AVX1-NEXT: retq # encoding: [0xc3] 1351; 1352; X64-AVX512-LABEL: test_mm_loadh_pi: 1353; X64-AVX512: # %bb.0: 1354; X64-AVX512-NEXT: vmovhps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0x07] 1355; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],mem[0,1] 1356; X64-AVX512-NEXT: retq # encoding: [0xc3] 1357 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1358 %ld = load <2 x float>, <2 x float>* %ptr 1359 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1360 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1361 ret <4 x float> %res 1362} 1363 1364define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { 1365; X86-SSE-LABEL: test_mm_loadl_pi: 1366; X86-SSE: # %bb.0: 1367; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1368; X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00] 1369; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 1370; X86-SSE-NEXT: retl # encoding: [0xc3] 1371; 1372; X86-AVX1-LABEL: test_mm_loadl_pi: 1373; X86-AVX1: # %bb.0: 1374; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1375; X86-AVX1-NEXT: vmovlps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x00] 1376; X86-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 1377; X86-AVX1-NEXT: retl # encoding: [0xc3] 1378; 1379; X86-AVX512-LABEL: test_mm_loadl_pi: 1380; X86-AVX512: # %bb.0: 1381; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1382; X86-AVX512-NEXT: vmovlps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x00] 1383; X86-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 1384; X86-AVX512-NEXT: retl # encoding: [0xc3] 1385; 1386; X64-SSE-LABEL: test_mm_loadl_pi: 1387; X64-SSE: # %bb.0: 1388; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07] 1389; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 1390; X64-SSE-NEXT: retq # encoding: 
[0xc3] 1391; 1392; X64-AVX1-LABEL: test_mm_loadl_pi: 1393; X64-AVX1: # %bb.0: 1394; X64-AVX1-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x12,0x07] 1395; X64-AVX1-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 1396; X64-AVX1-NEXT: retq # encoding: [0xc3] 1397; 1398; X64-AVX512-LABEL: test_mm_loadl_pi: 1399; X64-AVX512: # %bb.0: 1400; X64-AVX512-NEXT: vmovlps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x12,0x07] 1401; X64-AVX512-NEXT: # xmm0 = mem[0,1],xmm0[2,3] 1402; X64-AVX512-NEXT: retq # encoding: [0xc3] 1403 %ptr = bitcast x86_mmx* %a1 to <2 x float>* 1404 %ld = load <2 x float>, <2 x float>* %ptr 1405 %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1406 %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 1407 ret <4 x float> %res 1408} 1409 1410define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind { 1411; X86-SSE-LABEL: test_mm_loadr_ps: 1412; X86-SSE: # %bb.0: 1413; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1414; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] 1415; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1416; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1417; X86-SSE-NEXT: retl # encoding: [0xc3] 1418; 1419; X86-AVX1-LABEL: test_mm_loadr_ps: 1420; X86-AVX1: # %bb.0: 1421; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1422; X86-AVX1-NEXT: vpermilps $27, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1423; X86-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1424; X86-AVX1-NEXT: retl # encoding: [0xc3] 1425; 1426; X86-AVX512-LABEL: test_mm_loadr_ps: 1427; X86-AVX512: # %bb.0: 1428; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1429; X86-AVX512-NEXT: vpermilps $27, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x00,0x1b] 1430; X86-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1431; 
X86-AVX512-NEXT: retl # encoding: [0xc3] 1432; 1433; X64-SSE-LABEL: test_mm_loadr_ps: 1434; X64-SSE: # %bb.0: 1435; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 1436; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 1437; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 1438; X64-SSE-NEXT: retq # encoding: [0xc3] 1439; 1440; X64-AVX1-LABEL: test_mm_loadr_ps: 1441; X64-AVX1: # %bb.0: 1442; X64-AVX1-NEXT: vpermilps $27, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1443; X64-AVX1-NEXT: # xmm0 = mem[3,2,1,0] 1444; X64-AVX1-NEXT: retq # encoding: [0xc3] 1445; 1446; X64-AVX512-LABEL: test_mm_loadr_ps: 1447; X64-AVX512: # %bb.0: 1448; X64-AVX512-NEXT: vpermilps $27, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0x07,0x1b] 1449; X64-AVX512-NEXT: # xmm0 = mem[3,2,1,0] 1450; X64-AVX512-NEXT: retq # encoding: [0xc3] 1451 %arg0 = bitcast float* %a0 to <4 x float>* 1452 %ld = load <4 x float>, <4 x float>* %arg0, align 16 1453 %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1454 ret <4 x float> %res 1455} 1456 1457define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind { 1458; X86-SSE-LABEL: test_mm_loadu_ps: 1459; X86-SSE: # %bb.0: 1460; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1461; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] 1462; X86-SSE-NEXT: retl # encoding: [0xc3] 1463; 1464; X86-AVX1-LABEL: test_mm_loadu_ps: 1465; X86-AVX1: # %bb.0: 1466; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1467; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] 1468; X86-AVX1-NEXT: retl # encoding: [0xc3] 1469; 1470; X86-AVX512-LABEL: test_mm_loadu_ps: 1471; X86-AVX512: # %bb.0: 1472; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1473; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] 1474; 
X86-AVX512-NEXT: retl # encoding: [0xc3] 1475; 1476; X64-SSE-LABEL: test_mm_loadu_ps: 1477; X64-SSE: # %bb.0: 1478; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] 1479; X64-SSE-NEXT: retq # encoding: [0xc3] 1480; 1481; X64-AVX1-LABEL: test_mm_loadu_ps: 1482; X64-AVX1: # %bb.0: 1483; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] 1484; X64-AVX1-NEXT: retq # encoding: [0xc3] 1485; 1486; X64-AVX512-LABEL: test_mm_loadu_ps: 1487; X64-AVX512: # %bb.0: 1488; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] 1489; X64-AVX512-NEXT: retq # encoding: [0xc3] 1490 %arg0 = bitcast float* %a0 to <4 x float>* 1491 %res = load <4 x float>, <4 x float>* %arg0, align 1 1492 ret <4 x float> %res 1493} 1494 1495define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) { 1496; SSE-LABEL: test_mm_max_ps: 1497; SSE: # %bb.0: 1498; SSE-NEXT: maxps %xmm1, %xmm0 # encoding: [0x0f,0x5f,0xc1] 1499; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1500; 1501; AVX1-LABEL: test_mm_max_ps: 1502; AVX1: # %bb.0: 1503; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5f,0xc1] 1504; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1505; 1506; AVX512-LABEL: test_mm_max_ps: 1507; AVX512: # %bb.0: 1508; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1] 1509; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1510 %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 1511 ret <4 x float> %res 1512} 1513declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 1514 1515define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) { 1516; SSE-LABEL: test_mm_max_ss: 1517; SSE: # %bb.0: 1518; SSE-NEXT: maxss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5f,0xc1] 1519; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1520; 1521; AVX1-LABEL: test_mm_max_ss: 1522; AVX1: # %bb.0: 1523; AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # 
encoding: [0xc5,0xfa,0x5f,0xc1] 1524; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1525; 1526; AVX512-LABEL: test_mm_max_ss: 1527; AVX512: # %bb.0: 1528; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5f,0xc1] 1529; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1530 %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 1531 ret <4 x float> %res 1532} 1533declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 1534 1535define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) { 1536; SSE-LABEL: test_mm_min_ps: 1537; SSE: # %bb.0: 1538; SSE-NEXT: minps %xmm1, %xmm0 # encoding: [0x0f,0x5d,0xc1] 1539; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1540; 1541; AVX1-LABEL: test_mm_min_ps: 1542; AVX1: # %bb.0: 1543; AVX1-NEXT: vminps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5d,0xc1] 1544; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1545; 1546; AVX512-LABEL: test_mm_min_ps: 1547; AVX512: # %bb.0: 1548; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1] 1549; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1550 %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) 1551 ret <4 x float> %res 1552} 1553declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 1554 1555define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) { 1556; SSE-LABEL: test_mm_min_ss: 1557; SSE: # %bb.0: 1558; SSE-NEXT: minss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5d,0xc1] 1559; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1560; 1561; AVX1-LABEL: test_mm_min_ss: 1562; AVX1: # %bb.0: 1563; AVX1-NEXT: vminss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5d,0xc1] 1564; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1565; 1566; AVX512-LABEL: test_mm_min_ss: 1567; AVX512: # %bb.0: 1568; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5d,0xc1] 1569; AVX512-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 1570 %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 1571 ret <4 x float> %res 1572} 1573declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 1574 1575define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) { 1576; SSE-LABEL: test_mm_move_ss: 1577; SSE: # %bb.0: 1578; SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 1579; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1580; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1581; 1582; AVX-LABEL: test_mm_move_ss: 1583; AVX: # %bb.0: 1584; AVX-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01] 1585; AVX-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 1586; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1587 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 1588 ret <4 x float> %res 1589} 1590 1591define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) { 1592; SSE-LABEL: test_mm_movehl_ps: 1593; SSE: # %bb.0: 1594; SSE-NEXT: movhlps %xmm1, %xmm0 # encoding: [0x0f,0x12,0xc1] 1595; SSE-NEXT: # xmm0 = xmm1[1],xmm0[1] 1596; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1597; 1598; AVX1-LABEL: test_mm_movehl_ps: 1599; AVX1: # %bb.0: 1600; AVX1-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x15,0xc0] 1601; AVX1-NEXT: # xmm0 = xmm1[1],xmm0[1] 1602; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1603; 1604; AVX512-LABEL: test_mm_movehl_ps: 1605; AVX512: # %bb.0: 1606; AVX512-NEXT: vunpckhpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x15,0xc0] 1607; AVX512-NEXT: # xmm0 = xmm1[1],xmm0[1] 1608; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1609 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 1610 ret <4 x float> %res 1611} 1612 1613define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) { 1614; SSE-LABEL: test_mm_movelh_ps: 1615; SSE: # %bb.0: 1616; SSE-NEXT: 
movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1617; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1618; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1619; 1620; AVX1-LABEL: test_mm_movelh_ps: 1621; AVX1: # %bb.0: 1622; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] 1623; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] 1624; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1625; 1626; AVX512-LABEL: test_mm_movelh_ps: 1627; AVX512: # %bb.0: 1628; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] 1629; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] 1630; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1631 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1632 ret <4 x float> %res 1633} 1634 1635define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind { 1636; SSE-LABEL: test_mm_movemask_ps: 1637; SSE: # %bb.0: 1638; SSE-NEXT: movmskps %xmm0, %eax # encoding: [0x0f,0x50,0xc0] 1639; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1640; 1641; AVX-LABEL: test_mm_movemask_ps: 1642; AVX: # %bb.0: 1643; AVX-NEXT: vmovmskps %xmm0, %eax # encoding: [0xc5,0xf8,0x50,0xc0] 1644; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1645 %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) 1646 ret i32 %res 1647} 1648declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 1649 1650define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1651; SSE-LABEL: test_mm_mul_ps: 1652; SSE: # %bb.0: 1653; SSE-NEXT: mulps %xmm1, %xmm0 # encoding: [0x0f,0x59,0xc1] 1654; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1655; 1656; AVX1-LABEL: test_mm_mul_ps: 1657; AVX1: # %bb.0: 1658; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x59,0xc1] 1659; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1660; 1661; AVX512-LABEL: test_mm_mul_ps: 1662; AVX512: # %bb.0: 1663; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1] 1664; AVX512-NEXT: ret{{[l|q]}} 
# encoding: [0xc3] 1665 %res = fmul <4 x float> %a0, %a1 1666 ret <4 x float> %res 1667} 1668 1669define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 1670; SSE-LABEL: test_mm_mul_ss: 1671; SSE: # %bb.0: 1672; SSE-NEXT: mulss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x59,0xc1] 1673; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1674; 1675; AVX1-LABEL: test_mm_mul_ss: 1676; AVX1: # %bb.0: 1677; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x59,0xc1] 1678; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1679; 1680; AVX512-LABEL: test_mm_mul_ss: 1681; AVX512: # %bb.0: 1682; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x59,0xc1] 1683; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1684 %ext0 = extractelement <4 x float> %a0, i32 0 1685 %ext1 = extractelement <4 x float> %a1, i32 0 1686 %fmul = fmul float %ext0, %ext1 1687 %res = insertelement <4 x float> %a0, float %fmul, i32 0 1688 ret <4 x float> %res 1689} 1690 1691define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1692; SSE-LABEL: test_mm_or_ps: 1693; SSE: # %bb.0: 1694; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] 1695; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1696; 1697; AVX1-LABEL: test_mm_or_ps: 1698; AVX1: # %bb.0: 1699; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] 1700; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1701; 1702; AVX512-LABEL: test_mm_or_ps: 1703; AVX512: # %bb.0: 1704; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] 1705; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1706 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 1707 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 1708 %res = or <4 x i32> %arg0, %arg1 1709 %bc = bitcast <4 x i32> %res to <4 x float> 1710 ret <4 x float> %bc 1711} 1712 1713define void @test_mm_prefetch(i8* %a0) { 1714; X86-LABEL: test_mm_prefetch: 1715; X86: # %bb.0: 1716; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 1717; X86-NEXT: prefetchnta (%eax) # encoding: [0x0f,0x18,0x00] 1718; X86-NEXT: retl # encoding: [0xc3] 1719; 1720; X64-LABEL: test_mm_prefetch: 1721; X64: # %bb.0: 1722; X64-NEXT: prefetchnta (%rdi) # encoding: [0x0f,0x18,0x07] 1723; X64-NEXT: retq # encoding: [0xc3] 1724 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1) 1725 ret void 1726} 1727declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone 1728 1729define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) { 1730; SSE-LABEL: test_mm_rcp_ps: 1731; SSE: # %bb.0: 1732; SSE-NEXT: rcpps %xmm0, %xmm0 # encoding: [0x0f,0x53,0xc0] 1733; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1734; 1735; AVX-LABEL: test_mm_rcp_ps: 1736; AVX: # %bb.0: 1737; AVX-NEXT: vrcpps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x53,0xc0] 1738; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1739 %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 1740 ret <4 x float> %res 1741} 1742declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 1743 1744define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) { 1745; SSE-LABEL: test_mm_rcp_ss: 1746; SSE: # %bb.0: 1747; SSE-NEXT: rcpss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x53,0xc0] 1748; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1749; 1750; AVX-LABEL: test_mm_rcp_ss: 1751; AVX: # %bb.0: 1752; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x53,0xc0] 1753; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1754 %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) 1755 ret <4 x float> %rcp 1756} 1757declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 1758 1759define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) { 1760; SSE-LABEL: test_mm_rsqrt_ps: 1761; SSE: # %bb.0: 1762; SSE-NEXT: rsqrtps %xmm0, %xmm0 # encoding: [0x0f,0x52,0xc0] 1763; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1764; 1765; AVX-LABEL: test_mm_rsqrt_ps: 1766; AVX: # %bb.0: 1767; AVX-NEXT: vrsqrtps %xmm0, %xmm0 # 
encoding: [0xc5,0xf8,0x52,0xc0] 1768; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1769 %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 1770 ret <4 x float> %res 1771} 1772declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 1773 1774define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) { 1775; SSE-LABEL: test_mm_rsqrt_ss: 1776; SSE: # %bb.0: 1777; SSE-NEXT: rsqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x52,0xc0] 1778; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1779; 1780; AVX-LABEL: test_mm_rsqrt_ss: 1781; AVX: # %bb.0: 1782; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x52,0xc0] 1783; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] 1784 %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) 1785 ret <4 x float> %rsqrt 1786} 1787declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 1788 1789define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind { 1790; X86-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1791; X86-SSE: # %bb.0: 1792; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1793; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1794; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1795; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1796; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1797; X86-SSE-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1798; X86-SSE-NEXT: # imm = 0xE07F 1799; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1800; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1801; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1802; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1803; X86-SSE-NEXT: retl # encoding: [0xc3] 1804; 1805; X86-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1806; X86-AVX: # %bb.0: 1807; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1808; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1809; X86-AVX-NEXT: movl %esp, %ecx # encoding: 
[0x89,0xe1] 1810; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1811; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1812; X86-AVX-NEXT: andl $-8065, %edx # encoding: [0x81,0xe2,0x7f,0xe0,0xff,0xff] 1813; X86-AVX-NEXT: # imm = 0xE07F 1814; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1815; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1816; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1817; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1818; X86-AVX-NEXT: retl # encoding: [0xc3] 1819; 1820; X64-SSE-LABEL: test_MM_SET_EXCEPTION_MASK: 1821; X64-SSE: # %bb.0: 1822; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1823; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1824; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1825; X64-SSE-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1826; X64-SSE-NEXT: # imm = 0xE07F 1827; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1828; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1829; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1830; X64-SSE-NEXT: retq # encoding: [0xc3] 1831; 1832; X64-AVX-LABEL: test_MM_SET_EXCEPTION_MASK: 1833; X64-AVX: # %bb.0: 1834; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1835; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1836; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1837; X64-AVX-NEXT: andl $-8065, %ecx # encoding: [0x81,0xe1,0x7f,0xe0,0xff,0xff] 1838; X64-AVX-NEXT: # imm = 0xE07F 1839; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1840; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1841; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1842; X64-AVX-NEXT: retq # encoding: [0xc3] 1843 %1 = alloca i32, align 4 1844 %2 = bitcast i32* %1 to i8* 1845 
call void @llvm.x86.sse.stmxcsr(i8* %2) 1846 %3 = load i32, i32* %1 1847 %4 = and i32 %3, -8065 1848 %5 = or i32 %4, %a0 1849 store i32 %5, i32* %1 1850 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1851 ret void 1852} 1853declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone 1854 1855define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind { 1856; X86-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1857; X86-SSE: # %bb.0: 1858; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1859; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1860; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1861; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1862; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1863; X86-SSE-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1864; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1865; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1866; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1867; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1868; X86-SSE-NEXT: retl # encoding: [0xc3] 1869; 1870; X86-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1871; X86-AVX: # %bb.0: 1872; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1873; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1874; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1875; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1876; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1877; X86-AVX-NEXT: andl $-64, %edx # encoding: [0x83,0xe2,0xc0] 1878; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1879; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1880; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1881; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1882; X86-AVX-NEXT: retl # encoding: [0xc3] 1883; 1884; X64-SSE-LABEL: test_MM_SET_EXCEPTION_STATE: 1885; X64-SSE: # %bb.0: 1886; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: 
[0x48,0x8d,0x44,0x24,0xfc] 1887; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1888; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1889; X64-SSE-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1890; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1891; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1892; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1893; X64-SSE-NEXT: retq # encoding: [0xc3] 1894; 1895; X64-AVX-LABEL: test_MM_SET_EXCEPTION_STATE: 1896; X64-AVX: # %bb.0: 1897; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1898; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 1899; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1900; X64-AVX-NEXT: andl $-64, %ecx # encoding: [0x83,0xe1,0xc0] 1901; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1902; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1903; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1904; X64-AVX-NEXT: retq # encoding: [0xc3] 1905 %1 = alloca i32, align 4 1906 %2 = bitcast i32* %1 to i8* 1907 call void @llvm.x86.sse.stmxcsr(i8* %2) 1908 %3 = load i32, i32* %1 1909 %4 = and i32 %3, -64 1910 %5 = or i32 %4, %a0 1911 store i32 %5, i32* %1 1912 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1913 ret void 1914} 1915 1916define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind { 1917; X86-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1918; X86-SSE: # %bb.0: 1919; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 1920; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1921; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1922; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 1923; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1924; X86-SSE-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1925; X86-SSE-NEXT: # imm = 0xFFFF7FFF 
1926; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1927; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1928; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 1929; X86-SSE-NEXT: popl %eax # encoding: [0x58] 1930; X86-SSE-NEXT: retl # encoding: [0xc3] 1931; 1932; X86-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1933; X86-AVX: # %bb.0: 1934; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 1935; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 1936; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 1937; X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 1938; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 1939; X86-AVX-NEXT: andl $-32769, %edx # encoding: [0x81,0xe2,0xff,0x7f,0xff,0xff] 1940; X86-AVX-NEXT: # imm = 0xFFFF7FFF 1941; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 1942; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 1943; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 1944; X86-AVX-NEXT: popl %eax # encoding: [0x58] 1945; X86-AVX-NEXT: retl # encoding: [0xc3] 1946; 1947; X64-SSE-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1948; X64-SSE: # %bb.0: 1949; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1950; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 1951; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1952; X64-SSE-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 1953; X64-SSE-NEXT: # imm = 0xFFFF7FFF 1954; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1955; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1956; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 1957; X64-SSE-NEXT: retq # encoding: [0xc3] 1958; 1959; X64-AVX-LABEL: test_MM_SET_FLUSH_ZERO_MODE: 1960; X64-AVX: # %bb.0: 1961; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 1962; X64-AVX-NEXT: vstmxcsr (%rax) # 
encoding: [0xc5,0xf8,0xae,0x18] 1963; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 1964; X64-AVX-NEXT: andl $-32769, %ecx # encoding: [0x81,0xe1,0xff,0x7f,0xff,0xff] 1965; X64-AVX-NEXT: # imm = 0xFFFF7FFF 1966; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 1967; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 1968; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 1969; X64-AVX-NEXT: retq # encoding: [0xc3] 1970 %1 = alloca i32, align 4 1971 %2 = bitcast i32* %1 to i8* 1972 call void @llvm.x86.sse.stmxcsr(i8* %2) 1973 %3 = load i32, i32* %1 1974 %4 = and i32 %3, -32769 1975 %5 = or i32 %4, %a0 1976 store i32 %5, i32* %1 1977 call void @llvm.x86.sse.ldmxcsr(i8* %2) 1978 ret void 1979} 1980 1981define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 1982; X86-SSE-LABEL: test_mm_set_ps: 1983; X86-SSE: # %bb.0: 1984; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 1985; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 1986; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 1987; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1988; X86-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 1989; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1990; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] 1991; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 1992; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x04] 1993; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 1994; X86-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] 1995; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1996; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 1997; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 1998; X86-SSE-NEXT: retl # encoding: [0xc3] 1999; 2000; X86-AVX1-LABEL: test_mm_set_ps: 2001; X86-AVX1: # 
%bb.0: 2002; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2003; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2004; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2005; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2006; X86-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2007; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2008; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] 2009; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2010; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x20] 2011; X86-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2012; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] 2013; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2014; X86-AVX1-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] 2015; X86-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0] 2016; X86-AVX1-NEXT: retl # encoding: [0xc3] 2017; 2018; X86-AVX512-LABEL: test_mm_set_ps: 2019; X86-AVX512: # %bb.0: 2020; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2021; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2022; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2023; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2024; X86-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2025; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2026; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x08] 2027; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2028; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x21,0xc1,0x20] 2029; X86-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 2030; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x04] 2031; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2032; X86-AVX512-NEXT: vinsertps $48, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x30] 2033; X86-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm1[0] 2034; X86-AVX512-NEXT: retl # encoding: [0xc3] 2035; 2036; X64-SSE-LABEL: test_mm_set_ps: 2037; X64-SSE: # %bb.0: 2038; X64-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2039; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2040; X64-SSE-NEXT: unpcklps %xmm2, %xmm3 # encoding: [0x0f,0x14,0xda] 2041; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 2042; X64-SSE-NEXT: movlhps %xmm1, %xmm3 # encoding: [0x0f,0x16,0xd9] 2043; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm1[0] 2044; X64-SSE-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3] 2045; X64-SSE-NEXT: retq # encoding: [0xc3] 2046; 2047; X64-AVX1-LABEL: test_mm_set_ps: 2048; X64-AVX1: # %bb.0: 2049; X64-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2050; X64-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2051; X64-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2052; X64-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2053; X64-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2054; X64-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2055; X64-AVX1-NEXT: retq # encoding: [0xc3] 2056; 2057; X64-AVX512-LABEL: test_mm_set_ps: 2058; X64-AVX512: # %bb.0: 2059; X64-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2060; X64-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2061; X64-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x69,0x21,0xc9,0x20] 2062; X64-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2063; X64-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2064; X64-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2065; X64-AVX512-NEXT: retq # encoding: [0xc3] 2066 %res0 = insertelement <4 x float> undef, float %a3, i32 0 2067 %res1 = insertelement <4 x float> %res0, float %a2, i32 1 2068 %res2 = insertelement <4 x float> %res1, float %a1, i32 2 2069 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2070 ret <4 x float> %res3 2071} 2072 2073define <4 x float> @test_mm_set_ps1(float %a0) nounwind { 2074; X86-SSE-LABEL: test_mm_set_ps1: 2075; X86-SSE: # %bb.0: 2076; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2077; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2078; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2079; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2080; X86-SSE-NEXT: retl # encoding: [0xc3] 2081; 2082; X86-AVX1-LABEL: test_mm_set_ps1: 2083; X86-AVX1: # %bb.0: 2084; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2085; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2086; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2087; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2088; X86-AVX1-NEXT: retl # encoding: [0xc3] 2089; 2090; X86-AVX512-LABEL: test_mm_set_ps1: 2091; X86-AVX512: # %bb.0: 2092; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2093; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2094; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2095; X86-AVX512-NEXT: retl # encoding: [0xc3] 2096; 2097; X64-SSE-LABEL: test_mm_set_ps1: 2098; X64-SSE: # %bb.0: 2099; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2100; 
X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2101; X64-SSE-NEXT: retq # encoding: [0xc3] 2102; 2103; X64-AVX1-LABEL: test_mm_set_ps1: 2104; X64-AVX1: # %bb.0: 2105; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2106; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2107; X64-AVX1-NEXT: retq # encoding: [0xc3] 2108; 2109; X64-AVX512-LABEL: test_mm_set_ps1: 2110; X64-AVX512: # %bb.0: 2111; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2112; X64-AVX512-NEXT: retq # encoding: [0xc3] 2113 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2114 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2115 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2116 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2117 ret <4 x float> %res3 2118} 2119 2120define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind { 2121; X86-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2122; X86-SSE: # %bb.0: 2123; X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2124; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2125; X86-SSE-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2126; X86-SSE-NEXT: stmxcsr (%ecx) # encoding: [0x0f,0xae,0x19] 2127; X86-SSE-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2128; X86-SSE-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2129; X86-SSE-NEXT: # imm = 0x9FFF 2130; X86-SSE-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2131; X86-SSE-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2132; X86-SSE-NEXT: ldmxcsr (%ecx) # encoding: [0x0f,0xae,0x11] 2133; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2134; X86-SSE-NEXT: retl # encoding: [0xc3] 2135; 2136; X86-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2137; X86-AVX: # %bb.0: 2138; X86-AVX-NEXT: pushl %eax # encoding: [0x50] 2139; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] 2140; X86-AVX-NEXT: movl %esp, %ecx # encoding: [0x89,0xe1] 2141; 
X86-AVX-NEXT: vstmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x19] 2142; X86-AVX-NEXT: movl (%esp), %edx # encoding: [0x8b,0x14,0x24] 2143; X86-AVX-NEXT: andl $-24577, %edx # encoding: [0x81,0xe2,0xff,0x9f,0xff,0xff] 2144; X86-AVX-NEXT: # imm = 0x9FFF 2145; X86-AVX-NEXT: orl %eax, %edx # encoding: [0x09,0xc2] 2146; X86-AVX-NEXT: movl %edx, (%esp) # encoding: [0x89,0x14,0x24] 2147; X86-AVX-NEXT: vldmxcsr (%ecx) # encoding: [0xc5,0xf8,0xae,0x11] 2148; X86-AVX-NEXT: popl %eax # encoding: [0x58] 2149; X86-AVX-NEXT: retl # encoding: [0xc3] 2150; 2151; X64-SSE-LABEL: test_MM_SET_ROUNDING_MODE: 2152; X64-SSE: # %bb.0: 2153; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2154; X64-SSE-NEXT: stmxcsr (%rax) # encoding: [0x0f,0xae,0x18] 2155; X64-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2156; X64-SSE-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2157; X64-SSE-NEXT: # imm = 0x9FFF 2158; X64-SSE-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2159; X64-SSE-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2160; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2161; X64-SSE-NEXT: retq # encoding: [0xc3] 2162; 2163; X64-AVX-LABEL: test_MM_SET_ROUNDING_MODE: 2164; X64-AVX: # %bb.0: 2165; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2166; X64-AVX-NEXT: vstmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x18] 2167; X64-AVX-NEXT: movl -{{[0-9]+}}(%rsp), %ecx # encoding: [0x8b,0x4c,0x24,0xfc] 2168; X64-AVX-NEXT: andl $-24577, %ecx # encoding: [0x81,0xe1,0xff,0x9f,0xff,0xff] 2169; X64-AVX-NEXT: # imm = 0x9FFF 2170; X64-AVX-NEXT: orl %edi, %ecx # encoding: [0x09,0xf9] 2171; X64-AVX-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x4c,0x24,0xfc] 2172; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2173; X64-AVX-NEXT: retq # encoding: [0xc3] 2174 %1 = alloca i32, align 4 2175 %2 = bitcast i32* %1 to i8* 2176 call void 
@llvm.x86.sse.stmxcsr(i8* %2) 2177 %3 = load i32, i32* %1 2178 %4 = and i32 %3, -24577 2179 %5 = or i32 %4, %a0 2180 store i32 %5, i32* %1 2181 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2182 ret void 2183} 2184 2185define <4 x float> @test_mm_set_ss(float %a0) nounwind { 2186; X86-SSE-LABEL: test_mm_set_ss: 2187; X86-SSE: # %bb.0: 2188; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] 2189; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2190; X86-SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2191; X86-SSE-NEXT: movss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x10,0xc1] 2192; X86-SSE-NEXT: # xmm0 = xmm1[0],xmm0[1,2,3] 2193; X86-SSE-NEXT: retl # encoding: [0xc3] 2194; 2195; X86-AVX1-LABEL: test_mm_set_ss: 2196; X86-AVX1: # %bb.0: 2197; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2198; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2199; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2200; X86-AVX1-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2201; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2202; X86-AVX1-NEXT: retl # encoding: [0xc3] 2203; 2204; X86-AVX512-LABEL: test_mm_set_ss: 2205; X86-AVX512: # %bb.0: 2206; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2207; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2208; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2209; X86-AVX512-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2210; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2211; X86-AVX512-NEXT: retl # encoding: [0xc3] 2212; 2213; X64-SSE-LABEL: test_mm_set_ss: 2214; X64-SSE: # %bb.0: 2215; X64-SSE-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9] 2216; X64-SSE-NEXT: movss %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x10,0xc8] 2217; X64-SSE-NEXT: # xmm1 = xmm0[0],xmm1[1,2,3] 2218; 
X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] 2219; X64-SSE-NEXT: retq # encoding: [0xc3] 2220; 2221; X64-AVX-LABEL: test_mm_set_ss: 2222; X64-AVX: # %bb.0: 2223; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9] 2224; X64-AVX-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01] 2225; X64-AVX-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3] 2226; X64-AVX-NEXT: retq # encoding: [0xc3] 2227 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2228 %res1 = insertelement <4 x float> %res0, float 0.0, i32 1 2229 %res2 = insertelement <4 x float> %res1, float 0.0, i32 2 2230 %res3 = insertelement <4 x float> %res2, float 0.0, i32 3 2231 ret <4 x float> %res3 2232} 2233 2234define <4 x float> @test_mm_set1_ps(float %a0) nounwind { 2235; X86-SSE-LABEL: test_mm_set1_ps: 2236; X86-SSE: # %bb.0: 2237; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2238; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2239; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2240; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2241; X86-SSE-NEXT: retl # encoding: [0xc3] 2242; 2243; X86-AVX1-LABEL: test_mm_set1_ps: 2244; X86-AVX1: # %bb.0: 2245; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2246; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2247; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2248; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2249; X86-AVX1-NEXT: retl # encoding: [0xc3] 2250; 2251; X86-AVX512-LABEL: test_mm_set1_ps: 2252; X86-AVX512: # %bb.0: 2253; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] 2254; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2255; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2256; X86-AVX512-NEXT: retl # encoding: [0xc3] 2257; 2258; 
X64-SSE-LABEL: test_mm_set1_ps: 2259; X64-SSE: # %bb.0: 2260; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2261; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2262; X64-SSE-NEXT: retq # encoding: [0xc3] 2263; 2264; X64-AVX1-LABEL: test_mm_set1_ps: 2265; X64-AVX1: # %bb.0: 2266; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2267; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2268; X64-AVX1-NEXT: retq # encoding: [0xc3] 2269; 2270; X64-AVX512-LABEL: test_mm_set1_ps: 2271; X64-AVX512: # %bb.0: 2272; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2273; X64-AVX512-NEXT: retq # encoding: [0xc3] 2274 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2275 %res1 = insertelement <4 x float> %res0, float %a0, i32 1 2276 %res2 = insertelement <4 x float> %res1, float %a0, i32 2 2277 %res3 = insertelement <4 x float> %res2, float %a0, i32 3 2278 ret <4 x float> %res3 2279} 2280 2281define void @test_mm_setcsr(i32 %a0) nounwind { 2282; X86-SSE-LABEL: test_mm_setcsr: 2283; X86-SSE: # %bb.0: 2284; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2285; X86-SSE-NEXT: ldmxcsr (%eax) # encoding: [0x0f,0xae,0x10] 2286; X86-SSE-NEXT: retl # encoding: [0xc3] 2287; 2288; X86-AVX-LABEL: test_mm_setcsr: 2289; X86-AVX: # %bb.0: 2290; X86-AVX-NEXT: leal {{[0-9]+}}(%esp), %eax # encoding: [0x8d,0x44,0x24,0x04] 2291; X86-AVX-NEXT: vldmxcsr (%eax) # encoding: [0xc5,0xf8,0xae,0x10] 2292; X86-AVX-NEXT: retl # encoding: [0xc3] 2293; 2294; X64-SSE-LABEL: test_mm_setcsr: 2295; X64-SSE: # %bb.0: 2296; X64-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2297; X64-SSE-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2298; X64-SSE-NEXT: ldmxcsr (%rax) # encoding: [0x0f,0xae,0x10] 2299; X64-SSE-NEXT: retq # encoding: [0xc3] 2300; 2301; X64-AVX-LABEL: test_mm_setcsr: 2302; X64-AVX: # %bb.0: 2303; X64-AVX-NEXT: movl 
%edi, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x7c,0x24,0xfc] 2304; X64-AVX-NEXT: leaq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8d,0x44,0x24,0xfc] 2305; X64-AVX-NEXT: vldmxcsr (%rax) # encoding: [0xc5,0xf8,0xae,0x10] 2306; X64-AVX-NEXT: retq # encoding: [0xc3] 2307 %st = alloca i32, align 4 2308 store i32 %a0, i32* %st, align 4 2309 %bc = bitcast i32* %st to i8* 2310 call void @llvm.x86.sse.ldmxcsr(i8* %bc) 2311 ret void 2312} 2313 2314define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind { 2315; X86-SSE-LABEL: test_mm_setr_ps: 2316; X86-SSE: # %bb.0: 2317; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] 2318; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2319; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] 2320; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 2321; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] 2322; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2323; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] 2324; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero 2325; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] 2326; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2327; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] 2328; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2329; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] 2330; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] 2331; X86-SSE-NEXT: retl # encoding: [0xc3] 2332; 2333; X86-AVX1-LABEL: test_mm_setr_ps: 2334; X86-AVX1: # %bb.0: 2335; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2336; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2337; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2338; X86-AVX1-NEXT: # xmm1 = mem[0],zero,zero,zero 2339; X86-AVX1-NEXT: vmovss 
{{[0-9]+}}(%esp), %xmm2 # encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2340; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero 2341; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2342; X86-AVX1-NEXT: # xmm3 = mem[0],zero,zero,zero 2343; X86-AVX1-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2344; X86-AVX1-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2345; X86-AVX1-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2346; X86-AVX1-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2347; X86-AVX1-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2348; X86-AVX1-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2349; X86-AVX1-NEXT: retl # encoding: [0xc3] 2350; 2351; X86-AVX512-LABEL: test_mm_setr_ps: 2352; X86-AVX512: # %bb.0: 2353; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x10] 2354; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2355; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x0c] 2356; X86-AVX512-NEXT: # xmm1 = mem[0],zero,zero,zero 2357; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x54,0x24,0x08] 2358; X86-AVX512-NEXT: # xmm2 = mem[0],zero,zero,zero 2359; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x5c,0x24,0x04] 2360; X86-AVX512-NEXT: # xmm3 = mem[0],zero,zero,zero 2361; X86-AVX512-NEXT: vinsertps $16, %xmm2, %xmm3, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x61,0x21,0xd2,0x10] 2362; X86-AVX512-NEXT: # xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2363; X86-AVX512-NEXT: vinsertps $32, %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x69,0x21,0xc9,0x20] 2364; X86-AVX512-NEXT: # xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 2365; X86-AVX512-NEXT: vinsertps $48, %xmm0, %xmm1, %xmm0 # EVEX TO VEX 
Compression encoding: [0xc4,0xe3,0x71,0x21,0xc0,0x30] 2366; X86-AVX512-NEXT: # xmm0 = xmm1[0,1,2],xmm0[0] 2367; X86-AVX512-NEXT: retl # encoding: [0xc3] 2368; 2369; X64-SSE-LABEL: test_mm_setr_ps: 2370; X64-SSE: # %bb.0: 2371; X64-SSE-NEXT: unpcklps %xmm3, %xmm2 # encoding: [0x0f,0x14,0xd3] 2372; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2373; X64-SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 2374; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2375; X64-SSE-NEXT: movlhps %xmm2, %xmm0 # encoding: [0x0f,0x16,0xc2] 2376; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] 2377; X64-SSE-NEXT: retq # encoding: [0xc3] 2378; 2379; X64-AVX1-LABEL: test_mm_setr_ps: 2380; X64-AVX1: # %bb.0: 2381; X64-AVX1-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2382; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2383; X64-AVX1-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2384; X64-AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2385; X64-AVX1-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2386; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2387; X64-AVX1-NEXT: retq # encoding: [0xc3] 2388; 2389; X64-AVX512-LABEL: test_mm_setr_ps: 2390; X64-AVX512: # %bb.0: 2391; X64-AVX512-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x10] 2392; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 2393; X64-AVX512-NEXT: vinsertps $32, %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc2,0x20] 2394; X64-AVX512-NEXT: # xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 2395; X64-AVX512-NEXT: vinsertps $48, %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc3,0x30] 2396; X64-AVX512-NEXT: # xmm0 = xmm0[0,1,2],xmm3[0] 2397; X64-AVX512-NEXT: retq # encoding: [0xc3] 2398 %res0 = insertelement <4 x float> undef, float %a0, i32 0 2399 %res1 = insertelement <4 x float> %res0, 
float %a1, i32 1 2400 %res2 = insertelement <4 x float> %res1, float %a2, i32 2 2401 %res3 = insertelement <4 x float> %res2, float %a3, i32 3 2402 ret <4 x float> %res3 2403} 2404 2405define <4 x float> @test_mm_setzero_ps() { 2406; SSE-LABEL: test_mm_setzero_ps: 2407; SSE: # %bb.0: 2408; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] 2409; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2410; 2411; AVX1-LABEL: test_mm_setzero_ps: 2412; AVX1: # %bb.0: 2413; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] 2414; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2415; 2416; AVX512-LABEL: test_mm_setzero_ps: 2417; AVX512: # %bb.0: 2418; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] 2419; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2420 ret <4 x float> zeroinitializer 2421} 2422 2423define void @test_mm_sfence() nounwind { 2424; CHECK-LABEL: test_mm_sfence: 2425; CHECK: # %bb.0: 2426; CHECK-NEXT: sfence # encoding: [0x0f,0xae,0xf8] 2427; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2428 call void @llvm.x86.sse.sfence() 2429 ret void 2430} 2431declare void @llvm.x86.sse.sfence() nounwind readnone 2432 2433define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2434; SSE-LABEL: test_mm_shuffle_ps: 2435; SSE: # %bb.0: 2436; SSE-NEXT: shufps $0, %xmm1, %xmm0 # encoding: [0x0f,0xc6,0xc1,0x00] 2437; SSE-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2438; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2439; 2440; AVX1-LABEL: test_mm_shuffle_ps: 2441; AVX1: # %bb.0: 2442; AVX1-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2443; AVX1-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2444; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2445; 2446; AVX512-LABEL: test_mm_shuffle_ps: 2447; AVX512: # %bb.0: 2448; AVX512-NEXT: vshufps $0, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x00] 2449; AVX512-NEXT: # xmm0 = xmm0[0,0],xmm1[0,0] 2450; AVX512-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] 2451 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> 2452 ret <4 x float> %res 2453} 2454 2455define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) { 2456; SSE-LABEL: test_mm_sqrt_ps: 2457; SSE: # %bb.0: 2458; SSE-NEXT: sqrtps %xmm0, %xmm0 # encoding: [0x0f,0x51,0xc0] 2459; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2460; 2461; AVX1-LABEL: test_mm_sqrt_ps: 2462; AVX1: # %bb.0: 2463; AVX1-NEXT: vsqrtps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x51,0xc0] 2464; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2465; 2466; AVX512-LABEL: test_mm_sqrt_ps: 2467; AVX512: # %bb.0: 2468; AVX512-NEXT: vsqrtps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x51,0xc0] 2469; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2470 %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a0) 2471 ret <4 x float> %res 2472} 2473declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readnone 2474 2475define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) { 2476; SSE-LABEL: test_mm_sqrt_ss: 2477; SSE: # %bb.0: 2478; SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2479; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2480; 2481; AVX1-LABEL: test_mm_sqrt_ss: 2482; AVX1: # %bb.0: 2483; AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2484; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2485; 2486; AVX512-LABEL: test_mm_sqrt_ss: 2487; AVX512: # %bb.0: 2488; AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2489; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 2490 %ext = extractelement <4 x float> %a0, i32 0 2491 %sqrt = call float @llvm.sqrt.f32(float %ext) 2492 %ins = insertelement <4 x float> %a0, float %sqrt, i32 0 2493 ret <4 x float> %ins 2494} 2495declare float @llvm.sqrt.f32(float) nounwind readnone 2496 2497define float @test_mm_sqrt_ss_scalar(float %a0) { 2498; X86-SSE-LABEL: test_mm_sqrt_ss_scalar: 2499; X86-SSE: # %bb.0: 2500; 
X86-SSE-NEXT: pushl %eax # encoding: [0x50] 2501; X86-SSE-NEXT: .cfi_def_cfa_offset 8 2502; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] 2503; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero 2504; X86-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2505; X86-SSE-NEXT: movss %xmm0, (%esp) # encoding: [0xf3,0x0f,0x11,0x04,0x24] 2506; X86-SSE-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2507; X86-SSE-NEXT: popl %eax # encoding: [0x58] 2508; X86-SSE-NEXT: .cfi_def_cfa_offset 4 2509; X86-SSE-NEXT: retl # encoding: [0xc3] 2510; 2511; X86-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2512; X86-AVX1: # %bb.0: 2513; X86-AVX1-NEXT: pushl %eax # encoding: [0x50] 2514; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 2515; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2516; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero 2517; X86-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2518; X86-AVX1-NEXT: vmovss %xmm0, (%esp) # encoding: [0xc5,0xfa,0x11,0x04,0x24] 2519; X86-AVX1-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2520; X86-AVX1-NEXT: popl %eax # encoding: [0x58] 2521; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 2522; X86-AVX1-NEXT: retl # encoding: [0xc3] 2523; 2524; X86-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2525; X86-AVX512: # %bb.0: 2526; X86-AVX512-NEXT: pushl %eax # encoding: [0x50] 2527; X86-AVX512-NEXT: .cfi_def_cfa_offset 8 2528; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] 2529; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero 2530; X86-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2531; X86-AVX512-NEXT: vmovss %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x04,0x24] 2532; X86-AVX512-NEXT: flds (%esp) # encoding: [0xd9,0x04,0x24] 2533; X86-AVX512-NEXT: popl %eax # encoding: [0x58] 2534; X86-AVX512-NEXT: .cfi_def_cfa_offset 4 
2535; X86-AVX512-NEXT: retl # encoding: [0xc3] 2536; 2537; X64-SSE-LABEL: test_mm_sqrt_ss_scalar: 2538; X64-SSE: # %bb.0: 2539; X64-SSE-NEXT: sqrtss %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x51,0xc0] 2540; X64-SSE-NEXT: retq # encoding: [0xc3] 2541; 2542; X64-AVX1-LABEL: test_mm_sqrt_ss_scalar: 2543; X64-AVX1: # %bb.0: 2544; X64-AVX1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x51,0xc0] 2545; X64-AVX1-NEXT: retq # encoding: [0xc3] 2546; 2547; X64-AVX512-LABEL: test_mm_sqrt_ss_scalar: 2548; X64-AVX512: # %bb.0: 2549; X64-AVX512-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x51,0xc0] 2550; X64-AVX512-NEXT: retq # encoding: [0xc3] 2551 %sqrt = call float @llvm.sqrt.f32(float %a0) 2552 ret float %sqrt 2553} 2554 2555define void @test_mm_store_ps(float *%a0, <4 x float> %a1) { 2556; X86-SSE-LABEL: test_mm_store_ps: 2557; X86-SSE: # %bb.0: 2558; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2559; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2560; X86-SSE-NEXT: retl # encoding: [0xc3] 2561; 2562; X86-AVX1-LABEL: test_mm_store_ps: 2563; X86-AVX1: # %bb.0: 2564; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2565; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2566; X86-AVX1-NEXT: retl # encoding: [0xc3] 2567; 2568; X86-AVX512-LABEL: test_mm_store_ps: 2569; X86-AVX512: # %bb.0: 2570; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2571; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2572; X86-AVX512-NEXT: retl # encoding: [0xc3] 2573; 2574; X64-SSE-LABEL: test_mm_store_ps: 2575; X64-SSE: # %bb.0: 2576; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2577; X64-SSE-NEXT: retq # encoding: [0xc3] 2578; 2579; X64-AVX1-LABEL: test_mm_store_ps: 2580; X64-AVX1: # %bb.0: 2581; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: 
[0xc5,0xf8,0x29,0x07] 2582; X64-AVX1-NEXT: retq # encoding: [0xc3] 2583; 2584; X64-AVX512-LABEL: test_mm_store_ps: 2585; X64-AVX512: # %bb.0: 2586; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2587; X64-AVX512-NEXT: retq # encoding: [0xc3] 2588 %arg0 = bitcast float* %a0 to <4 x float>* 2589 store <4 x float> %a1, <4 x float>* %arg0, align 16 2590 ret void 2591} 2592 2593define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) { 2594; X86-SSE-LABEL: test_mm_store_ps1: 2595; X86-SSE: # %bb.0: 2596; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2597; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2598; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2599; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2600; X86-SSE-NEXT: retl # encoding: [0xc3] 2601; 2602; X86-AVX1-LABEL: test_mm_store_ps1: 2603; X86-AVX1: # %bb.0: 2604; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2605; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2606; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2607; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2608; X86-AVX1-NEXT: retl # encoding: [0xc3] 2609; 2610; X86-AVX512-LABEL: test_mm_store_ps1: 2611; X86-AVX512: # %bb.0: 2612; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2613; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2614; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2615; X86-AVX512-NEXT: retl # encoding: [0xc3] 2616; 2617; X64-SSE-LABEL: test_mm_store_ps1: 2618; X64-SSE: # %bb.0: 2619; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2620; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2621; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2622; X64-SSE-NEXT: retq # 
encoding: [0xc3] 2623; 2624; X64-AVX1-LABEL: test_mm_store_ps1: 2625; X64-AVX1: # %bb.0: 2626; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2627; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2628; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2629; X64-AVX1-NEXT: retq # encoding: [0xc3] 2630; 2631; X64-AVX512-LABEL: test_mm_store_ps1: 2632; X64-AVX512: # %bb.0: 2633; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2634; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2635; X64-AVX512-NEXT: retq # encoding: [0xc3] 2636 %arg0 = bitcast float* %a0 to <4 x float>* 2637 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2638 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2639 ret void 2640} 2641 2642define void @test_mm_store_ss(float *%a0, <4 x float> %a1) { 2643; X86-SSE-LABEL: test_mm_store_ss: 2644; X86-SSE: # %bb.0: 2645; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2646; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00] 2647; X86-SSE-NEXT: retl # encoding: [0xc3] 2648; 2649; X86-AVX1-LABEL: test_mm_store_ss: 2650; X86-AVX1: # %bb.0: 2651; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2652; X86-AVX1-NEXT: vmovss %xmm0, (%eax) # encoding: [0xc5,0xfa,0x11,0x00] 2653; X86-AVX1-NEXT: retl # encoding: [0xc3] 2654; 2655; X86-AVX512-LABEL: test_mm_store_ss: 2656; X86-AVX512: # %bb.0: 2657; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2658; X86-AVX512-NEXT: vmovss %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x00] 2659; X86-AVX512-NEXT: retl # encoding: [0xc3] 2660; 2661; X64-SSE-LABEL: test_mm_store_ss: 2662; X64-SSE: # %bb.0: 2663; X64-SSE-NEXT: movss %xmm0, (%rdi) # encoding: [0xf3,0x0f,0x11,0x07] 2664; X64-SSE-NEXT: retq # encoding: 
[0xc3] 2665; 2666; X64-AVX1-LABEL: test_mm_store_ss: 2667; X64-AVX1: # %bb.0: 2668; X64-AVX1-NEXT: vmovss %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x11,0x07] 2669; X64-AVX1-NEXT: retq # encoding: [0xc3] 2670; 2671; X64-AVX512-LABEL: test_mm_store_ss: 2672; X64-AVX512: # %bb.0: 2673; X64-AVX512-NEXT: vmovss %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07] 2674; X64-AVX512-NEXT: retq # encoding: [0xc3] 2675 %ext = extractelement <4 x float> %a1, i32 0 2676 store float %ext, float* %a0, align 1 2677 ret void 2678} 2679 2680define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) { 2681; X86-SSE-LABEL: test_mm_store1_ps: 2682; X86-SSE: # %bb.0: 2683; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2684; X86-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2685; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] 2686; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2687; X86-SSE-NEXT: retl # encoding: [0xc3] 2688; 2689; X86-AVX1-LABEL: test_mm_store1_ps: 2690; X86-AVX1: # %bb.0: 2691; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2692; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2693; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2694; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2695; X86-AVX1-NEXT: retl # encoding: [0xc3] 2696; 2697; X86-AVX512-LABEL: test_mm_store1_ps: 2698; X86-AVX512: # %bb.0: 2699; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2700; X86-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2701; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2702; X86-AVX512-NEXT: retl # encoding: [0xc3] 2703; 2704; X64-SSE-LABEL: test_mm_store1_ps: 2705; X64-SSE: # %bb.0: 2706; X64-SSE-NEXT: shufps $0, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x00] 2707; X64-SSE-NEXT: # xmm0 
= xmm0[0,0,0,0] 2708; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2709; X64-SSE-NEXT: retq # encoding: [0xc3] 2710; 2711; X64-AVX1-LABEL: test_mm_store1_ps: 2712; X64-AVX1: # %bb.0: 2713; X64-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] 2714; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] 2715; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2716; X64-AVX1-NEXT: retq # encoding: [0xc3] 2717; 2718; X64-AVX512-LABEL: test_mm_store1_ps: 2719; X64-AVX512: # %bb.0: 2720; X64-AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] 2721; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2722; X64-AVX512-NEXT: retq # encoding: [0xc3] 2723 %arg0 = bitcast float* %a0 to <4 x float>* 2724 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer 2725 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2726 ret void 2727} 2728 2729define void @test_mm_storeh_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { 2730; X86-SSE-LABEL: test_mm_storeh_pi: 2731; X86-SSE: # %bb.0: 2732; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2733; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2734; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2735; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2736; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2737; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2738; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08] 2739; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 2740; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2741; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2742; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2743; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2744; X86-SSE-NEXT: retl # encoding: [0xc3] 2745; 
2746; X86-AVX1-LABEL: test_mm_storeh_pi: 2747; X86-AVX1: # %bb.0: 2748; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2749; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00] 2750; X86-AVX1-NEXT: retl # encoding: [0xc3] 2751; 2752; X86-AVX512-LABEL: test_mm_storeh_pi: 2753; X86-AVX512: # %bb.0: 2754; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2755; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00] 2756; X86-AVX512-NEXT: retl # encoding: [0xc3] 2757; 2758; X64-SSE-LABEL: test_mm_storeh_pi: 2759; X64-SSE: # %bb.0: 2760; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2761; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xf0] 2762; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2763; X64-SSE-NEXT: retq # encoding: [0xc3] 2764; 2765; X64-AVX1-LABEL: test_mm_storeh_pi: 2766; X64-AVX1: # %bb.0: 2767; X64-AVX1-NEXT: vpextrq $1, %xmm0, %rax # encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2768; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2769; X64-AVX1-NEXT: retq # encoding: [0xc3] 2770; 2771; X64-AVX512-LABEL: test_mm_storeh_pi: 2772; X64-AVX512: # %bb.0: 2773; X64-AVX512-NEXT: vpextrq $1, %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0x16,0xc0,0x01] 2774; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2775; X64-AVX512-NEXT: retq # encoding: [0xc3] 2776 %ptr = bitcast x86_mmx* %a0 to i64* 2777 %bc = bitcast <4 x float> %a1 to <2 x i64> 2778 %ext = extractelement <2 x i64> %bc, i32 1 2779 store i64 %ext, i64* %ptr 2780 ret void 2781} 2782 2783define void @test_mm_storeh_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { 2784; X86-SSE-LABEL: test_mm_storeh_pi2: 2785; X86-SSE: # %bb.0: 2786; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2787; X86-SSE-NEXT: movhps %xmm0, (%eax) # encoding: 
[0x0f,0x17,0x00] 2788; X86-SSE-NEXT: retl # encoding: [0xc3] 2789; 2790; X86-AVX1-LABEL: test_mm_storeh_pi2: 2791; X86-AVX1: # %bb.0: 2792; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2793; X86-AVX1-NEXT: vmovhps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x17,0x00] 2794; X86-AVX1-NEXT: retl # encoding: [0xc3] 2795; 2796; X86-AVX512-LABEL: test_mm_storeh_pi2: 2797; X86-AVX512: # %bb.0: 2798; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2799; X86-AVX512-NEXT: vmovhps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x00] 2800; X86-AVX512-NEXT: retl # encoding: [0xc3] 2801; 2802; X64-SSE-LABEL: test_mm_storeh_pi2: 2803; X64-SSE: # %bb.0: 2804; X64-SSE-NEXT: movhps %xmm0, (%rdi) # encoding: [0x0f,0x17,0x07] 2805; X64-SSE-NEXT: retq # encoding: [0xc3] 2806; 2807; X64-AVX1-LABEL: test_mm_storeh_pi2: 2808; X64-AVX1: # %bb.0: 2809; X64-AVX1-NEXT: vmovhps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x17,0x07] 2810; X64-AVX1-NEXT: retq # encoding: [0xc3] 2811; 2812; X64-AVX512-LABEL: test_mm_storeh_pi2: 2813; X64-AVX512: # %bb.0: 2814; X64-AVX512-NEXT: vmovhps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x17,0x07] 2815; X64-AVX512-NEXT: retq # encoding: [0xc3] 2816 %ptr = bitcast x86_mmx* %a0 to <2 x float>* 2817 %ext = shufflevector <4 x float> %a1, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2818 store <2 x float> %ext, <2 x float>* %ptr 2819 ret void 2820} 2821 2822define void @test_mm_storel_pi(x86_mmx *%a0, <4 x float> %a1) nounwind { 2823; X86-SSE-LABEL: test_mm_storel_pi: 2824; X86-SSE: # %bb.0: 2825; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] 2826; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] 2827; X86-SSE-NEXT: andl $-16, %esp # encoding: [0x83,0xe4,0xf0] 2828; X86-SSE-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] 2829; X86-SSE-NEXT: movl 8(%ebp), %eax # encoding: [0x8b,0x45,0x08] 2830; X86-SSE-NEXT: movaps %xmm0, (%esp) # encoding: [0x0f,0x29,0x04,0x24] 2831; 
X86-SSE-NEXT: movl (%esp), %ecx # encoding: [0x8b,0x0c,0x24] 2832; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] 2833; X86-SSE-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04] 2834; X86-SSE-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08] 2835; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] 2836; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] 2837; X86-SSE-NEXT: retl # encoding: [0xc3] 2838; 2839; X86-AVX1-LABEL: test_mm_storel_pi: 2840; X86-AVX1: # %bb.0: 2841; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2842; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 2843; X86-AVX1-NEXT: retl # encoding: [0xc3] 2844; 2845; X86-AVX512-LABEL: test_mm_storel_pi: 2846; X86-AVX512: # %bb.0: 2847; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2848; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2849; X86-AVX512-NEXT: retl # encoding: [0xc3] 2850; 2851; X64-SSE-LABEL: test_mm_storel_pi: 2852; X64-SSE: # %bb.0: 2853; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] 2854; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] 2855; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2856; X64-SSE-NEXT: retq # encoding: [0xc3] 2857; 2858; X64-AVX1-LABEL: test_mm_storel_pi: 2859; X64-AVX1: # %bb.0: 2860; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2861; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2862; X64-AVX1-NEXT: retq # encoding: [0xc3] 2863; 2864; X64-AVX512-LABEL: test_mm_storel_pi: 2865; X64-AVX512: # %bb.0: 2866; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] 2867; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] 2868; X64-AVX512-NEXT: retq # encoding: [0xc3] 2869 %ptr = bitcast x86_mmx* %a0 to i64* 2870 %bc = bitcast <4 x 
float> %a1 to <2 x i64> 2871 %ext = extractelement <2 x i64> %bc, i32 0 2872 store i64 %ext, i64* %ptr 2873 ret void 2874} 2875 2876; FIXME: Switch the frontend to use this code. 2877define void @test_mm_storel_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { 2878; X86-SSE-LABEL: test_mm_storel_pi2: 2879; X86-SSE: # %bb.0: 2880; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2881; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] 2882; X86-SSE-NEXT: retl # encoding: [0xc3] 2883; 2884; X86-AVX1-LABEL: test_mm_storel_pi2: 2885; X86-AVX1: # %bb.0: 2886; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2887; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] 2888; X86-AVX1-NEXT: retl # encoding: [0xc3] 2889; 2890; X86-AVX512-LABEL: test_mm_storel_pi2: 2891; X86-AVX512: # %bb.0: 2892; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2893; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] 2894; X86-AVX512-NEXT: retl # encoding: [0xc3] 2895; 2896; X64-SSE-LABEL: test_mm_storel_pi2: 2897; X64-SSE: # %bb.0: 2898; X64-SSE-NEXT: movlps %xmm0, (%rdi) # encoding: [0x0f,0x13,0x07] 2899; X64-SSE-NEXT: retq # encoding: [0xc3] 2900; 2901; X64-AVX1-LABEL: test_mm_storel_pi2: 2902; X64-AVX1: # %bb.0: 2903; X64-AVX1-NEXT: vmovlps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x13,0x07] 2904; X64-AVX1-NEXT: retq # encoding: [0xc3] 2905; 2906; X64-AVX512-LABEL: test_mm_storel_pi2: 2907; X64-AVX512: # %bb.0: 2908; X64-AVX512-NEXT: vmovlps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07] 2909; X64-AVX512-NEXT: retq # encoding: [0xc3] 2910 %ptr = bitcast x86_mmx* %a0 to <2 x float>* 2911 %ext = shufflevector <4 x float> %a1, <4 x float> undef, <2 x i32> <i32 0, i32 1> 2912 store <2 x float> %ext, <2 x float>* %ptr 2913 ret void 2914} 2915 2916define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { 2917; 
X86-SSE-LABEL: test_mm_storer_ps: 2918; X86-SSE: # %bb.0: 2919; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2920; X86-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2921; X86-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2922; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] 2923; X86-SSE-NEXT: retl # encoding: [0xc3] 2924; 2925; X86-AVX1-LABEL: test_mm_storer_ps: 2926; X86-AVX1: # %bb.0: 2927; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2928; X86-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2929; X86-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2930; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 2931; X86-AVX1-NEXT: retl # encoding: [0xc3] 2932; 2933; X86-AVX512-LABEL: test_mm_storer_ps: 2934; X86-AVX512: # %bb.0: 2935; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2936; X86-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2937; X86-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2938; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 2939; X86-AVX512-NEXT: retl # encoding: [0xc3] 2940; 2941; X64-SSE-LABEL: test_mm_storer_ps: 2942; X64-SSE: # %bb.0: 2943; X64-SSE-NEXT: shufps $27, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0x1b] 2944; X64-SSE-NEXT: # xmm0 = xmm0[3,2,1,0] 2945; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] 2946; X64-SSE-NEXT: retq # encoding: [0xc3] 2947; 2948; X64-AVX1-LABEL: test_mm_storer_ps: 2949; X64-AVX1: # %bb.0: 2950; X64-AVX1-NEXT: vpermilps $27, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2951; X64-AVX1-NEXT: # xmm0 = xmm0[3,2,1,0] 2952; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] 2953; X64-AVX1-NEXT: retq # encoding: [0xc3] 2954; 2955; X64-AVX512-LABEL: test_mm_storer_ps: 2956; X64-AVX512: # %bb.0: 2957; 
X64-AVX512-NEXT: vpermilps $27, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b] 2958; X64-AVX512-NEXT: # xmm0 = xmm0[3,2,1,0] 2959; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] 2960; X64-AVX512-NEXT: retq # encoding: [0xc3] 2961 %arg0 = bitcast float* %a0 to <4 x float>* 2962 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 2963 store <4 x float> %shuf, <4 x float>* %arg0, align 16 2964 ret void 2965} 2966 2967define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { 2968; X86-SSE-LABEL: test_mm_storeu_ps: 2969; X86-SSE: # %bb.0: 2970; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2971; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] 2972; X86-SSE-NEXT: retl # encoding: [0xc3] 2973; 2974; X86-AVX1-LABEL: test_mm_storeu_ps: 2975; X86-AVX1: # %bb.0: 2976; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2977; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] 2978; X86-AVX1-NEXT: retl # encoding: [0xc3] 2979; 2980; X86-AVX512-LABEL: test_mm_storeu_ps: 2981; X86-AVX512: # %bb.0: 2982; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 2983; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] 2984; X86-AVX512-NEXT: retl # encoding: [0xc3] 2985; 2986; X64-SSE-LABEL: test_mm_storeu_ps: 2987; X64-SSE: # %bb.0: 2988; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] 2989; X64-SSE-NEXT: retq # encoding: [0xc3] 2990; 2991; X64-AVX1-LABEL: test_mm_storeu_ps: 2992; X64-AVX1: # %bb.0: 2993; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] 2994; X64-AVX1-NEXT: retq # encoding: [0xc3] 2995; 2996; X64-AVX512-LABEL: test_mm_storeu_ps: 2997; X64-AVX512: # %bb.0: 2998; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: 
[0xc5,0xf8,0x11,0x07] 2999; X64-AVX512-NEXT: retq # encoding: [0xc3] 3000 %arg0 = bitcast float* %a0 to <4 x float>* 3001 store <4 x float> %a1, <4 x float>* %arg0, align 1 3002 ret void 3003} 3004 3005define void @test_mm_stream_ps(float *%a0, <4 x float> %a1) { 3006; X86-SSE-LABEL: test_mm_stream_ps: 3007; X86-SSE: # %bb.0: 3008; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3009; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] 3010; X86-SSE-NEXT: retl # encoding: [0xc3] 3011; 3012; X86-AVX1-LABEL: test_mm_stream_ps: 3013; X86-AVX1: # %bb.0: 3014; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3015; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00] 3016; X86-AVX1-NEXT: retl # encoding: [0xc3] 3017; 3018; X86-AVX512-LABEL: test_mm_stream_ps: 3019; X86-AVX512: # %bb.0: 3020; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 3021; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00] 3022; X86-AVX512-NEXT: retl # encoding: [0xc3] 3023; 3024; X64-SSE-LABEL: test_mm_stream_ps: 3025; X64-SSE: # %bb.0: 3026; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07] 3027; X64-SSE-NEXT: retq # encoding: [0xc3] 3028; 3029; X64-AVX1-LABEL: test_mm_stream_ps: 3030; X64-AVX1: # %bb.0: 3031; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07] 3032; X64-AVX1-NEXT: retq # encoding: [0xc3] 3033; 3034; X64-AVX512-LABEL: test_mm_stream_ps: 3035; X64-AVX512: # %bb.0: 3036; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07] 3037; X64-AVX512-NEXT: retq # encoding: [0xc3] 3038 %arg0 = bitcast float* %a0 to <4 x float>* 3039 store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 3040 ret void 3041} 3042 3043define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3044; SSE-LABEL: test_mm_sub_ps: 3045; SSE: # 
%bb.0: 3046; SSE-NEXT: subps %xmm1, %xmm0 # encoding: [0x0f,0x5c,0xc1] 3047; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3048; 3049; AVX1-LABEL: test_mm_sub_ps: 3050; AVX1: # %bb.0: 3051; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5c,0xc1] 3052; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3053; 3054; AVX512-LABEL: test_mm_sub_ps: 3055; AVX512: # %bb.0: 3056; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1] 3057; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3058 %res = fsub <4 x float> %a0, %a1 3059 ret <4 x float> %res 3060} 3061 3062define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3063; SSE-LABEL: test_mm_sub_ss: 3064; SSE: # %bb.0: 3065; SSE-NEXT: subss %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5c,0xc1] 3066; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3067; 3068; AVX1-LABEL: test_mm_sub_ss: 3069; AVX1: # %bb.0: 3070; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5c,0xc1] 3071; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3072; 3073; AVX512-LABEL: test_mm_sub_ss: 3074; AVX512: # %bb.0: 3075; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5c,0xc1] 3076; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3077 %ext0 = extractelement <4 x float> %a0, i32 0 3078 %ext1 = extractelement <4 x float> %a1, i32 0 3079 %fsub = fsub float %ext0, %ext1 3080 %res = insertelement <4 x float> %a0, float %fsub, i32 0 3081 ret <4 x float> %res 3082} 3083 3084define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { 3085; X86-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3086; X86-SSE: # %bb.0: 3087; X86-SSE-NEXT: pushl %esi # encoding: [0x56] 3088; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3089; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3090; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3091; 
X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3092; X86-SSE-NEXT: movaps (%esi), %xmm0 # encoding: [0x0f,0x28,0x06] 3093; X86-SSE-NEXT: movaps (%edx), %xmm1 # encoding: [0x0f,0x28,0x0a] 3094; X86-SSE-NEXT: movaps (%ecx), %xmm2 # encoding: [0x0f,0x28,0x11] 3095; X86-SSE-NEXT: movaps (%eax), %xmm3 # encoding: [0x0f,0x28,0x18] 3096; X86-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3097; X86-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3098; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3099; X86-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3100; X86-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3101; X86-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3102; X86-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3103; X86-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3104; X86-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3105; X86-SSE-NEXT: # xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3106; X86-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3107; X86-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3108; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3109; X86-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3110; X86-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3111; X86-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3112; X86-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3113; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3114; X86-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3115; X86-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3116; X86-SSE-NEXT: movaps %xmm1, (%esi) # encoding: [0x0f,0x29,0x0e] 3117; X86-SSE-NEXT: movaps %xmm5, (%edx) # encoding: [0x0f,0x29,0x2a] 3118; X86-SSE-NEXT: movaps %xmm3, (%ecx) # encoding: [0x0f,0x29,0x19] 3119; X86-SSE-NEXT: movaps %xmm2, (%eax) # encoding: [0x0f,0x29,0x10] 3120; X86-SSE-NEXT: popl %esi # encoding: [0x5e] 3121; X86-SSE-NEXT: retl # encoding: [0xc3] 3122; 
3123; X86-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3124; X86-AVX1: # %bb.0: 3125; X86-AVX1-NEXT: pushl %esi # encoding: [0x56] 3126; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3127; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3128; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3129; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3130; X86-AVX1-NEXT: vmovaps (%esi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x06] 3131; X86-AVX1-NEXT: vmovaps (%edx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a] 3132; X86-AVX1-NEXT: vmovaps (%ecx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x11] 3133; X86-AVX1-NEXT: vmovaps (%eax), %xmm3 # encoding: [0xc5,0xf8,0x28,0x18] 3134; X86-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3135; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3136; X86-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3137; X86-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3138; X86-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3139; X86-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3140; X86-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3141; X86-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3142; X86-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: [0xc5,0xd8,0x16,0xd5] 3143; X86-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3144; X86-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3145; X86-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3146; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3147; X86-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3148; X86-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3149; X86-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3150; X86-AVX1-NEXT: vmovaps %xmm2, (%esi) # encoding: [0xc5,0xf8,0x29,0x16] 3151; X86-AVX1-NEXT: vmovaps %xmm3, (%edx) # 
encoding: [0xc5,0xf8,0x29,0x1a] 3152; X86-AVX1-NEXT: vmovaps %xmm4, (%ecx) # encoding: [0xc5,0xf8,0x29,0x21] 3153; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] 3154; X86-AVX1-NEXT: popl %esi # encoding: [0x5e] 3155; X86-AVX1-NEXT: retl # encoding: [0xc3] 3156; 3157; X86-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3158; X86-AVX512: # %bb.0: 3159; X86-AVX512-NEXT: pushl %esi # encoding: [0x56] 3160; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14] 3161; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x10] 3162; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c] 3163; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x08] 3164; X86-AVX512-NEXT: vmovaps (%esi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x06] 3165; X86-AVX512-NEXT: vmovaps (%edx), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0a] 3166; X86-AVX512-NEXT: vmovaps (%ecx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x11] 3167; X86-AVX512-NEXT: vmovaps (%eax), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x18] 3168; X86-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3169; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3170; X86-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3171; X86-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3172; X86-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3173; X86-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3174; X86-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3175; X86-AVX512-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3176; X86-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3177; X86-AVX512-NEXT: 
# xmm2 = xmm4[0],xmm5[0] 3178; X86-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3179; X86-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3180; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3181; X86-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3182; X86-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3183; X86-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3184; X86-AVX512-NEXT: vmovaps %xmm2, (%esi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x16] 3185; X86-AVX512-NEXT: vmovaps %xmm3, (%edx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1a] 3186; X86-AVX512-NEXT: vmovaps %xmm4, (%ecx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x21] 3187; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] 3188; X86-AVX512-NEXT: popl %esi # encoding: [0x5e] 3189; X86-AVX512-NEXT: retl # encoding: [0xc3] 3190; 3191; X64-SSE-LABEL: test_MM_TRANSPOSE4_PS: 3192; X64-SSE: # %bb.0: 3193; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] 3194; X64-SSE-NEXT: movaps (%rsi), %xmm1 # encoding: [0x0f,0x28,0x0e] 3195; X64-SSE-NEXT: movaps (%rdx), %xmm2 # encoding: [0x0f,0x28,0x12] 3196; X64-SSE-NEXT: movaps (%rcx), %xmm3 # encoding: [0x0f,0x28,0x19] 3197; X64-SSE-NEXT: movaps %xmm0, %xmm4 # encoding: [0x0f,0x28,0xe0] 3198; X64-SSE-NEXT: unpcklps %xmm1, %xmm4 # encoding: [0x0f,0x14,0xe1] 3199; X64-SSE-NEXT: # xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 3200; X64-SSE-NEXT: movaps %xmm2, %xmm5 # encoding: [0x0f,0x28,0xea] 3201; X64-SSE-NEXT: unpcklps %xmm3, %xmm5 # encoding: [0x0f,0x14,0xeb] 3202; X64-SSE-NEXT: # xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 3203; X64-SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3204; X64-SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3205; X64-SSE-NEXT: unpckhps %xmm3, %xmm2 # encoding: [0x0f,0x15,0xd3] 3206; X64-SSE-NEXT: # xmm2 = 
xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3207; X64-SSE-NEXT: movaps %xmm4, %xmm1 # encoding: [0x0f,0x28,0xcc] 3208; X64-SSE-NEXT: movlhps %xmm5, %xmm1 # encoding: [0x0f,0x16,0xcd] 3209; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm5[0] 3210; X64-SSE-NEXT: movhlps %xmm4, %xmm5 # encoding: [0x0f,0x12,0xec] 3211; X64-SSE-NEXT: # xmm5 = xmm4[1],xmm5[1] 3212; X64-SSE-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8] 3213; X64-SSE-NEXT: movlhps %xmm2, %xmm3 # encoding: [0x0f,0x16,0xda] 3214; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0] 3215; X64-SSE-NEXT: movhlps %xmm0, %xmm2 # encoding: [0x0f,0x12,0xd0] 3216; X64-SSE-NEXT: # xmm2 = xmm0[1],xmm2[1] 3217; X64-SSE-NEXT: movaps %xmm1, (%rdi) # encoding: [0x0f,0x29,0x0f] 3218; X64-SSE-NEXT: movaps %xmm5, (%rsi) # encoding: [0x0f,0x29,0x2e] 3219; X64-SSE-NEXT: movaps %xmm3, (%rdx) # encoding: [0x0f,0x29,0x1a] 3220; X64-SSE-NEXT: movaps %xmm2, (%rcx) # encoding: [0x0f,0x29,0x11] 3221; X64-SSE-NEXT: retq # encoding: [0xc3] 3222; 3223; X64-AVX1-LABEL: test_MM_TRANSPOSE4_PS: 3224; X64-AVX1: # %bb.0: 3225; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] 3226; X64-AVX1-NEXT: vmovaps (%rsi), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0e] 3227; X64-AVX1-NEXT: vmovaps (%rdx), %xmm2 # encoding: [0xc5,0xf8,0x28,0x12] 3228; X64-AVX1-NEXT: vmovaps (%rcx), %xmm3 # encoding: [0xc5,0xf8,0x28,0x19] 3229; X64-AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x14,0xe1] 3230; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3231; X64-AVX1-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # encoding: [0xc5,0xe8,0x14,0xeb] 3232; X64-AVX1-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3233; X64-AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3234; X64-AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3235; X64-AVX1-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # encoding: [0xc5,0xe8,0x15,0xcb] 3236; X64-AVX1-NEXT: # xmm1 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3237; X64-AVX1-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # encoding: 
[0xc5,0xd8,0x16,0xd5] 3238; X64-AVX1-NEXT: # xmm2 = xmm4[0],xmm5[0] 3239; X64-AVX1-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # encoding: [0xc5,0xd9,0x15,0xdd] 3240; X64-AVX1-NEXT: # xmm3 = xmm4[1],xmm5[1] 3241; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # encoding: [0xc5,0xf8,0x16,0xe1] 3242; X64-AVX1-NEXT: # xmm4 = xmm0[0],xmm1[0] 3243; X64-AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1] 3244; X64-AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1] 3245; X64-AVX1-NEXT: vmovaps %xmm2, (%rdi) # encoding: [0xc5,0xf8,0x29,0x17] 3246; X64-AVX1-NEXT: vmovaps %xmm3, (%rsi) # encoding: [0xc5,0xf8,0x29,0x1e] 3247; X64-AVX1-NEXT: vmovaps %xmm4, (%rdx) # encoding: [0xc5,0xf8,0x29,0x22] 3248; X64-AVX1-NEXT: vmovaps %xmm0, (%rcx) # encoding: [0xc5,0xf8,0x29,0x01] 3249; X64-AVX1-NEXT: retq # encoding: [0xc3] 3250; 3251; X64-AVX512-LABEL: test_MM_TRANSPOSE4_PS: 3252; X64-AVX512: # %bb.0: 3253; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] 3254; X64-AVX512-NEXT: vmovaps (%rsi), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x0e] 3255; X64-AVX512-NEXT: vmovaps (%rdx), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x12] 3256; X64-AVX512-NEXT: vmovaps (%rcx), %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x19] 3257; X64-AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xe1] 3258; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3259; X64-AVX512-NEXT: vunpcklps %xmm3, %xmm2, %xmm5 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x14,0xeb] 3260; X64-AVX512-NEXT: # xmm5 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3261; X64-AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3262; X64-AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3263; X64-AVX512-NEXT: vunpckhps %xmm3, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x15,0xcb] 3264; X64-AVX512-NEXT: # xmm1 = 
xmm2[2],xmm3[2],xmm2[3],xmm3[3] 3265; X64-AVX512-NEXT: vmovlhps %xmm5, %xmm4, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xd8,0x16,0xd5] 3266; X64-AVX512-NEXT: # xmm2 = xmm4[0],xmm5[0] 3267; X64-AVX512-NEXT: vunpckhpd %xmm5, %xmm4, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0x15,0xdd] 3268; X64-AVX512-NEXT: # xmm3 = xmm4[1],xmm5[1] 3269; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xe1] 3270; X64-AVX512-NEXT: # xmm4 = xmm0[0],xmm1[0] 3271; X64-AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1] 3272; X64-AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1] 3273; X64-AVX512-NEXT: vmovaps %xmm2, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x17] 3274; X64-AVX512-NEXT: vmovaps %xmm3, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x1e] 3275; X64-AVX512-NEXT: vmovaps %xmm4, (%rdx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x22] 3276; X64-AVX512-NEXT: vmovaps %xmm0, (%rcx) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x01] 3277; X64-AVX512-NEXT: retq # encoding: [0xc3] 3278 %row0 = load <4 x float>, <4 x float>* %a0, align 16 3279 %row1 = load <4 x float>, <4 x float>* %a1, align 16 3280 %row2 = load <4 x float>, <4 x float>* %a2, align 16 3281 %row3 = load <4 x float>, <4 x float>* %a3, align 16 3282 %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3283 %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3284 %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3285 %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3286 %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3287 %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3288 %res2 = 
shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 3289 %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 3290 store <4 x float> %res0, <4 x float>* %a0, align 16 3291 store <4 x float> %res1, <4 x float>* %a1, align 16 3292 store <4 x float> %res2, <4 x float>* %a2, align 16 3293 store <4 x float> %res3, <4 x float>* %a3, align 16 3294 ret void 3295} 3296 3297define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3298; SSE-LABEL: test_mm_ucomieq_ss: 3299; SSE: # %bb.0: 3300; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3301; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3302; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3303; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3304; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3305; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3306; 3307; AVX1-LABEL: test_mm_ucomieq_ss: 3308; AVX1: # %bb.0: 3309; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3310; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3311; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3312; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3313; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3314; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3315; 3316; AVX512-LABEL: test_mm_ucomieq_ss: 3317; AVX512: # %bb.0: 3318; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3319; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] 3320; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] 3321; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] 3322; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3323; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3324 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 3325 ret i32 %res 3326} 3327declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 3328 
3329define i32 @test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3330; SSE-LABEL: test_mm_ucomige_ss: 3331; SSE: # %bb.0: 3332; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3333; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3334; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3335; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3336; 3337; AVX1-LABEL: test_mm_ucomige_ss: 3338; AVX1: # %bb.0: 3339; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3340; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3341; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3342; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3343; 3344; AVX512-LABEL: test_mm_ucomige_ss: 3345; AVX512: # %bb.0: 3346; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3347; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3348; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3349; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3350 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) 3351 ret i32 %res 3352} 3353declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 3354 3355define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3356; SSE-LABEL: test_mm_ucomigt_ss: 3357; SSE: # %bb.0: 3358; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3359; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3360; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3361; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3362; 3363; AVX1-LABEL: test_mm_ucomigt_ss: 3364; AVX1: # %bb.0: 3365; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3366; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3367; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3368; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3369; 3370; AVX512-LABEL: test_mm_ucomigt_ss: 3371; AVX512: # %bb.0: 3372; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3373; AVX512-NEXT: 
vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3374; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3375; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3376 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) 3377 ret i32 %res 3378} 3379declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 3380 3381define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3382; SSE-LABEL: test_mm_ucomile_ss: 3383; SSE: # %bb.0: 3384; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3385; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3386; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3387; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3388; 3389; AVX1-LABEL: test_mm_ucomile_ss: 3390; AVX1: # %bb.0: 3391; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3392; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3393; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3394; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3395; 3396; AVX512-LABEL: test_mm_ucomile_ss: 3397; AVX512: # %bb.0: 3398; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3399; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3400; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] 3401; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3402 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) 3403 ret i32 %res 3404} 3405declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 3406 3407define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3408; SSE-LABEL: test_mm_ucomilt_ss: 3409; SSE: # %bb.0: 3410; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3411; SSE-NEXT: ucomiss %xmm0, %xmm1 # encoding: [0x0f,0x2e,0xc8] 3412; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3413; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3414; 3415; AVX1-LABEL: test_mm_ucomilt_ss: 3416; AVX1: # %bb.0: 
3417; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3418; AVX1-NEXT: vucomiss %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x2e,0xc8] 3419; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3420; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3421; 3422; AVX512-LABEL: test_mm_ucomilt_ss: 3423; AVX512: # %bb.0: 3424; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] 3425; AVX512-NEXT: vucomiss %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc8] 3426; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] 3427; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3428 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) 3429 ret i32 %res 3430} 3431declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 3432 3433define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 3434; SSE-LABEL: test_mm_ucomineq_ss: 3435; SSE: # %bb.0: 3436; SSE-NEXT: ucomiss %xmm1, %xmm0 # encoding: [0x0f,0x2e,0xc1] 3437; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3438; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3439; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3440; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3441; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3442; 3443; AVX1-LABEL: test_mm_ucomineq_ss: 3444; AVX1: # %bb.0: 3445; AVX1-NEXT: vucomiss %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x2e,0xc1] 3446; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3447; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3448; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 3449; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3450; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3451; 3452; AVX512-LABEL: test_mm_ucomineq_ss: 3453; AVX512: # %bb.0: 3454; AVX512-NEXT: vucomiss %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1] 3455; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] 3456; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] 3457; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] 
3458; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] 3459; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3460 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) 3461 ret i32 %res 3462} 3463declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 3464 3465define <4 x float> @test_mm_undefined_ps() { 3466; CHECK-LABEL: test_mm_undefined_ps: 3467; CHECK: # %bb.0: 3468; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3469 ret <4 x float> undef 3470} 3471 3472define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3473; SSE-LABEL: test_mm_unpackhi_ps: 3474; SSE: # %bb.0: 3475; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1] 3476; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3477; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3478; 3479; AVX1-LABEL: test_mm_unpackhi_ps: 3480; AVX1: # %bb.0: 3481; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1] 3482; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3483; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3484; 3485; AVX512-LABEL: test_mm_unpackhi_ps: 3486; AVX512: # %bb.0: 3487; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1] 3488; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3489; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3490 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 3491 ret <4 x float> %res 3492} 3493 3494define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3495; SSE-LABEL: test_mm_unpacklo_ps: 3496; SSE: # %bb.0: 3497; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1] 3498; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3499; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3500; 3501; AVX1-LABEL: test_mm_unpacklo_ps: 3502; AVX1: # %bb.0: 3503; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1] 3504; AVX1-NEXT: # xmm0 
= xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3505; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3506; 3507; AVX512-LABEL: test_mm_unpacklo_ps: 3508; AVX512: # %bb.0: 3509; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1] 3510; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3511; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3512 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 3513 ret <4 x float> %res 3514} 3515 3516define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 3517; SSE-LABEL: test_mm_xor_ps: 3518; SSE: # %bb.0: 3519; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1] 3520; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3521; 3522; AVX1-LABEL: test_mm_xor_ps: 3523; AVX1: # %bb.0: 3524; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1] 3525; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3526; 3527; AVX512-LABEL: test_mm_xor_ps: 3528; AVX512: # %bb.0: 3529; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1] 3530; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] 3531 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 3532 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 3533 %res = xor <4 x i32> %arg0, %arg1 3534 %bc = bitcast <4 x i32> %res to <4 x float> 3535 ret <4 x float> %bc 3536} 3537 3538!0 = !{i32 1} 3539