; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

; Tests lowering of the AVX2 gather intrinsics (llvm.x86.avx2.gather.*) to the
; vgather*/vpgather* machine instructions on both 32-bit and 64-bit x86.
; Do not hand-edit the CHECK blocks; regenerate with update_llc_test_checks.py.

declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
                      <4 x i32>, <4 x float>, i8) nounwind readonly

; 128-bit dword-indexed float gather, scale 2, undef pass-through.
; The destination register is cleared with vxorps before the gather,
; and the result is copied out of the gather's destination register.
define <4 x float> @test_x86_avx2_gather_d_ps(i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_ps:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X32-NEXT:    vgatherdps %xmm1, (%eax,%xmm0,2), %xmm2
; X32-NEXT:    vmovaps %xmm2, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0,2), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef,
                            i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
                      <4 x i32>, <2 x double>, i8) nounwind readonly

; 128-bit dword-indexed double gather, scale 2, undef pass-through.
define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_pd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; X32-NEXT:    vgatherdpd %xmm1, (%eax,%xmm0,2), %xmm2
; X32-NEXT:    vmovapd %xmm2, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_gather_d_pd:
; X64:       # %bb.0:
; X64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0,2), %xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef,
                            i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
                      <8 x i32>, <8 x float>, i8) nounwind readonly

; 256-bit dword-indexed float gather, scale 4, undef pass-through.
; Uses ymm registers; the zeroing vxorps of the low xmm clears the full ymm.
define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_ps_256:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X32-NEXT:    vgatherdps %ymm1, (%eax,%ymm0,4), %ymm2
; X32-NEXT:    vmovaps %ymm2, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X64-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0,4), %ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0
; X64-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef,
                            i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
  ret <8 x float> %res
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
                      <4 x i32>, <4 x double>, i8) nounwind readonly

; 256-bit double gather with a 128-bit dword index vector, scale 8,
; undef pass-through: ymm destination, xmm index operand.
define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
; X32-LABEL: test_x86_avx2_gather_d_pd_256:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; X32-NEXT:    vgatherdpd %ymm1, (%eax,%xmm0,8), %ymm2
; X32-NEXT:    vmovapd %ymm2, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0,8), %ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef,
                            i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 8) ;
  ret <4 x double> %res
}

; Mirrors the _mm_i32gather_epi32 pattern: zero pass-through and an all-ones
; mask (vpcmpeqd of a register with itself), lowered to vpgatherdd.
define <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_i32gather_epi32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1
; X32-NEXT:    vmovdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_i32gather_epi32:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast i32 *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
  %call = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* %arg0, <4 x i32> %arg1, <4 x i32> %mask, i8 2)
  %bc = bitcast <4 x i32> %call to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly

; Mirrors the _mm_i32gather_pd pattern: the all-ones mask is built in IR as
; sext of (0.0 == 0.0), which the backend folds to a vpcmpeqd all-ones idiom.
define <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_i32gather_pd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT:    vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
; X32-NEXT:    vmovapd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_i32gather_pd:
; X64:       # %bb.0:
; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast double *%a0 to i8*
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
  %sext = sext <2 x i1> %cmp to <2 x i64>
  %mask = bitcast <2 x i64> %sext to <2 x double>
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %arg0, <4 x i32> %arg1, <2 x double> %mask, i8 2)
  ret <2 x double> %res
}

@x = dso_local global [1024 x float] zeroinitializer, align 16

; Qword-indexed float gather whose base is the global @x, folded into the
; gather's memory operand (no separate address computation). The all-ones
; float mask (every element has all bits set) selects all lanes, and
; vzeroupper is emitted because a ymm index register was used.
define <4 x float> @gather_global(<4 x i64>, i32* nocapture readnone) {
; X32-LABEL: gather_global:
; X32:       # %bb.0:
; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-NEXT:    vgatherqps %xmm2, x(,%ymm0,4), %xmm1
; X32-NEXT:    vmovaps %xmm1, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: gather_global:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm2, x(,%ymm0,4), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %3 = tail call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* bitcast ([1024 x float]* @x to i8*), <4 x i64> %0, <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 4)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8)