; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Exercises the llvm.amdgcn.image.sample.* intrinsics in their d / cd forms
; (optionally combined with c, cl and o) when the d*dh / d*dv operands are
; half and the remaining address operands are float. Every function below
; expects the matching image_sample_*_g16 instruction in the llc output.
;
; Intrinsic names in this file carry three type suffixes in the order
; <return type>.<type of the half operands>.<type of the float operands>,
; e.g. v4f32.f16.f32 for a <4 x float> result.
;
; In the expected assembly the half operands are combined with
; v_and_or_b32 against a 0xffff mask. When a function has two half operands
; per direction (2D/3D), one of them is first shifted left by 16 and or-ed in.
; NOTE(review): in the 1D cases (and for the third operand in 3D) the value
; or-ed into the upper 16 bits is "s0 << 16"; presumably this stands in for an
; undefined upper half -- confirm before relying on it.

; sample.d, 1D: two half operands, one float coordinate.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d, 2D: four half operands, two float coordinates; the expected
; instruction takes its address as a register list rather than a range.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_and_or_b32 v0, v0, v6, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v6, v3
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d, 3D: six half operands, three float coordinates; the expected
; instruction uses a contiguous address range v[2:8].
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v2
; GFX10-NEXT:    v_mov_b32_e32 v10, v3
; GFX10-NEXT:    v_mov_b32_e32 v11, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v3, v9, v11, s12
; GFX10-NEXT:    v_and_or_b32 v2, v0, v11, v1
; GFX10-NEXT:    v_and_or_b32 v4, v10, v11, v4
; GFX10-NEXT:    v_and_or_b32 v5, v5, v11, s12
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d, 1D: as sample_d_1d with a leading float %zcompare operand.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d, 2D: as sample_d_2d with a leading float %zcompare operand.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d.cl, 1D: as sample_d_1d with a trailing float %clamp operand.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d.cl, 2D: as sample_d_2d with a trailing float %clamp operand.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v7, v3
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d.cl, 1D: both the leading %zcompare and the trailing %clamp.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, s12
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d.cl, 2D: the expected instruction uses a contiguous address range
; v[2:7], so the operands are first moved into place.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v8
; GFX10-NEXT:    v_and_or_b32 v4, v9, v0, v4
; GFX10-NEXT:    v_and_or_b32 v3, v1, v0, v3
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd, 1D: same operand list as sample_d_1d, cd intrinsic family.
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd, 2D: same operand list as sample_d_2d, cd intrinsic family.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_and_or_b32 v0, v0, v6, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v6, v3
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd, 1D: cd family with a leading float %zcompare operand.
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd, 2D: cd family with a leading float %zcompare operand.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v7, v4
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd.cl, 1D: cd family with a trailing float %clamp operand.
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd.cl, 2D: cd family with a trailing float %clamp operand.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v7, v3
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd.cl, 1D: cd family with both %zcompare and %clamp.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v5, s12
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd.cl, 2D: cd family with both %zcompare and %clamp; contiguous
; address range v[2:7] as in sample_c_d_cl_2d.
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v8
; GFX10-NEXT:    v_and_or_b32 v4, v9, v0, v4
; GFX10-NEXT:    v_and_or_b32 v3, v1, v0, v3
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d.o, 2D array, scalar float result: the first intrinsic argument
; is 4 and the expected instruction writes a single register with dmask:0x4.
; The return-type suffix (f32) comes first in the intrinsic name, matching
; the v2f32 / v4f32 variants elsewhere in this file.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v2
; GFX10-NEXT:    v_mov_b32_e32 v11, v4
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v9
; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GFX10-NEXT:    v_and_or_b32 v4, v10, v0, v1
; GFX10-NEXT:    v_and_or_b32 v5, v11, v0, v5
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

; sample.c.d.o, 2D array, <2 x float> result: the first intrinsic argument is
; 6 and the expected instruction writes v[0:1] with dmask:0x6.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v2
; GFX10-NEXT:    v_mov_b32_e32 v11, v4
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v9
; GFX10-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GFX10-NEXT:    v_and_or_b32 v4, v10, v0, v1
; GFX10-NEXT:    v_and_or_b32 v5, v11, v0, v5
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Intrinsic declarations: d family.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Intrinsic declarations: cd family.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Intrinsic declarations: c.d.o on 2D arrays with narrower result types.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only attribute group #1 is referenced in this file (by the declarations).
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }