; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Exercises GlobalISel selection, on gfx1010, of the
; llvm.amdgcn.image.sample.* derivative intrinsics (d and cd, plus their
; c / cl / o variants) when the derivative operands are half and the
; coordinates are float. Every function below is expected to select the
; matching *_g16 image sample instruction.
;
; The assertion comments inside each function are generated. Regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.

; ---- sample.d / sample.c.d ----

define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v3, v2, v6, v3
; GFX10-NEXT:    v_and_or_b32 v10, v0, v6, v1
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v11, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v0, v0, v11, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v11, s12
; GFX10-NEXT:    v_and_or_b32 v2, v3, v11, v4
; GFX10-NEXT:    v_and_or_b32 v3, v5, v11, s12
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
; GFX10-NEXT:    v_and_or_b32 v3, v3, v7, v4
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; ---- sample.d.cl / sample.c.d.cl ----

define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v3, v2, v7, v9
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v7, s12
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 16, v4
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v8, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v8, v10
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; ---- sample.cd / sample.c.cd ----

define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v3, s12
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v6, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v3, v2, v6, v3
; GFX10-NEXT:    v_and_or_b32 v10, v0, v6, v1
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v10, v3, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v4, s12
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, v2
; GFX10-NEXT:    v_and_or_b32 v3, v3, v7, v4
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; ---- sample.cd.cl / sample.c.cd.cl ----

define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, s12
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v9, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v3, v2, v7, v9
; GFX10-NEXT:    v_and_or_b32 v0, v0, v7, v1
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v3, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v7, 0xffff
; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v7, s12
; GFX10-NEXT:    v_and_or_b32 v2, v2, v7, s12
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v10, 16, v4
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v1, v1, v8, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v8, v10
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; ---- sample.c.d.o on a 2D array, with scalar and 2-element results ----

define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v2, v2, v9, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v9, v11
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    v_lshlrev_b32_e32 v11, 16, v5
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_and_or_b32 v2, v2, v9, v3
; GFX10-NEXT:    v_and_or_b32 v3, v4, v9, v11
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Intrinsic declarations. Name suffixes follow the order
; <result type>.<derivative type>.<coordinate type>.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only #1 is referenced by the declarations above.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }