1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s 4 5define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 6; GFX6-LABEL: gather4_2d: 7; GFX6: ; %bb.0: ; %main_body 8; GFX6-NEXT: s_mov_b64 s[14:15], exec 9; GFX6-NEXT: s_mov_b32 s0, s2 10; GFX6-NEXT: s_mov_b32 s1, s3 11; GFX6-NEXT: s_mov_b32 s2, s4 12; GFX6-NEXT: s_mov_b32 s3, s5 13; GFX6-NEXT: s_mov_b32 s4, s6 14; GFX6-NEXT: s_mov_b32 s5, s7 15; GFX6-NEXT: s_mov_b32 s6, s8 16; GFX6-NEXT: s_mov_b32 s7, s9 17; GFX6-NEXT: s_mov_b32 s8, s10 18; GFX6-NEXT: s_mov_b32 s9, s11 19; GFX6-NEXT: s_mov_b32 s10, s12 20; GFX6-NEXT: s_mov_b32 s11, s13 21; GFX6-NEXT: s_wqm_b64 exec, exec 22; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 23; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 24; GFX6-NEXT: s_waitcnt vmcnt(0) 25; GFX6-NEXT: ; return to shader part epilog 26; 27; GFX10NSA-LABEL: gather4_2d: 28; GFX10NSA: ; %bb.0: ; %main_body 29; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 30; GFX10NSA-NEXT: s_mov_b32 s0, s2 31; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 32; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 33; GFX10NSA-NEXT: s_mov_b32 s1, s3 34; GFX10NSA-NEXT: s_mov_b32 s2, s4 35; GFX10NSA-NEXT: s_mov_b32 s3, s5 36; GFX10NSA-NEXT: s_mov_b32 s4, s6 37; GFX10NSA-NEXT: s_mov_b32 s5, s7 38; GFX10NSA-NEXT: s_mov_b32 s6, s8 39; GFX10NSA-NEXT: s_mov_b32 s7, s9 40; GFX10NSA-NEXT: s_mov_b32 s8, s10 41; GFX10NSA-NEXT: s_mov_b32 s9, s11 42; GFX10NSA-NEXT: s_mov_b32 s10, s12 43; GFX10NSA-NEXT: s_mov_b32 s11, s13 44; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 45; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 46; GFX10NSA-NEXT: ; return to shader part epilog 47main_body: 48 %v = call <4 x float> 
@llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 49 ret <4 x float> %v 50} 51 52define amdgpu_ps <4 x float> @gather4_2d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 53; GFX6-LABEL: gather4_2d_tfe: 54; GFX6: ; %bb.0: ; %main_body 55; GFX6-NEXT: s_mov_b64 s[14:15], exec 56; GFX6-NEXT: s_mov_b32 s0, s2 57; GFX6-NEXT: s_mov_b32 s1, s3 58; GFX6-NEXT: s_mov_b32 s2, s4 59; GFX6-NEXT: s_mov_b32 s3, s5 60; GFX6-NEXT: s_mov_b32 s4, s6 61; GFX6-NEXT: s_mov_b32 s5, s7 62; GFX6-NEXT: s_mov_b32 s6, s8 63; GFX6-NEXT: s_mov_b32 s7, s9 64; GFX6-NEXT: s_mov_b32 s8, s10 65; GFX6-NEXT: s_mov_b32 s9, s11 66; GFX6-NEXT: s_mov_b32 s10, s12 67; GFX6-NEXT: s_mov_b32 s11, s13 68; GFX6-NEXT: s_wqm_b64 exec, exec 69; GFX6-NEXT: v_mov_b32_e32 v5, v0 70; GFX6-NEXT: v_mov_b32_e32 v0, 0 71; GFX6-NEXT: v_mov_b32_e32 v6, v1 72; GFX6-NEXT: v_mov_b32_e32 v1, v0 73; GFX6-NEXT: v_mov_b32_e32 v2, v0 74; GFX6-NEXT: v_mov_b32_e32 v3, v0 75; GFX6-NEXT: v_mov_b32_e32 v4, v0 76; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 77; GFX6-NEXT: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 tfe 78; GFX6-NEXT: s_waitcnt vmcnt(0) 79; GFX6-NEXT: ; return to shader part epilog 80; 81; GFX10NSA-LABEL: gather4_2d_tfe: 82; GFX10NSA: ; %bb.0: ; %main_body 83; GFX10NSA-NEXT: s_mov_b32 s14, exec_lo 84; GFX10NSA-NEXT: s_mov_b32 s0, s2 85; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 86; GFX10NSA-NEXT: v_mov_b32_e32 v5, v0 87; GFX10NSA-NEXT: v_mov_b32_e32 v0, 0 88; GFX10NSA-NEXT: v_mov_b32_e32 v6, v1 89; GFX10NSA-NEXT: s_mov_b32 s1, s3 90; GFX10NSA-NEXT: s_mov_b32 s2, s4 91; GFX10NSA-NEXT: s_mov_b32 s3, s5 92; GFX10NSA-NEXT: s_mov_b32 s4, s6 93; GFX10NSA-NEXT: s_mov_b32 s5, s7 94; GFX10NSA-NEXT: s_mov_b32 s6, s8 95; GFX10NSA-NEXT: s_mov_b32 s7, s9 96; GFX10NSA-NEXT: s_mov_b32 s8, s10 97; GFX10NSA-NEXT: s_mov_b32 s9, s11 98; GFX10NSA-NEXT: s_mov_b32 s10, s12 99; GFX10NSA-NEXT: s_mov_b32 s11, s13 100; GFX10NSA-NEXT: 
v_mov_b32_e32 v1, v0 101; GFX10NSA-NEXT: v_mov_b32_e32 v2, v0 102; GFX10NSA-NEXT: v_mov_b32_e32 v3, v0 103; GFX10NSA-NEXT: v_mov_b32_e32 v4, v0 104; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s14 105; GFX10NSA-NEXT: image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe 106; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 107; GFX10NSA-NEXT: ; return to shader part epilog 108main_body: 109 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0) 110 %r = extractvalue { <4 x float>, i32 } %v, 0 111 ret <4 x float> %r 112} 113 114define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { 115; GFX6-LABEL: gather4_cube: 116; GFX6: ; %bb.0: ; %main_body 117; GFX6-NEXT: s_mov_b64 s[14:15], exec 118; GFX6-NEXT: s_mov_b32 s0, s2 119; GFX6-NEXT: s_mov_b32 s1, s3 120; GFX6-NEXT: s_mov_b32 s2, s4 121; GFX6-NEXT: s_mov_b32 s3, s5 122; GFX6-NEXT: s_mov_b32 s4, s6 123; GFX6-NEXT: s_mov_b32 s5, s7 124; GFX6-NEXT: s_mov_b32 s6, s8 125; GFX6-NEXT: s_mov_b32 s7, s9 126; GFX6-NEXT: s_mov_b32 s8, s10 127; GFX6-NEXT: s_mov_b32 s9, s11 128; GFX6-NEXT: s_mov_b32 s10, s12 129; GFX6-NEXT: s_mov_b32 s11, s13 130; GFX6-NEXT: s_wqm_b64 exec, exec 131; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 132; GFX6-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da 133; GFX6-NEXT: s_waitcnt vmcnt(0) 134; GFX6-NEXT: ; return to shader part epilog 135; 136; GFX10NSA-LABEL: gather4_cube: 137; GFX10NSA: ; %bb.0: ; %main_body 138; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 139; GFX10NSA-NEXT: s_mov_b32 s0, s2 140; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 141; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 142; GFX10NSA-NEXT: s_mov_b32 s1, s3 143; GFX10NSA-NEXT: s_mov_b32 s2, s4 144; GFX10NSA-NEXT: s_mov_b32 s3, s5 145; GFX10NSA-NEXT: s_mov_b32 s4, s6 146; GFX10NSA-NEXT: s_mov_b32 s5, s7 147; GFX10NSA-NEXT: s_mov_b32 s6, s8 
148; GFX10NSA-NEXT: s_mov_b32 s7, s9 149; GFX10NSA-NEXT: s_mov_b32 s8, s10 150; GFX10NSA-NEXT: s_mov_b32 s9, s11 151; GFX10NSA-NEXT: s_mov_b32 s10, s12 152; GFX10NSA-NEXT: s_mov_b32 s11, s13 153; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE 154; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 155; GFX10NSA-NEXT: ; return to shader part epilog 156main_body: 157 %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 158 ret <4 x float> %v 159} 160 161define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { 162; GFX6-LABEL: gather4_2darray: 163; GFX6: ; %bb.0: ; %main_body 164; GFX6-NEXT: s_mov_b64 s[14:15], exec 165; GFX6-NEXT: s_mov_b32 s0, s2 166; GFX6-NEXT: s_mov_b32 s1, s3 167; GFX6-NEXT: s_mov_b32 s2, s4 168; GFX6-NEXT: s_mov_b32 s3, s5 169; GFX6-NEXT: s_mov_b32 s4, s6 170; GFX6-NEXT: s_mov_b32 s5, s7 171; GFX6-NEXT: s_mov_b32 s6, s8 172; GFX6-NEXT: s_mov_b32 s7, s9 173; GFX6-NEXT: s_mov_b32 s8, s10 174; GFX6-NEXT: s_mov_b32 s9, s11 175; GFX6-NEXT: s_mov_b32 s10, s12 176; GFX6-NEXT: s_mov_b32 s11, s13 177; GFX6-NEXT: s_wqm_b64 exec, exec 178; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 179; GFX6-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da 180; GFX6-NEXT: s_waitcnt vmcnt(0) 181; GFX6-NEXT: ; return to shader part epilog 182; 183; GFX10NSA-LABEL: gather4_2darray: 184; GFX10NSA: ; %bb.0: ; %main_body 185; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 186; GFX10NSA-NEXT: s_mov_b32 s0, s2 187; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 188; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 189; GFX10NSA-NEXT: s_mov_b32 s1, s3 190; GFX10NSA-NEXT: s_mov_b32 s2, s4 191; GFX10NSA-NEXT: s_mov_b32 s3, s5 192; GFX10NSA-NEXT: s_mov_b32 s4, s6 193; GFX10NSA-NEXT: s_mov_b32 s5, s7 194; GFX10NSA-NEXT: s_mov_b32 s6, s8 195; GFX10NSA-NEXT: s_mov_b32 s7, s9 196; 
GFX10NSA-NEXT: s_mov_b32 s8, s10 197; GFX10NSA-NEXT: s_mov_b32 s9, s11 198; GFX10NSA-NEXT: s_mov_b32 s10, s12 199; GFX10NSA-NEXT: s_mov_b32 s11, s13 200; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY 201; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 202; GFX10NSA-NEXT: ; return to shader part epilog 203main_body: 204 %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 205 ret <4 x float> %v 206} 207 208define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 209; GFX6-LABEL: gather4_c_2d: 210; GFX6: ; %bb.0: ; %main_body 211; GFX6-NEXT: s_mov_b64 s[14:15], exec 212; GFX6-NEXT: s_mov_b32 s0, s2 213; GFX6-NEXT: s_mov_b32 s1, s3 214; GFX6-NEXT: s_mov_b32 s2, s4 215; GFX6-NEXT: s_mov_b32 s3, s5 216; GFX6-NEXT: s_mov_b32 s4, s6 217; GFX6-NEXT: s_mov_b32 s5, s7 218; GFX6-NEXT: s_mov_b32 s6, s8 219; GFX6-NEXT: s_mov_b32 s7, s9 220; GFX6-NEXT: s_mov_b32 s8, s10 221; GFX6-NEXT: s_mov_b32 s9, s11 222; GFX6-NEXT: s_mov_b32 s10, s12 223; GFX6-NEXT: s_mov_b32 s11, s13 224; GFX6-NEXT: s_wqm_b64 exec, exec 225; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 226; GFX6-NEXT: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 227; GFX6-NEXT: s_waitcnt vmcnt(0) 228; GFX6-NEXT: ; return to shader part epilog 229; 230; GFX10NSA-LABEL: gather4_c_2d: 231; GFX10NSA: ; %bb.0: ; %main_body 232; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 233; GFX10NSA-NEXT: s_mov_b32 s0, s2 234; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 235; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 236; GFX10NSA-NEXT: s_mov_b32 s1, s3 237; GFX10NSA-NEXT: s_mov_b32 s2, s4 238; GFX10NSA-NEXT: s_mov_b32 s3, s5 239; GFX10NSA-NEXT: s_mov_b32 s4, s6 240; GFX10NSA-NEXT: s_mov_b32 s5, s7 241; GFX10NSA-NEXT: s_mov_b32 s6, s8 242; GFX10NSA-NEXT: s_mov_b32 s7, s9 243; GFX10NSA-NEXT: s_mov_b32 s8, s10 244; 
GFX10NSA-NEXT: s_mov_b32 s9, s11 245; GFX10NSA-NEXT: s_mov_b32 s10, s12 246; GFX10NSA-NEXT: s_mov_b32 s11, s13 247; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 248; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 249; GFX10NSA-NEXT: ; return to shader part epilog 250main_body: 251 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 252 ret <4 x float> %v 253} 254 255define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { 256; GFX6-LABEL: gather4_cl_2d: 257; GFX6: ; %bb.0: ; %main_body 258; GFX6-NEXT: s_mov_b64 s[14:15], exec 259; GFX6-NEXT: s_mov_b32 s0, s2 260; GFX6-NEXT: s_mov_b32 s1, s3 261; GFX6-NEXT: s_mov_b32 s2, s4 262; GFX6-NEXT: s_mov_b32 s3, s5 263; GFX6-NEXT: s_mov_b32 s4, s6 264; GFX6-NEXT: s_mov_b32 s5, s7 265; GFX6-NEXT: s_mov_b32 s6, s8 266; GFX6-NEXT: s_mov_b32 s7, s9 267; GFX6-NEXT: s_mov_b32 s8, s10 268; GFX6-NEXT: s_mov_b32 s9, s11 269; GFX6-NEXT: s_mov_b32 s10, s12 270; GFX6-NEXT: s_mov_b32 s11, s13 271; GFX6-NEXT: s_wqm_b64 exec, exec 272; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 273; GFX6-NEXT: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 274; GFX6-NEXT: s_waitcnt vmcnt(0) 275; GFX6-NEXT: ; return to shader part epilog 276; 277; GFX10NSA-LABEL: gather4_cl_2d: 278; GFX10NSA: ; %bb.0: ; %main_body 279; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 280; GFX10NSA-NEXT: s_mov_b32 s0, s2 281; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 282; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 283; GFX10NSA-NEXT: s_mov_b32 s1, s3 284; GFX10NSA-NEXT: s_mov_b32 s2, s4 285; GFX10NSA-NEXT: s_mov_b32 s3, s5 286; GFX10NSA-NEXT: s_mov_b32 s4, s6 287; GFX10NSA-NEXT: s_mov_b32 s5, s7 288; GFX10NSA-NEXT: s_mov_b32 s6, s8 289; GFX10NSA-NEXT: s_mov_b32 s7, s9 290; GFX10NSA-NEXT: s_mov_b32 s8, s10 291; GFX10NSA-NEXT: s_mov_b32 s9, s11 292; 
GFX10NSA-NEXT: s_mov_b32 s10, s12 293; GFX10NSA-NEXT: s_mov_b32 s11, s13 294; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 295; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 296; GFX10NSA-NEXT: ; return to shader part epilog 297main_body: 298 %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 299 ret <4 x float> %v 300} 301 302define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { 303; GFX6-LABEL: gather4_c_cl_2d: 304; GFX6: ; %bb.0: ; %main_body 305; GFX6-NEXT: s_mov_b64 s[14:15], exec 306; GFX6-NEXT: s_mov_b32 s0, s2 307; GFX6-NEXT: s_mov_b32 s1, s3 308; GFX6-NEXT: s_mov_b32 s2, s4 309; GFX6-NEXT: s_mov_b32 s3, s5 310; GFX6-NEXT: s_mov_b32 s4, s6 311; GFX6-NEXT: s_mov_b32 s5, s7 312; GFX6-NEXT: s_mov_b32 s6, s8 313; GFX6-NEXT: s_mov_b32 s7, s9 314; GFX6-NEXT: s_mov_b32 s8, s10 315; GFX6-NEXT: s_mov_b32 s9, s11 316; GFX6-NEXT: s_mov_b32 s10, s12 317; GFX6-NEXT: s_mov_b32 s11, s13 318; GFX6-NEXT: s_wqm_b64 exec, exec 319; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 320; GFX6-NEXT: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 321; GFX6-NEXT: s_waitcnt vmcnt(0) 322; GFX6-NEXT: ; return to shader part epilog 323; 324; GFX10NSA-LABEL: gather4_c_cl_2d: 325; GFX10NSA: ; %bb.0: ; %main_body 326; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 327; GFX10NSA-NEXT: s_mov_b32 s0, s2 328; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 329; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 330; GFX10NSA-NEXT: s_mov_b32 s1, s3 331; GFX10NSA-NEXT: s_mov_b32 s2, s4 332; GFX10NSA-NEXT: s_mov_b32 s3, s5 333; GFX10NSA-NEXT: s_mov_b32 s4, s6 334; GFX10NSA-NEXT: s_mov_b32 s5, s7 335; GFX10NSA-NEXT: s_mov_b32 s6, s8 336; GFX10NSA-NEXT: s_mov_b32 s7, s9 337; GFX10NSA-NEXT: s_mov_b32 s8, s10 338; GFX10NSA-NEXT: s_mov_b32 s9, s11 339; GFX10NSA-NEXT: s_mov_b32 
s10, s12 340; GFX10NSA-NEXT: s_mov_b32 s11, s13 341; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 342; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 343; GFX10NSA-NEXT: ; return to shader part epilog 344main_body: 345 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 346 ret <4 x float> %v 347} 348 349define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { 350; GFX6-LABEL: gather4_b_2d: 351; GFX6: ; %bb.0: ; %main_body 352; GFX6-NEXT: s_mov_b64 s[14:15], exec 353; GFX6-NEXT: s_mov_b32 s0, s2 354; GFX6-NEXT: s_mov_b32 s1, s3 355; GFX6-NEXT: s_mov_b32 s2, s4 356; GFX6-NEXT: s_mov_b32 s3, s5 357; GFX6-NEXT: s_mov_b32 s4, s6 358; GFX6-NEXT: s_mov_b32 s5, s7 359; GFX6-NEXT: s_mov_b32 s6, s8 360; GFX6-NEXT: s_mov_b32 s7, s9 361; GFX6-NEXT: s_mov_b32 s8, s10 362; GFX6-NEXT: s_mov_b32 s9, s11 363; GFX6-NEXT: s_mov_b32 s10, s12 364; GFX6-NEXT: s_mov_b32 s11, s13 365; GFX6-NEXT: s_wqm_b64 exec, exec 366; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 367; GFX6-NEXT: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 368; GFX6-NEXT: s_waitcnt vmcnt(0) 369; GFX6-NEXT: ; return to shader part epilog 370; 371; GFX10NSA-LABEL: gather4_b_2d: 372; GFX10NSA: ; %bb.0: ; %main_body 373; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 374; GFX10NSA-NEXT: s_mov_b32 s0, s2 375; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 376; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 377; GFX10NSA-NEXT: s_mov_b32 s1, s3 378; GFX10NSA-NEXT: s_mov_b32 s2, s4 379; GFX10NSA-NEXT: s_mov_b32 s3, s5 380; GFX10NSA-NEXT: s_mov_b32 s4, s6 381; GFX10NSA-NEXT: s_mov_b32 s5, s7 382; GFX10NSA-NEXT: s_mov_b32 s6, s8 383; GFX10NSA-NEXT: s_mov_b32 s7, s9 384; GFX10NSA-NEXT: s_mov_b32 s8, s10 385; GFX10NSA-NEXT: s_mov_b32 s9, s11 386; GFX10NSA-NEXT: s_mov_b32 s10, s12 387; GFX10NSA-NEXT: 
s_mov_b32 s11, s13 388; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 389; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 390; GFX10NSA-NEXT: ; return to shader part epilog 391main_body: 392 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 393 ret <4 x float> %v 394} 395 396define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { 397; GFX6-LABEL: gather4_c_b_2d: 398; GFX6: ; %bb.0: ; %main_body 399; GFX6-NEXT: s_mov_b64 s[14:15], exec 400; GFX6-NEXT: s_mov_b32 s0, s2 401; GFX6-NEXT: s_mov_b32 s1, s3 402; GFX6-NEXT: s_mov_b32 s2, s4 403; GFX6-NEXT: s_mov_b32 s3, s5 404; GFX6-NEXT: s_mov_b32 s4, s6 405; GFX6-NEXT: s_mov_b32 s5, s7 406; GFX6-NEXT: s_mov_b32 s6, s8 407; GFX6-NEXT: s_mov_b32 s7, s9 408; GFX6-NEXT: s_mov_b32 s8, s10 409; GFX6-NEXT: s_mov_b32 s9, s11 410; GFX6-NEXT: s_mov_b32 s10, s12 411; GFX6-NEXT: s_mov_b32 s11, s13 412; GFX6-NEXT: s_wqm_b64 exec, exec 413; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 414; GFX6-NEXT: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 415; GFX6-NEXT: s_waitcnt vmcnt(0) 416; GFX6-NEXT: ; return to shader part epilog 417; 418; GFX10NSA-LABEL: gather4_c_b_2d: 419; GFX10NSA: ; %bb.0: ; %main_body 420; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 421; GFX10NSA-NEXT: s_mov_b32 s0, s2 422; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 423; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 424; GFX10NSA-NEXT: s_mov_b32 s1, s3 425; GFX10NSA-NEXT: s_mov_b32 s2, s4 426; GFX10NSA-NEXT: s_mov_b32 s3, s5 427; GFX10NSA-NEXT: s_mov_b32 s4, s6 428; GFX10NSA-NEXT: s_mov_b32 s5, s7 429; GFX10NSA-NEXT: s_mov_b32 s6, s8 430; GFX10NSA-NEXT: s_mov_b32 s7, s9 431; GFX10NSA-NEXT: s_mov_b32 s8, s10 432; GFX10NSA-NEXT: s_mov_b32 s9, s11 433; GFX10NSA-NEXT: s_mov_b32 s10, s12 434; GFX10NSA-NEXT: s_mov_b32 s11, s13 435; 
GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 436; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 437; GFX10NSA-NEXT: ; return to shader part epilog 438main_body: 439 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 440 ret <4 x float> %v 441} 442 443define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { 444; GFX6-LABEL: gather4_b_cl_2d: 445; GFX6: ; %bb.0: ; %main_body 446; GFX6-NEXT: s_mov_b64 s[14:15], exec 447; GFX6-NEXT: s_mov_b32 s0, s2 448; GFX6-NEXT: s_mov_b32 s1, s3 449; GFX6-NEXT: s_mov_b32 s2, s4 450; GFX6-NEXT: s_mov_b32 s3, s5 451; GFX6-NEXT: s_mov_b32 s4, s6 452; GFX6-NEXT: s_mov_b32 s5, s7 453; GFX6-NEXT: s_mov_b32 s6, s8 454; GFX6-NEXT: s_mov_b32 s7, s9 455; GFX6-NEXT: s_mov_b32 s8, s10 456; GFX6-NEXT: s_mov_b32 s9, s11 457; GFX6-NEXT: s_mov_b32 s10, s12 458; GFX6-NEXT: s_mov_b32 s11, s13 459; GFX6-NEXT: s_wqm_b64 exec, exec 460; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 461; GFX6-NEXT: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 462; GFX6-NEXT: s_waitcnt vmcnt(0) 463; GFX6-NEXT: ; return to shader part epilog 464; 465; GFX10NSA-LABEL: gather4_b_cl_2d: 466; GFX10NSA: ; %bb.0: ; %main_body 467; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 468; GFX10NSA-NEXT: s_mov_b32 s0, s2 469; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 470; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 471; GFX10NSA-NEXT: s_mov_b32 s1, s3 472; GFX10NSA-NEXT: s_mov_b32 s2, s4 473; GFX10NSA-NEXT: s_mov_b32 s3, s5 474; GFX10NSA-NEXT: s_mov_b32 s4, s6 475; GFX10NSA-NEXT: s_mov_b32 s5, s7 476; GFX10NSA-NEXT: s_mov_b32 s6, s8 477; GFX10NSA-NEXT: s_mov_b32 s7, s9 478; GFX10NSA-NEXT: s_mov_b32 s8, s10 479; GFX10NSA-NEXT: s_mov_b32 s9, s11 480; GFX10NSA-NEXT: s_mov_b32 s10, s12 481; GFX10NSA-NEXT: s_mov_b32 s11, s13 482; 
GFX10NSA-NEXT: image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 483; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 484; GFX10NSA-NEXT: ; return to shader part epilog 485main_body: 486 %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 487 ret <4 x float> %v 488} 489 490define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { 491; GFX6-LABEL: gather4_c_b_cl_2d: 492; GFX6: ; %bb.0: ; %main_body 493; GFX6-NEXT: s_mov_b64 s[14:15], exec 494; GFX6-NEXT: s_mov_b32 s0, s2 495; GFX6-NEXT: s_mov_b32 s1, s3 496; GFX6-NEXT: s_mov_b32 s2, s4 497; GFX6-NEXT: s_mov_b32 s3, s5 498; GFX6-NEXT: s_mov_b32 s4, s6 499; GFX6-NEXT: s_mov_b32 s5, s7 500; GFX6-NEXT: s_mov_b32 s6, s8 501; GFX6-NEXT: s_mov_b32 s7, s9 502; GFX6-NEXT: s_mov_b32 s8, s10 503; GFX6-NEXT: s_mov_b32 s9, s11 504; GFX6-NEXT: s_mov_b32 s10, s12 505; GFX6-NEXT: s_mov_b32 s11, s13 506; GFX6-NEXT: s_wqm_b64 exec, exec 507; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 508; GFX6-NEXT: image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 509; GFX6-NEXT: s_waitcnt vmcnt(0) 510; GFX6-NEXT: ; return to shader part epilog 511; 512; GFX10NSA-LABEL: gather4_c_b_cl_2d: 513; GFX10NSA: ; %bb.0: ; %main_body 514; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 515; GFX10NSA-NEXT: s_mov_b32 s0, s2 516; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 517; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 518; GFX10NSA-NEXT: s_mov_b32 s1, s3 519; GFX10NSA-NEXT: s_mov_b32 s2, s4 520; GFX10NSA-NEXT: s_mov_b32 s3, s5 521; GFX10NSA-NEXT: s_mov_b32 s4, s6 522; GFX10NSA-NEXT: s_mov_b32 s5, s7 523; GFX10NSA-NEXT: s_mov_b32 s6, s8 524; GFX10NSA-NEXT: s_mov_b32 s7, s9 525; GFX10NSA-NEXT: s_mov_b32 s8, s10 526; GFX10NSA-NEXT: s_mov_b32 s9, s11 527; GFX10NSA-NEXT: s_mov_b32 s10, s12 528; GFX10NSA-NEXT: 
s_mov_b32 s11, s13 529; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 530; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 531; GFX10NSA-NEXT: ; return to shader part epilog 532main_body: 533 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 534 ret <4 x float> %v 535} 536 537define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { 538; GFX6-LABEL: gather4_l_2d: 539; GFX6: ; %bb.0: ; %main_body 540; GFX6-NEXT: s_mov_b32 s0, s2 541; GFX6-NEXT: s_mov_b32 s1, s3 542; GFX6-NEXT: s_mov_b32 s2, s4 543; GFX6-NEXT: s_mov_b32 s3, s5 544; GFX6-NEXT: s_mov_b32 s4, s6 545; GFX6-NEXT: s_mov_b32 s5, s7 546; GFX6-NEXT: s_mov_b32 s6, s8 547; GFX6-NEXT: s_mov_b32 s7, s9 548; GFX6-NEXT: s_mov_b32 s8, s10 549; GFX6-NEXT: s_mov_b32 s9, s11 550; GFX6-NEXT: s_mov_b32 s10, s12 551; GFX6-NEXT: s_mov_b32 s11, s13 552; GFX6-NEXT: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 553; GFX6-NEXT: s_waitcnt vmcnt(0) 554; GFX6-NEXT: ; return to shader part epilog 555; 556; GFX10NSA-LABEL: gather4_l_2d: 557; GFX10NSA: ; %bb.0: ; %main_body 558; GFX10NSA-NEXT: s_mov_b32 s0, s2 559; GFX10NSA-NEXT: s_mov_b32 s1, s3 560; GFX10NSA-NEXT: s_mov_b32 s2, s4 561; GFX10NSA-NEXT: s_mov_b32 s3, s5 562; GFX10NSA-NEXT: s_mov_b32 s4, s6 563; GFX10NSA-NEXT: s_mov_b32 s5, s7 564; GFX10NSA-NEXT: s_mov_b32 s6, s8 565; GFX10NSA-NEXT: s_mov_b32 s7, s9 566; GFX10NSA-NEXT: s_mov_b32 s8, s10 567; GFX10NSA-NEXT: s_mov_b32 s9, s11 568; GFX10NSA-NEXT: s_mov_b32 s10, s12 569; GFX10NSA-NEXT: s_mov_b32 s11, s13 570; GFX10NSA-NEXT: image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 571; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 572; GFX10NSA-NEXT: ; return to shader part epilog 573main_body: 574 %v = call <4 x float> 
@llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 575 ret <4 x float> %v 576} 577 578define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { 579; GFX6-LABEL: gather4_c_l_2d: 580; GFX6: ; %bb.0: ; %main_body 581; GFX6-NEXT: s_mov_b32 s0, s2 582; GFX6-NEXT: s_mov_b32 s1, s3 583; GFX6-NEXT: s_mov_b32 s2, s4 584; GFX6-NEXT: s_mov_b32 s3, s5 585; GFX6-NEXT: s_mov_b32 s4, s6 586; GFX6-NEXT: s_mov_b32 s5, s7 587; GFX6-NEXT: s_mov_b32 s6, s8 588; GFX6-NEXT: s_mov_b32 s7, s9 589; GFX6-NEXT: s_mov_b32 s8, s10 590; GFX6-NEXT: s_mov_b32 s9, s11 591; GFX6-NEXT: s_mov_b32 s10, s12 592; GFX6-NEXT: s_mov_b32 s11, s13 593; GFX6-NEXT: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 594; GFX6-NEXT: s_waitcnt vmcnt(0) 595; GFX6-NEXT: ; return to shader part epilog 596; 597; GFX10NSA-LABEL: gather4_c_l_2d: 598; GFX10NSA: ; %bb.0: ; %main_body 599; GFX10NSA-NEXT: s_mov_b32 s0, s2 600; GFX10NSA-NEXT: s_mov_b32 s1, s3 601; GFX10NSA-NEXT: s_mov_b32 s2, s4 602; GFX10NSA-NEXT: s_mov_b32 s3, s5 603; GFX10NSA-NEXT: s_mov_b32 s4, s6 604; GFX10NSA-NEXT: s_mov_b32 s5, s7 605; GFX10NSA-NEXT: s_mov_b32 s6, s8 606; GFX10NSA-NEXT: s_mov_b32 s7, s9 607; GFX10NSA-NEXT: s_mov_b32 s8, s10 608; GFX10NSA-NEXT: s_mov_b32 s9, s11 609; GFX10NSA-NEXT: s_mov_b32 s10, s12 610; GFX10NSA-NEXT: s_mov_b32 s11, s13 611; GFX10NSA-NEXT: image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 612; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 613; GFX10NSA-NEXT: ; return to shader part epilog 614main_body: 615 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 616 ret <4 x float> %v 617} 618 619define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float 
%t) { 620; GFX6-LABEL: gather4_lz_2d: 621; GFX6: ; %bb.0: ; %main_body 622; GFX6-NEXT: s_mov_b32 s0, s2 623; GFX6-NEXT: s_mov_b32 s1, s3 624; GFX6-NEXT: s_mov_b32 s2, s4 625; GFX6-NEXT: s_mov_b32 s3, s5 626; GFX6-NEXT: s_mov_b32 s4, s6 627; GFX6-NEXT: s_mov_b32 s5, s7 628; GFX6-NEXT: s_mov_b32 s6, s8 629; GFX6-NEXT: s_mov_b32 s7, s9 630; GFX6-NEXT: s_mov_b32 s8, s10 631; GFX6-NEXT: s_mov_b32 s9, s11 632; GFX6-NEXT: s_mov_b32 s10, s12 633; GFX6-NEXT: s_mov_b32 s11, s13 634; GFX6-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 635; GFX6-NEXT: s_waitcnt vmcnt(0) 636; GFX6-NEXT: ; return to shader part epilog 637; 638; GFX10NSA-LABEL: gather4_lz_2d: 639; GFX10NSA: ; %bb.0: ; %main_body 640; GFX10NSA-NEXT: s_mov_b32 s0, s2 641; GFX10NSA-NEXT: s_mov_b32 s1, s3 642; GFX10NSA-NEXT: s_mov_b32 s2, s4 643; GFX10NSA-NEXT: s_mov_b32 s3, s5 644; GFX10NSA-NEXT: s_mov_b32 s4, s6 645; GFX10NSA-NEXT: s_mov_b32 s5, s7 646; GFX10NSA-NEXT: s_mov_b32 s6, s8 647; GFX10NSA-NEXT: s_mov_b32 s7, s9 648; GFX10NSA-NEXT: s_mov_b32 s8, s10 649; GFX10NSA-NEXT: s_mov_b32 s9, s11 650; GFX10NSA-NEXT: s_mov_b32 s10, s12 651; GFX10NSA-NEXT: s_mov_b32 s11, s13 652; GFX10NSA-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 653; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 654; GFX10NSA-NEXT: ; return to shader part epilog 655main_body: 656 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 657 ret <4 x float> %v 658} 659 660define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 661; GFX6-LABEL: gather4_c_lz_2d: 662; GFX6: ; %bb.0: ; %main_body 663; GFX6-NEXT: s_mov_b32 s0, s2 664; GFX6-NEXT: s_mov_b32 s1, s3 665; GFX6-NEXT: s_mov_b32 s2, s4 666; GFX6-NEXT: s_mov_b32 s3, s5 667; GFX6-NEXT: s_mov_b32 s4, s6 668; GFX6-NEXT: s_mov_b32 s5, s7 669; GFX6-NEXT: s_mov_b32 s6, s8 670; GFX6-NEXT: 
s_mov_b32 s7, s9 671; GFX6-NEXT: s_mov_b32 s8, s10 672; GFX6-NEXT: s_mov_b32 s9, s11 673; GFX6-NEXT: s_mov_b32 s10, s12 674; GFX6-NEXT: s_mov_b32 s11, s13 675; GFX6-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 676; GFX6-NEXT: s_waitcnt vmcnt(0) 677; GFX6-NEXT: ; return to shader part epilog 678; 679; GFX10NSA-LABEL: gather4_c_lz_2d: 680; GFX10NSA: ; %bb.0: ; %main_body 681; GFX10NSA-NEXT: s_mov_b32 s0, s2 682; GFX10NSA-NEXT: s_mov_b32 s1, s3 683; GFX10NSA-NEXT: s_mov_b32 s2, s4 684; GFX10NSA-NEXT: s_mov_b32 s3, s5 685; GFX10NSA-NEXT: s_mov_b32 s4, s6 686; GFX10NSA-NEXT: s_mov_b32 s5, s7 687; GFX10NSA-NEXT: s_mov_b32 s6, s8 688; GFX10NSA-NEXT: s_mov_b32 s7, s9 689; GFX10NSA-NEXT: s_mov_b32 s8, s10 690; GFX10NSA-NEXT: s_mov_b32 s9, s11 691; GFX10NSA-NEXT: s_mov_b32 s10, s12 692; GFX10NSA-NEXT: s_mov_b32 s11, s13 693; GFX10NSA-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 694; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 695; GFX10NSA-NEXT: ; return to shader part epilog 696main_body: 697 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 698 ret <4 x float> %v 699} 700 701define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 702; GFX6-LABEL: gather4_2d_dmask_2: 703; GFX6: ; %bb.0: ; %main_body 704; GFX6-NEXT: s_mov_b64 s[14:15], exec 705; GFX6-NEXT: s_mov_b32 s0, s2 706; GFX6-NEXT: s_mov_b32 s1, s3 707; GFX6-NEXT: s_mov_b32 s2, s4 708; GFX6-NEXT: s_mov_b32 s3, s5 709; GFX6-NEXT: s_mov_b32 s4, s6 710; GFX6-NEXT: s_mov_b32 s5, s7 711; GFX6-NEXT: s_mov_b32 s6, s8 712; GFX6-NEXT: s_mov_b32 s7, s9 713; GFX6-NEXT: s_mov_b32 s8, s10 714; GFX6-NEXT: s_mov_b32 s9, s11 715; GFX6-NEXT: s_mov_b32 s10, s12 716; GFX6-NEXT: s_mov_b32 s11, s13 717; GFX6-NEXT: s_wqm_b64 exec, exec 718; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 719; GFX6-NEXT: image_gather4 
v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 720; GFX6-NEXT: s_waitcnt vmcnt(0) 721; GFX6-NEXT: ; return to shader part epilog 722; 723; GFX10NSA-LABEL: gather4_2d_dmask_2: 724; GFX10NSA: ; %bb.0: ; %main_body 725; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 726; GFX10NSA-NEXT: s_mov_b32 s0, s2 727; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 728; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 729; GFX10NSA-NEXT: s_mov_b32 s1, s3 730; GFX10NSA-NEXT: s_mov_b32 s2, s4 731; GFX10NSA-NEXT: s_mov_b32 s3, s5 732; GFX10NSA-NEXT: s_mov_b32 s4, s6 733; GFX10NSA-NEXT: s_mov_b32 s5, s7 734; GFX10NSA-NEXT: s_mov_b32 s6, s8 735; GFX10NSA-NEXT: s_mov_b32 s7, s9 736; GFX10NSA-NEXT: s_mov_b32 s8, s10 737; GFX10NSA-NEXT: s_mov_b32 s9, s11 738; GFX10NSA-NEXT: s_mov_b32 s10, s12 739; GFX10NSA-NEXT: s_mov_b32 s11, s13 740; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D 741; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 742; GFX10NSA-NEXT: ; return to shader part epilog 743main_body: 744 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 745 ret <4 x float> %v 746} 747 748define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 749; GFX6-LABEL: gather4_2d_dmask_4: 750; GFX6: ; %bb.0: ; %main_body 751; GFX6-NEXT: s_mov_b64 s[14:15], exec 752; GFX6-NEXT: s_mov_b32 s0, s2 753; GFX6-NEXT: s_mov_b32 s1, s3 754; GFX6-NEXT: s_mov_b32 s2, s4 755; GFX6-NEXT: s_mov_b32 s3, s5 756; GFX6-NEXT: s_mov_b32 s4, s6 757; GFX6-NEXT: s_mov_b32 s5, s7 758; GFX6-NEXT: s_mov_b32 s6, s8 759; GFX6-NEXT: s_mov_b32 s7, s9 760; GFX6-NEXT: s_mov_b32 s8, s10 761; GFX6-NEXT: s_mov_b32 s9, s11 762; GFX6-NEXT: s_mov_b32 s10, s12 763; GFX6-NEXT: s_mov_b32 s11, s13 764; GFX6-NEXT: s_wqm_b64 exec, exec 765; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 766; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 767; GFX6-NEXT: s_waitcnt 
vmcnt(0) 768; GFX6-NEXT: ; return to shader part epilog 769; 770; GFX10NSA-LABEL: gather4_2d_dmask_4: 771; GFX10NSA: ; %bb.0: ; %main_body 772; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 773; GFX10NSA-NEXT: s_mov_b32 s0, s2 774; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 775; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 776; GFX10NSA-NEXT: s_mov_b32 s1, s3 777; GFX10NSA-NEXT: s_mov_b32 s2, s4 778; GFX10NSA-NEXT: s_mov_b32 s3, s5 779; GFX10NSA-NEXT: s_mov_b32 s4, s6 780; GFX10NSA-NEXT: s_mov_b32 s5, s7 781; GFX10NSA-NEXT: s_mov_b32 s6, s8 782; GFX10NSA-NEXT: s_mov_b32 s7, s9 783; GFX10NSA-NEXT: s_mov_b32 s8, s10 784; GFX10NSA-NEXT: s_mov_b32 s9, s11 785; GFX10NSA-NEXT: s_mov_b32 s10, s12 786; GFX10NSA-NEXT: s_mov_b32 s11, s13 787; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D 788; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 789; GFX10NSA-NEXT: ; return to shader part epilog 790main_body: 791 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 792 ret <4 x float> %v 793} 794 795define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 796; GFX6-LABEL: gather4_2d_dmask_8: 797; GFX6: ; %bb.0: ; %main_body 798; GFX6-NEXT: s_mov_b64 s[14:15], exec 799; GFX6-NEXT: s_mov_b32 s0, s2 800; GFX6-NEXT: s_mov_b32 s1, s3 801; GFX6-NEXT: s_mov_b32 s2, s4 802; GFX6-NEXT: s_mov_b32 s3, s5 803; GFX6-NEXT: s_mov_b32 s4, s6 804; GFX6-NEXT: s_mov_b32 s5, s7 805; GFX6-NEXT: s_mov_b32 s6, s8 806; GFX6-NEXT: s_mov_b32 s7, s9 807; GFX6-NEXT: s_mov_b32 s8, s10 808; GFX6-NEXT: s_mov_b32 s9, s11 809; GFX6-NEXT: s_mov_b32 s10, s12 810; GFX6-NEXT: s_mov_b32 s11, s13 811; GFX6-NEXT: s_wqm_b64 exec, exec 812; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 813; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 814; GFX6-NEXT: s_waitcnt vmcnt(0) 815; GFX6-NEXT: ; return to shader part epilog 816; 817; 
GFX10NSA-LABEL: gather4_2d_dmask_8: 818; GFX10NSA: ; %bb.0: ; %main_body 819; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 820; GFX10NSA-NEXT: s_mov_b32 s0, s2 821; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 822; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 823; GFX10NSA-NEXT: s_mov_b32 s1, s3 824; GFX10NSA-NEXT: s_mov_b32 s2, s4 825; GFX10NSA-NEXT: s_mov_b32 s3, s5 826; GFX10NSA-NEXT: s_mov_b32 s4, s6 827; GFX10NSA-NEXT: s_mov_b32 s5, s7 828; GFX10NSA-NEXT: s_mov_b32 s6, s8 829; GFX10NSA-NEXT: s_mov_b32 s7, s9 830; GFX10NSA-NEXT: s_mov_b32 s8, s10 831; GFX10NSA-NEXT: s_mov_b32 s9, s11 832; GFX10NSA-NEXT: s_mov_b32 s10, s12 833; GFX10NSA-NEXT: s_mov_b32 s11, s13 834; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D 835; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 836; GFX10NSA-NEXT: ; return to shader part epilog 837main_body: 838 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 839 ret <4 x float> %v 840} 841; Declarations of the gather4 intrinsic variants; each one is tagged with attribute group #0. 842declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 843declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 844declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 845declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 846declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 847declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 
848declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 849declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 850declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 851declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 852declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 853declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 854declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 855declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 856declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 857; Attribute group referenced as #0 by each gather4 intrinsic declaration above. 858attributes #0 = { nounwind readonly } 859