; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s

; Each function below calls one llvm.amdgcn.image.gather4.* intrinsic whose
; coordinate operands are of type half (the .f16 overload). llc is invoked with
; -global-isel for gfx900 and for gfx1010, and the expected assembly for both is
; listed above each function body. Every expected image_gather4* instruction
; carries the a16 modifier. The expected-output lines are generated by the
; script named in the first line; regenerate them rather than editing by hand.

; gather4, 2D dimension: half coordinates %s and %t.
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: gather4_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, 0xffff, v1
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4, cube dimension: half coordinates %s, %t and %face.
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
; GFX9-LABEL: gather4_cube:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_cube:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 1, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4, 2darray dimension: half coordinates %s, %t and %slice.
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
; GFX9-LABEL: gather4_2darray:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2darray:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 1, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c, 2D dimension: float %zcompare followed by half coordinates %s and %t.
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.cl, 2D dimension: half coordinates %s and %t plus half %clamp.
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_cl_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 1, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.cl, 2D dimension: float %zcompare, half coordinates %s and %t, half %clamp.
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_c_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s12
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_cl_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v4, v2
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v2, v3, v4, s12
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b, 2D dimension: float %bias followed by half coordinates %s and %t.
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
; GFX9-LABEL: gather4_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_b_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b, 2D dimension: float %bias, float %zcompare, half coordinates %s and %t.
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_b_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_b_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b.cl, 2D dimension: float %bias, half coordinates %s and %t, half %clamp.
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_b_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s12
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_b_cl_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v4, v2
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v2, v3, v4, s12
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b.cl, 2D dimension: float %bias, float %zcompare, half coordinates %s and %t, half %clamp.
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
; GFX9-LABEL: gather4_c_b_cl_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b64 s[14:15], exec
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_wqm_b64 exec, exec
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s12
; GFX9-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_b_cl_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: s_mov_b32 s28, exec_lo
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT: v_mov_b32_e32 v5, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: v_and_or_b32 v2, v2, v5, v3
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: v_and_or_b32 v3, v4, v5, s12
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
; GFX10NSA-NEXT: image_gather4_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 1, float %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.l, 2D dimension: half coordinates %s and %t plus half %lod.
; Unlike the tests above, the expected output here has no s_wqm / exec save-restore.
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_l_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX9-NEXT: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_l_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, v3, v1
; GFX10NSA-NEXT: v_and_or_b32 v1, v2, v3, s12
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: image_gather4_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 1, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.l, 2D dimension: float %zcompare, half coordinates %s and %t, half %lod.
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
; GFX9-LABEL: gather4_c_l_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: s_lshl_b32 s12, s0, 16
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v2, v3, v4, s12
; GFX9-NEXT: image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_l_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: v_mov_b32_e32 v4, 0xffff
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_lshl_b32 s12, s0, 16
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, v4, v2
; GFX10NSA-NEXT: v_and_or_b32 v2, v3, v4, s12
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: image_gather4_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.lz, 2D dimension: half coordinates %s and %t.
define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
; GFX9-LABEL: gather4_lz_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_lz_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, 0xffff, v1
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.lz, 2D dimension: float %zcompare followed by half coordinates %s and %t.
define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
; GFX9-LABEL: gather4_c_lz_2d:
; GFX9: ; %bb.0: ; %main_body
; GFX9-NEXT: v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: s_mov_b32 s8, s10
; GFX9-NEXT: s_mov_b32 s9, s11
; GFX9-NEXT: s_mov_b32 s10, s12
; GFX9-NEXT: s_mov_b32 s11, s13
; GFX9-NEXT: v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 a16
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_lz_2d:
; GFX10NSA: ; %bb.0: ; %main_body
; GFX10NSA-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10NSA-NEXT: s_mov_b32 s0, s2
; GFX10NSA-NEXT: s_mov_b32 s1, s3
; GFX10NSA-NEXT: s_mov_b32 s2, s4
; GFX10NSA-NEXT: s_mov_b32 s3, s5
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
; GFX10NSA-NEXT: s_mov_b32 s4, s6
; GFX10NSA-NEXT: s_mov_b32 s5, s7
; GFX10NSA-NEXT: s_mov_b32 s6, s8
; GFX10NSA-NEXT: s_mov_b32 s7, s9
; GFX10NSA-NEXT: s_mov_b32 s8, s10
; GFX10NSA-NEXT: s_mov_b32 s9, s11
; GFX10NSA-NEXT: s_mov_b32 s10, s12
; GFX10NSA-NEXT: s_mov_b32 s11, s13
; GFX10NSA-NEXT: ; implicit-def: $vcc_hi
; GFX10NSA-NEXT: image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
; GFX10NSA-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 1, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Declarations of every gather4 intrinsic overload called above.
declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f16(i32 immarg, float, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f16(i32 immarg, float, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }