; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

; Codegen checks for the llvm.amdgcn.image.gather4.*.o.2d intrinsics selected
; through GlobalISel, on tahiti and gfx1010. Every test is an amdgpu_ps function
; that takes the resource and sampler as inreg arguments, calls one intrinsic
; with dmask 1, and returns the <4 x float> result.

; gather4.o: operands are offset, s, t.
define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GFX6-LABEL: gather4_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.o: operands are offset, zcompare, s, t.
define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.cl.o: operands are offset, s, t, clamp.
define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.cl.o: operands are offset, zcompare, s, t, clamp.
define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b.o: operands are offset, bias, s, t.
define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
; GFX6-LABEL: gather4_b_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b.o: operands are offset, bias, zcompare, s, t.
define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_b_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b.cl.o: operands are offset, bias, s, t, clamp.
; The expected output carries the same exec save / s_wqm / s_and sequence as the
; other bias variants in this file (b_o, c_b_o, c_b_cl_o); it was previously
; missing here. NOTE(review): rerun utils/update_llc_test_checks.py to confirm
; the exact instruction order against current llc output.
define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_b_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b.cl.o: operands are offset, bias, zcompare, s, t, clamp.
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_b_cl_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_cl_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s14, exec_lo
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.l.o: operands are offset, s, t, lod. No exec/WQM manipulation is
; expected for this variant.
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_l_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.l.o: operands are offset, zcompare, s, t, lod. No exec/WQM
; manipulation is expected for this variant.
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_c_l_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.lz.o: operands are offset, s, t. No exec/WQM manipulation is expected
; for this variant.
define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GFX6-LABEL: gather4_lz_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_lz_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.lz.o: operands are offset, zcompare, s, t. No exec/WQM manipulation
; is expected for this variant.
define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_lz_o_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_lz_o_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    s_mov_b32 s8, s10
; GFX10-NEXT:    s_mov_b32 s9, s11
; GFX10-NEXT:    s_mov_b32 s10, s12
; GFX10-NEXT:    s_mov_b32 s11, s13
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }