; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Exercises GlobalISel code generation for the
; llvm.amdgcn.image.load.2darraymsaa intrinsic in amdgpu_ps shaders, once for
; tahiti and once for gfx1010. Every call uses a dmask of 15 and four i32
; coordinates (%s, %t, %slice, %fragid). The three functions differ only in the
; second-to-last immediate argument of the intrinsic call (0, 1 or 3) and in
; whether the additional i32 result is stored to %out.

; Plain four-component load: the second-to-last immediate is 0 and the expected
; image_load writes v[0:3] with neither the tfe nor the lwe modifier.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Struct-returning variant with the second-to-last immediate set to 1: the
; expected image_load carries the tfe modifier, writes five registers v[0:4]
; that are zero-initialised beforehand, and v4 is stored to %out.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v8, v3
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v9
; GFX10-NEXT:    v_mov_b32_e32 v11, v9
; GFX10-NEXT:    v_mov_b32_e32 v12, v9
; GFX10-NEXT:    v_mov_b32_e32 v13, v9
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v9
; GFX10-NEXT:    v_mov_b32_e32 v1, v10
; GFX10-NEXT:    v_mov_b32_e32 v2, v11
; GFX10-NEXT:    v_mov_b32_e32 v3, v12
; GFX10-NEXT:    v_mov_b32_e32 v4, v13
; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v9, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Same as the previous function but with the second-to-last immediate set to 3:
; the expected image_load carries both the tfe and lwe modifiers.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v8, v3
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe lwe da
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v9, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v9
; GFX10-NEXT:    v_mov_b32_e32 v11, v9
; GFX10-NEXT:    v_mov_b32_e32 v12, v9
; GFX10-NEXT:    v_mov_b32_e32 v13, v9
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v9
; GFX10-NEXT:    v_mov_b32_e32 v1, v10
; GFX10-NEXT:    v_mov_b32_e32 v2, v11
; GFX10-NEXT:    v_mov_b32_e32 v3, v12
; GFX10-NEXT:    v_mov_b32_e32 v4, v13
; GFX10-NEXT:    image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v9, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic declarations used above: one returning only the <4 x float> data
; and one returning the data together with an extra i32.
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }