; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Plain load: four i16 coordinates (%s, %t, %slice, %fragid), first immediate
; 15 and both trailing immediates 0. The expected code packs the coordinates
; pairwise into v0 and v1 (shift left by 16, then v_and_or with a 0xffff mask)
; and issues image_load with dmask:0xf and the a16 modifier. The gfx900 output
; carries the "da" modifier; the gfx1010 output carries
; dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v2, v4, v1
; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v4, v3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Same load through the struct-returning overload, with the second-to-last
; immediate set to 1. The expected image_load gains the "tfe" modifier and
; writes five registers (v[0:4]); the extra i32 element of the result (v4) is
; stored to %out with global_store_dword.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v2, v4, v1
; GFX9-NEXT:    image_load v[0:4], v[0:1], s[0:7] dmask:0xf unorm a16 tfe da
; GFX9-NEXT:    v_mov_b32_e32 v5, s10
; GFX9-NEXT:    v_mov_b32_e32 v6, s11
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v[5:6], v4, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v4, v3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v5, s10
; GFX10-NEXT:    image_load v[0:4], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX10-NEXT:    v_mov_b32_e32 v6, s11
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[5:6], v4, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Same as the previous function but with the second-to-last immediate set to 3.
; The expected image_load carries both the "tfe" and "lwe" modifiers; the
; extra i32 element of the result is again stored to %out.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v2, v4, v1
; GFX9-NEXT:    image_load v[0:4], v[0:1], s[0:7] dmask:0xf unorm a16 tfe lwe da
; GFX9-NEXT:    v_mov_b32_e32 v5, s10
; GFX9-NEXT:    v_mov_b32_e32 v6, s11
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v[5:6], v4, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v4, v3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v5, s10
; GFX10-NEXT:    image_load v[0:4], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX10-NEXT:    v_mov_b32_e32 v6, s11
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[5:6], v4, off
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic overloads exercised above: one returning only the <4 x float>
; texel, one returning a { <4 x float>, i32 } struct.
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }