; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Exercises GlobalISel selection of the 3D image load intrinsic when all three
; coordinates are i16. The same IR is compiled for gfx900 and gfx1010, and each
; output is matched against its own set of autogenerated assertions. In every
; function the expected image_load carries the a16 modifier and takes its
; address from a two-register VGPR range.

; Plain load: both trailing immediates are 0. The expected code masks with
; 0xffff and shifts by 16 to pack the 16-bit coordinates into v[0:1]
; (presumably %s, %t, %r arrive in v0, v1, v2 in argument order), then loads
; four result dwords into v[0:3].
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: load_3d_v4f32_xyzw:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v0, v0, v3, v1
; GFX9-NEXT:    v_and_or_b32 v1, v2, v3, s8
; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_3d_v4f32_xyzw:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v0, v0, v3, v1
; GFX10-NEXT:    v_and_or_b32 v1, v2, v3, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Struct-returning variant with the second-to-last immediate set to 1; the
; expected image_load gains the tfe modifier and a fifth result dword. The
; assertions show v[0:4] being filled from a zeroed register before the load,
; and the fifth dword (v4) being stored through %out afterwards.
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_and_or_b32 v10, v0, v3, v1
; GFX9-NEXT:    v_and_or_b32 v11, v2, v3, s8
; GFX9-NEXT:    v_mov_b32_e32 v6, v5
; GFX9-NEXT:    v_mov_b32_e32 v7, v5
; GFX9-NEXT:    v_mov_b32_e32 v8, v5
; GFX9-NEXT:    v_mov_b32_e32 v9, v5
; GFX9-NEXT:    v_mov_b32_e32 v0, v5
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v1, v6
; GFX9-NEXT:    v_mov_b32_e32 v2, v7
; GFX9-NEXT:    v_mov_b32_e32 v3, v8
; GFX9-NEXT:    v_mov_b32_e32 v4, v9
; GFX9-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_mov_b32_e32 v6, v5
; GFX10-NEXT:    v_mov_b32_e32 v7, v5
; GFX10-NEXT:    v_mov_b32_e32 v8, v5
; GFX10-NEXT:    v_mov_b32_e32 v9, v5
; GFX10-NEXT:    v_and_or_b32 v10, v0, v3, v1
; GFX10-NEXT:    v_and_or_b32 v11, v2, v3, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v5
; GFX10-NEXT:    v_mov_b32_e32 v1, v6
; GFX10-NEXT:    v_mov_b32_e32 v2, v7
; GFX10-NEXT:    v_mov_b32_e32 v3, v8
; GFX10-NEXT:    v_mov_b32_e32 v4, v9
; GFX10-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Same shape as the tfe test, but the second-to-last immediate is 3; the
; expected image_load carries both the tfe and lwe modifiers. The rest of the
; expected sequence matches the tfe-only case.
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_and_or_b32 v10, v0, v3, v1
; GFX9-NEXT:    v_and_or_b32 v11, v2, v3, s8
; GFX9-NEXT:    v_mov_b32_e32 v6, v5
; GFX9-NEXT:    v_mov_b32_e32 v7, v5
; GFX9-NEXT:    v_mov_b32_e32 v8, v5
; GFX9-NEXT:    v_mov_b32_e32 v9, v5
; GFX9-NEXT:    v_mov_b32_e32 v0, v5
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_mov_b32_e32 v1, v6
; GFX9-NEXT:    v_mov_b32_e32 v2, v7
; GFX9-NEXT:    v_mov_b32_e32 v3, v8
; GFX9-NEXT:    v_mov_b32_e32 v4, v9
; GFX9-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe lwe
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v5, 0
; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_mov_b32_e32 v6, v5
; GFX10-NEXT:    v_mov_b32_e32 v7, v5
; GFX10-NEXT:    v_mov_b32_e32 v8, v5
; GFX10-NEXT:    v_mov_b32_e32 v9, v5
; GFX10-NEXT:    v_and_or_b32 v10, v0, v3, v1
; GFX10-NEXT:    v_and_or_b32 v11, v2, v3, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v5
; GFX10-NEXT:    v_mov_b32_e32 v1, v6
; GFX10-NEXT:    v_mov_b32_e32 v2, v7
; GFX10-NEXT:    v_mov_b32_e32 v3, v8
; GFX10-NEXT:    v_mov_b32_e32 v4, v9
; GFX10-NEXT:    image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16 tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v5, v4, s[10:11]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic declarations: the plain <4 x float> form and the
; { <4 x float>, i32 } form used by the tfe/lwe tests above.
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }