; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=GFX81 %s
; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

; This file checks the code generated for the llvm.amdgcn.image.store.2d
; intrinsic with half, <2 x half>, <3 x half> and <4 x half> data operands.
;
; NOTE(review): no active RUN line in this file passes the PACKED prefix to
; FileCheck, so the PACKED check lines below are currently not verified.
; They are presumably left over from a packed-d16 configuration; confirm and
; either regenerate them under an enabled prefix or drop them.

; Stores a single half value; the dmask immediate is 1.
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
; UNPACKED-LABEL: image_store_f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; GFX81-NEXT:    s_endpgm
;
; PACKED-LABEL: image_store_f16:
; PACKED:       ; %bb.0:
; PACKED-NEXT:    s_mov_b32 s0, s2
; PACKED-NEXT:    s_mov_b32 s1, s3
; PACKED-NEXT:    s_mov_b32 s2, s4
; PACKED-NEXT:    s_mov_b32 s3, s5
; PACKED-NEXT:    s_mov_b32 s4, s6
; PACKED-NEXT:    s_mov_b32 s5, s7
; PACKED-NEXT:    s_mov_b32 s6, s8
; PACKED-NEXT:    s_mov_b32 s7, s9
; PACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; PACKED-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Stores a <2 x half> value; the dmask immediate is 3. The UNPACKED checks
; expect a 16-bit right shift that produces the second data register.
define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) {
; UNPACKED-LABEL: image_store_v2f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; UNPACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v2f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
; GFX81-NEXT:    s_endpgm
;
; PACKED-LABEL: image_store_v2f16:
; PACKED:       ; %bb.0:
; PACKED-NEXT:    s_mov_b32 s0, s2
; PACKED-NEXT:    s_mov_b32 s1, s3
; PACKED-NEXT:    s_mov_b32 s2, s4
; PACKED-NEXT:    s_mov_b32 s3, s5
; PACKED-NEXT:    s_mov_b32 s4, s6
; PACKED-NEXT:    s_mov_b32 s5, s7
; PACKED-NEXT:    s_mov_b32 s6, s8
; PACKED-NEXT:    s_mov_b32 s7, s9
; PACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16
; PACKED-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Stores a <3 x half> value; the dmask immediate is 7. This function has no
; PACKED check lines.
define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
; UNPACKED-LABEL: image_store_v3f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    v_mov_b32_e32 v5, v1
; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_mov_b32_e32 v4, v0
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; UNPACKED-NEXT:    image_store v[1:3], v[4:5], s[0:7] dmask:0x7 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v3f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX81-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX81-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX81-NEXT:    v_mov_b32_e32 v4, 0
; GFX81-NEXT:    image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Stores a <4 x half> value; the dmask immediate is 15. The UNPACKED checks
; expect two 16-bit right shifts ahead of a four-register data operand.
define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
; UNPACKED-LABEL: image_store_v4f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    v_mov_b32_e32 v6, v1
; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_mov_b32_e32 v5, v0
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
; UNPACKED-NEXT:    image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v4f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16
; GFX81-NEXT:    s_endpgm
;
; PACKED-LABEL: image_store_v4f16:
; PACKED:       ; %bb.0:
; PACKED-NEXT:    s_mov_b32 s0, s2
; PACKED-NEXT:    s_mov_b32 s1, s3
; PACKED-NEXT:    s_mov_b32 s2, s4
; PACKED-NEXT:    s_mov_b32 s3, s5
; PACKED-NEXT:    s_mov_b32 s4, s6
; PACKED-NEXT:    s_mov_b32 s5, s7
; PACKED-NEXT:    s_mov_b32 s6, s8
; PACKED-NEXT:    s_mov_b32 s7, s9
; PACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16
; PACKED-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind writeonly }