; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=GFX81 %s

; Scalar case: stores one half through llvm.amdgcn.image.store.2d.f16.i32.
; The first i32 immarg passed to the intrinsic is 1, which the checks below
; expect to appear as dmask:0x1 on the emitted image_store.
; NOTE(review): no RUN line at the top of this file uses the PACKED prefix, so
; the PACKED check lines in this function are never matched by FileCheck.
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
; UNPACKED-LABEL: image_store_f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; PACKED-LABEL: image_store_f16:
; PACKED:       ; %bb.0:
; PACKED-NEXT:    s_mov_b32 s0, s2
; PACKED-NEXT:    s_mov_b32 s1, s3
; PACKED-NEXT:    s_mov_b32 s2, s4
; PACKED-NEXT:    s_mov_b32 s3, s5
; PACKED-NEXT:    s_mov_b32 s4, s6
; PACKED-NEXT:    s_mov_b32 s5, s7
; PACKED-NEXT:    s_mov_b32 s6, s8
; PACKED-NEXT:    s_mov_b32 s7, s9
; PACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; PACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Two-component case: stores a <2 x half> through
; llvm.amdgcn.image.store.2d.v2f16.i32. The first i32 immarg is 3, which the
; checks below expect to appear as dmask:0x3. The UNPACKED checks expect a
; 16-bit right shift of v2 into v3 before the store; the other prefixes expect
; no extra VALU instruction.
; NOTE(review): no RUN line at the top of this file uses the PACKED prefix, so
; the PACKED check lines in this function are never matched by FileCheck.
define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) {
; UNPACKED-LABEL: image_store_v2f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; UNPACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; PACKED-LABEL: image_store_v2f16:
; PACKED:       ; %bb.0:
; PACKED-NEXT:    s_mov_b32 s0, s2
; PACKED-NEXT:    s_mov_b32 s1, s3
; PACKED-NEXT:    s_mov_b32 s2, s4
; PACKED-NEXT:    s_mov_b32 s3, s5
; PACKED-NEXT:    s_mov_b32 s4, s6
; PACKED-NEXT:    s_mov_b32 s5, s7
; PACKED-NEXT:    s_mov_b32 s6, s8
; PACKED-NEXT:    s_mov_b32 s7, s9
; PACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16
; PACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v2f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; FIXME: Broken. The <3 x half> variant is disabled (commented out) for now.
; define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
;   call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
;   ret void
; }

; Four-component case: stores a <4 x half> through
; llvm.amdgcn.image.store.2d.v4f16.i32. The first i32 immarg is 15, which the
; checks below expect to appear as dmask:0xf. The UNPACKED checks expect the
; coordinates to be copied to v[5:6] and two 16-bit right shifts to fill
; v[1:4] before the store; the other prefixes expect the store alone.
; NOTE(review): no RUN line at the top of this file uses the PACKED prefix, so
; the PACKED check lines in this function are never matched by FileCheck.
define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
; UNPACKED-LABEL: image_store_v4f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    v_mov_b32_e32 v6, v1
; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_mov_b32_e32 v5, v0
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
; UNPACKED-NEXT:    image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; PACKED-LABEL: image_store_v4f16:
; PACKED:       ; %bb.0:
; PACKED-NEXT:    s_mov_b32 s0, s2
; PACKED-NEXT:    s_mov_b32 s1, s3
; PACKED-NEXT:    s_mov_b32 s2, s4
; PACKED-NEXT:    s_mov_b32 s3, s5
; PACKED-NEXT:    s_mov_b32 s4, s6
; PACKED-NEXT:    s_mov_b32 s5, s7
; PACKED-NEXT:    s_mov_b32 s6, s8
; PACKED-NEXT:    s_mov_b32 s7, s9
; PACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16
; PACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v4f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Intrinsic declarations for the 2D image stores used by the tests in this
; file, one per data type (half, <2 x half>, <3 x half>, <4 x half>). The
; v3f16 form is only referenced from the commented-out image_store_v3f16 test.
declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Attribute group shared by all four declarations above.
attributes #0 = { nounwind writeonly }
