1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -o - %s | FileCheck -check-prefix=UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=GFX81 %s
4; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
5; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
6
; Scalar case: stores one half value through the 2D image store intrinsic.
; The constant operand 1 passed after the data shows up as dmask:0x1 in the
; expected output. Both active RUN configurations (tonga with the UNPACKED
; prefix, gfx810 with the GFX81 prefix) expect the data in a single VGPR (v2),
; the two coordinates in v[0:1], and the resource copied down from s[2:9]
; into s[0:7] before the store.
; NOTE(review): no RUN line visible in this file passes a PACKED check prefix,
; so the PACKED lines below look like unchecked leftovers - confirm before
; relying on them.
7define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
8; UNPACKED-LABEL: image_store_f16:
9; UNPACKED:       ; %bb.0:
10; UNPACKED-NEXT:    s_mov_b32 s0, s2
11; UNPACKED-NEXT:    s_mov_b32 s1, s3
12; UNPACKED-NEXT:    s_mov_b32 s2, s4
13; UNPACKED-NEXT:    s_mov_b32 s3, s5
14; UNPACKED-NEXT:    s_mov_b32 s4, s6
15; UNPACKED-NEXT:    s_mov_b32 s5, s7
16; UNPACKED-NEXT:    s_mov_b32 s6, s8
17; UNPACKED-NEXT:    s_mov_b32 s7, s9
18; UNPACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
19; UNPACKED-NEXT:    s_endpgm
20;
21; GFX81-LABEL: image_store_f16:
22; GFX81:       ; %bb.0:
23; GFX81-NEXT:    s_mov_b32 s0, s2
24; GFX81-NEXT:    s_mov_b32 s1, s3
25; GFX81-NEXT:    s_mov_b32 s2, s4
26; GFX81-NEXT:    s_mov_b32 s3, s5
27; GFX81-NEXT:    s_mov_b32 s4, s6
28; GFX81-NEXT:    s_mov_b32 s5, s7
29; GFX81-NEXT:    s_mov_b32 s6, s8
30; GFX81-NEXT:    s_mov_b32 s7, s9
31; GFX81-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
32; GFX81-NEXT:    s_endpgm
33; PACKED-LABEL: image_store_f16:
34; PACKED:       ; %bb.0:
35; PACKED-NEXT:    s_mov_b32 s0, s2
36; PACKED-NEXT:    s_mov_b32 s1, s3
37; PACKED-NEXT:    s_mov_b32 s2, s4
38; PACKED-NEXT:    s_mov_b32 s3, s5
39; PACKED-NEXT:    s_mov_b32 s4, s6
40; PACKED-NEXT:    s_mov_b32 s5, s7
41; PACKED-NEXT:    s_mov_b32 s6, s8
42; PACKED-NEXT:    s_mov_b32 s7, s9
43; PACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
44; PACKED-NEXT:    s_endpgm
45  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
46  ret void
47}
48
; Two-element case: stores a <2 x half> with the constant operand 3, which
; appears as dmask:0x3 in the expected output.
; With the UNPACKED prefix (tonga) the expected code shifts v2 right by 16
; into v3 and stores v[2:3], i.e. one half per data register.
; With the GFX81 prefix (gfx810) the expected code also uses v[2:3] as the
; data operand but contains no VALU instruction before the store.
; NOTE(review): v3 is never written in the GFX81 expectation; presumably the
; wider data operand is a requirement of that subtarget's d16 store - confirm.
; NOTE(review): no RUN line visible in this file passes a PACKED check prefix,
; so the PACKED lines below look like unchecked leftovers - confirm before
; relying on them.
49define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) {
50; UNPACKED-LABEL: image_store_v2f16:
51; UNPACKED:       ; %bb.0:
52; UNPACKED-NEXT:    s_mov_b32 s0, s2
53; UNPACKED-NEXT:    s_mov_b32 s1, s3
54; UNPACKED-NEXT:    s_mov_b32 s2, s4
55; UNPACKED-NEXT:    s_mov_b32 s3, s5
56; UNPACKED-NEXT:    s_mov_b32 s4, s6
57; UNPACKED-NEXT:    s_mov_b32 s5, s7
58; UNPACKED-NEXT:    s_mov_b32 s6, s8
59; UNPACKED-NEXT:    s_mov_b32 s7, s9
60; UNPACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
61; UNPACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
62; UNPACKED-NEXT:    s_endpgm
63;
64; GFX81-LABEL: image_store_v2f16:
65; GFX81:       ; %bb.0:
66; GFX81-NEXT:    s_mov_b32 s0, s2
67; GFX81-NEXT:    s_mov_b32 s1, s3
68; GFX81-NEXT:    s_mov_b32 s2, s4
69; GFX81-NEXT:    s_mov_b32 s3, s5
70; GFX81-NEXT:    s_mov_b32 s4, s6
71; GFX81-NEXT:    s_mov_b32 s5, s7
72; GFX81-NEXT:    s_mov_b32 s6, s8
73; GFX81-NEXT:    s_mov_b32 s7, s9
74; GFX81-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
75; GFX81-NEXT:    s_endpgm
76; PACKED-LABEL: image_store_v2f16:
77; PACKED:       ; %bb.0:
78; PACKED-NEXT:    s_mov_b32 s0, s2
79; PACKED-NEXT:    s_mov_b32 s1, s3
80; PACKED-NEXT:    s_mov_b32 s2, s4
81; PACKED-NEXT:    s_mov_b32 s3, s5
82; PACKED-NEXT:    s_mov_b32 s4, s6
83; PACKED-NEXT:    s_mov_b32 s5, s7
84; PACKED-NEXT:    s_mov_b32 s6, s8
85; PACKED-NEXT:    s_mov_b32 s7, s9
86; PACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16
87; PACKED-NEXT:    s_endpgm
88  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
89  ret void
90}
91
; Three-element case: stores a <3 x half> with the constant operand 7, which
; appears as dmask:0x7 in the expected output.
; With the UNPACKED prefix (tonga) the expected code copies the coordinates
; from v[0:1] to v[4:5], moves v2 to v1, shifts v1 right by 16 into v2, and
; stores v[1:3] (v3 is used as it arrives).
; With the GFX81 prefix (gfx810) the expected code rebuilds v2 from its own
; low and high 16-bit halves (shift right, shift left, SDWA or), masks v3 to
; its low 16 bits, zeroes v4, and stores v[2:4] with coordinates in v[0:1].
; This function has no PACKED check lines.
92define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
93; UNPACKED-LABEL: image_store_v3f16:
94; UNPACKED:       ; %bb.0:
95; UNPACKED-NEXT:    v_mov_b32_e32 v5, v1
96; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
97; UNPACKED-NEXT:    s_mov_b32 s0, s2
98; UNPACKED-NEXT:    s_mov_b32 s1, s3
99; UNPACKED-NEXT:    s_mov_b32 s2, s4
100; UNPACKED-NEXT:    s_mov_b32 s3, s5
101; UNPACKED-NEXT:    s_mov_b32 s4, s6
102; UNPACKED-NEXT:    s_mov_b32 s5, s7
103; UNPACKED-NEXT:    s_mov_b32 s6, s8
104; UNPACKED-NEXT:    s_mov_b32 s7, s9
105; UNPACKED-NEXT:    v_mov_b32_e32 v4, v0
106; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
107; UNPACKED-NEXT:    image_store v[1:3], v[4:5], s[0:7] dmask:0x7 unorm d16
108; UNPACKED-NEXT:    s_endpgm
109;
110; GFX81-LABEL: image_store_v3f16:
111; GFX81:       ; %bb.0:
112; GFX81-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
113; GFX81-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
114; GFX81-NEXT:    s_mov_b32 s0, s2
115; GFX81-NEXT:    s_mov_b32 s1, s3
116; GFX81-NEXT:    s_mov_b32 s2, s4
117; GFX81-NEXT:    s_mov_b32 s3, s5
118; GFX81-NEXT:    s_mov_b32 s4, s6
119; GFX81-NEXT:    s_mov_b32 s5, s7
120; GFX81-NEXT:    s_mov_b32 s6, s8
121; GFX81-NEXT:    s_mov_b32 s7, s9
122; GFX81-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
123; GFX81-NEXT:    v_and_b32_e32 v3, 0xffff, v3
124; GFX81-NEXT:    v_mov_b32_e32 v4, 0
125; GFX81-NEXT:    image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
126; GFX81-NEXT:    s_endpgm
127  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
128  ret void
129}
130
; Four-element case: stores a <4 x half> with the constant operand 15, which
; appears as dmask:0xf in the expected output.
; With the UNPACKED prefix (tonga) the expected code copies the coordinates
; from v[0:1] to v[5:6], moves v2 to v1, and uses two right shifts by 16
; (v1 into v2, v3 into v4) so that v[1:4] is the data operand.
; With the GFX81 prefix (gfx810) the expected code stores v[2:5] with no VALU
; instruction before the store.
; NOTE(review): v4 and v5 are never written in the GFX81 expectation;
; presumably the wider data operand is a requirement of that subtarget's d16
; store - confirm.
; NOTE(review): no RUN line visible in this file passes a PACKED check prefix,
; so the PACKED lines below look like unchecked leftovers - confirm before
; relying on them.
131define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
132; UNPACKED-LABEL: image_store_v4f16:
133; UNPACKED:       ; %bb.0:
134; UNPACKED-NEXT:    v_mov_b32_e32 v6, v1
135; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
136; UNPACKED-NEXT:    s_mov_b32 s0, s2
137; UNPACKED-NEXT:    s_mov_b32 s1, s3
138; UNPACKED-NEXT:    s_mov_b32 s2, s4
139; UNPACKED-NEXT:    s_mov_b32 s3, s5
140; UNPACKED-NEXT:    s_mov_b32 s4, s6
141; UNPACKED-NEXT:    s_mov_b32 s5, s7
142; UNPACKED-NEXT:    s_mov_b32 s6, s8
143; UNPACKED-NEXT:    s_mov_b32 s7, s9
144; UNPACKED-NEXT:    v_mov_b32_e32 v5, v0
145; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
146; UNPACKED-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
147; UNPACKED-NEXT:    image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16
148; UNPACKED-NEXT:    s_endpgm
149;
150; GFX81-LABEL: image_store_v4f16:
151; GFX81:       ; %bb.0:
152; GFX81-NEXT:    s_mov_b32 s0, s2
153; GFX81-NEXT:    s_mov_b32 s1, s3
154; GFX81-NEXT:    s_mov_b32 s2, s4
155; GFX81-NEXT:    s_mov_b32 s3, s5
156; GFX81-NEXT:    s_mov_b32 s4, s6
157; GFX81-NEXT:    s_mov_b32 s5, s7
158; GFX81-NEXT:    s_mov_b32 s6, s8
159; GFX81-NEXT:    s_mov_b32 s7, s9
160; GFX81-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16
161; GFX81-NEXT:    s_endpgm
162; PACKED-LABEL: image_store_v4f16:
163; PACKED:       ; %bb.0:
164; PACKED-NEXT:    s_mov_b32 s0, s2
165; PACKED-NEXT:    s_mov_b32 s1, s3
166; PACKED-NEXT:    s_mov_b32 s2, s4
167; PACKED-NEXT:    s_mov_b32 s3, s5
168; PACKED-NEXT:    s_mov_b32 s4, s6
169; PACKED-NEXT:    s_mov_b32 s5, s7
170; PACKED-NEXT:    s_mov_b32 s6, s8
171; PACKED-NEXT:    s_mov_b32 s7, s9
172; PACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16
173; PACKED-NEXT:    s_endpgm
174  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
175  ret void
176}
177
; Declarations of the 2D image store intrinsic, one per data type exercised
; above (half, <2 x half>, <3 x half>, <4 x half>), all with i32 coordinates.
; Operand order as used by the callers in this file: data, an immediate that
; matches the dmask value in the expected output, the two i32 coordinates
; (%s, %t), the <8 x i32> resource, and two trailing immediates that every
; caller passes as 0.
; NOTE(review): the trailing two immediates are presumably texfailctrl and
; cachepolicy - confirm against the intrinsic definitions.
178declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
179declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
180declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
181declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
182
; Attribute group shared by all four declarations above.
183attributes #0 = { nounwind writeonly }
184