; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10 %s
3
; Intrinsics under test. As exercised by the tests in this file, the export
; operands are, in order: target index, source enable mask, four source
; values, the "done" bit and the "vm" bit.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
; Buffer load used only to give an export a loaded operand.
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #2
7
; Enable mask 0: every source operand is printed as "off" even though float
; constants are passed. The first export has done=false, the second done=true.
; GCN-LABEL: {{^}}test_export_zeroes_f32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
define amdgpu_kernel void @test_export_zeroes_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
  ret void
}
17
; FIXME: Should not set up registers for the unused source registers.

; Enable mask 1: only src0 is exported, the other operands print as "off".
; All four constants are still expected to be materialized (see FIXME above).
; GCN-LABEL: {{^}}test_export_en_src0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
30
; Enable mask 2: only src1 is exported; the remaining operands print as "off".
; GCN-LABEL: {{^}}test_export_en_src1_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src1_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
41
; Enable mask 4: only src2 is exported; the remaining operands print as "off".
; GCN-LABEL: {{^}}test_export_en_src2_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src2_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
52
; Enable mask 8: only src3 is exported; the remaining operands print as "off".
; GCN-LABEL: {{^}}test_export_en_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
63
; Enable mask 3: src0 and src1 are exported; src2 and src3 print as "off".
; GCN-LABEL: {{^}}test_export_en_src0_src1_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
74
; Enable mask 5: src0 and src2 are exported; src1 and src3 print as "off".
; GCN-LABEL: {{^}}test_export_en_src0_src2_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
85
; Enable mask 9: src0 and src3 are exported; src1 and src2 print as "off".
; Emitted once without and once with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
98
; Enable mask 15: all four sources are exported, once without and once with
; the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
111
; Target index 7 prints as mrt7. All four sources are the same constant, so a
; single register is expected to feed every operand.
; GCN-LABEL: {{^}}test_export_mrt7_f32:
; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
define amdgpu_kernel void @test_export_mrt7_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
  ret void
}
121
; Target index 8 prints as mrtz.
; GCN-LABEL: {{^}}test_export_z_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_z_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
134
; Target index 9 prints as null.
; GCN-LABEL: {{^}}test_export_null_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
147
; Target index 10 has no named target; it is still emitted and prints as
; invalid_target_10.
; GCN-LABEL: {{^}}test_export_reserved10_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved10_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
160
; Target index 11 has no named target; it is still emitted and prints as
; invalid_target_11.
; GCN-LABEL: {{^}}test_export_reserved11_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved11_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
173
; Target index 12 prints as pos0.
; GCN-LABEL: {{^}}test_export_pos0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
186
; Target index 15 prints as pos3.
; GCN-LABEL: {{^}}test_export_pos3_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos3_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
199
; Target index 32 prints as param0.
; GCN-LABEL: {{^}}test_export_param0_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
212
; Target index 63 prints as param31.
; GCN-LABEL: {{^}}test_export_param31_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}
225
; Last operand set to true: the export prints a trailing "vm". When both the
; done and vm bits are set, the expected order is "done vm".
; GCN-LABEL: {{^}}test_export_vm_f32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
define amdgpu_kernel void @test_export_vm_f32() #0 {
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
  ret void
}
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
; i32 variant. Enable mask 0: every source operand is printed as "off" even
; though integer constants are passed. First export done=false, second
; done=true.
; GCN-LABEL: {{^}}test_export_zeroes_i32:
; GCN: exp mrt0 off, off, off, off{{$}}
; GCN: exp mrt0 off, off, off, off done{{$}}
define amdgpu_kernel void @test_export_zeroes_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 true, i1 false)
  ret void
}
262
; FIXME: Should not set up registers for the unused source registers.

; i32 variant. Enable mask 1: only src0 is exported, the other operands print
; as "off". All four constants are still expected to be materialized (see
; FIXME above).
; GCN-LABEL: {{^}}test_export_en_src0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
275
; i32 variant. Enable mask 2: only src1 is exported.
; GCN-LABEL: {{^}}test_export_en_src1_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src1_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
286
; i32 variant. Enable mask 4: only src2 is exported.
; GCN-LABEL: {{^}}test_export_en_src2_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src2_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
297
; i32 variant. Enable mask 8: only src3 is exported.
; GCN-LABEL: {{^}}test_export_en_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
308
; i32 variant. Enable mask 3: src0 and src1 are exported.
; GCN-LABEL: {{^}}test_export_en_src0_src1_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
319
; i32 variant. Enable mask 5: src0 and src2 are exported.
; GCN-LABEL: {{^}}test_export_en_src0_src2_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
330
; i32 variant. Enable mask 9: src0 and src3 are exported, once without and
; once with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
343
; i32 variant. Enable mask 15: all four sources are exported, once without and
; once with the done bit.
; GCN-LABEL: {{^}}test_export_en_src0_src1_src2_src3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
356
; i32 variant. Target index 7 prints as mrt7. All four sources are the integer
; 5, so a single register is expected to feed every operand.
; GCN-LABEL: {{^}}test_export_mrt7_i32:
; GCN-DAG: v_mov_b32_e32 [[VFIVE:v[0-9]+]], 5
; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]]{{$}}
; GCN: exp mrt7 [[VFIVE]], [[VFIVE]], [[VFIVE]], [[VFIVE]] done{{$}}
define amdgpu_kernel void @test_export_mrt7_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
  ret void
}
366
; i32 variant. Target index 8 prints as mrtz.
; GCN-LABEL: {{^}}test_export_z_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_z_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
379
; i32 variant. Target index 9 prints as null.
; GCN-LABEL: {{^}}test_export_null_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_null_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
392
; i32 variant. Target index 10 has no named target and prints as
; invalid_target_10.
; GCN-LABEL: {{^}}test_export_reserved10_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved10_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
405
; i32 variant. Target index 11 has no named target and prints as
; invalid_target_11.
; GCN-LABEL: {{^}}test_export_reserved11_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_reserved11_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
418
; i32 variant. Target index 12 prints as pos0.
; GCN-LABEL: {{^}}test_export_pos0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
431
; i32 variant. Target index 15 prints as pos3.
; GCN-LABEL: {{^}}test_export_pos3_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_pos3_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
444
; i32 variant. Target index 32 prints as param0.
; GCN-LABEL: {{^}}test_export_param0_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param0_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
457
; i32 variant. Target index 63 prints as param31.
; GCN-LABEL: {{^}}test_export_param31_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
; GCN: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
define amdgpu_kernel void @test_export_param31_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
  call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
  ret void
}
470
; i32 variant. Last operand set to true: the export prints a trailing "vm";
; with the done bit also set the expected order is "done vm".
; GCN-LABEL: {{^}}test_export_vm_i32:
; GCN-DAG: v_mov_b32_e32 [[SRC0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
define amdgpu_kernel void @test_export_vm_i32() #0 {
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
  ret void
}
483
; Export inside a conditionally executed block of a pixel shader (done=false,
; vm=false). Expects an s_cbranch_execz to be emitted ahead of the export.
; GCN-LABEL: {{^}}test_if_export_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
  br label %end

end:
  ret void
}
498
; Same conditional-export shape with the vm bit set (done=false, vm=true).
; Expects an s_cbranch_execz to be emitted ahead of the export.
; GCN-LABEL: {{^}}test_if_export_vm_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
  br label %end

end:
  ret void
}
513
; Same conditional-export shape with the done bit set (done=true, vm=false).
; Expects an s_cbranch_execz to be emitted ahead of the export.
; GCN-LABEL: {{^}}test_if_export_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
  br label %end

end:
  ret void
}
528
; Same conditional-export shape with both bits set (done=true, vm=true).
; Expects an s_cbranch_execz to be emitted ahead of the export.
; GCN-LABEL: {{^}}test_if_export_vm_done_f32:
; GCN: s_cbranch_execz
; GCN: exp
define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  br label %end

end:
  ret void
}
543
; Two exports (param0, then param1 with done) are separated in the IR by the
; fsub that produces the second export's third operand. The checks require
; both operands to be computed first and the two exp instructions to be
; emitted on consecutive lines.
; GCN-LABEL: {{^}}test_export_clustering:
; GCN-DAG: v_mov_b32_e32 [[W0:v[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[W1:v[0-9]+]], 1.0
; GCN-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
; GCN-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
; GCN-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
; GCN-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
; GCN: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
; GCN-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %x, float %y, float %z1, float 1.0, i1 true, i1 false)
  ret void
}
560
; The IR issues the param0 export (target 32) before the pos0 export
; (target 12, done). The checks expect the opposite order in the output:
; pos0 first, then param0, with no s_waitcnt between the two.
; GCN-LABEL: {{^}}test_export_pos_before_param:
; GCN: exp pos0
; GCN-NOT: s_waitcnt
; GCN: exp param0
define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}
572
; Same reordering expectation as the pos0 case, using target 16, which the
; gfx1010 run expects to print as pos4. The function label is matched in both
; runs, but the body is only checked for gfx1010.
; NOTE(review): presumably pos4 is not a valid target on tonga - confirm.
; GCN-LABEL: {{^}}test_export_pos4_before_param:
; GFX10: exp pos4
; GFX10-NOT: s_waitcnt
; GFX10: exp param0
define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 16, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}
584
; The IR issues three param exports (targets 32..34) followed by three pos
; exports (targets 12..14, the last with done). The checks expect all pos
; exports first, in their original relative order, then the param exports in
; their original relative order, with no s_waitcnt between pos2 and param0.
; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
; GCN: exp pos0
; GCN: exp pos1
; GCN: exp pos2
; GCN-NOT: s_waitcnt
; GCN: exp param0
; GCN: exp param1
; GCN: exp param2
define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
  %z0 = fadd float %x, %y
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 34, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
  %z1 = fsub float %y, %x
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 13, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 14, i32 15, float 0.0, float 0.0, float 0.0, float %z1, i1 true, i1 false)
  ret void
}
604
; The IR issues two param exports, then a buffer load, then a pos0 export
; (done) whose last operand is the loaded value. The checks expect pos0 to be
; emitted first, immediately followed by param0 and param1 on consecutive
; lines.
; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
; GCN: exp pos0
; GCN-NEXT: exp param0
; GCN-NEXT: exp param1
define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
  %load = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
  ret void
}
616
; Exports mixed with private (addrspace 5) memory traffic: a store through a
; pointer selected between two allocas, a pos0 export (done) of constants,
; a load from the first alloca, and two param exports that use the loaded
; value. The checks expect the store and the load to appear before the
; exports, and the exports in the order pos0, param0, param1.
; GCN-LABEL: {{^}}test_export_across_store_load:
; GCN: buffer_store
; GCN: buffer_load
; GCN: exp pos0
; GCN: exp param0
; GCN: exp param1
define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
  %data0 = alloca <4 x float>, align 8, addrspace(5)
  %data1 = alloca <4 x float>, align 8, addrspace(5)
  %cmp = icmp eq i32 %idx, 1
  %data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
  %sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
  store float %v, float addrspace(5)* %sptr, align 8
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
  %ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
  %load0 = load float, float addrspace(5)* %ptr0, align 8
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  ret void
}
637
; #0: every test function; #1: the export intrinsics; #2: the buffer load.
attributes #0 = { nounwind }
attributes #1 = { nounwind inaccessiblememonly }
attributes #2 = { nounwind readnone }
641