; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s

; Kernel that stores a constant and then executes an explicit "s_endpgm"
; through side-effecting inline asm before returning. The output is expected
; to contain s_endpgm twice (presumably once from the inline asm and once from
; the lowering of the return -- confirm against the generated code).
; CHECK-LABEL: {{^}}inline_asm:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_kernel void @inline_asm(i32 addrspace(1)* %out) {
entry:
  store i32 5, i32 addrspace(1)* %out
  call void asm sideeffect "s_endpgm", ""()
  ret void
}

; Same explicit "s_endpgm" inline asm as the kernel variant, but in a function
; using the amdgpu_ps calling convention. Two s_endpgm instructions are
; expected in the output.
; CHECK-LABEL: {{^}}inline_asm_shader:
; CHECK: s_endpgm
; CHECK: s_endpgm
define amdgpu_ps void @inline_asm_shader() {
entry:
  call void asm sideeffect "s_endpgm", ""()
  ret void
}


; Make sure inline assembly is treated as divergent: the conditional branch on
; the value produced by the asm is expected to be lowered through
; s_and_saveexec_b64.
; CHECK-LABEL: {{^}}branch_on_asm:
; CHECK: s_mov_b32 s{{[0-9]+}}, 0
; CHECK: s_and_saveexec_b64
define amdgpu_kernel void @branch_on_asm(i32 addrspace(1)* %out) {
  ; The asm result lands in an SGPR ("=s" constraint).
  %zero = call i32 asm "s_mov_b32 $0, 0", "=s"()
  %cmp = icmp eq i32 %zero, 0
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; Inline asm with a 64-bit SGPR output ("=s") and a VGPR input ("v").
; The i32 kernel argument is expected to be copied from an SGPR into a VGPR
; for the compare, and the resulting SGPR pair is expected to be copied into
; VGPRs before the 64-bit store.
; CHECK-LABEL: {{^}}v_cmp_asm:
; CHECK: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; CHECK: v_cmp_ne_u32_e64 s{{\[}}[[MASK_LO:[0-9]+]]:[[MASK_HI:[0-9]+]]{{\]}}, 0, [[SRC]]
; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[MASK_LO]]
; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[MASK_HI]]
; CHECK: buffer_store_dwordx2 v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
define amdgpu_kernel void @v_cmp_asm(i64 addrspace(1)* %out, i32 %in) {
  %sgpr = tail call i64 asm "v_cmp_ne_u32_e64 $0, 0, $1", "=s,v"(i32 %in)
  store i64 %sgpr, i64 addrspace(1)* %out
  ret void
}

; Code-size accounting for a single inline asm instruction: a kernel whose
; body is one "v_nop_e64" is expected to report a code length of 12 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64", ""()
  ret void
}

; All inline asm instructions are assumed to be the maximum size, so a single
; "v_nop_e32" is expected to report the same 12-byte code length as the
; "v_nop_e64" case.
; CHECK-LABEL: {{^}}code_size_inline_asm_small_inst:
; CHECK: codeLenInByte = 12
define amdgpu_kernel void @code_size_inline_asm_small_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e32", ""()
  ret void
}

; Two instructions inside one multi-line asm string (with a leading newline
; and a whitespace-only last line) are expected to be counted as two
; instructions: code length 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64
    v_nop_e64
   ", ""()
  ret void
}

; Same as the two-instruction case, but with an empty line between the two
; instructions. The empty line is expected not to add to the size: code
; length stays at 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_inst_extra_newline:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_2_inst_extra_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
    v_nop_e64

    v_nop_e64
   ", ""()
  ret void
}

; An empty asm string is expected to contribute nothing to the code size:
; the kernel reports a code length of 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_0_inst:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_0_inst(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "", ""()
  ret void
}

; An asm string consisting only of an assembler comment is expected to add no
; code size: the kernel reports a code length of 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment", ""()
  ret void
}

; Comment-only asm string that starts with a newline before the comment.
; Expected code length is still 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_newline_1_comment:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_newline_1_comment(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "
; comment", ""()
  ret void
}

; Comment-only asm string that ends with a newline after the comment.
; Expected code length is still 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_1_comment_newline:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_1_comment_newline(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
", ""()
  ret void
}

; Two comment markers on a single asm line, separated by spaces. Nothing
; after the first marker is expected to be counted as an instruction: code
; length 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment ; second comment", ""()
  ret void
}

; Two comment markers on a single asm line with no space around the second
; marker. Expected code length is still 4 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_2_comments_line_nospace:
; CHECK: codeLenInByte = 4
define amdgpu_kernel void @code_size_inline_asm_2_comments_line_nospace(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; first comment;second comment", ""()
  ret void
}

; Asm string mixing a leading comment, an instruction with a trailing inline
; comment, a standalone comment line, a second instruction, an empty line and
; trailing comments. Only the two instructions are expected to be counted:
; code length 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments0:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments0(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; Variant of the mixed-comment case in which the asm string begins directly
; with an instruction followed by an inline comment. Two instructions are
; expected to be counted: code length 20 bytes.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments1:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments1(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "v_nop_e64 ; inline comment
; separate comment
    v_nop_e64

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; Mixed comments around two instructions that take operands (v_add_i32_e32
; and v_bfrev_b32_e32). The expected code length is 20 bytes, the same as for
; two operand-less instructions.
; CHECK-LABEL: {{^}}code_size_inline_asm_mixed_comments_operands:
; CHECK: codeLenInByte = 20
define amdgpu_kernel void @code_size_inline_asm_mixed_comments_operands(i32 addrspace(1)* %out) {
entry:
  call void asm sideeffect "; comment
    v_add_i32_e32 v0, vcc, v1, v2 ; inline comment
; separate comment
    v_bfrev_b32_e32 v0, 1

    ; trailing comment
    ; extra comment
  ", ""()
  ret void
}

; A 64-bit immediate (123456 = 0x1e240) passed as an asm input pinned to the
; physical register pair v[0:1]. The current expected output materializes the
; two halves in s0/s1 first and then copies them into v0/v1.
; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
; CHECK: s_mov_b32 s1, 0
; CHECK: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]
define amdgpu_kernel void @i64_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v[0:1]}"(i64 123456)
  ret void
}

; An i1 "true" immediate passed as an asm input pinned to physical register
; v0 is expected to be materialized as -1 in v0.
; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr:
; CHECK: v_mov_b32_e32 v0, -1{{$}}
; CHECK: ; use v0
define amdgpu_kernel void @i1_imm_input_phys_vgpr() {
entry:
  call void asm sideeffect "; use $0 ", "{v0}"(i1 true)
  ret void
}

; A loaded i1 passed as an asm input pinned to physical register v0. The
; loaded byte is expected to be masked to its low bit, compared against 1, and
; then expanded to 0 / -1 in v0 with v_cndmask before the asm uses it.
; CHECK-LABEL: {{^}}i1_input_phys_vgpr:
; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]]
; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]]
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]]
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: ; use v0
define amdgpu_kernel void @i1_input_phys_vgpr() {
entry:
  %val = load i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 ", "{v0}"(i1 %val)
  ret void
}

; Two volatile i1 loads passed as asm inputs pinned to physical registers v0
; and v1. Each value is expected to go through its own compare-with-1 and
; v_cndmask expansion, both using vcc.
; FIXME: Should be scheduled to shrink vcc
; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2:
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0
; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc
; CHECK: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK: v_cndmask_b32_e64 v1, 0, -1, vcc
define amdgpu_kernel void @i1_input_phys_vgpr_x2() {
entry:
  %val0 = load volatile i1, i1 addrspace(1)* undef
  %val1 = load volatile i1, i1 addrspace(1)* undef
  call void asm sideeffect "; use $0 $1 ", "{v0}, {v1}"(i1 %val0, i1 %val1)
  ret void
}

; Two asm statements that both define physical register v0. The first result
; is expected to be copied out to v1 before the second definition overwrites
; v0, so that both values are available to the shift that follows.
; CHECK-LABEL: {{^}}muliple_def_phys_vgpr:
; CHECK: ; def v0
; CHECK: v_mov_b32_e32 v1, v0
; CHECK: ; def v0
; CHECK: v_lshlrev_b32_e32 v{{[0-9]+}}, v0, v1
define amdgpu_kernel void @muliple_def_phys_vgpr() {
entry:
  %def0 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  %def1 = call i32 asm sideeffect "; def $0 ", "={v0}"()
  ; %def0 is the value being shifted, %def1 is the shift amount.
  %shl = shl i32 %def0, %def1
  store i32 %shl, i32 addrspace(1)* undef
  ret void
}

; Immediate operand ("n" constraint) printed with the "c" operand modifier:
; the constant 10 is expected to appear as a bare "10" after s_trap.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file -- confirm it is declared elsewhere or drop the reference.
; CHECK-LABEL: {{^}}asm_constraint_c_n:
; CHECK: s_trap 10{{$}}
define amdgpu_kernel void @asm_constraint_c_n() {
entry:
  tail call void asm sideeffect "s_trap ${0:c}", "n"(i32 10) #1
  ret void
}

; Immediate operand ("n" constraint) printed with the "n" operand modifier:
; the constant 10 is expected to appear negated, as "-10", after s_trap.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file -- confirm it is declared elsewhere or drop the reference.
; CHECK-LABEL: {{^}}asm_constraint_n_n:
; CHECK: s_trap -10{{$}}
define amdgpu_kernel void @asm_constraint_n_n() {
entry:
  tail call void asm sideeffect "s_trap ${0:n}", "n"(i32 10) #1
  ret void
}
