1# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
2# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
3
4# test for 3 consecutive add/addc pairs, each should generate sdwa
5# GFX9-LABEL: name:            test1_add_co_sdwa
6# GFX9: = nsw V_ADD_CO_U32_sdwa
7# GFX9-NEXT: = nuw V_ADDC_U32_e32
8# GFX9: V_ADD_CO_U32_sdwa
9# GFX9-NEXT: V_ADDC_U32_e32
10# GFX9: V_ADD_CO_U32_sdwa
11# GFX9-NEXT: V_ADDC_U32_e32
12---
# Input: three V_AND_B32_e32 results (mask 255 held in %22), each feeding a
# V_ADD_CO_U32_e64 / V_ADDC_U32_e64 pair whose two halves are combined by a
# REG_SEQUENCE and stored. In every pair the ADDC consumes the ADD's carry-out
# with `killed`, and the ADDC's own carry-out is marked `dead`. Only the first
# pair carries the nsw/nuw flags that the first two checks look for.
13name:            test1_add_co_sdwa
14tracksRegLiveness: true
15registers:
16  - { id: 0, class: vgpr_32, preferred-register: '' }
17liveins:
18  - { reg: '$vgpr0', virtual-reg: '%0' }
19  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
20body:             |
21  bb.0:
22    liveins: $vgpr0, $sgpr0_sgpr1
23
24    %1:sgpr_64 = COPY $sgpr0_sgpr1
25    %0:vgpr_32 = COPY $vgpr0
26    %22:sreg_32_xm0 = S_MOV_B32 255
27    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
28    %30:vreg_64 = COPY $sgpr0_sgpr1
29    %63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
30    %64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
31    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
32    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
33
34    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
35    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
36    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
37    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
38    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
39
40    %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
41    %173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec
42    %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
43    %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
44    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
45
46...
47
48# test for VCC interference between interleaved add/addc pairs, should generate only 1 sdwa transformation
49# GFX9-LABEL: name:            test2_add_co_sdwa
50# GFX9: V_ADD_CO_U32_sdwa
51# GFX9: V_ADDC_U32_e32
52# GFX9-NOT: V_ADD_CO_U32_sdwa
53# GFX9-NOT: V_ADDC_U32_e32
54---
# Input: the first V_ADD_CO_U32_e64 (carry-out %65) is separated from the
# V_ADDC_U32_e64 that consumes %65 by a complete, adjacent ADD/ADDC pair
# (%163/%164). After the first store, another adjacent ADD/ADDC pair follows.
# NOTE(review): %161 through %166 are each defined twice in this function, so
# the input is not in SSA form - confirm this is intentional.
55name:            test2_add_co_sdwa
56tracksRegLiveness: true
57registers:
58  - { id: 0, class: vgpr_32, preferred-register: '' }
59liveins:
60  - { reg: '$vgpr0', virtual-reg: '%0' }
61  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
62body:             |
63  bb.0:
64    liveins: $vgpr0, $sgpr0_sgpr1
65
66    %1:sgpr_64 = COPY $sgpr0_sgpr1
67    %0:vgpr_32 = COPY $vgpr0
68    %22:sreg_32_xm0 = S_MOV_B32 255
69    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
70    %30:vreg_64 = COPY $sgpr0_sgpr1
71    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
72
73    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
74    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
75    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
76    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
77
78    %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
79    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
80    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
81
82    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
83    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
84    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
85    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
86    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
87
88...
89
90# test for CarryOut used, should reject
91# GFX9-LABEL: name:            test3_add_co_sdwa
92# GFX9: V_ADD_CO_U32_e64
93# GFX9: V_ADDC_U32_e64
94# GFX9-NOT: V_ADD_CO_U32_sdwa
95# GFX9-NOT: V_ADDC_U32_e32
96---
# Input: a single ADD/ADDC pair in which the ADDC's carry-out %66 is not dead:
# it is used as the sub1 operand of the REG_SEQUENCE that feeds the store.
97name:            test3_add_co_sdwa
98tracksRegLiveness: true
99registers:
100  - { id: 0, class: vgpr_32, preferred-register: '' }
101liveins:
102  - { reg: '$vgpr0', virtual-reg: '%0' }
103  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
104body:             |
105  bb.0:
106    liveins: $vgpr0, $sgpr0_sgpr1
107
108    %1:sgpr_64 = COPY $sgpr0_sgpr1
109    %0:vgpr_32 = COPY $vgpr0
110    %22:sreg_32_xm0 = S_MOV_B32 255
111    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
112    %30:vreg_64 = COPY $sgpr0_sgpr1
113    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
114    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
115    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
116    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
117
118...
119
120# test for CarryIn used more than once, should reject
121# GFX9-LABEL: name:            test4_add_co_sdwa
122# GFX9: V_ADD_CO_U32_e64
123# GFX9: V_ADDC_U32_e64
124# GFX9-NOT: V_ADD_CO_U32_sdwa
125# GFX9-NOT: V_ADDC_U32_e32
126---
# Input: a single ADD/ADDC pair in which the ADD's carry-out %65 has two uses:
# the carry-in of the ADDC and the sub1 operand of the REG_SEQUENCE.
127name:            test4_add_co_sdwa
128tracksRegLiveness: true
129registers:
130  - { id: 0, class: vgpr_32, preferred-register: '' }
131liveins:
132  - { reg: '$vgpr0', virtual-reg: '%0' }
133  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
134body:             |
135  bb.0:
136    liveins: $vgpr0, $sgpr0_sgpr1
137
138    %1:sgpr_64 = COPY $sgpr0_sgpr1
139    %0:vgpr_32 = COPY $vgpr0
140    %22:sreg_32_xm0 = S_MOV_B32 255
141    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
142    %30:vreg_64 = COPY $sgpr0_sgpr1
143    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
144    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
145    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
146    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
147
148
149...
150
151# test for simple example, should generate sdwa
152# GFX9-LABEL: name:            test5_add_co_sdwa
153# GFX9: V_ADD_CO_U32_sdwa
154# GFX9: V_ADDC_U32_e32
155---
# Input: a single ADD/ADDC pair. The ADD's carry-out %65 is used only as the
# ADDC's carry-in, and the ADDC's carry-out %66 has no uses. Unlike
# test1_add_co_sdwa, no killed/dead flags are written on those operands.
156name:            test5_add_co_sdwa
157tracksRegLiveness: true
158registers:
159  - { id: 0, class: vgpr_32, preferred-register: '' }
160liveins:
161  - { reg: '$vgpr0', virtual-reg: '%0' }
162  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
163body:             |
164  bb.0:
165    liveins: $vgpr0, $sgpr0_sgpr1
166
167    %1:sgpr_64 = COPY $sgpr0_sgpr1
168    %0:vgpr_32 = COPY $vgpr0
169    %22:sreg_32_xm0 = S_MOV_B32 255
170    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
171    %30:vreg_64 = COPY $sgpr0_sgpr1
172    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
173    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
174    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
175    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
176
177
178...
179
180# test for V_ADD_CO_U32_e64 only, should reject
181# GFX9-LABEL: name:            test6_add_co_sdwa
182# GFX9: V_ADD_CO_U32_e64
183# GFX9-NOT: V_ADD_CO_U32_sdwa
184# GFX9-NOT: V_ADDC_U32_e32
185---
# Input: a lone V_ADD_CO_U32_e64 with no V_ADDC_U32_e64 consuming its
# carry-out %65; the REG_SEQUENCE takes %23 as its sub1 operand instead.
186name:            test6_add_co_sdwa
187tracksRegLiveness: true
188registers:
189  - { id: 0, class: vgpr_32, preferred-register: '' }
190liveins:
191  - { reg: '$vgpr0', virtual-reg: '%0' }
192  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
193body:             |
194  bb.0:
195    liveins: $vgpr0, $sgpr0_sgpr1
196
197    %1:sgpr_64 = COPY $sgpr0_sgpr1
198    %0:vgpr_32 = COPY $vgpr0
199    %22:sreg_32_xm0 = S_MOV_B32 255
200    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
201    %30:vreg_64 = COPY $sgpr0_sgpr1
202    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
203    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
204    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
205
206
207...
208
209# test for V_ADDC_U32_e64 only, should reject
210# GFX9-LABEL: name:            test7_add_co_sdwa
211# GFX9: V_ADDC_U32_e64
212# GFX9-NOT: V_ADD_CO_U32_sdwa
213# GFX9-NOT: V_ADDC_U32_e32
214---
# Input: a lone V_ADDC_U32_e64 whose carry-in %24 is a COPY of $sgpr0_sgpr1
# rather than the carry-out of a V_ADD_CO_U32_e64. Its result %64 is unused;
# the stored value is built from %23 in both subregisters.
215name:            test7_add_co_sdwa
216tracksRegLiveness: true
217registers:
218  - { id: 0, class: vgpr_32, preferred-register: '' }
219liveins:
220  - { reg: '$vgpr0', virtual-reg: '%0' }
221  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
222body:             |
223  bb.0:
224    liveins: $vgpr0, $sgpr0_sgpr1
225
226    %1:sgpr_64 = COPY $sgpr0_sgpr1
227    %0:vgpr_32 = COPY $vgpr0
228    %22:sreg_32_xm0 = S_MOV_B32 255
229    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
230    %24:sreg_64_xexec = COPY $sgpr0_sgpr1
231
232    %30:vreg_64 = COPY $sgpr0_sgpr1
233    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
234    %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
235    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
236
237
238...
239
240# test for $vcc defined between two adds, should not generate
241# GFX9-LABEL: name:            test8_add_co_sdwa
242# GFX9-NOT: V_ADD_CO_U32_sdwa
243# GFX9: V_ADDC_U32_e64
244---
# Input: $vcc is written (COPY from %30) between the ADD and the ADDC and is
# read back into %31 after the ADDC; %31.sub0 is the store's first operand.
245name:            test8_add_co_sdwa
246tracksRegLiveness: true
247registers:
248  - { id: 0, class: vgpr_32, preferred-register: '' }
249liveins:
250  - { reg: '$vgpr0', virtual-reg: '%0' }
251  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
252body:             |
253  bb.0:
254    liveins: $vgpr0, $sgpr0_sgpr1
255
256    %1:sgpr_64 = COPY $sgpr0_sgpr1
257    %0:vgpr_32 = COPY $vgpr0
258    %22:sreg_32_xm0 = S_MOV_B32 255
259    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
260    %30:vreg_64 = COPY $sgpr0_sgpr1
261    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
262    $vcc = COPY %30
263    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
264    %31:vreg_64 = COPY $vcc
265    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
266    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
267
268
269...
270
271# test for non-dead $vcc ($vcc is live across both adds), should not generate
272# GFX9-LABEL: name:            test9_add_co_sdwa
273# GFX9-NOT: V_ADD_CO_U32_sdwa
274# GFX9: V_ADDC_U32_e64
275---
# Input: $vcc is written (COPY from %30) before the ADD/ADDC pair and is read
# only after the ADDC, so $vcc is live across both instructions.
276name:            test9_add_co_sdwa
277tracksRegLiveness: true
278registers:
279  - { id: 0, class: vgpr_32, preferred-register: '' }
280liveins:
281  - { reg: '$vgpr0', virtual-reg: '%0' }
282  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
283body:             |
284  bb.0:
285    liveins: $vgpr0, $sgpr0_sgpr1
286
287    %1:sgpr_64 = COPY $sgpr0_sgpr1
288    %0:vgpr_32 = COPY $vgpr0
289    %22:sreg_32_xm0 = S_MOV_B32 255
290    %30:vreg_64 = COPY $sgpr0_sgpr1
291    $vcc = COPY %30
292    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
293    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
294    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
295    %31:vreg_64 = COPY $vcc
296    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
297    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
298
299...
300
301# test for $vcc_lo defined before the adds and read between them, should not generate
302# GFX9-LABEL: name:            test10_add_co_sdwa
303# GFX9-NOT: V_ADD_CO_U32_sdwa
304# GFX9: V_ADDC_U32_e64
305---
# Input: $vcc_lo is written (COPY from %30.sub0) before the ADD and is read
# (COPY into %31) between the ADD and the ADDC.
306name:            test10_add_co_sdwa
307tracksRegLiveness: true
308registers:
309  - { id: 0, class: vgpr_32, preferred-register: '' }
310liveins:
311  - { reg: '$vgpr0', virtual-reg: '%0' }
312  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
313body:             |
314  bb.0:
315    liveins: $vgpr0, $sgpr0_sgpr1
316
317    %1:sgpr_64 = COPY $sgpr0_sgpr1
318    %0:vgpr_32 = COPY $vgpr0
319    %22:sreg_32_xm0 = S_MOV_B32 255
320    %30:vreg_64 = COPY $sgpr0_sgpr1
321    $vcc_lo = COPY %30.sub0
322    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
323    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
324    %31:vgpr_32 = COPY $vcc_lo
325    %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
326    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
327    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
328    GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
329
330...
331
332# test for $vcc_hi defined before the adds and read between them, should not generate
333# GFX9-LABEL: name:            test11_add_co_sdwa
334# GFX9-NOT: V_ADD_CO_U32_sdwa
335# GFX9: V_ADDC_U32_e64
336---
# Input: $vcc_hi is written (COPY from %30.sub0) before the ADD and is read
# (COPY into %31) between the ADD and the ADDC.
337name:            test11_add_co_sdwa
338tracksRegLiveness: true
339registers:
340  - { id: 0, class: vgpr_32, preferred-register: '' }
341liveins:
342  - { reg: '$vgpr0', virtual-reg: '%0' }
343  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
344body:             |
345  bb.0:
346    liveins: $vgpr0, $sgpr0_sgpr1
347
348    %1:sgpr_64 = COPY $sgpr0_sgpr1
349    %0:vgpr_32 = COPY $vgpr0
350    %22:sreg_32_xm0 = S_MOV_B32 255
351    %30:vreg_64 = COPY $sgpr0_sgpr1
352    $vcc_hi = COPY %30.sub0
353    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
354    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
355    %31:vgpr_32 = COPY $vcc_hi
356    %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
357    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
358    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
359    GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
360
361...
362
363# test for $vcc defined and used between adds, should not generate
364# GFX9-LABEL: name:            test12_add_co_sdwa
365# GFX9-NOT: V_ADD_CO_U32_sdwa
366# GFX9: V_ADDC_U32_e64
367---
368name:            test12_add_co_sdwa
369tracksRegLiveness: true
370registers:
371  - { id: 0, class: vgpr_32, preferred-register: '' }
372liveins:
373  - { reg: '$vgpr0', virtual-reg: '%0' }
374  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
375body:             |
376  bb.0:
377    liveins: $vgpr0, $sgpr0_sgpr1
378
379    %1:sgpr_64 = COPY $sgpr0_sgpr1
380    %0:vgpr_32 = COPY $vgpr0
381    %22:sreg_32_xm0 = S_MOV_B32 255
382    %30:vreg_64 = COPY $sgpr0_sgpr1
383    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
384    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
385    $vcc = COPY %30
386    %31:vreg_64 = COPY killed $vcc
387    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
388    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
389    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, 0, 0, implicit $exec, implicit $exec :: (store 8)
390
391