1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
3# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
4# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
5# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
6
# Baseline 32-bit case: G_AMDGPU_ATOMIC_CMPXCHG on a flat (p0, addrspace 0)
# pointer taken directly from $vgpr0_vgpr1, with a <2 x s32> data operand
# built from $vgpr2 and $vgpr3.
# All three check prefixes expect the same selection: the vector becomes a
# vreg_64 REG_SEQUENCE (sub0/sub1), the operation becomes
# FLAT_ATOMIC_CMPSWAP_RTN with trailing immediates 0, 1, and the returned
# value is copied to $vgpr0.
# NOTE(review): the immediates are presumably the offset field and a
# "return value" style flag -- confirm against the instruction definition.
7---
8name:            amdgpu_atomic_cmpxchg_s32_flat
9legalized:       true
10regBankSelected: true
11tracksRegLiveness: true
12body:             |
13  bb.0:
14    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
15
16    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
17    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
18    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
19    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
20    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
21    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
22    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
23    ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
24    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
25    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
26    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
27    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
28    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
29    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
30    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
31    ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
32    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
33    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
34    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
35    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
36    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
37    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
38    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
39    ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
40    %0:vgpr(p0) = COPY $vgpr0_vgpr1
41    %1:vgpr(s32) = COPY $vgpr2
42    %2:vgpr(s32) = COPY $vgpr3
43    %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
44    %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 0)
45    $vgpr0 = COPY %4
46
47...
48
# 32-bit cmpxchg whose address is the incoming flat pointer plus the constant
# 4 (G_CONSTANT i64 4 fed into G_PTR_ADD).
# Expected selection differs per check prefix:
#   - GFX9 checks: the base pointer is used directly and the 4 appears as the
#     first immediate of FLAT_ATOMIC_CMPSWAP_RTN, so no add is emitted.
#   - GFX7 and GFX10 checks: the constant is materialized with two
#     V_MOV_B32_e32 (4 and 0), the 64-bit add is split into
#     V_ADD_CO_U32_e64 + V_ADDC_U32_e64 on the sub0/sub1 halves, and the
#     FLAT instruction keeps immediate 0.
# The carry register class is sreg_64_xexec in the GFX7 checks and
# sreg_32_xm0_xexec in the GFX10 checks.
# NOTE(review): presumably the first immediate is the flat offset field, which
# would explain why only one target folds the constant -- confirm.
49---
50name:            amdgpu_atomic_cmpxchg_s32_flat_gep4
51legalized:       true
52regBankSelected: true
53tracksRegLiveness: true
54body:             |
55  bb.0:
56    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
57
58    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
59    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
60    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
61    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
62    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
63    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
64    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
65    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
66    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
67    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
68    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
69    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
70    ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
71    ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
72    ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
73    ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
74    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
75    ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
76    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
77    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
78    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
79    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
80    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
81    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
82    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
83    ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
84    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
85    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
86    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
87    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
88    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
89    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
90    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
91    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
92    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
93    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
94    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
95    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
96    ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
97    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
98    ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
99    ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
100    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
101    ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
102    %0:vgpr(p0) = COPY $vgpr0_vgpr1
103    %1:vgpr(s32) = COPY $vgpr2
104    %2:vgpr(s32) = COPY $vgpr3
105    %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
106    %4:vgpr(s64) = G_CONSTANT i64 4
107    %5:vgpr(p0) = G_PTR_ADD %0, %4
108    %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
109    $vgpr0 = COPY %6
110
111...
112
# Baseline 64-bit case: G_AMDGPU_ATOMIC_CMPXCHG on a flat (p0, addrspace 0)
# pointer from $vgpr0_vgpr1, with a <2 x s64> data operand built from
# $vgpr2_vgpr3 and $vgpr4_vgpr5.
# All three check prefixes expect the same selection: the vector becomes a
# vreg_128 REG_SEQUENCE (sub0_sub1 / sub2_sub3), the operation becomes
# FLAT_ATOMIC_CMPSWAP_X2_RTN with trailing immediates 0, 1, and the 64-bit
# result is copied to $vgpr0_vgpr1.
113---
114name:            amdgpu_atomic_cmpxchg_s64_flat
115legalized:       true
116regBankSelected: true
117tracksRegLiveness: true
118body:             |
119  bb.0:
120    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
121
122    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
123    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
124    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
125    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
126    ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
127    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
128    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
129    ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
130    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
131    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
132    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
133    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
134    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
135    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
136    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
137    ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
138    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
139    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
140    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
141    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
142    ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
143    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
144    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
145    ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
146    %0:vgpr(p0) = COPY $vgpr0_vgpr1
147    %1:vgpr(s64) = COPY $vgpr2_vgpr3
148    %2:vgpr(s64) = COPY $vgpr4_vgpr5
149    %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
150    %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 0)
151    $vgpr0_vgpr1 = COPY %4
152
153...
154
# 64-bit cmpxchg whose address is the incoming flat pointer plus the constant
# 4 (G_CONSTANT i64 4 fed into G_PTR_ADD).
# Expected selection differs per check prefix:
#   - GFX9 checks: the base pointer is used directly and the 4 appears as the
#     first immediate of FLAT_ATOMIC_CMPSWAP_X2_RTN, so no add is emitted.
#   - GFX7 and GFX10 checks: the constant is materialized with two
#     V_MOV_B32_e32 (4 and 0), the pointer add is split into
#     V_ADD_CO_U32_e64 + V_ADDC_U32_e64 on the sub0/sub1 halves, and the
#     FLAT instruction keeps immediate 0.
# The carry register class is sreg_64_xexec in the GFX7 checks and
# sreg_32_xm0_xexec in the GFX10 checks.
155---
156name:            amdgpu_atomic_cmpxchg_s64_flat_gep4
157legalized:       true
158regBankSelected: true
159tracksRegLiveness: true
160body:             |
161  bb.0:
162    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
163
164    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
165    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
166    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
167    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
168    ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
169    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
170    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
171    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
172    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
173    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
174    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
175    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
176    ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
177    ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
178    ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
179    ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
180    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
181    ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
182    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
183    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
184    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
185    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
186    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
187    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
188    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
189    ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
190    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
191    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
192    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
193    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
194    ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
195    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
196    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
197    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
198    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
199    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
200    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
201    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
202    ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
203    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
204    ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
205    ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
206    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
207    ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
208    %0:vgpr(p0) = COPY $vgpr0_vgpr1
209    %1:vgpr(s64) = COPY $vgpr2_vgpr3
210    %2:vgpr(s64) = COPY $vgpr4_vgpr5
211    %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
212    %4:vgpr(s64) = G_CONSTANT i64 4
213    %5:vgpr(p0) = G_PTR_ADD %0, %4
214    %6:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s64), addrspace 0)
215    $vgpr0_vgpr1 = COPY %6
216
217...
218
# 32-bit cmpxchg whose address is the incoming flat pointer plus the constant
# -4 (G_CONSTANT i64 -4 fed into G_PTR_ADD).
# Unlike the +4 variant, no check prefix expects the constant to be folded
# into the FLAT instruction: all three expect the 64-bit constant to be
# materialized as two V_MOV_B32_e32 (low half 4294967292, high half -1),
# added to the pointer with V_ADD_CO_U32_e64 + V_ADDC_U32_e64, and
# FLAT_ATOMIC_CMPSWAP_RTN to be emitted with immediate 0.
# The only per-prefix difference is the carry register class:
# sreg_64_xexec in the GFX7 and GFX9 checks, sreg_32_xm0_xexec in the GFX10
# checks.
# NOTE(review): presumably a negative value is not encodable in the flat
# offset field on these targets -- confirm against the ISA documentation.
219---
220name:            amdgpu_atomic_cmpxchg_s32_flat_gepm4
221legalized:       true
222regBankSelected: true
223tracksRegLiveness: true
224body:             |
225  bb.0:
226    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
227
228    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
229    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
230    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
231    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
232    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
233    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
234    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
235    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
236    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
237    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
238    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
239    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
240    ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
241    ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
242    ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
243    ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
244    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
245    ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
246    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
247    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
248    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
249    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
250    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
251    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
252    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
253    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
254    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
255    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
256    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
257    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
258    ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
259    ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
260    ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
261    ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
262    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
263    ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
264    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
265    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
266    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
267    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
268    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
269    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
270    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
271    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
272    ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
273    ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
274    ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
275    ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
276    ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
277    ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
278    ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
279    ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
280    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
281    ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
282    %0:vgpr(p0) = COPY $vgpr0_vgpr1
283    %1:vgpr(s32) = COPY $vgpr2
284    %2:vgpr(s32) = COPY $vgpr3
285    %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
286    %4:vgpr(s64) = G_CONSTANT i64 -4
287    %5:vgpr(p0) = G_PTR_ADD %0, %4
288    %6:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %5, %3 :: (load store seq_cst (s32), addrspace 0)
289    $vgpr0 = COPY %6
290
291...
292
# 32-bit cmpxchg whose result is unused: the input defines %4 but, unlike the
# baseline s32 test, never copies it to a physical register.
# All three check prefixes nevertheless expect the returning form,
# FLAT_ATOMIC_CMPSWAP_RTN with immediates 0, 1, and no trailing COPY.
# NOTE(review): the "nortn" name suggests a no-return instruction could be
# chosen here; the checks record what the selector currently emits -- confirm
# whether that is intended before changing them.
293---
294name:            amdgpu_atomic_cmpxchg_s32_flat_nortn
295legalized:       true
296regBankSelected: true
297tracksRegLiveness: true
298body:             |
299  bb.0:
300    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
301
302    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
303    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
304    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
305    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
306    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
307    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
308    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
309    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
310    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
311    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
312    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
313    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
314    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
315    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
316    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
317    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
318    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
319    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
320    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
321    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
322    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
323    %0:vgpr(p0) = COPY $vgpr0_vgpr1
324    %1:vgpr(s32) = COPY $vgpr2
325    %2:vgpr(s32) = COPY $vgpr3
326    %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
327    %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s32), addrspace 0)
328
329...
330
# 64-bit cmpxchg whose result is unused: the input defines %4 but, unlike the
# baseline s64 test, never copies it to a physical register.
# All three check prefixes nevertheless expect the returning form,
# FLAT_ATOMIC_CMPSWAP_X2_RTN with immediates 0, 1, and no trailing COPY.
# NOTE(review): the "nortn" name suggests a no-return instruction could be
# chosen here; the checks record what the selector currently emits -- confirm
# whether that is intended before changing them.
331---
332name:            amdgpu_atomic_cmpxchg_s64_flat_nortn
333legalized:       true
334regBankSelected: true
335tracksRegLiveness: true
336body:             |
337  bb.0:
338    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
339
340    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
341    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
342    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
343    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
344    ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
345    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
346    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
347    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
348    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
349    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
350    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
351    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
352    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
353    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
354    ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
355    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
356    ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
357    ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
358    ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
359    ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
360    ; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64))
361    %0:vgpr(p0) = COPY $vgpr0_vgpr1
362    %1:vgpr(s64) = COPY $vgpr2_vgpr3
363    %2:vgpr(s64) = COPY $vgpr4_vgpr5
364    %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
365    %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst (s64), addrspace 0)
366
367...
368