# RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s

# GFX9-LABEL: name: diffoporder_add

# GFX9: %{{[0-9]+}}:vreg_64_align2 = REG_SEQUENCE

# GFX9: S_MOV_B32 6144
# GFX9-NEXT: V_ADD_CO_U32
# GFX9-NEXT: V_ADDC_U32
# GFX9-NEXT: [[PTR0:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE
# GFX9-NEXT: %{{[0-9]+}}:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[PTR0]], -2048, 0
# GFX9: %{{[0-9]+}}:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[PTR0]], 0, 0

# Two loads whose addresses differ only by a constant (4096 vs 6144), but the
# two V_ADD_CO_U32 chains list their operands in different orders (%25, %21 vs
# %21, %32). The pass must still recognize the common base and rebase both
# loads onto one pointer with immediate offsets (see the CHECK lines above).
name: diffoporder_add
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 4096
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6144
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---

# GFX9-LABEL: name: LowestInMiddle
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 11200
# GFX9: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -3200, 0
#
# GFX9: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 6400
# GFX9: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX9: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX9: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,

# Three loads at constant offsets 8000, 6400 and 11200 from the same base; the
# lowest offset (6400) sits in the middle of the instruction order. Per the
# CHECK lines above, the pass anchors on the 11200 base and rewrites the 8000
# access as an immediate offset of -3200 from it.
name: LowestInMiddle
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 8000
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 6400
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 11200
    %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64_align2 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# GFX9-LABEL: name: NegativeDistance
# GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 10240
# GFX9: [[V_ADD_CO_U32_e64_4:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX9: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_64_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[V_ADD_CO_U32_e64_4]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -4096, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX9: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0

# Three loads at offsets 6144, 8192 and 10240 from the same base. Per the
# CHECK lines above, the pass anchors on the highest base (10240) and rewrites
# the other two as negative immediate offsets (-4096 and -2048).
# NOTE(review): the original had `COPY $sgpr10` here where every sibling
# function copies $sgpr101; normalized to $sgpr101 (the value only feeds the
# unrelated $sgpr4 copy, not the load-address chain).
name: NegativeDistance
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    %32:sgpr_32 = S_MOV_B32 8192
    %33:vgpr_32, %34:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64_align2 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    %39:sgpr_32 = S_MOV_B32 10240
    %40:vgpr_32, %41:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64_align2 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Test that this input compiles without hitting an assertion; no output checks.
# Unlike the other functions, the V_ADDC_U32 here carries the literal
# 4294967295 (-1) in its second operand rather than 0, so the add pair is not
# a simple zero-extended constant offset; there are deliberately no CHECKs.
name: assert_hit
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64_align2 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64_align2 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_64_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_64_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64_align2 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64_align2 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_64_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_64_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec

    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_64_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_64_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
    %30:vreg_64_align2 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---

# GFX9-LABEL: name: diffoporder_add_store
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0,
# GFX9: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0,

# Store variant of diffoporder_add: two stores whose bases are 4000 and 3000
# past the same pointer. Per the CHECK lines above, the pass rebases them onto
# the 3000 pointer with immediate offsets 1000 and 0.
name: diffoporder_add_store
body:             |
  bb.0.entry:

    %0:vreg_64_align2 = COPY $vgpr0_vgpr1

    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64_align2 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec

    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64_align2 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...