# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s

# diffoporder_add: two 8-byte global loads whose addresses are the same
# 64-bit value (%21 low half, %23 high half) plus a constant materialized
# with S_MOV_B32: 4096 for the first load and 6144 for the second. The
# constant is the first source operand of the low-half add for the first
# load and the second source operand for the other one.
# The patterns below expect both loads to come out with immediate offsets
# -2048 and 0 (4096 - 6144 = -2048).
# GFX10-LABEL: name: diffoporder_add
# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, -2048, 0
# GFX10: %{{[0-9]+}}:vreg_64 = GLOBAL_LOAD_DWORDX2 %{{[0-9]+}}, 0, 0

name: diffoporder_add
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    ; %21 (low) / %23 (high) form the 64-bit value both load addresses start from.
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    ; First load: + 4096, with the constant as the first add operand.
    %25:sgpr_32 = S_MOV_B32 4096
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %25, %21, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    ; Second load: + 6144, with the constant as the second add operand.
    %32:sgpr_32 = S_MOV_B32 6144
    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
...
---

# LowestInMiddle: three 8-byte global loads from the same 64-bit value
# (%21 low half, %23 high half) plus 8000, 6400 and 11200, in that program
# order, so the smallest constant belongs to the middle load.
# The patterns below expect a base built from a fresh S_MOV_B32 6400 that
# serves the first two loads with immediate offsets 1600 (8000 - 6400) and
# 0, while the third load keeps its own 11200 base with offset 0.
# NOTE(review): 11200 - 6400 = 4800 presumably does not fit the immediate
# offset field, which would explain the separate base - confirm.
# GFX10-LABEL: name: LowestInMiddle
# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 6400
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 1600, 0
# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0,
#
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 11200
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0,

name: LowestInMiddle
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    ; %21 (low) / %23 (high) form the 64-bit value all three load addresses start from.
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    ; First load: + 8000.
    %25:sgpr_32 = S_MOV_B32 8000
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    ; Second load: + 6400, the smallest of the three constants.
    %32:sgpr_32 = S_MOV_B32 6400
    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    ; Third load: + 11200.
    %39:sgpr_32 = S_MOV_B32 11200
    %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# NegativeDistance: three 8-byte global loads from the same 64-bit value
# (%21 low half, %23 high half) plus 6144, 8192 and 10240, in that program
# order.
# The patterns below expect a base built from a fresh S_MOV_B32 8192 that
# serves the first two loads with immediate offsets -2048 (6144 - 8192)
# and 0, while the third load keeps its own 10240 base with offset 0.
# NOTE(review): 10240 - 8192 = 2048 presumably falls just outside the
# immediate offset range while -2048 is inside it - confirm.
# GFX10-LABEL: name: NegativeDistance
# GFX10: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
# GFX10: [[BASE_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_1]]
# GFX10: [[BASE_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_5]]
# GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE_LO]], %subreg.sub0, [[BASE_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], -2048, 0
# GFX10: [[GLOBAL_LOAD_DWORDX2_1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE2]], 0, 0
# GFX10: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 10240
# GFX10: [[BASE1_LO:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_7:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %{{[0-9]+}}, [[S_MOV_B32_2]]
# GFX10: [[BASE1_HI:%[0-9]+]]:vgpr_32, dead %{{[0-9]+}}:sreg_32_xm0_xexec = V_ADDC_U32_e64 %{{[0-9]+}}, 0, killed [[V_ADD_CO_U32_e64_7]]
# GFX10: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[BASE1_LO]], %subreg.sub0, [[BASE1_HI]], %subreg.sub1
# GFX10: [[GLOBAL_LOAD_DWORDX2_2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[REG_SEQUENCE3]], 0, 0

name: NegativeDistance
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    ; %21 (low) / %23 (high) form the 64-bit value all three load addresses start from.
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec
    ; First load: + 6144.
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
    ; Second load: + 8192.
    %32:sgpr_32 = S_MOV_B32 8192
    %33:vgpr_32, %34:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %32, 0, implicit $exec
    %35:vgpr_32, dead %36:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %34, 0, implicit $exec
    %37:vreg_64 = REG_SEQUENCE %33, %subreg.sub0, %35, %subreg.sub1
    %38:vreg_64 = GLOBAL_LOAD_DWORDX2 %37, 0, 0, implicit $exec
    ; Third load: + 10240.
    %39:sgpr_32 = S_MOV_B32 10240
    %40:vgpr_32, %41:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %39, 0, implicit $exec
    %42:vgpr_32, dead %43:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 0, killed %41, 0, implicit $exec
    %44:vreg_64 = REG_SEQUENCE %40, %subreg.sub0, %42, %subreg.sub1
    %45:vreg_64 = GLOBAL_LOAD_DWORDX2 %44, 0, 0, implicit $exec
...
---

# Tests for a successful compilation.
# No check patterns are attached to this function: it only has to go
# through the pass (and the machine verifier enabled on the command line)
# without failing. It contains a single 8-byte global load whose address
# adds 6144 to the low half and the immediate 4294967295 (rather than 0,
# as in the other functions of this file) to the high half.
name: assert_hit
body:             |
  bb.0.entry:
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
    %3:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
    %4:sreg_32_xm0 = COPY $sgpr101
    %5:sreg_32_xm0 = S_MOV_B32 0
    $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3
    $sgpr4 = COPY %4
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    %6:vreg_64 = COPY $vgpr0_vgpr1
    %7:vgpr_32 = V_AND_B32_e32 255, %6.sub0, implicit $exec
    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %9:vreg_64 = REG_SEQUENCE killed %7, %subreg.sub0, %8, %subreg.sub1
    %10:vgpr_32 = V_LSHLREV_B32_e64 7, %6.sub0, implicit $exec
    %11:vgpr_32 = V_AND_B32_e32 -32768, killed %10, implicit $exec
    %12:sgpr_32 = COPY %1.sub1
    %13:vgpr_32 = COPY %5
    %14:vgpr_32, %15:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1.sub0, %11, 0, implicit $exec
    %16:vgpr_32 = COPY %12
    %17:vgpr_32, dead %18:sreg_32_xm0_xexec = V_ADDC_U32_e64 %16, %13, killed %15, 0, implicit $exec
    %19:vreg_64 = REG_SEQUENCE %14, %subreg.sub0, %17, %subreg.sub1
    %20:vreg_64 = V_LSHLREV_B64_e64 3, %9, implicit $exec
    %21:vgpr_32, %22:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %14, %20.sub0, 0, implicit $exec
    %23:vgpr_32, dead %24:sreg_32_xm0_xexec = V_ADDC_U32_e64 %17, %20.sub1, killed %22, 0, implicit $exec

    ; Low half adds 6144; the high-half add carries the immediate 4294967295.
    %25:sgpr_32 = S_MOV_B32 6144
    %26:vgpr_32, %27:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %21, %25, 0, implicit $exec
    %28:vgpr_32, dead %29:sreg_32_xm0_xexec = V_ADDC_U32_e64 %23, 4294967295, killed %27, 0, implicit $exec
    %30:vreg_64 = REG_SEQUENCE %26, %subreg.sub0, %28, %subreg.sub1
    %31:vreg_64 = GLOBAL_LOAD_DWORDX2 %30, 0, 0, implicit $exec
...
---

# diffoporder_add_store: two dword global stores whose addresses are the
# 64-bit value in %0 plus 4000 and plus 3000 respectively. The stored data
# is %0.sub0 for the first store and %0.sub1 for the second.
# The patterns below expect the stores to come out with immediate offsets
# 1000 (4000 - 3000) and 0.
# GFX10-LABEL: name: diffoporder_add_store
# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub0, 1000, 0
# GFX10: GLOBAL_STORE_DWORD %{{[0-9]+}}, %0.sub1, 0, 0

name: diffoporder_add_store
body:             |
  bb.0.entry:

    %0:vreg_64 = COPY $vgpr0_vgpr1

    ; First store: address %0 + 4000, data %0.sub0.
    %1:sgpr_32 = S_MOV_B32 4000
    %2:vgpr_32, %3:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
    %4:vgpr_32, dead %5:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
    %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
    GLOBAL_STORE_DWORD %6, %0.sub0, 0, 0, implicit $exec

    ; Second store: address %0 + 3000, data %0.sub1.
    %8:sgpr_32 = S_MOV_B32 3000
    %9:vgpr_32, %10:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
    %11:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
    %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
    GLOBAL_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec
...
