1# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
2# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
3# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
4
5--- |
6  define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void } ; %const0 is the IR value named by the %ir.const0 memory operands in the smrd_imm MIR body
7  define amdgpu_kernel void @smrd_wide() { ret void } ; empty IR stub for the MIR function of the same name
8  define amdgpu_kernel void @constant_address_positive() { ret void } ; empty IR stub for the MIR function of the same name
9...
10---
11
12name:            smrd_imm
13legalized:       true
14regBankSelected: true
15# Each directive group below corresponds, in order, to one G_PTR_ADD + G_LOAD group (or pointer load) in the body.
16# GCN: body:
17# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
18
19# Immediate offset:
20# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
21# VI:   S_LOAD_DWORD_IMM [[PTR]], 4, 0
22
23# Max immediate offset for SI (255 dwords = 1020 bytes)
24# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
25# VI:   S_LOAD_DWORD_IMM [[PTR]], 1020, 0
26
27# Immediate overflow for SI
28# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
29# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
30# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
31# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
32
33# Max immediate offset for VI
34# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
35# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048572]], 0
36# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143, 0
37# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572, 0
38#
39# Immediate overflow for VI
40# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
41# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
42# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
43
44# Max immediate for CI (17179869180 bytes = 4294967295 dwords; SI/VI add the 64-bit constant to the pointer instead)
45# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
46# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
47# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
48# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
49# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
50# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
51# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
52# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
53# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
54# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
55# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
56# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
57
58# Immediate overflow for CI
59# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
60# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
61# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
62# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
63# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
64# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
65# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
66# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
67# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
68# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
69# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
70
71# Max 32-bit byte offset
72# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
73# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
74# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
75
76# Overflow 32-bit byte offset
77# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
78# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
79# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
80# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
81# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
82# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
83# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
84# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
85# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
86# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
87# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
88# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
89
90# Pointer loads (p0, p1 and p4 results, all loaded directly through the base pointer)
91# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PTR]], 0, 0
92# GCN: $sgpr0_sgpr1 = COPY [[AS0]]
93# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PTR]], 0, 0
94# GCN: $sgpr0_sgpr1 = COPY [[AS1]]
95# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PTR]], 0, 0
96# GCN: $sgpr0_sgpr1 = COPY [[AS4]]
97
98body: |
99  bb.0:
100    liveins: $sgpr0_sgpr1
101
102    %0:sgpr(p4) = COPY $sgpr0_sgpr1
103
104    %1:sgpr(s64) = G_CONSTANT i64 4
105    %2:sgpr(p4) = G_PTR_ADD %0, %1
106    %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4)
107    $sgpr0 = COPY %3
108
109    %4:sgpr(s64) = G_CONSTANT i64 1020
110    %5:sgpr(p4) = G_PTR_ADD %0, %4
111    %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4)
112    $sgpr0 = COPY %6
113
114    %7:sgpr(s64) = G_CONSTANT i64 1024
115    %8:sgpr(p4) = G_PTR_ADD %0, %7
116    %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4)
117    $sgpr0 = COPY %9
118
119    %10:sgpr(s64) = G_CONSTANT i64 1048572
120    %11:sgpr(p4) = G_PTR_ADD %0, %10
121    %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4)
122    $sgpr0 = COPY %12
123
124    %13:sgpr(s64) = G_CONSTANT i64 1048576
125    %14:sgpr(p4) = G_PTR_ADD %0, %13
126    %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4)
127    $sgpr0 = COPY %15
128
129    %16:sgpr(s64) = G_CONSTANT i64 17179869180
130    %17:sgpr(p4) = G_PTR_ADD %0, %16
131    %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4)
132    $sgpr0 = COPY %18
133
134    %19:sgpr(s64) = G_CONSTANT i64 17179869184
135    %20:sgpr(p4) = G_PTR_ADD %0, %19
136    %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4)
137    $sgpr0 = COPY %21
138
139    %22:sgpr(s64) = G_CONSTANT i64 4294967292
140    %23:sgpr(p4) = G_PTR_ADD %0, %22
141    %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4)
142    $sgpr0 = COPY %24
143
144    %25:sgpr(s64) = G_CONSTANT i64 4294967296
145    %26:sgpr(p4) = G_PTR_ADD %0, %25
146    %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4)
147    $sgpr0 = COPY %27
148
149    %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4)
150    $sgpr0_sgpr1 = COPY %28
151
152    %29:sgpr(p1) = G_LOAD %0 :: (load (p1) from %ir.const0, addrspace 4)
153    $sgpr0_sgpr1 = COPY %29
154
155    %30:sgpr(p4) = G_LOAD %0 :: (load (p4) from %ir.const0, addrspace 4)
156    $sgpr0_sgpr1 = COPY %30
157
158...
159---
160# smrd_wide: <8 x s32> and <16 x s32> SGPR-bank loads through a constant (addrspace 4) and a global (addrspace 1) pointer.
161name:            smrd_wide
162legalized:       true
163regBankSelected: true
164# NOTE(review): the directives in this body use the CHECK prefix, which no RUN line passes to -check-prefixes, so FileCheck does not evaluate them; confirm the expected post-selection MIR before enabling.
165body: |
166  bb.0:
167    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
168    %0:sgpr(p4) = COPY $sgpr0_sgpr1
169    %1:sgpr(p1) = COPY $sgpr2_sgpr3
170
171    ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
172    ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3
173    ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]]
174    %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4)
175    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2
176
177    ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]]
178    %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4)
179    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3
180
181    ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]]
182    %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1)
183    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4
184
185    ; CHECK: s_load_dwordx16 [[GLOBAL_PTR]]
186    %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1)
187    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
188...
189
190
191# Test a load of an offset from a constant base address
192# GCN-LABEL: name: constant_address_positive{{$}}
193# GCN: %0:sreg_64 = S_MOV_B64 44
194# The base is the constant 44 and the G_PTR_ADD constant is 64; the expected load immediate is 64 on VI and 16 on SI/CI.
195# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4)
196# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)
197# The directives above pin the exact virtual register numbers %0 and %3 used in the body below.
198---
199
200name:            constant_address_positive
201legalized:       true
202regBankSelected: true
203# NOTE(review): the body reads no physical registers, so the liveins list looks unnecessary; confirm before removing.
204body: |
205  bb.0:
206    liveins: $sgpr0_sgpr1, $vgpr2_vgpr3
207    %0:sgpr(p4) = G_CONSTANT i64 44
208    %1:sgpr(s64) = G_CONSTANT i64 64
209    %2:sgpr(p4) = G_PTR_ADD %0, %1
210    %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
211    S_ENDPGM 0, implicit %3
212...
213