1# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
2# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
3
4# If there's a base offset, check that SILoadStoreOptimizer creates
5# V_ADD_CO_U32_e64 (VI) or V_ADD_U32_e64 (GFX9) for that offset; _e64 uses a
6# vreg for the carry (rather than $vcc, which is used in _e32); this ensures
7# that $vcc is not inadvertently clobbered.
8
9# GCN-LABEL: name: ds_combine_base_offset{{$}}
10
11# VI: V_ADD_CO_U32_e64 %6, %0,
12# VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
13# VI: V_ADD_CO_U32_e64 %10, %3,
14# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,
15
16# GFX9: V_ADD_U32_e64 %6, %0,
17# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
18# GFX9: V_ADD_U32_e64 %9, %3,
19# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
20
21--- |
22  @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4
  ; @0 is the 256-float addrspace(3) array that every %tmp GEP in the
  ; functions of this module is based on.
23
24  define amdgpu_kernel void @ds_combine_base_offset() {
    ; IR stub for the MIR function of the same name. It defines %tmp..%tmp3,
    ; which that function's memory operands reference as %ir.tmp..%ir.tmp3.
25    bb.0:
26      br label %bb2
27
28    bb1:
29      ret void
30
31    bb2:
      ; Four float pointers into @0 at element indices 0, 8, 16 and 24.
      ; None of them is loaded from or stored to in the IR itself.
32      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
33      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
34      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
35      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
36      br label %bb1
37    }
38
39  define amdgpu_kernel void @ds_combine_base_offset_subreg() {
    ; IR stub for the MIR function of the same name. It defines %tmp..%tmp3,
    ; which that function's memory operands reference as %ir.tmp..%ir.tmp3.
40    bb.0:
41      br label %bb2
42
43    bb1:
44      ret void
45
46    bb2:
      ; Four float pointers into @0 at element indices 0, 8, 16 and 24.
      ; None of them is loaded from or stored to in the IR itself.
47      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
48      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
49      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
50      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
51      br label %bb1
52    }
53
54  define amdgpu_kernel void @ds_combine_subreg() {
    ; IR stub for the MIR function of the same name. It defines %tmp..%tmp3,
    ; which that function's memory operands reference as %ir.tmp..%ir.tmp3.
55    bb.0:
56      br label %bb2
57
58    bb1:
59      ret void
60
61    bb2:
      ; Four float pointers into @0 at element indices 0, 8, 16 and 24.
      ; None of them is loaded from or stored to in the IR itself.
62      %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0
63      %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8
64      %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16
65      %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24
66      br label %bb1
67    }
68---
69name:            ds_combine_base_offset
# Input for the base-offset case on plain vgpr_32 operands: a pair of DS writes
# (offsets 1024 and 1056) and a pair of DS reads (offsets 1088 and 1120), issued
# between a definition of $vcc and its later uses.
70body:             |
71  bb.0:
    ; %0 is left undefined; bb.2 uses it as an address and as store data.
72    %0:vgpr_32 = IMPLICIT_DEF
73    S_BRANCH %bb.2
74
75  bb.1:
76    S_ENDPGM 0
77
78  bb.2:
    ; The _e32 compare below defines $vcc. $vcc is read again by S_AND_B64 and
    ; S_CBRANCH_VCCNZ at the end of this block, so nothing the pass inserts in
    ; between may overwrite it.
79    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
80    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
81    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Write pair on address %0, offsets 1024 and 1056 (32 apart). The checks
    ; for this function expect one DS_WRITE2_B32 with offsets 0 and 8 whose
    ; address is produced by an _e64 add on %0.
    ; NOTE(review): presumably the base is split out because these offsets do
    ; not fit the merged instruction's offset fields - confirm in the pass.
82    DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
83    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
84    DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    ; Read pair on address %3, offsets 1088 and 1120 (32 apart). The checks
    ; expect one DS_READ2_B32 with offsets 0 and 8 whose address is produced
    ; by an _e64 add on %3.
85    %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
86    %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    ; Uses of the $vcc value defined before the DS accesses.
87    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
88    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
89    S_BRANCH %bb.1
90...
91
92# GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}
93
94# VI: V_ADD_CO_U32_e64 %6, %0.sub0,
95# VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
96# VI: V_ADD_CO_U32_e64 %10, %3.sub0,
97# VI-NEXT: DS_READ2_B32 killed %11, 0, 8,
98
99# GFX9: V_ADD_U32_e64 %6, %0.sub0,
100# GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
101# GFX9: V_ADD_U32_e64 %9, %3.sub0,
102# GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
103---
104name:            ds_combine_base_offset_subreg
# Same shape as ds_combine_base_offset, but the address and data operands are
# sub0 subregisters of vreg_64 values instead of plain vgpr_32 registers.
105body:             |
106  bb.0:
    ; %0 is left undefined; bb.2 uses %0.sub0 as an address and as store data.
107    %0:vreg_64 = IMPLICIT_DEF
108    S_BRANCH %bb.2
109
110  bb.1:
111    S_ENDPGM 0
112
113  bb.2:
    ; The _e32 compare below defines $vcc. $vcc is read again by S_AND_B64 and
    ; S_CBRANCH_VCCNZ at the end of this block, so nothing the pass inserts in
    ; between may overwrite it.
114    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
115    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
116    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Write pair on address %0.sub0, offsets 1024 and 1056 (32 apart). The
    ; checks for this function expect one DS_WRITE2_B32 with offsets 0 and 8
    ; that keeps the .sub0 indices on its data operands and takes its address
    ; from an _e64 add on %0.sub0.
117    DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    ; Defines the 64-bit %3; only %3.sub0 is used below.
118    %3:vreg_64 = V_LSHLREV_B64_e64 0, 0, implicit $exec
119    DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    ; Read pair on address %3.sub0, offsets 1088 and 1120 (32 apart). The
    ; checks expect one DS_READ2_B32 with offsets 0 and 8 whose address is
    ; produced by an _e64 add on %3.sub0.
120    %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
121    %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    ; Uses of the $vcc value defined before the DS accesses.
122    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
123    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
124    S_BRANCH %bb.1
125...
126
127# GCN-LABEL: name: ds_combine_subreg{{$}}
128
129# VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
130# VI: DS_READ2_B32 %3.sub0, 0, 8,
131
132# GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
133# GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
134---
135name:            ds_combine_subreg
# Subregister case without a base offset: the DS accesses use offsets 0 and 32,
# and the checks for this function match merged instructions that use %0.sub0
# and %3.sub0 directly as the address, with no add in front of them.
136body:             |
137  bb.0:
    ; %0 is left undefined; bb.2 uses %0.sub0 as an address and as store data.
138    %0:vreg_64 = IMPLICIT_DEF
139    S_BRANCH %bb.2
140
141  bb.1:
142    S_ENDPGM 0
143
144  bb.2:
    ; The _e32 compare below defines $vcc, which is read again by S_AND_B64
    ; and S_CBRANCH_VCCNZ at the end of this block.
145    %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
146    %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
147    V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
    ; Write pair on address %0.sub0, offsets 0 and 32. The checks expect one
    ; DS_WRITE2_B32 on %0.sub0 with offsets 0 and 8.
148    DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
    ; Defines the 64-bit %3; only %3.sub0 is used below.
149    %3:vreg_64 = V_LSHLREV_B64_e64 0, 0, implicit $exec
150    DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1)
    ; Read pair on address %3.sub0, offsets 0 and 32. The checks expect one
    ; DS_READ2_B32 on %3.sub0 with offsets 0 and 8.
151    %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2)
152    %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3)
    ; Uses of the $vcc value defined before the DS accesses.
153    $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
154    S_CBRANCH_VCCNZ %bb.1, implicit $vcc
155    S_BRANCH %bb.1
156...
157