; RUN: llc -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Test addressing modes when the scratch base is not a frame index.
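; +max-private-element-size-16 allows private (scratch) accesses of up to
; 16 bytes, so the v2i32 and v4i32 cases below are expected to select
; single dwordx2/dwordx4 instructions rather than being split.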

; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
  store volatile i16 5, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
  store volatile i32 5, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
  store volatile <2 x i32> <i32 5, i32 10>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
  store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  %sextload = sext i8 %load to i32
  store i32 %sextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i8(i32 addrspace(1)* %out) #0 {
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 8 to i8 addrspace(5)*)
  %zextload = zext i8 %load to i32
  store i32 %zextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @sextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  %sextload = sext i16 %load to i32
  store i32 %sextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[4:7], s8 offset:8
define amdgpu_kernel void @zextload_private_offset_i16(i32 addrspace(1)* %out) #0 {
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 8 to i16 addrspace(5)*)
  %zextload = zext i16 %load to i32
  store i32 %zextload, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
  %load = load volatile i32, i32 addrspace(5)* inttoptr (i32 8 to i32 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
  %load = load volatile <2 x i32>, <2 x i32> addrspace(5)* inttoptr (i32 8 to <2 x i32> addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[4:7], s2 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
  %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* inttoptr (i32 8 to <4 x i32> addrspace(5)*)
  ret void
}

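; The MUBUF encoding has a 12-bit unsigned immediate offset field, so
; 4095 is the largest offset that folds directly into the instruction.
; 4096 must be materialized in a VGPR and used with offen, and 4097
; reuses that 0x1000 VGPR base plus an immediate offset:1.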
; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[4:7], s2 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4095 to i8 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4096 to i8 addrspace(5)*)
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[4:7], s2 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
  store volatile i8 5, i8 addrspace(5)* inttoptr (i32 4097 to i8 addrspace(5)*)
  ret void
}

; The MUBUF instructions used for stack access have bounds checking
; enabled before gfx9, so a possibly negative base index can't be used
; for the VGPR offset.
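; SI/CI/VI therefore add both constant parts of the address (the +4,
; apparently the object's frame offset, and the +32 from the constant
; part of the GEP index) with VALU instructions, while gfx9 can fold the
; 32 into the immediate offset field.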

; GCN-LABEL: {{^}}store_private_unknown_bits_vaddr:
; SICIVI: v_add_{{i|u}}32_e32 [[ADDR0:v[0-9]+]], vcc, 4
; SICIVI: v_add_{{i|u}}32_e32 [[ADDR1:v[0-9]+]], vcc, 32, [[ADDR0]]
; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GFX9: v_add_u32_e32 [[ADDR:v[0-9]+]], 4,
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:32
define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %vaddr = load volatile i32, i32 addrspace(1)* undef
  %vaddr.off = add i32 %vaddr, 8
  %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %vaddr.off
  store volatile i32 9, i32 addrspace(5)* %gep
  ret void
}

attributes #0 = { nounwind }