; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s

; Exercises the size cut-off used when amdgpu-promote-alloca turns a private
; array alloca into vector operations. Each kernel below stores to element 0
; of an array alloca and loads back through a runtime index; the kernels differ
; only in element type, element count and maximum flat work-group size.
;
; The first invocation uses the pass defaults; the second one additionally
; passes -amdgpu-promote-alloca-to-vector-limit=32.
;
; NOTE(review): the limit-32 directives have no LABEL anchor, so they are not
; tied to a particular function, and the bare "alloca" pattern also occurs
; inside every kernel name in this file. Consider tightening them once the
; expected limit-32 output has been confirmed.

; Allocas live in address space 5.
target datalayout = "A5"

; [8 x i64] (64 bytes), flat work-group size up to 1024 (attribute group #0).
; Default run: no alloca may remain and <8 x i64> vector code must show up.
; Limit-32 run: an "alloca" match is expected with no <8 x i64> after it.
; OPT-LABEL: @alloca_8xi64_max1024(
; OPT-NOT: alloca
; OPT: <8 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <8 x i64>
define amdgpu_kernel void @alloca_8xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
  %tmp = alloca [8 x i64], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [8 x i64], [8 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [8 x i64], [8 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [9 x i64] (72 bytes), flat work-group size up to 1024 (attribute group #0).
; One element more than the 8 x i64 case: both runs expect the alloca to stay
; and no <9 x i64> vector type to appear.
; OPT-LABEL: @alloca_9xi64_max1024(
; OPT: alloca [9 x i64]
; OPT-NOT: <9 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define amdgpu_kernel void @alloca_9xi64_max1024(i64 addrspace(1)* %out, i32 %index) #0 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [9 x i64], [9 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [16 x i64] (128 bytes), flat work-group size up to 512 (attribute group #1).
; Default run: no alloca may remain and <16 x i64> vector code must show up.
; Limit-32 run: an "alloca" match is expected with no <16 x i64> after it.
; OPT-LABEL: @alloca_16xi64_max512(
; OPT-NOT: alloca
; OPT: <16 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i64>
define amdgpu_kernel void @alloca_16xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 {
entry:
  %tmp = alloca [16 x i64], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [16 x i64], [16 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [16 x i64], [16 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [17 x i64] (136 bytes), flat work-group size up to 512 (attribute group #1).
; One element more than the 16 x i64 case: both runs expect the alloca to stay
; and no <17 x i64> vector type to appear.
; OPT-LABEL: @alloca_17xi64_max512(
; OPT: alloca [17 x i64]
; OPT-NOT: <17 x i64>
; LIMIT32: alloca
; LIMIT32-NOT: <17 x i64>
define amdgpu_kernel void @alloca_17xi64_max512(i64 addrspace(1)* %out, i32 %index) #1 {
entry:
  %tmp = alloca [17 x i64], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [17 x i64], [17 x i64] addrspace(5)* %tmp, i32 0, i32 0
  store i64 0, i64 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [17 x i64], [17 x i64] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i64, i64 addrspace(5)* %tmp1
  store i64 %tmp2, i64 addrspace(1)* %out
  ret void
}

; [9 x i128] (144 bytes), flat work-group size up to 512 (attribute group #1).
; Both runs expect the alloca to stay and no <9 x i128> vector type to appear.
; The same array is expected to be vectorized in the max-256 variant of this
; kernel.
; OPT-LABEL: @alloca_9xi128_max512(
; OPT: alloca [9 x i128]
; OPT-NOT: <9 x i128>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max512(i128 addrspace(1)* %out, i32 %index) #1 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 0
  store i128 0, i128 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i128, i128 addrspace(5)* %tmp1
  store i128 %tmp2, i128 addrspace(1)* %out
  ret void
}

; [9 x i128] (144 bytes), flat work-group size up to 256 (attribute group #2).
; Default run: no alloca may remain and <9 x i128> vector code must show up.
; Limit-32 run: an "alloca" match is expected with no <9 x i128> after it.
; OPT-LABEL: @alloca_9xi128_max256(
; OPT-NOT: alloca
; OPT: <9 x i128>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 0
  store i128 0, i128 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [9 x i128], [9 x i128] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i128, i128 addrspace(5)* %tmp1
  store i128 %tmp2, i128 addrspace(1)* %out
  ret void
}

; [16 x i128] (256 bytes), flat work-group size up to 256 (attribute group #2).
; Default run: no alloca may remain and <16 x i128> vector code must show up.
; Limit-32 run: an "alloca" match is expected with no <16 x i128> after it.
; OPT-LABEL: @alloca_16xi128_max256(
; OPT-NOT: alloca
; OPT: <16 x i128>
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i128>
define amdgpu_kernel void @alloca_16xi128_max256(i128 addrspace(1)* %out, i32 %index) #2 {
entry:
  %tmp = alloca [16 x i128], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [16 x i128], [16 x i128] addrspace(5)* %tmp, i32 0, i32 0
  store i128 0, i128 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [16 x i128], [16 x i128] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i128, i128 addrspace(5)* %tmp1
  store i128 %tmp2, i128 addrspace(1)* %out
  ret void
}

; [9 x i256] (288 bytes), flat work-group size up to 256 (attribute group #2).
; Larger than the 256-byte 16 x i128 case: both runs expect the alloca to stay
; and no <9 x i256> vector type to appear.
; OPT-LABEL: @alloca_9xi256_max256(
; OPT: alloca [9 x i256]
; OPT-NOT: <9 x i256>
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i256>
define amdgpu_kernel void @alloca_9xi256_max256(i256 addrspace(1)* %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i256], addrspace(5)
  ; Write element 0 through a constant index.
  %x = getelementptr [9 x i256], [9 x i256] addrspace(5)* %tmp, i32 0, i32 0
  store i256 0, i256 addrspace(5)* %x
  ; Read back through %index, which is a kernel argument (non-constant).
  %tmp1 = getelementptr [9 x i256], [9 x i256] addrspace(5)* %tmp, i32 0, i32 %index
  %tmp2 = load i256, i256 addrspace(5)* %tmp1
  store i256 %tmp2, i256 addrspace(1)* %out
  ret void
}

; Flat work-group size ranges, written as "min,max", used by the kernels in
; this file: #0 caps the size at 1024, #1 at 512 and #2 at 256.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }