1; RUN: opt -S -mtriple=amdgcn--  -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s
2
3; DESCRIPTION:
4;
5; There are three lds globals defined here, and these three lds are used respectively within
6; three non-kernel functions. There are three kernels, each of which calls a different pair of the non-kernel functions.
7; Hence pointer replacement should take place for all three lds, and pointer initialization within
8; kernel should selectively happen depending on which lds is reachable from the kernel.
9;
10
11; Original LDS should exist.
12; CHECK: @lds_used_within_function_1 = internal addrspace(3) global [1 x i32] undef, align 4
13; CHECK: @lds_used_within_function_2 = internal addrspace(3) global [2 x i32] undef, align 4
14; CHECK: @lds_used_within_function_3 = internal addrspace(3) global [3 x i32] undef, align 4
; The three addrspace(3) globals under test: i32 arrays of 1, 2 and 3 elements.
; Each one is referenced directly only by the like-numbered non-kernel function
; (@function_1, @function_2, @function_3) defined later in this file.
15@lds_used_within_function_1 = internal addrspace(3) global [1 x i32] undef, align 4
16@lds_used_within_function_2 = internal addrspace(3) global [2 x i32] undef, align 4
17@lds_used_within_function_3 = internal addrspace(3) global [3 x i32] undef, align 4
18
19; Pointers should be created.
20; CHECK: @lds_used_within_function_1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
21; CHECK: @lds_used_within_function_2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
22; CHECK: @lds_used_within_function_3.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
23
24; Pointer replacement code should be added.
; @function_3: non-kernel function whose only instruction besides the return is
; a GEP to element 0 of @lds_used_within_function_3.
; The FileCheck expectations in the body require that direct use to be rewritten
; as: load the i16 held in @lds_used_within_function_3.ptr, apply it as a byte
; offset to a null addrspace(3) pointer, and bitcast the result back to
; [3 x i32] addrspace(3)* so the original GEP can use it as its base.
; NOTE(review): the "entry:" label pattern is repeated for every function in
; this file; FileCheck's documentation asks for label patterns that match a
; single input line. Consider anchoring on the function name -- confirm.
25define internal void @function_3() {
26; CHECK-LABEL: entry:
27; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
28; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
29; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [3 x i32] addrspace(3)*
30; CHECK:   %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* %2, i32 0, i32 0
31; CHECK:   ret void
32entry:
33  %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* @lds_used_within_function_3, i32 0, i32 0
34  ret void
35}
36
37; Pointer replacement code should be added.
; @function_2: non-kernel function whose only instruction besides the return is
; a GEP to element 0 of @lds_used_within_function_2.
; The FileCheck expectations in the body require that direct use to be rewritten
; as: load the i16 held in @lds_used_within_function_2.ptr, apply it as a byte
; offset to a null addrspace(3) pointer, and bitcast the result back to
; [2 x i32] addrspace(3)* so the original GEP can use it as its base.
38define internal void @function_2() {
39; CHECK-LABEL: entry:
40; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
41; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
42; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [2 x i32] addrspace(3)*
43; CHECK:   %gep = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* %2, i32 0, i32 0
44; CHECK:   ret void
45entry:
46  %gep = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @lds_used_within_function_2, i32 0, i32 0
47  ret void
48}
49
50; Pointer replacement code should be added.
; @function_1: non-kernel function whose only instruction besides the return is
; a GEP to element 0 of @lds_used_within_function_1.
; The FileCheck expectations in the body require that direct use to be rewritten
; as: load the i16 held in @lds_used_within_function_1.ptr, apply it as a byte
; offset to a null addrspace(3) pointer, and bitcast the result back to
; [1 x i32] addrspace(3)* so the original GEP can use it as its base.
51define internal void @function_1() {
52; CHECK-LABEL: entry:
53; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
54; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
55; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [1 x i32] addrspace(3)*
56; CHECK:   %gep = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %2, i32 0, i32 0
57; CHECK:   ret void
58entry:
59  %gep = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds_used_within_function_1, i32 0, i32 0
60  ret void
61}
62
63; Pointer initialization code should be added.
; Kernel that calls @function_3 and then @function_1, so the LDS globals used by
; those two functions are the ones reachable from here.
; Expected output shape, per the FileCheck expectations in the body:
;   entry   - call llvm.amdgcn.mbcnt.lo(-1, 0), compare the result with 0 and
;             branch to block 2 when equal, otherwise to block 3
;             (presumably so only one lane runs the stores -- TODO confirm
;             against the pass implementation).
;   block 2 - store the ptrtoint-to-i16 of @lds_used_within_function_3 and of
;             @lds_used_within_function_1 into their .ptr globals; no store for
;             @lds_used_within_function_2 is listed.
;   block 3 - call llvm.amdgcn.wave.barrier, then the two original calls.
64define protected amdgpu_kernel void @kernel_calls_function_3_and_1() {
65; CHECK-LABEL: entry:
66; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
67; CHECK:   %1 = icmp eq i32 %0, 0
68; CHECK:   br i1 %1, label %2, label %3
69;
70; CHECK-LABEL: 2:
71; CHECK:   store i16 ptrtoint ([3 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
72; CHECK:   store i16 ptrtoint ([1 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
73; CHECK:   br label %3
74;
75; CHECK-LABEL: 3:
76; CHECK:   call void @llvm.amdgcn.wave.barrier()
77; CHECK:   call void @function_3()
78; CHECK:   call void @function_1()
79; CHECK:   ret void
80entry:
81  call void @function_3()
82  call void @function_1()
83  ret void
84}
85
86; Pointer initialization code should be added.
; Kernel that calls @function_2 and then @function_3, so the LDS globals used by
; those two functions are the ones reachable from here.
; Expected output shape, per the FileCheck expectations in the body:
;   entry   - call llvm.amdgcn.mbcnt.lo(-1, 0), compare the result with 0 and
;             branch to block 2 when equal, otherwise to block 3
;             (presumably so only one lane runs the stores -- TODO confirm
;             against the pass implementation).
;   block 2 - store the ptrtoint-to-i16 of @lds_used_within_function_3 and of
;             @lds_used_within_function_2 into their .ptr globals; no store for
;             @lds_used_within_function_1 is listed. Note the store for
;             function_3's global is expected first although the call to
;             @function_2 comes first.
;   block 3 - call llvm.amdgcn.wave.barrier, then the two original calls.
87define protected amdgpu_kernel void @kernel_calls_function_2_and_3() {
88; CHECK-LABEL: entry:
89; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
90; CHECK:   %1 = icmp eq i32 %0, 0
91; CHECK:   br i1 %1, label %2, label %3
92;
93; CHECK-LABEL: 2:
94; CHECK:   store i16 ptrtoint ([3 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
95; CHECK:   store i16 ptrtoint ([2 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
96; CHECK:   br label %3
97;
98; CHECK-LABEL: 3:
99; CHECK:   call void @llvm.amdgcn.wave.barrier()
100; CHECK:   call void @function_2()
101; CHECK:   call void @function_3()
102; CHECK:   ret void
103entry:
104  call void @function_2()
105  call void @function_3()
106  ret void
107}
108
109; Pointer initialization code should be added.
; Kernel that calls @function_1 and then @function_2, so the LDS globals used by
; those two functions are the ones reachable from here.
; Expected output shape, per the FileCheck expectations in the body:
;   entry   - call llvm.amdgcn.mbcnt.lo(-1, 0), compare the result with 0 and
;             branch to block 2 when equal, otherwise to block 3
;             (presumably so only one lane runs the stores -- TODO confirm
;             against the pass implementation).
;   block 2 - store the ptrtoint-to-i16 of @lds_used_within_function_2 and of
;             @lds_used_within_function_1 into their .ptr globals; no store for
;             @lds_used_within_function_3 is listed. Note the store for
;             function_2's global is expected first although the call to
;             @function_1 comes first.
;   block 3 - call llvm.amdgcn.wave.barrier, then the two original calls.
110define protected amdgpu_kernel void @kernel_calls_function_1_and_2() {
111; CHECK-LABEL: entry:
112; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
113; CHECK:   %1 = icmp eq i32 %0, 0
114; CHECK:   br i1 %1, label %2, label %3
115;
116; CHECK-LABEL: 2:
117; CHECK:   store i16 ptrtoint ([2 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
118; CHECK:   store i16 ptrtoint ([1 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
119; CHECK:   br label %3
120;
121; CHECK-LABEL: 3:
122; CHECK:   call void @llvm.amdgcn.wave.barrier()
123; CHECK:   call void @function_1()
124; CHECK:   call void @function_2()
125; CHECK:   ret void
126entry:
127  call void @function_1()
128  call void @function_2()
129  ret void
130}
131