; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s

; DESCRIPTION:
;
; Three LDS globals are defined here, and each of them is used within its own
; non-kernel function. There are three kernels, each of which calls two of the
; three non-kernel functions. Hence pointer replacement should take place for
; all three LDS globals, and pointer initialization within a kernel should
; happen selectively, depending on which LDS globals are reachable from that
; kernel.
;
; NOTE: the function-level label directives below are anchored on the (unique)
; `define` line of each function, because FileCheck requires a label to match
; exactly one line of the checked output. Basic-block names such as `entry:`,
; `2:` and `3:` repeat across functions and are therefore matched with plain
; checks inside each labelled region.

; Original LDS should exist.
; CHECK: @lds_used_within_function_1 = internal addrspace(3) global [1 x i32] undef, align 4
; CHECK: @lds_used_within_function_2 = internal addrspace(3) global [2 x i32] undef, align 4
; CHECK: @lds_used_within_function_3 = internal addrspace(3) global [3 x i32] undef, align 4
@lds_used_within_function_1 = internal addrspace(3) global [1 x i32] undef, align 4
@lds_used_within_function_2 = internal addrspace(3) global [2 x i32] undef, align 4
@lds_used_within_function_3 = internal addrspace(3) global [3 x i32] undef, align 4

; Pointers should be created.
; CHECK: @lds_used_within_function_1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; CHECK: @lds_used_within_function_2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; CHECK: @lds_used_within_function_3.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2

; Pointer replacement code should be added.
define internal void @function_3() {
; CHECK-LABEL: define internal void @function_3(
; CHECK: entry:
; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [3 x i32] addrspace(3)*
; CHECK:   %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* %2, i32 0, i32 0
; CHECK:   ret void
entry:
  %gep = getelementptr inbounds [3 x i32], [3 x i32] addrspace(3)* @lds_used_within_function_3, i32 0, i32 0
  ret void
}

; Pointer replacement code should be added.
define internal void @function_2() {
; CHECK-LABEL: define internal void @function_2(
; CHECK: entry:
; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [2 x i32] addrspace(3)*
; CHECK:   %gep = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* %2, i32 0, i32 0
; CHECK:   ret void
entry:
  %gep = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @lds_used_within_function_2, i32 0, i32 0
  ret void
}

; Pointer replacement code should be added.
define internal void @function_1() {
; CHECK-LABEL: define internal void @function_1(
; CHECK: entry:
; CHECK:   %0 = load i16, i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
; CHECK:   %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK:   %2 = bitcast i8 addrspace(3)* %1 to [1 x i32] addrspace(3)*
; CHECK:   %gep = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* %2, i32 0, i32 0
; CHECK:   ret void
entry:
  %gep = getelementptr inbounds [1 x i32], [1 x i32] addrspace(3)* @lds_used_within_function_1, i32 0, i32 0
  ret void
}

; Pointer initialization code should be added, but only for the LDS globals
; used by @function_3 and @function_1.
define protected amdgpu_kernel void @kernel_calls_function_3_and_1() {
; CHECK-LABEL: define protected amdgpu_kernel void @kernel_calls_function_3_and_1(
; CHECK: entry:
; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK:   %1 = icmp eq i32 %0, 0
; CHECK:   br i1 %1, label %2, label %3
;
; CHECK: 2:
; CHECK:   store i16 ptrtoint ([3 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
; CHECK:   store i16 ptrtoint ([1 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
; CHECK:   br label %3
;
; CHECK: 3:
; CHECK:   call void @llvm.amdgcn.wave.barrier()
; CHECK:   call void @function_3()
; CHECK:   call void @function_1()
; CHECK:   ret void
entry:
  call void @function_3()
  call void @function_1()
  ret void
}

; Pointer initialization code should be added, but only for the LDS globals
; used by @function_2 and @function_3.
define protected amdgpu_kernel void @kernel_calls_function_2_and_3() {
; CHECK-LABEL: define protected amdgpu_kernel void @kernel_calls_function_2_and_3(
; CHECK: entry:
; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK:   %1 = icmp eq i32 %0, 0
; CHECK:   br i1 %1, label %2, label %3
;
; CHECK: 2:
; CHECK:   store i16 ptrtoint ([3 x i32] addrspace(3)* @lds_used_within_function_3 to i16), i16 addrspace(3)* @lds_used_within_function_3.ptr, align 2
; CHECK:   store i16 ptrtoint ([2 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
; CHECK:   br label %3
;
; CHECK: 3:
; CHECK:   call void @llvm.amdgcn.wave.barrier()
; CHECK:   call void @function_2()
; CHECK:   call void @function_3()
; CHECK:   ret void
entry:
  call void @function_2()
  call void @function_3()
  ret void
}

; Pointer initialization code should be added, but only for the LDS globals
; used by @function_1 and @function_2.
define protected amdgpu_kernel void @kernel_calls_function_1_and_2() {
; CHECK-LABEL: define protected amdgpu_kernel void @kernel_calls_function_1_and_2(
; CHECK: entry:
; CHECK:   %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK:   %1 = icmp eq i32 %0, 0
; CHECK:   br i1 %1, label %2, label %3
;
; CHECK: 2:
; CHECK:   store i16 ptrtoint ([2 x i32] addrspace(3)* @lds_used_within_function_2 to i16), i16 addrspace(3)* @lds_used_within_function_2.ptr, align 2
; CHECK:   store i16 ptrtoint ([1 x i32] addrspace(3)* @lds_used_within_function_1 to i16), i16 addrspace(3)* @lds_used_within_function_1.ptr, align 2
; CHECK:   br label %3
;
; CHECK: 3:
; CHECK:   call void @llvm.amdgcn.wave.barrier()
; CHECK:   call void @function_1()
; CHECK:   call void @function_2()
; CHECK:   ret void
entry:
  call void @function_1()
  call void @function_2()
  ret void
}