1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "polly/CodeGen/PerfMonitor.h"
12 #include "polly/CodeGen/RuntimeDebugBuilder.h"
13 #include "polly/ScopInfo.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/IR/IntrinsicsX86.h"
17 
18 using namespace llvm;
19 using namespace polly;
20 
getAtExit()21 Function *PerfMonitor::getAtExit() {
22   const char *Name = "atexit";
23   Function *F = M->getFunction(Name);
24 
25   if (!F) {
26     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
27     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
28                                          {Builder.getInt8PtrTy()}, false);
29     F = Function::Create(Ty, Linkage, Name, M);
30   }
31 
32   return F;
33 }
34 
addToGlobalConstructors(Function * Fn)35 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
36   const char *Name = "llvm.global_ctors";
37   GlobalVariable *GV = M->getGlobalVariable(Name);
38   std::vector<Constant *> V;
39 
40   if (GV) {
41     Constant *Array = GV->getInitializer();
42     for (Value *X : Array->operand_values())
43       V.push_back(cast<Constant>(X));
44     GV->eraseFromParent();
45   }
46 
47   StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
48                                    Builder.getInt8PtrTy());
49 
50   V.push_back(
51       ConstantStruct::get(ST, Builder.getInt32(10), Fn,
52                           ConstantPointerNull::get(Builder.getInt8PtrTy())));
53   ArrayType *Ty = ArrayType::get(ST, V.size());
54 
55   GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
56                           ConstantArray::get(Ty, V), Name, nullptr,
57                           GlobalVariable::NotThreadLocal);
58 }
59 
getRDTSCP()60 Function *PerfMonitor::getRDTSCP() {
61   return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
62 }
63 
PerfMonitor(const Scop & S,Module * M)64 PerfMonitor::PerfMonitor(const Scop &S, Module *M)
65     : M(M), Builder(M->getContext()), S(S) {
66   if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
67     Supported = true;
68   else
69     Supported = false;
70 }
71 
TryRegisterGlobal(Module * M,const char * Name,Constant * InitialValue,Value ** Location)72 static void TryRegisterGlobal(Module *M, const char *Name,
73                               Constant *InitialValue, Value **Location) {
74   *Location = M->getGlobalVariable(Name);
75 
76   if (!*Location)
77     *Location = new GlobalVariable(
78         *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
79         InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
80 }
81 
82 // Generate a unique name that is usable as a LLVM name for a scop to name its
83 // performance counter.
GetScopUniqueVarname(const Scop & S)84 static std::string GetScopUniqueVarname(const Scop &S) {
85   std::string EntryString, ExitString;
86   std::tie(EntryString, ExitString) = S.getEntryExitStr();
87 
88   return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
89           EntryString + "__to__" + ExitString)
90       .str();
91 }
92 
addScopCounter()93 void PerfMonitor::addScopCounter() {
94   const std::string varname = GetScopUniqueVarname(S);
95   TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
96                     &CyclesInCurrentScopPtr);
97 
98   TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
99                     &TripCountForCurrentScopPtr);
100 }
101 
addGlobalVariables()102 void PerfMonitor::addGlobalVariables() {
103   TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
104                     &CyclesTotalStartPtr);
105 
106   TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
107                     &AlreadyInitializedPtr);
108 
109   TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
110                     &CyclesInScopsPtr);
111 
112   TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
113                     &CyclesInScopStartPtr);
114 }
115 
116 static const char *InitFunctionName = "__polly_perf_init";
117 static const char *FinalReportingFunctionName = "__polly_perf_final";
118 
119 static BasicBlock *FinalStartBB = nullptr;
120 static ReturnInst *ReturnFromFinal = nullptr;
121 
insertFinalReporting()122 Function *PerfMonitor::insertFinalReporting() {
123   // Create new function.
124   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
125   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
126   Function *ExitFn =
127       Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
128   FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
129   Builder.SetInsertPoint(FinalStartBB);
130 
131   if (!Supported) {
132     RuntimeDebugBuilder::createCPUPrinter(
133         Builder, "Polly runtime information generation not supported\n");
134     Builder.CreateRetVoid();
135     return ExitFn;
136   }
137 
138   // Measure current cycles and compute final timings.
139   Function *RDTSCPFn = getRDTSCP();
140 
141   Type *Int64Ty = Builder.getInt64Ty();
142   Value *CurrentCycles =
143       Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
144   Value *CyclesStart = Builder.CreateLoad(Int64Ty, CyclesTotalStartPtr, true);
145   Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
146   Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true);
147 
148   // Print the runtime information.
149   RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
150   RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
151   RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
152   RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
153                                         "\n");
154 
155   // Print the preamble for per-scop information.
156   RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
157   RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
158   RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
159 
160   RuntimeDebugBuilder::createCPUPrinter(
161       Builder, "scop function, "
162                "entry block name, exit block name, total time, trip count\n");
163   ReturnFromFinal = Builder.CreateRetVoid();
164   return ExitFn;
165 }
166 
AppendScopReporting()167 void PerfMonitor::AppendScopReporting() {
168   if (!Supported)
169     return;
170 
171   assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
172                          "PerfMonitor::insertFinalReporting.");
173   assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
174                             "PerfMonitor::insertFinalReporting.");
175 
176   Builder.SetInsertPoint(FinalStartBB);
177   ReturnFromFinal->eraseFromParent();
178 
179   Type *Int64Ty = Builder.getInt64Ty();
180   Value *CyclesInCurrentScop =
181       Builder.CreateLoad(Int64Ty, this->CyclesInCurrentScopPtr, true);
182 
183   Value *TripCountForCurrentScop =
184       Builder.CreateLoad(Int64Ty, this->TripCountForCurrentScopPtr, true);
185 
186   std::string EntryName, ExitName;
187   std::tie(EntryName, ExitName) = S.getEntryExitStr();
188 
189   // print in CSV for easy parsing with other tools.
190   RuntimeDebugBuilder::createCPUPrinter(
191       Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
192       CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
193 
194   ReturnFromFinal = Builder.CreateRetVoid();
195 }
196 
197 static Function *FinalReporting = nullptr;
198 
initialize()199 void PerfMonitor::initialize() {
200   addGlobalVariables();
201   addScopCounter();
202 
203   // Ensure that we only add the final reporting function once.
204   // On later invocations, append to the reporting function.
205   if (!FinalReporting) {
206     FinalReporting = insertFinalReporting();
207 
208     Function *InitFn = insertInitFunction(FinalReporting);
209     addToGlobalConstructors(InitFn);
210   }
211 
212   AppendScopReporting();
213 }
214 
insertInitFunction(Function * FinalReporting)215 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
216   // Insert function definition and BBs.
217   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
218   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
219   Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
220   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
221   BasicBlock *EarlyReturn =
222       BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
223   BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
224 
225   Builder.SetInsertPoint(Start);
226 
227   // Check if this function was already run. If yes, return.
228   //
229   // In case profiling has been enabled in multiple translation units, the
230   // initializer function will be added to the global constructors list of
231   // each translation unit. When merging translation units, the global
232   // constructor lists are just appended, such that the initializer will appear
233   // multiple times. To avoid initializations being run multiple times (and
234   // especially to avoid that atExitFn is called more than once), we bail
235   // out if the initializer is run more than once.
236   Value *HasRunBefore =
237       Builder.CreateLoad(Builder.getInt1Ty(), AlreadyInitializedPtr);
238   Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
239   Builder.SetInsertPoint(EarlyReturn);
240   Builder.CreateRetVoid();
241 
242   // Keep track that this function has been run once.
243   Builder.SetInsertPoint(InitBB);
244   Value *True = Builder.getInt1(true);
245   Builder.CreateStore(True, AlreadyInitializedPtr);
246 
247   // Register the final reporting function with atexit().
248   Value *FinalReportingPtr =
249       Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
250   Function *AtExitFn = getAtExit();
251   Builder.CreateCall(AtExitFn, {FinalReportingPtr});
252 
253   if (Supported) {
254     // Read the currently cycle counter and store the result for later.
255     Function *RDTSCPFn = getRDTSCP();
256     Value *CurrentCycles =
257         Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
258     Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
259   }
260   Builder.CreateRetVoid();
261 
262   return InitFn;
263 }
264 
insertRegionStart(Instruction * InsertBefore)265 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
266   if (!Supported)
267     return;
268 
269   Builder.SetInsertPoint(InsertBefore);
270   Function *RDTSCPFn = getRDTSCP();
271   Value *CurrentCycles =
272       Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
273   Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
274 }
275 
insertRegionEnd(Instruction * InsertBefore)276 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
277   if (!Supported)
278     return;
279 
280   Builder.SetInsertPoint(InsertBefore);
281   Function *RDTSCPFn = getRDTSCP();
282   Type *Int64Ty = Builder.getInt64Ty();
283   LoadInst *CyclesStart =
284       Builder.CreateLoad(Int64Ty, CyclesInScopStartPtr, true);
285   Value *CurrentCycles =
286       Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
287   Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
288   Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true);
289   CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
290   Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
291 
292   Value *CyclesInCurrentScop =
293       Builder.CreateLoad(Int64Ty, CyclesInCurrentScopPtr, true);
294   CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
295   Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
296 
297   Value *TripCountForCurrentScop =
298       Builder.CreateLoad(Int64Ty, TripCountForCurrentScopPtr, true);
299   TripCountForCurrentScop =
300       Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
301   Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
302                       true);
303 }
304