1 //===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of the basic block sections profile reader pass. It parses
10 // and stores the basic block sections profile file (which is specified via the
11 // `-basic-block-sections` flag).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/CodeGen/Passes.h"
22 #include "llvm/IR/DebugInfoMetadata.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/LineIterator.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Path.h"
27 #include <llvm/ADT/STLExtras.h>
28 
29 using namespace llvm;
30 
// Definition of the pass's static ID member, initialized to 0.
char BasicBlockSectionsProfileReader::ID = 0;
// Registers the pass under the argument name "bbsections-profile-reader" with
// the description string below.
// NOTE(review): the two trailing `false` arguments are presumably the
// macro's "CFG only" and "is analysis" flags -- confirm against the
// INITIALIZE_PASS definition.
INITIALIZE_PASS(BasicBlockSectionsProfileReader, "bbsections-profile-reader",
                "Reads and parses a basic block sections profile.", false,
                false)
35 
36 bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
37   return getBBClusterInfoForFunction(FuncName).first;
38 }
39 
40 std::pair<bool, SmallVector<BBClusterInfo>>
41 BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
42     StringRef FuncName) const {
43   auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
44   return R != ProgramBBClusterInfo.end()
45              ? std::pair(true, R->second)
46              : std::pair(false, SmallVector<BBClusterInfo>{});
47 }
48 
49 // Basic Block Sections can be enabled for a subset of machine basic blocks.
50 // This is done by passing a file containing names of functions for which basic
51 // block sections are desired.  Additionally, machine basic block ids of the
52 // functions can also be specified for a finer granularity. Moreover, a cluster
53 // of basic blocks could be assigned to the same section.
54 // Optionally, a debug-info filename can be specified for each function to allow
55 // distinguishing internal-linkage functions of the same name.
56 // A file with basic block sections for all of function main and three blocks
57 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
58 // (Profile for function foo is only loaded when its debug-info filename
59 // matches 'path/to/foo_file.cc').
60 // ----------------------------
61 // list.txt:
62 // !main
63 // !foo M=path/to/foo_file.cc
64 // !!1 2
65 // !!4
66 Error BasicBlockSectionsProfileReader::ReadProfile() {
67   assert(MBuf);
68   line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
69 
70   auto invalidProfileError = [&](auto Message) {
71     return make_error<StringError>(
72         Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
73               Twine(LineIt.line_number()) + ": " + Message),
74         inconvertibleErrorCode());
75   };
76 
77   auto FI = ProgramBBClusterInfo.end();
78 
79   // Current cluster ID corresponding to this function.
80   unsigned CurrentCluster = 0;
81   // Current position in the current cluster.
82   unsigned CurrentPosition = 0;
83 
84   // Temporary set to ensure every basic block ID appears once in the clusters
85   // of a function.
86   SmallSet<unsigned, 4> FuncBBIDs;
87 
88   for (; !LineIt.is_at_eof(); ++LineIt) {
89     StringRef S(*LineIt);
90     if (S[0] == '@')
91       continue;
92     // Check for the leading "!"
93     if (!S.consume_front("!") || S.empty())
94       break;
95     // Check for second "!" which indicates a cluster of basic blocks.
96     if (S.consume_front("!")) {
97       // Skip the profile when we the profile iterator (FI) refers to the
98       // past-the-end element.
99       if (FI == ProgramBBClusterInfo.end())
100         continue;
101       SmallVector<StringRef, 4> BBIDs;
102       S.split(BBIDs, ' ');
103       // Reset current cluster position.
104       CurrentPosition = 0;
105       for (auto BBIDStr : BBIDs) {
106         unsigned long long BBID;
107         if (getAsUnsignedInteger(BBIDStr, 10, BBID))
108           return invalidProfileError(Twine("Unsigned integer expected: '") +
109                                      BBIDStr + "'.");
110         if (!FuncBBIDs.insert(BBID).second)
111           return invalidProfileError(Twine("Duplicate basic block id found '") +
112                                      BBIDStr + "'.");
113         if (BBID == 0 && CurrentPosition)
114           return invalidProfileError("Entry BB (0) does not begin a cluster.");
115 
116         FI->second.emplace_back(
117             BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
118       }
119       CurrentCluster++;
120     } else {
121       // This is a function name specifier. It may include a debug info filename
122       // specifier starting with `M=`.
123       auto [AliasesStr, DIFilenameStr] = S.split(' ');
124       SmallString<128> DIFilename;
125       if (DIFilenameStr.startswith("M=")) {
126         DIFilename =
127             sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
128         if (DIFilename.empty())
129           return invalidProfileError("Empty module name specifier.");
130       } else if (!DIFilenameStr.empty()) {
131         return invalidProfileError("Unknown string found: '" + DIFilenameStr +
132                                    "'.");
133       }
134       // Function aliases are separated using '/'. We use the first function
135       // name for the cluster info mapping and delegate all other aliases to
136       // this one.
137       SmallVector<StringRef, 4> Aliases;
138       AliasesStr.split(Aliases, '/');
139       bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
140         auto It = FunctionNameToDIFilename.find(Alias);
141         // No match if this function name is not found in this module.
142         if (It == FunctionNameToDIFilename.end())
143           return false;
144         // Return a match if debug-info-filename is not specified. Otherwise,
145         // check for equality.
146         return DIFilename.empty() || It->second.equals(DIFilename);
147       });
148       if (!FunctionFound) {
149         // Skip the following profile by setting the profile iterator (FI) to
150         // the past-the-end element.
151         FI = ProgramBBClusterInfo.end();
152         continue;
153       }
154       for (size_t i = 1; i < Aliases.size(); ++i)
155         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
156 
157       // Prepare for parsing clusters of this function name.
158       // Start a new cluster map for this function name.
159       auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
160       // Report error when multiple profiles have been specified for the same
161       // function.
162       if (!R.second)
163         return invalidProfileError("Duplicate profile for function '" +
164                                    Aliases.front() + "'.");
165       FI = R.first;
166       CurrentCluster = 0;
167       FuncBBIDs.clear();
168     }
169   }
170   return Error::success();
171 }
172 
173 bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
174   if (!MBuf)
175     return false;
176   // Get the function name to debug info filename mapping.
177   FunctionNameToDIFilename.clear();
178   for (const Function &F : M) {
179     SmallString<128> DIFilename;
180     if (F.isDeclaration())
181       continue;
182     DISubprogram *Subprogram = F.getSubprogram();
183     if (Subprogram) {
184       llvm::DICompileUnit *CU = Subprogram->getUnit();
185       if (CU)
186         DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
187     }
188     [[maybe_unused]] bool inserted =
189         FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
190     assert(inserted);
191   }
192   if (auto Err = ReadProfile())
193     report_fatal_error(std::move(Err));
194   return false;
195 }
196 
197 ImmutablePass *
198 llvm::createBasicBlockSectionsProfileReaderPass(const MemoryBuffer *Buf) {
199   return new BasicBlockSectionsProfileReader(Buf);
200 }
201