1 //===----------------- ItaniumManglingCanonicalizer.cpp -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ProfileData/ItaniumManglingCanonicalizer.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/FoldingSet.h"
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/Demangle/ItaniumDemangle.h"
14 #include "llvm/Support/Allocator.h"
15 
16 using namespace llvm;
17 using llvm::itanium_demangle::ForwardTemplateReference;
18 using llvm::itanium_demangle::Node;
19 using llvm::itanium_demangle::NodeKind;
20 
21 namespace {
22 struct FoldingSetNodeIDBuilder {
23   llvm::FoldingSetNodeID &ID;
24   void operator()(const Node *P) { ID.AddPointer(P); }
25   void operator()(std::string_view Str) {
26     if (Str.empty())
27       ID.AddString({});
28     else
29       ID.AddString(llvm::StringRef(&*Str.begin(), Str.size()));
30   }
31   template <typename T>
32   std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) {
33     ID.AddInteger((unsigned long long)V);
34   }
35   void operator()(itanium_demangle::NodeArray A) {
36     ID.AddInteger(A.size());
37     for (const Node *N : A)
38       (*this)(N);
39   }
40 };
41 
42 template<typename ...T>
43 void profileCtor(llvm::FoldingSetNodeID &ID, Node::Kind K, T ...V) {
44   FoldingSetNodeIDBuilder Builder = {ID};
45   Builder(K);
46   int VisitInOrder[] = {
47     (Builder(V), 0) ...,
48     0 // Avoid empty array if there are no arguments.
49   };
50   (void)VisitInOrder;
51 }
52 
53 // FIXME: Convert this to a generic lambda when possible.
54 template<typename NodeT> struct ProfileSpecificNode {
55   FoldingSetNodeID &ID;
56   template<typename ...T> void operator()(T ...V) {
57     profileCtor(ID, NodeKind<NodeT>::Kind, V...);
58   }
59 };
60 
61 struct ProfileNode {
62   FoldingSetNodeID &ID;
63   template<typename NodeT> void operator()(const NodeT *N) {
64     N->match(ProfileSpecificNode<NodeT>{ID});
65   }
66 };
67 
68 template<> void ProfileNode::operator()(const ForwardTemplateReference *N) {
69   llvm_unreachable("should never canonicalize a ForwardTemplateReference");
70 }
71 
72 void profileNode(llvm::FoldingSetNodeID &ID, const Node *N) {
73   N->visit(ProfileNode{ID});
74 }
75 
76 class FoldingNodeAllocator {
77   class alignas(alignof(Node *)) NodeHeader : public llvm::FoldingSetNode {
78   public:
79     // 'Node' in this context names the injected-class-name of the base class.
80     itanium_demangle::Node *getNode() {
81       return reinterpret_cast<itanium_demangle::Node *>(this + 1);
82     }
83     void Profile(llvm::FoldingSetNodeID &ID) { profileNode(ID, getNode()); }
84   };
85 
86   BumpPtrAllocator RawAlloc;
87   llvm::FoldingSet<NodeHeader> Nodes;
88 
89 public:
90   void reset() {}
91 
92   template <typename T, typename... Args>
93   std::pair<Node *, bool> getOrCreateNode(bool CreateNewNodes, Args &&... As) {
94     // FIXME: Don't canonicalize forward template references for now, because
95     // they contain state (the resolved template node) that's not known at their
96     // point of creation.
97     if (std::is_same<T, ForwardTemplateReference>::value) {
98       // Note that we don't use if-constexpr here and so we must still write
99       // this code in a generic form.
100       return {new (RawAlloc.Allocate(sizeof(T), alignof(T)))
101                   T(std::forward<Args>(As)...),
102               true};
103     }
104 
105     llvm::FoldingSetNodeID ID;
106     profileCtor(ID, NodeKind<T>::Kind, As...);
107 
108     void *InsertPos;
109     if (NodeHeader *Existing = Nodes.FindNodeOrInsertPos(ID, InsertPos))
110       return {static_cast<T*>(Existing->getNode()), false};
111 
112     if (!CreateNewNodes)
113       return {nullptr, true};
114 
115     static_assert(alignof(T) <= alignof(NodeHeader),
116                   "underaligned node header for specific node kind");
117     void *Storage =
118         RawAlloc.Allocate(sizeof(NodeHeader) + sizeof(T), alignof(NodeHeader));
119     NodeHeader *New = new (Storage) NodeHeader;
120     T *Result = new (New->getNode()) T(std::forward<Args>(As)...);
121     Nodes.InsertNode(New, InsertPos);
122     return {Result, true};
123   }
124 
125   template<typename T, typename... Args>
126   Node *makeNode(Args &&...As) {
127     return getOrCreateNode<T>(true, std::forward<Args>(As)...).first;
128   }
129 
130   void *allocateNodeArray(size_t sz) {
131     return RawAlloc.Allocate(sizeof(Node *) * sz, alignof(Node *));
132   }
133 };
134 
135 class CanonicalizerAllocator : public FoldingNodeAllocator {
136   Node *MostRecentlyCreated = nullptr;
137   Node *TrackedNode = nullptr;
138   bool TrackedNodeIsUsed = false;
139   bool CreateNewNodes = true;
140   llvm::SmallDenseMap<Node*, Node*, 32> Remappings;
141 
142   template<typename T, typename ...Args> Node *makeNodeSimple(Args &&...As) {
143     std::pair<Node *, bool> Result =
144         getOrCreateNode<T>(CreateNewNodes, std::forward<Args>(As)...);
145     if (Result.second) {
146       // Node is new. Make a note of that.
147       MostRecentlyCreated = Result.first;
148     } else if (Result.first) {
149       // Node is pre-existing; check if it's in our remapping table.
150       if (auto *N = Remappings.lookup(Result.first)) {
151         Result.first = N;
152         assert(!Remappings.contains(Result.first) &&
153                "should never need multiple remap steps");
154       }
155       if (Result.first == TrackedNode)
156         TrackedNodeIsUsed = true;
157     }
158     return Result.first;
159   }
160 
161   /// Helper to allow makeNode to be partially-specialized on T.
162   template<typename T> struct MakeNodeImpl {
163     CanonicalizerAllocator &Self;
164     template<typename ...Args> Node *make(Args &&...As) {
165       return Self.makeNodeSimple<T>(std::forward<Args>(As)...);
166     }
167   };
168 
169 public:
170   template<typename T, typename ...Args> Node *makeNode(Args &&...As) {
171     return MakeNodeImpl<T>{*this}.make(std::forward<Args>(As)...);
172   }
173 
174   void reset() { MostRecentlyCreated = nullptr; }
175 
176   void setCreateNewNodes(bool CNN) { CreateNewNodes = CNN; }
177 
178   void addRemapping(Node *A, Node *B) {
179     // Note, we don't need to check whether B is also remapped, because if it
180     // was we would have already remapped it when building it.
181     Remappings.insert(std::make_pair(A, B));
182   }
183 
184   bool isMostRecentlyCreated(Node *N) const { return MostRecentlyCreated == N; }
185 
186   void trackUsesOf(Node *N) {
187     TrackedNode = N;
188     TrackedNodeIsUsed = false;
189   }
190   bool trackedNodeIsUsed() const { return TrackedNodeIsUsed; }
191 };
192 
193 // FIXME: Also expand built-in substitutions?
194 
195 using CanonicalizingDemangler =
196     itanium_demangle::ManglingParser<CanonicalizerAllocator>;
197 } // namespace
198 
199 struct ItaniumManglingCanonicalizer::Impl {
200   CanonicalizingDemangler Demangler = {nullptr, nullptr};
201 };
202 
203 ItaniumManglingCanonicalizer::ItaniumManglingCanonicalizer() : P(new Impl) {}
204 ItaniumManglingCanonicalizer::~ItaniumManglingCanonicalizer() { delete P; }
205 
206 ItaniumManglingCanonicalizer::EquivalenceError
207 ItaniumManglingCanonicalizer::addEquivalence(FragmentKind Kind, StringRef First,
208                                              StringRef Second) {
209   auto &Alloc = P->Demangler.ASTAllocator;
210   Alloc.setCreateNewNodes(true);
211 
212   auto Parse = [&](StringRef Str) {
213     P->Demangler.reset(Str.begin(), Str.end());
214     Node *N = nullptr;
215     switch (Kind) {
216       // A <name>, with minor extensions to allow arbitrary namespace and
217       // template names that can't easily be written as <name>s.
218     case FragmentKind::Name:
219       // Very special case: allow "St" as a shorthand for "3std". It's not
220       // valid as a <name> mangling, but is nonetheless the most natural
221       // way to name the 'std' namespace.
222       if (Str.size() == 2 && P->Demangler.consumeIf("St"))
223         N = P->Demangler.make<itanium_demangle::NameType>("std");
224       // We permit substitutions to name templates without their template
225       // arguments. This mostly just falls out, as almost all template names
226       // are valid as <name>s, but we also want to parse <substitution>s as
227       // <name>s, even though they're not.
228       else if (Str.starts_with("S"))
229         // Parse the substitution and optional following template arguments.
230         N = P->Demangler.parseType();
231       else
232         N = P->Demangler.parseName();
233       break;
234 
235       // A <type>.
236     case FragmentKind::Type:
237       N = P->Demangler.parseType();
238       break;
239 
240       // An <encoding>.
241     case FragmentKind::Encoding:
242       N = P->Demangler.parseEncoding();
243       break;
244     }
245 
246     // If we have trailing junk, the mangling is invalid.
247     if (P->Demangler.numLeft() != 0)
248       N = nullptr;
249 
250     // If any node was created after N, then we cannot safely remap it because
251     // it might already be in use by another node.
252     return std::make_pair(N, Alloc.isMostRecentlyCreated(N));
253   };
254 
255   Node *FirstNode, *SecondNode;
256   bool FirstIsNew, SecondIsNew;
257 
258   std::tie(FirstNode, FirstIsNew) = Parse(First);
259   if (!FirstNode)
260     return EquivalenceError::InvalidFirstMangling;
261 
262   Alloc.trackUsesOf(FirstNode);
263   std::tie(SecondNode, SecondIsNew) = Parse(Second);
264   if (!SecondNode)
265     return EquivalenceError::InvalidSecondMangling;
266 
267   // If they're already equivalent, there's nothing to do.
268   if (FirstNode == SecondNode)
269     return EquivalenceError::Success;
270 
271   if (FirstIsNew && !Alloc.trackedNodeIsUsed())
272     Alloc.addRemapping(FirstNode, SecondNode);
273   else if (SecondIsNew)
274     Alloc.addRemapping(SecondNode, FirstNode);
275   else
276     return EquivalenceError::ManglingAlreadyUsed;
277 
278   return EquivalenceError::Success;
279 }
280 
281 static ItaniumManglingCanonicalizer::Key
282 parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling,
283                       bool CreateNewNodes) {
284   Demangler.ASTAllocator.setCreateNewNodes(CreateNewNodes);
285   Demangler.reset(Mangling.begin(), Mangling.end());
286   // Attempt demangling only for names that look like C++ mangled names.
287   // Otherwise, treat them as extern "C" names. We permit the latter to
288   // be remapped by (eg)
289   //   encoding 6memcpy 7memmove
290   // consistent with how they are encoded as local-names inside a C++ mangling.
291   Node *N;
292   if (Mangling.starts_with("_Z") || Mangling.starts_with("__Z") ||
293       Mangling.starts_with("___Z") || Mangling.starts_with("____Z"))
294     N = Demangler.parse();
295   else
296     N = Demangler.make<itanium_demangle::NameType>(
297         std::string_view(Mangling.data(), Mangling.size()));
298   return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N);
299 }
300 
301 ItaniumManglingCanonicalizer::Key
302 ItaniumManglingCanonicalizer::canonicalize(StringRef Mangling) {
303   return parseMaybeMangledName(P->Demangler, Mangling, true);
304 }
305 
306 ItaniumManglingCanonicalizer::Key
307 ItaniumManglingCanonicalizer::lookup(StringRef Mangling) {
308   return parseMaybeMangledName(P->Demangler, Mangling, false);
309 }
310