1 //===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within
// existing code.  It is implemented as a compiler pass and is configured via a
// YAML configuration file.
12 //
13 // The YAML configuration file format is as follows:
14 //
15 // RewriteMapFile := RewriteDescriptors
// RewriteDescriptors := RewriteDescriptor | RewriteDescriptor RewriteDescriptors
// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
// RewriteDescriptorFields := RewriteDescriptorField |
//                            RewriteDescriptorField RewriteDescriptorFields
19 // RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
20 // RewriteDescriptorType := Identifier
21 // FieldIdentifier := Identifier
22 // FieldValue := Identifier
23 // Identifier := [0-9a-zA-Z]+
24 //
25 // Currently, the following descriptor types are supported:
26 //
27 // - function:          (function rewriting)
28 //      + Source        (original name of the function)
29 //      + Target        (explicit transformation)
30 //      + Transform     (pattern transformation)
31 //      + Naked         (boolean, whether the function is undecorated)
32 // - global variable:   (external linkage global variable rewriting)
33 //      + Source        (original name of externally visible variable)
34 //      + Target        (explicit transformation)
35 //      + Transform     (pattern transformation)
36 // - global alias:      (global alias rewriting)
37 //      + Source        (original name of the aliased name)
38 //      + Target        (explicit transformation)
39 //      + Transform     (pattern transformation)
40 //
// Note that Source and exactly one of [Target, Transform] must be provided.
42 //
// New rewrite descriptors can be created.  Adding a new rewrite descriptor
// involves:
//
//  a) extending the rewrite descriptor kind enumeration
//     (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
48 //  b) implementing the new descriptor
49 //     (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
50 //  c) extending the rewrite map parser
51 //     (<anonymous>::RewriteMapParser::parseEntry)
52 //
//  Enable symbol rewriting with the `-rewrite-symbols` option, and specify the
//  map file to use for the rewriting via the `-rewrite-map-file` option.
56 //
57 //===----------------------------------------------------------------------===//
58 
59 #include "llvm/Transforms/Utils/SymbolRewriter.h"
60 #include "llvm/ADT/SmallString.h"
61 #include "llvm/ADT/StringRef.h"
62 #include "llvm/ADT/ilist.h"
63 #include "llvm/ADT/iterator_range.h"
64 #include "llvm/IR/Comdat.h"
65 #include "llvm/IR/Function.h"
66 #include "llvm/IR/GlobalAlias.h"
67 #include "llvm/IR/GlobalObject.h"
68 #include "llvm/IR/GlobalVariable.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/InitializePasses.h"
72 #include "llvm/Pass.h"
73 #include "llvm/Support/Casting.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/ErrorHandling.h"
76 #include "llvm/Support/ErrorOr.h"
77 #include "llvm/Support/MemoryBuffer.h"
78 #include "llvm/Support/Regex.h"
79 #include "llvm/Support/SourceMgr.h"
80 #include "llvm/Support/YAMLParser.h"
81 #include <memory>
82 #include <string>
83 #include <vector>
84 
85 using namespace llvm;
86 using namespace SymbolRewriter;
87 
88 #define DEBUG_TYPE "symbol-rewriter"
89 
/// Filenames supplied through the `rewrite-map-file` command line option.
/// Declared as a cl::list, so the option can hold more than one filename; the
/// option is marked cl::Hidden.  Each entry is handed to
/// RewriteMapParser::parse by RewriteSymbolPass::loadAndParseMapFiles.
static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
                                             cl::desc("Symbol Rewrite Map"),
                                             cl::value_desc("filename"),
                                             cl::Hidden);
94 
95 static void rewriteComdat(Module &M, GlobalObject *GO,
96                           const std::string &Source,
97                           const std::string &Target) {
98   if (Comdat *CD = GO->getComdat()) {
99     auto &Comdats = M.getComdatSymbolTable();
100 
101     Comdat *C = M.getOrInsertComdat(Target);
102     C->setSelectionKind(CD->getSelectionKind());
103     GO->setComdat(C);
104 
105     Comdats.erase(Comdats.find(Source));
106   }
107 }
108 
109 namespace {
110 
111 template <RewriteDescriptor::Type DT, typename ValueType,
112           ValueType *(Module::*Get)(StringRef) const>
113 class ExplicitRewriteDescriptor : public RewriteDescriptor {
114 public:
115   const std::string Source;
116   const std::string Target;
117 
118   ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
119       : RewriteDescriptor(DT),
120         Source(std::string(Naked ? StringRef("\01" + S.str()) : S)),
121         Target(std::string(T)) {}
122 
123   bool performOnModule(Module &M) override;
124 
125   static bool classof(const RewriteDescriptor *RD) {
126     return RD->getType() == DT;
127   }
128 };
129 
130 } // end anonymous namespace
131 
132 template <RewriteDescriptor::Type DT, typename ValueType,
133           ValueType *(Module::*Get)(StringRef) const>
134 bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
135   bool Changed = false;
136   if (ValueType *S = (M.*Get)(Source)) {
137     if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
138       rewriteComdat(M, GO, Source, Target);
139 
140     if (Value *T = (M.*Get)(Target))
141       S->setValueName(T->getValueName());
142     else
143       S->setName(Target);
144 
145     Changed = true;
146   }
147   return Changed;
148 }
149 
150 namespace {
151 
152 template <RewriteDescriptor::Type DT, typename ValueType,
153           ValueType *(Module::*Get)(StringRef) const,
154           iterator_range<typename iplist<ValueType>::iterator>
155           (Module::*Iterator)()>
156 class PatternRewriteDescriptor : public RewriteDescriptor {
157 public:
158   const std::string Pattern;
159   const std::string Transform;
160 
161   PatternRewriteDescriptor(StringRef P, StringRef T)
162       : RewriteDescriptor(DT), Pattern(std::string(P)),
163         Transform(std::string(T)) {}
164 
165   bool performOnModule(Module &M) override;
166 
167   static bool classof(const RewriteDescriptor *RD) {
168     return RD->getType() == DT;
169   }
170 };
171 
172 } // end anonymous namespace
173 
174 template <RewriteDescriptor::Type DT, typename ValueType,
175           ValueType *(Module::*Get)(StringRef) const,
176           iterator_range<typename iplist<ValueType>::iterator>
177           (Module::*Iterator)()>
178 bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
179 performOnModule(Module &M) {
180   bool Changed = false;
181   for (auto &C : (M.*Iterator)()) {
182     std::string Error;
183 
184     std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
185     if (!Error.empty())
186       report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " +
187                          M.getModuleIdentifier() + ": " + Error);
188 
189     if (C.getName() == Name)
190       continue;
191 
192     if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
193       rewriteComdat(M, GO, std::string(C.getName()), Name);
194 
195     if (Value *V = (M.*Get)(Name))
196       C.setValueName(V->getValueName());
197     else
198       C.setName(Name);
199 
200     Changed = true;
201   }
202   return Changed;
203 }
204 
namespace {

/// Represents a rewrite for an explicitly named function symbol.  Both the
/// source function name and the target function name of the transformation
/// are explicitly spelt out.
using ExplicitRewriteFunctionDescriptor =
    ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
                              &Module::getFunction>;

/// Represents a rewrite for an explicitly named global variable symbol.  Both
/// the source variable name and the target variable name are spelt out.  This
/// applies only to module level variables.
using ExplicitRewriteGlobalVariableDescriptor =
    ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
                              GlobalVariable, &Module::getGlobalVariable>;

/// Represents a rewrite for an explicitly named global alias.  Both the source
/// and the target name are explicitly spelt out.
using ExplicitRewriteNamedAliasDescriptor =
    ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
                              &Module::getNamedAlias>;

/// Represents a rewrite for functions based on a regular expression pattern.
/// The rule consists of a pattern matched against function names and a
/// transformation that produces the target name from a matching name.
using PatternRewriteFunctionDescriptor =
    PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
                             &Module::getFunction, &Module::functions>;

/// Represents a rewrite for global variables based on a matching pattern.
/// Each global variable matching the provided pattern will be transformed as
/// described in the transformation pattern for the target.  Applies only to
/// module level variables.
using PatternRewriteGlobalVariableDescriptor =
    PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
                             GlobalVariable, &Module::getGlobalVariable,
                             &Module::globals>;

/// Represents a rewrite for global aliases which match a given pattern.  The
/// provided transformation will be applied to each of the matching names.
using PatternRewriteNamedAliasDescriptor =
    PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
                             &Module::getNamedAlias, &Module::aliases>;

} // end anonymous namespace
251 
252 bool RewriteMapParser::parse(const std::string &MapFile,
253                              RewriteDescriptorList *DL) {
254   ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
255       MemoryBuffer::getFile(MapFile);
256 
257   if (!Mapping)
258     report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
259                        "': " + Mapping.getError().message());
260 
261   if (!parse(*Mapping, DL))
262     report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
263 
264   return true;
265 }
266 
267 bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
268                              RewriteDescriptorList *DL) {
269   SourceMgr SM;
270   yaml::Stream YS(MapFile->getBuffer(), SM);
271 
272   for (auto &Document : YS) {
273     yaml::MappingNode *DescriptorList;
274 
275     // ignore empty documents
276     if (isa<yaml::NullNode>(Document.getRoot()))
277       continue;
278 
279     DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
280     if (!DescriptorList) {
281       YS.printError(Document.getRoot(), "DescriptorList node must be a map");
282       return false;
283     }
284 
285     for (auto &Descriptor : *DescriptorList)
286       if (!parseEntry(YS, Descriptor, DL))
287         return false;
288   }
289 
290   return true;
291 }
292 
293 bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
294                                   RewriteDescriptorList *DL) {
295   yaml::ScalarNode *Key;
296   yaml::MappingNode *Value;
297   SmallString<32> KeyStorage;
298   StringRef RewriteType;
299 
300   Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
301   if (!Key) {
302     YS.printError(Entry.getKey(), "rewrite type must be a scalar");
303     return false;
304   }
305 
306   Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
307   if (!Value) {
308     YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
309     return false;
310   }
311 
312   RewriteType = Key->getValue(KeyStorage);
313   if (RewriteType.equals("function"))
314     return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
315   else if (RewriteType.equals("global variable"))
316     return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
317   else if (RewriteType.equals("global alias"))
318     return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
319 
320   YS.printError(Entry.getKey(), "unknown rewrite type");
321   return false;
322 }
323 
324 bool RewriteMapParser::
325 parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
326                                yaml::MappingNode *Descriptor,
327                                RewriteDescriptorList *DL) {
328   bool Naked = false;
329   std::string Source;
330   std::string Target;
331   std::string Transform;
332 
333   for (auto &Field : *Descriptor) {
334     yaml::ScalarNode *Key;
335     yaml::ScalarNode *Value;
336     SmallString<32> KeyStorage;
337     SmallString<32> ValueStorage;
338     StringRef KeyValue;
339 
340     Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
341     if (!Key) {
342       YS.printError(Field.getKey(), "descriptor key must be a scalar");
343       return false;
344     }
345 
346     Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
347     if (!Value) {
348       YS.printError(Field.getValue(), "descriptor value must be a scalar");
349       return false;
350     }
351 
352     KeyValue = Key->getValue(KeyStorage);
353     if (KeyValue.equals("source")) {
354       std::string Error;
355 
356       Source = std::string(Value->getValue(ValueStorage));
357       if (!Regex(Source).isValid(Error)) {
358         YS.printError(Field.getKey(), "invalid regex: " + Error);
359         return false;
360       }
361     } else if (KeyValue.equals("target")) {
362       Target = std::string(Value->getValue(ValueStorage));
363     } else if (KeyValue.equals("transform")) {
364       Transform = std::string(Value->getValue(ValueStorage));
365     } else if (KeyValue.equals("naked")) {
366       std::string Undecorated;
367 
368       Undecorated = std::string(Value->getValue(ValueStorage));
369       Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
370     } else {
371       YS.printError(Field.getKey(), "unknown key for function");
372       return false;
373     }
374   }
375 
376   if (Transform.empty() == Target.empty()) {
377     YS.printError(Descriptor,
378                   "exactly one of transform or target must be specified");
379     return false;
380   }
381 
382   // TODO see if there is a more elegant solution to selecting the rewrite
383   // descriptor type
384   if (!Target.empty())
385     DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
386         Source, Target, Naked));
387   else
388     DL->push_back(
389         std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
390 
391   return true;
392 }
393 
394 bool RewriteMapParser::
395 parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
396                                      yaml::MappingNode *Descriptor,
397                                      RewriteDescriptorList *DL) {
398   std::string Source;
399   std::string Target;
400   std::string Transform;
401 
402   for (auto &Field : *Descriptor) {
403     yaml::ScalarNode *Key;
404     yaml::ScalarNode *Value;
405     SmallString<32> KeyStorage;
406     SmallString<32> ValueStorage;
407     StringRef KeyValue;
408 
409     Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
410     if (!Key) {
411       YS.printError(Field.getKey(), "descriptor Key must be a scalar");
412       return false;
413     }
414 
415     Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
416     if (!Value) {
417       YS.printError(Field.getValue(), "descriptor value must be a scalar");
418       return false;
419     }
420 
421     KeyValue = Key->getValue(KeyStorage);
422     if (KeyValue.equals("source")) {
423       std::string Error;
424 
425       Source = std::string(Value->getValue(ValueStorage));
426       if (!Regex(Source).isValid(Error)) {
427         YS.printError(Field.getKey(), "invalid regex: " + Error);
428         return false;
429       }
430     } else if (KeyValue.equals("target")) {
431       Target = std::string(Value->getValue(ValueStorage));
432     } else if (KeyValue.equals("transform")) {
433       Transform = std::string(Value->getValue(ValueStorage));
434     } else {
435       YS.printError(Field.getKey(), "unknown Key for Global Variable");
436       return false;
437     }
438   }
439 
440   if (Transform.empty() == Target.empty()) {
441     YS.printError(Descriptor,
442                   "exactly one of transform or target must be specified");
443     return false;
444   }
445 
446   if (!Target.empty())
447     DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
448         Source, Target,
449         /*Naked*/ false));
450   else
451     DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
452         Source, Transform));
453 
454   return true;
455 }
456 
457 bool RewriteMapParser::
458 parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
459                                   yaml::MappingNode *Descriptor,
460                                   RewriteDescriptorList *DL) {
461   std::string Source;
462   std::string Target;
463   std::string Transform;
464 
465   for (auto &Field : *Descriptor) {
466     yaml::ScalarNode *Key;
467     yaml::ScalarNode *Value;
468     SmallString<32> KeyStorage;
469     SmallString<32> ValueStorage;
470     StringRef KeyValue;
471 
472     Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
473     if (!Key) {
474       YS.printError(Field.getKey(), "descriptor key must be a scalar");
475       return false;
476     }
477 
478     Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
479     if (!Value) {
480       YS.printError(Field.getValue(), "descriptor value must be a scalar");
481       return false;
482     }
483 
484     KeyValue = Key->getValue(KeyStorage);
485     if (KeyValue.equals("source")) {
486       std::string Error;
487 
488       Source = std::string(Value->getValue(ValueStorage));
489       if (!Regex(Source).isValid(Error)) {
490         YS.printError(Field.getKey(), "invalid regex: " + Error);
491         return false;
492       }
493     } else if (KeyValue.equals("target")) {
494       Target = std::string(Value->getValue(ValueStorage));
495     } else if (KeyValue.equals("transform")) {
496       Transform = std::string(Value->getValue(ValueStorage));
497     } else {
498       YS.printError(Field.getKey(), "unknown key for Global Alias");
499       return false;
500     }
501   }
502 
503   if (Transform.empty() == Target.empty()) {
504     YS.printError(Descriptor,
505                   "exactly one of transform or target must be specified");
506     return false;
507   }
508 
509   if (!Target.empty())
510     DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
511         Source, Target,
512         /*Naked*/ false));
513   else
514     DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
515         Source, Transform));
516 
517   return true;
518 }
519 
520 PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
521   if (!runImpl(M))
522     return PreservedAnalyses::all();
523 
524   return PreservedAnalyses::none();
525 }
526 
527 bool RewriteSymbolPass::runImpl(Module &M) {
528   bool Changed;
529 
530   Changed = false;
531   for (auto &Descriptor : Descriptors)
532     Changed |= Descriptor->performOnModule(M);
533 
534   return Changed;
535 }
536 
537 void RewriteSymbolPass::loadAndParseMapFiles() {
538   const std::vector<std::string> MapFiles(RewriteMapFiles);
539   SymbolRewriter::RewriteMapParser Parser;
540 
541   for (const auto &MapFile : MapFiles)
542     Parser.parse(MapFile, &Descriptors);
543 }
544