1 //===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within
// existing code.  It is implemented as a compiler pass and is configured via a
// YAML configuration file.
12 //
13 // The YAML configuration file format is as follows:
14 //
15 // RewriteMapFile := RewriteDescriptors
16 // RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
17 // RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
18 // RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
19 // RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
20 // RewriteDescriptorType := Identifier
21 // FieldIdentifier := Identifier
22 // FieldValue := Identifier
23 // Identifier := [0-9a-zA-Z]+
24 //
25 // Currently, the following descriptor types are supported:
26 //
27 // - function:          (function rewriting)
28 //      + Source        (original name of the function)
29 //      + Target        (explicit transformation)
30 //      + Transform     (pattern transformation)
31 //      + Naked         (boolean, whether the function is undecorated)
32 // - global variable:   (external linkage global variable rewriting)
33 //      + Source        (original name of externally visible variable)
34 //      + Target        (explicit transformation)
35 //      + Transform     (pattern transformation)
36 // - global alias:      (global alias rewriting)
37 //      + Source        (original name of the aliased name)
38 //      + Target        (explicit transformation)
39 //      + Transform     (pattern transformation)
40 //
41 // Note that source and exactly one of [Target, Transform] must be provided
42 //
// New rewrite descriptors can be created.  Adding a new rewrite descriptor
// involves:
//
//  a) extending the rewrite descriptor kind enumeration
//     (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
48 //  b) implementing the new descriptor
49 //     (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
50 //  c) extending the rewrite map parser
51 //     (<anonymous>::RewriteMapParser::parseEntry)
52 //
53 //  Specify to rewrite the symbols using the `-rewrite-symbols` option, and
54 //  specify the map file to use for the rewriting via the `-rewrite-map-file`
55 //  option.
56 //
57 //===----------------------------------------------------------------------===//
58 
59 #include "llvm/Transforms/Utils/SymbolRewriter.h"
60 #include "llvm/ADT/SmallString.h"
61 #include "llvm/ADT/StringRef.h"
62 #include "llvm/ADT/ilist.h"
63 #include "llvm/ADT/iterator_range.h"
64 #include "llvm/IR/Comdat.h"
65 #include "llvm/IR/Function.h"
66 #include "llvm/IR/GlobalAlias.h"
67 #include "llvm/IR/GlobalObject.h"
68 #include "llvm/IR/GlobalVariable.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Value.h"
71 #include "llvm/Support/Casting.h"
72 #include "llvm/Support/CommandLine.h"
73 #include "llvm/Support/ErrorHandling.h"
74 #include "llvm/Support/ErrorOr.h"
75 #include "llvm/Support/MemoryBuffer.h"
76 #include "llvm/Support/Regex.h"
77 #include "llvm/Support/SourceMgr.h"
78 #include "llvm/Support/YAMLParser.h"
79 #include <memory>
80 #include <string>
81 #include <vector>
82 
83 using namespace llvm;
84 using namespace SymbolRewriter;
85 
86 #define DEBUG_TYPE "symbol-rewriter"
87 
88 static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
89                                              cl::desc("Symbol Rewrite Map"),
90                                              cl::value_desc("filename"),
91                                              cl::Hidden);
92 
rewriteComdat(Module & M,GlobalObject * GO,const std::string & Source,const std::string & Target)93 static void rewriteComdat(Module &M, GlobalObject *GO,
94                           const std::string &Source,
95                           const std::string &Target) {
96   if (Comdat *CD = GO->getComdat()) {
97     auto &Comdats = M.getComdatSymbolTable();
98 
99     Comdat *C = M.getOrInsertComdat(Target);
100     C->setSelectionKind(CD->getSelectionKind());
101     GO->setComdat(C);
102 
103     Comdats.erase(Comdats.find(Source));
104   }
105 }
106 
107 namespace {
108 
109 template <RewriteDescriptor::Type DT, typename ValueType,
110           ValueType *(Module::*Get)(StringRef) const>
111 class ExplicitRewriteDescriptor : public RewriteDescriptor {
112 public:
113   const std::string Source;
114   const std::string Target;
115 
ExplicitRewriteDescriptor(StringRef S,StringRef T,const bool Naked)116   ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
117       : RewriteDescriptor(DT),
118         Source(std::string(Naked ? StringRef("\01" + S.str()) : S)),
119         Target(std::string(T)) {}
120 
121   bool performOnModule(Module &M) override;
122 
classof(const RewriteDescriptor * RD)123   static bool classof(const RewriteDescriptor *RD) {
124     return RD->getType() == DT;
125   }
126 };
127 
128 } // end anonymous namespace
129 
130 template <RewriteDescriptor::Type DT, typename ValueType,
131           ValueType *(Module::*Get)(StringRef) const>
performOnModule(Module & M)132 bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
133   bool Changed = false;
134   if (ValueType *S = (M.*Get)(Source)) {
135     if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
136       rewriteComdat(M, GO, Source, Target);
137 
138     if (Value *T = (M.*Get)(Target))
139       S->setValueName(T->getValueName());
140     else
141       S->setName(Target);
142 
143     Changed = true;
144   }
145   return Changed;
146 }
147 
148 namespace {
149 
150 template <RewriteDescriptor::Type DT, typename ValueType,
151           ValueType *(Module::*Get)(StringRef) const,
152           iterator_range<typename iplist<ValueType>::iterator>
153           (Module::*Iterator)()>
154 class PatternRewriteDescriptor : public RewriteDescriptor {
155 public:
156   const std::string Pattern;
157   const std::string Transform;
158 
PatternRewriteDescriptor(StringRef P,StringRef T)159   PatternRewriteDescriptor(StringRef P, StringRef T)
160       : RewriteDescriptor(DT), Pattern(std::string(P)),
161         Transform(std::string(T)) {}
162 
163   bool performOnModule(Module &M) override;
164 
classof(const RewriteDescriptor * RD)165   static bool classof(const RewriteDescriptor *RD) {
166     return RD->getType() == DT;
167   }
168 };
169 
170 } // end anonymous namespace
171 
172 template <RewriteDescriptor::Type DT, typename ValueType,
173           ValueType *(Module::*Get)(StringRef) const,
174           iterator_range<typename iplist<ValueType>::iterator>
175           (Module::*Iterator)()>
176 bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
performOnModule(Module & M)177 performOnModule(Module &M) {
178   bool Changed = false;
179   for (auto &C : (M.*Iterator)()) {
180     std::string Error;
181 
182     std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
183     if (!Error.empty())
184       report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " +
185                          M.getModuleIdentifier() + ": " + Error);
186 
187     if (C.getName() == Name)
188       continue;
189 
190     if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
191       rewriteComdat(M, GO, std::string(C.getName()), Name);
192 
193     if (Value *V = (M.*Get)(Name))
194       C.setValueName(V->getValueName());
195     else
196       C.setName(Name);
197 
198     Changed = true;
199   }
200   return Changed;
201 }
202 
203 namespace {
204 
205 /// Represents a rewrite for an explicitly named (function) symbol.  Both the
206 /// source function name and target function name of the transformation are
207 /// explicitly spelt out.
208 using ExplicitRewriteFunctionDescriptor =
209     ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
210                               &Module::getFunction>;
211 
212 /// Represents a rewrite for an explicitly named (global variable) symbol.  Both
213 /// the source variable name and target variable name are spelt out.  This
214 /// applies only to module level variables.
215 using ExplicitRewriteGlobalVariableDescriptor =
216     ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
217                               GlobalVariable, &Module::getGlobalVariable>;
218 
219 /// Represents a rewrite for an explicitly named global alias.  Both the source
220 /// and target name are explicitly spelt out.
221 using ExplicitRewriteNamedAliasDescriptor =
222     ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
223                               &Module::getNamedAlias>;
224 
225 /// Represents a rewrite for a regular expression based pattern for functions.
226 /// A pattern for the function name is provided and a transformation for that
227 /// pattern to determine the target function name create the rewrite rule.
228 using PatternRewriteFunctionDescriptor =
229     PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
230                              &Module::getFunction, &Module::functions>;
231 
232 /// Represents a rewrite for a global variable based upon a matching pattern.
233 /// Each global variable matching the provided pattern will be transformed as
234 /// described in the transformation pattern for the target.  Applies only to
235 /// module level variables.
236 using PatternRewriteGlobalVariableDescriptor =
237     PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
238                              GlobalVariable, &Module::getGlobalVariable,
239                              &Module::globals>;
240 
241 /// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
242 /// aliases which match a given pattern.  The provided transformation will be
243 /// applied to each of the matching names.
244 using PatternRewriteNamedAliasDescriptor =
245     PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
246                              &Module::getNamedAlias, &Module::aliases>;
247 
248 } // end anonymous namespace
249 
parse(const std::string & MapFile,RewriteDescriptorList * DL)250 bool RewriteMapParser::parse(const std::string &MapFile,
251                              RewriteDescriptorList *DL) {
252   ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
253       MemoryBuffer::getFile(MapFile);
254 
255   if (!Mapping)
256     report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
257                        "': " + Mapping.getError().message());
258 
259   if (!parse(*Mapping, DL))
260     report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
261 
262   return true;
263 }
264 
parse(std::unique_ptr<MemoryBuffer> & MapFile,RewriteDescriptorList * DL)265 bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
266                              RewriteDescriptorList *DL) {
267   SourceMgr SM;
268   yaml::Stream YS(MapFile->getBuffer(), SM);
269 
270   for (auto &Document : YS) {
271     yaml::MappingNode *DescriptorList;
272 
273     // ignore empty documents
274     if (isa<yaml::NullNode>(Document.getRoot()))
275       continue;
276 
277     DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
278     if (!DescriptorList) {
279       YS.printError(Document.getRoot(), "DescriptorList node must be a map");
280       return false;
281     }
282 
283     for (auto &Descriptor : *DescriptorList)
284       if (!parseEntry(YS, Descriptor, DL))
285         return false;
286   }
287 
288   return true;
289 }
290 
parseEntry(yaml::Stream & YS,yaml::KeyValueNode & Entry,RewriteDescriptorList * DL)291 bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
292                                   RewriteDescriptorList *DL) {
293   yaml::ScalarNode *Key;
294   yaml::MappingNode *Value;
295   SmallString<32> KeyStorage;
296   StringRef RewriteType;
297 
298   Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
299   if (!Key) {
300     YS.printError(Entry.getKey(), "rewrite type must be a scalar");
301     return false;
302   }
303 
304   Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
305   if (!Value) {
306     YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
307     return false;
308   }
309 
310   RewriteType = Key->getValue(KeyStorage);
311   if (RewriteType.equals("function"))
312     return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
313   else if (RewriteType.equals("global variable"))
314     return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
315   else if (RewriteType.equals("global alias"))
316     return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
317 
318   YS.printError(Entry.getKey(), "unknown rewrite type");
319   return false;
320 }
321 
322 bool RewriteMapParser::
parseRewriteFunctionDescriptor(yaml::Stream & YS,yaml::ScalarNode * K,yaml::MappingNode * Descriptor,RewriteDescriptorList * DL)323 parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
324                                yaml::MappingNode *Descriptor,
325                                RewriteDescriptorList *DL) {
326   bool Naked = false;
327   std::string Source;
328   std::string Target;
329   std::string Transform;
330 
331   for (auto &Field : *Descriptor) {
332     yaml::ScalarNode *Key;
333     yaml::ScalarNode *Value;
334     SmallString<32> KeyStorage;
335     SmallString<32> ValueStorage;
336     StringRef KeyValue;
337 
338     Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
339     if (!Key) {
340       YS.printError(Field.getKey(), "descriptor key must be a scalar");
341       return false;
342     }
343 
344     Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
345     if (!Value) {
346       YS.printError(Field.getValue(), "descriptor value must be a scalar");
347       return false;
348     }
349 
350     KeyValue = Key->getValue(KeyStorage);
351     if (KeyValue.equals("source")) {
352       std::string Error;
353 
354       Source = std::string(Value->getValue(ValueStorage));
355       if (!Regex(Source).isValid(Error)) {
356         YS.printError(Field.getKey(), "invalid regex: " + Error);
357         return false;
358       }
359     } else if (KeyValue.equals("target")) {
360       Target = std::string(Value->getValue(ValueStorage));
361     } else if (KeyValue.equals("transform")) {
362       Transform = std::string(Value->getValue(ValueStorage));
363     } else if (KeyValue.equals("naked")) {
364       std::string Undecorated;
365 
366       Undecorated = std::string(Value->getValue(ValueStorage));
367       Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
368     } else {
369       YS.printError(Field.getKey(), "unknown key for function");
370       return false;
371     }
372   }
373 
374   if (Transform.empty() == Target.empty()) {
375     YS.printError(Descriptor,
376                   "exactly one of transform or target must be specified");
377     return false;
378   }
379 
380   // TODO see if there is a more elegant solution to selecting the rewrite
381   // descriptor type
382   if (!Target.empty())
383     DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
384         Source, Target, Naked));
385   else
386     DL->push_back(
387         std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
388 
389   return true;
390 }
391 
392 bool RewriteMapParser::
parseRewriteGlobalVariableDescriptor(yaml::Stream & YS,yaml::ScalarNode * K,yaml::MappingNode * Descriptor,RewriteDescriptorList * DL)393 parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
394                                      yaml::MappingNode *Descriptor,
395                                      RewriteDescriptorList *DL) {
396   std::string Source;
397   std::string Target;
398   std::string Transform;
399 
400   for (auto &Field : *Descriptor) {
401     yaml::ScalarNode *Key;
402     yaml::ScalarNode *Value;
403     SmallString<32> KeyStorage;
404     SmallString<32> ValueStorage;
405     StringRef KeyValue;
406 
407     Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
408     if (!Key) {
409       YS.printError(Field.getKey(), "descriptor Key must be a scalar");
410       return false;
411     }
412 
413     Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
414     if (!Value) {
415       YS.printError(Field.getValue(), "descriptor value must be a scalar");
416       return false;
417     }
418 
419     KeyValue = Key->getValue(KeyStorage);
420     if (KeyValue.equals("source")) {
421       std::string Error;
422 
423       Source = std::string(Value->getValue(ValueStorage));
424       if (!Regex(Source).isValid(Error)) {
425         YS.printError(Field.getKey(), "invalid regex: " + Error);
426         return false;
427       }
428     } else if (KeyValue.equals("target")) {
429       Target = std::string(Value->getValue(ValueStorage));
430     } else if (KeyValue.equals("transform")) {
431       Transform = std::string(Value->getValue(ValueStorage));
432     } else {
433       YS.printError(Field.getKey(), "unknown Key for Global Variable");
434       return false;
435     }
436   }
437 
438   if (Transform.empty() == Target.empty()) {
439     YS.printError(Descriptor,
440                   "exactly one of transform or target must be specified");
441     return false;
442   }
443 
444   if (!Target.empty())
445     DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
446         Source, Target,
447         /*Naked*/ false));
448   else
449     DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
450         Source, Transform));
451 
452   return true;
453 }
454 
455 bool RewriteMapParser::
parseRewriteGlobalAliasDescriptor(yaml::Stream & YS,yaml::ScalarNode * K,yaml::MappingNode * Descriptor,RewriteDescriptorList * DL)456 parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
457                                   yaml::MappingNode *Descriptor,
458                                   RewriteDescriptorList *DL) {
459   std::string Source;
460   std::string Target;
461   std::string Transform;
462 
463   for (auto &Field : *Descriptor) {
464     yaml::ScalarNode *Key;
465     yaml::ScalarNode *Value;
466     SmallString<32> KeyStorage;
467     SmallString<32> ValueStorage;
468     StringRef KeyValue;
469 
470     Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
471     if (!Key) {
472       YS.printError(Field.getKey(), "descriptor key must be a scalar");
473       return false;
474     }
475 
476     Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
477     if (!Value) {
478       YS.printError(Field.getValue(), "descriptor value must be a scalar");
479       return false;
480     }
481 
482     KeyValue = Key->getValue(KeyStorage);
483     if (KeyValue.equals("source")) {
484       std::string Error;
485 
486       Source = std::string(Value->getValue(ValueStorage));
487       if (!Regex(Source).isValid(Error)) {
488         YS.printError(Field.getKey(), "invalid regex: " + Error);
489         return false;
490       }
491     } else if (KeyValue.equals("target")) {
492       Target = std::string(Value->getValue(ValueStorage));
493     } else if (KeyValue.equals("transform")) {
494       Transform = std::string(Value->getValue(ValueStorage));
495     } else {
496       YS.printError(Field.getKey(), "unknown key for Global Alias");
497       return false;
498     }
499   }
500 
501   if (Transform.empty() == Target.empty()) {
502     YS.printError(Descriptor,
503                   "exactly one of transform or target must be specified");
504     return false;
505   }
506 
507   if (!Target.empty())
508     DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
509         Source, Target,
510         /*Naked*/ false));
511   else
512     DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
513         Source, Transform));
514 
515   return true;
516 }
517 
run(Module & M,ModuleAnalysisManager & AM)518 PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
519   if (!runImpl(M))
520     return PreservedAnalyses::all();
521 
522   return PreservedAnalyses::none();
523 }
524 
runImpl(Module & M)525 bool RewriteSymbolPass::runImpl(Module &M) {
526   bool Changed;
527 
528   Changed = false;
529   for (auto &Descriptor : Descriptors)
530     Changed |= Descriptor->performOnModule(M);
531 
532   return Changed;
533 }
534 
loadAndParseMapFiles()535 void RewriteSymbolPass::loadAndParseMapFiles() {
536   const std::vector<std::string> MapFiles(RewriteMapFiles);
537   SymbolRewriter::RewriteMapParser Parser;
538 
539   for (const auto &MapFile : MapFiles)
540     Parser.parse(MapFile, &Descriptors);
541 }
542