1 //===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
13 #include "llvm/ADT/Statistic.h"
14 #include "llvm/IR/Constants.h"
15 #include "llvm/IR/Function.h"
16 #include "llvm/IR/GlobalValue.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/Instruction.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/Metadata.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/InitializePasses.h"
23 #include "llvm/Pass.h"
24 #include "llvm/PassRegistry.h"
25 #include "llvm/ProfileData/InstrProf.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Debug.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Transforms/Instrumentation.h"
32 #include <fstream>
33 #include <map>
34 #include <mutex>
35 #include <set>
36 #include <sstream>
37 
38 using namespace llvm;
39 #define DEBUG_TYPE "instrorderfile"
40 
41 static cl::opt<std::string> ClOrderFileWriteMapping(
42     "orderfile-write-mapping", cl::init(""),
43     cl::desc(
44         "Dump functions and their MD5 hash to deobfuscate profile data"),
45     cl::Hidden);
46 
47 namespace {
48 
// The instrumentation relies on three globals: a byte map with one entry per
// defined function that records whether the function has already executed,
// a fixed-size buffer that stores the MD5 hash of each function's name in
// first-execution order, and an index to the next slot in that buffer.
53 
// Held while the function-name mapping file is opened and appended to, so
// that writers inside this process do not interleave their output.
std::mutex MappingMutex;
55 
56 struct InstrOrderFile {
57 private:
58   GlobalVariable *OrderFileBuffer;
59   GlobalVariable *BufferIdx;
60   GlobalVariable *BitMap;
61   ArrayType *BufferTy;
62   ArrayType *MapTy;
63 
64 public:
65   InstrOrderFile() {}
66 
67   void createOrderFileData(Module &M) {
68     LLVMContext &Ctx = M.getContext();
69     int NumFunctions = 0;
70     for (Function &F : M) {
71       if (!F.isDeclaration())
72         NumFunctions++;
73     }
74 
75     BufferTy =
76         ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
77     Type *IdxTy = Type::getInt32Ty(Ctx);
78     MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
79 
80     // Create the global variables.
81     std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
82     OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
83                            Constant::getNullValue(BufferTy), SymbolName);
84     Triple TT = Triple(M.getTargetTriple());
85     OrderFileBuffer->setSection(
86         getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
87 
88     std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
89     BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
90                            Constant::getNullValue(IdxTy), IndexName);
91 
92     std::string BitMapName = "bitmap_0";
93     BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
94                                 Constant::getNullValue(MapTy), BitMapName);
95   }
96 
97   // Generate the code sequence in the entry block of each function to
98   // update the buffer.
99   void generateCodeSequence(Module &M, Function &F, int FuncId) {
100     if (!ClOrderFileWriteMapping.empty()) {
101       std::lock_guard<std::mutex> LogLock(MappingMutex);
102       std::error_code EC;
103       llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC,
104                               llvm::sys::fs::OF_Append);
105       if (EC) {
106         report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
107                            " to save mapping file for order file instrumentation\n");
108       } else {
109         std::stringstream stream;
110         stream << std::hex << MD5Hash(F.getName());
111         std::string singleLine = "MD5 " + stream.str() + " " +
112                                  std::string(F.getName()) + '\n';
113         OS << singleLine;
114       }
115     }
116 
117     BasicBlock *OrigEntry = &F.getEntryBlock();
118 
119     LLVMContext &Ctx = M.getContext();
120     IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
121     IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
122 
123     // Create a new entry block for instrumentation. We will check the bitmap
124     // in this basic block.
125     BasicBlock *NewEntry =
126         BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
127     IRBuilder<> entryB(NewEntry);
128     // Create a basic block for updating the circular buffer.
129     BasicBlock *UpdateOrderFileBB =
130         BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
131     IRBuilder<> updateB(UpdateOrderFileBB);
132 
133     // Check the bitmap, if it is already 1, do nothing.
134     // Otherwise, set the bit, grab the index, update the buffer.
135     Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
136                          ConstantInt::get(Int32Ty, FuncId)};
137     Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
138     LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
139     entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
140     Value *IsNotExecuted =
141         entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
142     entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
143 
144     // Fill up UpdateOrderFileBB: grab the index, update the buffer!
145     Value *IdxVal = updateB.CreateAtomicRMW(
146         AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
147         AtomicOrdering::SequentiallyConsistent);
148     // We need to wrap around the index to fit it inside the buffer.
149     Value *WrappedIdx = updateB.CreateAnd(
150         IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
151     Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
152     Value *BufferAddr =
153         updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
154     updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
155                         BufferAddr);
156     updateB.CreateBr(OrigEntry);
157   }
158 
159   bool run(Module &M) {
160     createOrderFileData(M);
161 
162     int FuncId = 0;
163     for (Function &F : M) {
164       if (F.isDeclaration())
165         continue;
166       generateCodeSequence(M, F, FuncId);
167       ++FuncId;
168     }
169 
170     return true;
171   }
172 
173 }; // End of InstrOrderFile struct
174 
175 class InstrOrderFileLegacyPass : public ModulePass {
176 public:
177   static char ID;
178 
179   InstrOrderFileLegacyPass() : ModulePass(ID) {
180     initializeInstrOrderFileLegacyPassPass(
181         *PassRegistry::getPassRegistry());
182   }
183 
184   bool runOnModule(Module &M) override;
185 };
186 
187 } // End anonymous namespace
188 
189 bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
190   if (skipModule(M))
191     return false;
192 
193   return InstrOrderFile().run(M);
194 }
195 
196 PreservedAnalyses
197 InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
198   if (InstrOrderFile().run(M))
199     return PreservedAnalyses::none();
200   return PreservedAnalyses::all();
201 }
202 
203 INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
204                       "Instrumentation for Order File", false, false)
205 INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
206                     "Instrumentation for Order File", false, false)
207 
208 char InstrOrderFileLegacyPass::ID = 0;
209 
210 ModulePass *llvm::createInstrOrderFilePass() {
211   return new InstrOrderFileLegacyPass();
212 }
213