1 //===-- IRDynamicChecks.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/IR/Constants.h"
10 #include "llvm/IR/DataLayout.h"
11 #include "llvm/IR/Function.h"
12 #include "llvm/IR/Instructions.h"
13 #include "llvm/IR/Module.h"
14 #include "llvm/IR/Value.h"
15 #include "llvm/Support/raw_ostream.h"
16 
17 #include "IRDynamicChecks.h"
18 
19 #include "lldb/Expression/UtilityFunction.h"
20 #include "lldb/Target/ExecutionContext.h"
21 #include "lldb/Target/Process.h"
22 #include "lldb/Target/StackFrame.h"
23 #include "lldb/Target/Target.h"
24 #include "lldb/Utility/ConstString.h"
25 #include "lldb/Utility/Log.h"
26 
27 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
28 
29 using namespace llvm;
30 using namespace lldb_private;
31 
// Pass identifier; IRDynamicChecks hands it to the llvm::ModulePass
// constructor.
static char ID;

// Names under which ClangDynamicCheckerFunctions::Install() creates the two
// checker utility functions.
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

// C source text of the pointer checker.  The function reads a single byte
// through its argument and discards the value.  DoCheckersExplainStop()
// reports "Attempted to dereference an invalid pointer." when the stop
// address lies inside the utility function created from this text.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n"
    "_$__lldb_valid_pointer_check (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
43 
/// Default constructor: passes the DCF_Clang kind to the
/// DynamicCheckerFunctions base class.  The checker utility functions
/// themselves are only created later, by Install().
ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
    : DynamicCheckerFunctions(DCF_Clang) {}

/// Defaulted destructor; no explicit cleanup is performed here.
ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
48 
49 bool ClangDynamicCheckerFunctions::Install(
50     DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
51   auto utility_fn_or_error = exe_ctx.GetTargetRef().CreateUtilityFunction(
52       g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME,
53       lldb::eLanguageTypeC, exe_ctx);
54   if (!utility_fn_or_error) {
55     llvm::consumeError(utility_fn_or_error.takeError());
56     return false;
57   }
58   m_valid_pointer_check = std::move(*utility_fn_or_error);
59 
60   if (Process *process = exe_ctx.GetProcessPtr()) {
61     ObjCLanguageRuntime *objc_language_runtime =
62         ObjCLanguageRuntime::Get(*process);
63 
64     if (objc_language_runtime) {
65       auto utility_fn_or_error = objc_language_runtime->CreateObjectChecker(
66           VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx);
67       if (!utility_fn_or_error) {
68         llvm::consumeError(utility_fn_or_error.takeError());
69         return false;
70       }
71       m_objc_object_check = std::move(*utility_fn_or_error);
72     }
73   }
74 
75   return true;
76 }
77 
78 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
79                                                          Stream &message) {
80   // FIXME: We have to get the checkers to know why they scotched the call in
81   // more detail,
82   // so we can print a better message here.
83   if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
84     message.Printf("Attempted to dereference an invalid pointer.");
85     return true;
86   } else if (m_objc_object_check &&
87              m_objc_object_check->ContainsAddress(addr)) {
88     message.Printf("Attempted to dereference an invalid ObjC Object or send it "
89                    "an unrecognized selector");
90     return true;
91   }
92   return false;
93 }
94 
95 static std::string PrintValue(llvm::Value *V, bool truncate = false) {
96   std::string s;
97   raw_string_ostream rso(s);
98   V->print(rso);
99   rso.flush();
100   if (truncate)
101     s.resize(s.length() - 1);
102   return s;
103 }
104 
105 /// \class Instrumenter IRDynamicChecks.cpp
106 /// Finds and instruments individual LLVM IR instructions
107 ///
108 /// When instrumenting LLVM IR, it is frequently desirable to first search for
109 /// instructions, and then later modify them.  This way iterators remain
110 /// intact, and multiple passes can look at the same code base without
111 /// treading on each other's toes.
112 ///
113 /// The Instrumenter class implements this functionality.  A client first
114 /// calls Inspect on a function, which populates a list of instructions to be
115 /// instrumented.  Then, later, when all passes' Inspect functions have been
116 /// called, the client calls Instrument, which adds the desired
117 /// instrumentation.
118 ///
119 /// A subclass of Instrumenter must override InstrumentInstruction, which
120 /// is responsible for adding whatever instrumentation is necessary.
121 ///
122 /// A subclass of Instrumenter may override:
123 ///
124 /// - InspectInstruction [default: does nothing]
125 ///
126 /// - InspectBasicBlock [default: iterates through the instructions in a
127 ///   basic block calling InspectInstruction]
128 ///
129 /// - InspectFunction [default: iterates through the basic blocks in a
130 ///   function calling InspectBasicBlock]
131 class Instrumenter {
132 public:
133   /// Constructor
134   ///
135   /// \param[in] module
136   ///     The module being instrumented.
137   Instrumenter(llvm::Module &module,
138                std::shared_ptr<UtilityFunction> checker_function)
139       : m_module(module), m_checker_function(checker_function),
140         m_i8ptr_ty(nullptr), m_intptr_ty(nullptr) {}
141 
142   virtual ~Instrumenter() = default;
143 
144   /// Inspect a function to find instructions to instrument
145   ///
146   /// \param[in] function
147   ///     The function to inspect.
148   ///
149   /// \return
150   ///     True on success; false on error.
151   bool Inspect(llvm::Function &function) { return InspectFunction(function); }
152 
153   /// Instrument all the instructions found by Inspect()
154   ///
155   /// \return
156   ///     True on success; false on error.
157   bool Instrument() {
158     for (InstIterator ii = m_to_instrument.begin(),
159                       last_ii = m_to_instrument.end();
160          ii != last_ii; ++ii) {
161       if (!InstrumentInstruction(*ii))
162         return false;
163     }
164 
165     return true;
166   }
167 
168 protected:
169   /// Add instrumentation to a single instruction
170   ///
171   /// \param[in] inst
172   ///     The instruction to be instrumented.
173   ///
174   /// \return
175   ///     True on success; false otherwise.
176   virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
177 
178   /// Register a single instruction to be instrumented
179   ///
180   /// \param[in] inst
181   ///     The instruction to be instrumented.
182   void RegisterInstruction(llvm::Instruction &inst) {
183     m_to_instrument.push_back(&inst);
184   }
185 
186   /// Determine whether a single instruction is interesting to instrument,
187   /// and, if so, call RegisterInstruction
188   ///
189   /// \param[in] i
190   ///     The instruction to be inspected.
191   ///
192   /// \return
193   ///     False if there was an error scanning; true otherwise.
194   virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
195 
196   /// Scan a basic block to see if any instructions are interesting
197   ///
198   /// \param[in] bb
199   ///     The basic block to be inspected.
200   ///
201   /// \return
202   ///     False if there was an error scanning; true otherwise.
203   virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
204     for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
205          ii != last_ii; ++ii) {
206       if (!InspectInstruction(*ii))
207         return false;
208     }
209 
210     return true;
211   }
212 
213   /// Scan a function to see if any instructions are interesting
214   ///
215   /// \param[in] f
216   ///     The function to be inspected.
217   ///
218   /// \return
219   ///     False if there was an error scanning; true otherwise.
220   virtual bool InspectFunction(llvm::Function &f) {
221     for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
222          bbi != last_bbi; ++bbi) {
223       if (!InspectBasicBlock(*bbi))
224         return false;
225     }
226 
227     return true;
228   }
229 
230   /// Build a function pointer for a function with signature void
231   /// (*)(uint8_t*) with a given address
232   ///
233   /// \param[in] start_address
234   ///     The address of the function.
235   ///
236   /// \return
237   ///     The function pointer, for use in a CallInst.
238   llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
239     llvm::Type *param_array[1];
240 
241     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
242 
243     ArrayRef<llvm::Type *> params(param_array, 1);
244 
245     FunctionType *fun_ty = FunctionType::get(
246         llvm::Type::getVoidTy(m_module.getContext()), params, true);
247     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
248     Constant *fun_addr_int =
249         ConstantInt::get(GetIntptrTy(), start_address, false);
250     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
251   }
252 
253   /// Build a function pointer for a function with signature void
254   /// (*)(uint8_t*, uint8_t*) with a given address
255   ///
256   /// \param[in] start_address
257   ///     The address of the function.
258   ///
259   /// \return
260   ///     The function pointer, for use in a CallInst.
261   llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
262     llvm::Type *param_array[2];
263 
264     param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
265     param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
266 
267     ArrayRef<llvm::Type *> params(param_array, 2);
268 
269     FunctionType *fun_ty = FunctionType::get(
270         llvm::Type::getVoidTy(m_module.getContext()), params, true);
271     PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
272     Constant *fun_addr_int =
273         ConstantInt::get(GetIntptrTy(), start_address, false);
274     return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
275   }
276 
277   PointerType *GetI8PtrTy() {
278     if (!m_i8ptr_ty)
279       m_i8ptr_ty = llvm::Type::getInt8PtrTy(m_module.getContext());
280 
281     return m_i8ptr_ty;
282   }
283 
284   IntegerType *GetIntptrTy() {
285     if (!m_intptr_ty) {
286       llvm::DataLayout data_layout(&m_module);
287 
288       m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(),
289                                           data_layout.getPointerSizeInBits());
290     }
291 
292     return m_intptr_ty;
293   }
294 
295   typedef std::vector<llvm::Instruction *> InstVector;
296   typedef InstVector::iterator InstIterator;
297 
298   InstVector m_to_instrument; ///< List of instructions the inspector found
299   llvm::Module &m_module;     ///< The module which is being instrumented
300   std::shared_ptr<UtilityFunction>
301       m_checker_function; ///< The dynamic checker function for the process
302 
303 private:
304   PointerType *m_i8ptr_ty;
305   IntegerType *m_intptr_ty;
306 };
307 
308 class ValidPointerChecker : public Instrumenter {
309 public:
310   ValidPointerChecker(llvm::Module &module,
311                       std::shared_ptr<UtilityFunction> checker_function)
312       : Instrumenter(module, checker_function),
313         m_valid_pointer_check_func(nullptr) {}
314 
315   ~ValidPointerChecker() override = default;
316 
317 protected:
318   bool InstrumentInstruction(llvm::Instruction *inst) override {
319     Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
320 
321     LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n",
322               PrintValue(inst).c_str());
323 
324     if (!m_valid_pointer_check_func)
325       m_valid_pointer_check_func =
326           BuildPointerValidatorFunc(m_checker_function->StartAddress());
327 
328     llvm::Value *dereferenced_ptr = nullptr;
329 
330     if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
331       dereferenced_ptr = li->getPointerOperand();
332     else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
333       dereferenced_ptr = si->getPointerOperand();
334     else
335       return false;
336 
337     // Insert an instruction to cast the loaded value to int8_t*
338 
339     BitCastInst *bit_cast =
340         new BitCastInst(dereferenced_ptr, GetI8PtrTy(), "", inst);
341 
342     // Insert an instruction to call the helper with the result
343 
344     llvm::Value *arg_array[1];
345 
346     arg_array[0] = bit_cast;
347 
348     llvm::ArrayRef<llvm::Value *> args(arg_array, 1);
349 
350     CallInst::Create(m_valid_pointer_check_func, args, "", inst);
351 
352     return true;
353   }
354 
355   bool InspectInstruction(llvm::Instruction &i) override {
356     if (dyn_cast<llvm::LoadInst>(&i) || dyn_cast<llvm::StoreInst>(&i))
357       RegisterInstruction(i);
358 
359     return true;
360   }
361 
362 private:
363   llvm::FunctionCallee m_valid_pointer_check_func;
364 };
365 
366 class ObjcObjectChecker : public Instrumenter {
367 public:
368   ObjcObjectChecker(llvm::Module &module,
369                     std::shared_ptr<UtilityFunction> checker_function)
370       : Instrumenter(module, checker_function),
371         m_objc_object_check_func(nullptr) {}
372 
373   ~ObjcObjectChecker() override = default;
374 
375   enum msgSend_type {
376     eMsgSend = 0,
377     eMsgSendSuper,
378     eMsgSendSuper_stret,
379     eMsgSend_fpret,
380     eMsgSend_stret
381   };
382 
383   std::map<llvm::Instruction *, msgSend_type> msgSend_types;
384 
385 protected:
386   bool InstrumentInstruction(llvm::Instruction *inst) override {
387     CallInst *call_inst = dyn_cast<CallInst>(inst);
388 
389     if (!call_inst)
390       return false; // call_inst really shouldn't be nullptr, because otherwise
391                     // InspectInstruction wouldn't have registered it
392 
393     if (!m_objc_object_check_func)
394       m_objc_object_check_func =
395           BuildObjectCheckerFunc(m_checker_function->StartAddress());
396 
397     // id objc_msgSend(id theReceiver, SEL theSelector, ...)
398 
399     llvm::Value *target_object;
400     llvm::Value *selector;
401 
402     switch (msgSend_types[inst]) {
403     case eMsgSend:
404     case eMsgSend_fpret:
405       // On arm64, clang uses objc_msgSend for scalar and struct return
406       // calls.  The call instruction will record which was used.
407       if (call_inst->hasStructRetAttr()) {
408         target_object = call_inst->getArgOperand(1);
409         selector = call_inst->getArgOperand(2);
410       } else {
411         target_object = call_inst->getArgOperand(0);
412         selector = call_inst->getArgOperand(1);
413       }
414       break;
415     case eMsgSend_stret:
416       target_object = call_inst->getArgOperand(1);
417       selector = call_inst->getArgOperand(2);
418       break;
419     case eMsgSendSuper:
420     case eMsgSendSuper_stret:
421       return true;
422     }
423 
424     // These objects should always be valid according to Sean Calannan
425     assert(target_object);
426     assert(selector);
427 
428     // Insert an instruction to cast the receiver id to int8_t*
429 
430     BitCastInst *bit_cast =
431         new BitCastInst(target_object, GetI8PtrTy(), "", inst);
432 
433     // Insert an instruction to call the helper with the result
434 
435     llvm::Value *arg_array[2];
436 
437     arg_array[0] = bit_cast;
438     arg_array[1] = selector;
439 
440     ArrayRef<llvm::Value *> args(arg_array, 2);
441 
442     CallInst::Create(m_objc_object_check_func, args, "", inst);
443 
444     return true;
445   }
446 
447   static llvm::Function *GetFunction(llvm::Value *value) {
448     if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
449       return function;
450     }
451 
452     if (llvm::ConstantExpr *const_expr =
453             llvm::dyn_cast<llvm::ConstantExpr>(value)) {
454       switch (const_expr->getOpcode()) {
455       default:
456         return nullptr;
457       case llvm::Instruction::BitCast:
458         return GetFunction(const_expr->getOperand(0));
459       }
460     }
461 
462     return nullptr;
463   }
464 
465   static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
466     return GetFunction(inst->getCalledOperand());
467   }
468 
469   bool InspectInstruction(llvm::Instruction &i) override {
470     Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
471 
472     CallInst *call_inst = dyn_cast<CallInst>(&i);
473 
474     if (call_inst) {
475       const llvm::Function *called_function = GetCalledFunction(call_inst);
476 
477       if (!called_function)
478         return true;
479 
480       std::string name_str = called_function->getName().str();
481       const char *name_cstr = name_str.c_str();
482 
483       LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr,
484                 PrintValue(call_inst).c_str());
485 
486       if (name_str.find("objc_msgSend") == std::string::npos)
487         return true;
488 
489       if (!strcmp(name_cstr, "objc_msgSend")) {
490         RegisterInstruction(i);
491         msgSend_types[&i] = eMsgSend;
492         return true;
493       }
494 
495       if (!strcmp(name_cstr, "objc_msgSend_stret")) {
496         RegisterInstruction(i);
497         msgSend_types[&i] = eMsgSend_stret;
498         return true;
499       }
500 
501       if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
502         RegisterInstruction(i);
503         msgSend_types[&i] = eMsgSend_fpret;
504         return true;
505       }
506 
507       if (!strcmp(name_cstr, "objc_msgSendSuper")) {
508         RegisterInstruction(i);
509         msgSend_types[&i] = eMsgSendSuper;
510         return true;
511       }
512 
513       if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
514         RegisterInstruction(i);
515         msgSend_types[&i] = eMsgSendSuper_stret;
516         return true;
517       }
518 
519       LLDB_LOGF(log,
520                 "Function name '%s' contains 'objc_msgSend' but is not handled",
521                 name_str.c_str());
522 
523       return true;
524     }
525 
526     return true;
527   }
528 
529 private:
530   llvm::FunctionCallee m_objc_object_check_func;
531 };
532 
/// Constructor
///
/// \param[in] checker_functions
///     The checker functions consulted by runOnModule(); stored by
///     reference.
///
/// \param[in] func_name
///     The name of the function to instrument; copied into m_func_name and
///     looked up in the module by runOnModule().
IRDynamicChecks::IRDynamicChecks(
    ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
    : ModulePass(ID), m_func_name(func_name),
      m_checker_functions(checker_functions) {}

/// Defaulted destructor; no explicit cleanup is performed here.
IRDynamicChecks::~IRDynamicChecks() = default;
539 
540 bool IRDynamicChecks::runOnModule(llvm::Module &M) {
541   Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
542 
543   llvm::Function *function = M.getFunction(StringRef(m_func_name));
544 
545   if (!function) {
546     LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str());
547 
548     return false;
549   }
550 
551   if (m_checker_functions.m_valid_pointer_check) {
552     ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
553 
554     if (!vpc.Inspect(*function))
555       return false;
556 
557     if (!vpc.Instrument())
558       return false;
559   }
560 
561   if (m_checker_functions.m_objc_object_check) {
562     ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
563 
564     if (!ooc.Inspect(*function))
565       return false;
566 
567     if (!ooc.Instrument())
568       return false;
569   }
570 
571   if (log && log->GetVerbose()) {
572     std::string s;
573     raw_string_ostream oss(s);
574 
575     M.print(oss, nullptr);
576 
577     oss.flush();
578 
579     LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str());
580   }
581 
582   return true;
583 }
584 
/// Does nothing: the body is empty and both parameters are ignored.
void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}

/// \return
///     Always PMT_ModulePassManager.
PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
  return PMT_ModulePassManager;
}
590