1 //===-- IRDynamicChecks.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/IR/Constants.h" 10 #include "llvm/IR/DataLayout.h" 11 #include "llvm/IR/Function.h" 12 #include "llvm/IR/Instructions.h" 13 #include "llvm/IR/Module.h" 14 #include "llvm/IR/Value.h" 15 #include "llvm/Support/raw_ostream.h" 16 17 #include "IRDynamicChecks.h" 18 19 #include "lldb/Expression/UtilityFunction.h" 20 #include "lldb/Target/ExecutionContext.h" 21 #include "lldb/Target/Process.h" 22 #include "lldb/Target/StackFrame.h" 23 #include "lldb/Target/Target.h" 24 #include "lldb/Utility/ConstString.h" 25 #include "lldb/Utility/Log.h" 26 27 #include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" 28 29 using namespace llvm; 30 using namespace lldb_private; 31 32 static char ID; 33 34 #define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check" 35 #define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check" 36 37 static const char g_valid_pointer_check_text[] = 38 "extern \"C\" void\n" 39 "_$__lldb_valid_pointer_check (unsigned char *$__lldb_arg_ptr)\n" 40 "{\n" 41 " unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n" 42 "}"; 43 44 ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions() 45 : DynamicCheckerFunctions(DCF_Clang) {} 46 47 ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default; 48 49 bool ClangDynamicCheckerFunctions::Install( 50 DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) { 51 auto utility_fn_or_error = exe_ctx.GetTargetRef().CreateUtilityFunction( 52 g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME, 53 lldb::eLanguageTypeC, exe_ctx); 54 if (!utility_fn_or_error) { 55 llvm::consumeError(utility_fn_or_error.takeError()); 56 return false; 57 } 58 m_valid_pointer_check = std::move(*utility_fn_or_error); 59 60 if (Process *process = exe_ctx.GetProcessPtr()) { 61 ObjCLanguageRuntime *objc_language_runtime = 62 ObjCLanguageRuntime::Get(*process); 63 64 if (objc_language_runtime) { 65 auto utility_fn_or_error = objc_language_runtime->CreateObjectChecker( 66 VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx); 67 if (!utility_fn_or_error) { 68 llvm::consumeError(utility_fn_or_error.takeError()); 69 return false; 70 } 71 m_objc_object_check = std::move(*utility_fn_or_error); 72 } 73 } 74 75 return true; 76 } 77 78 bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr, 79 Stream &message) { 80 // FIXME: We have to get the checkers to know why they scotched the call in 81 // more detail, 82 // so we can print a better message here. 83 if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) { 84 message.Printf("Attempted to dereference an invalid pointer."); 85 return true; 86 } else if (m_objc_object_check && 87 m_objc_object_check->ContainsAddress(addr)) { 88 message.Printf("Attempted to dereference an invalid ObjC Object or send it " 89 "an unrecognized selector"); 90 return true; 91 } 92 return false; 93 } 94 95 static std::string PrintValue(llvm::Value *V, bool truncate = false) { 96 std::string s; 97 raw_string_ostream rso(s); 98 V->print(rso); 99 rso.flush(); 100 if (truncate) 101 s.resize(s.length() - 1); 102 return s; 103 } 104 105 /// \class Instrumenter IRDynamicChecks.cpp 106 /// Finds and instruments individual LLVM IR instructions 107 /// 108 /// When instrumenting LLVM IR, it is frequently desirable to first search for 109 /// instructions, and then later modify them. This way iterators remain 110 /// intact, and multiple passes can look at the same code base without 111 /// treading on each other's toes. 112 /// 113 /// The Instrumenter class implements this functionality. A client first 114 /// calls Inspect on a function, which populates a list of instructions to be 115 /// instrumented. Then, later, when all passes' Inspect functions have been 116 /// called, the client calls Instrument, which adds the desired 117 /// instrumentation. 118 /// 119 /// A subclass of Instrumenter must override InstrumentInstruction, which 120 /// is responsible for adding whatever instrumentation is necessary. 121 /// 122 /// A subclass of Instrumenter may override: 123 /// 124 /// - InspectInstruction [default: does nothing] 125 /// 126 /// - InspectBasicBlock [default: iterates through the instructions in a 127 /// basic block calling InspectInstruction] 128 /// 129 /// - InspectFunction [default: iterates through the basic blocks in a 130 /// function calling InspectBasicBlock] 131 class Instrumenter { 132 public: 133 /// Constructor 134 /// 135 /// \param[in] module 136 /// The module being instrumented. 137 Instrumenter(llvm::Module &module, 138 std::shared_ptr<UtilityFunction> checker_function) 139 : m_module(module), m_checker_function(checker_function), 140 m_i8ptr_ty(nullptr), m_intptr_ty(nullptr) {} 141 142 virtual ~Instrumenter() = default; 143 144 /// Inspect a function to find instructions to instrument 145 /// 146 /// \param[in] function 147 /// The function to inspect. 148 /// 149 /// \return 150 /// True on success; false on error. 151 bool Inspect(llvm::Function &function) { return InspectFunction(function); } 152 153 /// Instrument all the instructions found by Inspect() 154 /// 155 /// \return 156 /// True on success; false on error. 157 bool Instrument() { 158 for (InstIterator ii = m_to_instrument.begin(), 159 last_ii = m_to_instrument.end(); 160 ii != last_ii; ++ii) { 161 if (!InstrumentInstruction(*ii)) 162 return false; 163 } 164 165 return true; 166 } 167 168 protected: 169 /// Add instrumentation to a single instruction 170 /// 171 /// \param[in] inst 172 /// The instruction to be instrumented. 173 /// 174 /// \return 175 /// True on success; false otherwise. 176 virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0; 177 178 /// Register a single instruction to be instrumented 179 /// 180 /// \param[in] inst 181 /// The instruction to be instrumented. 182 void RegisterInstruction(llvm::Instruction &inst) { 183 m_to_instrument.push_back(&inst); 184 } 185 186 /// Determine whether a single instruction is interesting to instrument, 187 /// and, if so, call RegisterInstruction 188 /// 189 /// \param[in] i 190 /// The instruction to be inspected. 191 /// 192 /// \return 193 /// False if there was an error scanning; true otherwise. 194 virtual bool InspectInstruction(llvm::Instruction &i) { return true; } 195 196 /// Scan a basic block to see if any instructions are interesting 197 /// 198 /// \param[in] bb 199 /// The basic block to be inspected. 200 /// 201 /// \return 202 /// False if there was an error scanning; true otherwise. 203 virtual bool InspectBasicBlock(llvm::BasicBlock &bb) { 204 for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end(); 205 ii != last_ii; ++ii) { 206 if (!InspectInstruction(*ii)) 207 return false; 208 } 209 210 return true; 211 } 212 213 /// Scan a function to see if any instructions are interesting 214 /// 215 /// \param[in] f 216 /// The function to be inspected. 217 /// 218 /// \return 219 /// False if there was an error scanning; true otherwise. 220 virtual bool InspectFunction(llvm::Function &f) { 221 for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end(); 222 bbi != last_bbi; ++bbi) { 223 if (!InspectBasicBlock(*bbi)) 224 return false; 225 } 226 227 return true; 228 } 229 230 /// Build a function pointer for a function with signature void 231 /// (*)(uint8_t*) with a given address 232 /// 233 /// \param[in] start_address 234 /// The address of the function. 235 /// 236 /// \return 237 /// The function pointer, for use in a CallInst. 238 llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) { 239 llvm::Type *param_array[1]; 240 241 param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy()); 242 243 ArrayRef<llvm::Type *> params(param_array, 1); 244 245 FunctionType *fun_ty = FunctionType::get( 246 llvm::Type::getVoidTy(m_module.getContext()), params, true); 247 PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty); 248 Constant *fun_addr_int = 249 ConstantInt::get(GetIntptrTy(), start_address, false); 250 return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)}; 251 } 252 253 /// Build a function pointer for a function with signature void 254 /// (*)(uint8_t*, uint8_t*) with a given address 255 /// 256 /// \param[in] start_address 257 /// The address of the function. 258 /// 259 /// \return 260 /// The function pointer, for use in a CallInst. 261 llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) { 262 llvm::Type *param_array[2]; 263 264 param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy()); 265 param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy()); 266 267 ArrayRef<llvm::Type *> params(param_array, 2); 268 269 FunctionType *fun_ty = FunctionType::get( 270 llvm::Type::getVoidTy(m_module.getContext()), params, true); 271 PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty); 272 Constant *fun_addr_int = 273 ConstantInt::get(GetIntptrTy(), start_address, false); 274 return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)}; 275 } 276 277 PointerType *GetI8PtrTy() { 278 if (!m_i8ptr_ty) 279 m_i8ptr_ty = llvm::Type::getInt8PtrTy(m_module.getContext()); 280 281 return m_i8ptr_ty; 282 } 283 284 IntegerType *GetIntptrTy() { 285 if (!m_intptr_ty) { 286 llvm::DataLayout data_layout(&m_module); 287 288 m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(), 289 data_layout.getPointerSizeInBits()); 290 } 291 292 return m_intptr_ty; 293 } 294 295 typedef std::vector<llvm::Instruction *> InstVector; 296 typedef InstVector::iterator InstIterator; 297 298 InstVector m_to_instrument; ///< List of instructions the inspector found 299 llvm::Module &m_module; ///< The module which is being instrumented 300 std::shared_ptr<UtilityFunction> 301 m_checker_function; ///< The dynamic checker function for the process 302 303 private: 304 PointerType *m_i8ptr_ty; 305 IntegerType *m_intptr_ty; 306 }; 307 308 class ValidPointerChecker : public Instrumenter { 309 public: 310 ValidPointerChecker(llvm::Module &module, 311 std::shared_ptr<UtilityFunction> checker_function) 312 : Instrumenter(module, checker_function), 313 m_valid_pointer_check_func(nullptr) {} 314 315 ~ValidPointerChecker() override = default; 316 317 protected: 318 bool InstrumentInstruction(llvm::Instruction *inst) override { 319 Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); 320 321 LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n", 322 PrintValue(inst).c_str()); 323 324 if (!m_valid_pointer_check_func) 325 m_valid_pointer_check_func = 326 BuildPointerValidatorFunc(m_checker_function->StartAddress()); 327 328 llvm::Value *dereferenced_ptr = nullptr; 329 330 if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst)) 331 dereferenced_ptr = li->getPointerOperand(); 332 else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst)) 333 dereferenced_ptr = si->getPointerOperand(); 334 else 335 return false; 336 337 // Insert an instruction to cast the loaded value to int8_t* 338 339 BitCastInst *bit_cast = 340 new BitCastInst(dereferenced_ptr, GetI8PtrTy(), "", inst); 341 342 // Insert an instruction to call the helper with the result 343 344 llvm::Value *arg_array[1]; 345 346 arg_array[0] = bit_cast; 347 348 llvm::ArrayRef<llvm::Value *> args(arg_array, 1); 349 350 CallInst::Create(m_valid_pointer_check_func, args, "", inst); 351 352 return true; 353 } 354 355 bool InspectInstruction(llvm::Instruction &i) override { 356 if (dyn_cast<llvm::LoadInst>(&i) || dyn_cast<llvm::StoreInst>(&i)) 357 RegisterInstruction(i); 358 359 return true; 360 } 361 362 private: 363 llvm::FunctionCallee m_valid_pointer_check_func; 364 }; 365 366 class ObjcObjectChecker : public Instrumenter { 367 public: 368 ObjcObjectChecker(llvm::Module &module, 369 std::shared_ptr<UtilityFunction> checker_function) 370 : Instrumenter(module, checker_function), 371 m_objc_object_check_func(nullptr) {} 372 373 ~ObjcObjectChecker() override = default; 374 375 enum msgSend_type { 376 eMsgSend = 0, 377 eMsgSendSuper, 378 eMsgSendSuper_stret, 379 eMsgSend_fpret, 380 eMsgSend_stret 381 }; 382 383 std::map<llvm::Instruction *, msgSend_type> msgSend_types; 384 385 protected: 386 bool InstrumentInstruction(llvm::Instruction *inst) override { 387 CallInst *call_inst = dyn_cast<CallInst>(inst); 388 389 if (!call_inst) 390 return false; // call_inst really shouldn't be nullptr, because otherwise 391 // InspectInstruction wouldn't have registered it 392 393 if (!m_objc_object_check_func) 394 m_objc_object_check_func = 395 BuildObjectCheckerFunc(m_checker_function->StartAddress()); 396 397 // id objc_msgSend(id theReceiver, SEL theSelector, ...) 398 399 llvm::Value *target_object; 400 llvm::Value *selector; 401 402 switch (msgSend_types[inst]) { 403 case eMsgSend: 404 case eMsgSend_fpret: 405 // On arm64, clang uses objc_msgSend for scalar and struct return 406 // calls. The call instruction will record which was used. 407 if (call_inst->hasStructRetAttr()) { 408 target_object = call_inst->getArgOperand(1); 409 selector = call_inst->getArgOperand(2); 410 } else { 411 target_object = call_inst->getArgOperand(0); 412 selector = call_inst->getArgOperand(1); 413 } 414 break; 415 case eMsgSend_stret: 416 target_object = call_inst->getArgOperand(1); 417 selector = call_inst->getArgOperand(2); 418 break; 419 case eMsgSendSuper: 420 case eMsgSendSuper_stret: 421 return true; 422 } 423 424 // These objects should always be valid according to Sean Calannan 425 assert(target_object); 426 assert(selector); 427 428 // Insert an instruction to cast the receiver id to int8_t* 429 430 BitCastInst *bit_cast = 431 new BitCastInst(target_object, GetI8PtrTy(), "", inst); 432 433 // Insert an instruction to call the helper with the result 434 435 llvm::Value *arg_array[2]; 436 437 arg_array[0] = bit_cast; 438 arg_array[1] = selector; 439 440 ArrayRef<llvm::Value *> args(arg_array, 2); 441 442 CallInst::Create(m_objc_object_check_func, args, "", inst); 443 444 return true; 445 } 446 447 static llvm::Function *GetFunction(llvm::Value *value) { 448 if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) { 449 return function; 450 } 451 452 if (llvm::ConstantExpr *const_expr = 453 llvm::dyn_cast<llvm::ConstantExpr>(value)) { 454 switch (const_expr->getOpcode()) { 455 default: 456 return nullptr; 457 case llvm::Instruction::BitCast: 458 return GetFunction(const_expr->getOperand(0)); 459 } 460 } 461 462 return nullptr; 463 } 464 465 static llvm::Function *GetCalledFunction(llvm::CallInst *inst) { 466 return GetFunction(inst->getCalledOperand()); 467 } 468 469 bool InspectInstruction(llvm::Instruction &i) override { 470 Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); 471 472 CallInst *call_inst = dyn_cast<CallInst>(&i); 473 474 if (call_inst) { 475 const llvm::Function *called_function = GetCalledFunction(call_inst); 476 477 if (!called_function) 478 return true; 479 480 std::string name_str = called_function->getName().str(); 481 const char *name_cstr = name_str.c_str(); 482 483 LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr, 484 PrintValue(call_inst).c_str()); 485 486 if (name_str.find("objc_msgSend") == std::string::npos) 487 return true; 488 489 if (!strcmp(name_cstr, "objc_msgSend")) { 490 RegisterInstruction(i); 491 msgSend_types[&i] = eMsgSend; 492 return true; 493 } 494 495 if (!strcmp(name_cstr, "objc_msgSend_stret")) { 496 RegisterInstruction(i); 497 msgSend_types[&i] = eMsgSend_stret; 498 return true; 499 } 500 501 if (!strcmp(name_cstr, "objc_msgSend_fpret")) { 502 RegisterInstruction(i); 503 msgSend_types[&i] = eMsgSend_fpret; 504 return true; 505 } 506 507 if (!strcmp(name_cstr, "objc_msgSendSuper")) { 508 RegisterInstruction(i); 509 msgSend_types[&i] = eMsgSendSuper; 510 return true; 511 } 512 513 if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) { 514 RegisterInstruction(i); 515 msgSend_types[&i] = eMsgSendSuper_stret; 516 return true; 517 } 518 519 LLDB_LOGF(log, 520 "Function name '%s' contains 'objc_msgSend' but is not handled", 521 name_str.c_str()); 522 523 return true; 524 } 525 526 return true; 527 } 528 529 private: 530 llvm::FunctionCallee m_objc_object_check_func; 531 }; 532 533 IRDynamicChecks::IRDynamicChecks( 534 ClangDynamicCheckerFunctions &checker_functions, const char *func_name) 535 : ModulePass(ID), m_func_name(func_name), 536 m_checker_functions(checker_functions) {} 537 538 IRDynamicChecks::~IRDynamicChecks() = default; 539 540 bool IRDynamicChecks::runOnModule(llvm::Module &M) { 541 Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS)); 542 543 llvm::Function *function = M.getFunction(StringRef(m_func_name)); 544 545 if (!function) { 546 LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str()); 547 548 return false; 549 } 550 551 if (m_checker_functions.m_valid_pointer_check) { 552 ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check); 553 554 if (!vpc.Inspect(*function)) 555 return false; 556 557 if (!vpc.Instrument()) 558 return false; 559 } 560 561 if (m_checker_functions.m_objc_object_check) { 562 ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check); 563 564 if (!ooc.Inspect(*function)) 565 return false; 566 567 if (!ooc.Instrument()) 568 return false; 569 } 570 571 if (log && log->GetVerbose()) { 572 std::string s; 573 raw_string_ostream oss(s); 574 575 M.print(oss, nullptr); 576 577 oss.flush(); 578 579 LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str()); 580 } 581 582 return true; 583 } 584 585 void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {} 586 587 PassManagerType IRDynamicChecks::getPotentialPassManagerType() const { 588 return PMT_ModulePassManager; 589 } 590