1 //===-- IRMutator.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/FuzzMutate/IRMutator.h"
10 #include "llvm/ADT/STLExtras.h"
11 #include "llvm/ADT/SmallSet.h"
12 #include "llvm/Analysis/TargetLibraryInfo.h"
13 #include "llvm/Bitcode/BitcodeReader.h"
14 #include "llvm/Bitcode/BitcodeWriter.h"
15 #include "llvm/FuzzMutate/Operations.h"
16 #include "llvm/FuzzMutate/Random.h"
17 #include "llvm/FuzzMutate/RandomIRBuilder.h"
18 #include "llvm/IR/BasicBlock.h"
19 #include "llvm/IR/FMF.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/InstIterator.h"
22 #include "llvm/IR/Instructions.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/Operator.h"
25 #include "llvm/IR/Verifier.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/SourceMgr.h"
28 #include "llvm/Transforms/Scalar/DCE.h"
29 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
30 #include <map>
31 #include <optional>
32 
33 using namespace llvm;
34 
35 void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
36   auto RS = makeSampler<Function *>(IB.Rand);
37   for (Function &F : M)
38     if (!F.isDeclaration())
39       RS.sample(&F, /*Weight=*/1);
40 
41   while (RS.totalWeight() < IB.MinFunctionNum) {
42     Function *F = IB.createFunctionDefinition(M);
43     RS.sample(F, /*Weight=*/1);
44   }
45   mutate(*RS.getSelection(), IB);
46 }
47 
48 void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
49   auto Range = make_filter_range(make_pointer_range(F),
50                                  [](BasicBlock *BB) { return !BB->isEHPad(); });
51 
52   mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
53 }
54 
55 void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
56   mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
57 }
58 
59 size_t llvm::IRMutator::getModuleSize(const Module &M) {
60   return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
61 }
62 
63 void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
64   std::vector<Type *> Types;
65   for (const auto &Getter : AllowedTypes)
66     Types.push_back(Getter(M.getContext()));
67   RandomIRBuilder IB(Seed, Types);
68 
69   size_t CurSize = IRMutator::getModuleSize(M);
70   auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
71   for (const auto &Strategy : Strategies)
72     RS.sample(Strategy.get(),
73               Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
74   if (RS.totalWeight() == 0)
75     return;
76   auto Strategy = RS.getSelection();
77 
78   Strategy->mutate(M, IB);
79 }
80 
81 static void eliminateDeadCode(Function &F) {
82   FunctionPassManager FPM;
83   FPM.addPass(DCEPass());
84   FunctionAnalysisManager FAM;
85   FAM.registerPass([&] { return TargetLibraryAnalysis(); });
86   FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
87   FPM.run(F, FAM);
88 }
89 
90 void InjectorIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
91   IRMutationStrategy::mutate(F, IB);
92   eliminateDeadCode(F);
93 }
94 
95 std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
96   std::vector<fuzzerop::OpDescriptor> Ops;
97   describeFuzzerIntOps(Ops);
98   describeFuzzerFloatOps(Ops);
99   describeFuzzerControlFlowOps(Ops);
100   describeFuzzerPointerOps(Ops);
101   describeFuzzerAggregateOps(Ops);
102   describeFuzzerVectorOps(Ops);
103   return Ops;
104 }
105 
106 std::optional<fuzzerop::OpDescriptor>
107 InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
108   auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
109     return Op.SourcePreds[0].matches({}, Src);
110   };
111   auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
112   if (RS.isEmpty())
113     return std::nullopt;
114   return *RS;
115 }
116 
117 static inline iterator_range<BasicBlock::iterator>
118 getInsertionRange(BasicBlock &BB) {
119   auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
120   return make_range(BB.getFirstInsertionPt(), End);
121 }
122 
123 void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
124   SmallVector<Instruction *, 32> Insts;
125   for (Instruction &I : getInsertionRange(BB))
126     Insts.push_back(&I);
127   if (Insts.size() < 1)
128     return;
129 
130   // Choose an insertion point for our new instruction.
131   size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
132 
133   auto InstsBefore = ArrayRef(Insts).slice(0, IP);
134   auto InstsAfter = ArrayRef(Insts).slice(IP);
135 
136   // Choose a source, which will be used to constrain the operation selection.
137   SmallVector<Value *, 2> Srcs;
138   Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
139 
140   // Choose an operation that's constrained to be valid for the type of the
141   // source, collect any other sources it needs, and then build it.
142   auto OpDesc = chooseOperation(Srcs[0], IB);
143   // Bail if no operation was found
144   if (!OpDesc)
145     return;
146 
147   for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
148     Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
149 
150   if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
151     // Find a sink and wire up the results of the operation.
152     IB.connectToSink(BB, InstsAfter, Op);
153   }
154 }
155 
156 uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
157                                           uint64_t CurrentWeight) {
158   // If we have less than 200 bytes, panic and try to always delete.
159   if (CurrentSize > MaxSize - 200)
160     return CurrentWeight ? CurrentWeight * 100 : 1;
161   // Draw a line starting from when we only have 1k left and increasing linearly
162   // to double the current weight.
163   int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
164                  (static_cast<int64_t>(MaxSize) -
165                   static_cast<int64_t>(CurrentSize) - 1000) /
166                  1000;
167   // Clamp negative weights to zero.
168   if (Line < 0)
169     return 0;
170   return Line;
171 }
172 
173 void InstDeleterIRStrategy::mutate(Function &F, RandomIRBuilder &IB) {
174   auto RS = makeSampler<Instruction *>(IB.Rand);
175   for (Instruction &Inst : instructions(F)) {
176     // TODO: We can't handle these instructions.
177     if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
178         isa<PHINode>(Inst))
179       continue;
180 
181     RS.sample(&Inst, /*Weight=*/1);
182   }
183   if (RS.isEmpty())
184     return;
185 
186   // Delete the instruction.
187   mutate(*RS.getSelection(), IB);
188   // Clean up any dead code that's left over after removing the instruction.
189   eliminateDeadCode(F);
190 }
191 
192 void InstDeleterIRStrategy::mutate(Instruction &Inst, RandomIRBuilder &IB) {
193   assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
194 
195   if (Inst.getType()->isVoidTy()) {
196     // Instructions with void type (ie, store) have no uses to worry about. Just
197     // erase it and move on.
198     Inst.eraseFromParent();
199     return;
200   }
201 
202   // Otherwise we need to find some other value with the right type to keep the
203   // users happy.
204   auto Pred = fuzzerop::onlyType(Inst.getType());
205   auto RS = makeSampler<Value *>(IB.Rand);
206   SmallVector<Instruction *, 32> InstsBefore;
207   BasicBlock *BB = Inst.getParent();
208   for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
209        ++I) {
210     if (Pred.matches({}, &*I))
211       RS.sample(&*I, /*Weight=*/1);
212     InstsBefore.push_back(&*I);
213   }
214   if (!RS)
215     RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
216 
217   Inst.replaceAllUsesWith(RS.getSelection());
218   Inst.eraseFromParent();
219 }
220 
221 void InstModificationIRStrategy::mutate(Instruction &Inst,
222                                         RandomIRBuilder &IB) {
223   SmallVector<std::function<void()>, 8> Modifications;
224   CmpInst *CI = nullptr;
225   GetElementPtrInst *GEP = nullptr;
226   switch (Inst.getOpcode()) {
227   default:
228     break;
229   // Add nsw, nuw flag
230   case Instruction::Add:
231   case Instruction::Mul:
232   case Instruction::Sub:
233   case Instruction::Shl:
234     Modifications.push_back(
235         [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
236     Modifications.push_back(
237         [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
238     break;
239   case Instruction::ICmp:
240     CI = cast<ICmpInst>(&Inst);
241     for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
242          p <= CmpInst::LAST_ICMP_PREDICATE; p++) {
243       Modifications.push_back(
244           [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
245     }
246     break;
247   // Add inbound flag.
248   case Instruction::GetElementPtr:
249     GEP = cast<GetElementPtrInst>(&Inst);
250     Modifications.push_back(
251         [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
252     break;
253   // Add exact flag.
254   case Instruction::UDiv:
255   case Instruction::SDiv:
256   case Instruction::LShr:
257   case Instruction::AShr:
258     Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
259     break;
260 
261   case Instruction::FCmp:
262     CI = cast<FCmpInst>(&Inst);
263     for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
264          p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
265       Modifications.push_back(
266           [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
267     }
268     break;
269   }
270 
271   // Add fast math flag if possible.
272   if (isa<FPMathOperator>(&Inst)) {
273     // Try setting everything unless they are already on.
274     Modifications.push_back(
275         [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
276     // Try unsetting everything unless they are already off.
277     Modifications.push_back(
278         [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
279     // Individual setting by flipping the bit
280     Modifications.push_back(
281         [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
282     Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
283     Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
284     Modifications.push_back(
285         [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
286     Modifications.push_back(
287         [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
288     Modifications.push_back(
289         [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
290     Modifications.push_back(
291         [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
292   }
293 
294   // Randomly switch operands of instructions
295   std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
296   switch (Inst.getOpcode()) {
297   case Instruction::SDiv:
298   case Instruction::UDiv:
299   case Instruction::SRem:
300   case Instruction::URem:
301   case Instruction::FDiv:
302   case Instruction::FRem: {
303     // Verify that the after shuffle the second operand is not
304     // constant 0.
305     Value *Operand = Inst.getOperand(0);
306     if (Constant *C = dyn_cast<Constant>(Operand)) {
307       if (!C->isZeroValue()) {
308         ShuffleItems = {0, 1};
309       }
310     }
311     break;
312   }
313   case Instruction::Select:
314     ShuffleItems = {1, 2};
315     break;
316   case Instruction::Add:
317   case Instruction::Sub:
318   case Instruction::Mul:
319   case Instruction::Shl:
320   case Instruction::LShr:
321   case Instruction::AShr:
322   case Instruction::And:
323   case Instruction::Or:
324   case Instruction::Xor:
325   case Instruction::FAdd:
326   case Instruction::FSub:
327   case Instruction::FMul:
328   case Instruction::ICmp:
329   case Instruction::FCmp:
330   case Instruction::ShuffleVector:
331     ShuffleItems = {0, 1};
332     break;
333   }
334   if (ShuffleItems != NoneItem) {
335     Modifications.push_back([&Inst, &ShuffleItems]() {
336       Value *Op0 = Inst.getOperand(ShuffleItems.first);
337       Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
338       Inst.setOperand(ShuffleItems.second, Op0);
339     });
340   }
341 
342   auto RS = makeSampler(IB.Rand, Modifications);
343   if (RS)
344     RS.getSelection()();
345 }
346 
347 /// Return a case value that is not already taken to make sure we don't have two
348 /// cases with same value.
349 static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
350                                    uint64_t MaxValue, RandomIRBuilder &IB) {
351   uint64_t tmp;
352   do {
353     tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
354   } while (CasesTaken.count(tmp) != 0);
355   CasesTaken.insert(tmp);
356   return tmp;
357 }
358 
359 void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
360   Module *M = BB.getParent()->getParent();
361   // If nullptr is selected, we will create a new function declaration.
362   SmallVector<Function *, 32> Functions({nullptr});
363   for (Function &F : M->functions()) {
364     Functions.push_back(&F);
365   }
366 
367   auto RS = makeSampler(IB.Rand, Functions);
368   Function *F = RS.getSelection();
369   // Some functions accept metadata type or token type as arguments.
370   // We don't call those functions for now.
371   // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
372   // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
373   auto IsUnsupportedTy = [](Type *T) {
374     return T->isMetadataTy() || T->isTokenTy();
375   };
376   if (!F || IsUnsupportedTy(F->getReturnType()) ||
377       any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
378     F = IB.createFunctionDeclaration(*M);
379   }
380 
381   FunctionType *FTy = F->getFunctionType();
382   SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
383   if (!F->arg_empty()) {
384     for (Type *ArgTy : FTy->params()) {
385       SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
386     }
387   }
388   bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
389   auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
390                                          Instruction *Inst) {
391     StringRef Name = isRetVoid ? nullptr : "C";
392     CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
393     // Don't return this call inst if it return void as it can't be sinked.
394     return isRetVoid ? nullptr : Call;
395   };
396 
397   SmallVector<Instruction *, 32> Insts;
398   for (Instruction &I : getInsertionRange(BB))
399     Insts.push_back(&I);
400   if (Insts.size() < 1)
401     return;
402 
403   // Choose an insertion point for our new call instruction.
404   uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
405 
406   auto InstsBefore = ArrayRef(Insts).slice(0, IP);
407   auto InstsAfter = ArrayRef(Insts).slice(IP);
408 
409   // Choose a source, which will be used to constrain the operation selection.
410   SmallVector<Value *, 2> Srcs;
411 
412   for (const auto &Pred : ArrayRef(SourcePreds)) {
413     Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
414   }
415 
416   if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
417     // Find a sink and wire up the results of the operation.
418     IB.connectToSink(BB, InstsAfter, Op);
419   }
420 }
421 
422 void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
423   SmallVector<Instruction *, 32> Insts;
424   for (Instruction &I : getInsertionRange(BB))
425     Insts.push_back(&I);
426   if (Insts.size() < 1)
427     return;
428 
429   // Choose a point where we split the block.
430   uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
431   auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
432 
433   // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
434   // directly jumps to `Sink`. Here, we have to create a new terminator for
435   // `Source`.
436   BasicBlock *Block = Insts[IP]->getParent();
437   BasicBlock *Source = Block;
438   BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
439 
440   Function *F = BB.getParent();
441   LLVMContext &C = F->getParent()->getContext();
442   // A coin decides if it is branch or switch
443   if (uniform<uint64_t>(IB.Rand, 0, 1)) {
444     // Branch
445     BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
446     BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
447     Value *Cond =
448         IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
449                               fuzzerop::onlyType(Type::getInt1Ty(C)), false);
450     BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
451     // Remove the old terminator.
452     ReplaceInstWithInst(Source->getTerminator(), Branch);
453     // Connect these blocks to `Sink`
454     connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
455   } else {
456     // Switch
457     // Determine Integer type, it IS possible we use a boolean to switch.
458     auto RS =
459         makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
460                       return Ty->isIntegerTy();
461                     }));
462     assert(RS && "There is no integer type in all allowed types, is the "
463                  "setting correct?");
464     Type *Ty = RS.getSelection();
465     IntegerType *IntTy = cast<IntegerType>(Ty);
466 
467     uint64_t BitSize = IntTy->getBitWidth();
468     uint64_t MaxCaseVal =
469         (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
470     // Create Switch inst in Block
471     Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
472                                         fuzzerop::onlyType(IntTy), false);
473     BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
474     uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
475     NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
476     SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
477     // Remove the old terminator.
478     ReplaceInstWithInst(Source->getTerminator(), Switch);
479 
480     // Create blocks, for each block assign a case value.
481     SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
482     SmallSet<uint64_t, 4> CasesTaken;
483     for (uint64_t i = 0; i < NumCases; i++) {
484       uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
485       BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
486       ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
487       Switch->addCase(OnValue, CaseBlock);
488       Blocks.push_back(CaseBlock);
489     }
490 
491     // Connect these blocks to `Sink`
492     connectBlocksToSink(Blocks, Sink, IB);
493   }
494 }
495 
496 /// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
497 /// even have terminator.
498 void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
499                                             BasicBlock *Sink,
500                                             RandomIRBuilder &IB) {
501   uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
502   for (uint64_t i = 0; i < Blocks.size(); i++) {
503     // We have at least one block that directly goes to sink.
504     CFGToSink ToSink = (i == DirectSinkIdx)
505                            ? CFGToSink::DirectSink
506                            : static_cast<CFGToSink>(uniform<uint64_t>(
507                                  IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
508     BasicBlock *BB = Blocks[i];
509     Function *F = BB->getParent();
510     LLVMContext &C = F->getParent()->getContext();
511     switch (ToSink) {
512     case CFGToSink::Return: {
513       Type *RetTy = F->getReturnType();
514       Value *RetValue = nullptr;
515       if (!RetTy->isVoidTy())
516         RetValue =
517             IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
518       ReturnInst::Create(C, RetValue, BB);
519       break;
520     }
521     case CFGToSink::DirectSink: {
522       BranchInst::Create(Sink, BB);
523       break;
524     }
525     case CFGToSink::SinkOrSelfLoop: {
526       SmallVector<BasicBlock *, 2> Branches({Sink, BB});
527       // A coin decides which block is true branch.
528       uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
529       Value *Cond = IB.findOrCreateSource(
530           *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
531       BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
532       break;
533     }
534     case CFGToSink::EndOfCFGToLink:
535       llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
536     }
537   }
538 }
539 
540 void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
541   // Can't insert PHI node to entry node.
542   if (&BB == &BB.getParent()->getEntryBlock())
543     return;
544   Type *Ty = IB.randomType();
545   PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());
546 
547   // Use a map to make sure the same incoming basic block has the same value.
548   DenseMap<BasicBlock *, Value *> IncomingValues;
549   for (BasicBlock *Pred : predecessors(&BB)) {
550     Value *Src = IncomingValues[Pred];
551     // If `Pred` is not in the map yet, we'll get a nullptr.
552     if (!Src) {
553       SmallVector<Instruction *, 32> Insts;
554       for (auto I = Pred->begin(); I != Pred->end(); ++I)
555         Insts.push_back(&*I);
556       // There is no need to inform IB what previously used values are if we are
557       // using `onlyType`
558       Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
559       IncomingValues[Pred] = Src;
560     }
561     PHI->addIncoming(Src, Pred);
562   }
563   SmallVector<Instruction *, 32> InstsAfter;
564   for (Instruction &I : getInsertionRange(BB))
565     InstsAfter.push_back(&I);
566   IB.connectToSink(BB, InstsAfter, PHI);
567 }
568 
569 void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
570   for (BasicBlock &BB : F) {
571     this->mutate(BB, IB);
572   }
573 }
574 void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
575   SmallVector<Instruction *, 32> Insts;
576   for (Instruction &I : getInsertionRange(BB))
577     Insts.push_back(&I);
578   if (Insts.size() < 1)
579     return;
580   // Choose an Instruction to mutate.
581   uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
582   Instruction *Inst = Insts[Idx];
583   // `Idx + 1` so we don't sink to ourselves.
584   auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
585   Type *Ty = Inst->getType();
586   // Don't sink terminators, void function calls, token, etc.
587   if (!Ty->isVoidTy() && !Ty->isTokenTy())
588     // Find a new sink and wire up the results of the operation.
589     IB.connectToSink(BB, InstsAfter, Inst);
590 }
591 
592 void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
593   // A deterministic alternative to SmallPtrSet with the same lookup
594   // performance.
595   std::map<size_t, Instruction *> AliveInsts;
596   std::map<Instruction *, size_t> AliveInstsLookup;
597   size_t InsertIdx = 0;
598   for (auto &I : make_early_inc_range(make_range(
599            BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
600     // First gather all instructions that can be shuffled. Don't take
601     // terminator.
602     AliveInsts.insert({InsertIdx, &I});
603     AliveInstsLookup.insert({&I, InsertIdx++});
604     // Then remove these instructions from the block
605     I.removeFromParent();
606   }
607 
608   // Shuffle these instructions using topological sort.
609   // Returns false if all current instruction's dependencies in this block have
610   // been shuffled. If so, this instruction can be shuffled too.
611   auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
612     for (Value *O : AliveInsts[Index]->operands()) {
613       Instruction *P = dyn_cast<Instruction>(O);
614       if (P && AliveInstsLookup.count(P))
615         return true;
616     }
617     return false;
618   };
619   // Get all alive instructions that depend on the current instruction.
620   // Takes Instruction* instead of index because the instruction is already
621   // shuffled.
622   auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
623     SmallSetVector<size_t, 8> Children;
624     for (Value *U : I->users()) {
625       Instruction *P = dyn_cast<Instruction>(U);
626       if (P && AliveInstsLookup.count(P))
627         Children.insert(AliveInstsLookup[P]);
628     }
629     return Children;
630   };
631   SmallSet<size_t, 8> RootIndices;
632   SmallVector<Instruction *, 8> Insts;
633   for (const auto &[Index, Inst] : AliveInsts) {
634     if (!hasAliveParent(Index))
635       RootIndices.insert(Index);
636   }
637   // Topological sort by randomly selecting a node without a parent, or root.
638   while (!RootIndices.empty()) {
639     auto RS = makeSampler<size_t>(IB.Rand);
640     for (size_t RootIdx : RootIndices)
641       RS.sample(RootIdx, 1);
642     size_t RootIdx = RS.getSelection();
643 
644     RootIndices.erase(RootIdx);
645     Instruction *Root = AliveInsts[RootIdx];
646     AliveInsts.erase(RootIdx);
647     AliveInstsLookup.erase(Root);
648     Insts.push_back(Root);
649 
650     for (size_t Child : getAliveChildren(Root)) {
651       if (!hasAliveParent(Child)) {
652         RootIndices.insert(Child);
653       }
654     }
655   }
656 
657   Instruction *Terminator = BB.getTerminator();
658   // Then put instructions back.
659   for (Instruction *I : Insts) {
660     I->insertBefore(Terminator);
661   }
662 }
663 
664 std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
665                                           LLVMContext &Context) {
666 
667   if (Size <= 1)
668     // We get bogus data given an empty corpus - just create a new module.
669     return std::make_unique<Module>("M", Context);
670 
671   auto Buffer = MemoryBuffer::getMemBuffer(
672       StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
673       /*RequiresNullTerminator=*/false);
674 
675   SMDiagnostic Err;
676   auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
677   if (Error E = M.takeError()) {
678     errs() << toString(std::move(E)) << "\n";
679     return nullptr;
680   }
681   return std::move(M.get());
682 }
683 
684 size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
685   std::string Buf;
686   {
687     raw_string_ostream OS(Buf);
688     WriteBitcodeToFile(M, OS);
689   }
690   if (Buf.size() > MaxSize)
691     return 0;
692   memcpy(Dest, Buf.data(), Buf.size());
693   return Buf.size();
694 }
695 
696 std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
697                                              LLVMContext &Context) {
698   auto M = parseModule(Data, Size, Context);
699   if (!M || verifyModule(*M, &errs()))
700     return nullptr;
701 
702   return M;
703 }
704