1 //===- FuzzerTraceState.cpp - Trace-based fuzzer mutator ------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Data tracing.
10 //===----------------------------------------------------------------------===//
11 
12 #include "FuzzerDictionary.h"
13 #include "FuzzerInternal.h"
14 #include "FuzzerIO.h"
15 #include "FuzzerMutate.h"
16 #include "FuzzerRandom.h"
17 #include "FuzzerTracePC.h"
18 #include <algorithm>
19 #include <cstring>
20 #include <map>
21 #include <set>
22 #include <thread>
23 
24 namespace fuzzer {
25 
26 // For now, very simple: put Size bytes of Data at position Pos.
27 struct TraceBasedMutation {
28   uint32_t Pos;
29   Word W;
30 };
31 
32 // Declared as static globals for faster checks inside the hooks.
33 static bool RecordingMemcmp = false;
34 static bool RecordingMemmem = false;
35 static bool DoingMyOwnMemmem = false;
36 
ScopedDoingMyOwnMemmem()37 ScopedDoingMyOwnMemmem::ScopedDoingMyOwnMemmem() { DoingMyOwnMemmem = true; }
~ScopedDoingMyOwnMemmem()38 ScopedDoingMyOwnMemmem::~ScopedDoingMyOwnMemmem() { DoingMyOwnMemmem = false; }
39 
40 class TraceState {
41 public:
TraceState(MutationDispatcher & MD,const FuzzingOptions & Options,const Fuzzer * F)42   TraceState(MutationDispatcher &MD, const FuzzingOptions &Options,
43              const Fuzzer *F)
44       : MD(MD), Options(Options), F(F) {}
45 
46   void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
47                            const uint8_t *Data2);
48 
49   void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
50                            size_t NumCases, uint64_t *Cases);
51   int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
52                           size_t DataSize);
53   int TryToAddDesiredData(const uint8_t *PresentData,
54                           const uint8_t *DesiredData, size_t DataSize);
55 
StartTraceRecording()56   void StartTraceRecording() {
57     if (!Options.UseMemcmp)
58       return;
59     RecordingMemcmp = Options.UseMemcmp;
60     RecordingMemmem = Options.UseMemmem;
61     NumMutations = 0;
62     InterestingWords.clear();
63     MD.ClearAutoDictionary();
64   }
65 
StopTraceRecording()66   void StopTraceRecording() {
67     if (!RecordingMemcmp)
68       return;
69     RecordingMemcmp = false;
70     for (size_t i = 0; i < NumMutations; i++) {
71       auto &M = Mutations[i];
72       if (Options.Verbosity >= 2) {
73         AutoDictUnitCounts[M.W]++;
74         AutoDictAdds++;
75         if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
76           typedef std::pair<size_t, Word> CU;
77           std::vector<CU> CountedUnits;
78           for (auto &I : AutoDictUnitCounts)
79             CountedUnits.push_back(std::make_pair(I.second, I.first));
80           std::sort(CountedUnits.begin(), CountedUnits.end(),
81                     [](const CU &a, const CU &b) { return a.first > b.first; });
82           Printf("AutoDict:\n");
83           for (auto &I : CountedUnits) {
84             Printf("   %zd ", I.first);
85             PrintASCII(I.second.data(), I.second.size());
86             Printf("\n");
87           }
88         }
89       }
90       MD.AddWordToAutoDictionary({M.W, M.Pos});
91     }
92     for (auto &W : InterestingWords)
93       MD.AddWordToAutoDictionary({W});
94   }
95 
AddMutation(uint32_t Pos,uint32_t Size,const uint8_t * Data)96   void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
97     if (NumMutations >= kMaxMutations) return;
98     auto &M = Mutations[NumMutations++];
99     M.Pos = Pos;
100     M.W.Set(Data, Size);
101   }
102 
AddMutation(uint32_t Pos,uint32_t Size,uint64_t Data)103   void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
104     assert(Size <= sizeof(Data));
105     AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));
106   }
107 
AddInterestingWord(const uint8_t * Data,size_t Size)108   void AddInterestingWord(const uint8_t *Data, size_t Size) {
109     if (!RecordingMemmem || !F->InFuzzingThread()) return;
110     if (Size <= 1) return;
111     Size = std::min(Size, Word::GetMaxSize());
112     Word W(Data, Size);
113     InterestingWords.insert(W);
114   }
115 
116  private:
IsTwoByteData(uint64_t Data)117   bool IsTwoByteData(uint64_t Data) {
118     int64_t Signed = static_cast<int64_t>(Data);
119     Signed >>= 16;
120     return Signed == 0 || Signed == -1L;
121   }
122 
123   // We don't want to create too many trace-based mutations as it is both
124   // expensive and useless. So after some number of mutations is collected,
125   // start rejecting some of them. The more there are mutations the more we
126   // reject.
WantToHandleOneMoreMutation()127   bool WantToHandleOneMoreMutation() {
128     const size_t FirstN = 64;
129     // Gladly handle first N mutations.
130     if (NumMutations <= FirstN) return true;
131     size_t Diff = NumMutations - FirstN;
132     size_t DiffLog = sizeof(long) * 8 - __builtin_clzl((long)Diff);
133     assert(DiffLog > 0 && DiffLog < 64);
134     bool WantThisOne = MD.GetRand()(1 << DiffLog) == 0;  // 1 out of DiffLog.
135     return WantThisOne;
136   }
137 
138   static const size_t kMaxMutations = 1 << 16;
139   size_t NumMutations;
140   TraceBasedMutation Mutations[kMaxMutations];
141   // TODO: std::set is too inefficient, need to have a custom DS here.
142   std::set<Word> InterestingWords;
143   MutationDispatcher &MD;
144   const FuzzingOptions Options;
145   const Fuzzer *F;
146   std::map<Word, size_t> AutoDictUnitCounts;
147   size_t AutoDictAdds = 0;
148 };
149 
TryToAddDesiredData(uint64_t PresentData,uint64_t DesiredData,size_t DataSize)150 int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
151                                     size_t DataSize) {
152   if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
153   ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
154   const uint8_t *UnitData;
155   auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
156   int Res = 0;
157   const uint8_t *Beg = UnitData;
158   const uint8_t *End = Beg + UnitSize;
159   for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
160     Cur = (uint8_t *)SearchMemory(Cur, End - Cur, &PresentData, DataSize);
161     if (!Cur)
162       break;
163     size_t Pos = Cur - Beg;
164     assert(Pos < UnitSize);
165     AddMutation(Pos, DataSize, DesiredData);
166     AddMutation(Pos, DataSize, DesiredData + 1);
167     AddMutation(Pos, DataSize, DesiredData - 1);
168     Res++;
169   }
170   return Res;
171 }
172 
TryToAddDesiredData(const uint8_t * PresentData,const uint8_t * DesiredData,size_t DataSize)173 int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
174                                     const uint8_t *DesiredData,
175                                     size_t DataSize) {
176   if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
177   ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
178   const uint8_t *UnitData;
179   auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
180   int Res = 0;
181   const uint8_t *Beg = UnitData;
182   const uint8_t *End = Beg + UnitSize;
183   for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
184     Cur = (uint8_t *)SearchMemory(Cur, End - Cur, PresentData, DataSize);
185     if (!Cur)
186       break;
187     size_t Pos = Cur - Beg;
188     assert(Pos < UnitSize);
189     AddMutation(Pos, DataSize, DesiredData);
190     Res++;
191   }
192   return Res;
193 }
194 
TraceMemcmpCallback(size_t CmpSize,const uint8_t * Data1,const uint8_t * Data2)195 void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
196                                      const uint8_t *Data2) {
197   if (!RecordingMemcmp || !F->InFuzzingThread()) return;
198   CmpSize = std::min(CmpSize, Word::GetMaxSize());
199   int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
200   int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
201   if ((Added1 || Added2) && Options.Verbosity >= 3) {
202     Printf("MemCmp Added %d%d: ", Added1, Added2);
203     if (Added1) PrintASCII(Data1, CmpSize);
204     if (Added2) PrintASCII(Data2, CmpSize);
205     Printf("\n");
206   }
207 }
208 
TraceSwitchCallback(uintptr_t PC,size_t ValSizeInBits,uint64_t Val,size_t NumCases,uint64_t * Cases)209 void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
210                                      uint64_t Val, size_t NumCases,
211                                      uint64_t *Cases) {
212   if (F->InFuzzingThread()) return;
213   size_t ValSize = ValSizeInBits / 8;
214   bool TryShort = IsTwoByteData(Val);
215   for (size_t i = 0; i < NumCases; i++)
216     TryShort &= IsTwoByteData(Cases[i]);
217 
218   if (Options.Verbosity >= 3)
219     Printf("TraceSwitch: %p %zd # %zd; TryShort %d\n", PC, Val, NumCases,
220            TryShort);
221 
222   for (size_t i = 0; i < NumCases; i++) {
223     TryToAddDesiredData(Val, Cases[i], ValSize);
224     if (TryShort)
225       TryToAddDesiredData(Val, Cases[i], 2);
226   }
227 }
228 
229 static TraceState *TS;
230 
StartTraceRecording()231 void Fuzzer::StartTraceRecording() {
232   if (!TS) return;
233   TS->StartTraceRecording();
234 }
235 
StopTraceRecording()236 void Fuzzer::StopTraceRecording() {
237   if (!TS) return;
238   TS->StopTraceRecording();
239 }
240 
InitializeTraceState()241 void Fuzzer::InitializeTraceState() {
242   if (!Options.UseMemcmp) return;
243   TS = new TraceState(MD, Options, this);
244 }
245 
InternalStrnlen(const char * S,size_t MaxLen)246 static size_t InternalStrnlen(const char *S, size_t MaxLen) {
247   size_t Len = 0;
248   for (; Len < MaxLen && S[Len]; Len++) {}
249   return Len;
250 }
251 
252 }  // namespace fuzzer
253 
254 using fuzzer::TS;
255 using fuzzer::RecordingMemcmp;
256 
257 extern "C" {
258 
259 // We may need to avoid defining weak hooks to stay compatible with older clang.
260 #ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
261 # define LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS 1
262 #endif
263 
264 #if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
__sanitizer_weak_hook_memcmp(void * caller_pc,const void * s1,const void * s2,size_t n,int result)265 void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
266                                   const void *s2, size_t n, int result) {
267   fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n);
268   if (!RecordingMemcmp) return;
269   if (result == 0) return;  // No reason to mutate.
270   if (n <= 1) return;  // Not interesting.
271   TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
272                           reinterpret_cast<const uint8_t *>(s2));
273 }
274 
__sanitizer_weak_hook_strncmp(void * caller_pc,const char * s1,const char * s2,size_t n,int result)275 void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
276                                    const char *s2, size_t n, int result) {
277   fuzzer::TPC.AddValueForStrcmp(caller_pc, s1, s2, n);
278   if (!RecordingMemcmp) return;
279   if (result == 0) return;  // No reason to mutate.
280   size_t Len1 = fuzzer::InternalStrnlen(s1, n);
281   size_t Len2 = fuzzer::InternalStrnlen(s2, n);
282   n = std::min(n, Len1);
283   n = std::min(n, Len2);
284   if (n <= 1) return;  // Not interesting.
285   TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
286                           reinterpret_cast<const uint8_t *>(s2));
287 }
288 
__sanitizer_weak_hook_strcmp(void * caller_pc,const char * s1,const char * s2,int result)289 void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
290                                    const char *s2, int result) {
291   fuzzer::TPC.AddValueForStrcmp(caller_pc, s1, s2, 64);
292   if (!RecordingMemcmp) return;
293   if (result == 0) return;  // No reason to mutate.
294   size_t Len1 = strlen(s1);
295   size_t Len2 = strlen(s2);
296   size_t N = std::min(Len1, Len2);
297   if (N <= 1) return;  // Not interesting.
298   TS->TraceMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
299                           reinterpret_cast<const uint8_t *>(s2));
300 }
301 
__sanitizer_weak_hook_strncasecmp(void * called_pc,const char * s1,const char * s2,size_t n,int result)302 void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
303                                        const char *s2, size_t n, int result) {
304   return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
305 }
__sanitizer_weak_hook_strcasecmp(void * called_pc,const char * s1,const char * s2,int result)306 void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
307                                       const char *s2, int result) {
308   return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
309 }
__sanitizer_weak_hook_strstr(void * called_pc,const char * s1,const char * s2,char * result)310 void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
311                                   const char *s2, char *result) {
312   TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
313 }
__sanitizer_weak_hook_strcasestr(void * called_pc,const char * s1,const char * s2,char * result)314 void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
315                                       const char *s2, char *result) {
316   TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
317 }
__sanitizer_weak_hook_memmem(void * called_pc,const void * s1,size_t len1,const void * s2,size_t len2,void * result)318 void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
319                                   const void *s2, size_t len2, void *result) {
320   if (fuzzer::DoingMyOwnMemmem) return;
321   TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), len2);
322 }
323 
324 #endif  // LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
325 }  // extern "C"
326