1 #ifndef SIMDJSON_FUZZUTILS_H
2 #define SIMDJSON_FUZZUTILS_H
3 
4 #include <cstdint>
5 #include <vector>
6 #include <string_view>
7 #include <cstring> //memcpy
8 
// View arbitrary data as a pointer to read-only bytes.
// Nothing is copied: the result refers to the same address as `data`.
template <typename T>
inline const std::uint8_t* as_bytes(const T* data) {
  const void* const untyped = data;
  return static_cast<const std::uint8_t*>(untyped);
}
13 
// View arbitrary data as a pointer to read-only chars.
// Nothing is copied: the result refers to the same address as `data`.
template <typename T>
inline const char* as_chars(const T* data) {
  const void* const untyped = data;
  return static_cast<const char*>(untyped);
}
18 
19 
20 
21 
// Cuts the input into pieces at every occurrence of a human readable four
// byte separator ("\n~~\n"), which makes fuzz data nicer to debug.
// See https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#magic-separator
// for background. Note: don't use memmem, it is not standard C++.
//
// The returned views point into the caller's buffer; nothing is copied.
// The result always holds at least one element: whatever follows the last
// separator (possibly empty) is appended as the final piece.
inline std::vector<std::string_view> split(const char* Data, size_t Size) {
  constexpr std::string_view separator("\n~~\n");

  std::vector<std::string_view> pieces;
  std::string_view rest(Data, Size);

  for (;;) {
    const auto hit = rest.find(separator);
    if (hit == std::string_view::npos) {
      break;
    }
    // everything in front of the separator is one piece ...
    pieces.push_back(rest.substr(0, hit));
    // ... and scanning continues right behind the separator.
    rest.remove_prefix(hit + separator.size());
  }

  pieces.push_back(rest);
  return pieces;
}
43 
44 // Generic helper to split fuzz data into usable parts, like ints etc.
45 // Note that it does not throw, instead it sets the data pointer to null
46 // if the input is exhausted.
47 struct FuzzData {
48   // data may not be null, even if size is zero.
FuzzDataFuzzData49   FuzzData(const uint8_t* data,
50            size_t size) : Data(data),Size(size){}
51 
52   ///range is inclusive
53   template<int Min, int Max>
getIntFuzzData54   int getInt() {
55     static_assert (Min<Max,"min must be <max");
56 
57     // make this constexpr, can't overflow because that is UB and is forbidden
58     // in constexpr evaluation
59     constexpr int range=(Max-Min)+1;
60     constexpr unsigned int urange=range;
61 
62     // don't use std::uniform_int_distribution, we don't want to pay for
63     // over consumption of random data. Accept the slightly non-uniform distribution.
64     if(range<256)
65       return Min+static_cast<int>(get<uint8_t>()%urange);
66     if(range<65536)
67       return Min+static_cast<int>(get<uint16_t>()%urange);
68 
69     return Min+static_cast<int>(get<uint32_t>()%urange);
70   }
71 
72   template<typename T>
getFuzzData73   T get() {
74     const auto Nbytes=sizeof(T);
75     T ret{};
76     if(Size<Nbytes) {
77       //don't throw, signal with null instead.
78       Data=nullptr;
79       Size=0;
80       return ret;
81     }
82     std::memcpy(&ret,Data,Nbytes);
83     Data+=Nbytes;
84     Size-=Nbytes;
85     return ret;
86   }
87 
88   // gets a string view with length in [Min,Max]
89   template<int Min, int Max>
get_stringviewFuzzData90   std::string_view get_stringview() {
91     static_assert (Min>=0,"Min must be positive");
92     const int len=getInt<Min,Max>();
93     const unsigned int ulen=static_cast<unsigned int>(len);
94     if(ulen<Size) {
95       std::string_view ret(chardata(),ulen);
96       Data+=len;
97       Size-=ulen;
98       return ret;
99     }
100 
101     //mark that there is too little data to fulfill the request
102     Data=nullptr;
103     Size=0;
104 
105     return {};
106   }
107 
108   // consumes the rest of the data as a string view
remainder_as_stringviewFuzzData109   std::string_view remainder_as_stringview() {
110     std::string_view ret{chardata(),Size};
111     Data+=Size;
112     Size=0;
113     return ret;
114   }
115 
116   // split the remainder of the data into string views,
splitIntoStringsFuzzData117   std::vector<std::string_view> splitIntoStrings() {
118     std::vector<std::string_view> ret;
119     if(Size>0) {
120       ret=split(chardata(),Size);
121       // all data consumed.
122       Data+=Size;
123       Size=0;
124     }
125     return ret;
126   }
127 
128   //are we good?
129   explicit operator bool() const { return Data!=nullptr;}
130 
131   //we are a URBG
132   // https://en.cppreference.com/w/cpp/named_req/UniformRandomBitGenerator
133   //The type G satisfies UniformRandomBitGenerator if    Given
134   //   T, the type named by G::result_type
135   //    g, a value of type G
136   //
137   //  The following expressions must be valid and have their specified effects
138   //  Expression 	Return type 	Requirements
139   //  G::result_type 	T 	T is an unsigned integer type
140   using result_type=uint8_t;
141   //  G::min() 	T 	Returns the smallest value that G's operator() may return. The value is strictly less than G::max(). The function must be constexpr.
minFuzzData142   static constexpr result_type min() {return 0;}
143   //  G::max() 	T 	Returns the largest value that G's operator() may return. The value is strictly greater than G::min(). The function must be constexpr.
maxFuzzData144   static constexpr result_type max() {return 255;}
145   //  g() 	T 	Returns a value in the closed interval [G::min(), G::max()]. Has amortized constant complexity.
operatorFuzzData146   result_type operator()() {
147     if(Size==0) {
148       // return something varying, otherwise uniform_int_distribution may get
149       // stuck
150       return failcount++;
151     }
152     const result_type ret=Data[0];
153     Data++;
154     Size--;
155     return ret;
156   }
157   // returns a pointer to data as const char* to avoid those cstyle casts
chardataFuzzData158   const char* chardata() const {return static_cast<const char*>(static_cast<const void*>(Data));}
159   // members
160   const uint8_t* Data;
161   size_t Size;
162   uint8_t failcount=0;
163 };
164 
165 
166 #endif // SIMDJSON_FUZZUTILS_H
167