1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <vector>
7 
8 #include "base/logging.h"
9 #include "base/numerics/safe_math.h"
10 #include "base/strings/stringprintf.h"
11 #include "build/build_config.h"
12 #include "components/safe_browsing/core/db/v4_rice.h"
13 
14 #if defined(OS_WIN)
15 #include <winsock2.h>
16 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
17 #include <arpa/inet.h>
18 #endif
19 
20 using ::google::protobuf::RepeatedField;
21 using ::google::protobuf::int32;
22 using ::google::protobuf::int64;
23 
24 #if !defined(ARCH_CPU_LITTLE_ENDIAN) || (ARCH_CPU_LITTLE_ENDIAN != 1)
25 #error The code below assumes little-endianness.
26 #endif
27 
28 namespace safe_browsing {
29 
30 namespace {
31 
32 const int kBitsPerByte = 8;
33 const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);
34 
35 }  // namespace
36 
37 // static
ValidateInput(const int32 rice_parameter,const int32 num_entries,const std::string & encoded_data)38 V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,
39                                             const int32 num_entries,
40                                             const std::string& encoded_data) {
41   if (num_entries < 0) {
42     NOTREACHED();
43     return NUM_ENTRIES_NEGATIVE_FAILURE;
44   }
45 
46   if (num_entries == 0) {
47     return DECODE_SUCCESS;
48   }
49 
50   if (rice_parameter <= 0) {
51     NOTREACHED();
52     return RICE_PARAMETER_NON_POSITIVE_FAILURE;
53   }
54 
55   if (encoded_data.empty()) {
56     NOTREACHED();
57     return ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE;
58   }
59 
60   return DECODE_SUCCESS;
61 }
62 
63 // static
DecodeIntegers(const int64 first_value,const int32 rice_parameter,const int32 num_entries,const std::string & encoded_data,RepeatedField<int32> * out)64 V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,
65                                              const int32 rice_parameter,
66                                              const int32 num_entries,
67                                              const std::string& encoded_data,
68                                              RepeatedField<int32>* out) {
69   DCHECK(out);
70 
71   V4DecodeResult result =
72       ValidateInput(rice_parameter, num_entries, encoded_data);
73   if (result != DECODE_SUCCESS) {
74     return result;
75   }
76 
77   out->Reserve(num_entries + 1);
78   base::CheckedNumeric<int32> last_value(first_value);
79   out->Add(last_value.ValueOrDie());
80   if (num_entries == 0) {
81     return DECODE_SUCCESS;
82   }
83 
84   V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
85   while (decoder.HasAnotherValue()) {
86     uint32_t offset;
87     result = decoder.GetNextValue(&offset);
88     if (result != DECODE_SUCCESS) {
89       return result;
90     }
91 
92     last_value += offset;
93     if (!last_value.IsValid()) {
94       NOTREACHED();
95       return DECODED_INTEGER_OVERFLOW_FAILURE;
96     }
97 
98     out->Add(last_value.ValueOrDie());
99   }
100 
101   return DECODE_SUCCESS;
102 }
103 
104 // static
DecodePrefixes(const int64 first_value,const int32 rice_parameter,const int32 num_entries,const std::string & encoded_data,std::vector<uint32_t> * out)105 V4DecodeResult V4RiceDecoder::DecodePrefixes(const int64 first_value,
106                                              const int32 rice_parameter,
107                                              const int32 num_entries,
108                                              const std::string& encoded_data,
109                                              std::vector<uint32_t>* out) {
110   DCHECK(out);
111 
112   V4DecodeResult result =
113       ValidateInput(rice_parameter, num_entries, encoded_data);
114   if (result != DECODE_SUCCESS) {
115     return result;
116   }
117   out->reserve((num_entries + 1));
118 
119   base::CheckedNumeric<uint32_t> last_value(first_value);
120   out->push_back(htonl(last_value.ValueOrDie()));
121 
122   if (num_entries > 0) {
123     V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
124     while (decoder.HasAnotherValue()) {
125       uint32_t offset;
126       result = decoder.GetNextValue(&offset);
127       if (result != DECODE_SUCCESS) {
128         return result;
129       }
130 
131       last_value += offset;
132       if (!last_value.IsValid()) {
133         NOTREACHED();
134         return DECODED_INTEGER_OVERFLOW_FAILURE;
135       }
136 
137       // This flipping is done so that the decoded uint32 is interpreted
138       // correcly as a string of 4 bytes.
139       out->push_back(htonl(last_value.ValueOrDie()));
140     }
141   }
142 
143   // Flipping the bytes, as done above, destroys the sort order. Sort the
144   // values back.
145   std::sort(out->begin(), out->end());
146 
147   // This flipping is done so that when the vector is interpreted as a string,
148   // the bytes are in the correct order.
149   for (size_t i = 0; i < out->size(); i++) {
150     (*out)[i] = ntohl((*out)[i]);
151   }
152 
153   return DECODE_SUCCESS;
154 }
155 
V4RiceDecoder(const int rice_parameter,const int num_entries,const std::string & encoded_data)156 V4RiceDecoder::V4RiceDecoder(const int rice_parameter,
157                              const int num_entries,
158                              const std::string& encoded_data)
159     : rice_parameter_(rice_parameter),
160       num_entries_(num_entries),
161       data_(encoded_data),
162       current_word_(0) {
163   DCHECK_LE(0, num_entries_);
164   DCHECK_LE(2u, rice_parameter_);
165   DCHECK_GE(28u, rice_parameter_);
166 
167   data_byte_index_ = 0;
168   current_word_bit_index_ = kMaxBitIndex;
169 }
170 
~V4RiceDecoder()171 V4RiceDecoder::~V4RiceDecoder() {}
172 
HasAnotherValue() const173 bool V4RiceDecoder::HasAnotherValue() const {
174   return num_entries_ > 0;
175 }
176 
GetNextValue(uint32_t * value)177 V4DecodeResult V4RiceDecoder::GetNextValue(uint32_t* value) {
178   if (!HasAnotherValue()) {
179     return DECODE_NO_MORE_ENTRIES_FAILURE;
180   }
181 
182   V4DecodeResult result;
183   uint32_t q = 0;
184   uint32_t bit;
185   do {
186     result = GetNextBits(1, &bit);
187     if (result != DECODE_SUCCESS) {
188       return result;
189     }
190     q += bit;
191   } while (bit);
192   uint32_t r = 0;
193   result = GetNextBits(rice_parameter_, &r);
194   if (result != DECODE_SUCCESS) {
195     return result;
196   }
197 
198   *value = (q << rice_parameter_) + r;
199   num_entries_--;
200   return DECODE_SUCCESS;
201 }
202 
GetNextWord(uint32_t * word)203 V4DecodeResult V4RiceDecoder::GetNextWord(uint32_t* word) {
204   if (data_byte_index_ >= data_.size()) {
205     return DECODE_RAN_OUT_OF_BITS_FAILURE;
206   }
207 
208   const size_t mask = 0xFF;
209   *word = (data_[data_byte_index_] & mask);
210   data_byte_index_++;
211   current_word_bit_index_ = 0;
212 
213   if (data_byte_index_ < data_.size()) {
214     *word |= ((data_[data_byte_index_] & mask) << 8);
215     data_byte_index_++;
216 
217     if (data_byte_index_ < data_.size()) {
218       *word |= ((data_[data_byte_index_] & mask) << 16);
219       data_byte_index_++;
220 
221       if (data_byte_index_ < data_.size()) {
222         *word |= ((data_[data_byte_index_] & mask) << 24);
223         data_byte_index_++;
224       }
225     }
226   }
227 
228   return DECODE_SUCCESS;
229 }
230 
GetNextBits(unsigned int num_requested_bits,uint32_t * x)231 V4DecodeResult V4RiceDecoder::GetNextBits(unsigned int num_requested_bits,
232                                           uint32_t* x) {
233   if (num_requested_bits > kMaxBitIndex) {
234     NOTREACHED();
235     return DECODE_REQUESTED_TOO_MANY_BITS_FAILURE;
236   }
237 
238   if (current_word_bit_index_ == kMaxBitIndex) {
239     V4DecodeResult result = GetNextWord(&current_word_);
240     if (result != DECODE_SUCCESS) {
241       return result;
242     }
243   }
244 
245   unsigned int num_bits_left_in_current_word =
246       kMaxBitIndex - current_word_bit_index_;
247   if (num_bits_left_in_current_word >= num_requested_bits) {
248     // All the bits that we need are in |current_word_|.
249     *x = GetBitsFromCurrentWord(num_requested_bits);
250   } else {
251     // |current_word_| contains fewer bits than we need so read the remaining
252     // bits from |current_word_| into |lower|, and then call GetNextBits on the
253     // remaining number of bits, which will read in a new word into
254     // |current_word_|.
255     uint32_t lower = GetBitsFromCurrentWord(num_bits_left_in_current_word);
256 
257     unsigned int num_bits_from_next_word =
258         num_requested_bits - num_bits_left_in_current_word;
259     uint32_t upper;
260     V4DecodeResult result = GetNextBits(num_bits_from_next_word, &upper);
261     if (result != DECODE_SUCCESS) {
262       return result;
263     }
264     *x = (upper << num_bits_left_in_current_word) | lower;
265   }
266   return DECODE_SUCCESS;
267 }
268 
GetBitsFromCurrentWord(unsigned int num_requested_bits)269 uint32_t V4RiceDecoder::GetBitsFromCurrentWord(
270     unsigned int num_requested_bits) {
271   uint32_t mask = 0xFFFFFFFF >> (kMaxBitIndex - num_requested_bits);
272   uint32_t x = current_word_ & mask;
273   current_word_ = current_word_ >> num_requested_bits;
274   current_word_bit_index_ += num_requested_bits;
275   return x;
276 }
277 
DebugString() const278 std::string V4RiceDecoder::DebugString() const {
279   // Calculates the total number of bits that we have read from the buffer,
280   // excluding those that have been read into current_word_ but not yet
281   // consumed byt GetNextBits().
282   unsigned bits_read = (data_byte_index_ - sizeof(uint32_t)) * kBitsPerByte +
283                        current_word_bit_index_;
284   return base::StringPrintf(
285       "bits_read: %x; current_word_: %x; data_byte_index_; %x, "
286       "current_word_bit_index_: %x; rice_parameter_: %x",
287       bits_read, current_word_, data_byte_index_, current_word_bit_index_,
288       rice_parameter_);
289 }
290 
operator <<(std::ostream & os,const V4RiceDecoder & rice_decoder)291 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder) {
292   os << rice_decoder.DebugString();
293   return os;
294 }
295 
296 }  // namespace safe_browsing
297