1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <vector>
7
8 #include "base/logging.h"
9 #include "base/numerics/safe_math.h"
10 #include "base/strings/stringprintf.h"
11 #include "build/build_config.h"
12 #include "components/safe_browsing/core/db/v4_rice.h"
13
14 #if defined(OS_WIN)
15 #include <winsock2.h>
16 #elif defined(OS_POSIX) || defined(OS_FUCHSIA)
17 #include <arpa/inet.h>
18 #endif
19
20 using ::google::protobuf::RepeatedField;
21 using ::google::protobuf::int32;
22 using ::google::protobuf::int64;
23
24 #if !defined(ARCH_CPU_LITTLE_ENDIAN) || (ARCH_CPU_LITTLE_ENDIAN != 1)
25 #error The code below assumes little-endianness.
26 #endif
27
28 namespace safe_browsing {
29
30 namespace {
31
32 const int kBitsPerByte = 8;
33 const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);
34
35 } // namespace
36
37 // static
ValidateInput(const int32 rice_parameter,const int32 num_entries,const std::string & encoded_data)38 V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,
39 const int32 num_entries,
40 const std::string& encoded_data) {
41 if (num_entries < 0) {
42 NOTREACHED();
43 return NUM_ENTRIES_NEGATIVE_FAILURE;
44 }
45
46 if (num_entries == 0) {
47 return DECODE_SUCCESS;
48 }
49
50 if (rice_parameter <= 0) {
51 NOTREACHED();
52 return RICE_PARAMETER_NON_POSITIVE_FAILURE;
53 }
54
55 if (encoded_data.empty()) {
56 NOTREACHED();
57 return ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE;
58 }
59
60 return DECODE_SUCCESS;
61 }
62
63 // static
DecodeIntegers(const int64 first_value,const int32 rice_parameter,const int32 num_entries,const std::string & encoded_data,RepeatedField<int32> * out)64 V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,
65 const int32 rice_parameter,
66 const int32 num_entries,
67 const std::string& encoded_data,
68 RepeatedField<int32>* out) {
69 DCHECK(out);
70
71 V4DecodeResult result =
72 ValidateInput(rice_parameter, num_entries, encoded_data);
73 if (result != DECODE_SUCCESS) {
74 return result;
75 }
76
77 out->Reserve(num_entries + 1);
78 base::CheckedNumeric<int32> last_value(first_value);
79 out->Add(last_value.ValueOrDie());
80 if (num_entries == 0) {
81 return DECODE_SUCCESS;
82 }
83
84 V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
85 while (decoder.HasAnotherValue()) {
86 uint32_t offset;
87 result = decoder.GetNextValue(&offset);
88 if (result != DECODE_SUCCESS) {
89 return result;
90 }
91
92 last_value += offset;
93 if (!last_value.IsValid()) {
94 NOTREACHED();
95 return DECODED_INTEGER_OVERFLOW_FAILURE;
96 }
97
98 out->Add(last_value.ValueOrDie());
99 }
100
101 return DECODE_SUCCESS;
102 }
103
104 // static
DecodePrefixes(const int64 first_value,const int32 rice_parameter,const int32 num_entries,const std::string & encoded_data,std::vector<uint32_t> * out)105 V4DecodeResult V4RiceDecoder::DecodePrefixes(const int64 first_value,
106 const int32 rice_parameter,
107 const int32 num_entries,
108 const std::string& encoded_data,
109 std::vector<uint32_t>* out) {
110 DCHECK(out);
111
112 V4DecodeResult result =
113 ValidateInput(rice_parameter, num_entries, encoded_data);
114 if (result != DECODE_SUCCESS) {
115 return result;
116 }
117 out->reserve((num_entries + 1));
118
119 base::CheckedNumeric<uint32_t> last_value(first_value);
120 out->push_back(htonl(last_value.ValueOrDie()));
121
122 if (num_entries > 0) {
123 V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
124 while (decoder.HasAnotherValue()) {
125 uint32_t offset;
126 result = decoder.GetNextValue(&offset);
127 if (result != DECODE_SUCCESS) {
128 return result;
129 }
130
131 last_value += offset;
132 if (!last_value.IsValid()) {
133 NOTREACHED();
134 return DECODED_INTEGER_OVERFLOW_FAILURE;
135 }
136
137 // This flipping is done so that the decoded uint32 is interpreted
138 // correcly as a string of 4 bytes.
139 out->push_back(htonl(last_value.ValueOrDie()));
140 }
141 }
142
143 // Flipping the bytes, as done above, destroys the sort order. Sort the
144 // values back.
145 std::sort(out->begin(), out->end());
146
147 // This flipping is done so that when the vector is interpreted as a string,
148 // the bytes are in the correct order.
149 for (size_t i = 0; i < out->size(); i++) {
150 (*out)[i] = ntohl((*out)[i]);
151 }
152
153 return DECODE_SUCCESS;
154 }
155
V4RiceDecoder(const int rice_parameter,const int num_entries,const std::string & encoded_data)156 V4RiceDecoder::V4RiceDecoder(const int rice_parameter,
157 const int num_entries,
158 const std::string& encoded_data)
159 : rice_parameter_(rice_parameter),
160 num_entries_(num_entries),
161 data_(encoded_data),
162 current_word_(0) {
163 DCHECK_LE(0, num_entries_);
164 DCHECK_LE(2u, rice_parameter_);
165 DCHECK_GE(28u, rice_parameter_);
166
167 data_byte_index_ = 0;
168 current_word_bit_index_ = kMaxBitIndex;
169 }
170
~V4RiceDecoder()171 V4RiceDecoder::~V4RiceDecoder() {}
172
HasAnotherValue() const173 bool V4RiceDecoder::HasAnotherValue() const {
174 return num_entries_ > 0;
175 }
176
GetNextValue(uint32_t * value)177 V4DecodeResult V4RiceDecoder::GetNextValue(uint32_t* value) {
178 if (!HasAnotherValue()) {
179 return DECODE_NO_MORE_ENTRIES_FAILURE;
180 }
181
182 V4DecodeResult result;
183 uint32_t q = 0;
184 uint32_t bit;
185 do {
186 result = GetNextBits(1, &bit);
187 if (result != DECODE_SUCCESS) {
188 return result;
189 }
190 q += bit;
191 } while (bit);
192 uint32_t r = 0;
193 result = GetNextBits(rice_parameter_, &r);
194 if (result != DECODE_SUCCESS) {
195 return result;
196 }
197
198 *value = (q << rice_parameter_) + r;
199 num_entries_--;
200 return DECODE_SUCCESS;
201 }
202
GetNextWord(uint32_t * word)203 V4DecodeResult V4RiceDecoder::GetNextWord(uint32_t* word) {
204 if (data_byte_index_ >= data_.size()) {
205 return DECODE_RAN_OUT_OF_BITS_FAILURE;
206 }
207
208 const size_t mask = 0xFF;
209 *word = (data_[data_byte_index_] & mask);
210 data_byte_index_++;
211 current_word_bit_index_ = 0;
212
213 if (data_byte_index_ < data_.size()) {
214 *word |= ((data_[data_byte_index_] & mask) << 8);
215 data_byte_index_++;
216
217 if (data_byte_index_ < data_.size()) {
218 *word |= ((data_[data_byte_index_] & mask) << 16);
219 data_byte_index_++;
220
221 if (data_byte_index_ < data_.size()) {
222 *word |= ((data_[data_byte_index_] & mask) << 24);
223 data_byte_index_++;
224 }
225 }
226 }
227
228 return DECODE_SUCCESS;
229 }
230
GetNextBits(unsigned int num_requested_bits,uint32_t * x)231 V4DecodeResult V4RiceDecoder::GetNextBits(unsigned int num_requested_bits,
232 uint32_t* x) {
233 if (num_requested_bits > kMaxBitIndex) {
234 NOTREACHED();
235 return DECODE_REQUESTED_TOO_MANY_BITS_FAILURE;
236 }
237
238 if (current_word_bit_index_ == kMaxBitIndex) {
239 V4DecodeResult result = GetNextWord(¤t_word_);
240 if (result != DECODE_SUCCESS) {
241 return result;
242 }
243 }
244
245 unsigned int num_bits_left_in_current_word =
246 kMaxBitIndex - current_word_bit_index_;
247 if (num_bits_left_in_current_word >= num_requested_bits) {
248 // All the bits that we need are in |current_word_|.
249 *x = GetBitsFromCurrentWord(num_requested_bits);
250 } else {
251 // |current_word_| contains fewer bits than we need so read the remaining
252 // bits from |current_word_| into |lower|, and then call GetNextBits on the
253 // remaining number of bits, which will read in a new word into
254 // |current_word_|.
255 uint32_t lower = GetBitsFromCurrentWord(num_bits_left_in_current_word);
256
257 unsigned int num_bits_from_next_word =
258 num_requested_bits - num_bits_left_in_current_word;
259 uint32_t upper;
260 V4DecodeResult result = GetNextBits(num_bits_from_next_word, &upper);
261 if (result != DECODE_SUCCESS) {
262 return result;
263 }
264 *x = (upper << num_bits_left_in_current_word) | lower;
265 }
266 return DECODE_SUCCESS;
267 }
268
GetBitsFromCurrentWord(unsigned int num_requested_bits)269 uint32_t V4RiceDecoder::GetBitsFromCurrentWord(
270 unsigned int num_requested_bits) {
271 uint32_t mask = 0xFFFFFFFF >> (kMaxBitIndex - num_requested_bits);
272 uint32_t x = current_word_ & mask;
273 current_word_ = current_word_ >> num_requested_bits;
274 current_word_bit_index_ += num_requested_bits;
275 return x;
276 }
277
DebugString() const278 std::string V4RiceDecoder::DebugString() const {
279 // Calculates the total number of bits that we have read from the buffer,
280 // excluding those that have been read into current_word_ but not yet
281 // consumed byt GetNextBits().
282 unsigned bits_read = (data_byte_index_ - sizeof(uint32_t)) * kBitsPerByte +
283 current_word_bit_index_;
284 return base::StringPrintf(
285 "bits_read: %x; current_word_: %x; data_byte_index_; %x, "
286 "current_word_bit_index_: %x; rice_parameter_: %x",
287 bits_read, current_word_, data_byte_index_, current_word_bit_index_,
288 rice_parameter_);
289 }
290
operator <<(std::ostream & os,const V4RiceDecoder & rice_decoder)291 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder) {
292 os << rice_decoder.DebugString();
293 return os;
294 }
295
296 } // namespace safe_browsing
297