1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/cbor/reader.h"
6 
7 #include <math.h>
8 
9 #include <utility>
10 
11 #include "base/numerics/checked_math.h"
12 #include "base/numerics/safe_conversions.h"
13 #include "base/stl_util.h"
14 #include "base/strings/string_util.h"
15 #include "components/cbor/constants.h"
16 
17 namespace cbor {
18 
namespace constants {
// Human-readable text for DecoderError::UNSUPPORTED_MAJOR_TYPE. Unlike the
// other error strings in this file it is defined in the `constants` namespace
// because it is referenced from more than one file.
const char kUnsupportedMajorType[] = "Unsupported major type.";
}  // namespace constants
22 
23 namespace {
24 
GetMajorType(uint8_t initial_data_byte)25 Value::Type GetMajorType(uint8_t initial_data_byte) {
26   return static_cast<Value::Type>(
27       (initial_data_byte & constants::kMajorTypeMask) >>
28       constants::kMajorTypeBitShift);
29 }
30 
GetAdditionalInfo(uint8_t initial_data_byte)31 uint8_t GetAdditionalInfo(uint8_t initial_data_byte) {
32   return initial_data_byte & constants::kAdditionalInformationMask;
33 }
34 
// Human-readable messages, one per DecoderError code, returned by
// Reader::ErrorCodeToString(). The single exception is
// |kUnsupportedMajorType|, which lives in the `constants` namespace (declared
// in constants.h) because several files use it.
constexpr char kNoError[] = "Successfully deserialized to a CBOR value.";
constexpr char kUnknownAdditionalInfo[] =
    "Unknown additional info format in the first byte.";
constexpr char kIncompleteCBORData[] =
    "Prematurely terminated CBOR data byte array.";
constexpr char kIncorrectMapKeyType[] =
    "Specified map key type is not supported by the current implementation.";
constexpr char kTooMuchNesting[] = "Too much nesting.";
constexpr char kInvalidUTF8[] =
    "String encodings other than UTF-8 are not allowed.";
constexpr char kExtraneousData[] = "Trailing data bytes are not allowed.";
constexpr char kMapKeyOutOfOrder[] =
    "Map keys must be strictly monotonically increasing based on byte length "
    "and then by byte-wise lexical order.";
constexpr char kNonMinimalCBOREncoding[] =
    "Unsigned integers must be encoded with minimum number of bytes.";
constexpr char kUnsupportedSimpleValue[] =
    "Unsupported or unassigned simple value.";
constexpr char kUnsupportedFloatingPointValue[] =
    "Floating point numbers are not supported.";
constexpr char kOutOfRangeIntegerValue[] =
    "Integer values must be between INT64_MIN and INT64_MAX.";
constexpr char kUnknownError[] = "An unknown error occured.";
61 
62 }  // namespace
63 
64 Reader::Config::Config() = default;
65 Reader::Config::~Config() = default;
66 
Reader(base::span<const uint8_t> data)67 Reader::Reader(base::span<const uint8_t> data)
68     : rest_(data), error_code_(DecoderError::CBOR_NO_ERROR) {}
~Reader()69 Reader::~Reader() {}
70 
71 // static
Read(base::span<uint8_t const> data,DecoderError * error_code_out,int max_nesting_level)72 base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
73                                    DecoderError* error_code_out,
74                                    int max_nesting_level) {
75   Config config;
76   config.error_code_out = error_code_out;
77   config.max_nesting_level = max_nesting_level;
78 
79   return Read(data, config);
80 }
81 
82 // static
Read(base::span<uint8_t const> data,size_t * num_bytes_consumed,DecoderError * error_code_out,int max_nesting_level)83 base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
84                                    size_t* num_bytes_consumed,
85                                    DecoderError* error_code_out,
86                                    int max_nesting_level) {
87   DCHECK(num_bytes_consumed);
88 
89   Config config;
90   config.num_bytes_consumed = num_bytes_consumed;
91   config.error_code_out = error_code_out;
92   config.max_nesting_level = max_nesting_level;
93 
94   return Read(data, config);
95 }
96 
97 // static
Read(base::span<uint8_t const> data,const Config & config)98 base::Optional<Value> Reader::Read(base::span<uint8_t const> data,
99                                    const Config& config) {
100   Reader reader(data);
101   base::Optional<Value> value =
102       reader.DecodeCompleteDataItem(config, config.max_nesting_level);
103 
104   auto error = reader.GetErrorCode();
105   const bool success = value.has_value();
106   DCHECK_EQ(success, error == DecoderError::CBOR_NO_ERROR);
107 
108   if (config.num_bytes_consumed) {
109     *config.num_bytes_consumed =
110         success ? data.size() - reader.num_bytes_remaining() : 0;
111   } else if (success && reader.num_bytes_remaining() > 0) {
112     error = DecoderError::EXTRANEOUS_DATA;
113     value.reset();
114   }
115 
116   if (config.error_code_out) {
117     *config.error_code_out = error;
118   }
119 
120   return value;
121 }
122 
DecodeCompleteDataItem(const Config & config,int max_nesting_level)123 base::Optional<Value> Reader::DecodeCompleteDataItem(const Config& config,
124                                                      int max_nesting_level) {
125   if (max_nesting_level < 0 || max_nesting_level > kCBORMaxDepth) {
126     error_code_ = DecoderError::TOO_MUCH_NESTING;
127     return base::nullopt;
128   }
129 
130   base::Optional<DataItemHeader> header = DecodeDataItemHeader();
131   if (!header.has_value()) {
132     return base::nullopt;
133   }
134 
135   switch (header->type) {
136     case Value::Type::UNSIGNED:
137       return DecodeValueToUnsigned(header->value);
138     case Value::Type::NEGATIVE:
139       return DecodeValueToNegative(header->value);
140     case Value::Type::BYTE_STRING:
141       return ReadByteStringContent(*header);
142     case Value::Type::STRING:
143       return ReadStringContent(*header, config);
144     case Value::Type::ARRAY:
145       return ReadArrayContent(*header, config, max_nesting_level);
146     case Value::Type::MAP:
147       return ReadMapContent(*header, config, max_nesting_level);
148     case Value::Type::SIMPLE_VALUE:
149       return DecodeToSimpleValue(*header);
150     case Value::Type::TAG:  // We explicitly don't support TAG.
151     case Value::Type::NONE:
152     case Value::Type::INVALID_UTF8:
153       break;
154   }
155 
156   error_code_ = DecoderError::UNSUPPORTED_MAJOR_TYPE;
157   return base::nullopt;
158 }
159 
DecodeDataItemHeader()160 base::Optional<Reader::DataItemHeader> Reader::DecodeDataItemHeader() {
161   const base::Optional<uint8_t> initial_byte = ReadByte();
162   if (!initial_byte) {
163     return base::nullopt;
164   }
165 
166   const auto major_type = GetMajorType(initial_byte.value());
167   const uint8_t additional_info = GetAdditionalInfo(initial_byte.value());
168 
169   base::Optional<uint64_t> value = ReadVariadicLengthInteger(additional_info);
170   return value ? base::make_optional(
171                      DataItemHeader{major_type, additional_info, value.value()})
172                : base::nullopt;
173 }
174 
ReadVariadicLengthInteger(uint8_t additional_info)175 base::Optional<uint64_t> Reader::ReadVariadicLengthInteger(
176     uint8_t additional_info) {
177   uint8_t additional_bytes = 0;
178   if (additional_info < 24) {
179     return base::make_optional(additional_info);
180   } else if (additional_info == 24) {
181     additional_bytes = 1;
182   } else if (additional_info == 25) {
183     additional_bytes = 2;
184   } else if (additional_info == 26) {
185     additional_bytes = 4;
186   } else if (additional_info == 27) {
187     additional_bytes = 8;
188   } else {
189     error_code_ = DecoderError::UNKNOWN_ADDITIONAL_INFO;
190     return base::nullopt;
191   }
192 
193   const base::Optional<base::span<const uint8_t>> bytes =
194       ReadBytes(additional_bytes);
195   if (!bytes) {
196     return base::nullopt;
197   }
198 
199   uint64_t int_data = 0;
200   for (const uint8_t b : bytes.value()) {
201     int_data <<= 8;
202     int_data |= b;
203   }
204 
205   return IsEncodingMinimal(additional_bytes, int_data)
206              ? base::make_optional(int_data)
207              : base::nullopt;
208 }
209 
DecodeValueToNegative(uint64_t value)210 base::Optional<Value> Reader::DecodeValueToNegative(uint64_t value) {
211   auto negative_value = -base::CheckedNumeric<int64_t>(value) - 1;
212   if (!negative_value.IsValid()) {
213     error_code_ = DecoderError::OUT_OF_RANGE_INTEGER_VALUE;
214     return base::nullopt;
215   }
216   return Value(negative_value.ValueOrDie());
217 }
218 
DecodeValueToUnsigned(uint64_t value)219 base::Optional<Value> Reader::DecodeValueToUnsigned(uint64_t value) {
220   auto unsigned_value = base::CheckedNumeric<int64_t>(value);
221   if (!unsigned_value.IsValid()) {
222     error_code_ = DecoderError::OUT_OF_RANGE_INTEGER_VALUE;
223     return base::nullopt;
224   }
225   return Value(unsigned_value.ValueOrDie());
226 }
227 
DecodeToSimpleValue(const DataItemHeader & header)228 base::Optional<Value> Reader::DecodeToSimpleValue(
229     const DataItemHeader& header) {
230   // ReadVariadicLengthInteger provides this bound.
231   CHECK_LE(header.additional_info, 27);
232   // Floating point numbers are not supported.
233   if (header.additional_info > 24) {
234     error_code_ = DecoderError::UNSUPPORTED_FLOATING_POINT_VALUE;
235     return base::nullopt;
236   }
237 
238   // Since |header.additional_info| <= 24, ReadVariadicLengthInteger also
239   // provides this bound for |header.value|.
240   CHECK_LE(header.value, 255u);
241   // |SimpleValue| is an enum class and so the underlying type is specified to
242   // be |int|. So this cast is safe.
243   Value::SimpleValue possibly_unsupported_simple_value =
244       static_cast<Value::SimpleValue>(static_cast<int>(header.value));
245   switch (possibly_unsupported_simple_value) {
246     case Value::SimpleValue::FALSE_VALUE:
247     case Value::SimpleValue::TRUE_VALUE:
248     case Value::SimpleValue::NULL_VALUE:
249     case Value::SimpleValue::UNDEFINED:
250       return Value(possibly_unsupported_simple_value);
251   }
252 
253   error_code_ = DecoderError::UNSUPPORTED_SIMPLE_VALUE;
254   return base::nullopt;
255 }
256 
ReadStringContent(const Reader::DataItemHeader & header,const Config & config)257 base::Optional<Value> Reader::ReadStringContent(
258     const Reader::DataItemHeader& header,
259     const Config& config) {
260   uint64_t num_bytes = header.value;
261   const base::Optional<base::span<const uint8_t>> bytes = ReadBytes(num_bytes);
262   if (!bytes) {
263     return base::nullopt;
264   }
265 
266   std::string cbor_string(bytes->begin(), bytes->end());
267   if (base::IsStringUTF8(cbor_string)) {
268     return Value(std::move(cbor_string));
269   }
270 
271   if (config.allow_invalid_utf8) {
272     return Value(*bytes, Value::Type::INVALID_UTF8);
273   }
274 
275   error_code_ = DecoderError::INVALID_UTF8;
276   return base::nullopt;
277 }
278 
ReadByteStringContent(const Reader::DataItemHeader & header)279 base::Optional<Value> Reader::ReadByteStringContent(
280     const Reader::DataItemHeader& header) {
281   uint64_t num_bytes = header.value;
282   const base::Optional<base::span<const uint8_t>> bytes = ReadBytes(num_bytes);
283   if (!bytes) {
284     return base::nullopt;
285   }
286 
287   std::vector<uint8_t> cbor_byte_string(bytes->begin(), bytes->end());
288   return Value(std::move(cbor_byte_string));
289 }
290 
ReadArrayContent(const Reader::DataItemHeader & header,const Config & config,int max_nesting_level)291 base::Optional<Value> Reader::ReadArrayContent(
292     const Reader::DataItemHeader& header,
293     const Config& config,
294     int max_nesting_level) {
295   const uint64_t length = header.value;
296 
297   Value::ArrayValue cbor_array;
298   for (uint64_t i = 0; i < length; ++i) {
299     base::Optional<Value> cbor_element =
300         DecodeCompleteDataItem(config, max_nesting_level - 1);
301     if (!cbor_element.has_value()) {
302       return base::nullopt;
303     }
304     cbor_array.push_back(std::move(cbor_element.value()));
305   }
306   return Value(std::move(cbor_array));
307 }
308 
ReadMapContent(const Reader::DataItemHeader & header,const Config & config,int max_nesting_level)309 base::Optional<Value> Reader::ReadMapContent(
310     const Reader::DataItemHeader& header,
311     const Config& config,
312     int max_nesting_level) {
313   const uint64_t length = header.value;
314 
315   Value::MapValue cbor_map;
316   for (uint64_t i = 0; i < length; ++i) {
317     base::Optional<Value> key =
318         DecodeCompleteDataItem(config, max_nesting_level - 1);
319     base::Optional<Value> value =
320         DecodeCompleteDataItem(config, max_nesting_level - 1);
321     if (!key.has_value() || !value.has_value()) {
322       return base::nullopt;
323     }
324 
325     switch (key.value().type()) {
326       case Value::Type::UNSIGNED:
327       case Value::Type::NEGATIVE:
328       case Value::Type::STRING:
329       case Value::Type::BYTE_STRING:
330         break;
331       case Value::Type::INVALID_UTF8:
332         error_code_ = DecoderError::INVALID_UTF8;
333         return base::nullopt;
334       default:
335         error_code_ = DecoderError::INCORRECT_MAP_KEY_TYPE;
336         return base::nullopt;
337     }
338     if (!IsKeyInOrder(key.value(), &cbor_map)) {
339       return base::nullopt;
340     }
341 
342     cbor_map.insert_or_assign(std::move(key.value()), std::move(value.value()));
343   }
344   return Value(std::move(cbor_map));
345 }
346 
ReadByte()347 base::Optional<uint8_t> Reader::ReadByte() {
348   const base::Optional<base::span<const uint8_t>> bytes = ReadBytes(1);
349   return bytes ? base::make_optional(bytes.value()[0]) : base::nullopt;
350 }
351 
ReadBytes(uint64_t num_bytes)352 base::Optional<base::span<const uint8_t>> Reader::ReadBytes(
353     uint64_t num_bytes) {
354   if (base::strict_cast<uint64_t>(rest_.size()) < num_bytes) {
355     error_code_ = DecoderError::INCOMPLETE_CBOR_DATA;
356     return base::nullopt;
357   }
358   const base::span<const uint8_t> ret = rest_.first(num_bytes);
359   rest_ = rest_.subspan(num_bytes);
360   return ret;
361 }
362 
IsEncodingMinimal(uint8_t additional_bytes,uint64_t uint_data)363 bool Reader::IsEncodingMinimal(uint8_t additional_bytes, uint64_t uint_data) {
364   if ((additional_bytes == 1 && uint_data < 24) ||
365       uint_data <= (1ULL << 8 * (additional_bytes >> 1)) - 1) {
366     error_code_ = DecoderError::NON_MINIMAL_CBOR_ENCODING;
367     return false;
368   }
369   return true;
370 }
371 
IsKeyInOrder(const Value & new_key,Value::MapValue * map)372 bool Reader::IsKeyInOrder(const Value& new_key, Value::MapValue* map) {
373   if (map->empty()) {
374     return true;
375   }
376 
377   const auto& max_current_key = map->rbegin()->first;
378   const auto less = map->key_comp();
379   if (!less(max_current_key, new_key)) {
380     error_code_ = DecoderError::OUT_OF_ORDER_KEY;
381     return false;
382   }
383   return true;
384 }
385 
386 // static
ErrorCodeToString(DecoderError error)387 const char* Reader::ErrorCodeToString(DecoderError error) {
388   switch (error) {
389     case DecoderError::CBOR_NO_ERROR:
390       return kNoError;
391     case DecoderError::UNSUPPORTED_MAJOR_TYPE:
392       return constants::kUnsupportedMajorType;
393     case DecoderError::UNKNOWN_ADDITIONAL_INFO:
394       return kUnknownAdditionalInfo;
395     case DecoderError::INCOMPLETE_CBOR_DATA:
396       return kIncompleteCBORData;
397     case DecoderError::INCORRECT_MAP_KEY_TYPE:
398       return kIncorrectMapKeyType;
399     case DecoderError::TOO_MUCH_NESTING:
400       return kTooMuchNesting;
401     case DecoderError::INVALID_UTF8:
402       return kInvalidUTF8;
403     case DecoderError::EXTRANEOUS_DATA:
404       return kExtraneousData;
405     case DecoderError::OUT_OF_ORDER_KEY:
406       return kMapKeyOutOfOrder;
407     case DecoderError::NON_MINIMAL_CBOR_ENCODING:
408       return kNonMinimalCBOREncoding;
409     case DecoderError::UNSUPPORTED_SIMPLE_VALUE:
410       return kUnsupportedSimpleValue;
411     case DecoderError::UNSUPPORTED_FLOATING_POINT_VALUE:
412       return kUnsupportedFloatingPointValue;
413     case DecoderError::OUT_OF_RANGE_INTEGER_VALUE:
414       return kOutOfRangeIntegerValue;
415     case DecoderError::UNKNOWN_ERROR:
416       return kUnknownError;
417     default:
418       NOTREACHED();
419       return "Unknown error code.";
420   }
421 }
422 
423 }  // namespace cbor
424