1 // Copyright 2017 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef COMPONENTS_CBOR_READER_H_ 6 #define COMPONENTS_CBOR_READER_H_ 7 8 #include <stddef.h> 9 10 #include <string> 11 #include <vector> 12 13 #include "base/containers/span.h" 14 #include "base/optional.h" 15 #include "components/cbor/cbor_export.h" 16 #include "components/cbor/values.h" 17 18 // Concise Binary Object Representation (CBOR) decoder as defined by 19 // https://tools.ietf.org/html/rfc7049. This decoder only accepts canonical CBOR 20 // as defined by section 3.9. 21 // 22 // This implementation supports the following major types: 23 // - 0: Unsigned integers, up to 64-bit values*. 24 // - 1: Signed integers, up to 64-bit values*. 25 // - 2: Byte strings. 26 // - 3: UTF-8 strings. 27 // - 4: Definite-length arrays. 28 // - 5: Definite-length maps. 29 // - 7: Simple values. 30 // 31 // * Note: For simplicity, this implementation represents both signed and 32 // unsigned integers with signed int64_t. This reduces the effective range 33 // of unsigned integers. 34 // 35 // Requirements for canonical CBOR representation: 36 // - Duplicate keys in maps are not allowed. 37 // - Keys for maps must be sorted first by length and then by byte-wise 38 // lexical order. 39 // 40 // Known limitations and interpretations of the RFC (and the reasons): 41 // - Does not support indefinite-length data streams or semantic tags (major 42 // type 6). (Simplicity; security) 43 // - Does not support the floating point and BREAK stop code value types in 44 // major type 7. (Simplicity) 45 // - Does not support non-character codepoints in major type 3. (Security) 46 // - Treats incomplete CBOR data items as syntax errors. (Security) 47 // - Treats trailing data bytes as errors. (Security) 48 // - Treats unknown additional information formats as syntax errors. 49 // (Simplicity; security) 50 // - Limits CBOR value inputs to at most 16 layers of nesting. Callers can 51 // enforce more shallow nesting by setting |max_nesting_level|. (Efficiency; 52 // security) 53 // - Only supports CBOR maps with integer or string type keys, due to the 54 // cost of serialization when sorting map keys. (Efficiency; simplicity) 55 // - Does not support simple values that are unassigned/reserved as per RFC 56 // 7049, and treats them as errors. (Security) 57 58 namespace cbor { 59 60 class CBOR_EXPORT Reader { 61 public: 62 enum class DecoderError { 63 CBOR_NO_ERROR = 0, 64 UNSUPPORTED_MAJOR_TYPE, 65 UNKNOWN_ADDITIONAL_INFO, 66 INCOMPLETE_CBOR_DATA, 67 INCORRECT_MAP_KEY_TYPE, 68 TOO_MUCH_NESTING, 69 INVALID_UTF8, 70 EXTRANEOUS_DATA, 71 OUT_OF_ORDER_KEY, 72 NON_MINIMAL_CBOR_ENCODING, 73 UNSUPPORTED_SIMPLE_VALUE, 74 UNSUPPORTED_FLOATING_POINT_VALUE, 75 OUT_OF_RANGE_INTEGER_VALUE, 76 UNKNOWN_ERROR, 77 }; 78 79 // CBOR nested depth sufficient for most use cases. 80 static const int kCBORMaxDepth = 16; 81 82 // Config contains configuration for a CBOR parsing operation. 83 struct CBOR_EXPORT Config { 84 Config(); 85 ~Config(); 86 87 // Used to report the number of bytes of input consumed. This suppresses the 88 // |EXTRANEOUS_DATA| error case. May be nullptr. 89 size_t* num_bytes_consumed = nullptr; 90 91 // Used to report the specific error in the case that parsing fails. May be 92 // nullptr; 93 DecoderError* error_code_out = nullptr; 94 95 // Controls the maximum depth of CBOR nesting that will be permitted. This 96 // exists to control stack consumption during parsing. 97 int max_nesting_level = kCBORMaxDepth; 98 99 // Causes strings that are not valid UTF-8 to be accepted and suppresses the 100 // |INVALID_UTF8| error, unless such strings are map keys. Invalid strings 101 // will result in Values of type |INVALID_UTF8| rather than |STRING|. Users 102 // of this feature should ensure that every invalid string is accounted for 103 // in the resulting structure. 104 // 105 // (Map keys are not allowed to be invalid because it was not necessary for 106 // the motivating case and because it adds complexity to handle the ordering 107 // correctly.) 108 bool allow_invalid_utf8 = false; 109 110 private: 111 DISALLOW_COPY_AND_ASSIGN(Config); 112 }; 113 114 ~Reader(); 115 116 // Reads and parses |input_data| into a Value. Returns an empty Optional 117 // if the input violates any one of the syntax requirements (including unknown 118 // additional info and incomplete CBOR data). 119 // 120 // The caller can optionally provide |error_code_out| to obtain additional 121 // information about decoding failures. 122 // 123 // If the caller provides it, |max_nesting_level| cannot exceed 124 // |kCBORMaxDepth|. 125 // 126 // Returns an empty Optional if not all the data was consumed, and sets 127 // |error_code_out| to EXTRANEOUS_DATA in this case. 128 static base::Optional<Value> Read(base::span<const uint8_t> input_data, 129 DecoderError* error_code_out = nullptr, 130 int max_nesting_level = kCBORMaxDepth); 131 132 // A version of |Read|, above, that takes a |Config| structure to allow 133 // additional controls. 134 static base::Optional<Value> Read(base::span<const uint8_t> input_data, 135 const Config& config); 136 137 // A version of |Read| that takes some fields of |Config| as parameters to 138 // avoid having to construct a |Config| object explicitly. 139 static base::Optional<Value> Read(base::span<const uint8_t> input_data, 140 size_t* num_bytes_consumed, 141 DecoderError* error_code_out = nullptr, 142 int max_nesting_level = kCBORMaxDepth); 143 144 // Translates errors to human-readable error messages. 145 static const char* ErrorCodeToString(DecoderError error_code); 146 147 private: 148 explicit Reader(base::span<const uint8_t> data); 149 150 // Encapsulates information extracted from the header of a CBOR data item, 151 // which consists of the initial byte, and a variable-length-encoded integer 152 // (if any). 153 struct DataItemHeader { 154 // The major type decoded from the initial byte. 155 Value::Type type; 156 157 // The raw 5-bit additional information from the initial byte. 158 uint8_t additional_info; 159 160 // The integer |value| decoded from the |additional_info| and the 161 // variable-length-encoded integer, if any. 162 uint64_t value; 163 }; 164 165 base::Optional<DataItemHeader> DecodeDataItemHeader(); 166 base::Optional<Value> DecodeCompleteDataItem(const Config& config, 167 int max_nesting_level); 168 base::Optional<Value> DecodeValueToNegative(uint64_t value); 169 base::Optional<Value> DecodeValueToUnsigned(uint64_t value); 170 base::Optional<Value> DecodeToSimpleValue(const DataItemHeader& header); 171 base::Optional<uint64_t> ReadVariadicLengthInteger(uint8_t additional_info); 172 base::Optional<Value> ReadByteStringContent(const DataItemHeader& header); 173 base::Optional<Value> ReadStringContent(const DataItemHeader& header, 174 const Config& config); 175 base::Optional<Value> ReadArrayContent(const DataItemHeader& header, 176 const Config& config, 177 int max_nesting_level); 178 base::Optional<Value> ReadMapContent(const DataItemHeader& header, 179 const Config& config, 180 int max_nesting_level); 181 base::Optional<uint8_t> ReadByte(); 182 base::Optional<base::span<const uint8_t>> ReadBytes(uint64_t num_bytes); 183 bool IsKeyInOrder(const Value& new_key, Value::MapValue* map); 184 bool IsEncodingMinimal(uint8_t additional_bytes, uint64_t uint_data); 185 GetErrorCode()186 DecoderError GetErrorCode() { return error_code_; } 187 num_bytes_remaining()188 size_t num_bytes_remaining() const { return rest_.size(); } 189 190 base::span<const uint8_t> rest_; 191 DecoderError error_code_; 192 193 DISALLOW_COPY_AND_ASSIGN(Reader); 194 }; 195 196 } // namespace cbor 197 198 #endif // COMPONENTS_CBOR_READER_H_ 199