1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_CBOR_READER_H_
6 #define COMPONENTS_CBOR_READER_H_
7 
8 #include <stddef.h>
9 
10 #include <string>
11 #include <vector>
12 
13 #include "base/containers/span.h"
14 #include "base/optional.h"
15 #include "components/cbor/cbor_export.h"
16 #include "components/cbor/values.h"
17 
18 // Concise Binary Object Representation (CBOR) decoder as defined by
19 // https://tools.ietf.org/html/rfc7049. This decoder only accepts canonical CBOR
20 // as defined by section 3.9.
21 //
22 // This implementation supports the following major types:
23 //  - 0: Unsigned integers, up to 64-bit values*.
//  - 1: Negative integers, down to 64-bit values*.
25 //  - 2: Byte strings.
26 //  - 3: UTF-8 strings.
27 //  - 4: Definite-length arrays.
28 //  - 5: Definite-length maps.
29 //  - 7: Simple values.
30 //
31 //  * Note: For simplicity, this implementation represents both signed and
32 //    unsigned integers with signed int64_t. This reduces the effective range
33 //    of unsigned integers.
34 //
35 // Requirements for canonical CBOR representation:
36 //  - Duplicate keys in maps are not allowed.
37 //  - Keys for maps must be sorted first by length and then by byte-wise
38 //    lexical order.
39 //
40 // Known limitations and interpretations of the RFC (and the reasons):
41 //  - Does not support indefinite-length data streams or semantic tags (major
42 //    type 6). (Simplicity; security)
43 //  - Does not support the floating point and BREAK stop code value types in
44 //    major type 7. (Simplicity)
45 //  - Does not support non-character codepoints in major type 3. (Security)
46 //  - Treats incomplete CBOR data items as syntax errors. (Security)
47 //  - Treats trailing data bytes as errors. (Security)
48 //  - Treats unknown additional information formats as syntax errors.
49 //    (Simplicity; security)
50 //  - Limits CBOR value inputs to at most 16 layers of nesting. Callers can
51 //    enforce more shallow nesting by setting |max_nesting_level|. (Efficiency;
52 //    security)
53 //  - Only supports CBOR maps with integer or string type keys, due to the
54 //    cost of serialization when sorting map keys. (Efficiency; simplicity)
55 //  - Does not support simple values that are unassigned/reserved as per RFC
56 //    7049, and treats them as errors. (Security)
57 
58 namespace cbor {
59 
60 class CBOR_EXPORT Reader {
61  public:
62   enum class DecoderError {
63     CBOR_NO_ERROR = 0,
64     UNSUPPORTED_MAJOR_TYPE,
65     UNKNOWN_ADDITIONAL_INFO,
66     INCOMPLETE_CBOR_DATA,
67     INCORRECT_MAP_KEY_TYPE,
68     TOO_MUCH_NESTING,
69     INVALID_UTF8,
70     EXTRANEOUS_DATA,
71     OUT_OF_ORDER_KEY,
72     NON_MINIMAL_CBOR_ENCODING,
73     UNSUPPORTED_SIMPLE_VALUE,
74     UNSUPPORTED_FLOATING_POINT_VALUE,
75     OUT_OF_RANGE_INTEGER_VALUE,
76     UNKNOWN_ERROR,
77   };
78 
79   // CBOR nested depth sufficient for most use cases.
80   static const int kCBORMaxDepth = 16;
81 
82   // Config contains configuration for a CBOR parsing operation.
83   struct CBOR_EXPORT Config {
84     Config();
85     ~Config();
86 
87     // Used to report the number of bytes of input consumed. This suppresses the
88     // |EXTRANEOUS_DATA| error case. May be nullptr.
89     size_t* num_bytes_consumed = nullptr;
90 
91     // Used to report the specific error in the case that parsing fails. May be
92     // nullptr;
93     DecoderError* error_code_out = nullptr;
94 
95     // Controls the maximum depth of CBOR nesting that will be permitted. This
96     // exists to control stack consumption during parsing.
97     int max_nesting_level = kCBORMaxDepth;
98 
99     // Causes strings that are not valid UTF-8 to be accepted and suppresses the
100     // |INVALID_UTF8| error, unless such strings are map keys. Invalid strings
101     // will result in Values of type |INVALID_UTF8| rather than |STRING|. Users
102     // of this feature should ensure that every invalid string is accounted for
103     // in the resulting structure.
104     //
105     // (Map keys are not allowed to be invalid because it was not necessary for
106     // the motivating case and because it adds complexity to handle the ordering
107     // correctly.)
108     bool allow_invalid_utf8 = false;
109 
110    private:
111     DISALLOW_COPY_AND_ASSIGN(Config);
112   };
113 
114   ~Reader();
115 
116   // Reads and parses |input_data| into a Value. Returns an empty Optional
117   // if the input violates any one of the syntax requirements (including unknown
118   // additional info and incomplete CBOR data).
119   //
120   // The caller can optionally provide |error_code_out| to obtain additional
121   // information about decoding failures.
122   //
123   // If the caller provides it, |max_nesting_level| cannot exceed
124   // |kCBORMaxDepth|.
125   //
126   // Returns an empty Optional if not all the data was consumed, and sets
127   // |error_code_out| to EXTRANEOUS_DATA in this case.
128   static base::Optional<Value> Read(base::span<const uint8_t> input_data,
129                                     DecoderError* error_code_out = nullptr,
130                                     int max_nesting_level = kCBORMaxDepth);
131 
132   // A version of |Read|, above, that takes a |Config| structure to allow
133   // additional controls.
134   static base::Optional<Value> Read(base::span<const uint8_t> input_data,
135                                     const Config& config);
136 
137   // A version of |Read| that takes some fields of |Config| as parameters to
138   // avoid having to construct a |Config| object explicitly.
139   static base::Optional<Value> Read(base::span<const uint8_t> input_data,
140                                     size_t* num_bytes_consumed,
141                                     DecoderError* error_code_out = nullptr,
142                                     int max_nesting_level = kCBORMaxDepth);
143 
144   // Translates errors to human-readable error messages.
145   static const char* ErrorCodeToString(DecoderError error_code);
146 
147  private:
148   explicit Reader(base::span<const uint8_t> data);
149 
150   // Encapsulates information extracted from the header of a CBOR data item,
151   // which consists of the initial byte, and a variable-length-encoded integer
152   // (if any).
153   struct DataItemHeader {
154     // The major type decoded from the initial byte.
155     Value::Type type;
156 
157     // The raw 5-bit additional information from the initial byte.
158     uint8_t additional_info;
159 
160     // The integer |value| decoded from the |additional_info| and the
161     // variable-length-encoded integer, if any.
162     uint64_t value;
163   };
164 
165   base::Optional<DataItemHeader> DecodeDataItemHeader();
166   base::Optional<Value> DecodeCompleteDataItem(const Config& config,
167                                                int max_nesting_level);
168   base::Optional<Value> DecodeValueToNegative(uint64_t value);
169   base::Optional<Value> DecodeValueToUnsigned(uint64_t value);
170   base::Optional<Value> DecodeToSimpleValue(const DataItemHeader& header);
171   base::Optional<uint64_t> ReadVariadicLengthInteger(uint8_t additional_info);
172   base::Optional<Value> ReadByteStringContent(const DataItemHeader& header);
173   base::Optional<Value> ReadStringContent(const DataItemHeader& header,
174                                           const Config& config);
175   base::Optional<Value> ReadArrayContent(const DataItemHeader& header,
176                                          const Config& config,
177                                          int max_nesting_level);
178   base::Optional<Value> ReadMapContent(const DataItemHeader& header,
179                                        const Config& config,
180                                        int max_nesting_level);
181   base::Optional<uint8_t> ReadByte();
182   base::Optional<base::span<const uint8_t>> ReadBytes(uint64_t num_bytes);
183   bool IsKeyInOrder(const Value& new_key, Value::MapValue* map);
184   bool IsEncodingMinimal(uint8_t additional_bytes, uint64_t uint_data);
185 
GetErrorCode()186   DecoderError GetErrorCode() { return error_code_; }
187 
num_bytes_remaining()188   size_t num_bytes_remaining() const { return rest_.size(); }
189 
190   base::span<const uint8_t> rest_;
191   DecoderError error_code_;
192 
193   DISALLOW_COPY_AND_ASSIGN(Reader);
194 };
195 
196 }  // namespace cbor
197 
198 #endif  // COMPONENTS_CBOR_READER_H_
199