1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS.  All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 #ifndef SRC_BYTE_PARSER_H_
9 #define SRC_BYTE_PARSER_H_
10 
#include <cassert>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
16 
17 #include "src/element_parser.h"
18 #include "webm/callback.h"
19 #include "webm/element.h"
20 #include "webm/reader.h"
21 #include "webm/status.h"
22 
23 namespace webm {
24 
25 // Parses an EBML string (UTF-8 and ASCII) or binary element from a byte stream.
26 // Spec reference for string/binary elements:
27 // http://matroska.org/technical/specs/index.html#EBML_ex
28 // https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown#ebml-element-types
29 template <typename T>
30 class ByteParser : public ElementParser {
31  public:
32   static_assert(std::is_same<T, std::vector<std::uint8_t>>::value ||
33                     std::is_same<T, std::string>::value,
34                 "T must be std::vector<std::uint8_t> or std::string");
35 
36   // Constructs a new parser which will use the given default_value as the
37   // value for the element if its size is zero. Defaults to the empty string
38   // or empty binary element (as the EBML spec indicates).
39   explicit ByteParser(T default_value = {})
default_value_(std::move (default_value))40       : default_value_(std::move(default_value)) {}
41 
42   ByteParser(ByteParser&&) = default;
43   ByteParser& operator=(ByteParser&&) = default;
44 
45   ByteParser(const ByteParser&) = delete;
46   ByteParser& operator=(const ByteParser&) = delete;
47 
Init(const ElementMetadata & metadata,std::uint64_t max_size)48   Status Init(const ElementMetadata& metadata,
49               std::uint64_t max_size) override {
50     assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);
51 
52     if (metadata.size == kUnknownElementSize) {
53       return Status(Status::kInvalidElementSize);
54     }
55 
56     if (metadata.size > std::numeric_limits<std::size_t>::max() ||
57         metadata.size > value_.max_size()) {
58       return Status(Status::kNotEnoughMemory);
59     }
60 
61 #if WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT
62     // AFL and ASan just kill the process if too much memory is allocated, so
63     // let's cap the maximum size of the element. It's too easy for the fuzzer
64     // to make an element with a ridiculously huge size, and that just creates
65     // uninteresting false positives.
66     if (metadata.size > WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT) {
67       return Status(Status::kNotEnoughMemory);
68     }
69 #endif
70 
71     if (metadata.size == 0) {
72       value_ = default_value_;
73       total_read_ = default_value_.size();
74     } else {
75       value_.resize(static_cast<std::size_t>(metadata.size));
76       total_read_ = 0;
77     }
78 
79     return Status(Status::kOkCompleted);
80   }
81 
Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)82   Status Feed(Callback* callback, Reader* reader,
83               std::uint64_t* num_bytes_read) override {
84     assert(callback != nullptr);
85     assert(reader != nullptr);
86     assert(num_bytes_read != nullptr);
87 
88     *num_bytes_read = 0;
89 
90     if (total_read_ == value_.size()) {
91       return Status(Status::kOkCompleted);
92     }
93 
94     Status status;
95     do {
96       std::uint64_t local_num_bytes_read = 0;
97       std::uint8_t* buffer =
98           reinterpret_cast<std::uint8_t*>(&value_.front()) + total_read_;
99       std::size_t buffer_size = value_.size() - total_read_;
100       status = reader->Read(buffer_size, buffer, &local_num_bytes_read);
101       assert((status.completed_ok() && local_num_bytes_read == buffer_size) ||
102              (status.ok() && local_num_bytes_read < buffer_size) ||
103              (!status.ok() && local_num_bytes_read == 0));
104       *num_bytes_read += local_num_bytes_read;
105       total_read_ += static_cast<std::size_t>(local_num_bytes_read);
106     } while (status.code == Status::kOkPartial);
107 
108     // UTF-8 and ASCII string elements can be padded with NUL characters at the
109     // end, which should be ignored.
110     if (std::is_same<T, std::string>::value && status.completed_ok()) {
111       while (!value_.empty() && value_.back() == '\0') {
112         value_.pop_back();
113       }
114     }
115 
116     return status;
117   }
118 
119   // Gets the parsed value. This must not be called until the parse has been
120   // successfully completed.
value()121   const T& value() const {
122     assert(total_read_ >= value_.size());
123     return value_;
124   }
125 
126   // Gets the parsed value. This must not be called until the parse has been
127   // successfully completed.
mutable_value()128   T* mutable_value() {
129     assert(total_read_ >= value_.size());
130     return &value_;
131   }
132 
133  private:
134   T value_;
135   T default_value_;
136   std::size_t total_read_;
137 };
138 
// Parser for EBML string elements; the parsed value is a std::string.
using StringParser = ByteParser<std::string>;
// Parser for EBML binary elements; the parsed value is a vector of bytes.
using BinaryParser = ByteParser<std::vector<std::uint8_t>>;
141 
142 }  // namespace webm
143 
144 #endif  // SRC_BYTE_PARSER_H_
145