1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 #include "src/master_parser.h"
9
10 #include <cassert>
11 #include <cstdint>
12 #include <limits>
13
14 #include "src/element_parser.h"
15 #include "src/skip_callback.h"
16 #include "webm/element.h"
17 #include "webm/id.h"
18 #include "webm/reader.h"
19 #include "webm/status.h"
20
21 namespace webm {
22
23 // Spec reference:
24 // http://matroska.org/technical/specs/index.html#EBML_ex
25 // https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown
Init(const ElementMetadata & metadata,std::uint64_t max_size)26 Status MasterParser::Init(const ElementMetadata& metadata,
27 std::uint64_t max_size) {
28 assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);
29
30 InitSetup(metadata.header_size, metadata.size, metadata.position);
31
32 if (metadata.size != kUnknownElementSize) {
33 max_size_ = metadata.size;
34 } else {
35 max_size_ = max_size;
36 }
37
38 if (metadata.size == 0) {
39 state_ = State::kEndReached;
40 } else {
41 state_ = State::kFirstReadOfChildId;
42 }
43
44 return Status(Status::kOkCompleted);
45 }
46
InitAfterSeek(const Ancestory & child_ancestory,const ElementMetadata & child_metadata)47 void MasterParser::InitAfterSeek(const Ancestory& child_ancestory,
48 const ElementMetadata& child_metadata) {
49 InitSetup(kUnknownHeaderSize, kUnknownElementSize, kUnknownElementPosition);
50 max_size_ = std::numeric_limits<std::uint64_t>::max();
51
52 if (child_ancestory.empty()) {
53 child_metadata_ = child_metadata;
54 auto iter = parsers_.find(child_metadata_.id);
55 assert(iter != parsers_.end());
56 child_parser_ = iter->second.get();
57 state_ = State::kGettingAction;
58 } else {
59 child_metadata_.id = child_ancestory.id();
60 child_metadata_.header_size = kUnknownHeaderSize;
61 child_metadata_.size = kUnknownElementSize;
62 child_metadata_.position = kUnknownElementPosition;
63
64 auto iter = parsers_.find(child_metadata_.id);
65 assert(iter != parsers_.end());
66 child_parser_ = iter->second.get();
67 child_parser_->InitAfterSeek(child_ancestory.next(), child_metadata);
68 state_ = State::kReadingChildBody;
69 }
70 }
71
Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)72 Status MasterParser::Feed(Callback* callback, Reader* reader,
73 std::uint64_t* num_bytes_read) {
74 assert(callback != nullptr);
75 assert(reader != nullptr);
76 assert(num_bytes_read != nullptr);
77
78 *num_bytes_read = 0;
79
80 Callback* const original_callback = callback;
81
82 SkipCallback skip_callback;
83 if (action_ == Action::kSkip) {
84 callback = &skip_callback;
85 }
86
87 Status status;
88 std::uint64_t local_num_bytes_read;
89 while (true) {
90 switch (state_) {
91 case State::kFirstReadOfChildId: {
92 // This separate case for the first read of the child ID is needed to
93 // avoid potential bugs where calling Feed() twice in a row on an
94 // unsized element at the end of the stream would return
95 // Status::kOkCompleted instead of Status::kEndOfFile (since we convert
96 // Status::kEndOfFile to Status::kOkCompleted when EOF is hit for an
97 // unsized element after its children have been fully parsed). Once
98 // the ID parser consumes > 0 bytes, this state must be exited.
99 assert(child_parser_ == nullptr);
100 assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
101 child_metadata_.position = reader->Position();
102 child_metadata_.header_size = 0;
103 status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
104 *num_bytes_read += local_num_bytes_read;
105 total_bytes_read_ += local_num_bytes_read;
106 child_metadata_.header_size +=
107 static_cast<std::uint32_t>(local_num_bytes_read);
108 if (status.code == Status::kEndOfFile &&
109 my_size_ == kUnknownElementSize && local_num_bytes_read == 0) {
110 state_ = State::kEndReached;
111 } else if (!status.ok()) {
112 if (local_num_bytes_read > 0) {
113 state_ = State::kFinishingReadingChildId;
114 }
115 return status;
116 } else if (status.completed_ok()) {
117 state_ = State::kReadingChildSize;
118 } else {
119 state_ = State::kFinishingReadingChildId;
120 }
121 continue;
122 }
123
124 case State::kFinishingReadingChildId: {
125 assert(child_parser_ == nullptr);
126 assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
127 status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
128 *num_bytes_read += local_num_bytes_read;
129 total_bytes_read_ += local_num_bytes_read;
130 child_metadata_.header_size +=
131 static_cast<std::uint32_t>(local_num_bytes_read);
132 if (!status.completed_ok()) {
133 return status;
134 }
135 state_ = State::kReadingChildSize;
136 continue;
137 }
138
139 case State::kReadingChildSize: {
140 assert(child_parser_ == nullptr);
141 assert(total_bytes_read_ > 0);
142 status = size_parser_.Feed(callback, reader, &local_num_bytes_read);
143 *num_bytes_read += local_num_bytes_read;
144 total_bytes_read_ += local_num_bytes_read;
145 child_metadata_.header_size +=
146 static_cast<std::uint32_t>(local_num_bytes_read);
147 if (!status.completed_ok()) {
148 return status;
149 }
150 child_metadata_.id = id_parser_.id();
151 child_metadata_.size = size_parser_.size();
152 state_ = State::kValidatingChildSize;
153 continue;
154 }
155
156 case State::kValidatingChildSize: {
157 assert(child_parser_ == nullptr);
158
159 std::uint64_t byte_count = total_bytes_read_;
160 if (child_metadata_.size != kUnknownElementSize) {
161 byte_count += child_metadata_.size;
162 }
163
164 std::uint64_t byte_cap = max_size_;
165 // my_size_ is <= max_size_ if it's known, so pick the smaller value.
166 if (my_size_ != kUnknownElementSize) {
167 byte_cap = my_size_;
168 }
169
170 if (byte_count > byte_cap) {
171 return Status(Status::kElementOverflow);
172 }
173
174 auto iter = parsers_.find(child_metadata_.id);
175 bool unknown_child = iter == parsers_.end();
176
177 if (my_size_ == kUnknownElementSize && unknown_child) {
178 // The end of an unsized master element is considered to be the first
179 // instance of an element that isn't a known/valid child element.
180 has_cached_metadata_ = true;
181 state_ = State::kEndReached;
182 continue;
183 } else if (unknown_child &&
184 child_metadata_.size == kUnknownElementSize) {
185 // We can't skip or otherwise handle unknown elements with an unknown
186 // size.
187 return Status(Status::kIndefiniteUnknownElement);
188 }
189 if (unknown_child) {
190 child_parser_ = &unknown_parser_;
191 } else {
192 child_parser_ = iter->second.get();
193 }
194 state_ = State::kGettingAction;
195 continue;
196 }
197
198 case State::kGettingAction: {
199 assert(child_parser_ != nullptr);
200 status = callback->OnElementBegin(child_metadata_, &action_);
201 if (!status.completed_ok()) {
202 return status;
203 }
204
205 if (action_ == Action::kSkip) {
206 callback = &skip_callback;
207 if (child_metadata_.size != kUnknownElementSize) {
208 child_parser_ = &skip_parser_;
209 }
210 }
211 state_ = State::kInitializingChildParser;
212 continue;
213 }
214
215 case State::kInitializingChildParser: {
216 assert(child_parser_ != nullptr);
217 status =
218 child_parser_->Init(child_metadata_, max_size_ - total_bytes_read_);
219 if (!status.completed_ok()) {
220 return status;
221 }
222 state_ = State::kReadingChildBody;
223 continue;
224 }
225
226 case State::kReadingChildBody: {
227 assert(child_parser_ != nullptr);
228 status = child_parser_->Feed(callback, reader, &local_num_bytes_read);
229 *num_bytes_read += local_num_bytes_read;
230 total_bytes_read_ += local_num_bytes_read;
231 if (!status.completed_ok()) {
232 return status;
233 }
234 state_ = State::kChildFullyParsed;
235 continue;
236 }
237
238 case State::kChildFullyParsed: {
239 assert(child_parser_ != nullptr);
240 std::uint64_t byte_cap = max_size_;
241 // my_size_ is <= max_size_ if it's known, so pick the smaller value.
242 if (my_size_ != kUnknownElementSize) {
243 byte_cap = my_size_;
244 }
245
246 if (total_bytes_read_ > byte_cap) {
247 return Status(Status::kElementOverflow);
248 } else if (total_bytes_read_ == byte_cap) {
249 state_ = State::kEndReached;
250 continue;
251 }
252
253 if (child_parser_->GetCachedMetadata(&child_metadata_)) {
254 state_ = State::kValidatingChildSize;
255 } else {
256 state_ = State::kFirstReadOfChildId;
257 }
258 PrepareForNextChild();
259 callback = original_callback;
260 continue;
261 }
262
263 case State::kEndReached: {
264 return Status(Status::kOkCompleted);
265 }
266 }
267 }
268 }
269
GetCachedMetadata(ElementMetadata * metadata)270 bool MasterParser::GetCachedMetadata(ElementMetadata* metadata) {
271 assert(metadata != nullptr);
272
273 if (has_cached_metadata_) {
274 *metadata = child_metadata_;
275 }
276 return has_cached_metadata_;
277 }
278
InitSetup(std::uint32_t header_size,std::uint64_t size_in_bytes,std::uint64_t position)279 void MasterParser::InitSetup(std::uint32_t header_size,
280 std::uint64_t size_in_bytes,
281 std::uint64_t position) {
282 PrepareForNextChild();
283 header_size_ = header_size;
284 my_size_ = size_in_bytes;
285 my_position_ = position;
286 total_bytes_read_ = 0;
287 has_cached_metadata_ = false;
288 }
289
PrepareForNextChild()290 void MasterParser::PrepareForNextChild() {
291 // Do not reset child_metadata_ here.
292 id_parser_ = {};
293 size_parser_ = {};
294 child_parser_ = nullptr;
295 action_ = Action::kRead;
296 }
297
298 } // namespace webm
299