1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the LICENSE file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS.  All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 #include "src/master_parser.h"
9 
10 #include <cassert>
11 #include <cstdint>
12 #include <limits>
13 
14 #include "src/element_parser.h"
15 #include "src/skip_callback.h"
16 #include "webm/element.h"
17 #include "webm/id.h"
18 #include "webm/reader.h"
19 #include "webm/status.h"
20 
21 namespace webm {
22 
23 // Spec reference:
24 // http://matroska.org/technical/specs/index.html#EBML_ex
25 // https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown
Init(const ElementMetadata & metadata,std::uint64_t max_size)26 Status MasterParser::Init(const ElementMetadata& metadata,
27                           std::uint64_t max_size) {
28   assert(metadata.size == kUnknownElementSize || metadata.size <= max_size);
29 
30   InitSetup(metadata.header_size, metadata.size, metadata.position);
31 
32   if (metadata.size != kUnknownElementSize) {
33     max_size_ = metadata.size;
34   } else {
35     max_size_ = max_size;
36   }
37 
38   if (metadata.size == 0) {
39     state_ = State::kEndReached;
40   } else {
41     state_ = State::kFirstReadOfChildId;
42   }
43 
44   return Status(Status::kOkCompleted);
45 }
46 
InitAfterSeek(const Ancestory & child_ancestory,const ElementMetadata & child_metadata)47 void MasterParser::InitAfterSeek(const Ancestory& child_ancestory,
48                                  const ElementMetadata& child_metadata) {
49   InitSetup(kUnknownHeaderSize, kUnknownElementSize, kUnknownElementPosition);
50   max_size_ = std::numeric_limits<std::uint64_t>::max();
51 
52   if (child_ancestory.empty()) {
53     child_metadata_ = child_metadata;
54     auto iter = parsers_.find(child_metadata_.id);
55     assert(iter != parsers_.end());
56     child_parser_ = iter->second.get();
57     state_ = State::kGettingAction;
58   } else {
59     child_metadata_.id = child_ancestory.id();
60     child_metadata_.header_size = kUnknownHeaderSize;
61     child_metadata_.size = kUnknownElementSize;
62     child_metadata_.position = kUnknownElementPosition;
63 
64     auto iter = parsers_.find(child_metadata_.id);
65     assert(iter != parsers_.end());
66     child_parser_ = iter->second.get();
67     child_parser_->InitAfterSeek(child_ancestory.next(), child_metadata);
68     state_ = State::kReadingChildBody;
69   }
70 }
71 
Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)72 Status MasterParser::Feed(Callback* callback, Reader* reader,
73                           std::uint64_t* num_bytes_read) {
74   assert(callback != nullptr);
75   assert(reader != nullptr);
76   assert(num_bytes_read != nullptr);
77 
78   *num_bytes_read = 0;
79 
80   Callback* const original_callback = callback;
81 
82   SkipCallback skip_callback;
83   if (action_ == Action::kSkip) {
84     callback = &skip_callback;
85   }
86 
87   Status status;
88   std::uint64_t local_num_bytes_read;
89   while (true) {
90     switch (state_) {
91       case State::kFirstReadOfChildId: {
92         // This separate case for the first read of the child ID is needed to
93         // avoid potential bugs where calling Feed() twice in a row on an
94         // unsized element at the end of the stream would return
95         // Status::kOkCompleted instead of Status::kEndOfFile (since we convert
96         // Status::kEndOfFile to Status::kOkCompleted when EOF is hit for an
97         // unsized element after its children have been fully parsed). Once
98         // the ID parser consumes > 0 bytes, this state must be exited.
99         assert(child_parser_ == nullptr);
100         assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
101         child_metadata_.position = reader->Position();
102         child_metadata_.header_size = 0;
103         status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
104         *num_bytes_read += local_num_bytes_read;
105         total_bytes_read_ += local_num_bytes_read;
106         child_metadata_.header_size +=
107             static_cast<std::uint32_t>(local_num_bytes_read);
108         if (status.code == Status::kEndOfFile &&
109             my_size_ == kUnknownElementSize && local_num_bytes_read == 0) {
110           state_ = State::kEndReached;
111         } else if (!status.ok()) {
112           if (local_num_bytes_read > 0) {
113             state_ = State::kFinishingReadingChildId;
114           }
115           return status;
116         } else if (status.completed_ok()) {
117           state_ = State::kReadingChildSize;
118         } else {
119           state_ = State::kFinishingReadingChildId;
120         }
121         continue;
122       }
123 
124       case State::kFinishingReadingChildId: {
125         assert(child_parser_ == nullptr);
126         assert(my_size_ == kUnknownElementSize || total_bytes_read_ < my_size_);
127         status = id_parser_.Feed(callback, reader, &local_num_bytes_read);
128         *num_bytes_read += local_num_bytes_read;
129         total_bytes_read_ += local_num_bytes_read;
130         child_metadata_.header_size +=
131             static_cast<std::uint32_t>(local_num_bytes_read);
132         if (!status.completed_ok()) {
133           return status;
134         }
135         state_ = State::kReadingChildSize;
136         continue;
137       }
138 
139       case State::kReadingChildSize: {
140         assert(child_parser_ == nullptr);
141         assert(total_bytes_read_ > 0);
142         status = size_parser_.Feed(callback, reader, &local_num_bytes_read);
143         *num_bytes_read += local_num_bytes_read;
144         total_bytes_read_ += local_num_bytes_read;
145         child_metadata_.header_size +=
146             static_cast<std::uint32_t>(local_num_bytes_read);
147         if (!status.completed_ok()) {
148           return status;
149         }
150         child_metadata_.id = id_parser_.id();
151         child_metadata_.size = size_parser_.size();
152         state_ = State::kValidatingChildSize;
153         continue;
154       }
155 
156       case State::kValidatingChildSize: {
157         assert(child_parser_ == nullptr);
158 
159         std::uint64_t byte_count = total_bytes_read_;
160         if (child_metadata_.size != kUnknownElementSize) {
161           byte_count += child_metadata_.size;
162         }
163 
164         std::uint64_t byte_cap = max_size_;
165         // my_size_ is <= max_size_ if it's known, so pick the smaller value.
166         if (my_size_ != kUnknownElementSize) {
167           byte_cap = my_size_;
168         }
169 
170         if (byte_count > byte_cap) {
171           return Status(Status::kElementOverflow);
172         }
173 
174         auto iter = parsers_.find(child_metadata_.id);
175         bool unknown_child = iter == parsers_.end();
176 
177         if (my_size_ == kUnknownElementSize && unknown_child) {
178           // The end of an unsized master element is considered to be the first
179           // instance of an element that isn't a known/valid child element.
180           has_cached_metadata_ = true;
181           state_ = State::kEndReached;
182           continue;
183         } else if (unknown_child &&
184                    child_metadata_.size == kUnknownElementSize) {
185           // We can't skip or otherwise handle unknown elements with an unknown
186           // size.
187           return Status(Status::kIndefiniteUnknownElement);
188         }
189         if (unknown_child) {
190           child_parser_ = &unknown_parser_;
191         } else {
192           child_parser_ = iter->second.get();
193         }
194         state_ = State::kGettingAction;
195         continue;
196       }
197 
198       case State::kGettingAction: {
199         assert(child_parser_ != nullptr);
200         status = callback->OnElementBegin(child_metadata_, &action_);
201         if (!status.completed_ok()) {
202           return status;
203         }
204 
205         if (action_ == Action::kSkip) {
206           callback = &skip_callback;
207           if (child_metadata_.size != kUnknownElementSize) {
208             child_parser_ = &skip_parser_;
209           }
210         }
211         state_ = State::kInitializingChildParser;
212         continue;
213       }
214 
215       case State::kInitializingChildParser: {
216         assert(child_parser_ != nullptr);
217         status =
218             child_parser_->Init(child_metadata_, max_size_ - total_bytes_read_);
219         if (!status.completed_ok()) {
220           return status;
221         }
222         state_ = State::kReadingChildBody;
223         continue;
224       }
225 
226       case State::kReadingChildBody: {
227         assert(child_parser_ != nullptr);
228         status = child_parser_->Feed(callback, reader, &local_num_bytes_read);
229         *num_bytes_read += local_num_bytes_read;
230         total_bytes_read_ += local_num_bytes_read;
231         if (!status.completed_ok()) {
232           return status;
233         }
234         state_ = State::kChildFullyParsed;
235         continue;
236       }
237 
238       case State::kChildFullyParsed: {
239         assert(child_parser_ != nullptr);
240         std::uint64_t byte_cap = max_size_;
241         // my_size_ is <= max_size_ if it's known, so pick the smaller value.
242         if (my_size_ != kUnknownElementSize) {
243           byte_cap = my_size_;
244         }
245 
246         if (total_bytes_read_ > byte_cap) {
247           return Status(Status::kElementOverflow);
248         } else if (total_bytes_read_ == byte_cap) {
249           state_ = State::kEndReached;
250           continue;
251         }
252 
253         if (child_parser_->GetCachedMetadata(&child_metadata_)) {
254           state_ = State::kValidatingChildSize;
255         } else {
256           state_ = State::kFirstReadOfChildId;
257         }
258         PrepareForNextChild();
259         callback = original_callback;
260         continue;
261       }
262 
263       case State::kEndReached: {
264         return Status(Status::kOkCompleted);
265       }
266     }
267   }
268 }
269 
GetCachedMetadata(ElementMetadata * metadata)270 bool MasterParser::GetCachedMetadata(ElementMetadata* metadata) {
271   assert(metadata != nullptr);
272 
273   if (has_cached_metadata_) {
274     *metadata = child_metadata_;
275   }
276   return has_cached_metadata_;
277 }
278 
InitSetup(std::uint32_t header_size,std::uint64_t size_in_bytes,std::uint64_t position)279 void MasterParser::InitSetup(std::uint32_t header_size,
280                              std::uint64_t size_in_bytes,
281                              std::uint64_t position) {
282   PrepareForNextChild();
283   header_size_ = header_size;
284   my_size_ = size_in_bytes;
285   my_position_ = position;
286   total_bytes_read_ = 0;
287   has_cached_metadata_ = false;
288 }
289 
PrepareForNextChild()290 void MasterParser::PrepareForNextChild() {
291   // Do not reset child_metadata_ here.
292   id_parser_ = {};
293   size_parser_ = {};
294   child_parser_ = nullptr;
295   action_ = Action::kRead;
296 }
297 
298 }  // namespace webm
299