1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "ContainerParser.h"
8 
9 #include "WebMBufferedParser.h"
10 #include "mozilla/EndianUtils.h"
11 #include "mozilla/IntegerPrintfMacros.h"
12 #include "mozilla/ErrorResult.h"
13 #include "MoofParser.h"
14 #include "mozilla/Logging.h"
15 #include "mozilla/Maybe.h"
16 #include "mozilla/Result.h"
17 #include "MediaData.h"
18 #include "nsMimeTypes.h"
19 #ifdef MOZ_FMP4
20 #  include "AtomType.h"
21 #  include "BufferReader.h"
22 #  include "Index.h"
23 #  include "MP4Interval.h"
24 #  include "ByteStream.h"
25 #endif
26 #include "SourceBufferResource.h"
27 #include <algorithm>
28 
29 extern mozilla::LogModule* GetMediaSourceSamplesLog();
30 
// Logging helpers for the MediaSource samples log. Each one prefixes the
// message with the container type's original string and the calling
// function's name, so a variable named `mType` must be in scope at the call
// site. MSE_DEBUG logs at Debug level, MSE_DEBUGV at Verbose level, and
// MSE_DEBUGVEX at Verbose level on behalf of the explicitly given object.
#define MSE_DEBUG(fmt, ...)                                            \
  DDMOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Debug,      \
            "(%s)::%s: " fmt, mType.OriginalString().Data(), __func__, \
            ##__VA_ARGS__)
#define MSE_DEBUGV(fmt, ...)                                           \
  DDMOZ_LOG(GetMediaSourceSamplesLog(), mozilla::LogLevel::Verbose,    \
            "(%s)::%s: " fmt, mType.OriginalString().Data(), __func__, \
            ##__VA_ARGS__)
#define MSE_DEBUGVEX(_owner, fmt, ...)                                        \
  DDMOZ_LOGEX(_owner, GetMediaSourceSamplesLog(), mozilla::LogLevel::Verbose, \
              "(%s)::%s: " fmt, mType.OriginalString().Data(), __func__,      \
              ##__VA_ARGS__)
43 
44 namespace mozilla {
45 
ContainerParser(const MediaContainerType & aType)46 ContainerParser::ContainerParser(const MediaContainerType& aType)
47     : mHasInitData(false), mTotalParsed(0), mGlobalOffset(0), mType(aType) {}
48 
49 ContainerParser::~ContainerParser() = default;
50 
IsInitSegmentPresent(const MediaSpan & aData)51 MediaResult ContainerParser::IsInitSegmentPresent(const MediaSpan& aData) {
52   MSE_DEBUG(
53       "aLength=%zu [%x%x%x%x]", aData.Length(),
54       aData.Length() > 0 ? aData[0] : 0, aData.Length() > 1 ? aData[1] : 0,
55       aData.Length() > 2 ? aData[2] : 0, aData.Length() > 3 ? aData[3] : 0);
56   return NS_ERROR_NOT_AVAILABLE;
57 }
58 
IsMediaSegmentPresent(const MediaSpan & aData)59 MediaResult ContainerParser::IsMediaSegmentPresent(const MediaSpan& aData) {
60   MSE_DEBUG(
61       "aLength=%zu [%x%x%x%x]", aData.Length(),
62       aData.Length() > 0 ? aData[0] : 0, aData.Length() > 1 ? aData[1] : 0,
63       aData.Length() > 2 ? aData[2] : 0, aData.Length() > 3 ? aData[3] : 0);
64   return NS_ERROR_NOT_AVAILABLE;
65 }
66 
ParseStartAndEndTimestamps(const MediaSpan & aData,int64_t & aStart,int64_t & aEnd)67 MediaResult ContainerParser::ParseStartAndEndTimestamps(const MediaSpan& aData,
68                                                         int64_t& aStart,
69                                                         int64_t& aEnd) {
70   return NS_ERROR_NOT_AVAILABLE;
71 }
72 
TimestampsFuzzyEqual(int64_t aLhs,int64_t aRhs)73 bool ContainerParser::TimestampsFuzzyEqual(int64_t aLhs, int64_t aRhs) {
74   return llabs(aLhs - aRhs) <= GetRoundingError();
75 }
76 
GetRoundingError()77 int64_t ContainerParser::GetRoundingError() {
78   NS_WARNING("Using default ContainerParser::GetRoundingError implementation");
79   return 0;
80 }
81 
HasCompleteInitData()82 bool ContainerParser::HasCompleteInitData() {
83   return mHasInitData && !!mInitData->Length();
84 }
85 
InitData()86 MediaByteBuffer* ContainerParser::InitData() { return mInitData; }
87 
InitSegmentRange()88 MediaByteRange ContainerParser::InitSegmentRange() {
89   return mCompleteInitSegmentRange;
90 }
91 
MediaHeaderRange()92 MediaByteRange ContainerParser::MediaHeaderRange() {
93   return mCompleteMediaHeaderRange;
94 }
95 
MediaSegmentRange()96 MediaByteRange ContainerParser::MediaSegmentRange() {
97   return mCompleteMediaSegmentRange;
98 }
99 
100 DDLoggedTypeDeclNameAndBase(WebMContainerParser, ContainerParser);
101 
102 class WebMContainerParser
103     : public ContainerParser,
104       public DecoderDoctorLifeLogger<WebMContainerParser> {
105  public:
WebMContainerParser(const MediaContainerType & aType)106   explicit WebMContainerParser(const MediaContainerType& aType)
107       : ContainerParser(aType), mParser(0), mOffset(0) {}
108 
109   static const unsigned NS_PER_USEC = 1000;
110 
IsInitSegmentPresent(const MediaSpan & aData)111   MediaResult IsInitSegmentPresent(const MediaSpan& aData) override {
112     ContainerParser::IsInitSegmentPresent(aData);
113     if (aData.Length() < 4) {
114       return NS_ERROR_NOT_AVAILABLE;
115     }
116 
117     WebMBufferedParser parser(0);
118     nsTArray<WebMTimeDataOffset> mapping;
119     ReentrantMonitor dummy("dummy");
120     bool result =
121         parser.Append(aData.Elements(), aData.Length(), mapping, dummy);
122     if (!result) {
123       return MediaResult(NS_ERROR_FAILURE,
124                          RESULT_DETAIL("Invalid webm content"));
125     }
126     return parser.mInitEndOffset > 0 ? NS_OK : NS_ERROR_NOT_AVAILABLE;
127   }
128 
IsMediaSegmentPresent(const MediaSpan & aData)129   MediaResult IsMediaSegmentPresent(const MediaSpan& aData) override {
130     ContainerParser::IsMediaSegmentPresent(aData);
131     if (aData.Length() < 4) {
132       return NS_ERROR_NOT_AVAILABLE;
133     }
134 
135     WebMBufferedParser parser(0);
136     nsTArray<WebMTimeDataOffset> mapping;
137     ReentrantMonitor dummy("dummy");
138     parser.AppendMediaSegmentOnly();
139     bool result =
140         parser.Append(aData.Elements(), aData.Length(), mapping, dummy);
141     if (!result) {
142       return MediaResult(NS_ERROR_FAILURE,
143                          RESULT_DETAIL("Invalid webm content"));
144     }
145     return parser.GetClusterOffset() >= 0 ? NS_OK : NS_ERROR_NOT_AVAILABLE;
146   }
147 
ParseStartAndEndTimestamps(const MediaSpan & aData,int64_t & aStart,int64_t & aEnd)148   MediaResult ParseStartAndEndTimestamps(const MediaSpan& aData,
149                                          int64_t& aStart,
150                                          int64_t& aEnd) override {
151     bool initSegment = NS_SUCCEEDED(IsInitSegmentPresent(aData));
152 
153     if (mLastMapping &&
154         (initSegment || NS_SUCCEEDED(IsMediaSegmentPresent(aData)))) {
155       // The last data contained a complete cluster but we can only detect it
156       // now that a new one is starting.
157       // We use mOffset as end position to ensure that any blocks not reported
158       // by WebMBufferParser are properly skipped.
159       mCompleteMediaSegmentRange =
160           MediaByteRange(mLastMapping.ref().mSyncOffset, mOffset) +
161           mGlobalOffset;
162       mLastMapping.reset();
163       MSE_DEBUG("New cluster found at start, ending previous one");
164       return NS_ERROR_NOT_AVAILABLE;
165     }
166 
167     if (initSegment) {
168       mOffset = 0;
169       mParser = WebMBufferedParser(0);
170       mOverlappedMapping.Clear();
171       mInitData = new MediaByteBuffer();
172       mResource = new SourceBufferResource();
173       DDLINKCHILD("resource", mResource.get());
174       mCompleteInitSegmentRange = MediaByteRange();
175       mCompleteMediaHeaderRange = MediaByteRange();
176       mCompleteMediaSegmentRange = MediaByteRange();
177       mGlobalOffset = mTotalParsed;
178     }
179 
180     // XXX if it only adds new mappings, overlapped but not available
181     // (e.g. overlap < 0) frames are "lost" from the reported mappings here.
182     nsTArray<WebMTimeDataOffset> mapping;
183     mapping.AppendElements(mOverlappedMapping);
184     mOverlappedMapping.Clear();
185     ReentrantMonitor dummy("dummy");
186     mParser.Append(aData.Elements(), aData.Length(), mapping, dummy);
187     if (mResource) {
188       mResource->AppendData(aData);
189     }
190 
191     // XXX This is a bit of a hack.  Assume if there are no timecodes
192     // present and it's an init segment that it's _just_ an init segment.
193     // We should be more precise.
194     if (initSegment || !HasCompleteInitData()) {
195       if (mParser.mInitEndOffset > 0) {
196         MOZ_DIAGNOSTIC_ASSERT(mInitData && mResource &&
197                               mParser.mInitEndOffset <= mResource->GetLength());
198         if (!mInitData->SetLength(mParser.mInitEndOffset, fallible)) {
199           // Super unlikely OOM
200           return NS_ERROR_OUT_OF_MEMORY;
201         }
202         mCompleteInitSegmentRange =
203             MediaByteRange(0, mParser.mInitEndOffset) + mGlobalOffset;
204         char* buffer = reinterpret_cast<char*>(mInitData->Elements());
205         mResource->ReadFromCache(buffer, 0, mParser.mInitEndOffset);
206         MSE_DEBUG("Stashed init of %" PRId64 " bytes.", mParser.mInitEndOffset);
207         mResource = nullptr;
208       } else {
209         MSE_DEBUG("Incomplete init found.");
210       }
211       mHasInitData = true;
212     }
213     mOffset += aData.Length();
214     mTotalParsed += aData.Length();
215 
216     if (mapping.IsEmpty()) {
217       return NS_ERROR_NOT_AVAILABLE;
218     }
219 
220     // Calculate media range for first media segment.
221 
222     // Check if we have a cluster finishing in the current data.
223     uint32_t endIdx = mapping.Length() - 1;
224     bool foundNewCluster = false;
225     while (mapping[0].mSyncOffset != mapping[endIdx].mSyncOffset) {
226       endIdx -= 1;
227       foundNewCluster = true;
228     }
229 
230     int32_t completeIdx = endIdx;
231     while (completeIdx >= 0 && mOffset < mapping[completeIdx].mEndOffset) {
232       MSE_DEBUG("block is incomplete, missing: %" PRId64,
233                 mapping[completeIdx].mEndOffset - mOffset);
234       completeIdx -= 1;
235     }
236 
237     // Save parsed blocks for which we do not have all data yet.
238     mOverlappedMapping.AppendElements(mapping.Elements() + completeIdx + 1,
239                                       mapping.Length() - completeIdx - 1);
240 
241     if (completeIdx < 0) {
242       mLastMapping.reset();
243       return NS_ERROR_NOT_AVAILABLE;
244     }
245 
246     if (mCompleteMediaHeaderRange.IsEmpty()) {
247       mCompleteMediaHeaderRange =
248           MediaByteRange(mapping[0].mSyncOffset, mapping[0].mEndOffset) +
249           mGlobalOffset;
250     }
251 
252     if (foundNewCluster && mOffset >= mapping[endIdx].mEndOffset) {
253       // We now have all information required to delimit a complete cluster.
254       int64_t endOffset = mapping[endIdx + 1].mSyncOffset;
255       if (mapping[endIdx + 1].mInitOffset > mapping[endIdx].mInitOffset) {
256         // We have a new init segment before this cluster.
257         endOffset = mapping[endIdx + 1].mInitOffset;
258       }
259       mCompleteMediaSegmentRange =
260           MediaByteRange(mapping[endIdx].mSyncOffset, endOffset) +
261           mGlobalOffset;
262     } else if (mapping[endIdx].mClusterEndOffset >= 0 &&
263                mOffset >= mapping[endIdx].mClusterEndOffset) {
264       mCompleteMediaSegmentRange =
265           MediaByteRange(
266               mapping[endIdx].mSyncOffset,
267               mParser.EndSegmentOffset(mapping[endIdx].mClusterEndOffset)) +
268           mGlobalOffset;
269     }
270 
271     Maybe<WebMTimeDataOffset> previousMapping;
272     if (completeIdx) {
273       previousMapping = Some(mapping[completeIdx - 1]);
274     } else {
275       previousMapping = mLastMapping;
276     }
277 
278     mLastMapping = Some(mapping[completeIdx]);
279 
280     if (!previousMapping && completeIdx + 1u >= mapping.Length()) {
281       // We have no previous nor next block available,
282       // so we can't estimate this block's duration.
283       return NS_ERROR_NOT_AVAILABLE;
284     }
285 
286     uint64_t frameDuration =
287         (completeIdx + 1u < mapping.Length())
288             ? mapping[completeIdx + 1].mTimecode -
289                   mapping[completeIdx].mTimecode
290             : mapping[completeIdx].mTimecode - previousMapping.ref().mTimecode;
291     aStart = mapping[0].mTimecode / NS_PER_USEC;
292     aEnd = (mapping[completeIdx].mTimecode + frameDuration) / NS_PER_USEC;
293 
294     MSE_DEBUG("[%" PRId64 ", %" PRId64 "] [fso=%" PRId64 ", leo=%" PRId64
295               ", l=%zu processedIdx=%u fs=%" PRId64 "]",
296               aStart, aEnd, mapping[0].mSyncOffset,
297               mapping[completeIdx].mEndOffset, mapping.Length(), completeIdx,
298               mCompleteMediaSegmentRange.mEnd);
299 
300     return NS_OK;
301   }
302 
GetRoundingError()303   int64_t GetRoundingError() override {
304     int64_t error = mParser.GetTimecodeScale() / NS_PER_USEC;
305     return error * 2;
306   }
307 
308  private:
309   WebMBufferedParser mParser;
310   nsTArray<WebMTimeDataOffset> mOverlappedMapping;
311   int64_t mOffset;
312   Maybe<WebMTimeDataOffset> mLastMapping;
313 };
314 
315 #ifdef MOZ_FMP4
316 
317 DDLoggedTypeDeclNameAndBase(MP4Stream, ByteStream);
318 
319 class MP4Stream : public ByteStream, public DecoderDoctorLifeLogger<MP4Stream> {
320  public:
321   explicit MP4Stream(SourceBufferResource* aResource);
322   virtual ~MP4Stream();
323   bool ReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
324               size_t* aBytesRead) override;
325   bool CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
326                     size_t* aBytesRead) override;
327   bool Length(int64_t* aSize) override;
328   const uint8_t* GetContiguousAccess(int64_t aOffset, size_t aSize) override;
329 
330  private:
331   RefPtr<SourceBufferResource> mResource;
332 };
333 
MP4Stream(SourceBufferResource * aResource)334 MP4Stream::MP4Stream(SourceBufferResource* aResource) : mResource(aResource) {
335   MOZ_COUNT_CTOR(MP4Stream);
336   MOZ_ASSERT(aResource);
337   DDLINKCHILD("resource", aResource);
338 }
339 
~MP4Stream()340 MP4Stream::~MP4Stream() { MOZ_COUNT_DTOR(MP4Stream); }
341 
ReadAt(int64_t aOffset,void * aBuffer,size_t aCount,size_t * aBytesRead)342 bool MP4Stream::ReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
343                        size_t* aBytesRead) {
344   return CachedReadAt(aOffset, aBuffer, aCount, aBytesRead);
345 }
346 
CachedReadAt(int64_t aOffset,void * aBuffer,size_t aCount,size_t * aBytesRead)347 bool MP4Stream::CachedReadAt(int64_t aOffset, void* aBuffer, size_t aCount,
348                              size_t* aBytesRead) {
349   nsresult rv = mResource->ReadFromCache(reinterpret_cast<char*>(aBuffer),
350                                          aOffset, aCount);
351   if (NS_FAILED(rv)) {
352     *aBytesRead = 0;
353     return false;
354   }
355   *aBytesRead = aCount;
356   return true;
357 }
358 
GetContiguousAccess(int64_t aOffset,size_t aSize)359 const uint8_t* MP4Stream::GetContiguousAccess(int64_t aOffset, size_t aSize) {
360   return mResource->GetContiguousAccess(aOffset, aSize);
361 }
362 
Length(int64_t * aSize)363 bool MP4Stream::Length(int64_t* aSize) {
364   if (mResource->GetLength() < 0) return false;
365   *aSize = mResource->GetLength();
366   return true;
367 }
368 
369 DDLoggedTypeDeclNameAndBase(MP4ContainerParser, ContainerParser);
370 
371 class MP4ContainerParser : public ContainerParser,
372                            public DecoderDoctorLifeLogger<MP4ContainerParser> {
373  public:
MP4ContainerParser(const MediaContainerType & aType)374   explicit MP4ContainerParser(const MediaContainerType& aType)
375       : ContainerParser(aType) {}
376 
IsInitSegmentPresent(const MediaSpan & aData)377   MediaResult IsInitSegmentPresent(const MediaSpan& aData) override {
378     ContainerParser::IsInitSegmentPresent(aData);
379     // Each MP4 atom has a chunk size and chunk type. The root chunk in an MP4
380     // file is the 'ftyp' atom followed by a file type. We just check for a
381     // vaguely valid 'ftyp' atom.
382     if (aData.Length() < 8) {
383       return NS_ERROR_NOT_AVAILABLE;
384     }
385     AtomParser parser(*this, aData, AtomParser::StopAt::eInitSegment);
386     if (!parser.IsValid()) {
387       return MediaResult(
388           NS_ERROR_FAILURE,
389           RESULT_DETAIL("Invalid Top-Level Box:%s", parser.LastInvalidBox()));
390     }
391     return parser.StartWithInitSegment() ? NS_OK : NS_ERROR_NOT_AVAILABLE;
392   }
393 
IsMediaSegmentPresent(const MediaSpan & aData)394   MediaResult IsMediaSegmentPresent(const MediaSpan& aData) override {
395     if (aData.Length() < 8) {
396       return NS_ERROR_NOT_AVAILABLE;
397     }
398     AtomParser parser(*this, aData, AtomParser::StopAt::eMediaSegment);
399     if (!parser.IsValid()) {
400       return MediaResult(
401           NS_ERROR_FAILURE,
402           RESULT_DETAIL("Invalid Box:%s", parser.LastInvalidBox()));
403     }
404     return parser.StartWithMediaSegment() ? NS_OK : NS_ERROR_NOT_AVAILABLE;
405   }
406 
407  private:
408   class AtomParser {
409    public:
410     enum class StopAt { eInitSegment, eMediaSegment, eEnd };
411 
AtomParser(const MP4ContainerParser & aParser,const MediaSpan & aData,StopAt aStop=StopAt::eEnd)412     AtomParser(const MP4ContainerParser& aParser, const MediaSpan& aData,
413                StopAt aStop = StopAt::eEnd) {
414       mValid = Init(aParser, aData, aStop).isOk();
415     }
416 
Init(const MP4ContainerParser & aParser,const MediaSpan & aData,StopAt aStop)417     Result<Ok, nsresult> Init(const MP4ContainerParser& aParser,
418                               const MediaSpan& aData, StopAt aStop) {
419       const MediaContainerType mType(
420           aParser.ContainerType());  // for logging macro.
421       BufferReader reader(aData);
422       AtomType initAtom("moov");
423       AtomType mediaAtom("moof");
424       AtomType dataAtom("mdat");
425 
426       // Valid top-level boxes defined in ISO/IEC 14496-12 (Table 1)
427       static const AtomType validBoxes[] = {
428           "ftyp", "moov",          // init segment
429           "pdin", "free", "sidx",  // optional prior moov box
430           "styp", "moof", "mdat",  // media segment
431           "mfra", "skip", "meta", "meco", "ssix", "prft",  // others.
432           "pssh",         // optional with encrypted EME, though ignored.
433           "emsg",         // ISO23009-1:2014 Section 5.10.3.3
434           "bloc", "uuid"  // boxes accepted by chrome.
435       };
436 
437       while (reader.Remaining() >= 8) {
438         uint32_t tmp;
439         MOZ_TRY_VAR(tmp, reader.ReadU32());
440         uint64_t size = tmp;
441         const uint8_t* typec = reader.Peek(4);
442         MOZ_TRY_VAR(tmp, reader.ReadU32());
443         AtomType type(tmp);
444         MSE_DEBUGVEX(&aParser, "Checking atom:'%c%c%c%c' @ %u", typec[0],
445                      typec[1], typec[2], typec[3],
446                      (uint32_t)reader.Offset() - 8);
447         if (std::find(std::begin(validBoxes), std::end(validBoxes), type) ==
448             std::end(validBoxes)) {
449           // No valid box found, no point continuing.
450           mLastInvalidBox[0] = typec[0];
451           mLastInvalidBox[1] = typec[1];
452           mLastInvalidBox[2] = typec[2];
453           mLastInvalidBox[3] = typec[3];
454           mLastInvalidBox[4] = '\0';
455           return Err(NS_ERROR_FAILURE);
456         }
457         if (mInitOffset.isNothing() && AtomType(type) == initAtom) {
458           mInitOffset = Some(reader.Offset());
459         }
460         if (mMediaOffset.isNothing() && AtomType(type) == mediaAtom) {
461           mMediaOffset = Some(reader.Offset());
462         }
463         if (mDataOffset.isNothing() && AtomType(type) == dataAtom) {
464           mDataOffset = Some(reader.Offset());
465         }
466         if (size == 1) {
467           // 64 bits size.
468           MOZ_TRY_VAR(size, reader.ReadU64());
469         } else if (size == 0) {
470           // Atom extends to the end of the buffer, it can't have what we're
471           // looking for.
472           break;
473         }
474         if (reader.Remaining() < size - 8) {
475           // Incomplete atom.
476           break;
477         }
478         reader.Read(size - 8);
479 
480         if (aStop == StopAt::eInitSegment && (mInitOffset || mMediaOffset)) {
481           // When we're looking for an init segment, if we encountered a media
482           // segment, it we will need to be processed first. So we can stop
483           // right away if we have found a media segment.
484           break;
485         }
486         if (aStop == StopAt::eMediaSegment &&
487             (mInitOffset || (mMediaOffset && mDataOffset))) {
488           // When we're looking for a media segment, if we encountered an init
489           // segment, it we will need to be processed first. So we can stop
490           // right away if we have found an init segment.
491           break;
492         }
493       }
494 
495       return Ok();
496     }
497 
StartWithInitSegment() const498     bool StartWithInitSegment() const {
499       return mInitOffset.isSome() && (mMediaOffset.isNothing() ||
500                                       mInitOffset.ref() < mMediaOffset.ref());
501     }
StartWithMediaSegment() const502     bool StartWithMediaSegment() const {
503       return mMediaOffset.isSome() && (mInitOffset.isNothing() ||
504                                        mMediaOffset.ref() < mInitOffset.ref());
505     }
IsValid() const506     bool IsValid() const { return mValid; }
LastInvalidBox() const507     const char* LastInvalidBox() const { return mLastInvalidBox; }
508 
509    private:
510     Maybe<size_t> mInitOffset;
511     Maybe<size_t> mMediaOffset;
512     Maybe<size_t> mDataOffset;
513     bool mValid;
514     char mLastInvalidBox[5];
515   };
516 
517  public:
ParseStartAndEndTimestamps(const MediaSpan & aData,int64_t & aStart,int64_t & aEnd)518   MediaResult ParseStartAndEndTimestamps(const MediaSpan& aData,
519                                          int64_t& aStart,
520                                          int64_t& aEnd) override {
521     bool initSegment = NS_SUCCEEDED(IsInitSegmentPresent(aData));
522     if (initSegment) {
523       mResource = new SourceBufferResource();
524       DDLINKCHILD("resource", mResource.get());
525       mStream = new MP4Stream(mResource);
526       // We use a timestampOffset of 0 for ContainerParser, and require
527       // consumers of ParseStartAndEndTimestamps to add their timestamp offset
528       // manually. This allows the ContainerParser to be shared across different
529       // timestampOffsets.
530       mParser = MakeUnique<MoofParser>(mStream, AsVariant(ParseAllTracks{}),
531                                        /* aIsAudio = */ false);
532       DDLINKCHILD("parser", mParser.get());
533       mInitData = new MediaByteBuffer();
534       mCompleteInitSegmentRange = MediaByteRange();
535       mCompleteMediaHeaderRange = MediaByteRange();
536       mCompleteMediaSegmentRange = MediaByteRange();
537       mGlobalOffset = mTotalParsed;
538     } else if (!mStream || !mParser) {
539       mTotalParsed += aData.Length();
540       return NS_ERROR_NOT_AVAILABLE;
541     }
542 
543     MOZ_DIAGNOSTIC_ASSERT(mResource && mParser && mInitData,
544                           "Should have received an init segment first");
545 
546     mResource->AppendData(aData);
547     MediaByteRangeSet byteRanges;
548     byteRanges +=
549         MediaByteRange(int64_t(mParser->mOffset), mResource->GetLength());
550     mParser->RebuildFragmentedIndex(byteRanges);
551 
552     if (initSegment || !HasCompleteInitData()) {
553       MediaByteRange& range = mParser->mInitRange;
554       if (range.Length()) {
555         mCompleteInitSegmentRange = range + mGlobalOffset;
556         if (!mInitData->SetLength(range.Length(), fallible)) {
557           // Super unlikely OOM
558           return NS_ERROR_OUT_OF_MEMORY;
559         }
560         char* buffer = reinterpret_cast<char*>(mInitData->Elements());
561         mResource->ReadFromCache(buffer, range.mStart, range.Length());
562         MSE_DEBUG("Stashed init of %" PRIu64 " bytes.", range.Length());
563       } else {
564         MSE_DEBUG("Incomplete init found.");
565       }
566       mHasInitData = true;
567     }
568     mTotalParsed += aData.Length();
569 
570     MP4Interval<Microseconds> compositionRange =
571         mParser->GetCompositionRange(byteRanges);
572 
573     mCompleteMediaHeaderRange =
574         mParser->FirstCompleteMediaHeader() + mGlobalOffset;
575     mCompleteMediaSegmentRange =
576         mParser->FirstCompleteMediaSegment() + mGlobalOffset;
577 
578     if (HasCompleteInitData()) {
579       mResource->EvictData(mParser->mOffset, mParser->mOffset);
580     }
581 
582     if (compositionRange.IsNull()) {
583       return NS_ERROR_NOT_AVAILABLE;
584     }
585     aStart = compositionRange.start;
586     aEnd = compositionRange.end;
587     MSE_DEBUG("[%" PRId64 ", %" PRId64 "]", aStart, aEnd);
588     return NS_OK;
589   }
590 
591   // Gaps of up to 35ms (marginally longer than a single frame at 30fps) are
592   // considered to be sequential frames.
GetRoundingError()593   int64_t GetRoundingError() override { return 35000; }
594 
595  private:
596   RefPtr<MP4Stream> mStream;
597   UniquePtr<MoofParser> mParser;
598 };
599 #endif  // MOZ_FMP4
600 
601 #ifdef MOZ_FMP4
602 DDLoggedTypeDeclNameAndBase(ADTSContainerParser, ContainerParser);
603 
604 class ADTSContainerParser
605     : public ContainerParser,
606       public DecoderDoctorLifeLogger<ADTSContainerParser> {
607  public:
ADTSContainerParser(const MediaContainerType & aType)608   explicit ADTSContainerParser(const MediaContainerType& aType)
609       : ContainerParser(aType) {}
610 
611   typedef struct {
612     size_t header_length;  // Length of just the initialization data.
613     size_t frame_length;   // Includes header_length;
614     uint8_t aac_frames;    // Number of AAC frames in the ADTS frame.
615     bool have_crc;
616   } Header;
617 
618   /// Helper to parse the ADTS header, returning data we care about.
619   /// Returns true if the header is parsed successfully.
620   /// Returns false if the header is invalid or incomplete,
621   /// without modifying the passed-in Header object.
Parse(const MediaSpan & aData,Header & header)622   bool Parse(const MediaSpan& aData, Header& header) {
623     // ADTS initialization segments are just the packet header.
624     if (aData.Length() < 7) {
625       MSE_DEBUG("buffer too short for header.");
626       return false;
627     }
628     // Check 0xfffx sync word plus layer 0.
629     if ((aData[0] != 0xff) || ((aData[1] & 0xf6) != 0xf0)) {
630       MSE_DEBUG("no syncword.");
631       return false;
632     }
633     bool have_crc = !(aData[1] & 0x01);
634     if (have_crc && aData.Length() < 9) {
635       MSE_DEBUG("buffer too short for header with crc.");
636       return false;
637     }
638     uint8_t frequency_index = (aData[2] & 0x3c) >> 2;
639     MOZ_ASSERT(frequency_index < 16);
640     if (frequency_index == 15) {
641       MSE_DEBUG("explicit frequency disallowed.");
642       return false;
643     }
644     size_t header_length = have_crc ? 9 : 7;
645     size_t data_length = ((aData[3] & 0x03) << 11) | ((aData[4] & 0xff) << 3) |
646                          ((aData[5] & 0xe0) >> 5);
647     uint8_t frames = (aData[6] & 0x03) + 1;
648     MOZ_ASSERT(frames > 0);
649     MOZ_ASSERT(frames < 4);
650 
651     // Return successfully parsed data.
652     header.header_length = header_length;
653     header.frame_length = header_length + data_length;
654     header.aac_frames = frames;
655     header.have_crc = have_crc;
656     return true;
657   }
658 
IsInitSegmentPresent(const MediaSpan & aData)659   MediaResult IsInitSegmentPresent(const MediaSpan& aData) override {
660     // Call superclass for logging.
661     ContainerParser::IsInitSegmentPresent(aData);
662 
663     Header header;
664     if (!Parse(aData, header)) {
665       return NS_ERROR_NOT_AVAILABLE;
666     }
667 
668     MSE_DEBUGV("%llu byte frame %d aac frames%s",
669                (unsigned long long)header.frame_length, (int)header.aac_frames,
670                header.have_crc ? " crc" : "");
671 
672     return NS_OK;
673   }
674 
IsMediaSegmentPresent(const MediaSpan & aData)675   MediaResult IsMediaSegmentPresent(const MediaSpan& aData) override {
676     // Call superclass for logging.
677     ContainerParser::IsMediaSegmentPresent(aData);
678 
679     // Make sure we have a header so we know how long the frame is.
680     // NB this assumes the media segment buffer starts with an
681     // initialization segment. Since every frame has an ADTS header
682     // this is a normal place to divide packets, but we can re-parse
683     // mInitData if we need to handle separate media segments.
684     Header header;
685     if (!Parse(aData, header)) {
686       return NS_ERROR_NOT_AVAILABLE;
687     }
688     // We're supposed to return true as long as aData contains the
689     // start of a media segment, whether or not it's complete. So
690     // return true if we have any data beyond the header.
691     if (aData.Length() <= header.header_length) {
692       return NS_ERROR_NOT_AVAILABLE;
693     }
694 
695     // We should have at least a partial frame.
696     return NS_OK;
697   }
698 
ParseStartAndEndTimestamps(const MediaSpan & aData,int64_t & aStart,int64_t & aEnd)699   MediaResult ParseStartAndEndTimestamps(const MediaSpan& aData,
700                                          int64_t& aStart,
701                                          int64_t& aEnd) override {
702     // ADTS header.
703     Header header;
704     if (!Parse(aData, header)) {
705       return NS_ERROR_NOT_AVAILABLE;
706     }
707     mHasInitData = true;
708     mCompleteInitSegmentRange =
709         MediaByteRange(0, int64_t(header.header_length));
710 
711     // Cache raw header in case the caller wants a copy.
712     mInitData = new MediaByteBuffer(header.header_length);
713     mInitData->AppendElements(aData.Elements(), header.header_length);
714 
715     // Check that we have enough data for the frame body.
716     if (aData.Length() < header.frame_length) {
717       MSE_DEBUGV(
718           "Not enough data for %llu byte frame"
719           " in %llu byte buffer.",
720           (unsigned long long)header.frame_length,
721           (unsigned long long)(aData.Length()));
722       return NS_ERROR_NOT_AVAILABLE;
723     }
724     mCompleteMediaSegmentRange =
725         MediaByteRange(header.header_length, header.frame_length);
726     // The ADTS MediaSource Byte Stream Format document doesn't
727     // define media header. Just treat it the same as the whole
728     // media segment.
729     mCompleteMediaHeaderRange = mCompleteMediaSegmentRange;
730 
731     MSE_DEBUG("[%" PRId64 ", %" PRId64 "]", aStart, aEnd);
732     // We don't update timestamps, regardless.
733     return NS_ERROR_NOT_AVAILABLE;
734   }
735 
736   // Audio shouldn't have gaps.
737   // Especially when we generate the timestamps ourselves.
GetRoundingError()738   int64_t GetRoundingError() override { return 0; }
739 };
740 #endif  // MOZ_FMP4
741 
742 /*static*/
CreateForMIMEType(const MediaContainerType & aType)743 UniquePtr<ContainerParser> ContainerParser::CreateForMIMEType(
744     const MediaContainerType& aType) {
745   if (aType.Type() == MEDIAMIMETYPE(VIDEO_WEBM) ||
746       aType.Type() == MEDIAMIMETYPE(AUDIO_WEBM)) {
747     return MakeUnique<WebMContainerParser>(aType);
748   }
749 
750 #ifdef MOZ_FMP4
751   if (aType.Type() == MEDIAMIMETYPE(VIDEO_MP4) ||
752       aType.Type() == MEDIAMIMETYPE(AUDIO_MP4)) {
753     return MakeUnique<MP4ContainerParser>(aType);
754   }
755   if (aType.Type() == MEDIAMIMETYPE("audio/aac")) {
756     return MakeUnique<ADTSContainerParser>(aType);
757   }
758 #endif
759 
760   return MakeUnique<ContainerParser>(aType);
761 }
762 
763 #undef MSE_DEBUG
764 #undef MSE_DEBUGV
765 #undef MSE_DEBUGVEX
766 
767 }  // namespace mozilla
768