1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #ifndef MOOF_PARSER_H_
6 #define MOOF_PARSER_H_
7 
8 #include "mozilla/ResultExtensions.h"
9 #include "mozilla/Variant.h"
10 #include "Atom.h"
11 #include "AtomType.h"
12 #include "SinfParser.h"
13 #include "ByteStream.h"
14 #include "MP4Interval.h"
15 #include "MediaResource.h"
16 
17 namespace mozilla {
18 
// Signed 64-bit count of microseconds, used for timestamps and durations.
using Microseconds = int64_t;

// Forward declarations for types named in the declarations below.
class Box;
class BoxContext;
class BoxReader;
class Moof;
25 
26 // Used to track the CTS end time of the last sample of a track
27 // in the preceeding Moof, so that we can smooth tracks' timestamps
28 // across Moofs.
29 struct TrackEndCts {
TrackEndCtsTrackEndCts30   TrackEndCts(uint32_t aTrackId, Microseconds aCtsEndTime)
31       : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {}
32   uint32_t mTrackId;
33   Microseconds mCtsEndTime;
34 };
35 
36 class Mvhd : public Atom {
37  public:
Mvhd()38   Mvhd()
39       : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
40   explicit Mvhd(Box& aBox);
41 
ToMicroseconds(int64_t aTimescaleUnits)42   Result<Microseconds, nsresult> ToMicroseconds(int64_t aTimescaleUnits) {
43     if (!mTimescale) {
44       NS_WARNING("invalid mTimescale");
45       return Err(NS_ERROR_FAILURE);
46     }
47     int64_t major = aTimescaleUnits / mTimescale;
48     int64_t remainder = aTimescaleUnits % mTimescale;
49     return major * 1000000ll + remainder * 1000000ll / mTimescale;
50   }
51 
52   uint64_t mCreationTime;
53   uint64_t mModificationTime;
54   uint32_t mTimescale;
55   uint64_t mDuration;
56 
57  protected:
58   Result<Ok, nsresult> Parse(Box& aBox);
59 };
60 
61 class Tkhd : public Mvhd {
62  public:
Tkhd()63   Tkhd() : mTrackId(0) {}
64   explicit Tkhd(Box& aBox);
65 
66   uint32_t mTrackId;
67 
68  protected:
69   Result<Ok, nsresult> Parse(Box& aBox);
70 };
71 
72 class Mdhd : public Mvhd {
73  public:
74   Mdhd() = default;
75   explicit Mdhd(Box& aBox);
76 };
77 
78 class Trex : public Atom {
79  public:
Trex(uint32_t aTrackId)80   explicit Trex(uint32_t aTrackId)
81       : mFlags(0),
82         mTrackId(aTrackId),
83         mDefaultSampleDescriptionIndex(0),
84         mDefaultSampleDuration(0),
85         mDefaultSampleSize(0),
86         mDefaultSampleFlags(0) {}
87 
88   explicit Trex(Box& aBox);
89 
90   uint32_t mFlags;
91   uint32_t mTrackId;
92   uint32_t mDefaultSampleDescriptionIndex;
93   uint32_t mDefaultSampleDuration;
94   uint32_t mDefaultSampleSize;
95   uint32_t mDefaultSampleFlags;
96 
97  protected:
98   Result<Ok, nsresult> Parse(Box& aBox);
99 };
100 
101 class Tfhd : public Trex {
102  public:
Tfhd(Trex & aTrex)103   explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
104     mValid = aTrex.IsValid();
105   }
106   Tfhd(Box& aBox, Trex& aTrex);
107 
108   uint64_t mBaseDataOffset;
109 
110  protected:
111   Result<Ok, nsresult> Parse(Box& aBox);
112 };
113 
114 class Tfdt : public Atom {
115  public:
Tfdt()116   Tfdt() : mBaseMediaDecodeTime(0) {}
117   explicit Tfdt(Box& aBox);
118 
119   uint64_t mBaseMediaDecodeTime;
120 
121  protected:
122   Result<Ok, nsresult> Parse(Box& aBox);
123 };
124 
125 class Edts : public Atom {
126  public:
Edts()127   Edts() : mMediaStart(0), mEmptyOffset(0) {}
128   explicit Edts(Box& aBox);
IsValid()129   virtual bool IsValid() override {
130     // edts is optional
131     return true;
132   }
133 
134   int64_t mMediaStart;
135   int64_t mEmptyOffset;
136 
137  protected:
138   Result<Ok, nsresult> Parse(Box& aBox);
139 };
140 
141 struct Sample {
142   mozilla::MediaByteRange mByteRange;
143   mozilla::MediaByteRange mCencRange;
144   Microseconds mDecodeTime;
145   MP4Interval<Microseconds> mCompositionRange;
146   bool mSync;
147 };
148 
149 class Saiz final : public Atom {
150  public:
151   Saiz(Box& aBox, AtomType aDefaultType);
152 
153   AtomType mAuxInfoType;
154   uint32_t mAuxInfoTypeParameter;
155   FallibleTArray<uint8_t> mSampleInfoSize;
156 
157  protected:
158   Result<Ok, nsresult> Parse(Box& aBox);
159 };
160 
161 class Saio final : public Atom {
162  public:
163   Saio(Box& aBox, AtomType aDefaultType);
164 
165   AtomType mAuxInfoType;
166   uint32_t mAuxInfoTypeParameter;
167   FallibleTArray<uint64_t> mOffsets;
168 
169  protected:
170   Result<Ok, nsresult> Parse(Box& aBox);
171 };
172 
// Pairs a sample count with a group description index.
struct SampleToGroupEntry {
  // Base values for group description indices: 0 for the track-level base and
  // 0x10000 for the fragment-level base.
  static const uint32_t kTrackGroupDescriptionIndexBase = 0;
  static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;

  SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
      : mSampleCount{aSampleCount},
        mGroupDescriptionIndex{aGroupDescriptionIndex} {}

  uint32_t mSampleCount;
  uint32_t mGroupDescriptionIndex;
};
185 
186 class Sbgp final : public Atom  // SampleToGroup box.
187 {
188  public:
189   explicit Sbgp(Box& aBox);
190 
191   AtomType mGroupingType;
192   uint32_t mGroupingTypeParam;
193   FallibleTArray<SampleToGroupEntry> mEntries;
194 
195  protected:
196   Result<Ok, nsresult> Parse(Box& aBox);
197 };
198 
199 // Stores information form CencSampleEncryptionInformationGroupEntry (seig).
200 // Cenc here refers to the common encryption standard, rather than the specific
201 // cenc scheme from that standard. This structure is used for all encryption
202 // schemes. I.e. it is used for both cenc and cbcs, not just cenc.
203 struct CencSampleEncryptionInfoEntry final {
204  public:
205   CencSampleEncryptionInfoEntry() = default;
206 
207   Result<Ok, nsresult> Init(BoxReader& aReader);
208 
209   bool mIsEncrypted = false;
210   uint8_t mIVSize = 0;
211   CopyableTArray<uint8_t> mKeyId;
212   uint8_t mCryptByteBlock = 0;
213   uint8_t mSkipByteBlock = 0;
214   CopyableTArray<uint8_t> mConsantIV;
215 };
216 
217 class Sgpd final : public Atom  // SampleGroupDescription box.
218 {
219  public:
220   explicit Sgpd(Box& aBox);
221 
222   AtomType mGroupingType;
223   FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
224 
225  protected:
226   Result<Ok, nsresult> Parse(Box& aBox);
227 };
228 
// An audio/video entry from the sample description box (stsd). All we need to
// keep is whether the entry is encrypted, so audio and video data do not get
// separate specialized classes. Most of the parsing of these entries is
// currently done by mp4parse-rust, but the moof parser needs to know which
// entries are encrypted when it parses the track fragment header (tfhd).
struct SampleDescriptionEntry {
  bool mIsEncryptedEntry{false};
};
237 
238 // Used to indicate in variants if all tracks should be parsed.
239 struct ParseAllTracks {};
240 
241 typedef Variant<ParseAllTracks, uint32_t> TrackParseMode;
242 
243 class Moof final : public Atom {
244  public:
245   Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
246        Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
247        uint64_t* aDecodeTime, bool aIsAudio,
248        nsTArray<TrackEndCts>& aTracksEndCts);
249   bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
250   void FixRounding(const Moof& aMoof);
251 
252   mozilla::MediaByteRange mRange;
253   mozilla::MediaByteRange mMdatRange;
254   MP4Interval<Microseconds> mTimeRange;
255   FallibleTArray<Sample> mIndex;
256 
257   FallibleTArray<CencSampleEncryptionInfoEntry>
258       mFragmentSampleEncryptionInfoEntries;
259   FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
260 
261   Tfhd mTfhd;
262   FallibleTArray<Saiz> mSaizs;
263   FallibleTArray<Saio> mSaios;
264   nsTArray<nsTArray<uint8_t>> mPsshes;
265 
266  private:
267   // aDecodeTime is updated to the end of the parsed TRAF on return.
268   void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
269                  Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
270                  uint64_t* aDecodeTime, bool aIsAudio);
271   // aDecodeTime is updated to the end of the parsed TRUN on return.
272   Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
273                                  Edts& aEdts, uint64_t* aDecodeTime,
274                                  bool aIsAudio);
275   // Process the sample auxiliary information used by common encryption.
276   // aScheme is used to select the appropriate auxiliary information and should
277   // be set based on the encryption scheme used by the track being processed.
278   // Note, the term cenc here refers to the standard, not the specific scheme
279   // from that standard. I.e. this function is used to handle up auxiliary
280   // information from the cenc and cbcs schemes.
281   bool ProcessCencAuxInfo(AtomType aScheme);
282   uint64_t mMaxRoundingError;
283 };
284 
285 DDLoggedTypeDeclName(MoofParser);
286 
287 class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
288  public:
MoofParser(ByteStream * aSource,const TrackParseMode & aTrackParseMode,bool aIsAudio)289   MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
290              bool aIsAudio)
291       : mSource(aSource),
292         mOffset(0),
293         mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
294                                              : 0),
295         mIsAudio(aIsAudio),
296         mLastDecodeTime(0),
297         mTrackParseMode(aTrackParseMode) {
298     // Setting mIsMultitrackParser is a nasty work around for calculating
299     // the composition range for MSE that causes the parser to parse multiple
300     // tracks. Ideally we'd store an array of tracks with different metadata
301     // for each.
302     DDLINKCHILD("source", aSource);
303   }
304   bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
305   // If *aCanEvict is set to true. then will remove all moofs already parsed
306   // from index then rebuild the index. *aCanEvict is set to true upon return if
307   // some moofs were removed.
308   bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
309                               bool* aCanEvict);
310   bool RebuildFragmentedIndex(BoxContext& aContext);
311   MP4Interval<Microseconds> GetCompositionRange(
312       const mozilla::MediaByteRangeSet& aByteRanges);
313   bool ReachedEnd();
314   void ParseMoov(Box& aBox);
315   void ParseTrak(Box& aBox);
316   void ParseMdia(Box& aBox);
317   void ParseMvex(Box& aBox);
318 
319   void ParseMinf(Box& aBox);
320   void ParseStbl(Box& aBox);
321   void ParseStsd(Box& aBox);
322   void ParseEncrypted(Box& aBox);
323 
324   bool BlockingReadNextMoof();
325 
326   already_AddRefed<mozilla::MediaByteBuffer> Metadata();
327   MediaByteRange FirstCompleteMediaSegment();
328   MediaByteRange FirstCompleteMediaHeader();
329 
330   mozilla::MediaByteRange mInitRange;
331   RefPtr<ByteStream> mSource;
332   uint64_t mOffset;
333   Mvhd mMvhd;
334   Mdhd mMdhd;
335   Trex mTrex;
336   Tfdt mTfdt;
337   Edts mEdts;
338   Sinf mSinf;
339 
340   FallibleTArray<CencSampleEncryptionInfoEntry>
341       mTrackSampleEncryptionInfoEntries;
342   FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
343   FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;
344 
Moofs()345   nsTArray<Moof>& Moofs() { return mMoofs; }
346 
347  private:
348   void ScanForMetadata(mozilla::MediaByteRange& aMoov);
349   nsTArray<Moof> mMoofs;
350   nsTArray<MediaByteRange> mMediaRanges;
351   nsTArray<TrackEndCts> mTracksEndCts;
352   bool mIsAudio;
353   uint64_t mLastDecodeTime;
354   // Either a ParseAllTracks if in multitrack mode, or an integer representing
355   // the track_id for the track being parsed. If parsing a specific track, mTrex
356   // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0
357   // is a valid track id -- this is not allowed in the spec, but such mp4s
358   // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary
359   // id based on the tracks being parsed.
360   const TrackParseMode mTrackParseMode;
361 };
362 }  // namespace mozilla
363 
364 #endif
365