1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 #ifndef MOOF_PARSER_H_ 6 #define MOOF_PARSER_H_ 7 8 #include "mozilla/ResultExtensions.h" 9 #include "mozilla/Variant.h" 10 #include "Atom.h" 11 #include "AtomType.h" 12 #include "SinfParser.h" 13 #include "ByteStream.h" 14 #include "MP4Interval.h" 15 #include "MediaResource.h" 16 17 namespace mozilla { 18 19 typedef int64_t Microseconds; 20 21 class Box; 22 class BoxContext; 23 class BoxReader; 24 class Moof; 25 26 // Used to track the CTS end time of the last sample of a track 27 // in the preceeding Moof, so that we can smooth tracks' timestamps 28 // across Moofs. 29 struct TrackEndCts { TrackEndCtsTrackEndCts30 TrackEndCts(uint32_t aTrackId, Microseconds aCtsEndTime) 31 : mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {} 32 uint32_t mTrackId; 33 Microseconds mCtsEndTime; 34 }; 35 36 class Mvhd : public Atom { 37 public: Mvhd()38 Mvhd() 39 : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {} 40 explicit Mvhd(Box& aBox); 41 ToMicroseconds(int64_t aTimescaleUnits)42 Result<Microseconds, nsresult> ToMicroseconds(int64_t aTimescaleUnits) { 43 if (!mTimescale) { 44 NS_WARNING("invalid mTimescale"); 45 return Err(NS_ERROR_FAILURE); 46 } 47 int64_t major = aTimescaleUnits / mTimescale; 48 int64_t remainder = aTimescaleUnits % mTimescale; 49 return major * 1000000ll + remainder * 1000000ll / mTimescale; 50 } 51 52 uint64_t mCreationTime; 53 uint64_t mModificationTime; 54 uint32_t mTimescale; 55 uint64_t mDuration; 56 57 protected: 58 Result<Ok, nsresult> Parse(Box& aBox); 59 }; 60 61 class Tkhd : public Mvhd { 62 public: Tkhd()63 Tkhd() : mTrackId(0) {} 64 explicit Tkhd(Box& aBox); 65 66 uint32_t mTrackId; 67 68 protected: 69 Result<Ok, nsresult> Parse(Box& aBox); 70 }; 71 72 class Mdhd : public Mvhd { 73 public: 74 Mdhd() = default; 75 explicit Mdhd(Box& aBox); 76 }; 77 78 class Trex : public Atom { 79 public: Trex(uint32_t aTrackId)80 explicit Trex(uint32_t aTrackId) 81 : mFlags(0), 82 mTrackId(aTrackId), 83 mDefaultSampleDescriptionIndex(0), 84 mDefaultSampleDuration(0), 85 mDefaultSampleSize(0), 86 mDefaultSampleFlags(0) {} 87 88 explicit Trex(Box& aBox); 89 90 uint32_t mFlags; 91 uint32_t mTrackId; 92 uint32_t mDefaultSampleDescriptionIndex; 93 uint32_t mDefaultSampleDuration; 94 uint32_t mDefaultSampleSize; 95 uint32_t mDefaultSampleFlags; 96 97 protected: 98 Result<Ok, nsresult> Parse(Box& aBox); 99 }; 100 101 class Tfhd : public Trex { 102 public: Tfhd(Trex & aTrex)103 explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) { 104 mValid = aTrex.IsValid(); 105 } 106 Tfhd(Box& aBox, Trex& aTrex); 107 108 uint64_t mBaseDataOffset; 109 110 protected: 111 Result<Ok, nsresult> Parse(Box& aBox); 112 }; 113 114 class Tfdt : public Atom { 115 public: Tfdt()116 Tfdt() : mBaseMediaDecodeTime(0) {} 117 explicit Tfdt(Box& aBox); 118 119 uint64_t mBaseMediaDecodeTime; 120 121 protected: 122 Result<Ok, nsresult> Parse(Box& aBox); 123 }; 124 125 class Edts : public Atom { 126 public: Edts()127 Edts() : mMediaStart(0), mEmptyOffset(0) {} 128 explicit Edts(Box& aBox); IsValid()129 virtual bool IsValid() override { 130 // edts is optional 131 return true; 132 } 133 134 int64_t mMediaStart; 135 int64_t mEmptyOffset; 136 137 protected: 138 Result<Ok, nsresult> Parse(Box& aBox); 139 }; 140 141 struct Sample { 142 mozilla::MediaByteRange mByteRange; 143 mozilla::MediaByteRange mCencRange; 144 Microseconds mDecodeTime; 145 MP4Interval<Microseconds> mCompositionRange; 146 bool mSync; 147 }; 148 149 class Saiz final : public Atom { 150 public: 151 Saiz(Box& aBox, AtomType aDefaultType); 152 153 AtomType mAuxInfoType; 154 uint32_t mAuxInfoTypeParameter; 155 FallibleTArray<uint8_t> mSampleInfoSize; 156 157 protected: 158 Result<Ok, nsresult> Parse(Box& aBox); 159 }; 160 161 class Saio final : public Atom { 162 public: 163 Saio(Box& aBox, AtomType aDefaultType); 164 165 AtomType mAuxInfoType; 166 uint32_t mAuxInfoTypeParameter; 167 FallibleTArray<uint64_t> mOffsets; 168 169 protected: 170 Result<Ok, nsresult> Parse(Box& aBox); 171 }; 172 173 struct SampleToGroupEntry { 174 public: 175 static const uint32_t kTrackGroupDescriptionIndexBase = 0; 176 static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000; 177 SampleToGroupEntrySampleToGroupEntry178 SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex) 179 : mSampleCount(aSampleCount), 180 mGroupDescriptionIndex(aGroupDescriptionIndex) {} 181 182 uint32_t mSampleCount; 183 uint32_t mGroupDescriptionIndex; 184 }; 185 186 class Sbgp final : public Atom // SampleToGroup box. 187 { 188 public: 189 explicit Sbgp(Box& aBox); 190 191 AtomType mGroupingType; 192 uint32_t mGroupingTypeParam; 193 FallibleTArray<SampleToGroupEntry> mEntries; 194 195 protected: 196 Result<Ok, nsresult> Parse(Box& aBox); 197 }; 198 199 // Stores information form CencSampleEncryptionInformationGroupEntry (seig). 200 // Cenc here refers to the common encryption standard, rather than the specific 201 // cenc scheme from that standard. This structure is used for all encryption 202 // schemes. I.e. it is used for both cenc and cbcs, not just cenc. 203 struct CencSampleEncryptionInfoEntry final { 204 public: 205 CencSampleEncryptionInfoEntry() = default; 206 207 Result<Ok, nsresult> Init(BoxReader& aReader); 208 209 bool mIsEncrypted = false; 210 uint8_t mIVSize = 0; 211 CopyableTArray<uint8_t> mKeyId; 212 uint8_t mCryptByteBlock = 0; 213 uint8_t mSkipByteBlock = 0; 214 CopyableTArray<uint8_t> mConsantIV; 215 }; 216 217 class Sgpd final : public Atom // SampleGroupDescription box. 218 { 219 public: 220 explicit Sgpd(Box& aBox); 221 222 AtomType mGroupingType; 223 FallibleTArray<CencSampleEncryptionInfoEntry> mEntries; 224 225 protected: 226 Result<Ok, nsresult> Parse(Box& aBox); 227 }; 228 229 // Audio/video entries from the sample description box (stsd). We only need to 230 // store if these are encrypted, so do not need a specialized class for 231 // different audio and video data. Currently most of the parsing of these 232 // entries is by the mp4parse-rust, but moof pasrser needs to know which of 233 // these are encrypted when parsing the track fragment header (tfhd). 234 struct SampleDescriptionEntry { 235 bool mIsEncryptedEntry = false; 236 }; 237 238 // Used to indicate in variants if all tracks should be parsed. 239 struct ParseAllTracks {}; 240 241 typedef Variant<ParseAllTracks, uint32_t> TrackParseMode; 242 243 class Moof final : public Atom { 244 public: 245 Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex, 246 Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, 247 uint64_t* aDecodeTime, bool aIsAudio, 248 nsTArray<TrackEndCts>& aTracksEndCts); 249 bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges); 250 void FixRounding(const Moof& aMoof); 251 252 mozilla::MediaByteRange mRange; 253 mozilla::MediaByteRange mMdatRange; 254 MP4Interval<Microseconds> mTimeRange; 255 FallibleTArray<Sample> mIndex; 256 257 FallibleTArray<CencSampleEncryptionInfoEntry> 258 mFragmentSampleEncryptionInfoEntries; 259 FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries; 260 261 Tfhd mTfhd; 262 FallibleTArray<Saiz> mSaizs; 263 FallibleTArray<Saio> mSaios; 264 nsTArray<nsTArray<uint8_t>> mPsshes; 265 266 private: 267 // aDecodeTime is updated to the end of the parsed TRAF on return. 268 void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex, 269 Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, 270 uint64_t* aDecodeTime, bool aIsAudio); 271 // aDecodeTime is updated to the end of the parsed TRUN on return. 272 Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd, 273 Edts& aEdts, uint64_t* aDecodeTime, 274 bool aIsAudio); 275 // Process the sample auxiliary information used by common encryption. 276 // aScheme is used to select the appropriate auxiliary information and should 277 // be set based on the encryption scheme used by the track being processed. 278 // Note, the term cenc here refers to the standard, not the specific scheme 279 // from that standard. I.e. this function is used to handle up auxiliary 280 // information from the cenc and cbcs schemes. 281 bool ProcessCencAuxInfo(AtomType aScheme); 282 uint64_t mMaxRoundingError; 283 }; 284 285 DDLoggedTypeDeclName(MoofParser); 286 287 class MoofParser : public DecoderDoctorLifeLogger<MoofParser> { 288 public: MoofParser(ByteStream * aSource,const TrackParseMode & aTrackParseMode,bool aIsAudio)289 MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode, 290 bool aIsAudio) 291 : mSource(aSource), 292 mOffset(0), 293 mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>() 294 : 0), 295 mIsAudio(aIsAudio), 296 mLastDecodeTime(0), 297 mTrackParseMode(aTrackParseMode) { 298 // Setting mIsMultitrackParser is a nasty work around for calculating 299 // the composition range for MSE that causes the parser to parse multiple 300 // tracks. Ideally we'd store an array of tracks with different metadata 301 // for each. 302 DDLINKCHILD("source", aSource); 303 } 304 bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges); 305 // If *aCanEvict is set to true. then will remove all moofs already parsed 306 // from index then rebuild the index. *aCanEvict is set to true upon return if 307 // some moofs were removed. 308 bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges, 309 bool* aCanEvict); 310 bool RebuildFragmentedIndex(BoxContext& aContext); 311 MP4Interval<Microseconds> GetCompositionRange( 312 const mozilla::MediaByteRangeSet& aByteRanges); 313 bool ReachedEnd(); 314 void ParseMoov(Box& aBox); 315 void ParseTrak(Box& aBox); 316 void ParseMdia(Box& aBox); 317 void ParseMvex(Box& aBox); 318 319 void ParseMinf(Box& aBox); 320 void ParseStbl(Box& aBox); 321 void ParseStsd(Box& aBox); 322 void ParseEncrypted(Box& aBox); 323 324 bool BlockingReadNextMoof(); 325 326 already_AddRefed<mozilla::MediaByteBuffer> Metadata(); 327 MediaByteRange FirstCompleteMediaSegment(); 328 MediaByteRange FirstCompleteMediaHeader(); 329 330 mozilla::MediaByteRange mInitRange; 331 RefPtr<ByteStream> mSource; 332 uint64_t mOffset; 333 Mvhd mMvhd; 334 Mdhd mMdhd; 335 Trex mTrex; 336 Tfdt mTfdt; 337 Edts mEdts; 338 Sinf mSinf; 339 340 FallibleTArray<CencSampleEncryptionInfoEntry> 341 mTrackSampleEncryptionInfoEntries; 342 FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries; 343 FallibleTArray<SampleDescriptionEntry> mSampleDescriptions; 344 Moofs()345 nsTArray<Moof>& Moofs() { return mMoofs; } 346 347 private: 348 void ScanForMetadata(mozilla::MediaByteRange& aMoov); 349 nsTArray<Moof> mMoofs; 350 nsTArray<MediaByteRange> mMediaRanges; 351 nsTArray<TrackEndCts> mTracksEndCts; 352 bool mIsAudio; 353 uint64_t mLastDecodeTime; 354 // Either a ParseAllTracks if in multitrack mode, or an integer representing 355 // the track_id for the track being parsed. If parsing a specific track, mTrex 356 // should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0 357 // is a valid track id -- this is not allowed in the spec, but such mp4s 358 // appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary 359 // id based on the tracks being parsed. 360 const TrackParseMode mTrackParseMode; 361 }; 362 } // namespace mozilla 363 364 #endif 365