1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this file, 4 * You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #ifndef TrackEncoder_h_ 7 #define TrackEncoder_h_ 8 9 #include "AudioSegment.h" 10 #include "EncodedFrameContainer.h" 11 #include "MediaStreamGraph.h" 12 #include "StreamTracks.h" 13 #include "TrackMetadataBase.h" 14 #include "VideoSegment.h" 15 16 namespace mozilla { 17 18 class AbstractThread; 19 class TrackEncoder; 20 21 class TrackEncoderListener { 22 public: 23 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener) 24 25 /** 26 * Called when the TrackEncoder's underlying encoder has been successfully 27 * initialized and there's non-null data ready to be encoded. 28 */ 29 virtual void Initialized(TrackEncoder* aEncoder) = 0; 30 31 /** 32 * Called when there's new data ready to be encoded. 33 * Always called after Initialized(). 34 */ 35 virtual void DataAvailable(TrackEncoder* aEncoder) = 0; 36 37 /** 38 * Called after the TrackEncoder hit an unexpected error, causing it to 39 * abort operation. 40 */ 41 virtual void Error(TrackEncoder* aEncoder) = 0; 42 43 protected: ~TrackEncoderListener()44 virtual ~TrackEncoderListener() {} 45 }; 46 47 /** 48 * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by 49 * MediaEncoder. All methods are to be called only on the worker thread. 50 * 51 * MediaStreamTrackListeners will get store raw data in mIncomingBuffer, so 52 * mIncomingBuffer is protected by a lock. The control APIs are all called by 53 * MediaEncoder on its dedicated thread, where GetEncodedTrack is called 54 * periodically to swap out mIncomingBuffer, feed it to the encoder, and return 55 * the encoded data. 56 */ 57 class TrackEncoder { 58 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoder); 59 60 public: 61 explicit TrackEncoder(TrackRate aTrackRate); 62 63 virtual void Suspend(TimeStamp aTime) = 0; 64 65 virtual void Resume(TimeStamp aTime) = 0; 66 67 /** 68 * Called by MediaEncoder to cancel the encoding. 69 */ 70 virtual void Cancel() = 0; 71 72 /** 73 * Notifies us that we have reached the end of the stream and no more data 74 * will be appended. 75 */ 76 virtual void NotifyEndOfStream() = 0; 77 78 /** 79 * MediaStreamGraph notifies us about the time of the track's start. 80 * This gets called on the MediaEncoder thread after a dispatch. 81 */ 82 virtual void SetStartOffset(StreamTime aStartOffset) = 0; 83 84 /** 85 * Dispatched from MediaStreamGraph when it has run an iteration where the 86 * input track of the track this TrackEncoder is associated with didn't have 87 * any data. 88 */ 89 virtual void AdvanceBlockedInput(StreamTime aDuration) = 0; 90 91 /** 92 * MediaStreamGraph notifies us about the duration of data that has just been 93 * processed. This gets called on the MediaEncoder thread after a dispatch. 94 */ 95 virtual void AdvanceCurrentTime(StreamTime aDuration) = 0; 96 97 /** 98 * Creates and sets up meta data for a specific codec, called on the worker 99 * thread. 100 */ 101 virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0; 102 103 /** 104 * Encodes raw segments. Result data is returned in aData, and called on the 105 * worker thread. 106 */ 107 virtual nsresult GetEncodedTrack(EncodedFrameContainer& aData) = 0; 108 109 /** 110 * Returns true once this TrackEncoder is initialized. 111 */ 112 bool IsInitialized(); 113 114 /** 115 * True if the track encoder has encoded all source segments coming from 116 * MediaStreamGraph. Call on the worker thread. 117 */ 118 bool IsEncodingComplete(); 119 120 /** 121 * If this TrackEncoder was not already initialized, it is set to initialized 122 * and listeners are notified. 123 */ 124 void SetInitialized(); 125 126 /** 127 * Notifies listeners that there is data available for encoding. 128 */ 129 void OnDataAvailable(); 130 131 /** 132 * Called after an error. Cancels the encoding and notifies listeners. 133 */ 134 void OnError(); 135 136 /** 137 * Registers a listener to events from this TrackEncoder. 138 * We hold a strong reference to the listener. 139 */ 140 void RegisterListener(TrackEncoderListener* aListener); 141 142 /** 143 * Unregisters a listener from events from this TrackEncoder. 144 * The listener will stop receiving events synchronously. 145 */ 146 bool UnregisterListener(TrackEncoderListener* aListener); 147 148 virtual void SetBitrate(const uint32_t aBitrate) = 0; 149 150 /** 151 * It's optional to set the worker thread, but if you do we'll assert that 152 * we are in the worker thread in every method that gets called. 153 */ 154 void SetWorkerThread(AbstractThread* aWorkerThread); 155 156 /** 157 * Measure size of internal buffers. 158 */ 159 virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0; 160 161 protected: ~TrackEncoder()162 virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); } 163 164 /** 165 * True if the track encoder has encoded all source data. 166 */ 167 bool mEncodingComplete; 168 169 /** 170 * True if flag of EOS or any form of indicating EOS has set in the codec- 171 * encoder. 172 */ 173 bool mEosSetInEncoder; 174 175 /** 176 * True if the track encoder has been initialized successfully. 177 */ 178 bool mInitialized; 179 180 /** 181 * True once all data until the end of the input track has been received. 182 */ 183 bool mEndOfStream; 184 185 /** 186 * True once this encoding has been cancelled. 187 */ 188 bool mCanceled; 189 190 /** 191 * The latest current time reported to us from the MSG. 192 */ 193 StreamTime mCurrentTime; 194 195 // How many times we have tried to initialize the encoder. 196 uint32_t mInitCounter; 197 StreamTime mNotInitDuration; 198 199 bool mSuspended; 200 201 /** 202 * The track rate of source media. 203 */ 204 TrackRate mTrackRate; 205 206 /** 207 * If set we assert that all methods are called on this thread. 208 */ 209 RefPtr<AbstractThread> mWorkerThread; 210 211 nsTArray<RefPtr<TrackEncoderListener>> mListeners; 212 }; 213 214 class AudioTrackEncoder : public TrackEncoder { 215 public: AudioTrackEncoder(TrackRate aTrackRate)216 explicit AudioTrackEncoder(TrackRate aTrackRate) 217 : TrackEncoder(aTrackRate), 218 mChannels(0), 219 mSamplingRate(0), 220 mAudioBitrate(0) {} 221 222 /** 223 * Suspends encoding from mCurrentTime, i.e., all audio data until the next 224 * Resume() will be dropped. 225 */ 226 void Suspend(TimeStamp aTime) override; 227 228 /** 229 * Resumes encoding starting at mCurrentTime. 230 */ 231 void Resume(TimeStamp aTime) override; 232 233 /** 234 * Appends and consumes track data from aSegment. 235 */ 236 void AppendAudioSegment(AudioSegment&& aSegment); 237 238 /** 239 * Takes track data from the last time TakeTrackData ran until mCurrentTime 240 * and moves it to aSegment. 241 */ 242 void TakeTrackData(AudioSegment& aSegment); 243 244 template <typename T> InterleaveTrackData(nsTArray<const T * > & aInput,int32_t aDuration,uint32_t aOutputChannels,AudioDataValue * aOutput,float aVolume)245 static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration, 246 uint32_t aOutputChannels, 247 AudioDataValue* aOutput, float aVolume) { 248 if (aInput.Length() < aOutputChannels) { 249 // Up-mix. This might make the mChannelData have more than aChannels. 250 AudioChannelsUpMix(&aInput, aOutputChannels, 251 SilentChannel::ZeroChannel<T>()); 252 } 253 254 if (aInput.Length() > aOutputChannels) { 255 DownmixAndInterleave(aInput, aDuration, aVolume, aOutputChannels, 256 aOutput); 257 } else { 258 InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume, 259 aOutputChannels, aOutput); 260 } 261 } 262 263 /** 264 * Interleaves the track data and stores the result into aOutput. Might need 265 * to up-mix or down-mix the channel data if the channels number of this chunk 266 * is different from aOutputChannels. The channel data from aChunk might be 267 * modified by up-mixing. 268 */ 269 static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration, 270 uint32_t aOutputChannels, 271 AudioDataValue* aOutput); 272 273 /** 274 * De-interleaves the aInput data and stores the result into aOutput. 275 * No up-mix or down-mix operations inside. 276 */ 277 static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration, 278 int32_t aChannels, AudioDataValue* aOutput); 279 280 /** 281 * Measure size of internal buffers. 282 */ 283 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; 284 SetBitrate(const uint32_t aBitrate)285 void SetBitrate(const uint32_t aBitrate) override { 286 mAudioBitrate = aBitrate; 287 } 288 289 /** 290 * Tries to initiate the AudioEncoder based on data in aSegment. 291 * This can be re-called often, as it will exit early should we already be 292 * initiated. mInitiated will only be set if there was enough data in 293 * aSegment to infer metadata. If mInitiated gets set, listeners are notified. 294 * 295 * Not having enough data in aSegment to initiate the encoder for an 296 * accumulated aDuration of one second will make us initiate with a default 297 * number of channels. 298 * 299 * If we attempt to initiate the underlying encoder but fail, we Cancel() and 300 * notify listeners. 301 */ 302 void TryInit(const AudioSegment& aSegment, StreamTime aDuration); 303 304 void Cancel() override; 305 306 /** 307 * Dispatched from MediaStreamGraph when we have finished feeding data to 308 * mIncomingBuffer. 309 */ 310 void NotifyEndOfStream() override; 311 312 void SetStartOffset(StreamTime aStartOffset) override; 313 314 /** 315 * Dispatched from MediaStreamGraph when it has run an iteration where the 316 * input track of the track this TrackEncoder is associated with didn't have 317 * any data. 318 * 319 * Since we sometimes use a direct listener for AudioSegments we miss periods 320 * of time for which the source didn't have any data. This ensures that the 321 * latest frame gets displayed while we wait for more data to be pushed. 322 */ 323 void AdvanceBlockedInput(StreamTime aDuration) override; 324 325 /** 326 * Dispatched from MediaStreamGraph when it has run an iteration so we can 327 * hand more data to the encoder. 328 */ 329 void AdvanceCurrentTime(StreamTime aDuration) override; 330 331 protected: 332 /** 333 * Number of samples per channel in a pcm buffer. This is also the value of 334 * frame size required by audio encoder, and listeners will be notified when 335 * at least this much data has been added to mOutgoingBuffer. 336 */ GetPacketDuration()337 virtual int GetPacketDuration() { return 0; } 338 339 /** 340 * Initializes the audio encoder. The call of this method is delayed until we 341 * have received the first valid track from MediaStreamGraph. 342 */ 343 virtual nsresult Init(int aChannels, int aSamplingRate) = 0; 344 345 /** 346 * The number of channels are used for processing PCM data in the audio 347 * encoder. This value comes from the first valid audio chunk. If encoder 348 * can't support the channels in the chunk, downmix PCM stream can be 349 * performed. This value also be used to initialize the audio encoder. 350 */ 351 int mChannels; 352 353 /** 354 * The sampling rate of source audio data. 355 */ 356 int mSamplingRate; 357 358 /** 359 * A segment queue of incoming audio track data, from listeners. 360 * The duration of mIncomingBuffer is strictly increasing as it gets fed more 361 * data. Consumed data is replaced by null data. 362 */ 363 AudioSegment mIncomingBuffer; 364 365 /** 366 * A segment queue of outgoing audio track data to the encoder. 367 * The contents of mOutgoingBuffer will always be what has been consumed from 368 * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder 369 * sub class. 370 */ 371 AudioSegment mOutgoingBuffer; 372 373 uint32_t mAudioBitrate; 374 375 // This may only be accessed on the MSG thread. 376 // I.e., in the regular NotifyQueuedChanges for audio to avoid adding data 377 // from that callback when the direct one is active. 378 bool mDirectConnected; 379 }; 380 381 enum class FrameDroppingMode { 382 ALLOW, // Allowed to drop frames to keep up under load 383 DISALLOW, // Must not drop any frames, even if it means we will OOM 384 }; 385 386 class VideoTrackEncoder : public TrackEncoder { 387 public: 388 explicit VideoTrackEncoder(TrackRate aTrackRate, 389 FrameDroppingMode aFrameDroppingMode); 390 391 /** 392 * Suspends encoding from aTime, i.e., all video frame with a timestamp 393 * between aTime and the timestamp of the next Resume() will be dropped. 394 */ 395 void Suspend(TimeStamp aTime) override; 396 397 /** 398 * Resumes encoding starting at aTime. 399 */ 400 void Resume(TimeStamp aTime) override; 401 402 /** 403 * Appends source video frames to mIncomingBuffer. We only append the source 404 * chunk if the image is different from mLastChunk's image. Called on the 405 * MediaStreamGraph thread. 406 */ 407 void AppendVideoSegment(VideoSegment&& aSegment); 408 409 /** 410 * Takes track data from the last time TakeTrackData ran until mCurrentTime 411 * and moves it to aSegment. 412 */ 413 void TakeTrackData(VideoSegment& aSegment); 414 415 /** 416 * Measure size of internal buffers. 417 */ 418 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override; 419 SetBitrate(const uint32_t aBitrate)420 void SetBitrate(const uint32_t aBitrate) override { 421 mVideoBitrate = aBitrate; 422 } 423 424 /** 425 * Tries to initiate the VideoEncoder based on data in aSegment. 426 * This can be re-called often, as it will exit early should we already be 427 * initiated. mInitiated will only be set if there was enough data in 428 * aSegment to infer metadata. If mInitiated gets set, listeners are notified. 429 * 430 * Failing to initiate the encoder for an accumulated aDuration of 30 seconds 431 * is seen as an error and will cancel the current encoding. 432 */ 433 void Init(const VideoSegment& aSegment, StreamTime aDuration); 434 SecondsToMediaTime(double aS)435 StreamTime SecondsToMediaTime(double aS) const { 436 NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX, 437 "Bad seconds"); 438 return mTrackRate * aS; 439 } 440 441 void Cancel() override; 442 443 /** 444 * Notifies us that we have reached the end of the stream and no more data 445 * will be appended to mIncomingBuffer. 446 */ 447 void NotifyEndOfStream() override; 448 449 void SetStartOffset(StreamTime aStartOffset) override; 450 451 /** 452 * Dispatched from MediaStreamGraph when it has run an iteration where the 453 * input track of the track this TrackEncoder is associated with didn't have 454 * any data. 455 * 456 * Since we use a direct listener for VideoSegments we miss periods of time 457 * for which the source didn't have any data. This ensures that the latest 458 * frame gets displayed while we wait for more data to be pushed. 459 */ 460 void AdvanceBlockedInput(StreamTime aDuration) override; 461 462 /** 463 * Dispatched from MediaStreamGraph when it has run an iteration so we can 464 * hand more data to the encoder. 465 */ 466 void AdvanceCurrentTime(StreamTime aDuration) override; 467 468 /** 469 * Set desired keyframe interval defined in milliseconds. 470 */ 471 void SetKeyFrameInterval(int32_t aKeyFrameInterval); 472 473 protected: 474 /** 475 * Initialize the video encoder. In order to collect the value of width and 476 * height of source frames, this initialization is delayed until we have 477 * received the first valid video frame from MediaStreamGraph. 478 * Listeners will be notified after it has been successfully initialized. 479 */ 480 virtual nsresult Init(int aWidth, int aHeight, int aDisplayWidth, 481 int aDisplayHeight) = 0; 482 483 /** 484 * The width of source video frame, ceiled if the source width is odd. 485 */ 486 int mFrameWidth; 487 488 /** 489 * The height of source video frame, ceiled if the source height is odd. 490 */ 491 int mFrameHeight; 492 493 /** 494 * The display width of source video frame. 495 */ 496 int mDisplayWidth; 497 498 /** 499 * The display height of source video frame. 500 */ 501 int mDisplayHeight; 502 503 /** 504 * The last unique frame and duration so far handled by 505 * NotifyAdvanceCurrentTime. When a new frame is detected, mLastChunk is added 506 * to mOutgoingBuffer. 507 */ 508 VideoChunk mLastChunk; 509 510 /** 511 * A segment queue of incoming video track data, from listeners. 512 * The duration of mIncomingBuffer is strictly increasing as it gets fed more 513 * data. Consumed data is replaced by null data. 514 */ 515 VideoSegment mIncomingBuffer; 516 517 /** 518 * A segment queue of outgoing video track data to the encoder. 519 * The contents of mOutgoingBuffer will always be what has been consumed from 520 * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder 521 * sub class. There won't be any null data at the beginning of mOutgoingBuffer 522 * unless explicitly pushed by the producer. 523 */ 524 VideoSegment mOutgoingBuffer; 525 526 /** 527 * The number of mTrackRate ticks we have passed to mOutgoingBuffer. 528 */ 529 StreamTime mEncodedTicks; 530 531 /** 532 * The time of the first real video frame passed to mOutgoingBuffer (at t=0). 533 * 534 * Note that this time will progress during suspension, to make sure the 535 * incoming frames stay in sync with the output. 536 */ 537 TimeStamp mStartTime; 538 539 /** 540 * The time Suspend was called on the MediaRecorder, so we can calculate the 541 * duration on the next Resume(). 542 */ 543 TimeStamp mSuspendTime; 544 545 uint32_t mVideoBitrate; 546 547 /** 548 * ALLOW to drop frames under load. 549 * DISALLOW to encode all frames, mainly for testing. 550 */ 551 FrameDroppingMode mFrameDroppingMode; 552 553 /** 554 * The desired keyframe interval defined in milliseconds. 555 */ 556 int32_t mKeyFrameInterval; 557 }; 558 559 } // namespace mozilla 560 561 #endif 562