1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #ifndef TrackEncoder_h_
7 #define TrackEncoder_h_
8 
9 #include "AudioSegment.h"
10 #include "EncodedFrameContainer.h"
11 #include "MediaStreamGraph.h"
12 #include "StreamTracks.h"
13 #include "TrackMetadataBase.h"
14 #include "VideoSegment.h"
15 
16 namespace mozilla {
17 
18 class AbstractThread;
19 class TrackEncoder;
20 
21 class TrackEncoderListener {
22  public:
23   NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoderListener)
24 
25   /**
26    * Called when the TrackEncoder's underlying encoder has been successfully
27    * initialized and there's non-null data ready to be encoded.
28    */
29   virtual void Initialized(TrackEncoder* aEncoder) = 0;
30 
31   /**
32    * Called when there's new data ready to be encoded.
33    * Always called after Initialized().
34    */
35   virtual void DataAvailable(TrackEncoder* aEncoder) = 0;
36 
37   /**
38    * Called after the TrackEncoder hit an unexpected error, causing it to
39    * abort operation.
40    */
41   virtual void Error(TrackEncoder* aEncoder) = 0;
42 
43  protected:
~TrackEncoderListener()44   virtual ~TrackEncoderListener() {}
45 };
46 
/**
 * Base class of AudioTrackEncoder and VideoTrackEncoder. Lifetime managed by
 * MediaEncoder. All methods are to be called only on the worker thread.
 *
 * MediaStreamTrackListeners store raw data in mIncomingBuffer (declared by
 * the subclasses), so mIncomingBuffer is protected by a lock. The control APIs
 * are all called by MediaEncoder on its dedicated thread, where
 * GetEncodedTrack is called periodically to swap out mIncomingBuffer, feed it
 * to the encoder, and return the encoded data.
 */
class TrackEncoder {
  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(TrackEncoder);

 public:
  /**
   * aTrackRate is the track rate of the source media (see mTrackRate).
   */
  explicit TrackEncoder(TrackRate aTrackRate);

  /**
   * Suspends encoding. How aTime is used is defined by the subclass; see
   * AudioTrackEncoder::Suspend and VideoTrackEncoder::Suspend.
   */
  virtual void Suspend(TimeStamp aTime) = 0;

  /**
   * Resumes encoding after a Suspend(). How aTime is used is defined by the
   * subclass; see AudioTrackEncoder::Resume and VideoTrackEncoder::Resume.
   */
  virtual void Resume(TimeStamp aTime) = 0;

  /**
   * Called by MediaEncoder to cancel the encoding.
   */
  virtual void Cancel() = 0;

  /**
   * Notifies us that we have reached the end of the stream and no more data
   * will be appended.
   */
  virtual void NotifyEndOfStream() = 0;

  /**
   * MediaStreamGraph notifies us about the time of the track's start.
   * This gets called on the MediaEncoder thread after a dispatch.
   */
  virtual void SetStartOffset(StreamTime aStartOffset) = 0;

  /**
   * Dispatched from MediaStreamGraph when it has run an iteration where the
   * input track of the track this TrackEncoder is associated with didn't have
   * any data.
   */
  virtual void AdvanceBlockedInput(StreamTime aDuration) = 0;

  /**
   * MediaStreamGraph notifies us about the duration of data that has just been
   * processed. This gets called on the MediaEncoder thread after a dispatch.
   */
  virtual void AdvanceCurrentTime(StreamTime aDuration) = 0;

  /**
   * Creates and sets up metadata for a specific codec. Called on the worker
   * thread.
   */
  virtual already_AddRefed<TrackMetadataBase> GetMetadata() = 0;

  /**
   * Encodes raw segments. The resulting data is returned in aData. Called on
   * the worker thread.
   */
  virtual nsresult GetEncodedTrack(EncodedFrameContainer& aData) = 0;

  /**
   * Returns true once this TrackEncoder is initialized.
   */
  bool IsInitialized();

  /**
   * True if the track encoder has encoded all source segments coming from
   * MediaStreamGraph. Call on the worker thread.
   */
  bool IsEncodingComplete();

  /**
   * If this TrackEncoder was not already initialized, it is set to initialized
   * and listeners are notified.
   */
  void SetInitialized();

  /**
   * Notifies listeners that there is data available for encoding.
   */
  void OnDataAvailable();

  /**
   * Called after an error. Cancels the encoding and notifies listeners.
   */
  void OnError();

  /**
   * Registers a listener to events from this TrackEncoder.
   * We hold a strong reference to the listener.
   */
  void RegisterListener(TrackEncoderListener* aListener);

  /**
   * Unregisters a listener from events from this TrackEncoder.
   * The listener will stop receiving events synchronously.
   *
   * NOTE(review): the return value presumably tells whether aListener was
   * registered -- confirm against the implementation.
   */
  bool UnregisterListener(TrackEncoderListener* aListener);

  /**
   * Sets the bitrate for this encoder. The visible subclasses store aBitrate
   * in mAudioBitrate and mVideoBitrate respectively.
   */
  virtual void SetBitrate(const uint32_t aBitrate) = 0;

  /**
   * It's optional to set the worker thread, but if you do we'll assert that
   * we are in the worker thread in every method that gets called.
   */
  void SetWorkerThread(AbstractThread* aWorkerThread);

  /**
   * Measure size of internal buffers.
   */
  virtual size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) = 0;

 protected:
  /**
   * Protected destructor. Asserts that mListeners is empty at destruction
   * time.
   */
  virtual ~TrackEncoder() { MOZ_ASSERT(mListeners.IsEmpty()); }

  /**
   * True if the track encoder has encoded all source data.
   */
  bool mEncodingComplete;

  /**
   * True if the EOS flag, or any other form of indicating EOS, has been set
   * in the codec-encoder.
   */
  bool mEosSetInEncoder;

  /**
   * True if the track encoder has been initialized successfully.
   */
  bool mInitialized;

  /**
   * True once all data until the end of the input track has been received.
   */
  bool mEndOfStream;

  /**
   * True once this encoding has been cancelled.
   */
  bool mCanceled;

  /**
   * The latest current time reported to us from the MSG.
   */
  StreamTime mCurrentTime;

  // How many times we have tried to initialize the encoder.
  uint32_t mInitCounter;
  // NOTE(review): presumably the accumulated duration for which
  // initialization has not yet succeeded -- confirm against the
  // implementation.
  StreamTime mNotInitDuration;

  // NOTE(review): presumably true between Suspend() and Resume() -- confirm
  // against the implementation.
  bool mSuspended;

  /**
   * The track rate of source media.
   */
  TrackRate mTrackRate;

  /**
   * If set we assert that all methods are called on this thread.
   */
  RefPtr<AbstractThread> mWorkerThread;

  /**
   * Strong references to listeners. Must be empty by the time this
   * TrackEncoder is destroyed (asserted in the destructor).
   */
  nsTArray<RefPtr<TrackEncoderListener>> mListeners;
};
213 
214 class AudioTrackEncoder : public TrackEncoder {
215  public:
AudioTrackEncoder(TrackRate aTrackRate)216   explicit AudioTrackEncoder(TrackRate aTrackRate)
217       : TrackEncoder(aTrackRate),
218         mChannels(0),
219         mSamplingRate(0),
220         mAudioBitrate(0) {}
221 
222   /**
223    * Suspends encoding from mCurrentTime, i.e., all audio data until the next
224    * Resume() will be dropped.
225    */
226   void Suspend(TimeStamp aTime) override;
227 
228   /**
229    * Resumes encoding starting at mCurrentTime.
230    */
231   void Resume(TimeStamp aTime) override;
232 
233   /**
234    * Appends and consumes track data from aSegment.
235    */
236   void AppendAudioSegment(AudioSegment&& aSegment);
237 
238   /**
239    * Takes track data from the last time TakeTrackData ran until mCurrentTime
240    * and moves it to aSegment.
241    */
242   void TakeTrackData(AudioSegment& aSegment);
243 
244   template <typename T>
InterleaveTrackData(nsTArray<const T * > & aInput,int32_t aDuration,uint32_t aOutputChannels,AudioDataValue * aOutput,float aVolume)245   static void InterleaveTrackData(nsTArray<const T*>& aInput, int32_t aDuration,
246                                   uint32_t aOutputChannels,
247                                   AudioDataValue* aOutput, float aVolume) {
248     if (aInput.Length() < aOutputChannels) {
249       // Up-mix. This might make the mChannelData have more than aChannels.
250       AudioChannelsUpMix(&aInput, aOutputChannels,
251                          SilentChannel::ZeroChannel<T>());
252     }
253 
254     if (aInput.Length() > aOutputChannels) {
255       DownmixAndInterleave(aInput, aDuration, aVolume, aOutputChannels,
256                            aOutput);
257     } else {
258       InterleaveAndConvertBuffer(aInput.Elements(), aDuration, aVolume,
259                                  aOutputChannels, aOutput);
260     }
261   }
262 
263   /**
264    * Interleaves the track data and stores the result into aOutput. Might need
265    * to up-mix or down-mix the channel data if the channels number of this chunk
266    * is different from aOutputChannels. The channel data from aChunk might be
267    * modified by up-mixing.
268    */
269   static void InterleaveTrackData(AudioChunk& aChunk, int32_t aDuration,
270                                   uint32_t aOutputChannels,
271                                   AudioDataValue* aOutput);
272 
273   /**
274    * De-interleaves the aInput data and stores the result into aOutput.
275    * No up-mix or down-mix operations inside.
276    */
277   static void DeInterleaveTrackData(AudioDataValue* aInput, int32_t aDuration,
278                                     int32_t aChannels, AudioDataValue* aOutput);
279 
280   /**
281    * Measure size of internal buffers.
282    */
283   size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override;
284 
SetBitrate(const uint32_t aBitrate)285   void SetBitrate(const uint32_t aBitrate) override {
286     mAudioBitrate = aBitrate;
287   }
288 
289   /**
290    * Tries to initiate the AudioEncoder based on data in aSegment.
291    * This can be re-called often, as it will exit early should we already be
292    * initiated. mInitiated will only be set if there was enough data in
293    * aSegment to infer metadata. If mInitiated gets set, listeners are notified.
294    *
295    * Not having enough data in aSegment to initiate the encoder for an
296    * accumulated aDuration of one second will make us initiate with a default
297    * number of channels.
298    *
299    * If we attempt to initiate the underlying encoder but fail, we Cancel() and
300    * notify listeners.
301    */
302   void TryInit(const AudioSegment& aSegment, StreamTime aDuration);
303 
304   void Cancel() override;
305 
306   /**
307    * Dispatched from MediaStreamGraph when we have finished feeding data to
308    * mIncomingBuffer.
309    */
310   void NotifyEndOfStream() override;
311 
312   void SetStartOffset(StreamTime aStartOffset) override;
313 
314   /**
315    * Dispatched from MediaStreamGraph when it has run an iteration where the
316    * input track of the track this TrackEncoder is associated with didn't have
317    * any data.
318    *
319    * Since we sometimes use a direct listener for AudioSegments we miss periods
320    * of time for which the source didn't have any data. This ensures that the
321    * latest frame gets displayed while we wait for more data to be pushed.
322    */
323   void AdvanceBlockedInput(StreamTime aDuration) override;
324 
325   /**
326    * Dispatched from MediaStreamGraph when it has run an iteration so we can
327    * hand more data to the encoder.
328    */
329   void AdvanceCurrentTime(StreamTime aDuration) override;
330 
331  protected:
332   /**
333    * Number of samples per channel in a pcm buffer. This is also the value of
334    * frame size required by audio encoder, and listeners will be notified when
335    * at least this much data has been added to mOutgoingBuffer.
336    */
GetPacketDuration()337   virtual int GetPacketDuration() { return 0; }
338 
339   /**
340    * Initializes the audio encoder. The call of this method is delayed until we
341    * have received the first valid track from MediaStreamGraph.
342    */
343   virtual nsresult Init(int aChannels, int aSamplingRate) = 0;
344 
345   /**
346    * The number of channels are used for processing PCM data in the audio
347    * encoder. This value comes from the first valid audio chunk. If encoder
348    * can't support the channels in the chunk, downmix PCM stream can be
349    * performed. This value also be used to initialize the audio encoder.
350    */
351   int mChannels;
352 
353   /**
354    * The sampling rate of source audio data.
355    */
356   int mSamplingRate;
357 
358   /**
359    * A segment queue of incoming audio track data, from listeners.
360    * The duration of mIncomingBuffer is strictly increasing as it gets fed more
361    * data. Consumed data is replaced by null data.
362    */
363   AudioSegment mIncomingBuffer;
364 
365   /**
366    * A segment queue of outgoing audio track data to the encoder.
367    * The contents of mOutgoingBuffer will always be what has been consumed from
368    * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder
369    * sub class.
370    */
371   AudioSegment mOutgoingBuffer;
372 
373   uint32_t mAudioBitrate;
374 
375   // This may only be accessed on the MSG thread.
376   // I.e., in the regular NotifyQueuedChanges for audio to avoid adding data
377   // from that callback when the direct one is active.
378   bool mDirectConnected;
379 };
380 
/**
 * Frame dropping policy for video encoding. Taken by the VideoTrackEncoder
 * constructor; VideoTrackEncoder keeps a value of this type in
 * mFrameDroppingMode.
 */
enum class FrameDroppingMode {
  ALLOW,     // Allowed to drop frames to keep up under load
  DISALLOW,  // Must not drop any frames, even if it means we will OOM
};
385 
386 class VideoTrackEncoder : public TrackEncoder {
387  public:
388   explicit VideoTrackEncoder(TrackRate aTrackRate,
389                              FrameDroppingMode aFrameDroppingMode);
390 
391   /**
392    * Suspends encoding from aTime, i.e., all video frame with a timestamp
393    * between aTime and the timestamp of the next Resume() will be dropped.
394    */
395   void Suspend(TimeStamp aTime) override;
396 
397   /**
398    * Resumes encoding starting at aTime.
399    */
400   void Resume(TimeStamp aTime) override;
401 
402   /**
403    * Appends source video frames to mIncomingBuffer. We only append the source
404    * chunk if the image is different from mLastChunk's image. Called on the
405    * MediaStreamGraph thread.
406    */
407   void AppendVideoSegment(VideoSegment&& aSegment);
408 
409   /**
410    * Takes track data from the last time TakeTrackData ran until mCurrentTime
411    * and moves it to aSegment.
412    */
413   void TakeTrackData(VideoSegment& aSegment);
414 
415   /**
416    * Measure size of internal buffers.
417    */
418   size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) override;
419 
SetBitrate(const uint32_t aBitrate)420   void SetBitrate(const uint32_t aBitrate) override {
421     mVideoBitrate = aBitrate;
422   }
423 
424   /**
425    * Tries to initiate the VideoEncoder based on data in aSegment.
426    * This can be re-called often, as it will exit early should we already be
427    * initiated. mInitiated will only be set if there was enough data in
428    * aSegment to infer metadata. If mInitiated gets set, listeners are notified.
429    *
430    * Failing to initiate the encoder for an accumulated aDuration of 30 seconds
431    * is seen as an error and will cancel the current encoding.
432    */
433   void Init(const VideoSegment& aSegment, StreamTime aDuration);
434 
SecondsToMediaTime(double aS)435   StreamTime SecondsToMediaTime(double aS) const {
436     NS_ASSERTION(0 <= aS && aS <= TRACK_TICKS_MAX / TRACK_RATE_MAX,
437                  "Bad seconds");
438     return mTrackRate * aS;
439   }
440 
441   void Cancel() override;
442 
443   /**
444    * Notifies us that we have reached the end of the stream and no more data
445    * will be appended to mIncomingBuffer.
446    */
447   void NotifyEndOfStream() override;
448 
449   void SetStartOffset(StreamTime aStartOffset) override;
450 
451   /**
452    * Dispatched from MediaStreamGraph when it has run an iteration where the
453    * input track of the track this TrackEncoder is associated with didn't have
454    * any data.
455    *
456    * Since we use a direct listener for VideoSegments we miss periods of time
457    * for which the source didn't have any data. This ensures that the latest
458    * frame gets displayed while we wait for more data to be pushed.
459    */
460   void AdvanceBlockedInput(StreamTime aDuration) override;
461 
462   /**
463    * Dispatched from MediaStreamGraph when it has run an iteration so we can
464    * hand more data to the encoder.
465    */
466   void AdvanceCurrentTime(StreamTime aDuration) override;
467 
468   /**
469    * Set desired keyframe interval defined in milliseconds.
470    */
471   void SetKeyFrameInterval(int32_t aKeyFrameInterval);
472 
473  protected:
474   /**
475    * Initialize the video encoder. In order to collect the value of width and
476    * height of source frames, this initialization is delayed until we have
477    * received the first valid video frame from MediaStreamGraph.
478    * Listeners will be notified after it has been successfully initialized.
479    */
480   virtual nsresult Init(int aWidth, int aHeight, int aDisplayWidth,
481                         int aDisplayHeight) = 0;
482 
483   /**
484    * The width of source video frame, ceiled if the source width is odd.
485    */
486   int mFrameWidth;
487 
488   /**
489    * The height of source video frame, ceiled if the source height is odd.
490    */
491   int mFrameHeight;
492 
493   /**
494    * The display width of source video frame.
495    */
496   int mDisplayWidth;
497 
498   /**
499    * The display height of source video frame.
500    */
501   int mDisplayHeight;
502 
503   /**
504    * The last unique frame and duration so far handled by
505    * NotifyAdvanceCurrentTime. When a new frame is detected, mLastChunk is added
506    * to mOutgoingBuffer.
507    */
508   VideoChunk mLastChunk;
509 
510   /**
511    * A segment queue of incoming video track data, from listeners.
512    * The duration of mIncomingBuffer is strictly increasing as it gets fed more
513    * data. Consumed data is replaced by null data.
514    */
515   VideoSegment mIncomingBuffer;
516 
517   /**
518    * A segment queue of outgoing video track data to the encoder.
519    * The contents of mOutgoingBuffer will always be what has been consumed from
520    * mIncomingBuffer (up to mCurrentTime) but not yet consumed by the encoder
521    * sub class. There won't be any null data at the beginning of mOutgoingBuffer
522    * unless explicitly pushed by the producer.
523    */
524   VideoSegment mOutgoingBuffer;
525 
526   /**
527    * The number of mTrackRate ticks we have passed to mOutgoingBuffer.
528    */
529   StreamTime mEncodedTicks;
530 
531   /**
532    * The time of the first real video frame passed to mOutgoingBuffer (at t=0).
533    *
534    * Note that this time will progress during suspension, to make sure the
535    * incoming frames stay in sync with the output.
536    */
537   TimeStamp mStartTime;
538 
539   /**
540    * The time Suspend was called on the MediaRecorder, so we can calculate the
541    * duration on the next Resume().
542    */
543   TimeStamp mSuspendTime;
544 
545   uint32_t mVideoBitrate;
546 
547   /**
548    * ALLOW to drop frames under load.
549    * DISALLOW to encode all frames, mainly for testing.
550    */
551   FrameDroppingMode mFrameDroppingMode;
552 
553   /**
554    * The desired keyframe interval defined in milliseconds.
555    */
556   int32_t mKeyFrameInterval;
557 };
558 
559 }  // namespace mozilla
560 
561 #endif
562