1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "third_party/blink/renderer/modules/mediarecorder/vpx_encoder.h"
6
7 #include <algorithm>
8
9 #include "base/system/sys_info.h"
10 #include "media/base/video_frame.h"
11 #include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
12 #include "third_party/blink/renderer/platform/scheduler/public/post_cross_thread_task.h"
13 #include "third_party/blink/renderer/platform/scheduler/public/thread.h"
14 #include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
15 #include "third_party/blink/renderer/platform/wtf/functional.h"
16 #include "ui/gfx/geometry/size.h"
17
18 using media::VideoFrame;
19 using media::VideoFrameMetadata;
20
21 namespace blink {
22
operator ()(vpx_codec_ctx_t * codec)23 void VpxEncoder::VpxCodecDeleter::operator()(vpx_codec_ctx_t* codec) {
24 if (!codec)
25 return;
26 vpx_codec_err_t ret = vpx_codec_destroy(codec);
27 CHECK_EQ(ret, VPX_CODEC_OK);
28 delete codec;
29 }
30
GetNumberOfThreadsForEncoding()31 static int GetNumberOfThreadsForEncoding() {
32 // Do not saturate CPU utilization just for encoding. On a lower-end system
33 // with only 1 or 2 cores, use only one thread for encoding. On systems with
34 // more cores, allow half of the cores to be used for encoding.
35 return std::min(8, (base::SysInfo::NumberOfProcessors() + 1) / 2);
36 }
37
// static
// Sink task posted to the main task runner by ~VpxEncoder: it takes ownership
// of the encoding thread and the libvpx context so that both are destroyed on
// the main sequence rather than on whichever thread ran the destructor. The
// body is intentionally empty beyond the sanity check.
void VpxEncoder::ShutdownEncoder(std::unique_ptr<Thread> encoding_thread,
                                 ScopedVpxCodecCtxPtr encoder) {
  DCHECK(encoding_thread);
  // Both |encoding_thread| and |encoder| will be destroyed at end-of-scope.
}
44
// Constructs an encoder for VP9 when |use_vp9| is true, VP8 otherwise.
// A g_timebase.den of 0 is the "not yet configured" sentinel later tested by
// IsInitialized(); real configuration happens lazily in
// ConfigureEncoderOnEncodingTaskRunner() once the first frame size is known.
VpxEncoder::VpxEncoder(
    bool use_vp9,
    const VideoTrackRecorder::OnEncodedVideoCB& on_encoded_video_cb,
    int32_t bits_per_second,
    scoped_refptr<base::SequencedTaskRunner> main_task_runner)
    : VideoTrackRecorder::Encoder(on_encoded_video_cb,
                                  bits_per_second,
                                  std::move(main_task_runner)),
      use_vp9_(use_vp9) {
  codec_config_.g_timebase.den = 0;        // Not initialized.
  alpha_codec_config_.g_timebase.den = 0;  // Not initialized.
  DCHECK(encoding_thread_);
}
58
// Hands |encoding_thread_| and |encoder_| over to the main task runner so
// their destruction happens there; ShutdownEncoder() does nothing beyond
// letting the moved-in arguments go out of scope on that sequence.
VpxEncoder::~VpxEncoder() {
  PostCrossThreadTask(
      *main_task_runner_.get(), FROM_HERE,
      CrossThreadBindOnce(&VpxEncoder::ShutdownEncoder,
                          std::move(encoding_thread_), std::move(encoder_)));
}
65
// Always true: alpha is supported by running a second encoder instance over
// the alpha plane — see the PIXEL_FORMAT_I420A case in
// EncodeOnEncodingTaskRunner().
bool VpxEncoder::CanEncodeAlphaChannel() {
  return true;
}
69
// Encodes |frame| synchronously on the encoding sequence and posts the
// resulting bitstream — plus a separate alpha bitstream for I420A input —
// back to |origin_task_runner_| via OnFrameEncodeCompleted.
void VpxEncoder::EncodeOnEncodingTaskRunner(scoped_refptr<VideoFrame> frame,
                                            base::TimeTicks capture_timestamp) {
  TRACE_EVENT0("media", "VpxEncoder::EncodeOnEncodingTaskRunner");
  DCHECK_CALLED_ON_VALID_SEQUENCE(encoding_sequence_checker_);

  // GpuMemoryBuffer-backed NV12 frames must be mapped into CPU-visible memory
  // before libvpx can read the planes; mapping can fail, hence the null check.
  if (frame->format() == media::PIXEL_FORMAT_NV12 &&
      frame->storage_type() == media::VideoFrame::STORAGE_GPU_MEMORY_BUFFER)
    frame = WrapMappedGpuMemoryBufferVideoFrame(frame);
  if (!frame) {
    LOG(WARNING) << "Invalid video frame to encode";
    return;
  }

  const gfx::Size frame_size = frame->visible_rect().size();
  base::TimeDelta duration = EstimateFrameDuration(*frame);
  const media::WebmMuxer::VideoParameters video_params(frame);

  // Lazily (re)configure the encoder on first use and on frame size changes.
  if (!IsInitialized(codec_config_) ||
      gfx::Size(codec_config_.g_w, codec_config_.g_h) != frame_size) {
    ConfigureEncoderOnEncodingTaskRunner(frame_size, &codec_config_, &encoder_);
  }

  bool keyframe = false;
  bool force_keyframe = false;
  bool alpha_keyframe = false;
  std::string data;
  std::string alpha_data;
  switch (frame->format()) {
    case media::PIXEL_FORMAT_NV12: {
      last_frame_had_alpha_ = false;
      // NV12 interleaves U and V in a single plane: U bytes at even offsets,
      // V at the following byte, both using the UV-plane stride.
      DoEncode(encoder_.get(), frame_size, frame->data(VideoFrame::kYPlane),
               frame->visible_data(VideoFrame::kYPlane),
               frame->stride(VideoFrame::kYPlane),
               frame->visible_data(VideoFrame::kUVPlane),
               frame->stride(VideoFrame::kUVPlane),
               frame->visible_data(VideoFrame::kUVPlane) + 1,
               frame->stride(VideoFrame::kUVPlane), duration, force_keyframe,
               data, &keyframe, VPX_IMG_FMT_NV12);
      break;
    }
    case media::PIXEL_FORMAT_I420: {
      last_frame_had_alpha_ = false;
      DoEncode(encoder_.get(), frame_size, frame->data(VideoFrame::kYPlane),
               frame->visible_data(VideoFrame::kYPlane),
               frame->stride(VideoFrame::kYPlane),
               frame->visible_data(VideoFrame::kUPlane),
               frame->stride(VideoFrame::kUPlane),
               frame->visible_data(VideoFrame::kVPlane),
               frame->stride(VideoFrame::kVPlane), duration, force_keyframe,
               data, &keyframe, VPX_IMG_FMT_I420);
      break;
    }
    case media::PIXEL_FORMAT_I420A: {
      // Split the duration between two encoder instances if alpha is encoded.
      duration = duration / 2;
      if ((!IsInitialized(alpha_codec_config_) ||
           gfx::Size(alpha_codec_config_.g_w, alpha_codec_config_.g_h) !=
               frame_size)) {
        ConfigureEncoderOnEncodingTaskRunner(frame_size, &alpha_codec_config_,
                                             &alpha_encoder_);
        // The alpha channel is encoded as a synthetic I420 frame whose Y
        // plane is the alpha data; precompute the strides/offset of the dummy
        // chroma planes fed alongside it.
        u_plane_stride_ = media::VideoFrame::RowBytes(
            VideoFrame::kUPlane, frame->format(), frame_size.width());
        v_plane_stride_ = media::VideoFrame::RowBytes(
            VideoFrame::kVPlane, frame->format(), frame_size.width());
        v_plane_offset_ = media::VideoFrame::PlaneSize(
                              frame->format(), VideoFrame::kUPlane, frame_size)
                              .GetArea();
        alpha_dummy_planes_.resize(SafeCast<wtf_size_t>(
            v_plane_offset_ + media::VideoFrame::PlaneSize(frame->format(),
                                                           VideoFrame::kVPlane,
                                                           frame_size)
                                  .GetArea()));
        // It is more expensive to encode 0x00, so use 0x80 instead.
        std::fill(alpha_dummy_planes_.begin(), alpha_dummy_planes_.end(), 0x80);
      }
      // If we introduced a new alpha frame, force keyframe.
      force_keyframe = !last_frame_had_alpha_;
      last_frame_had_alpha_ = true;

      DoEncode(encoder_.get(), frame_size, frame->data(VideoFrame::kYPlane),
               frame->visible_data(VideoFrame::kYPlane),
               frame->stride(VideoFrame::kYPlane),
               frame->visible_data(VideoFrame::kUPlane),
               frame->stride(VideoFrame::kUPlane),
               frame->visible_data(VideoFrame::kVPlane),
               frame->stride(VideoFrame::kVPlane), duration, force_keyframe,
               data, &keyframe, VPX_IMG_FMT_I420);

      // Feed the color encoder's actual keyframe decision into the alpha
      // encoder as its force_keyframe flag so both streams stay aligned.
      DoEncode(alpha_encoder_.get(), frame_size,
               frame->data(VideoFrame::kAPlane),
               frame->visible_data(VideoFrame::kAPlane),
               frame->stride(VideoFrame::kAPlane), alpha_dummy_planes_.data(),
               SafeCast<int>(u_plane_stride_),
               alpha_dummy_planes_.data() + v_plane_offset_,
               SafeCast<int>(v_plane_stride_), duration, keyframe, alpha_data,
               &alpha_keyframe, VPX_IMG_FMT_I420);
      DCHECK_EQ(keyframe, alpha_keyframe);
      break;
    }
    default:
      NOTREACHED() << media::VideoPixelFormatToString(frame->format());
  }
  // Release the frame reference before hopping back to the origin sequence.
  frame = nullptr;

  PostCrossThreadTask(
      *origin_task_runner_.get(), FROM_HERE,
      CrossThreadBindOnce(
          OnFrameEncodeCompleted,
          WTF::Passed(CrossThreadBindRepeating(on_encoded_video_cb_)),
          video_params, std::move(data), std::move(alpha_data),
          capture_timestamp, keyframe));
}
182
// Wraps the caller-provided planes in a vpx_image_t (no copy), encodes a
// single frame, and copies the first emitted frame packet into |output_data|.
//
// |data| only seeds vpx_img_wrap(); the plane pointers and strides assigned
// immediately afterwards override whatever layout vpx_img_wrap() derived from
// it. |*keyframe| reports whether the produced packet was a keyframe (false
// if no frame packet came out).
void VpxEncoder::DoEncode(vpx_codec_ctx_t* const encoder,
                          const gfx::Size& frame_size,
                          uint8_t* const data,
                          uint8_t* const y_plane,
                          int y_stride,
                          uint8_t* const u_plane,
                          int u_stride,
                          uint8_t* const v_plane,
                          int v_stride,
                          const base::TimeDelta& duration,
                          bool force_keyframe,
                          std::string& output_data,
                          bool* const keyframe,
                          vpx_img_fmt_t img_fmt) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(encoding_sequence_checker_);
  DCHECK(img_fmt == VPX_IMG_FMT_I420 || img_fmt == VPX_IMG_FMT_NV12);

  // Stack-allocated image descriptor; vpx_img_wrap() returns its argument on
  // success, which the DCHECK verifies.
  vpx_image_t vpx_image;
  vpx_image_t* const result =
      vpx_img_wrap(&vpx_image, img_fmt, frame_size.width(), frame_size.height(),
                   1 /* align */, data);
  DCHECK_EQ(result, &vpx_image);
  vpx_image.planes[VPX_PLANE_Y] = y_plane;
  vpx_image.planes[VPX_PLANE_U] = u_plane;
  vpx_image.planes[VPX_PLANE_V] = v_plane;
  vpx_image.stride[VPX_PLANE_Y] = y_stride;
  vpx_image.stride[VPX_PLANE_U] = u_stride;
  vpx_image.stride[VPX_PLANE_V] = v_stride;

  const vpx_codec_flags_t flags = force_keyframe ? VPX_EFLAG_FORCE_KF : 0;
  // Encode the frame. The presentation time stamp argument here is fixed to
  // zero to force the encoder to base its single-frame bandwidth calculations
  // entirely on |predicted_frame_duration|.
  const vpx_codec_err_t ret =
      vpx_codec_encode(encoder, &vpx_image, 0 /* pts */,
                       static_cast<unsigned long>(duration.InMicroseconds()),
                       flags, VPX_DL_REALTIME);
  DCHECK_EQ(ret, VPX_CODEC_OK)
      << vpx_codec_err_to_string(ret) << ", #" << vpx_codec_error(encoder)
      << " -" << vpx_codec_error_detail(encoder);

  // Pull the first frame packet out of the encoder, skipping non-frame (e.g.
  // stats) packets. With g_lag_in_frames forced to 0 at configuration time,
  // at most one frame packet is expected per encode call.
  *keyframe = false;
  vpx_codec_iter_t iter = nullptr;
  const vpx_codec_cx_pkt_t* pkt = nullptr;
  while ((pkt = vpx_codec_get_cx_data(encoder, &iter))) {
    if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
      continue;
    output_data.assign(static_cast<char*>(pkt->data.frame.buf),
                       pkt->data.frame.sz);
    *keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
    break;
  }
}
236
// (Re)creates |*encoder| for frames of |size|: destroys any existing codec
// context, fills |*codec_config| from libvpx defaults, overrides the fields
// this recorder cares about (bitrate, dimensions, timebase, keyframe cadence,
// threading, zero-lag), and initializes a fresh context.
//
// NOTE(review): the DCHECKs on 320x240 / 256kbps / profile 0 pin the libvpx
// default-config values this code was written against; they run BEFORE those
// fields are overwritten below, so statement order here matters.
void VpxEncoder::ConfigureEncoderOnEncodingTaskRunner(
    const gfx::Size& size,
    vpx_codec_enc_cfg_t* codec_config,
    ScopedVpxCodecCtxPtr* encoder) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(encoding_sequence_checker_);
  if (IsInitialized(*codec_config)) {
    // TODO(mcasas) VP8 quirk/optimisation: If the new |size| is strictly less-
    // than-or-equal than the old size, in terms of area, the existing encoder
    // instance could be reused after changing |codec_config->{g_w,g_h}|.
    DVLOG(1) << "Destroying/Re-Creating encoder for new frame size: "
             << gfx::Size(codec_config->g_w, codec_config->g_h).ToString()
             << " --> " << size.ToString() << (use_vp9_ ? " vp9" : " vp8");
    encoder->reset();
  }

  const vpx_codec_iface_t* codec_interface =
      use_vp9_ ? vpx_codec_vp9_cx() : vpx_codec_vp8_cx();
  vpx_codec_err_t result = vpx_codec_enc_config_default(
      codec_interface, codec_config, 0 /* reserved */);
  DCHECK_EQ(VPX_CODEC_OK, result);

  DCHECK_EQ(320u, codec_config->g_w);
  DCHECK_EQ(240u, codec_config->g_h);
  DCHECK_EQ(256u, codec_config->rc_target_bitrate);
  // Use the selected bitrate or adjust default bit rate to account for the
  // actual size. Note: |rc_target_bitrate| units are kbit per second.
  if (bits_per_second_ > 0) {
    codec_config->rc_target_bitrate = bits_per_second_ / 1000;
  } else {
    // Scale the 256kbps @ 320x240 default linearly with pixel area.
    codec_config->rc_target_bitrate = size.GetArea() *
                                      codec_config->rc_target_bitrate /
                                      codec_config->g_w / codec_config->g_h;
  }
  // Both VP8/VP9 configuration should be Variable BitRate by default.
  DCHECK_EQ(VPX_VBR, codec_config->rc_end_usage);
  if (use_vp9_) {
    // Number of frames to consume before producing output.
    codec_config->g_lag_in_frames = 0;

    // DCHECK that the profile selected by default is I420 (magic number 0).
    DCHECK_EQ(0u, codec_config->g_profile);
  } else {
    // VP8 always produces frames instantaneously.
    DCHECK_EQ(0u, codec_config->g_lag_in_frames);
  }

  DCHECK(size.width());
  DCHECK(size.height());
  codec_config->g_w = size.width();
  codec_config->g_h = size.height();
  codec_config->g_pass = VPX_RC_ONE_PASS;

  // Timebase is the smallest interval used by the stream, can be set to the
  // frame rate or to e.g. microseconds.
  codec_config->g_timebase.num = 1;
  codec_config->g_timebase.den = base::Time::kMicrosecondsPerSecond;

  // Let the encoder decide where to place the Keyframes, between min and max.
  // In VPX_KF_AUTO mode libvpx will sometimes emit keyframes regardless of min/
  // max distance out of necessity.
  // Note that due to http://crbug.com/440223, it might be necessary to force a
  // key frame after 10,000frames since decoding fails after 30,000 non-key
  // frames.
  // Forcing a keyframe in regular intervals also allows seeking in the
  // resulting recording with decent performance.
  codec_config->kf_mode = VPX_KF_AUTO;
  codec_config->kf_min_dist = 0;
  codec_config->kf_max_dist = 100;

  codec_config->g_threads = GetNumberOfThreadsForEncoding();

  // Number of frames to consume before producing output.
  codec_config->g_lag_in_frames = 0;

  encoder->reset(new vpx_codec_ctx_t);
  const vpx_codec_err_t ret = vpx_codec_enc_init(
      encoder->get(), codec_interface, codec_config, 0 /* flags */);
  DCHECK_EQ(VPX_CODEC_OK, ret);

  if (use_vp9_) {
    // Values of VP8E_SET_CPUUSED greater than 0 will increase encoder speed at
    // the expense of quality up to a maximum value of 8 for VP9, by tuning the
    // target time spent encoding the frame. Go from 8 to 5 (values for real
    // time encoding) depending on the amount of cores available in the system.
    const int kCpuUsed =
        std::max(5, 8 - base::SysInfo::NumberOfProcessors() / 2);
    result = vpx_codec_control(encoder->get(), VP8E_SET_CPUUSED, kCpuUsed);
    DLOG_IF(WARNING, VPX_CODEC_OK != result) << "VP8E_SET_CPUUSED failed";
  }
}
327
IsInitialized(const vpx_codec_enc_cfg_t & codec_config) const328 bool VpxEncoder::IsInitialized(const vpx_codec_enc_cfg_t& codec_config) const {
329 DCHECK_CALLED_ON_VALID_SEQUENCE(encoding_sequence_checker_);
330 return codec_config.g_timebase.den != 0;
331 }
332
EstimateFrameDuration(const VideoFrame & frame)333 base::TimeDelta VpxEncoder::EstimateFrameDuration(const VideoFrame& frame) {
334 DCHECK_CALLED_ON_VALID_SEQUENCE(encoding_sequence_checker_);
335
336 using base::TimeDelta;
337
338 // If the source of the video frame did not provide the frame duration, use
339 // the actual amount of time between the current and previous frame as a
340 // prediction for the next frame's duration.
341 // TODO(mcasas): This duration estimation could lead to artifacts if the
342 // cadence of the received stream is compromised (e.g. camera freeze, pause,
343 // remote packet loss). Investigate using GetFrameRate() in this case.
344 base::TimeDelta predicted_frame_duration =
345 frame.timestamp() - last_frame_timestamp_;
346 base::TimeDelta frame_duration =
347 frame.metadata()->frame_duration.value_or(predicted_frame_duration);
348 last_frame_timestamp_ = frame.timestamp();
349 // Make sure |frame_duration| is in a safe range of values.
350 const base::TimeDelta kMaxFrameDuration =
351 base::TimeDelta::FromSecondsD(1.0 / 8);
352 const base::TimeDelta kMinFrameDuration =
353 base::TimeDelta::FromMilliseconds(1);
354 return std::min(kMaxFrameDuration,
355 std::max(frame_duration, kMinFrameDuration));
356 }
357
358 } // namespace blink
359