1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/gpu/vaapi/vaapi_video_decode_accelerator.h"
6
7 #include <string.h>
8 #include <va/va.h>
9
10 #include <memory>
11
12 #include "base/bind.h"
13 #include "base/bind_helpers.h"
14 #include "base/cpu.h"
15 #include "base/files/scoped_file.h"
16 #include "base/logging.h"
17 #include "base/macros.h"
18 #include "base/metrics/histogram_macros.h"
19 #include "base/numerics/ranges.h"
20 #include "base/stl_util.h"
21 #include "base/strings/string_util.h"
22 #include "base/synchronization/waitable_event.h"
23 #include "base/threading/thread_task_runner_handle.h"
24 #include "base/trace_event/memory_dump_manager.h"
25 #include "base/trace_event/process_memory_dump.h"
26 #include "base/trace_event/trace_event.h"
27 #include "gpu/ipc/service/gpu_channel.h"
28 #include "media/base/bind_to_current_loop.h"
29 #include "media/base/format_utils.h"
30 #include "media/base/unaligned_shared_memory.h"
31 #include "media/base/video_util.h"
32 #include "media/gpu/accelerated_video_decoder.h"
33 #include "media/gpu/h264_decoder.h"
34 #include "media/gpu/macros.h"
35 #include "media/gpu/vaapi/h264_vaapi_video_decoder_delegate.h"
36 #include "media/gpu/vaapi/vaapi_common.h"
37 #include "media/gpu/vaapi/vaapi_picture.h"
38 #include "media/gpu/vaapi/vaapi_utils.h"
39 #include "media/gpu/vaapi/vp8_vaapi_video_decoder_delegate.h"
40 #include "media/gpu/vaapi/vp9_vaapi_video_decoder_delegate.h"
41 #include "media/gpu/vp8_decoder.h"
42 #include "media/gpu/vp9_decoder.h"
43 #include "media/video/picture.h"
44 #include "ui/gl/gl_image.h"
45
46 namespace media {
47
48 namespace {
49
// UMA errors that the VaapiVideoDecodeAccelerator class reports.
// These values are persisted to UMA logs: do not renumber or reuse entries.
enum VAVDADecoderFailure {
  VAAPI_ERROR = 0,
  VAAPI_VPP_ERROR = 1,
  // Sentinel, must remain last; used as the histogram boundary below.
  VAVDA_DECODER_FAILURES_MAX,
};
56
57 // Returns the preferred VA_RT_FORMAT for the given |profile|.
GetVaFormatForVideoCodecProfile(VideoCodecProfile profile)58 unsigned int GetVaFormatForVideoCodecProfile(VideoCodecProfile profile) {
59 if (profile == VP9PROFILE_PROFILE2 || profile == VP9PROFILE_PROFILE3)
60 return VA_RT_FORMAT_YUV420_10BPP;
61 return VA_RT_FORMAT_YUV420;
62 }
63
// Reports |failure| to the "Media.VAVDA.DecoderFailure" UMA histogram.
void ReportToUMA(VAVDADecoderFailure failure) {
  // NOTE(review): the exclusive boundary is MAX + 1, which leaves one unused
  // bucket. Kept as-is: changing the boundary would break continuity of the
  // already-recorded histogram data.
  UMA_HISTOGRAM_ENUMERATION("Media.VAVDA.DecoderFailure", failure,
                            VAVDA_DECODER_FAILURES_MAX + 1);
}
68
69 // Returns true if the CPU is an Intel Gemini Lake or later (including Kaby
70 // Lake) Cpu platform id's are referenced from the following file in kernel
71 // source arch/x86/include/asm/intel-family.h
IsGeminiLakeOrLater()72 bool IsGeminiLakeOrLater() {
73 constexpr int kPentiumAndLaterFamily = 0x06;
74 constexpr int kGeminiLakeModelId = 0x7A;
75 static base::CPU cpuid;
76 static bool is_geminilake_or_later =
77 cpuid.family() == kPentiumAndLaterFamily &&
78 cpuid.model() >= kGeminiLakeModelId;
79 return is_geminilake_or_later;
80 }
81
82 } // namespace
83
// If |result| is false: logs |log|, reports |error_code| to the client via
// NotifyError() and returns |ret| from the calling function (|ret| is left
// empty for void functions).
#define RETURN_AND_NOTIFY_ON_FAILURE(result, log, error_code, ret) \
  do {                                                             \
    if (!(result)) {                                               \
      LOG(ERROR) << log;                                           \
      NotifyError(error_code);                                     \
      return ret;                                                  \
    }                                                              \
  } while (0)
92
// Wraps one client-submitted bitstream buffer together with its id. On
// destruction, runs |release_cb_| to tell the client the buffer has been
// consumed. A default-constructed instance holds no buffer and serves as a
// flush marker (see IsFlushRequest()).
class VaapiVideoDecodeAccelerator::InputBuffer {
 public:
  InputBuffer() : buffer_(nullptr) {}
  InputBuffer(int32_t id,
              scoped_refptr<DecoderBuffer> buffer,
              base::OnceCallback<void(int32_t id)> release_cb)
      : id_(id),
        buffer_(std::move(buffer)),
        release_cb_(std::move(release_cb)) {}
  ~InputBuffer() {
    DVLOGF(4) << "id = " << id_;
    // Notify the owner of |release_cb_| that buffer |id_| is done with, if
    // a callback was provided (flush markers have none).
    if (release_cb_)
      std::move(release_cb_).Run(id_);
  }

  // Indicates this is a dummy buffer for flush request.
  bool IsFlushRequest() const { return !buffer_; }
  int32_t id() const { return id_; }
  const scoped_refptr<DecoderBuffer>& buffer() const { return buffer_; }

 private:
  const int32_t id_ = -1;
  const scoped_refptr<DecoderBuffer> buffer_;
  base::OnceCallback<void(int32_t id)> release_cb_;

  DISALLOW_COPY_AND_ASSIGN(InputBuffer);
};
120
// Reports |error| to |client_|. Callable from either the main |task_runner_|
// or the decoder thread; in the latter case it re-posts itself to
// |task_runner_|, since |client_| may only be used there.
void VaapiVideoDecodeAccelerator::NotifyError(Error error) {
  if (!task_runner_->BelongsToCurrentThread()) {
    DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
    task_runner_->PostTask(
        FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::NotifyError,
                                  weak_this_, error));
    return;
  }

  VLOGF(1) << "Notifying of error " << error;
  if (client_) {
    client_->NotifyError(error);
    // Invalidate all outstanding weak pointers to |client_|: no further
    // client callbacks are made after an error has been signalled.
    client_ptr_factory_.reset();
  }
}
136
// Constructed on the GPU main thread; that thread's task runner becomes
// |task_runner_|. |make_context_current_cb| and |bind_image_cb| are used for
// GL interactions from that same thread.
VaapiVideoDecodeAccelerator::VaapiVideoDecodeAccelerator(
    const MakeGLContextCurrentCallback& make_context_current_cb,
    const BindGLImageCallback& bind_image_cb)
    : state_(kUninitialized),
      input_ready_(&lock_),
      vaapi_picture_factory_(new VaapiPictureFactory()),
      buffer_allocation_mode_(BufferAllocationMode::kNormal),
      surfaces_available_(&lock_),
      va_surface_format_(VA_INVALID_ID),
      task_runner_(base::ThreadTaskRunnerHandle::Get()),
      decoder_thread_("VaapiDecoderThread"),
      finish_flush_pending_(false),
      awaiting_va_surfaces_recycle_(false),
      requested_num_pics_(0),
      requested_num_reference_frames_(0),
      previously_requested_num_reference_frames_(0),
      profile_(VIDEO_CODEC_PROFILE_UNKNOWN),
      make_context_current_cb_(make_context_current_cb),
      bind_image_cb_(bind_image_cb),
      weak_this_factory_(this) {
  weak_this_ = weak_this_factory_.GetWeakPtr();
  // BindToCurrentLoop() makes the recycle callback safe to invoke from any
  // thread: it bounces execution back to this (construction) thread.
  va_surface_recycle_cb_ = BindToCurrentLoop(base::BindRepeating(
      &VaapiVideoDecodeAccelerator::RecycleVASurface, weak_this_));
  // Register as a memory dump provider so VA allocations show up in
  // chrome://tracing memory dumps; unregistered in the destructor.
  base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
      this, "media::VaapiVideoDecodeAccelerator",
      base::ThreadTaskRunnerHandle::Get());
}
164
// Must be destroyed on the same thread it was constructed on. Mirrors the
// RegisterDumpProvider() call made in the constructor.
VaapiVideoDecodeAccelerator::~VaapiVideoDecodeAccelerator() {
  DCHECK(task_runner_->BelongsToCurrentThread());
  base::trace_event::MemoryDumpManager::GetInstance()->UnregisterDumpProvider(
      this);
}
170
Initialize(const Config & config,Client * client)171 bool VaapiVideoDecodeAccelerator::Initialize(const Config& config,
172 Client* client) {
173 DCHECK(task_runner_->BelongsToCurrentThread());
174
175 if (config.is_encrypted()) {
176 NOTREACHED() << "Encrypted streams are not supported for this VDA";
177 return false;
178 }
179
180 client_ptr_factory_.reset(new base::WeakPtrFactory<Client>(client));
181 client_ = client_ptr_factory_->GetWeakPtr();
182
183 VideoCodecProfile profile = config.profile;
184
185 base::AutoLock auto_lock(lock_);
186 DCHECK_EQ(state_, kUninitialized);
187 VLOGF(2) << "Initializing VAVDA, profile: " << GetProfileName(profile);
188
189 vaapi_wrapper_ = VaapiWrapper::CreateForVideoCodec(
190 VaapiWrapper::kDecode, profile, base::Bind(&ReportToUMA, VAAPI_ERROR));
191
192 UMA_HISTOGRAM_BOOLEAN("Media.VAVDA.VaapiWrapperCreationSuccess",
193 vaapi_wrapper_.get());
194 if (!vaapi_wrapper_.get()) {
195 VLOGF(1) << "Failed initializing VAAPI for profile "
196 << GetProfileName(profile);
197 return false;
198 }
199
200 if (profile >= H264PROFILE_MIN && profile <= H264PROFILE_MAX) {
201 auto accelerator =
202 std::make_unique<H264VaapiVideoDecoderDelegate>(this, vaapi_wrapper_);
203 decoder_delegate_ = accelerator.get();
204 decoder_.reset(new H264Decoder(std::move(accelerator), profile,
205 config.container_color_space));
206 } else if (profile >= VP8PROFILE_MIN && profile <= VP8PROFILE_MAX) {
207 auto accelerator =
208 std::make_unique<VP8VaapiVideoDecoderDelegate>(this, vaapi_wrapper_);
209 decoder_delegate_ = accelerator.get();
210 decoder_.reset(new VP8Decoder(std::move(accelerator)));
211 } else if (profile >= VP9PROFILE_MIN && profile <= VP9PROFILE_MAX) {
212 auto accelerator =
213 std::make_unique<VP9VaapiVideoDecoderDelegate>(this, vaapi_wrapper_);
214 decoder_delegate_ = accelerator.get();
215 decoder_.reset(new VP9Decoder(std::move(accelerator), profile,
216 config.container_color_space));
217 } else {
218 VLOGF(1) << "Unsupported profile " << GetProfileName(profile);
219 return false;
220 }
221
222 CHECK(decoder_thread_.Start());
223 decoder_thread_task_runner_ = decoder_thread_.task_runner();
224
225 state_ = kIdle;
226 profile_ = profile;
227 output_mode_ = config.output_mode;
228 buffer_allocation_mode_ = DecideBufferAllocationMode();
229 previously_requested_num_reference_frames_ = 0;
230 return true;
231 }
232
// Pairs the decoded |va_surface| with a free PictureBuffer, copies the
// surface into it when the modes require it, and notifies |client_| that the
// picture for |input_id| is ready. Runs on |task_runner_|.
void VaapiVideoDecodeAccelerator::OutputPicture(
    scoped_refptr<VASurface> va_surface,
    int32_t input_id,
    gfx::Rect visible_rect,
    const VideoColorSpace& picture_color_space) {
  DCHECK(task_runner_->BelongsToCurrentThread());

  const VASurfaceID va_surface_id = va_surface->id();

  VaapiPicture* picture = nullptr;
  {
    base::AutoLock auto_lock(lock_);
    // Default: use the first available picture buffer.
    int32_t picture_buffer_id = available_picture_buffers_.front();
    if (buffer_allocation_mode_ == BufferAllocationMode::kNone) {
      // In kNone mode the VaapiPictures own the decode surfaces themselves,
      // so the output must go to the specific buffer wrapping this surface.
      // Find the |pictures_| entry matching |va_surface_id|.
      for (const auto& id_and_picture : pictures_) {
        if (id_and_picture.second->va_surface_id() == va_surface_id) {
          picture_buffer_id = id_and_picture.first;
          break;
        }
      }
    }
    picture = pictures_[picture_buffer_id].get();
    DCHECK(base::Contains(available_picture_buffers_, picture_buffer_id));
    base::Erase(available_picture_buffers_, picture_buffer_id);
  }

  DCHECK(picture) << " could not find " << va_surface_id << " available";
  const int32_t output_id = picture->picture_buffer_id();

  DVLOGF(4) << "Outputting VASurface " << va_surface->id()
            << " into pixmap bound to picture buffer id " << output_id;

  if (buffer_allocation_mode_ != BufferAllocationMode::kNone) {
    // Decode surface and output buffer are distinct: transfer the contents.
    TRACE_EVENT2("media,gpu", "VAVDA::DownloadFromSurface", "input_id",
                 input_id, "output_id", output_id);
    RETURN_AND_NOTIFY_ON_FAILURE(picture->DownloadFromSurface(va_surface),
                                 "Failed putting surface into pixmap",
                                 PLATFORM_FAILURE, );
  }

  {
    base::AutoLock auto_lock(lock_);
    TRACE_COUNTER_ID2("media,gpu", "Vaapi frames at client", this, "used",
                      pictures_.size() - available_picture_buffers_.size(),
                      "available", available_picture_buffers_.size());
  }

  DVLOGF(4) << "Notifying output picture id " << output_id << " for input "
            << input_id
            << " is ready. visible rect: " << visible_rect.ToString();
  if (!client_)
    return;

  Picture client_picture(output_id, input_id, visible_rect,
                         picture_color_space.ToGfxColorSpace(),
                         picture->AllowOverlay());
  client_picture.set_read_lock_fences_enabled(true);
  // Notify the |client_| a picture is ready to be consumed.
  client_->PictureReady(client_picture);
}
294
// Runs the next queued output callback if both a pending callback and a free
// PictureBuffer exist. Called on |task_runner_| whenever either becomes
// available.
void VaapiVideoDecodeAccelerator::TryOutputPicture() {
  DCHECK(task_runner_->BelongsToCurrentThread());

  // Handle Destroy() arriving while pictures are queued for output.
  if (!client_)
    return;

  {
    base::AutoLock auto_lock(lock_);
    if (pending_output_cbs_.empty() || available_picture_buffers_.empty())
      return;
  }

  // NOTE(review): |pending_output_cbs_| is popped outside |lock_| here;
  // presumably it is only mutated on this thread after the check above —
  // confirm against the queue's other writers.
  auto output_cb = std::move(pending_output_cbs_.front());
  pending_output_cbs_.pop();
  std::move(output_cb).Run();

  // A flush may have been waiting for the last pending output to drain.
  if (finish_flush_pending_ && pending_output_cbs_.empty())
    FinishFlush();
}
315
// Enqueues |buffer| for decoding (or a flush marker if it is an EOS buffer),
// wakes the decoder thread and kicks off DecodeTask() when idle. Runs on
// |task_runner_|.
void VaapiVideoDecodeAccelerator::QueueInputBuffer(
    scoped_refptr<DecoderBuffer> buffer,
    int32_t bitstream_id) {
  DVLOGF(4) << "Queueing new input buffer id: " << bitstream_id
            << " size: " << (buffer->end_of_stream() ? 0 : buffer->data_size());
  DCHECK(task_runner_->BelongsToCurrentThread());
  TRACE_EVENT1("media,gpu", "QueueInputBuffer", "input_id", bitstream_id);

  base::AutoLock auto_lock(lock_);
  if (buffer->end_of_stream()) {
    // A buffer-less InputBuffer is the flush sentinel (IsFlushRequest()).
    auto flush_buffer = std::make_unique<InputBuffer>();
    DCHECK(flush_buffer->IsFlushRequest());
    input_buffers_.push(std::move(flush_buffer));
  } else {
    // The release callback acknowledges the bitstream buffer to the client
    // (back on this thread) once the InputBuffer is destroyed.
    auto input_buffer = std::make_unique<InputBuffer>(
        bitstream_id, std::move(buffer),
        BindToCurrentLoop(
            base::Bind(&Client::NotifyEndOfBitstreamBuffer, client_)));
    input_buffers_.push(std::move(input_buffer));
  }

  TRACE_COUNTER1("media,gpu", "Vaapi input buffers", input_buffers_.size());
  // Wake the decoder thread if it is blocked in GetCurrInputBuffer_Locked().
  input_ready_.Signal();

  switch (state_) {
    case kIdle:
      state_ = kDecoding;
      decoder_thread_task_runner_->PostTask(
          FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::DecodeTask,
                                    base::Unretained(this)));
      break;

    case kDecoding:
      // Decoder already running.
      break;

    case kResetting:
      // When resetting, allow accumulating bitstream buffers, so that
      // the client can queue after-seek-buffers while we are finishing with
      // the before-seek one.
      break;

    default:
      LOG(ERROR) << "Decode/Flush request from client in invalid state: "
                 << state_;
      NotifyError(PLATFORM_FAILURE);
      break;
  }
}
365
// Ensures |curr_input_buffer_| holds the next input, blocking on
// |input_ready_| while the queue is empty and new input can still arrive.
// Returns true if a buffer (possibly a flush marker) is current, false if
// decoding should stop. |lock_| must be held; runs on the decoder thread.
bool VaapiVideoDecodeAccelerator::GetCurrInputBuffer_Locked() {
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
  lock_.AssertAcquired();

  if (curr_input_buffer_.get())
    return true;

  // Will only wait if it is expected that in current state new buffers will
  // be queued from the client via Decode(). The state can change during wait.
  while (input_buffers_.empty() && (state_ == kDecoding || state_ == kIdle))
    input_ready_.Wait();

  // We could have got woken up in a different state or never got to sleep
  // due to current state.
  if (state_ != kDecoding && state_ != kIdle)
    return false;

  DCHECK(!input_buffers_.empty());
  curr_input_buffer_ = std::move(input_buffers_.front());
  input_buffers_.pop();
  TRACE_COUNTER1("media,gpu", "Vaapi input buffers", input_buffers_.size());

  if (curr_input_buffer_->IsFlushRequest()) {
    DVLOGF(4) << "New flush buffer";
    return true;
  }

  DVLOGF(4) << "New |curr_input_buffer_|, id: " << curr_input_buffer_->id()
            << " size: " << curr_input_buffer_->buffer()->data_size() << "B";
  // Hand the new bitstream data to the codec-specific decoder.
  decoder_->SetStream(curr_input_buffer_->id(), *curr_input_buffer_->buffer());
  return true;
}
398
ReturnCurrInputBuffer_Locked()399 void VaapiVideoDecodeAccelerator::ReturnCurrInputBuffer_Locked() {
400 DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
401 lock_.AssertAcquired();
402 DCHECK(curr_input_buffer_.get());
403 curr_input_buffer_.reset();
404 }
405
// TODO(posciak): refactor the whole class to remove sleeping in wait for
// surfaces, and reschedule DecodeTask instead.
// Blocks on |surfaces_available_| until a VA surface is free or the state
// leaves kDecoding/kIdle. Returns true iff decoding may continue. |lock_|
// must be held; runs on the decoder thread.
bool VaapiVideoDecodeAccelerator::WaitForSurfaces_Locked() {
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
  lock_.AssertAcquired();

  while (available_va_surfaces_.empty() &&
         (state_ == kDecoding || state_ == kIdle)) {
    surfaces_available_.Wait();
  }

  return state_ == kDecoding || state_ == kIdle;
}
419
// Decoder-thread main loop: pulls input buffers and feeds |decoder_| until
// the stream runs dry, surfaces run out, or a flush / config change / error
// interrupts decoding.
void VaapiVideoDecodeAccelerator::DecodeTask() {
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
  base::AutoLock auto_lock(lock_);

  if (state_ != kDecoding)
    return;
  DVLOGF(4) << "Decode task";

  // Try to decode what stream data is (still) in the decoder until we run out
  // of it.
  while (GetCurrInputBuffer_Locked()) {
    DCHECK(curr_input_buffer_.get());

    if (curr_input_buffer_->IsFlushRequest()) {
      FlushTask();
      break;
    }

    AcceleratedVideoDecoder::DecodeResult res;
    {
      // We are OK releasing the lock here, as decoder never calls our methods
      // directly and we will reacquire the lock before looking at state again.
      // This is the main decode function of the decoder and while keeping
      // the lock for its duration would be fine, it would defeat the purpose
      // of having a separate decoder thread.
      base::AutoUnlock auto_unlock(lock_);
      TRACE_EVENT0("media,gpu", "VAVDA::Decode");
      res = decoder_->Decode();
    }

    switch (res) {
      case AcceleratedVideoDecoder::kConfigChange: {
        // The visible rect should be a subset of the picture size. Otherwise,
        // the encoded stream is bad.
        const gfx::Size pic_size = decoder_->GetPicSize();
        const gfx::Rect visible_rect = decoder_->GetVisibleRect();
        RETURN_AND_NOTIFY_ON_FAILURE(
            gfx::Rect(pic_size).Contains(visible_rect),
            "The visible rectangle is not contained by the picture size",
            UNREADABLE_INPUT, );
        VLOGF(2) << "Decoder requesting a new set of surfaces";
        // Hop to the main thread to renegotiate picture buffers with the
        // client; decoding stops until that completes.
        task_runner_->PostTask(
            FROM_HERE,
            base::BindOnce(
                &VaapiVideoDecodeAccelerator::InitiateSurfaceSetChange,
                weak_this_, decoder_->GetRequiredNumOfPictures(), pic_size,
                decoder_->GetNumReferenceFrames(), visible_rect));
        // We'll get rescheduled once ProvidePictureBuffers() finishes.
        return;
      }
      case AcceleratedVideoDecoder::kRanOutOfStreamData:
        ReturnCurrInputBuffer_Locked();
        break;

      case AcceleratedVideoDecoder::kRanOutOfSurfaces:
        // No more output buffers in the decoder, try getting more or go to
        // sleep waiting for them.
        if (!WaitForSurfaces_Locked())
          return;

        break;

      case AcceleratedVideoDecoder::kNeedContextUpdate:
        // This should not happen as we return false from
        // IsFrameContextRequired().
        NOTREACHED() << "Context updates not supported";
        return;

      case AcceleratedVideoDecoder::kDecodeError:
        RETURN_AND_NOTIFY_ON_FAILURE(false, "Error decoding stream",
                                     PLATFORM_FAILURE, );
        return;

      case AcceleratedVideoDecoder::kTryAgain:
        NOTREACHED() << "Should not reach here unless this class accepts "
                        "encrypted streams.";
        RETURN_AND_NOTIFY_ON_FAILURE(false, "Error decoding stream",
                                     PLATFORM_FAILURE, );
        return;
    }
  }
}
502
// Begins a surface-set (resolution/config) change: records the new
// requirements and starts waiting for all outstanding surfaces and
// PictureBuffers to come back before re-allocating. Runs on |task_runner_|;
// the decoder thread has already stopped when this is posted (see
// DecodeTask()'s kConfigChange handling).
void VaapiVideoDecodeAccelerator::InitiateSurfaceSetChange(
    size_t num_pics,
    gfx::Size size,
    size_t num_reference_frames,
    const gfx::Rect& visible_rect) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  DCHECK(!awaiting_va_surfaces_recycle_);
  DCHECK_GT(num_pics, num_reference_frames);

  // At this point decoder has stopped running and has already posted onto our
  // loop any remaining output request callbacks, which executed before we got
  // here. Some of them might have been pended though, because we might not have
  // had enough PictureBuffers to output surfaces to. Initiate a wait cycle,
  // which will wait for client to return enough PictureBuffers to us, so that
  // we can finish all pending output callbacks, releasing associated surfaces.
  awaiting_va_surfaces_recycle_ = true;

  requested_pic_size_ = size;
  requested_visible_rect_ = visible_rect;
  if (buffer_allocation_mode_ == BufferAllocationMode::kSuperReduced) {
    // Add one to the reference frames for the one being currently egressed.
    requested_num_reference_frames_ = num_reference_frames + 1;
    requested_num_pics_ = num_pics - num_reference_frames;
  } else if (buffer_allocation_mode_ == BufferAllocationMode::kReduced) {
    // Add one to the reference frames for the one being currently egressed,
    // and an extra allocation for both |client_| and |decoder_|.
    requested_num_reference_frames_ = num_reference_frames + 2;
    requested_num_pics_ = num_pics - num_reference_frames + 1;
  } else {
    // kNormal/kNone: surfaces are not split between reference and output
    // uses, so ask for the full set (plus any client-requested extras).
    requested_num_reference_frames_ = 0;
    requested_num_pics_ = num_pics + num_extra_pics_;
  }

  VLOGF(2) << " |requested_num_pics_| = " << requested_num_pics_
           << "; |requested_num_reference_frames_| = "
           << requested_num_reference_frames_;

  TryFinishSurfaceSetChange();
}
542
// Completes a surface-set change started by InitiateSurfaceSetChange() once
// every output callback has run and all VA surfaces are back; otherwise
// re-posts itself and waits. On completion, dismisses all PictureBuffers and
// asks the client for a new set. Runs on |task_runner_|.
void VaapiVideoDecodeAccelerator::TryFinishSurfaceSetChange() {
  DCHECK(task_runner_->BelongsToCurrentThread());

  if (!awaiting_va_surfaces_recycle_)
    return;

  base::AutoLock auto_lock(lock_);
  const size_t expected_max_available_va_surfaces =
      IsBufferAllocationModeReducedOrSuperReduced()
          ? previously_requested_num_reference_frames_
          : pictures_.size();
  if (!pending_output_cbs_.empty() ||
      expected_max_available_va_surfaces != available_va_surfaces_.size()) {
    // If we're here the stream resolution has changed; we need to wait until:
    // - all |pending_output_cbs_| have been executed
    // - all VASurfaces are back to |available_va_surfaces_|; we can't use
    //   |requested_num_reference_frames_| for comparison, since it might have
    //   changed in the previous call to InitiateSurfaceSetChange(), so we use
    //   |previously_requested_num_reference_frames_| instead.
    DVLOGF(2) << "Awaiting pending output/surface release callbacks to finish";
    task_runner_->PostTask(
        FROM_HERE,
        base::BindOnce(&VaapiVideoDecodeAccelerator::TryFinishSurfaceSetChange,
                       weak_this_));
    return;
  }

  previously_requested_num_reference_frames_ = requested_num_reference_frames_;

  // All surfaces released, destroy them and dismiss all PictureBuffers.
  awaiting_va_surfaces_recycle_ = false;

  // A profile change (e.g. mid-stream switch) needs a new VaapiWrapper; the
  // delegate keeps decoding with the swapped-in wrapper.
  VideoCodecProfile new_profile = decoder_->GetProfile();
  if (profile_ != new_profile) {
    DCHECK(decoder_delegate_);
    profile_ = new_profile;
    auto new_vaapi_wrapper = VaapiWrapper::CreateForVideoCodec(
        VaapiWrapper::kDecode, profile_, base::Bind(&ReportToUMA, VAAPI_ERROR));
    RETURN_AND_NOTIFY_ON_FAILURE(new_vaapi_wrapper.get(),
                                 "Failed creating VaapiWrapper",
                                 INVALID_ARGUMENT, );
    decoder_delegate_->set_vaapi_wrapper(new_vaapi_wrapper.get());
    vaapi_wrapper_ = std::move(new_vaapi_wrapper);
  } else {
    vaapi_wrapper_->DestroyContext();
  }

  available_va_surfaces_.clear();

  for (auto iter = pictures_.begin(); iter != pictures_.end(); ++iter) {
    VLOGF(2) << "Dismissing picture id: " << iter->first;
    if (client_)
      client_->DismissPictureBuffer(iter->first);
  }
  pictures_.clear();

  // And ask for a new set as requested.
  VLOGF(2) << "Requesting " << requested_num_pics_
           << " pictures of size: " << requested_pic_size_.ToString()
           << " and visible rectangle = " << requested_visible_rect_.ToString();

  const base::Optional<VideoPixelFormat> format =
      GfxBufferFormatToVideoPixelFormat(
          vaapi_picture_factory_->GetBufferFormat());
  CHECK(format);
  task_runner_->PostTask(
      FROM_HERE, base::BindOnce(&Client::ProvidePictureBuffersWithVisibleRect,
                                client_, requested_num_pics_, *format, 1,
                                requested_pic_size_, requested_visible_rect_,
                                vaapi_picture_factory_->GetGLTextureTarget()));
  // |client_| may respond via AssignPictureBuffers().
}
615
// Legacy VideoDecodeAccelerator entry point: unwraps the BitstreamBuffer and
// forwards to the DecoderBuffer overload below.
void VaapiVideoDecodeAccelerator::Decode(BitstreamBuffer bitstream_buffer) {
  Decode(bitstream_buffer.ToDecoderBuffer(), bitstream_buffer.id());
}
619
// Validates and enqueues |buffer| for decoding. A negative |bitstream_id| is
// an error; a null |buffer| is acknowledged to the client immediately (its
// data could not be mapped/was empty upstream) without being queued.
void VaapiVideoDecodeAccelerator::Decode(scoped_refptr<DecoderBuffer> buffer,
                                         int32_t bitstream_id) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  TRACE_EVENT1("media,gpu", "VAVDA::Decode", "Buffer id", bitstream_id);

  if (bitstream_id < 0) {
    LOG(ERROR) << "Invalid bitstream_buffer, id: " << bitstream_id;
    NotifyError(INVALID_ARGUMENT);
    return;
  }

  if (!buffer) {
    if (client_)
      client_->NotifyEndOfBitstreamBuffer(bitstream_id);
    return;
  }

  QueueInputBuffer(std::move(buffer), bitstream_id);
}
639
// Client response to ProvidePictureBuffersWithVisibleRect(): wraps each
// PictureBuffer in a VaapiPicture and (re)creates the VA context plus decode
// surfaces according to |buffer_allocation_mode_|, then resumes DecodeTask()
// if decoding was in progress. Runs on |task_runner_|.
void VaapiVideoDecodeAccelerator::AssignPictureBuffers(
    const std::vector<PictureBuffer>& buffers) {
  DCHECK(task_runner_->BelongsToCurrentThread());
  base::AutoLock auto_lock(lock_);
  DCHECK(pictures_.empty());

  available_picture_buffers_.clear();

  RETURN_AND_NOTIFY_ON_FAILURE(
      buffers.size() >= requested_num_pics_,
      "Got an invalid number of picture buffers. (Got " << buffers.size()
      << ", requested " << requested_num_pics_ << ")", INVALID_ARGUMENT, );
  // requested_pic_size_ can be adjusted by VDA client. We should update
  // |requested_pic_size_| by buffers[0].size(). But AMD driver doesn't decode
  // frames correctly if the surface stride is different from the width of a
  // coded size.
  // TODO(b/139460315): Save buffers[0].size() as |adjusted_size_| once the
  // AMD driver issue is resolved.

  va_surface_format_ = GetVaFormatForVideoCodecProfile(profile_);
  std::vector<VASurfaceID> va_surface_ids;

  // If we aren't in BufferAllocationMode::kNone, we have to allocate a
  // |vpp_vaapi_wrapper_| for VaapiPicture to DownloadFromSurface() the VA's
  // internal decoded frame.
  if (buffer_allocation_mode_ != BufferAllocationMode::kNone &&
      !vpp_vaapi_wrapper_) {
    vpp_vaapi_wrapper_ = VaapiWrapper::Create(
        VaapiWrapper::kVideoProcess, VAProfileNone,
        base::BindRepeating(&ReportToUMA, VAAPI_VPP_ERROR));
    RETURN_AND_NOTIFY_ON_FAILURE(vpp_vaapi_wrapper_,
                                 "Failed to initialize VppVaapiWrapper",
                                 PLATFORM_FAILURE, );

    // Size is irrelevant for a VPP context.
    RETURN_AND_NOTIFY_ON_FAILURE(vpp_vaapi_wrapper_->CreateContext(gfx::Size()),
                                 "Failed to create Context",
                                 PLATFORM_FAILURE, );
  }

  for (size_t i = 0; i < buffers.size(); ++i) {
    // If we aren't in BufferAllocationMode::kNone, this |picture| is
    // only used as a copy destination. Therefore, the VaapiWrapper used and
    // owned by |picture| is |vpp_vaapi_wrapper_|.

    // TODO(b/139460315): Create with buffers[i] once the AMD driver issue is
    // resolved.
    PictureBuffer buffer = buffers[i];
    buffer.set_size(requested_pic_size_);

    // Note that the |size_to_bind| is not relevant in IMPORT mode.
    const gfx::Size size_to_bind =
        (output_mode_ == Config::OutputMode::ALLOCATE)
            ? GetRectSizeFromOrigin(requested_visible_rect_)
            : gfx::Size();

    std::unique_ptr<VaapiPicture> picture = vaapi_picture_factory_->Create(
        (buffer_allocation_mode_ == BufferAllocationMode::kNone)
            ? vaapi_wrapper_
            : vpp_vaapi_wrapper_,
        make_context_current_cb_, bind_image_cb_, buffer, size_to_bind);
    RETURN_AND_NOTIFY_ON_FAILURE(picture, "Failed creating a VaapiPicture",
                                 PLATFORM_FAILURE, );

    if (output_mode_ == Config::OutputMode::ALLOCATE) {
      RETURN_AND_NOTIFY_ON_FAILURE(
          picture->Allocate(vaapi_picture_factory_->GetBufferFormat()),
          "Failed to allocate memory for a VaapiPicture", PLATFORM_FAILURE, );
      available_picture_buffers_.push_back(buffers[i].id());
      VASurfaceID va_surface_id = picture->va_surface_id();
      if (va_surface_id != VA_INVALID_ID)
        va_surface_ids.push_back(va_surface_id);
    }
    // In IMPORT mode the buffer only becomes available after the client
    // calls ImportBufferForPicture().

    DCHECK(!base::Contains(pictures_, buffers[i].id()));
    pictures_[buffers[i].id()] = std::move(picture);

    // Wake the decoder thread: it may be blocked in WaitForSurfaces_Locked().
    surfaces_available_.Signal();
  }

  base::RepeatingCallback<void(VASurfaceID)> va_surface_release_cb;

  // If we aren't in BufferAllocationMode::kNone, we use |va_surface_ids| for
  // decode, otherwise ask |vaapi_wrapper_| to allocate them for us.
  if (buffer_allocation_mode_ == BufferAllocationMode::kNone) {
    DCHECK(!va_surface_ids.empty());
    RETURN_AND_NOTIFY_ON_FAILURE(
        vaapi_wrapper_->CreateContext(requested_pic_size_),
        "Failed creating VA Context", PLATFORM_FAILURE, );
    DCHECK_EQ(va_surface_ids.size(), buffers.size());

    // Surfaces are owned by the VaapiPictures; nothing to release here.
    va_surface_release_cb = base::DoNothing();
  } else {
    const size_t requested_num_surfaces =
        IsBufferAllocationModeReducedOrSuperReduced()
            ? requested_num_reference_frames_
            : pictures_.size();
    CHECK_NE(requested_num_surfaces, 0u);
    va_surface_ids.clear();

    RETURN_AND_NOTIFY_ON_FAILURE(
        vaapi_wrapper_->CreateContextAndSurfaces(
            va_surface_format_, requested_pic_size_,
            VaapiWrapper::SurfaceUsageHint::kVideoDecoder,
            requested_num_surfaces, &va_surface_ids),
        "Failed creating VA Surfaces", PLATFORM_FAILURE, );

    va_surface_release_cb =
        base::BindRepeating(&VaapiWrapper::DestroySurface, vaapi_wrapper_);
  }

  for (const VASurfaceID va_surface_id : va_surface_ids) {
    available_va_surfaces_.emplace_back(std::make_unique<ScopedVASurfaceID>(
        va_surface_id, va_surface_release_cb));
  }

  // Resume DecodeTask if it is still in decoding state.
  if (state_ == kDecoding) {
    decoder_thread_task_runner_->PostTask(
        FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::DecodeTask,
                                  base::Unretained(this)));
  }
}
763
764 #if defined(USE_OZONE)
// IMPORT-mode only: attaches the client-provided |gpu_memory_buffer_handle|
// to the VaapiPicture identified by |picture_buffer_id|, then marks the
// buffer available via ReusePictureBuffer(). Runs on |task_runner_|.
void VaapiVideoDecodeAccelerator::ImportBufferForPicture(
    int32_t picture_buffer_id,
    VideoPixelFormat pixel_format,
    gfx::GpuMemoryBufferHandle gpu_memory_buffer_handle) {
  VLOGF(2) << "Importing picture id: " << picture_buffer_id;
  DCHECK(task_runner_->BelongsToCurrentThread());

  if (output_mode_ != Config::OutputMode::IMPORT) {
    LOG(ERROR) << "Cannot import in non-import mode";
    NotifyError(INVALID_ARGUMENT);
    return;
  }

  {
    base::AutoLock auto_lock(lock_);
    if (!pictures_.count(picture_buffer_id)) {
      // It's possible that we've already posted a DismissPictureBuffer for this
      // picture, but it has not yet executed when this ImportBufferForPicture
      // was posted to us by the client. In that case just ignore this (we've
      // already dismissed it and accounted for that).
      DVLOGF(3) << "got picture id=" << picture_buffer_id
                << " not in use (anymore?).";
      return;
    }

    auto buffer_format = VideoPixelFormatToGfxBufferFormat(pixel_format);
    if (!buffer_format) {
      LOG(ERROR) << "Unsupported format: " << pixel_format;
      NotifyError(INVALID_ARGUMENT);
      return;
    }

    VaapiPicture* picture = pictures_[picture_buffer_id].get();
    if (!picture->ImportGpuMemoryBufferHandle(
            *buffer_format, std::move(gpu_memory_buffer_handle))) {
      // ImportGpuMemoryBufferHandle will close the handles even on failure, so
      // we don't need to do this ourselves.
      LOG(ERROR) << "Failed to import GpuMemoryBufferHandle";
      NotifyError(PLATFORM_FAILURE);
      return;
    }
  }

  // Imported buffers start their life available for output.
  ReusePictureBuffer(picture_buffer_id);
}
810 #endif
811
// Client notification that the output for |picture_buffer_id| has been
// consumed: returns the buffer to |available_picture_buffers_| and tries to
// service any pending output. Runs on |task_runner_|.
void VaapiVideoDecodeAccelerator::ReusePictureBuffer(
    int32_t picture_buffer_id) {
  DVLOGF(4) << "picture id=" << picture_buffer_id;
  DCHECK(task_runner_->BelongsToCurrentThread());
  TRACE_EVENT1("media,gpu", "VAVDA::ReusePictureBuffer", "Picture id",
               picture_buffer_id);

  {
    base::AutoLock auto_lock(lock_);

    if (!pictures_.count(picture_buffer_id)) {
      // It's possible that we've already posted a DismissPictureBuffer for this
      // picture, but it has not yet executed when this ReusePictureBuffer
      // was posted to us by the client. In that case just ignore this (we've
      // already dismissed it and accounted for that).
      DVLOGF(3) << "got picture id=" << picture_buffer_id
                << " not in use (anymore?).";
      return;
    }

    available_picture_buffers_.push_back(picture_buffer_id);
    TRACE_COUNTER_ID2("media,gpu", "Vaapi frames at client", this, "used",
                      pictures_.size() - available_picture_buffers_.size(),
                      "available", available_picture_buffers_.size());
  }

  TryOutputPicture();
}
840
// Runs on the decoder thread when the flush-marker input buffer (see Flush())
// reaches the front of the queue: drains |decoder_| of all pending pictures,
// resets it, and posts FinishFlush() back to the main thread.
void VaapiVideoDecodeAccelerator::FlushTask() {
  VLOGF(2);
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
  DCHECK(curr_input_buffer_ && curr_input_buffer_->IsFlushRequest());

  // Consume the flush-marker buffer itself.
  curr_input_buffer_.reset();

  // First flush all the pictures that haven't been outputted, notifying the
  // client to output them.
  bool res = decoder_->Flush();
  RETURN_AND_NOTIFY_ON_FAILURE(res, "Failed flushing the decoder.",
                               PLATFORM_FAILURE, );

  // Put the decoder in idle state, ready to resume.
  decoder_->Reset();

  task_runner_->PostTask(
      FROM_HERE,
      base::BindOnce(&VaapiVideoDecodeAccelerator::FinishFlush, weak_this_));
}
861
// Client-facing flush request. Enqueues an end-of-stream buffer (with
// bitstream id -1) as a flush marker; FlushTask() fires on the decoder thread
// once all preceding input has been consumed.
void VaapiVideoDecodeAccelerator::Flush() {
  VLOGF(2) << "Got flush request";
  DCHECK(task_runner_->BelongsToCurrentThread());

  QueueInputBuffer(DecoderBuffer::CreateEOSBuffer(), -1);
}
868
// Runs on |task_runner_| after FlushTask() drained the decoder. Completes the
// flush by notifying the client, unless frames are still pending output, in
// which case it sets |finish_flush_pending_| so the attempt is retried later.
void VaapiVideoDecodeAccelerator::FinishFlush() {
  VLOGF(2);
  DCHECK(task_runner_->BelongsToCurrentThread());

  // Clear the retry flag; it is re-set below if we cannot finish yet.
  finish_flush_pending_ = false;

  base::AutoLock auto_lock(lock_);
  if (state_ != kDecoding) {
    // A Reset() or destruction arrived while the flush was in flight; those
    // paths take over and do their own client notification.
    DCHECK(state_ == kDestroying || state_ == kResetting) << state_;
    return;
  }

  // Still waiting for textures from client to finish outputting all pending
  // frames. Try again later.
  if (!pending_output_cbs_.empty()) {
    finish_flush_pending_ = true;
    return;
  }

  // Resume decoding if necessary.
  if (input_buffers_.empty()) {
    state_ = kIdle;
  } else {
    decoder_thread_task_runner_->PostTask(
        FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::DecodeTask,
                                  base::Unretained(this)));
  }

  task_runner_->PostTask(FROM_HERE,
                         base::BindOnce(&Client::NotifyFlushDone, client_));
}
900
// Runs on the decoder thread after Reset() has been requested: resets
// |decoder_|, returns any half-consumed input buffer, and posts FinishReset()
// back to the main thread.
void VaapiVideoDecodeAccelerator::ResetTask() {
  VLOGF(2);
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());

  // All the decoding tasks from before the reset request from client are done
  // by now, as this task was scheduled after them and client is expected not
  // to call Decode() after Reset() and before NotifyResetDone.
  decoder_->Reset();

  base::AutoLock auto_lock(lock_);

  // Return current input buffer, if present.
  if (curr_input_buffer_)
    ReturnCurrInputBuffer_Locked();

  // And let client know that we are done with reset.
  task_runner_->PostTask(
      FROM_HERE,
      base::BindOnce(&VaapiVideoDecodeAccelerator::FinishReset, weak_this_));
}
921
// Client-facing reset request: moves to kResetting, drops all queued input,
// schedules ResetTask() on the decoder thread, and wakes any decoder-thread
// waiters so they observe the new state and bail out early.
void VaapiVideoDecodeAccelerator::Reset() {
  VLOGF(2) << "Got reset request";
  DCHECK(task_runner_->BelongsToCurrentThread());

  // This will make any new decode tasks exit early.
  base::AutoLock auto_lock(lock_);
  state_ = kResetting;
  // Any in-flight flush is superseded by the reset.
  finish_flush_pending_ = false;

  // Drop all remaining input buffers, if present.
  while (!input_buffers_.empty())
    input_buffers_.pop();
  TRACE_COUNTER1("media,gpu", "Vaapi input buffers", input_buffers_.size());

  decoder_thread_task_runner_->PostTask(
      FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::ResetTask,
                                base::Unretained(this)));

  // Wake up the decoder thread in case it is blocked waiting for input or
  // for surfaces; it will see kResetting and return.
  input_ready_.Signal();
  surfaces_available_.Signal();
}
943
// Runs on |task_runner_| after ResetTask(): drops pending outputs, notifies
// the client via NotifyResetDone(), and resumes decoding if input arrived
// meanwhile. Re-posts itself while a surface-set change is still in progress.
void VaapiVideoDecodeAccelerator::FinishReset() {
  VLOGF(2);
  DCHECK(task_runner_->BelongsToCurrentThread());
  base::AutoLock auto_lock(lock_);

  if (state_ != kResetting) {
    DCHECK(state_ == kDestroying || state_ == kUninitialized) << state_;
    return;  // We could've gotten destroyed already.
  }

  // Drop pending outputs.
  while (!pending_output_cbs_.empty())
    pending_output_cbs_.pop();

  if (awaiting_va_surfaces_recycle_) {
    // Decoder requested a new surface set while we were waiting for it to
    // finish the last DecodeTask, running at the time of Reset().
    // Let the surface set change finish first before resetting.
    task_runner_->PostTask(
        FROM_HERE,
        base::BindOnce(&VaapiVideoDecodeAccelerator::FinishReset, weak_this_));
    return;
  }

  state_ = kIdle;

  task_runner_->PostTask(FROM_HERE,
                         base::BindOnce(&Client::NotifyResetDone, client_));

  // The client might have given us new buffers via Decode() while we were
  // resetting and might be waiting for our move, and not call Decode() anymore
  // until we return something. Post a DecodeTask() so that we won't
  // sleep forever waiting for Decode() in that case. Having two of them
  // in the pipe is harmless, the additional one will return as soon as it sees
  // that we are back in kDecoding state.
  if (!input_buffers_.empty()) {
    state_ = kDecoding;
    decoder_thread_task_runner_->PostTask(
        FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::DecodeTask,
                                  base::Unretained(this)));
  }
}
986
// Tears down decoding state on the main thread: dismisses picture buffers,
// stops |decoder_thread_| and destroys the VA contexts. Safe to call more
// than once; returns early when already uninitialized or mid-destruction.
void VaapiVideoDecodeAccelerator::Cleanup() {
  DCHECK(task_runner_->BelongsToCurrentThread());

  base::AutoLock auto_lock(lock_);
  if (state_ == kUninitialized || state_ == kDestroying)
    return;

  VLOGF(2) << "Destroying VAVDA";
  state_ = kDestroying;

  // Call DismissPictureBuffer() to notify |client_| that the picture buffers
  // are no longer used and thus |client_| shall release them. If |client_| has
  // been invalidated in NotifyError(),|client_| will be destroyed shortly. The
  // destruction should release all the PictureBuffers.
  if (client_) {
    for (const auto& id_and_picture : pictures_)
      client_->DismissPictureBuffer(id_and_picture.first);
  }
  pictures_.clear();

  client_ptr_factory_.reset();
  weak_this_factory_.InvalidateWeakPtrs();

  // TODO(mcasas): consider deleting |decoder_| on
  // |decoder_thread_task_runner_|, https://crbug.com/789160.

  // Signal all potential waiters on the decoder_thread_, let them early-exit,
  // as we've just moved to the kDestroying state, and wait for all tasks
  // to finish.
  input_ready_.Signal();
  surfaces_available_.Signal();
  {
    // Release |lock_| while joining the decoder thread so its in-flight
    // tasks can acquire the lock, observe kDestroying and finish.
    base::AutoUnlock auto_unlock(lock_);
    decoder_thread_.Stop();
  }
  if (buffer_allocation_mode_ != BufferAllocationMode::kNone)
    available_va_surfaces_.clear();

  vaapi_wrapper_->DestroyContext();

  if (vpp_vaapi_wrapper_)
    vpp_vaapi_wrapper_->DestroyContext();
  state_ = kUninitialized;
}
1031
// Client-initiated destruction: cleans up and then deletes |this|. The
// caller must not use the pointer afterwards.
void VaapiVideoDecodeAccelerator::Destroy() {
  DCHECK(task_runner_->BelongsToCurrentThread());
  Cleanup();
  delete this;
}
1037
// This implementation does not support calling Decode() from a separate
// client thread; always declines, so callers must use the main thread.
bool VaapiVideoDecodeAccelerator::TryToSetupDecodeOnSeparateThread(
    const base::WeakPtr<Client>& decode_client,
    const scoped_refptr<base::SingleThreadTaskRunner>& decode_task_runner) {
  return false;
}
1043
// Called by the decoder when |dec_surface| holds a decoded frame ready for
// output. May be invoked from the decoder thread, in which case it trampolines
// itself to |task_runner_|; then queues an OutputPicture() callback and tries
// to output immediately.
void VaapiVideoDecodeAccelerator::SurfaceReady(
    scoped_refptr<VASurface> dec_surface,
    int32_t bitstream_id,
    const gfx::Rect& visible_rect,
    const VideoColorSpace& color_space) {
  if (!task_runner_->BelongsToCurrentThread()) {
    task_runner_->PostTask(
        FROM_HERE, base::BindOnce(&VaapiVideoDecodeAccelerator::SurfaceReady,
                                  weak_this_, std::move(dec_surface),
                                  bitstream_id, visible_rect, color_space));
    return;
  }

  DCHECK(!awaiting_va_surfaces_recycle_);

  {
    base::AutoLock auto_lock(lock_);
    // Drop any requests to output if we are resetting or being destroyed.
    if (state_ == kResetting || state_ == kDestroying)
      return;
  }
  pending_output_cbs_.push(base::BindOnce(
      &VaapiVideoDecodeAccelerator::OutputPicture, weak_this_,
      std::move(dec_surface), bitstream_id, visible_rect, color_space));

  TryOutputPicture();
}
1071
// Hands out a free VASurface for the decoder to decode into, or nullptr if
// none is currently available. Runs on the decoder thread. The returned
// surface's release callback recycles the id via |va_surface_recycle_cb_|.
scoped_refptr<VASurface> VaapiVideoDecodeAccelerator::CreateSurface() {
  DCHECK(decoder_thread_task_runner_->BelongsToCurrentThread());
  base::AutoLock auto_lock(lock_);

  if (available_va_surfaces_.empty())
    return nullptr;

  DCHECK_NE(VA_INVALID_ID, va_surface_format_);
  DCHECK(!awaiting_va_surfaces_recycle_);
  if (buffer_allocation_mode_ != BufferAllocationMode::kNone) {
    // Internally-allocated surfaces: any free one will do, take the front.
    auto va_surface_id = std::move(available_va_surfaces_.front());
    const VASurfaceID id = va_surface_id->id();
    available_va_surfaces_.pop_front();

    TRACE_COUNTER_ID2("media,gpu", "Vaapi VASurfaceIDs", this, "used",
                      (IsBufferAllocationModeReducedOrSuperReduced()
                           ? requested_num_reference_frames_
                           : pictures_.size()) -
                          available_va_surfaces_.size(),
                      "available", available_va_surfaces_.size());

    return new VASurface(
        id, requested_pic_size_, va_surface_format_,
        base::BindOnce(va_surface_recycle_cb_, std::move(va_surface_id)));
  }

  // kNone mode: surfaces are backed by the client's PictureBuffers, so a
  // surface is only usable when its picture buffer is also free at the client.
  // Find the first |available_va_surfaces_| id such that the associated
  // |pictures_| entry is marked as |available_picture_buffers_|. In practice,
  // we will quickly find an available |va_surface_id|.
  for (auto it = available_va_surfaces_.begin();
       it != available_va_surfaces_.end(); ++it) {
    const VASurfaceID va_surface_id = (*it)->id();
    for (const auto& id_and_picture : pictures_) {
      if (id_and_picture.second->va_surface_id() == va_surface_id &&
          base::Contains(available_picture_buffers_, id_and_picture.first)) {
        // Remove |va_surface_id| from the list of availables, and use the id
        // to return a new VASurface.
        auto va_surface = std::move(*it);
        available_va_surfaces_.erase(it);
        return new VASurface(
            va_surface_id, requested_pic_size_, va_surface_format_,
            base::BindOnce(va_surface_recycle_cb_, std::move(va_surface)));
      }
    }
  }
  return nullptr;
}
1119
// Release callback for VASurfaces created in CreateSurface(): returns the
// surface id to |available_va_surfaces_|, wakes the decoder thread, and tries
// to output a pending picture. Runs on |task_runner_|.
void VaapiVideoDecodeAccelerator::RecycleVASurface(
    std::unique_ptr<ScopedVASurfaceID> va_surface,
    // We don't use |va_surface_id| but it must be here because this method is
    // bound as VASurface::ReleaseCB.
    VASurfaceID /*va_surface_id*/) {
  DCHECK(task_runner_->BelongsToCurrentThread());

  {
    base::AutoLock auto_lock(lock_);
    available_va_surfaces_.push_back(std::move(va_surface));

    if (buffer_allocation_mode_ != BufferAllocationMode::kNone) {
      TRACE_COUNTER_ID2("media,gpu", "Vaapi VASurfaceIDs", this, "used",
                        (IsBufferAllocationModeReducedOrSuperReduced()
                             ? requested_num_reference_frames_
                             : pictures_.size()) -
                            available_va_surfaces_.size(),
                        "available", available_va_surfaces_.size());
    }
    // Unblock the decoder thread if it is waiting for a free surface.
    surfaces_available_.Signal();
  }

  // Done outside the scope of |lock_| above.
  TryOutputPicture();
}
1144
// static
// Returns the decode profiles supported by the underlying VA-API driver.
VideoDecodeAccelerator::SupportedProfiles
VaapiVideoDecodeAccelerator::GetSupportedProfiles() {
  return VaapiWrapper::GetSupportedDecodeProfiles();
}
1150
// Picks the buffer allocation strategy (how many VA surfaces are allocated,
// and whether the client's PictureBuffers are used directly) based on
// |output_mode_|, the codec |profile_| and the platform generation.
VaapiVideoDecodeAccelerator::BufferAllocationMode
VaapiVideoDecodeAccelerator::DecideBufferAllocationMode() {
  // TODO(crbug.com/912295): Enable a better BufferAllocationMode for IMPORT
  // |output_mode_| as well.
  if (output_mode_ == VideoDecodeAccelerator::Config::OutputMode::IMPORT)
    return BufferAllocationMode::kNormal;

  // On Gemini Lake, Kaby Lake and later we can pass to libva the client's
  // PictureBuffers to decode onto, which skips the use of the Vpp unit and its
  // associated format reconciliation copy, avoiding all internal buffer
  // allocations. This only works for VP8 and VP9: H264 GetNumReferenceFrames()
  // depends on the bitstream and sometimes it's not enough to cover the amount
  // of frames needed by the client pipeline (see b/133733739).
  // TODO(crbug.com/911754): Enable for VP9 Profile 2.
  if (IsGeminiLakeOrLater() &&
      (profile_ == VP9PROFILE_PROFILE0 || profile_ == VP8PROFILE_ANY)) {
    // Add one to the reference frames for the one being currently egressed, and
    // an extra allocation for both |client_| and |decoder_|, see
    // crrev.com/c/1576560.
    if (profile_ == VP8PROFILE_ANY)
      num_extra_pics_ = 3;
    return BufferAllocationMode::kNone;
  }

  // If we're here, we have to use the Vpp unit and allocate buffers for
  // |decoder_|; usually we'd have to allocate the |decoder_|s
  // GetRequiredNumOfPictures() internally, we can allocate just |decoder_|s
  // GetNumReferenceFrames() + 1. Moreover, we also request the |client_| to
  // allocate less than the usual |decoder_|s GetRequiredNumOfPictures().

  // Another +1 is experimentally needed for high-to-high resolution changes.
  // TODO(mcasas): Figure out why and why only H264, see crbug.com/912295 and
  // http://crrev.com/c/1363807/9/media/gpu/h264_decoder.cc#1449.
  if (profile_ >= H264PROFILE_MIN && profile_ <= H264PROFILE_MAX)
    return BufferAllocationMode::kReduced;

  return BufferAllocationMode::kSuperReduced;
}
1189
IsBufferAllocationModeReducedOrSuperReduced() const1190 bool VaapiVideoDecodeAccelerator::IsBufferAllocationModeReducedOrSuperReduced()
1191 const {
1192 return buffer_allocation_mode_ == BufferAllocationMode::kSuperReduced ||
1193 buffer_allocation_mode_ == BufferAllocationMode::kReduced;
1194 }
1195
// base::trace_event memory-infra hook: reports a lower-bound estimate of the
// memory consumed by this instance's internally-allocated VA surfaces.
// Returns false when this instance allocates no surfaces of its own.
bool VaapiVideoDecodeAccelerator::OnMemoryDump(
    const base::trace_event::MemoryDumpArgs& args,
    base::trace_event::ProcessMemoryDump* pmd) {
  using base::trace_event::MemoryAllocatorDump;
  base::AutoLock auto_lock(lock_);
  // In kNone mode the client owns the buffers; before the first surface
  // request there is nothing to report either.
  if (buffer_allocation_mode_ == BufferAllocationMode::kNone ||
      !requested_num_reference_frames_) {
    return false;
  }

  // Name the dump after this instance's address so concurrent decoders get
  // distinct entries.
  auto dump_name = base::StringPrintf("gpu/vaapi/decoder/0x%" PRIxPTR,
                                      reinterpret_cast<uintptr_t>(this));
  MemoryAllocatorDump* dump = pmd->CreateAllocatorDump(dump_name);

  // 4:2:0 sampling is 1.5 bytes/pixel at 8bpp, twice that at 10bpp.
  constexpr float kNumBytesPerPixelYUV420 = 12.0 / 8;
  constexpr float kNumBytesPerPixelYUV420_10bpp = 2 * kNumBytesPerPixelYUV420;
  DCHECK(va_surface_format_ == VA_RT_FORMAT_YUV420 ||
         va_surface_format_ == VA_RT_FORMAT_YUV420_10BPP);
  const float va_surface_bytes_per_pixel =
      va_surface_format_ == VA_RT_FORMAT_YUV420 ? kNumBytesPerPixelYUV420
                                                : kNumBytesPerPixelYUV420_10bpp;
  // Report |requested_num_surfaces| and the associated memory size. The
  // calculated size is an estimation since we don't know the internal VA
  // strides, texture compression, headers, etc, but is a good lower boundary.
  const size_t requested_num_surfaces =
      IsBufferAllocationModeReducedOrSuperReduced()
          ? requested_num_reference_frames_
          : pictures_.size();
  dump->AddScalar(MemoryAllocatorDump::kNameSize,
                  MemoryAllocatorDump::kUnitsBytes,
                  static_cast<uint64_t>(requested_num_surfaces *
                                        requested_pic_size_.GetArea() *
                                        va_surface_bytes_per_pixel));
  dump->AddScalar(MemoryAllocatorDump::kNameObjectCount,
                  MemoryAllocatorDump::kUnitsObjects,
                  static_cast<uint64_t>(requested_num_surfaces));

  return true;
}
1235
1236 } // namespace media
1237