1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 //
5 // Copyright (C) 2020-2021 Intel Corporation
6 
7 #include "opencv2/videoio.hpp"
8 #ifdef HAVE_OPENCL
9 #include "opencv2/core/ocl.hpp"
10 #endif
11 #if defined(__OPENCV_BUILD) && !defined(BUILD_PLUGIN)  // TODO Properly detect and add D3D11 / LIBVA dependencies for standalone plugins
12 #include "cvconfig.h"
13 #endif
14 #include <sstream>
15 
16 #ifdef HAVE_D3D11
17 #define D3D11_NO_HELPERS
18 #include <d3d11.h>
19 #include <codecvt>
20 #include "opencv2/core/directx.hpp"
21 #ifdef HAVE_OPENCL
22 #include <CL/cl_d3d11.h>
23 #endif
24 #endif // HAVE_D3D11
25 
26 #ifdef HAVE_VA
27 #include <va/va_backend.h>
28 #ifdef HAVE_VA_INTEL
29 #include "opencv2/core/va_intel.hpp"
30 #ifndef CL_TARGET_OPENCL_VERSION
31 #define CL_TARGET_OPENCL_VERSION 120
32 #endif
33 #ifdef HAVE_VA_INTEL_OLD_HEADER
34 #include <CL/va_ext.h>
35 #else
36 #include <CL/cl_va_api_media_sharing_intel.h>
37 #endif
38 #endif
39 #endif // HAVE_VA
40 
41 // FFMPEG "C" headers
42 extern "C" {
43 #include <libavcodec/avcodec.h>
44 #include <libavutil/avutil.h>
45 #include <libavutil/hwcontext.h>
46 #ifdef HAVE_D3D11
47 #include <libavutil/hwcontext_d3d11va.h>
48 #endif
49 #ifdef HAVE_VA
50 #include <libavutil/hwcontext_vaapi.h>
51 #endif
52 #ifdef HAVE_MFX // dependency only on MFX header files, no linkage dependency
53 #include <libavutil/hwcontext_qsv.h>
54 #endif
55 }
56 
57 #define HW_DEFAULT_POOL_SIZE    32
58 #define HW_DEFAULT_SW_FORMAT    AV_PIX_FMT_NV12
59 
60 using namespace cv;
61 
62 static AVCodec *hw_find_codec(AVCodecID id, AVHWDeviceType hw_type, int (*check_category)(const AVCodec *),
63                               const char *disabled_codecs, AVPixelFormat *hw_pix_fmt);
64 static AVBufferRef* hw_create_device(AVHWDeviceType hw_type, int hw_device, const std::string& device_subname, bool use_opencl);
65 static AVBufferRef* hw_create_frames(struct AVCodecContext* ctx, AVBufferRef *hw_device_ctx, int width, int height, AVPixelFormat hw_format);
66 static AVPixelFormat hw_get_format_callback(struct AVCodecContext *ctx, const enum AVPixelFormat * fmt);
67 static VideoAccelerationType hw_type_to_va_type(AVHWDeviceType hw_type);
68 
69 static
getVideoAccelerationName(VideoAccelerationType va_type)70 const char* getVideoAccelerationName(VideoAccelerationType va_type)
71 {
72     switch (va_type)
73     {
74     case VIDEO_ACCELERATION_NONE: return "none";
75     case VIDEO_ACCELERATION_ANY: return "any";
76     case VIDEO_ACCELERATION_D3D11: return "d3d11";
77     case VIDEO_ACCELERATION_VAAPI: return "vaapi";
78     case VIDEO_ACCELERATION_MFX: return "mfx";
79     }
80     return "unknown";
81 }
82 
83 static
getDecoderConfiguration(VideoAccelerationType va_type,AVDictionary * dict)84 std::string getDecoderConfiguration(VideoAccelerationType va_type, AVDictionary *dict)
85 {
86     std::string va_name = getVideoAccelerationName(va_type);
87     std::string key_name = std::string("hw_decoders_") + va_name;
88     const char *hw_acceleration = NULL;
89     if (dict)
90     {
91         AVDictionaryEntry* entry = av_dict_get(dict, key_name.c_str(), NULL, 0);
92         if (entry)
93             hw_acceleration = entry->value;
94     }
95     if (hw_acceleration)
96         return hw_acceleration;
97 
98     // some default values (FFMPEG_DECODE_ACCELERATION_TYPES)
99 #ifdef _WIN32
100     switch (va_type)
101     {
102     case VIDEO_ACCELERATION_NONE: return "";
103     case VIDEO_ACCELERATION_ANY: return "d3d11va";
104     case VIDEO_ACCELERATION_D3D11: return "d3d11va";
105     case VIDEO_ACCELERATION_VAAPI: return "";
106     case VIDEO_ACCELERATION_MFX: return ""; // "qsv" fails if non-Intel D3D11 device
107     }
108     return "";
109 #else
110     switch (va_type)
111     {
112     case VIDEO_ACCELERATION_NONE: return "";
113     case VIDEO_ACCELERATION_ANY: return "vaapi.iHD";
114     case VIDEO_ACCELERATION_D3D11: return "";
115     case VIDEO_ACCELERATION_VAAPI: return "vaapi.iHD";
116     case VIDEO_ACCELERATION_MFX: return "qsv.iHD";
117     }
118     return "";
119 #endif
120 }
121 
122 static
getEncoderConfiguration(VideoAccelerationType va_type,AVDictionary * dict)123 std::string getEncoderConfiguration(VideoAccelerationType va_type, AVDictionary *dict)
124 {
125     std::string va_name = getVideoAccelerationName(va_type);
126     std::string key_name = std::string("hw_encoders_") + va_name;
127     const char *hw_acceleration = NULL;
128     if (dict)
129     {
130         AVDictionaryEntry* entry = av_dict_get(dict, key_name.c_str(), NULL, 0);
131         if (entry)
132             hw_acceleration = entry->value;
133     }
134     if (hw_acceleration)
135         return hw_acceleration;
136 
137     // some default values (FFMPEG_ENCODE_ACCELERATION_TYPES)
138 #ifdef _WIN32
139     switch (va_type)
140     {
141     case VIDEO_ACCELERATION_NONE: return "";
142     case VIDEO_ACCELERATION_ANY: return "qsv";
143     case VIDEO_ACCELERATION_D3D11: return "";
144     case VIDEO_ACCELERATION_VAAPI: return "";
145     case VIDEO_ACCELERATION_MFX: return "qsv";
146     }
147     return "";
148 #else
149     switch (va_type)
150     {
151     case VIDEO_ACCELERATION_NONE: return "";
152     case VIDEO_ACCELERATION_ANY: return "qsv.iHD,vaapi.iHD";
153     case VIDEO_ACCELERATION_D3D11: return "";
154     case VIDEO_ACCELERATION_VAAPI: return "vaapi.iHD";
155     case VIDEO_ACCELERATION_MFX: return "qsv.iHD";
156     }
157     return "unknown";
158 #endif
159 }
160 
161 static
getDecoderDisabledCodecs(AVDictionary * dict)162 std::string getDecoderDisabledCodecs(AVDictionary *dict)
163 {
164     std::string key_name = std::string("hw_disable_decoders");
165     const char *disabled_codecs = NULL;
166     if (dict)
167     {
168         AVDictionaryEntry* entry = av_dict_get(dict, key_name.c_str(), NULL, 0);
169         if (entry)
170             disabled_codecs = entry->value;
171     }
172     if (disabled_codecs)
173         return disabled_codecs;
174 
175     // some default values (FFMPEG_DECODE_DISABLE_CODECS)
176 #ifdef _WIN32
177     return "none";
178 #else
179     return "av1.vaapi,av1_qsv,vp8.vaapi,vp8_qsv";  // "vp9_qsv"
180 #endif
181 }
182 
183 static
getEncoderDisabledCodecs(AVDictionary * dict)184 std::string getEncoderDisabledCodecs(AVDictionary *dict)
185 {
186     std::string key_name = std::string("hw_disabled_encoders");
187     const char *disabled_codecs = NULL;
188     if (dict)
189     {
190         AVDictionaryEntry* entry = av_dict_get(dict, key_name.c_str(), NULL, 0);
191         if (entry)
192             disabled_codecs = entry->value;
193     }
194     if (disabled_codecs)
195         return disabled_codecs;
196 
197     // some default values (FFMPEG_ENCODE_DISABLE_CODECS)
198 #ifdef _WIN32
199     return "mjpeg_qsv";
200 #else
201     return "mjpeg_vaapi,mjpeg_qsv,vp8_vaapi";
202 #endif
203 }
204 
// Validate a newly created HW device context before using it, and log the device.
// Rejects the configuration (returns false) when:
//  - ctx is NULL or carries no backend-specific hwctx;
//  - device_subname is non-empty and the resolved device name doesn't contain it;
//  - hw_type is QSV and the VAAPI driver lacks the VideoProc entrypoint required
//    by MediaSDK (see workaround comment below).
static
bool hw_check_device(AVBufferRef* ctx, AVHWDeviceType hw_type, const std::string& device_subname) {
    if (!ctx)
        return false;
    AVHWDeviceContext* hw_device_ctx = (AVHWDeviceContext*)ctx->data;
    if (!hw_device_ctx->hwctx)
        return false;
    const char *hw_name = av_hwdevice_get_type_name(hw_type);
    if (hw_type == AV_HWDEVICE_TYPE_QSV)
        hw_name = "MFX";  // user-facing name used in log messages for QSV
    bool ret = true;
    std::string device_name;
#if defined(HAVE_D3D11)
    // Resolve a human-readable device name via the DXGI adapter description.
    if (hw_device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) {
        ID3D11Device* device = ((AVD3D11VADeviceContext*)hw_device_ctx->hwctx)->device;
        IDXGIDevice* dxgiDevice = nullptr;
        if (device && SUCCEEDED(device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&dxgiDevice)))) {
            IDXGIAdapter* adapter = nullptr;
            if (SUCCEEDED(dxgiDevice->GetAdapter(&adapter))) {
                DXGI_ADAPTER_DESC desc;
                if (SUCCEEDED(adapter->GetDesc(&desc))) {
                    // desc.Description is UTF-16; convert to UTF-8 for logging/matching
                    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> conv;
                    device_name = conv.to_bytes(desc.Description);
                }
                adapter->Release();
            }
            dxgiDevice->Release();
        }
    }
#endif
    if (hw_device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) {
#if defined(HAVE_VA) && (VA_MAJOR_VERSION >= 1)
        // Resolve the device name from the VAAPI driver vendor string.
        VADisplay display = ((AVVAAPIDeviceContext *) hw_device_ctx->hwctx)->display;
        if (display) {
            VADriverContext *va_ctx = ((VADisplayContext *) display)->pDriverContext;
            device_name = va_ctx->str_vendor;
            if (hw_type == AV_HWDEVICE_TYPE_QSV) {
                // Workaround for issue fixed in MediaSDK 21.x https://github.com/Intel-Media-SDK/MediaSDK/issues/2595
                // Checks VAAPI driver for support of VideoProc operation required by MediaSDK
                ret = false;
                int n_entrypoints = va_ctx->max_entrypoints;
                std::vector<VAEntrypoint> entrypoints(n_entrypoints);
                if (va_ctx->vtable->vaQueryConfigEntrypoints(va_ctx, VAProfileNone, entrypoints.data(), &n_entrypoints) == VA_STATUS_SUCCESS) {
                    for (int i = 0; i < n_entrypoints; i++) {
                        if (entrypoints[i] == VAEntrypointVideoProc) {
                            ret = true;
                            break;
                        }
                    }
                }
                if (!ret)
                    CV_LOG_INFO(NULL, "FFMPEG: Skipping MFX video acceleration as entrypoint VideoProc not found in: " << device_name);
            }
        }
#else
        ret = (hw_type != AV_HWDEVICE_TYPE_QSV); // disable MFX if we can't check VAAPI for VideoProc entrypoint
#endif
    }
    if (ret && !device_subname.empty() && device_name.find(device_subname) == std::string::npos)
    {
        CV_LOG_INFO(NULL, "FFMPEG: Skipping '" << hw_name <<
            "' video acceleration on the following device name as not matching substring '" << device_subname << "': " << device_name);
        ret = false;  // reject configuration
    }
    if (ret)
    {
        if (!device_name.empty()) {
            CV_LOG_INFO(NULL, "FFMPEG: Using " << hw_name << " video acceleration on device: " << device_name);
        } else {
            CV_LOG_INFO(NULL, "FFMPEG: Using " << hw_name << " video acceleration");
        }
    }
    return ret;
}
279 
280 static
hw_create_derived_context(AVHWDeviceType hw_type,AVBufferRef * hw_device_ctx)281 AVBufferRef* hw_create_derived_context(AVHWDeviceType hw_type, AVBufferRef* hw_device_ctx) {
282     AVBufferRef* derived_ctx = NULL;
283     const char* hw_name = av_hwdevice_get_type_name(hw_type);
284     int err = av_hwdevice_ctx_create_derived(&derived_ctx, hw_type, hw_device_ctx, 0);
285     if (!derived_ctx || err < 0)
286     {
287         if (derived_ctx)
288             av_buffer_unref(&derived_ctx);
289         CV_LOG_INFO(NULL, "FFMPEG: Failed to create derived video acceleration (av_hwdevice_ctx_create_derived) for " << hw_name << ". Error=" << err);
290         return NULL;
291     }
292     else
293     {
294         // Store child context in 'user_opaque' field of parent context.
295         struct FreeChildContext {
296             static void free(struct AVHWDeviceContext* ctx) {
297                 AVBufferRef* child_ctx = (AVBufferRef*)ctx->user_opaque;
298                 if (child_ctx)
299                     av_buffer_unref(&child_ctx);
300             }
301         };
302         AVHWDeviceContext* ctx = (AVHWDeviceContext*)derived_ctx->data;
303         ctx->user_opaque = av_buffer_ref(hw_device_ctx);
304         ctx->free = FreeChildContext::free;
305         CV_LOG_INFO(NULL, "FFMPEG: Created derived video acceleration context (av_hwdevice_ctx_create_derived) for " << hw_name);
306         return derived_ctx;
307     }
308 }
309 
310 #ifdef HAVE_OPENCL // GPU buffer interop with cv::UMat
311 
312 // FFmpeg context attached to OpenCL context
313 class OpenCL_FFMPEG_Context : public ocl::Context::UserContext {
314 public:
OpenCL_FFMPEG_Context(AVBufferRef * ctx)315     OpenCL_FFMPEG_Context(AVBufferRef* ctx) {
316         ctx_ = av_buffer_ref(ctx);
317     }
~OpenCL_FFMPEG_Context()318     virtual ~OpenCL_FFMPEG_Context() {
319         av_buffer_unref(&ctx_);
320     }
GetAVHWDevice()321     AVBufferRef* GetAVHWDevice() {
322         return ctx_;
323     }
324 private:
325     AVBufferRef* ctx_;
326 };
327 
328 #ifdef HAVE_MFX
329 static
hw_find_qsv_surface_index(AVFrame * hw_frame)330 int hw_find_qsv_surface_index(AVFrame* hw_frame)
331 {
332     if (AV_PIX_FMT_QSV != hw_frame->format)
333         return -1;
334     mfxFrameSurface1* surface = (mfxFrameSurface1*)hw_frame->data[3]; // As defined by AV_PIX_FMT_QSV
335     AVHWFramesContext* frames_ctx = (AVHWFramesContext*)hw_frame->hw_frames_ctx->data;
336     AVQSVFramesContext* qsv_ctx = (AVQSVFramesContext*)frames_ctx->hwctx;
337     for (int i = 0; i < qsv_ctx->nb_surfaces; i++) {
338         if (surface == qsv_ctx->surfaces + i) {
339             return i;
340         }
341     }
342     return -1;
343 }
344 #endif
345 
346 #ifdef HAVE_VA
347 static
hw_get_va_display(AVHWDeviceContext * hw_device_ctx)348 VADisplay hw_get_va_display(AVHWDeviceContext* hw_device_ctx)
349 {
350     if (hw_device_ctx->type == AV_HWDEVICE_TYPE_QSV) { // we stored pointer to child context in 'user_opaque' field
351         AVBufferRef* ctx = (AVBufferRef*)hw_device_ctx->user_opaque;
352         hw_device_ctx = (AVHWDeviceContext*)ctx->data;
353     }
354     if (hw_device_ctx && hw_device_ctx->type == AV_HWDEVICE_TYPE_VAAPI) {
355         return ((AVVAAPIDeviceContext*)hw_device_ctx->hwctx)->display;
356     }
357     return NULL;
358 }
359 #endif // HAVE_VA
360 
361 #ifdef HAVE_VA_INTEL
// Extract the VASurfaceID backing a hardware frame.
// Supports native VAAPI frames and (with MFX) QSV frames; for QSV the surface is
// looked up by index in the child VAAPI frames context (stored in 'user_opaque'
// by hw_create_frames()). Returns VA_INVALID_SURFACE for other frame formats.
static
VASurfaceID hw_get_va_surface(AVFrame* hw_frame) {
    if (AV_PIX_FMT_VAAPI == hw_frame->format) {
        return (VASurfaceID)(size_t)hw_frame->data[3]; // As defined by AV_PIX_FMT_VAAPI
    }
#ifdef HAVE_MFX
    else if (AV_PIX_FMT_QSV == hw_frame->format) {
        int frame_idx = hw_find_qsv_surface_index(hw_frame);
        if (frame_idx >= 0) { // frame index is same in parent (QSV) and child (VAAPI) frame context
            AVHWFramesContext *frames_ctx = (AVHWFramesContext *) hw_frame->hw_frames_ctx->data;
            AVHWFramesContext *child_ctx = (AVHWFramesContext *) frames_ctx->user_opaque;
            if (child_ctx && AV_HWDEVICE_TYPE_VAAPI == child_ctx->device_ctx->type) {
                AVVAAPIFramesContext *vaapi_ctx = (AVVAAPIFramesContext *) child_ctx->hwctx;
                CV_Assert(frame_idx < vaapi_ctx->nb_surfaces);
                return vaapi_ctx->surface_ids[frame_idx];
            }
        }
    }
#endif // HAVE_MFX
    return VA_INVALID_SURFACE;
}
383 #endif // HAVE_VA_INTEL
384 
385 #ifdef HAVE_D3D11
386 static
hw_get_d3d11_device_ctx(AVHWDeviceContext * hw_device_ctx)387 AVD3D11VADeviceContext* hw_get_d3d11_device_ctx(AVHWDeviceContext* hw_device_ctx) {
388     if (AV_HWDEVICE_TYPE_QSV == hw_device_ctx->type) { // we stored pointer to child context in 'user_opaque' field
389         AVBufferRef* ctx = (AVBufferRef*)hw_device_ctx->user_opaque;
390         hw_device_ctx = (AVHWDeviceContext*)ctx->data;
391     }
392     if (AV_HWDEVICE_TYPE_D3D11VA == hw_device_ctx->type) {
393         return (AVD3D11VADeviceContext*)hw_device_ctx->hwctx;
394     }
395     return NULL;
396 }
397 
// Extract the ID3D11Texture2D (and its subresource/array index) backing a hardware
// frame. Supports native D3D11 frames and (with MFX) QSV frames; for QSV the texture
// comes from the child D3D11 frames context and the subresource equals the QSV
// surface index. Returns NULL for other frame formats.
ID3D11Texture2D* hw_get_d3d11_texture(AVFrame* hw_frame, int* subresource) {
    ID3D11Texture2D* texture = NULL;
    if (AV_PIX_FMT_D3D11 == hw_frame->format) {
        texture = (ID3D11Texture2D*)hw_frame->data[0]; // As defined by AV_PIX_FMT_D3D11
        *subresource = (intptr_t)hw_frame->data[1]; // As defined by AV_PIX_FMT_D3D11
    }
#ifdef HAVE_MFX
    else if (AV_PIX_FMT_QSV == hw_frame->format) {
        AVHWFramesContext *frames_ctx = (AVHWFramesContext *) hw_frame->hw_frames_ctx->data;
        // Child frames context pointer stored in 'user_opaque' by hw_create_frames().
        AVHWFramesContext *child_ctx = (AVHWFramesContext *) frames_ctx->user_opaque;
        if (child_ctx && AV_HWDEVICE_TYPE_D3D11VA == child_ctx->device_ctx->type) {
            texture = ((AVD3D11VAFramesContext*)child_ctx->hwctx)->texture;
        }
        *subresource = hw_find_qsv_surface_index(hw_frame);
        CV_Assert(*subresource >= 0);
    }
#endif
    return texture;
}
417 
// In D3D11 case we allocate additional texture as single texture (not texture array) because
// OpenCL interop with D3D11 doesn't support/work with NV12 sub-texture of texture array.
// The allocated texture is cached in frames_ctx->user_opaque and released by the
// D3D11SingleTexture::free callback installed in hw_create_frames().
ID3D11Texture2D* hw_get_d3d11_single_texture(AVFrame* hw_frame, AVD3D11VADeviceContext* d3d11_device_ctx, ID3D11Texture2D* texture) {
    AVHWFramesContext* frames_ctx = (AVHWFramesContext*)hw_frame->hw_frames_ctx->data;
    if (AV_HWDEVICE_TYPE_QSV == frames_ctx->device_ctx->type) {
        frames_ctx = (AVHWFramesContext*)frames_ctx->user_opaque; // we stored pointer to child context in 'user_opaque' field
    }
    if (!frames_ctx || AV_HWDEVICE_TYPE_D3D11VA != frames_ctx->device_ctx->type) {
        return NULL;
    }
    ID3D11Texture2D* singleTexture = (ID3D11Texture2D*)frames_ctx->user_opaque;
    if (!singleTexture && d3d11_device_ctx && texture) {
        // Lazily create a one-element texture with the same description as the
        // source, with flags required for OpenCL sharing.
        D3D11_TEXTURE2D_DESC desc = {};
        texture->GetDesc(&desc);
        desc.ArraySize = 1;
        desc.BindFlags |= D3D11_BIND_SHADER_RESOURCE;
        desc.MiscFlags |= D3D11_RESOURCE_MISC_SHARED;
        if (SUCCEEDED(d3d11_device_ctx->device->CreateTexture2D(&desc, NULL, &singleTexture))) {
            frames_ctx->user_opaque = singleTexture;
        }
    }
    return singleTexture;
}
441 #endif // HAVE_D3D11
442 
// Determine whether the given FFmpeg HW device context and the current OpenCL
// execution context are backed by the same physical device (same VADisplay or
// same ID3D11Device). Returns the matching interop device type, or
// AV_HWDEVICE_TYPE_NONE if there is no OpenCL context or no match.
static
AVHWDeviceType hw_check_opencl_context(AVHWDeviceContext* ctx) {
    ocl::OpenCLExecutionContext& ocl_context = ocl::OpenCLExecutionContext::getCurrentRef();
    if (!ctx || ocl_context.empty())
        return AV_HWDEVICE_TYPE_NONE;
#ifdef HAVE_VA_INTEL
    // Compare the VADisplay bound to the OpenCL context with the one in ctx.
    VADisplay vadisplay_ocl = ocl_context.getContext().getOpenCLContextProperty(CL_CONTEXT_VA_API_DISPLAY_INTEL);
    VADisplay vadisplay_ctx = hw_get_va_display(ctx);
    if (vadisplay_ocl && vadisplay_ocl == vadisplay_ctx)
        return AV_HWDEVICE_TYPE_VAAPI;
#endif
#ifdef HAVE_D3D11
    // Compare the ID3D11Device bound to the OpenCL context with the one in ctx.
    ID3D11Device* d3d11device_ocl = (ID3D11Device*)ocl_context.getContext().getOpenCLContextProperty(CL_CONTEXT_D3D11_DEVICE_KHR);
    AVD3D11VADeviceContext* d3d11_device_ctx = hw_get_d3d11_device_ctx(ctx);
    if (d3d11_device_ctx && d3d11device_ocl && d3d11_device_ctx->device == d3d11device_ocl)
        return AV_HWDEVICE_TYPE_D3D11VA;
#endif
    return AV_HWDEVICE_TYPE_NONE;
}
462 
// Create an OpenCL context bound to the given FFmpeg HW device context (through
// VA-API or D3D11 interop). If the resulting OpenCL context indeed shares the
// device, attach the FFmpeg context to it so later captures/writers can reuse it
// (see hw_create_context_from_opencl()).
static
void hw_init_opencl(AVBufferRef* ctx) {
    if (!ctx)
        return;
    AVHWDeviceContext* hw_device_ctx = (AVHWDeviceContext*)ctx->data;
    if (!hw_device_ctx)
        return;
#ifdef HAVE_VA_INTEL
    VADisplay va_display = hw_get_va_display(hw_device_ctx);
    if (va_display) {
        va_intel::ocl::initializeContextFromVA(va_display);
    }
#endif
#ifdef HAVE_D3D11
    AVD3D11VADeviceContext* d3d11_device_ctx = hw_get_d3d11_device_ctx(hw_device_ctx);
    if (d3d11_device_ctx) {
        directx::ocl::initializeContextFromD3D11Device(d3d11_device_ctx->device);
    }
#endif
    if (hw_check_opencl_context(hw_device_ctx) != AV_HWDEVICE_TYPE_NONE) {
        // Attach AVHWDeviceContext to OpenCL context
        ocl::Context &ocl_context = ocl::OpenCLExecutionContext::getCurrent().getContext();
        ocl_context.setUserContext(std::make_shared<OpenCL_FFMPEG_Context>(ctx));
    }
}
488 
489 static
hw_create_context_from_opencl(ocl::OpenCLExecutionContext & ocl_context,AVHWDeviceType hw_type)490 AVBufferRef* hw_create_context_from_opencl(ocl::OpenCLExecutionContext& ocl_context, AVHWDeviceType hw_type) {
491     if (ocl_context.empty())
492         return NULL;
493     auto ocl_ffmpeg_context = ocl_context.getContext().getUserContext<OpenCL_FFMPEG_Context>();
494     if (!ocl_ffmpeg_context)
495         return NULL;
496     AVBufferRef* ctx = ocl_ffmpeg_context->GetAVHWDevice();
497     if (hw_type != ((AVHWDeviceContext*)ctx->data)->type) {
498         ctx = hw_create_derived_context(hw_type, ctx);
499     }
500     else {
501         ctx = av_buffer_ref(ctx);
502     }
503     if (ctx)
504         CV_LOG_INFO(NULL, "FFMPEG: Using " << av_hwdevice_get_type_name(hw_type) << " video acceleration context attached to OpenCL context");
505     return ctx;
506 }
507 
508 #endif // HAVE_OPENCL
509 
// Create an FFmpeg HW device context of the requested type.
// Tries, in order:
//  1. Reusing (or deriving from) a device context already attached to the current
//     OpenCL context (see hw_init_opencl()/OpenCL_FFMPEG_Context).
//  2. Creating a fresh context via av_hwdevice_ctx_create(); for QSV a child
//     D3D11/DXVA2 (Windows) or VAAPI (other) context is created first and the QSV
//     context is derived from it.
// hw_device >= 0 selects a device index (VAAPI: /dev/dri/renderD<128+N>); negative
// selects the default device. use_opencl forces binding a new OpenCL context to
// the created HW context even if one already exists.
// Returns a new reference (caller must av_buffer_unref) or NULL on failure.
static
AVBufferRef* hw_create_device(AVHWDeviceType hw_type, int hw_device, const std::string& device_subname, bool use_opencl) {
    AVBufferRef* hw_device_ctx = NULL;
    if (AV_HWDEVICE_TYPE_NONE == hw_type)
        return NULL;

#ifdef HAVE_OPENCL
    // Check if OpenCL context has AVHWDeviceContext attached to it
    ocl::OpenCLExecutionContext& ocl_context = ocl::OpenCLExecutionContext::getCurrentRef();
    try {
        hw_device_ctx = hw_create_context_from_opencl(ocl_context, hw_type);
        if (hw_device_ctx) {
            if (hw_device >= 0)
                CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: ignoring property HW_DEVICE as device context already created and attached to OpenCL context");
            return hw_device_ctx;
        }
    }
    catch (...) {
        CV_LOG_INFO(NULL, "FFMPEG: Exception creating Video Acceleration context using current OpenCL context");
    }
#endif

    // Create new media context. In QSV case, first create 'child' context.
    std::vector<AVHWDeviceType> child_types = { hw_type };
    if (hw_type == AV_HWDEVICE_TYPE_QSV) {
#ifdef _WIN32
        child_types = { AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_DXVA2 };
#else
        child_types = { AV_HWDEVICE_TYPE_VAAPI };
#endif
    }
    for (AVHWDeviceType child_type : child_types) {
        char device[128] = "";
        char* pdevice = NULL;
        // Map non-negative device index to a backend-specific device string.
        if (hw_device >= 0 && hw_device < 100000) {
            if (child_type == AV_HWDEVICE_TYPE_VAAPI) {
                snprintf(device, sizeof(device), "/dev/dri/renderD%d", 128 + hw_device);
            }
            else {
                snprintf(device, sizeof(device), "%d", hw_device);
            }
            pdevice = device;
        }
        const char* hw_child_name = av_hwdevice_get_type_name(child_type);
        const char* device_name = pdevice ? pdevice : "'default'";
        int err = av_hwdevice_ctx_create(&hw_device_ctx, child_type, pdevice, NULL, 0);
        if (hw_device_ctx && err >= 0)
        {
            // Reject devices not matching the requested substring or failing the QSV check.
            if (!hw_check_device(hw_device_ctx, hw_type, device_subname)) {
                av_buffer_unref(&hw_device_ctx);
                continue;
            }
            CV_LOG_INFO(NULL, "FFMPEG: Created video acceleration context (av_hwdevice_ctx_create) for " << hw_child_name << " on device " << device_name);
#ifdef HAVE_OPENCL
            // if OpenCL context not created yet or property HW_ACCELERATION_USE_OPENCL set, create OpenCL context with binding to video acceleration context
            if (ocl::haveOpenCL()) {
                if (ocl_context.empty() || use_opencl) {
                    try {
                        hw_init_opencl(hw_device_ctx);
                        ocl_context = ocl::OpenCLExecutionContext::getCurrentRef();
                        if (!ocl_context.empty()) {
                            CV_LOG_INFO(NULL, "FFMPEG: Created OpenCL context with " << hw_child_name <<
                                " video acceleration on OpenCL device: " << ocl_context.getDevice().name());
                        }
                    } catch (...) {
                        CV_LOG_INFO(NULL, "FFMPEG: Exception creating OpenCL context with " << hw_child_name << " video acceleration");
                    }
                }
                else {
                    CV_LOG_INFO(NULL, "FFMPEG: Can't bind " << hw_child_name << " video acceleration context to already created OpenCL context");
                }
            }
#else
            CV_UNUSED(use_opencl);
#endif
            if (hw_type != child_type) {
                // Derive the requested (QSV) context from the child context just created.
                AVBufferRef* derived_ctx = hw_create_derived_context(hw_type, hw_device_ctx);
                av_buffer_unref(&hw_device_ctx);
                return derived_ctx;
            } else {
                return hw_device_ctx;
            }
        }
        else
        {
            const char* hw_name = hw_child_name;
            CV_LOG_INFO(NULL, "FFMPEG: Failed to create " << hw_name << " video acceleration (av_hwdevice_ctx_create) on device " << device_name);
        }
    }
    return NULL;
}
601 
// Allocate and initialize a HW frame pool (AVHWFramesContext) of width x height
// frames with the given hardware pixel format.
// In the QSV case frames are first allocated in the child D3D11/VAAPI context
// (stored in the device's 'user_opaque' when the derived device was created;
// DXVA2 is excluded since it has no OpenCL interop), then derived into QSV frames.
// The child AVHWFramesContext pointer is stored in the derived frames context's
// 'user_opaque' so hw_get_va_surface()/hw_get_d3d11_texture() can map QSV frames
// back to native surfaces.
// Returns a new reference (caller must av_buffer_unref) or NULL on failure.
static
AVBufferRef* hw_create_frames(struct AVCodecContext* codec_ctx, AVBufferRef *hw_device_ctx, int width, int height, AVPixelFormat hw_format)
{
    AVHWDeviceContext *device_ctx = (AVHWDeviceContext*)hw_device_ctx->data;
    AVBufferRef* child_ctx = hw_device_ctx;
    // In QSV case we first allocate child D3D11/VAAPI frames (except DXVA2 as no OpenCL interop), then derive to parent QSV frames
    if (AV_HWDEVICE_TYPE_QSV == device_ctx->type) {
        AVBufferRef *ctx = (AVBufferRef *) device_ctx->user_opaque; // child context stored during creation of derived context
        if (ctx && AV_HWDEVICE_TYPE_DXVA2 != ((AVHWDeviceContext *) ctx->data)->type) {
            child_ctx = ctx;
        }
    }
    AVBufferRef *hw_frames_ref = nullptr;
    if (codec_ctx)
    {
        // Prefer codec-suggested frame parameters when available.
        int res = avcodec_get_hw_frames_parameters(codec_ctx, child_ctx, hw_format, &hw_frames_ref);
        if (res < 0)
        {
            CV_LOG_DEBUG(NULL, "FFMPEG: avcodec_get_hw_frames_parameters() call failed: " << res)
        }
    }
    if (!hw_frames_ref)
    {
        hw_frames_ref = av_hwframe_ctx_alloc(child_ctx);
    }
    if (!hw_frames_ref)
    {
        CV_LOG_INFO(NULL, "FFMPEG: Failed to create HW frame context (av_hwframe_ctx_alloc)");
        return NULL;
    }
    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)(hw_frames_ref->data);
    frames_ctx->width = width;
    frames_ctx->height = height;
    if (frames_ctx->format == AV_PIX_FMT_NONE) {
        if (child_ctx == hw_device_ctx) {
            frames_ctx->format = hw_format;
        }
        else {
            // Child context frames use the child device's native format.
            AVHWFramesConstraints* constraints = av_hwdevice_get_hwframe_constraints(child_ctx, NULL);
            if (constraints) {
                frames_ctx->format = constraints->valid_hw_formats[0];
                av_hwframe_constraints_free(&constraints);
            }
        }
    }
    if (frames_ctx->sw_format == AV_PIX_FMT_NONE)
        frames_ctx->sw_format = HW_DEFAULT_SW_FORMAT;
    if (frames_ctx->initial_pool_size == 0)
        frames_ctx->initial_pool_size = HW_DEFAULT_POOL_SIZE;

#ifdef HAVE_D3D11
    if (frames_ctx->device_ctx && AV_HWDEVICE_TYPE_D3D11VA == frames_ctx->device_ctx->type) {
        // BindFlags
        AVD3D11VAFramesContext* frames_hwctx = (AVD3D11VAFramesContext*)frames_ctx->hwctx;
        frames_hwctx->BindFlags |= D3D11_BIND_DECODER | D3D11_BIND_VIDEO_ENCODER;
        // See function hw_get_d3d11_single_texture(), it allocates additional ID3D11Texture2D texture and
        // attaches it as 'user_opaque' field. We have to set free() callback before av_hwframe_ctx_init() call.
        struct D3D11SingleTexture {
            static void free(struct AVHWFramesContext* ctx) {
                ID3D11Texture2D* singleTexture = (ID3D11Texture2D*)ctx->user_opaque;
                if (ctx->user_opaque)
                    singleTexture->Release();
            }
        };
        frames_ctx->free = D3D11SingleTexture::free;
    }
#endif

    int res = av_hwframe_ctx_init(hw_frames_ref);
    if (res < 0)
    {
        CV_LOG_INFO(NULL, "FFMPEG: Failed to initialize HW frame context (av_hwframe_ctx_init): " << res);
        av_buffer_unref(&hw_frames_ref);
        return NULL;
    }

    if (child_ctx != hw_device_ctx) {
        // Derive QSV frames from the child frames; the derived context holds a
        // reference to the source, keeping frames_ctx alive after the unref below.
        AVBufferRef* derived_frame_ctx = NULL;
        int flags = AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_WRITE;
        res = av_hwframe_ctx_create_derived(&derived_frame_ctx, hw_format, hw_device_ctx, hw_frames_ref, flags);
        av_buffer_unref(&hw_frames_ref);
        if (res < 0)
        {
            CV_LOG_INFO(NULL, "FFMPEG: Failed to create derived HW frame context (av_hwframe_ctx_create_derived): " << res);
            return NULL;
        }
        else {
            // Remember the child frames context for surface/texture lookup.
            ((AVHWFramesContext*)derived_frame_ctx->data)->user_opaque = frames_ctx;
            return derived_frame_ctx;
        }
    }
    else {
        return hw_frames_ref;
    }
}
697 
698 static
hw_check_codec(AVCodec * codec,AVHWDeviceType hw_type,const char * disabled_codecs)699 bool hw_check_codec(AVCodec* codec, AVHWDeviceType hw_type, const char *disabled_codecs)
700 {
701     CV_Assert(disabled_codecs);
702     std::string hw_name = std::string(".") + av_hwdevice_get_type_name(hw_type);
703     std::stringstream s_stream(disabled_codecs);
704     while (s_stream.good()) {
705         std::string name;
706         getline(s_stream, name, ',');
707         if (name == codec->name || name == hw_name || name == codec->name + hw_name || name == "hw") {
708             CV_LOG_INFO(NULL, "FFMPEG: skipping codec " << codec->name << hw_name);
709             return false;
710         }
711     }
712     return true;
713 }
714 
// Find an FFmpeg codec with the given id that matches check_category (encoder or
// decoder predicate) and supports the requested HW device type, skipping entries
// listed in disabled_codecs (see hw_check_codec()). On success *hw_pix_fmt
// receives the codec's hardware pixel format. With hw_type == AV_HWDEVICE_TYPE_NONE
// the first matching non-experimental codec is returned. Returns NULL if none found.
static
AVCodec *hw_find_codec(AVCodecID id, AVHWDeviceType hw_type, int (*check_category)(const AVCodec *), const char *disabled_codecs, AVPixelFormat *hw_pix_fmt) {
    AVCodec *c = 0;
    void *opaque = 0;

    while (NULL != (c = (AVCodec*)av_codec_iterate(&opaque)))
    {
        if (!check_category(c))
            continue;
        if (c->id != id)
            continue;
        if (c->capabilities & AV_CODEC_CAP_EXPERIMENTAL)
            continue;
        if (hw_type != AV_HWDEVICE_TYPE_NONE) {
            AVPixelFormat hw_native_fmt = AV_PIX_FMT_NONE;
#if LIBAVUTIL_BUILD < AV_VERSION_INT(56, 51, 100) // VAAPI encoders support avcodec_get_hw_config() starting ffmpeg 4.3
            if (hw_type == AV_HWDEVICE_TYPE_VAAPI)
                hw_native_fmt = AV_PIX_FMT_VAAPI_VLD;
#endif
            if (hw_type == AV_HWDEVICE_TYPE_CUDA) // CUDA encoders don't support avcodec_get_hw_config()
                hw_native_fmt = AV_PIX_FMT_CUDA;
            // Fallback path: match encoders advertising the native HW pixel format
            // directly in pix_fmts (codecs without avcodec_get_hw_config() support).
            if (av_codec_is_encoder(c) && hw_native_fmt != AV_PIX_FMT_NONE && c->pix_fmts) {
                for (int i = 0; c->pix_fmts[i] != AV_PIX_FMT_NONE; i++) {
                    if (c->pix_fmts[i] == hw_native_fmt) {
                        *hw_pix_fmt = hw_native_fmt;
                        if (hw_check_codec(c, hw_type, disabled_codecs))
                            return c;
                    }
                }
            }
            // Standard path: enumerate the codec's HW configurations.
            for (int i = 0;; i++) {
                const AVCodecHWConfig *hw_config = avcodec_get_hw_config(c, i);
                if (!hw_config)
                    break;
                if (hw_config->device_type == hw_type) {
                    *hw_pix_fmt = hw_config->pix_fmt;
                    if (hw_check_codec(c, hw_type, disabled_codecs))
                        return c;
                }
            }
        } else {
            return c;
        }
    }

    return NULL;
}
762 
763 // Callback to select hardware pixel format (not software format) and allocate frame pool (hw_frames_ctx)
764 static
hw_get_format_callback(struct AVCodecContext * ctx,const enum AVPixelFormat * fmt)765 AVPixelFormat hw_get_format_callback(struct AVCodecContext *ctx, const enum AVPixelFormat * fmt) {
766     if (!ctx->hw_device_ctx)
767         return fmt[0];
768     AVHWDeviceType hw_type = ((AVHWDeviceContext*)ctx->hw_device_ctx->data)->type;
769     for (int j = 0;; j++) {
770         const AVCodecHWConfig *hw_config = avcodec_get_hw_config(ctx->codec, j);
771         if (!hw_config)
772             break;
773         if (hw_config->device_type == hw_type) {
774             for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) {
775                 if (fmt[i] == hw_config->pix_fmt) {
776                     if (hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) {
777                         ctx->sw_pix_fmt = HW_DEFAULT_SW_FORMAT;
778                         ctx->hw_frames_ctx = hw_create_frames(ctx, ctx->hw_device_ctx, ctx->width, ctx->height, fmt[i]);
779                         if (ctx->hw_frames_ctx) {
780                             //ctx->sw_pix_fmt = ((AVHWFramesContext *)(ctx->hw_frames_ctx->data))->sw_format;
781                             return fmt[i];
782                         }
783                     }
784                 }
785             }
786         }
787     }
788     CV_LOG_DEBUG(NULL, "FFMPEG: Can't select HW format in 'get_format()' callback, use default");
789     return fmt[0];
790 }
791 
// GPU color conversion NV12->BGRA via OpenCL extensions
// Zero-copy download path: converts the HW frame (VAAPI surface or D3D11 texture)
// to a cv::UMat through OpenCL interop, avoiding a round-trip through system memory.
// Returns false when interop is unavailable so the caller can fall back to a CPU copy.
static bool
hw_copy_frame_to_umat(AVBufferRef* ctx, AVFrame* hw_frame, cv::OutputArray output) {
    CV_UNUSED(hw_frame);
    CV_UNUSED(output);
    if (!ctx)
        return false;

#ifdef HAVE_OPENCL
    try {
        // check that current OpenCL context initialized with binding to same VAAPI/D3D11 context
        AVHWDeviceContext *hw_device_ctx = (AVHWDeviceContext *) ctx->data;
        AVHWDeviceType child_type = hw_check_opencl_context(hw_device_ctx);
        if (child_type == AV_HWDEVICE_TYPE_NONE)
            return false;

#ifdef HAVE_VA_INTEL
        if (child_type == AV_HWDEVICE_TYPE_VAAPI) {
            VADisplay va_display = hw_get_va_display(hw_device_ctx);
            VASurfaceID va_surface = hw_get_va_surface(hw_frame);
            if (va_display && va_surface != VA_INVALID_SURFACE) {
                va_intel::convertFromVASurface(va_display, va_surface, {hw_frame->width, hw_frame->height}, output);
                return true;
            }
        }
#endif

#ifdef HAVE_D3D11
        if (child_type == AV_HWDEVICE_TYPE_D3D11VA) {
            AVD3D11VADeviceContext* d3d11_device_ctx = hw_get_d3d11_device_ctx(hw_device_ctx);
            int subresource = 0;
            ID3D11Texture2D* texture = hw_get_d3d11_texture(hw_frame, &subresource);
            ID3D11Texture2D* singleTexture = hw_get_d3d11_single_texture(hw_frame, d3d11_device_ctx, texture);
            if (texture && singleTexture) {
                // Copy D3D11 sub-texture to D3D11 single texture
                d3d11_device_ctx->device_context->CopySubresourceRegion(singleTexture, 0, 0, 0, 0, texture, subresource, NULL);
                // Copy D3D11 single texture to cv::UMat
                directx::convertFromD3D11Texture2D(singleTexture, output);
                return true;
            }
        }
#endif
    }
    catch (...)
    {
        // Best-effort interop: any failure just disables the zero-copy path;
        // the caller is expected to fall back to a CPU-based transfer.
        return false;
    }
#endif // HAVE_OPENCL

    return false;
}
843 
// GPU color conversion BGRA->NV12 via OpenCL extensions
// Zero-copy upload path (mirror of hw_copy_frame_to_umat): converts a cv::UMat into
// the HW frame (VAAPI surface or D3D11 texture) through OpenCL interop.
// Returns false when interop is unavailable so the caller can fall back to a CPU copy.
static bool
hw_copy_umat_to_frame(AVBufferRef* ctx, cv::InputArray input, AVFrame* hw_frame) {
    CV_UNUSED(input);
    CV_UNUSED(hw_frame);
    if (!ctx)
        return false;

#ifdef HAVE_OPENCL
    try {
        // check that current OpenCL context initialized with binding to same VAAPI/D3D11 context
        AVHWDeviceContext *hw_device_ctx = (AVHWDeviceContext *) ctx->data;
        AVHWDeviceType child_type = hw_check_opencl_context(hw_device_ctx);
        if (child_type == AV_HWDEVICE_TYPE_NONE)
            return false;

#ifdef HAVE_VA_INTEL
        if (child_type == AV_HWDEVICE_TYPE_VAAPI) {
            VADisplay va_display = hw_get_va_display(hw_device_ctx);
            VASurfaceID va_surface = hw_get_va_surface(hw_frame);
            if (va_display != NULL && va_surface != VA_INVALID_SURFACE) {
                va_intel::convertToVASurface(va_display, input, va_surface, {hw_frame->width, hw_frame->height});
                return true;
            }
        }
#endif

#ifdef HAVE_D3D11
        if (child_type == AV_HWDEVICE_TYPE_D3D11VA) {
            AVD3D11VADeviceContext* d3d11_device_ctx = hw_get_d3d11_device_ctx(hw_device_ctx);
            int subresource = 0;
            ID3D11Texture2D* texture = hw_get_d3d11_texture(hw_frame, &subresource);
            ID3D11Texture2D* singleTexture = hw_get_d3d11_single_texture(hw_frame, d3d11_device_ctx, texture);
            if (texture && singleTexture) {
                // Copy cv::UMat to D3D11 single texture
                directx::convertToD3D11Texture2D(input, singleTexture);
                // Copy D3D11 single texture to D3D11 sub-texture
                d3d11_device_ctx->device_context->CopySubresourceRegion(texture, subresource, 0, 0, 0, singleTexture, 0, NULL);
                return true;
            }
        }
#endif
    }
    catch (...)
    {
        // Best-effort interop: any failure just disables the zero-copy path;
        // the caller is expected to fall back to a CPU-based transfer.
        return false;
    }
#endif // HAVE_OPENCL

    return false;
}
895 
896 static
hw_type_to_va_type(AVHWDeviceType hw_type)897 VideoAccelerationType hw_type_to_va_type(AVHWDeviceType hw_type) {
898     struct HWTypeFFMPEG {
899         AVHWDeviceType hw_type;
900         VideoAccelerationType va_type;
901     } known_hw_types[] = {
902             { AV_HWDEVICE_TYPE_D3D11VA, VIDEO_ACCELERATION_D3D11 },
903             { AV_HWDEVICE_TYPE_VAAPI, VIDEO_ACCELERATION_VAAPI },
904             { AV_HWDEVICE_TYPE_QSV, VIDEO_ACCELERATION_MFX },
905             { AV_HWDEVICE_TYPE_CUDA, (VideoAccelerationType)(1 << 11) },
906     };
907     for (const HWTypeFFMPEG& hw : known_hw_types) {
908         if (hw_type == hw.hw_type)
909             return hw.va_type;
910     }
911     return VIDEO_ACCELERATION_NONE;
912 }
913 
// Iterates over the comma-separated list of candidate acceleration backends
// configured for the requested VideoAccelerationType (e.g. "d3d11va,vaapi.0,").
// Each entry has the form "<hw_type>[.<device_subname>]"; a trailing empty entry
// represents the no-acceleration (software) fallback.
class HWAccelIterator {
public:
    HWAccelIterator(VideoAccelerationType va_type, bool isEncoder, AVDictionary *dict)
        : hw_type_(AV_HWDEVICE_TYPE_NONE)
    {
        std::string accel_list;
        if (va_type != VIDEO_ACCELERATION_NONE)
        {
            updateAccelList_(accel_list, va_type, isEncoder, dict);
        }
        if (va_type == VIDEO_ACCELERATION_ANY)
        {
            if (!accel_list.empty())
                accel_list += ",";  // add no-acceleration case to the end of the list
        }
        CV_LOG_DEBUG(NULL, "FFMPEG: allowed acceleration types (" << getVideoAccelerationName(va_type) << "): '" << accel_list << "'");

        if (accel_list.empty() && va_type != VIDEO_ACCELERATION_NONE && va_type != VIDEO_ACCELERATION_ANY)
        {
            // A specific acceleration type was requested but has no configured
            // candidates: deliberately read from the (empty) default-constructed
            // stream to set its fail state, so good() returns false and the
            // iterator yields nothing at all.
            std::string tmp;
            s_stream_ >> tmp;
        }
        else
        {
            s_stream_ = std::istringstream(accel_list);
        }

        if (va_type != VIDEO_ACCELERATION_NONE)
        {
            disabled_codecs_ = isEncoder
                    ? getEncoderDisabledCodecs(dict)
                    : getDecoderDisabledCodecs(dict);
            CV_LOG_DEBUG(NULL, "FFMPEG: disabled codecs: '" << disabled_codecs_ << "'");
        }
    }
    // True while more candidate entries remain to be parsed.
    bool good() const
    {
        return s_stream_.good();
    }
    // Reads the next comma-separated entry and splits it into the HW type name and
    // the optional device sub-name (the part after '.'); resolves the FFmpeg device
    // type (AV_HWDEVICE_TYPE_NONE for the empty/unknown entry).
    void parse_next()
    {
        getline(s_stream_, hw_type_device_string_, ',');
        size_t index = hw_type_device_string_.find('.');
        if (index != std::string::npos) {
            device_subname_ = hw_type_device_string_.substr(index + 1);
            hw_type_string_ = hw_type_device_string_.substr(0, index);
        } else {
            device_subname_.clear();
            hw_type_string_ = hw_type_device_string_;
        }
        hw_type_ = av_hwdevice_find_type_by_name(hw_type_string_.c_str());
    }
    // Accessors for the entry most recently produced by parse_next().
    const std::string& hw_type_device_string() const { return hw_type_device_string_; }
    const std::string& hw_type_string() const { return hw_type_string_; }
    AVHWDeviceType hw_type() const { return hw_type_; }
    const std::string& device_subname() const { return device_subname_; }
    const std::string& disabled_codecs() const { return disabled_codecs_; }
private:
    // Appends the encoder/decoder acceleration configuration for va_type to
    // accel_list (comma-separated); returns false if nothing was configured.
    bool updateAccelList_(std::string& accel_list, VideoAccelerationType va_type, bool isEncoder, AVDictionary *dict)
    {
        std::string new_accels = isEncoder
                ? getEncoderConfiguration(va_type, dict)
                : getDecoderConfiguration(va_type, dict);
        if (new_accels.empty())
            return false;
        if (accel_list.empty())
            accel_list = new_accels;
        else
            accel_list = accel_list + "," + new_accels;
        return true;
    }
    std::istringstream s_stream_;          // remaining candidate entries
    std::string hw_type_device_string_;    // full current entry, e.g. "vaapi.0"
    std::string hw_type_string_;           // HW type part, e.g. "vaapi"
    AVHWDeviceType hw_type_;               // resolved FFmpeg device type of current entry
    std::string device_subname_;           // device part after '.', may be empty

    std::string disabled_codecs_;          // comma-separated disabled-codec list
};
994