/*
 *  Copyright (C) 2017-2019 Team Kodi
 *  This file is part of Kodi - https://kodi.tv
 *
 *  SPDX-License-Identifier: GPL-2.0-or-later
 *  See LICENSES/README.md for more information.
 */
8 
#include "RendererShaders.h"

#include "DVDCodecs/Video/DXVA.h"
#include "rendering/dx/RenderContext.h"
#include "utils/CPUInfo.h"
#ifndef _M_ARM
  #include "utils/gpu_memcpy_sse4.h"
#endif
#include "utils/log.h"
#include "windowing/GraphicContext.h"

#include <algorithm>

#include <ppl.h>

using namespace Microsoft::WRL;
23 static DXGI_FORMAT plane_formats[][2] =
24 {
25   { DXGI_FORMAT_R8_UNORM,  DXGI_FORMAT_R8G8_UNORM },   // NV12
26   { DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM }, // P010
27   { DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM }  // P016
28 };
29 
Create(CVideoSettings & videoSettings)30 CRendererBase* CRendererShaders::Create(CVideoSettings& videoSettings)
31 {
32   return new CRendererShaders(videoSettings);
33 }
34 
GetWeight(std::map<RenderMethod,int> & weights,const VideoPicture & picture)35 void CRendererShaders::GetWeight(std::map<RenderMethod, int>& weights, const VideoPicture& picture)
36 {
37   unsigned weight = 0;
38   const AVPixelFormat av_pixel_format = picture.videoBuffer->GetFormat();
39 
40   switch (av_pixel_format)
41   {
42     case AV_PIX_FMT_D3D11VA_VLD:
43       if (IsHWPicSupported(picture))
44         weight += 1000; // support natively
45       else
46         weight += 200; // double copying (GPU->CPU->GPU)
47       break;
48     case AV_PIX_FMT_YUV420P:
49     case AV_PIX_FMT_NV12:
50       weight += 500; // single copying
51       break;
52     case AV_PIX_FMT_YUV420P10:
53     case AV_PIX_FMT_YUV420P16:
54       if (DX::Windowing()->IsFormatSupport(DXGI_FORMAT_R16_UNORM, D3D11_FORMAT_SUPPORT_TEXTURE2D))
55         weight += 500; // single copying
56       else
57         CLog::LogF(LOGWARNING, "Texture format DXGI_FORMAT_R16_UNORM is not supported.");
58       break;
59     case AV_PIX_FMT_P010:
60     case AV_PIX_FMT_P016:
61       if (DX::Windowing()->IsFormatSupport(DXGI_FORMAT_R16_UNORM, D3D11_FORMAT_SUPPORT_TEXTURE2D) &&
62           DX::Windowing()->IsFormatSupport(DXGI_FORMAT_R16G16_UNORM,
63                                            D3D11_FORMAT_SUPPORT_TEXTURE2D))
64         weight += 500; // single copying
65       else
66         CLog::LogF(LOGWARNING, "Texture format R16_UNORM / R16G16_UNORM is not supported.");
67       break;
68   }
69 
70   if (weight > 0)
71     weights[RENDER_PS] = weight;
72 }
73 
Supports(ESCALINGMETHOD method)74 bool CRendererShaders::Supports(ESCALINGMETHOD method)
75 {
76   if (method == VS_SCALINGMETHOD_LINEAR)
77     return true;
78 
79   return __super::Supports(method);
80 }
81 
Configure(const VideoPicture & picture,float fps,unsigned orientation)82 bool CRendererShaders::Configure(const VideoPicture& picture, float fps, unsigned orientation)
83 {
84   if (__super::Configure(picture, fps, orientation))
85   {
86     m_format = picture.videoBuffer->GetFormat();
87     if (m_format == AV_PIX_FMT_D3D11VA_VLD)
88     {
89       const DXGI_FORMAT dxgi_format = GetDXGIFormat(picture);
90 
91       // if decoded texture isn't supported in shaders
92       // then change format to supported via copying
93       if (!IsHWPicSupported(picture))
94         m_format = GetAVFormat(dxgi_format);
95     }
96 
97     CreateIntermediateTarget(m_sourceWidth, m_sourceHeight);
98     return true;
99   }
100   return false;
101 }
102 
RenderImpl(CD3DTexture & target,CRect & sourceRect,CPoint (& destPoints)[4],uint32_t flags)103 void CRendererShaders::RenderImpl(CD3DTexture& target, CRect& sourceRect, CPoint(&destPoints)[4], uint32_t flags)
104 {
105   if (!m_colorShader)
106     return;
107 
108   // reset scissors and viewport
109   CD3D11_VIEWPORT viewPort(0.0f, 0.0f,
110     static_cast<float>(target.GetWidth()),
111     static_cast<float>(target.GetHeight()));
112   DX::DeviceResources::Get()->GetD3DContext()->RSSetViewports(1, &viewPort);
113   DX::Windowing()->ResetScissors();
114 
115   CRenderBuffer* buf = m_renderBuffers[m_iBufferIndex];
116 
117   CPoint srcPoints[4];
118   sourceRect.GetQuad(srcPoints);
119 
120   m_colorShader->SetParams(m_videoSettings.m_Contrast, m_videoSettings.m_Brightness,
121                            DX::Windowing()->UseLimitedColor());
122   m_colorShader->SetColParams(buf->color_space, buf->bits, !buf->full_range, buf->texBits);
123   m_colorShader->Render(sourceRect, srcPoints, buf, target);
124 
125   if (!HasHQScaler())
126     ReorderDrawPoints(CRect(destPoints[0], destPoints[2]), destPoints);
127 }
128 
CheckVideoParameters()129 void CRendererShaders::CheckVideoParameters()
130 {
131   __super::CheckVideoParameters();
132 
133   CRenderBuffer* buf = m_renderBuffers[m_iBufferIndex];
134   const AVColorPrimaries srcPrim = GetSrcPrimaries(buf->primaries, buf->GetWidth(), buf->GetHeight());
135   if (srcPrim != m_srcPrimaries)
136   {
137     // source params is changed, reset shader
138     m_srcPrimaries = srcPrim;
139     m_colorShader.reset();
140   }
141 }
142 
UpdateVideoFilters()143 void CRendererShaders::UpdateVideoFilters()
144 {
145   __super::UpdateVideoFilters();
146 
147   if (!m_colorShader)
148   {
149     m_colorShader = std::make_unique<CYUV2RGBShader>();
150 
151     AVColorPrimaries dstPrimaries = AVCOL_PRI_BT709;
152 
153     if (DX::Windowing()->IsHDROutput() &&
154         (m_srcPrimaries == AVCOL_PRI_BT709 || m_srcPrimaries == AVCOL_PRI_BT2020))
155       dstPrimaries = m_srcPrimaries;
156 
157     if (!m_colorShader->Create(m_format, dstPrimaries, m_srcPrimaries))
158     {
159       // we are in a big trouble
160       CLog::LogF(LOGERROR, "unable to create YUV->RGB shader, rendering is not possible");
161       m_colorShader.reset();
162     }
163   }
164 }
165 
IsHWPicSupported(const VideoPicture & picture)166 bool CRendererShaders::IsHWPicSupported(const VideoPicture& picture)
167 {
168   // checking support of decoder texture in shaders
169   const DXGI_FORMAT dxgi_format = GetDXGIFormat(picture);
170   if (dxgi_format != DXGI_FORMAT_UNKNOWN)
171   {
172     CD3D11_TEXTURE2D_DESC texDesc(
173       dxgi_format,
174       FFALIGN(picture.iWidth, 32),
175       FFALIGN(picture.iHeight, 32),
176       1, 1,
177       D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE,
178       D3D11_USAGE_DEFAULT
179     );
180 
181     ComPtr<ID3D11Device> pDevice = DX::DeviceResources::Get()->GetD3DDevice();
182     return SUCCEEDED(pDevice->CreateTexture2D(&texDesc, nullptr, nullptr));
183   }
184   return false;
185 }
186 
GetSrcPrimaries(AVColorPrimaries srcPrimaries,unsigned width,unsigned height)187 AVColorPrimaries CRendererShaders::GetSrcPrimaries(AVColorPrimaries srcPrimaries, unsigned width, unsigned height)
188 {
189   AVColorPrimaries ret = srcPrimaries;
190   if (ret == AVCOL_PRI_UNSPECIFIED)
191   {
192     if (width > 1024 || height >= 600)
193       ret = AVCOL_PRI_BT709;
194     else
195       ret = AVCOL_PRI_BT470BG;
196   }
197   return ret;
198 }
199 
CreateBuffer()200 CRenderBuffer* CRendererShaders::CreateBuffer()
201 {
202   return new CRenderBufferImpl(m_format, m_sourceWidth, m_sourceHeight);
203 }
204 
CRenderBufferImpl(AVPixelFormat av_pix_format,unsigned width,unsigned height)205 CRendererShaders::CRenderBufferImpl::CRenderBufferImpl(AVPixelFormat av_pix_format, unsigned width, unsigned height)
206   : CRenderBuffer(av_pix_format, width, height)
207 {
208   DXGI_FORMAT view_formats[YuvImage::MAX_PLANES] = {};
209 
210   switch (av_format)
211   {
212   case AV_PIX_FMT_D3D11VA_VLD:
213     m_viewCount = 2;
214     break;
215   case AV_PIX_FMT_NV12:
216   {
217     view_formats[0] = DXGI_FORMAT_R8_UNORM;
218     view_formats[1] = DXGI_FORMAT_R8G8_UNORM;
219     // FL 9.x doesn't support DXGI_FORMAT_R8G8_UNORM, so we have to use SNORM and correct values in shader
220     if (!DX::Windowing()->IsFormatSupport(view_formats[1], D3D11_FORMAT_SUPPORT_TEXTURE2D))
221       view_formats[1] = DXGI_FORMAT_R8G8_SNORM;
222     m_viewCount = 2;
223     break;
224   }
225   case AV_PIX_FMT_P010:
226   case AV_PIX_FMT_P016:
227   {
228     view_formats[0] = DXGI_FORMAT_R16_UNORM;
229     view_formats[1] = DXGI_FORMAT_R16G16_UNORM;
230     m_viewCount = 2;
231     break;
232   }
233   case AV_PIX_FMT_YUV420P:
234   {
235     view_formats[0] = view_formats[1] = view_formats[2] = DXGI_FORMAT_R8_UNORM;
236     m_viewCount = 3;
237     break;
238   }
239   case AV_PIX_FMT_YUV420P10:
240   case AV_PIX_FMT_YUV420P16:
241   {
242     view_formats[0] = view_formats[1] = view_formats[2] = DXGI_FORMAT_R16_UNORM;
243     m_viewCount = 3;
244     texBits = av_format == AV_PIX_FMT_YUV420P10 ? 10 : 16;
245     break;
246   }
247   default:
248     // unsupported format
249     return;
250   }
251 
252   if (av_format != AV_PIX_FMT_D3D11VA_VLD)
253   {
254     for (size_t i = 0; i < m_viewCount; i++)
255     {
256       const auto w = i ? m_width >> 1 : m_width;
257       const auto h = i ? m_height >> 1 : m_height;
258 
259       if (!m_textures[i].Create(w, h, 1, D3D11_USAGE_DYNAMIC, view_formats[i]))
260         break;
261 
262       // clear plane
263       D3D11_MAPPED_SUBRESOURCE mapping = {};
264       if (m_textures[i].LockRect(0, &mapping, D3D11_MAP_WRITE_DISCARD))
265       {
266         if (view_formats[i] == DXGI_FORMAT_R8_UNORM ||
267           view_formats[i] == DXGI_FORMAT_R8G8_UNORM ||
268           view_formats[i] == DXGI_FORMAT_R8G8_SNORM)
269           memset(mapping.pData, i ? 0x80 : 0, mapping.RowPitch * h);
270         else
271           wmemset(static_cast<wchar_t*>(mapping.pData), i ? 0x8000 : 0, mapping.RowPitch * h >> 1);
272 
273         if (m_textures[i].UnlockRect(0)) {}
274       }
275     }
276   }
277 }
278 
~CRenderBufferImpl()279 CRendererShaders::CRenderBufferImpl::~CRenderBufferImpl()
280 {
281   CRenderBufferImpl::ReleasePicture();
282 }
283 
AppendPicture(const VideoPicture & picture)284 void CRendererShaders::CRenderBufferImpl::AppendPicture(const VideoPicture& picture)
285 {
286   __super::AppendPicture(picture);
287 
288   if (videoBuffer->GetFormat() == AV_PIX_FMT_D3D11VA_VLD)
289   {
290     if (AV_PIX_FMT_D3D11VA_VLD != av_format)
291       QueueCopyFromGPU();
292 
293     const auto hw = dynamic_cast<DXVA::CVideoBuffer*>(videoBuffer);
294     m_widthTex = hw->width;
295     m_heightTex = hw->height;
296   }
297 }
298 
UploadBuffer()299 bool CRendererShaders::CRenderBufferImpl::UploadBuffer()
300 {
301   if (!videoBuffer)
302     return false;
303 
304   if (videoBuffer->GetFormat() == AV_PIX_FMT_D3D11VA_VLD)
305   {
306     if (AV_PIX_FMT_D3D11VA_VLD == av_format)
307       m_bLoaded = true;
308     else
309       m_bLoaded = UploadFromGPU();
310   }
311   else
312     m_bLoaded = UploadFromBuffer();
313 
314   return m_bLoaded;
315 }
316 
GetViewCount() const317 unsigned CRendererShaders::CRenderBufferImpl::GetViewCount() const
318 {
319   return m_viewCount;
320 }
321 
GetView(unsigned viewIdx)322 ID3D11View* CRendererShaders::CRenderBufferImpl::GetView(unsigned viewIdx)
323 {
324   if (videoBuffer->GetFormat() == AV_PIX_FMT_D3D11VA_VLD &&
325     AV_PIX_FMT_D3D11VA_VLD == av_format)
326   {
327     if (m_planes[viewIdx])
328       return m_planes[viewIdx].Get();
329 
330     unsigned arrayIdx;
331     ComPtr<ID3D11Resource> pResource;
332     if (FAILED(GetResource(&pResource, &arrayIdx)))
333     {
334       CLog::LogF(LOGERROR, "unable to open d3d11va resource.");
335       return nullptr;
336     }
337 
338     const auto dxva_format = CRendererBase::GetDXGIFormat(videoBuffer);
339     // impossible but we check
340     if (dxva_format < DXGI_FORMAT_NV12 || dxva_format > DXGI_FORMAT_P016)
341       return nullptr;
342 
343     CD3D11_SHADER_RESOURCE_VIEW_DESC srvDesc(
344       D3D11_SRV_DIMENSION_TEXTURE2DARRAY,
345       plane_formats[dxva_format - DXGI_FORMAT_NV12][viewIdx],
346       0, 1, arrayIdx, 1
347     );
348 
349     ComPtr<ID3D11Device> pD3DDevice = DX::DeviceResources::Get()->GetD3DDevice();
350     if (FAILED(pD3DDevice->CreateShaderResourceView(pResource.Get(), &srvDesc, &m_planes[viewIdx])))
351     {
352       CLog::LogF(LOGERROR, "unable to create shader target for decoder texture.");
353       return nullptr;
354     }
355 
356     return m_planes[viewIdx].Get();
357   }
358 
359   return m_textures[viewIdx].GetShaderResource();
360 }
361 
ReleasePicture()362 void CRendererShaders::CRenderBufferImpl::ReleasePicture()
363 {
364   __super::ReleasePicture();
365 
366   m_planes[0] = nullptr;
367   m_planes[1] = nullptr;
368 }
369 
UploadFromGPU()370 bool CRendererShaders::CRenderBufferImpl::UploadFromGPU()
371 {
372   ComPtr<ID3D11DeviceContext> pContext = DX::DeviceResources::Get()->GetImmediateContext();
373   D3D11_MAPPED_SUBRESOURCE mapGPU;
374   D3D11_MAPPED_SUBRESOURCE mappings[2];
375 
376   if (FAILED(pContext->Map(m_staging.Get(), 0, D3D11_MAP_READ, 0, &mapGPU)))
377     return false;
378 
379   if (!m_textures[PLANE_Y].LockRect(0, &mappings[PLANE_Y], D3D11_MAP_WRITE_DISCARD) ||
380     !m_textures[PLANE_UV].LockRect(0, &mappings[PLANE_UV], D3D11_MAP_WRITE_DISCARD))
381   {
382     pContext->Unmap(m_staging.Get(), 0);
383     return false;
384   }
385 
386   void* (*copy_func)(void* d, const void* s, size_t size) =
387 #if defined(HAVE_SSE2)
388       ((CServiceBroker::GetCPUInfo()->GetCPUFeatures() & CPU_FEATURE_SSE4) != 0) ? gpu_memcpy :
389 #endif
390                                                                                  memcpy;
391 
392   auto* s_y = static_cast<uint8_t*>(mapGPU.pData);
393   auto* s_uv = static_cast<uint8_t*>(mapGPU.pData) + m_sDesc.Height * mapGPU.RowPitch;
394   auto* d_y = static_cast<uint8_t*>(mappings[PLANE_Y].pData);
395   auto* d_uv = static_cast<uint8_t*>(mappings[PLANE_UV].pData);
396 
397   if (mappings[PLANE_Y].RowPitch == mapGPU.RowPitch
398     && mappings[PLANE_UV].RowPitch == mapGPU.RowPitch)
399   {
400     Concurrency::parallel_invoke([&]() {
401       // copy Y
402       copy_func(d_y, s_y, mapGPU.RowPitch * m_height);
403     }, [&]() {
404       // copy UV
405       copy_func(d_uv, s_uv, mapGPU.RowPitch * m_height >> 1);
406     });
407   }
408   else
409   {
410     Concurrency::parallel_invoke([&]() {
411       // copy Y
412       for (unsigned y = 0; y < m_height; ++y)
413       {
414         copy_func(d_y, s_y, mappings[PLANE_Y].RowPitch);
415         s_y += mapGPU.RowPitch;
416         d_y += mappings[PLANE_Y].RowPitch;
417       }
418     }, [&]() {
419       // copy UV
420       for (unsigned y = 0; y < m_height >> 1; ++y)
421       {
422         copy_func(d_uv, s_uv, mappings[PLANE_UV].RowPitch);
423         s_uv += mapGPU.RowPitch;
424         d_uv += mappings[PLANE_UV].RowPitch;
425       }
426     });
427   }
428   pContext->Unmap(m_staging.Get(), 0);
429 
430   return m_textures[PLANE_Y].UnlockRect(0) &&
431     m_textures[PLANE_UV].UnlockRect(0);
432 }
433 
UploadFromBuffer() const434 bool CRendererShaders::CRenderBufferImpl::UploadFromBuffer() const
435 {
436   uint8_t* bufData[3];
437   int srcLines[3];
438   videoBuffer->GetPlanes(bufData);
439   videoBuffer->GetStrides(srcLines);
440   std::vector<Concurrency::task<void>> tasks;
441 
442   for (unsigned plane = 0; plane < m_viewCount; ++plane)
443   {
444     D3D11_MAPPED_SUBRESOURCE mapping = {};
445     if (!m_textures[plane].LockRect(0, &mapping, D3D11_MAP_WRITE_DISCARD))
446       break;
447 
448     auto* dst = static_cast<uint8_t*>(mapping.pData);
449     auto* src = bufData[plane];
450     int srcLine = srcLines[plane];
451     int dstLine = mapping.RowPitch;
452     int height = plane ? m_height >> 1 : m_height;
453 
454     auto task = Concurrency::create_task([src, dst, srcLine, dstLine, height]()
455     {
456       if (srcLine == dstLine)
457       {
458         memcpy(dst, src, srcLine * height);
459       }
460       else
461       {
462         uint8_t* s = src;
463         uint8_t* d = dst;
464         for (int i = 0; i < height; ++i)
465         {
466           memcpy(d, s, std::min(srcLine, dstLine));
467           d += dstLine;
468           s += srcLine;
469         }
470       }
471     });
472     tasks.push_back(task);
473   }
474 
475   // event based await is required on WinRT because
476   // blocking WinRT STA threads with task.wait() isn't allowed
477   auto sync = std::make_shared<Concurrency::event>();
478   when_all(tasks.begin(), tasks.end()).then([&sync]() {
479     sync->set();
480   });
481   sync->wait();
482 
483   for (unsigned plane = 0; plane < m_viewCount; ++plane)
484     if (!m_textures[plane].UnlockRect(0)) {}
485 
486   return true;
487 }
488