1 /*
2 * Copyright (C) 2017-2019 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
8
9 #include "RendererShaders.h"
10
11 #include "DVDCodecs/Video/DXVA.h"
12 #include "rendering/dx/RenderContext.h"
13 #include "utils/CPUInfo.h"
14 #ifndef _M_ARM
15 #include "utils/gpu_memcpy_sse4.h"
16 #endif
17 #include "utils/log.h"
18 #include "windowing/GraphicContext.h"
19
20 #include <ppl.h>
21
22 using namespace Microsoft::WRL;
23 static DXGI_FORMAT plane_formats[][2] =
24 {
25 { DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8G8_UNORM }, // NV12
26 { DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM }, // P010
27 { DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16G16_UNORM } // P016
28 };
29
Create(CVideoSettings & videoSettings)30 CRendererBase* CRendererShaders::Create(CVideoSettings& videoSettings)
31 {
32 return new CRendererShaders(videoSettings);
33 }
34
GetWeight(std::map<RenderMethod,int> & weights,const VideoPicture & picture)35 void CRendererShaders::GetWeight(std::map<RenderMethod, int>& weights, const VideoPicture& picture)
36 {
37 unsigned weight = 0;
38 const AVPixelFormat av_pixel_format = picture.videoBuffer->GetFormat();
39
40 switch (av_pixel_format)
41 {
42 case AV_PIX_FMT_D3D11VA_VLD:
43 if (IsHWPicSupported(picture))
44 weight += 1000; // support natively
45 else
46 weight += 200; // double copying (GPU->CPU->GPU)
47 break;
48 case AV_PIX_FMT_YUV420P:
49 case AV_PIX_FMT_NV12:
50 weight += 500; // single copying
51 break;
52 case AV_PIX_FMT_YUV420P10:
53 case AV_PIX_FMT_YUV420P16:
54 if (DX::Windowing()->IsFormatSupport(DXGI_FORMAT_R16_UNORM, D3D11_FORMAT_SUPPORT_TEXTURE2D))
55 weight += 500; // single copying
56 else
57 CLog::LogF(LOGWARNING, "Texture format DXGI_FORMAT_R16_UNORM is not supported.");
58 break;
59 case AV_PIX_FMT_P010:
60 case AV_PIX_FMT_P016:
61 if (DX::Windowing()->IsFormatSupport(DXGI_FORMAT_R16_UNORM, D3D11_FORMAT_SUPPORT_TEXTURE2D) &&
62 DX::Windowing()->IsFormatSupport(DXGI_FORMAT_R16G16_UNORM,
63 D3D11_FORMAT_SUPPORT_TEXTURE2D))
64 weight += 500; // single copying
65 else
66 CLog::LogF(LOGWARNING, "Texture format R16_UNORM / R16G16_UNORM is not supported.");
67 break;
68 }
69
70 if (weight > 0)
71 weights[RENDER_PS] = weight;
72 }
73
Supports(ESCALINGMETHOD method)74 bool CRendererShaders::Supports(ESCALINGMETHOD method)
75 {
76 if (method == VS_SCALINGMETHOD_LINEAR)
77 return true;
78
79 return __super::Supports(method);
80 }
81
Configure(const VideoPicture & picture,float fps,unsigned orientation)82 bool CRendererShaders::Configure(const VideoPicture& picture, float fps, unsigned orientation)
83 {
84 if (__super::Configure(picture, fps, orientation))
85 {
86 m_format = picture.videoBuffer->GetFormat();
87 if (m_format == AV_PIX_FMT_D3D11VA_VLD)
88 {
89 const DXGI_FORMAT dxgi_format = GetDXGIFormat(picture);
90
91 // if decoded texture isn't supported in shaders
92 // then change format to supported via copying
93 if (!IsHWPicSupported(picture))
94 m_format = GetAVFormat(dxgi_format);
95 }
96
97 CreateIntermediateTarget(m_sourceWidth, m_sourceHeight);
98 return true;
99 }
100 return false;
101 }
102
RenderImpl(CD3DTexture & target,CRect & sourceRect,CPoint (& destPoints)[4],uint32_t flags)103 void CRendererShaders::RenderImpl(CD3DTexture& target, CRect& sourceRect, CPoint(&destPoints)[4], uint32_t flags)
104 {
105 if (!m_colorShader)
106 return;
107
108 // reset scissors and viewport
109 CD3D11_VIEWPORT viewPort(0.0f, 0.0f,
110 static_cast<float>(target.GetWidth()),
111 static_cast<float>(target.GetHeight()));
112 DX::DeviceResources::Get()->GetD3DContext()->RSSetViewports(1, &viewPort);
113 DX::Windowing()->ResetScissors();
114
115 CRenderBuffer* buf = m_renderBuffers[m_iBufferIndex];
116
117 CPoint srcPoints[4];
118 sourceRect.GetQuad(srcPoints);
119
120 m_colorShader->SetParams(m_videoSettings.m_Contrast, m_videoSettings.m_Brightness,
121 DX::Windowing()->UseLimitedColor());
122 m_colorShader->SetColParams(buf->color_space, buf->bits, !buf->full_range, buf->texBits);
123 m_colorShader->Render(sourceRect, srcPoints, buf, target);
124
125 if (!HasHQScaler())
126 ReorderDrawPoints(CRect(destPoints[0], destPoints[2]), destPoints);
127 }
128
CheckVideoParameters()129 void CRendererShaders::CheckVideoParameters()
130 {
131 __super::CheckVideoParameters();
132
133 CRenderBuffer* buf = m_renderBuffers[m_iBufferIndex];
134 const AVColorPrimaries srcPrim = GetSrcPrimaries(buf->primaries, buf->GetWidth(), buf->GetHeight());
135 if (srcPrim != m_srcPrimaries)
136 {
137 // source params is changed, reset shader
138 m_srcPrimaries = srcPrim;
139 m_colorShader.reset();
140 }
141 }
142
UpdateVideoFilters()143 void CRendererShaders::UpdateVideoFilters()
144 {
145 __super::UpdateVideoFilters();
146
147 if (!m_colorShader)
148 {
149 m_colorShader = std::make_unique<CYUV2RGBShader>();
150
151 AVColorPrimaries dstPrimaries = AVCOL_PRI_BT709;
152
153 if (DX::Windowing()->IsHDROutput() &&
154 (m_srcPrimaries == AVCOL_PRI_BT709 || m_srcPrimaries == AVCOL_PRI_BT2020))
155 dstPrimaries = m_srcPrimaries;
156
157 if (!m_colorShader->Create(m_format, dstPrimaries, m_srcPrimaries))
158 {
159 // we are in a big trouble
160 CLog::LogF(LOGERROR, "unable to create YUV->RGB shader, rendering is not possible");
161 m_colorShader.reset();
162 }
163 }
164 }
165
IsHWPicSupported(const VideoPicture & picture)166 bool CRendererShaders::IsHWPicSupported(const VideoPicture& picture)
167 {
168 // checking support of decoder texture in shaders
169 const DXGI_FORMAT dxgi_format = GetDXGIFormat(picture);
170 if (dxgi_format != DXGI_FORMAT_UNKNOWN)
171 {
172 CD3D11_TEXTURE2D_DESC texDesc(
173 dxgi_format,
174 FFALIGN(picture.iWidth, 32),
175 FFALIGN(picture.iHeight, 32),
176 1, 1,
177 D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE,
178 D3D11_USAGE_DEFAULT
179 );
180
181 ComPtr<ID3D11Device> pDevice = DX::DeviceResources::Get()->GetD3DDevice();
182 return SUCCEEDED(pDevice->CreateTexture2D(&texDesc, nullptr, nullptr));
183 }
184 return false;
185 }
186
GetSrcPrimaries(AVColorPrimaries srcPrimaries,unsigned width,unsigned height)187 AVColorPrimaries CRendererShaders::GetSrcPrimaries(AVColorPrimaries srcPrimaries, unsigned width, unsigned height)
188 {
189 AVColorPrimaries ret = srcPrimaries;
190 if (ret == AVCOL_PRI_UNSPECIFIED)
191 {
192 if (width > 1024 || height >= 600)
193 ret = AVCOL_PRI_BT709;
194 else
195 ret = AVCOL_PRI_BT470BG;
196 }
197 return ret;
198 }
199
CreateBuffer()200 CRenderBuffer* CRendererShaders::CreateBuffer()
201 {
202 return new CRenderBufferImpl(m_format, m_sourceWidth, m_sourceHeight);
203 }
204
CRenderBufferImpl(AVPixelFormat av_pix_format,unsigned width,unsigned height)205 CRendererShaders::CRenderBufferImpl::CRenderBufferImpl(AVPixelFormat av_pix_format, unsigned width, unsigned height)
206 : CRenderBuffer(av_pix_format, width, height)
207 {
208 DXGI_FORMAT view_formats[YuvImage::MAX_PLANES] = {};
209
210 switch (av_format)
211 {
212 case AV_PIX_FMT_D3D11VA_VLD:
213 m_viewCount = 2;
214 break;
215 case AV_PIX_FMT_NV12:
216 {
217 view_formats[0] = DXGI_FORMAT_R8_UNORM;
218 view_formats[1] = DXGI_FORMAT_R8G8_UNORM;
219 // FL 9.x doesn't support DXGI_FORMAT_R8G8_UNORM, so we have to use SNORM and correct values in shader
220 if (!DX::Windowing()->IsFormatSupport(view_formats[1], D3D11_FORMAT_SUPPORT_TEXTURE2D))
221 view_formats[1] = DXGI_FORMAT_R8G8_SNORM;
222 m_viewCount = 2;
223 break;
224 }
225 case AV_PIX_FMT_P010:
226 case AV_PIX_FMT_P016:
227 {
228 view_formats[0] = DXGI_FORMAT_R16_UNORM;
229 view_formats[1] = DXGI_FORMAT_R16G16_UNORM;
230 m_viewCount = 2;
231 break;
232 }
233 case AV_PIX_FMT_YUV420P:
234 {
235 view_formats[0] = view_formats[1] = view_formats[2] = DXGI_FORMAT_R8_UNORM;
236 m_viewCount = 3;
237 break;
238 }
239 case AV_PIX_FMT_YUV420P10:
240 case AV_PIX_FMT_YUV420P16:
241 {
242 view_formats[0] = view_formats[1] = view_formats[2] = DXGI_FORMAT_R16_UNORM;
243 m_viewCount = 3;
244 texBits = av_format == AV_PIX_FMT_YUV420P10 ? 10 : 16;
245 break;
246 }
247 default:
248 // unsupported format
249 return;
250 }
251
252 if (av_format != AV_PIX_FMT_D3D11VA_VLD)
253 {
254 for (size_t i = 0; i < m_viewCount; i++)
255 {
256 const auto w = i ? m_width >> 1 : m_width;
257 const auto h = i ? m_height >> 1 : m_height;
258
259 if (!m_textures[i].Create(w, h, 1, D3D11_USAGE_DYNAMIC, view_formats[i]))
260 break;
261
262 // clear plane
263 D3D11_MAPPED_SUBRESOURCE mapping = {};
264 if (m_textures[i].LockRect(0, &mapping, D3D11_MAP_WRITE_DISCARD))
265 {
266 if (view_formats[i] == DXGI_FORMAT_R8_UNORM ||
267 view_formats[i] == DXGI_FORMAT_R8G8_UNORM ||
268 view_formats[i] == DXGI_FORMAT_R8G8_SNORM)
269 memset(mapping.pData, i ? 0x80 : 0, mapping.RowPitch * h);
270 else
271 wmemset(static_cast<wchar_t*>(mapping.pData), i ? 0x8000 : 0, mapping.RowPitch * h >> 1);
272
273 if (m_textures[i].UnlockRect(0)) {}
274 }
275 }
276 }
277 }
278
~CRenderBufferImpl()279 CRendererShaders::CRenderBufferImpl::~CRenderBufferImpl()
280 {
281 CRenderBufferImpl::ReleasePicture();
282 }
283
AppendPicture(const VideoPicture & picture)284 void CRendererShaders::CRenderBufferImpl::AppendPicture(const VideoPicture& picture)
285 {
286 __super::AppendPicture(picture);
287
288 if (videoBuffer->GetFormat() == AV_PIX_FMT_D3D11VA_VLD)
289 {
290 if (AV_PIX_FMT_D3D11VA_VLD != av_format)
291 QueueCopyFromGPU();
292
293 const auto hw = dynamic_cast<DXVA::CVideoBuffer*>(videoBuffer);
294 m_widthTex = hw->width;
295 m_heightTex = hw->height;
296 }
297 }
298
UploadBuffer()299 bool CRendererShaders::CRenderBufferImpl::UploadBuffer()
300 {
301 if (!videoBuffer)
302 return false;
303
304 if (videoBuffer->GetFormat() == AV_PIX_FMT_D3D11VA_VLD)
305 {
306 if (AV_PIX_FMT_D3D11VA_VLD == av_format)
307 m_bLoaded = true;
308 else
309 m_bLoaded = UploadFromGPU();
310 }
311 else
312 m_bLoaded = UploadFromBuffer();
313
314 return m_bLoaded;
315 }
316
GetViewCount() const317 unsigned CRendererShaders::CRenderBufferImpl::GetViewCount() const
318 {
319 return m_viewCount;
320 }
321
GetView(unsigned viewIdx)322 ID3D11View* CRendererShaders::CRenderBufferImpl::GetView(unsigned viewIdx)
323 {
324 if (videoBuffer->GetFormat() == AV_PIX_FMT_D3D11VA_VLD &&
325 AV_PIX_FMT_D3D11VA_VLD == av_format)
326 {
327 if (m_planes[viewIdx])
328 return m_planes[viewIdx].Get();
329
330 unsigned arrayIdx;
331 ComPtr<ID3D11Resource> pResource;
332 if (FAILED(GetResource(&pResource, &arrayIdx)))
333 {
334 CLog::LogF(LOGERROR, "unable to open d3d11va resource.");
335 return nullptr;
336 }
337
338 const auto dxva_format = CRendererBase::GetDXGIFormat(videoBuffer);
339 // impossible but we check
340 if (dxva_format < DXGI_FORMAT_NV12 || dxva_format > DXGI_FORMAT_P016)
341 return nullptr;
342
343 CD3D11_SHADER_RESOURCE_VIEW_DESC srvDesc(
344 D3D11_SRV_DIMENSION_TEXTURE2DARRAY,
345 plane_formats[dxva_format - DXGI_FORMAT_NV12][viewIdx],
346 0, 1, arrayIdx, 1
347 );
348
349 ComPtr<ID3D11Device> pD3DDevice = DX::DeviceResources::Get()->GetD3DDevice();
350 if (FAILED(pD3DDevice->CreateShaderResourceView(pResource.Get(), &srvDesc, &m_planes[viewIdx])))
351 {
352 CLog::LogF(LOGERROR, "unable to create shader target for decoder texture.");
353 return nullptr;
354 }
355
356 return m_planes[viewIdx].Get();
357 }
358
359 return m_textures[viewIdx].GetShaderResource();
360 }
361
ReleasePicture()362 void CRendererShaders::CRenderBufferImpl::ReleasePicture()
363 {
364 __super::ReleasePicture();
365
366 m_planes[0] = nullptr;
367 m_planes[1] = nullptr;
368 }
369
UploadFromGPU()370 bool CRendererShaders::CRenderBufferImpl::UploadFromGPU()
371 {
372 ComPtr<ID3D11DeviceContext> pContext = DX::DeviceResources::Get()->GetImmediateContext();
373 D3D11_MAPPED_SUBRESOURCE mapGPU;
374 D3D11_MAPPED_SUBRESOURCE mappings[2];
375
376 if (FAILED(pContext->Map(m_staging.Get(), 0, D3D11_MAP_READ, 0, &mapGPU)))
377 return false;
378
379 if (!m_textures[PLANE_Y].LockRect(0, &mappings[PLANE_Y], D3D11_MAP_WRITE_DISCARD) ||
380 !m_textures[PLANE_UV].LockRect(0, &mappings[PLANE_UV], D3D11_MAP_WRITE_DISCARD))
381 {
382 pContext->Unmap(m_staging.Get(), 0);
383 return false;
384 }
385
386 void* (*copy_func)(void* d, const void* s, size_t size) =
387 #if defined(HAVE_SSE2)
388 ((CServiceBroker::GetCPUInfo()->GetCPUFeatures() & CPU_FEATURE_SSE4) != 0) ? gpu_memcpy :
389 #endif
390 memcpy;
391
392 auto* s_y = static_cast<uint8_t*>(mapGPU.pData);
393 auto* s_uv = static_cast<uint8_t*>(mapGPU.pData) + m_sDesc.Height * mapGPU.RowPitch;
394 auto* d_y = static_cast<uint8_t*>(mappings[PLANE_Y].pData);
395 auto* d_uv = static_cast<uint8_t*>(mappings[PLANE_UV].pData);
396
397 if (mappings[PLANE_Y].RowPitch == mapGPU.RowPitch
398 && mappings[PLANE_UV].RowPitch == mapGPU.RowPitch)
399 {
400 Concurrency::parallel_invoke([&]() {
401 // copy Y
402 copy_func(d_y, s_y, mapGPU.RowPitch * m_height);
403 }, [&]() {
404 // copy UV
405 copy_func(d_uv, s_uv, mapGPU.RowPitch * m_height >> 1);
406 });
407 }
408 else
409 {
410 Concurrency::parallel_invoke([&]() {
411 // copy Y
412 for (unsigned y = 0; y < m_height; ++y)
413 {
414 copy_func(d_y, s_y, mappings[PLANE_Y].RowPitch);
415 s_y += mapGPU.RowPitch;
416 d_y += mappings[PLANE_Y].RowPitch;
417 }
418 }, [&]() {
419 // copy UV
420 for (unsigned y = 0; y < m_height >> 1; ++y)
421 {
422 copy_func(d_uv, s_uv, mappings[PLANE_UV].RowPitch);
423 s_uv += mapGPU.RowPitch;
424 d_uv += mappings[PLANE_UV].RowPitch;
425 }
426 });
427 }
428 pContext->Unmap(m_staging.Get(), 0);
429
430 return m_textures[PLANE_Y].UnlockRect(0) &&
431 m_textures[PLANE_UV].UnlockRect(0);
432 }
433
UploadFromBuffer() const434 bool CRendererShaders::CRenderBufferImpl::UploadFromBuffer() const
435 {
436 uint8_t* bufData[3];
437 int srcLines[3];
438 videoBuffer->GetPlanes(bufData);
439 videoBuffer->GetStrides(srcLines);
440 std::vector<Concurrency::task<void>> tasks;
441
442 for (unsigned plane = 0; plane < m_viewCount; ++plane)
443 {
444 D3D11_MAPPED_SUBRESOURCE mapping = {};
445 if (!m_textures[plane].LockRect(0, &mapping, D3D11_MAP_WRITE_DISCARD))
446 break;
447
448 auto* dst = static_cast<uint8_t*>(mapping.pData);
449 auto* src = bufData[plane];
450 int srcLine = srcLines[plane];
451 int dstLine = mapping.RowPitch;
452 int height = plane ? m_height >> 1 : m_height;
453
454 auto task = Concurrency::create_task([src, dst, srcLine, dstLine, height]()
455 {
456 if (srcLine == dstLine)
457 {
458 memcpy(dst, src, srcLine * height);
459 }
460 else
461 {
462 uint8_t* s = src;
463 uint8_t* d = dst;
464 for (int i = 0; i < height; ++i)
465 {
466 memcpy(d, s, std::min(srcLine, dstLine));
467 d += dstLine;
468 s += srcLine;
469 }
470 }
471 });
472 tasks.push_back(task);
473 }
474
475 // event based await is required on WinRT because
476 // blocking WinRT STA threads with task.wait() isn't allowed
477 auto sync = std::make_shared<Concurrency::event>();
478 when_all(tasks.begin(), tasks.end()).then([&sync]() {
479 sync->set();
480 });
481 sync->wait();
482
483 for (unsigned plane = 0; plane < m_viewCount; ++plane)
484 if (!m_textures[plane].UnlockRect(0)) {}
485
486 return true;
487 }
488