1 // Copyright (c) 2012- PPSSPP Project.
2
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License 2.0 for more details.
11
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18 #include <algorithm>
19
20 #include "Common/Log.h"
21 #include "Common/MemoryUtil.h"
22 #include "Common/TimeUtil.h"
23 #include "Core/MemMap.h"
24 #include "Core/System.h"
25 #include "Core/Reporting.h"
26 #include "Core/Config.h"
27 #include "Core/CoreTiming.h"
28
29 #include "GPU/Math3D.h"
30 #include "GPU/GPUState.h"
31 #include "GPU/ge_constants.h"
32
33 #include "GPU/Common/TextureDecoder.h"
34 #include "GPU/Common/SplineCommon.h"
35 #include "GPU/Common/TransformCommon.h"
36 #include "GPU/Common/VertexDecoderCommon.h"
37 #include "GPU/Common/SoftwareTransformCommon.h"
38 #include "GPU/Debugger/Debugger.h"
39 #include "GPU/D3D11/FramebufferManagerD3D11.h"
40 #include "GPU/D3D11/TextureCacheD3D11.h"
41 #include "GPU/D3D11/DrawEngineD3D11.h"
42 #include "GPU/D3D11/ShaderManagerD3D11.h"
43 #include "GPU/D3D11/GPU_D3D11.h"
44
// Lookup table from primitive type to the D3D11 topology used to draw it.
// DoFlush() indexes it directly with the primitive being flushed (d3d11prim[prim]).
// Only seven entries are spelled out; the eighth slot of the array is left
// zero-initialized.
const D3D11_PRIMITIVE_TOPOLOGY d3d11prim[8] = {
	D3D11_PRIMITIVE_TOPOLOGY_POINTLIST,
	D3D11_PRIMITIVE_TOPOLOGY_LINELIST,
	D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, // Fans not supported
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST, // Need expansion - though we could do it with geom shaders in most cases
};
54
// BeginFrame() only prunes the tracked vertex arrays once every this many calls.
#define VERTEXCACHE_DECIMATION_INTERVAL 17

// Pruning limits used by BeginFrame(), compared against gpuStats.numFlips:
// - VAI_KILL_AGE: entries last used more than this many flips ago are deleted.
// - VAI_UNRELIABLE_KILL_AGE: same, for entries marked VAI_UNRELIABLE.
// - VAI_UNRELIABLE_KILL_MAX: cap on unreliable entries deleted per pruning pass.
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
// Sizes handed to the PushBufferD3D11 constructors in InitDeviceObjects()
// (presumably byte counts - confirm against PushBufferD3D11).
enum {
	VERTEX_PUSH_SIZE = 1024 * 1024 * 16,
	INDEX_PUSH_SIZE = 1024 * 1024 * 4,
};
62
// Input layout description used by the software-transform path in DoFlush(),
// where the vertex stride is sizeof(TransformedVertex).
// Byte offsets: POSITION at 0 (four floats), TEXCOORD0 at 16 (three floats),
// COLOR0 at 28 and COLOR1 at 32 (four normalized bytes each).
// NOTE(review): these offsets presumably mirror the TransformedVertex struct
// declared elsewhere - keep the two in sync.
static const D3D11_INPUT_ELEMENT_DESC TransformedVertexElements[] = {
	{ "POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
	{ "TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0 },
	{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0 },
	{ "COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 32, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
69
DrawEngineD3D11(Draw::DrawContext * draw,ID3D11Device * device,ID3D11DeviceContext * context)70 DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context)
71 : draw_(draw),
72 device_(device),
73 context_(context),
74 vai_(256),
75 inputLayoutMap_(32),
76 blendCache_(32),
77 blendCache1_(32),
78 depthStencilCache_(64),
79 rasterCache_(4) {
80 device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);
81 context1_ = (ID3D11DeviceContext1 *)draw->GetNativeObject(Draw::NativeObject::CONTEXT_EX);
82 decOptions_.expandAllWeightsToFloat = true;
83 decOptions_.expand8BitNormalsToFloat = true;
84
85 decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
86 // Allocate nicely aligned memory. Maybe graphics drivers will
87 // appreciate it.
88 // All this is a LOT of memory, need to see if we can cut down somehow.
89 decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
90 decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
91
92 indexGen.Setup(decIndex);
93
94 InitDeviceObjects();
95
96 // Vertex pushing buffers. For uniforms we use short DISCARD buffers, but we could use
97 // this kind of buffer there as well with D3D11.1. We might be able to use the same buffer
98 // for both vertices and indices, and possibly all three data types.
99 }
100
~DrawEngineD3D11()101 DrawEngineD3D11::~DrawEngineD3D11() {
102 DestroyDeviceObjects();
103 FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
104 FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
105 }
106
InitDeviceObjects()107 void DrawEngineD3D11::InitDeviceObjects() {
108 pushVerts_ = new PushBufferD3D11(device_, VERTEX_PUSH_SIZE, D3D11_BIND_VERTEX_BUFFER);
109 pushInds_ = new PushBufferD3D11(device_, INDEX_PUSH_SIZE, D3D11_BIND_INDEX_BUFFER);
110
111 tessDataTransferD3D11 = new TessellationDataTransferD3D11(context_, device_);
112 tessDataTransfer = tessDataTransferD3D11;
113 }
114
ClearTrackedVertexArrays()115 void DrawEngineD3D11::ClearTrackedVertexArrays() {
116 vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
117 delete vai;
118 });
119 vai_.Clear();
120 }
121
ClearInputLayoutMap()122 void DrawEngineD3D11::ClearInputLayoutMap() {
123 inputLayoutMap_.Iterate([&](const InputLayoutKey &key, ID3D11InputLayout *il) {
124 if (il)
125 il->Release();
126 });
127 inputLayoutMap_.Clear();
128 }
129
Resized()130 void DrawEngineD3D11::Resized() {
131 DrawEngineCommon::Resized();
132 ClearInputLayoutMap();
133 }
134
DestroyDeviceObjects()135 void DrawEngineD3D11::DestroyDeviceObjects() {
136 ClearTrackedVertexArrays();
137 ClearInputLayoutMap();
138 delete tessDataTransferD3D11;
139 tessDataTransferD3D11 = nullptr;
140 tessDataTransfer = nullptr;
141 delete pushVerts_;
142 delete pushInds_;
143 depthStencilCache_.Iterate([&](const uint64_t &key, ID3D11DepthStencilState *ds) {
144 ds->Release();
145 });
146 depthStencilCache_.Clear();
147 blendCache_.Iterate([&](const uint64_t &key, ID3D11BlendState *bs) {
148 bs->Release();
149 });
150 blendCache_.Clear();
151 blendCache1_.Iterate([&](const uint64_t &key, ID3D11BlendState1 *bs) {
152 bs->Release();
153 });
154 blendCache1_.Clear();
155 rasterCache_.Iterate([&](const uint32_t &key, ID3D11RasterizerState *rs) {
156 rs->Release();
157 });
158 rasterCache_.Clear();
159 }
160
// One row of the VComp table: the DXGI format used for a decoded vertex
// component plus a descriptive label.
struct DeclTypeInfo {
	DXGI_FORMAT type;   // Format written into D3D11_INPUT_ELEMENT_DESC::Format by VertexAttribSetup().
	const char * name;  // Human-readable label; not read anywhere in this file.
};
165
// Maps a decoded-vertex component format id (the DEC_* value noted on each row)
// to the DXGI format used in the input layout. VertexAttribSetup() indexes this
// table directly with the format id, so row order must match the DEC_* numbering.
// Rows with DXGI_FORMAT_UNKNOWN have no D3D11 equivalent here.
static const DeclTypeInfo VComp[] = {
	{ DXGI_FORMAT_UNKNOWN, "NULL" }, // DEC_NONE,
	{ DXGI_FORMAT_R32_FLOAT, "D3DDECLTYPE_FLOAT1 " }, // DEC_FLOAT_1,
	{ DXGI_FORMAT_R32G32_FLOAT, "D3DDECLTYPE_FLOAT2 " }, // DEC_FLOAT_2,
	{ DXGI_FORMAT_R32G32B32_FLOAT, "D3DDECLTYPE_FLOAT3 " }, // DEC_FLOAT_3,
	{ DXGI_FORMAT_R32G32B32A32_FLOAT, "D3DDECLTYPE_FLOAT4 " }, // DEC_FLOAT_4,

	{ DXGI_FORMAT_R8G8B8A8_SNORM, "UNUSED" }, // DEC_S8_3,

	{ DXGI_FORMAT_R16G16B16A16_SNORM, "D3DDECLTYPE_SHORT4N " }, // DEC_S16_3,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_1,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_2,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_3,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N " }, // DEC_U8_4,

	{ DXGI_FORMAT_UNKNOWN, "UNUSED_DEC_U16_1" }, // DEC_U16_1,
	{ DXGI_FORMAT_UNKNOWN, "UNUSED_DEC_U16_2" }, // DEC_U16_2,
	{ DXGI_FORMAT_R16G16B16A16_UNORM ,"D3DDECLTYPE_USHORT4N "}, // DEC_U16_3,
	{ DXGI_FORMAT_R16G16B16A16_UNORM ,"D3DDECLTYPE_USHORT4N "}, // DEC_U16_4,
};
186
VertexAttribSetup(D3D11_INPUT_ELEMENT_DESC * VertexElement,u8 fmt,u8 offset,const char * semantic,u8 semantic_index=0)187 static void VertexAttribSetup(D3D11_INPUT_ELEMENT_DESC * VertexElement, u8 fmt, u8 offset, const char *semantic, u8 semantic_index = 0) {
188 memset(VertexElement, 0, sizeof(D3D11_INPUT_ELEMENT_DESC));
189 VertexElement->AlignedByteOffset = offset;
190 VertexElement->Format = VComp[fmt].type;
191 VertexElement->SemanticName = semantic;
192 VertexElement->SemanticIndex = semantic_index;
193 }
194
SetupDecFmtForDraw(D3D11VertexShader * vshader,const DecVtxFormat & decFmt,u32 pspFmt)195 ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) {
196 // TODO: Instead of one for each vshader, we can reduce it to one for each type of shader
197 // that reads TEXCOORD or not, etc. Not sure if worth it.
198 InputLayoutKey key{ vshader, decFmt.id };
199 ID3D11InputLayout *inputLayout = inputLayoutMap_.Get(key);
200 if (inputLayout) {
201 return inputLayout;
202 } else {
203 D3D11_INPUT_ELEMENT_DESC VertexElements[8];
204 D3D11_INPUT_ELEMENT_DESC *VertexElement = &VertexElements[0];
205
206 // Vertices Elements orders
207 // WEIGHT
208 if (decFmt.w0fmt != 0) {
209 VertexAttribSetup(VertexElement, decFmt.w0fmt, decFmt.w0off, "TEXCOORD", 1);
210 VertexElement++;
211 }
212
213 if (decFmt.w1fmt != 0) {
214 VertexAttribSetup(VertexElement, decFmt.w1fmt, decFmt.w1off, "TEXCOORD", 2);
215 VertexElement++;
216 }
217
218 // TC
219 if (decFmt.uvfmt != 0) {
220 VertexAttribSetup(VertexElement, decFmt.uvfmt, decFmt.uvoff, "TEXCOORD", 0);
221 VertexElement++;
222 }
223
224 // COLOR
225 if (decFmt.c0fmt != 0) {
226 VertexAttribSetup(VertexElement, decFmt.c0fmt, decFmt.c0off, "COLOR", 0);
227 VertexElement++;
228 }
229 // Never used ?
230 if (decFmt.c1fmt != 0) {
231 VertexAttribSetup(VertexElement, decFmt.c1fmt, decFmt.c1off, "COLOR", 1);
232 VertexElement++;
233 }
234
235 // NORMAL
236 if (decFmt.nrmfmt != 0) {
237 VertexAttribSetup(VertexElement, decFmt.nrmfmt, decFmt.nrmoff, "NORMAL", 0);
238 VertexElement++;
239 }
240
241 // POSITION
242 // Always
243 VertexAttribSetup(VertexElement, decFmt.posfmt, decFmt.posoff, "POSITION", 0);
244 VertexElement++;
245
246 // Create declaration
247 HRESULT hr = device_->CreateInputLayout(VertexElements, VertexElement - VertexElements, vshader->bytecode().data(), vshader->bytecode().size(), &inputLayout);
248 if (FAILED(hr)) {
249 ERROR_LOG(G3D, "Failed to create input layout!");
250 inputLayout = nullptr;
251 }
252
253 // Add it to map
254 inputLayoutMap_.Insert(key, inputLayout);
255 return inputLayout;
256 }
257 }
258
MarkUnreliable(VertexArrayInfoD3D11 * vai)259 void DrawEngineD3D11::MarkUnreliable(VertexArrayInfoD3D11 *vai) {
260 vai->status = VertexArrayInfoD3D11::VAI_UNRELIABLE;
261 if (vai->vbo) {
262 vai->vbo->Release();
263 vai->vbo = nullptr;
264 }
265 if (vai->ebo) {
266 vai->ebo->Release();
267 vai->ebo = nullptr;
268 }
269 }
270
BeginFrame()271 void DrawEngineD3D11::BeginFrame() {
272 pushVerts_->Reset();
273 pushInds_->Reset();
274
275 if (--decimationCounter_ <= 0) {
276 decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
277 } else {
278 return;
279 }
280
281 const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
282 const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
283 int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
284 vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
285 bool kill;
286 if (vai->status == VertexArrayInfoD3D11::VAI_UNRELIABLE) {
287 // We limit killing unreliable so we don't rehash too often.
288 kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
289 } else {
290 kill = vai->lastFrame < threshold;
291 }
292 if (kill) {
293 delete vai;
294 vai_.Remove(hash);
295 }
296 });
297 vai_.Maintain();
298
299 // Enable if you want to see vertex decoders in the log output. Need a better way.
300 #if 0
301 char buffer[16384];
302 for (std::map<u32, VertexDecoder*>::iterator dec = decoderMap_.begin(); dec != decoderMap_.end(); ++dec) {
303 char *ptr = buffer;
304 ptr += dec->second->ToString(ptr);
305 // *ptr++ = '\n';
306 NOTICE_LOG(G3D, buffer);
307 }
308 #endif
309
310 lastRenderStepId_ = -1;
311 }
312
~VertexArrayInfoD3D11()313 VertexArrayInfoD3D11::~VertexArrayInfoD3D11() {
314 if (vbo)
315 vbo->Release();
316 if (ebo)
317 ebo->Release();
318 }
319
// The inline wrapper in the header checks for numDrawCalls == 0
//
// Submits the vertex data collected since the previous flush as a single D3D11
// draw (or, on the software path, possibly a clear), then resets the per-batch
// bookkeeping.
//
// Two paths:
// - Hardware transform: vertices are decoded into `decoded` and drawn either
//   from a cached immutable buffer (vertex cache) or from the push buffers.
// - Software transform: vertices are decoded, run through SoftwareTransform,
//   and the result is drawn from the push buffers or turned into a clear.
void DrawEngineD3D11::DoFlush() {
	gpuStats.numFlushes++;
	gpuStats.numTrackedVertexArrays = (int)vai_.size();

	// In D3D, we're synchronous and state carries over so all we reset here on a new step is the viewport/scissor.
	int curRenderStepId = draw_->GetCurrentStepId();
	if (lastRenderStepId_ != curRenderStepId) {
		// Dirty everything that has dynamic state that will need re-recording.
		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		lastRenderStepId_ = curRenderStepId;
	}

	// This is not done on every drawcall, we collect vertex data
	// until critical state changes. That's when we draw (flush).

	GEPrimitiveType prim = prevPrim_;
	ApplyDrawState(prim);

	// Always use software for flat shading to fix the provoking index.
	bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
	bool useHWTransform = CanUseHardwareTransform(prim) && (tess || gstate.getShadeMode() != GE_SHADE_FLAT);

	if (useHWTransform) {
		// Cached buffers to draw from. Both stay null when the data has to go
		// through the push buffers instead.
		ID3D11Buffer *vb_ = nullptr;
		ID3D11Buffer *ib_ = nullptr;

		int vertexCount = 0;
		int maxIndex = 0;
		bool useElements = true;

		// Cannot cache vertex data with morph enabled.
		bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
		// Also avoid caching when software skinning.
		if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
			useCache = false;

		if (useCache) {
			u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263

			// Look up (or start tracking) the entry for this batch of draw calls.
			VertexArrayInfoD3D11 *vai = vai_.Get(id);
			if (!vai) {
				vai = new VertexArrayInfoD3D11();
				vai_.Insert(id, vai);
			}

			// NOTE: every `goto rotateVBO` below jumps into the uncached branch
			// further down, and therefore skips the `vai->lastFrame` update that
			// follows this switch.
			switch (vai->status) {
			case VertexArrayInfoD3D11::VAI_NEW:
				{
					// Haven't seen this one before.
					// Record both hashes, decode, and draw uncached this time.
					uint64_t dataHash = ComputeHash();
					vai->hash = dataHash;
					vai->minihash = ComputeMiniHash();
					vai->status = VertexArrayInfoD3D11::VAI_HASHING;
					vai->drawsUntilNextFullHash = 0;
					DecodeVerts(decoded); // writes to indexGen
					vai->numVerts = indexGen.VertexCount();
					vai->prim = indexGen.Prim();
					vai->maxIndex = indexGen.MaxIndex();
					vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0;
					goto rotateVBO;
				}

			// Hashing - still gaining confidence about the buffer.
			// But if we get this far it's likely to be worth creating a vertex buffer.
			case VertexArrayInfoD3D11::VAI_HASHING:
				{
					vai->numDraws++;
					if (vai->lastFrame != gpuStats.numFlips) {
						vai->numFrames++;
					}
					if (vai->drawsUntilNextFullHash == 0) {
						// Let's try to skip a full hash if mini would fail.
						const u32 newMiniHash = ComputeMiniHash();
						uint64_t newHash = vai->hash;
						if (newMiniHash == vai->minihash) {
							newHash = ComputeHash();
						}
						// Source data changed: stop trusting this entry and draw uncached.
						if (newMiniHash != vai->minihash || newHash != vai->hash) {
							MarkUnreliable(vai);
							DecodeVerts(decoded);
							goto rotateVBO;
						}
						if (vai->numVerts > 64) {
							// exponential backoff up to 16 draws, then every 24
							vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
						} else {
							// Lower numbers seem much more likely to change.
							vai->drawsUntilNextFullHash = 0;
						}
						// TODO: tweak
						//if (vai->numFrames > 1000) {
						//	vai->status = VertexArrayInfo::VAI_RELIABLE;
						//}
					} else {
						// Between full hashes only the mini hash is checked.
						vai->drawsUntilNextFullHash--;
						u32 newMiniHash = ComputeMiniHash();
						if (newMiniHash != vai->minihash) {
							MarkUnreliable(vai);
							DecodeVerts(decoded);
							goto rotateVBO;
						}
					}

					if (vai->vbo == 0) {
						// Hashes matched but nothing is cached yet: decode once and
						// bake the result into immutable D3D11 buffers.
						DecodeVerts(decoded);
						vai->numVerts = indexGen.VertexCount();
						vai->prim = indexGen.Prim();
						vai->maxIndex = indexGen.MaxIndex();
						vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0;
						useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
						if (!useElements && indexGen.PureCount()) {
							vai->numVerts = indexGen.PureCount();
						}

						_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");

						// TODO: Combine these two into one buffer?
						// NOTE(review): sized as stride * MaxIndex() here, while the push
						// path below copies (maxIndex + 1) * stride - confirm which one
						// matches what IndexGenerator::MaxIndex() actually returns.
						u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
						D3D11_BUFFER_DESC desc{ size, D3D11_USAGE_IMMUTABLE, D3D11_BIND_VERTEX_BUFFER, 0 };
						D3D11_SUBRESOURCE_DATA data{ decoded };
						ASSERT_SUCCESS(device_->CreateBuffer(&desc, &data, &vai->vbo));
						if (useElements) {
							u32 size = sizeof(short) * indexGen.VertexCount();
							D3D11_BUFFER_DESC desc{ size, D3D11_USAGE_IMMUTABLE, D3D11_BIND_INDEX_BUFFER, 0 };
							D3D11_SUBRESOURCE_DATA data{ decIndex };
							ASSERT_SUCCESS(device_->CreateBuffer(&desc, &data, &vai->ebo));
						} else {
							vai->ebo = 0;
						}
					} else {
						// Cache hit: no decoding needed, draw straight from the stored buffers.
						gpuStats.numCachedDrawCalls++;
						useElements = vai->ebo ? true : false;
						gpuStats.numCachedVertsDrawn += vai->numVerts;
						gstate_c.vertexFullAlpha = vai->flags & VAI11_FLAG_VERTEXFULLALPHA;
					}
					vb_ = vai->vbo;
					ib_ = vai->ebo;
					vertexCount = vai->numVerts;
					maxIndex = vai->maxIndex;
					prim = static_cast<GEPrimitiveType>(vai->prim);
					break;
				}

			// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
			case VertexArrayInfoD3D11::VAI_RELIABLE:
				{
					vai->numDraws++;
					if (vai->lastFrame != gpuStats.numFlips) {
						vai->numFrames++;
					}
					gpuStats.numCachedDrawCalls++;
					gpuStats.numCachedVertsDrawn += vai->numVerts;
					vb_ = vai->vbo;
					ib_ = vai->ebo;

					vertexCount = vai->numVerts;

					maxIndex = vai->maxIndex;
					prim = static_cast<GEPrimitiveType>(vai->prim);

					gstate_c.vertexFullAlpha = vai->flags & VAI11_FLAG_VERTEXFULLALPHA;
					break;
				}

			// Unreliable - the data changed at some point, so always decode and draw uncached.
			case VertexArrayInfoD3D11::VAI_UNRELIABLE:
				{
					vai->numDraws++;
					if (vai->lastFrame != gpuStats.numFlips) {
						vai->numFrames++;
					}
					DecodeVerts(decoded);
					goto rotateVBO;
				}
			}

			// Only reached via `break` above, i.e. when drawing from cached buffers.
			vai->lastFrame = gpuStats.numFlips;
		} else {
			DecodeVerts(decoded);
rotateVBO:
			// Uncached draw: take counts and primitive type from the index generator.
			// vb_ is still null here, which selects the push-buffer path below.
			gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
			useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
			vertexCount = indexGen.VertexCount();
			maxIndex = indexGen.MaxIndex();
			if (!useElements && indexGen.PureCount()) {
				vertexCount = indexGen.PureCount();
			}
			prim = indexGen.Prim();
		}

		VERBOSE_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
		// Narrow vertexFullAlpha further using the material/lighting state.
		bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
		if (gstate.isModeThrough()) {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
		} else {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
		}

		ApplyDrawStateLate(true, dynState_.stencilRef);

		// Bind shaders, uniforms and the input layout matching the decoded format.
		D3D11VertexShader *vshader;
		D3D11FragmentShader *fshader;
		shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat);
		ID3D11InputLayout *inputLayout = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
		context_->PSSetShader(fshader->GetShader(), nullptr, 0);
		context_->VSSetShader(vshader->GetShader(), nullptr, 0);
		shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
		shaderManager_->BindUniforms();

		context_->IASetInputLayout(inputLayout);
		UINT stride = dec_->GetDecVtxFmt().stride;
		context_->IASetPrimitiveTopology(d3d11prim[prim]);
		if (!vb_) {
			// Push!
			// No cached buffer: copy the decoded vertices (and indices, if used)
			// into the push buffers and draw from there.
			UINT vOffset;
			int vSize = (maxIndex + 1) * dec_->GetDecVtxFmt().stride;
			uint8_t *vptr = pushVerts_->BeginPush(context_, &vOffset, vSize);
			memcpy(vptr, decoded, vSize);
			pushVerts_->EndPush(context_);
			ID3D11Buffer *buf = pushVerts_->Buf();
			context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
			if (useElements) {
				UINT iOffset;
				int iSize = 2 * indexGen.VertexCount();
				uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
				memcpy(iptr, decIndex, iSize);
				pushInds_->EndPush(context_);
				context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
				context_->DrawIndexed(vertexCount, 0, 0);
			} else {
				context_->Draw(vertexCount, 0);
			}
		} else {
			// Draw directly from the cached immutable buffers.
			UINT offset = 0;
			context_->IASetVertexBuffers(0, 1, &vb_, &stride, &offset);
			if (useElements) {
				context_->IASetIndexBuffer(ib_, DXGI_FORMAT_R16_UINT, 0);
				context_->DrawIndexed(vertexCount, 0, 0);
			} else {
				context_->Draw(vertexCount, 0);
			}
		}
	} else {
		// Software transform path.
		DecodeVerts(decoded);
		bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
		if (gstate.isModeThrough()) {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
		} else {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
		}

		gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
		prim = indexGen.Prim();
		// Undo the strip optimization, not supported by the SW code yet.
		if (prim == GE_PRIM_TRIANGLE_STRIP)
			prim = GE_PRIM_TRIANGLES;
		VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());

		u16 *inds = decIndex;
		SoftwareTransformResult result{};
		SoftwareTransformParams params{};
		params.decoded = decoded;
		params.transformed = transformed;
		params.transformedExpanded = transformedExpanded;
		params.fbman = framebufferManager_;
		params.texCache = textureCache_;
		params.allowClear = true;
		params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears
		params.provokeFlatFirst = true;

		int maxIndex = indexGen.MaxIndex();
		SoftwareTransform swTransform(params);
		swTransform.Decode(prim, dec_->VertexType(), dec_->GetDecVtxFmt(), maxIndex, &result);
		// Decode() may already have settled on an action; only build drawing
		// parameters when it has not.
		if (result.action == SW_NOT_READY) {
			swTransform.DetectOffsetTexture(maxIndex);
			swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
		}

		if (result.setSafeSize)
			framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);

		if (result.action == SW_DRAW_PRIMITIVES) {
			ApplyDrawStateLate(result.setStencil, result.stencilValue);

			D3D11VertexShader *vshader;
			D3D11FragmentShader *fshader;
			shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, false, false, decOptions_.expandAllWeightsToFloat);
			context_->PSSetShader(fshader->GetShader(), nullptr, 0);
			context_->VSSetShader(vshader->GetShader(), nullptr, 0);
			shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
			shaderManager_->BindUniforms();

			// We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)!
			// Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others.
			InputLayoutKey key{ vshader, 0xFFFFFFFF }; // Let's use 0xFFFFFFFF to signify TransformedVertex
			ID3D11InputLayout *layout = inputLayoutMap_.Get(key);
			if (!layout) {
				ASSERT_SUCCESS(device_->CreateInputLayout(TransformedVertexElements, ARRAY_SIZE(TransformedVertexElements), vshader->bytecode().data(), vshader->bytecode().size(), &layout));
				inputLayoutMap_.Insert(key, layout);
			}
			context_->IASetInputLayout(layout);
			context_->IASetPrimitiveTopology(d3d11prim[prim]);

			// Upload the transformed vertices (and indices, if the result is
			// indexed) through the push buffers and draw.
			UINT stride = sizeof(TransformedVertex);
			UINT vOffset = 0;
			int vSize = maxIndex * stride;
			uint8_t *vptr = pushVerts_->BeginPush(context_, &vOffset, vSize);
			memcpy(vptr, result.drawBuffer, vSize);
			pushVerts_->EndPush(context_);
			ID3D11Buffer *buf = pushVerts_->Buf();
			context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
			if (result.drawIndexed) {
				UINT iOffset;
				int iSize = sizeof(uint16_t) * result.drawNumTrans;
				uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
				memcpy(iptr, inds, iSize);
				pushInds_->EndPush(context_);
				context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
				context_->DrawIndexed(result.drawNumTrans, 0, 0);
			} else {
				context_->Draw(result.drawNumTrans, 0);
			}
		} else if (result.action == SW_CLEAR) {
			// The software transform turned this draw into a framebuffer clear.
			u32 clearColor = result.color;
			float clearDepth = result.depth;

			uint32_t clearFlag = 0;

			// The alpha clear mask selects the stencil channel here.
			if (gstate.isClearModeColorMask()) clearFlag |= Draw::FBChannel::FB_COLOR_BIT;
			if (gstate.isClearModeAlphaMask()) clearFlag |= Draw::FBChannel::FB_STENCIL_BIT;
			if (gstate.isClearModeDepthMask()) clearFlag |= Draw::FBChannel::FB_DEPTH_BIT;

			if (clearFlag & Draw::FBChannel::FB_DEPTH_BIT) {
				framebufferManager_->SetDepthUpdated();
			}
			if (clearFlag & Draw::FBChannel::FB_COLOR_BIT) {
				framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
			}

			// The stencil clear value is taken from the top byte of the clear color.
			uint8_t clearStencil = clearColor >> 24;
			draw_->Clear(clearFlag, clearColor, clearDepth, clearStencil);

			if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) {
				// Scissor end coordinates are passed on incremented by one.
				int scissorX1 = gstate.getScissorX1();
				int scissorY1 = gstate.getScissorY1();
				int scissorX2 = gstate.getScissorX2() + 1;
				int scissorY2 = gstate.getScissorY2() + 1;
				framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor);
			}
		}
	}

	gpuStats.numDrawCalls += numDrawCalls;
	gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;

	// Reset everything that accumulates between flushes.
	indexGen.Reset();
	decodedVerts_ = 0;
	numDrawCalls = 0;
	vertexCountInDrawCalls_ = 0;
	decodeCounter_ = 0;
	dcid_ = 0;
	prevPrim_ = GE_PRIM_INVALID;
	gstate_c.vertexFullAlpha = true;
	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

	// Now seems as good a time as any to reset the min/max coords, which we may examine later.
	gstate_c.vertBounds.minU = 512;
	gstate_c.vertBounds.minV = 512;
	gstate_c.vertBounds.maxU = 0;
	gstate_c.vertBounds.maxV = 0;

	GPUDebug::NotifyDraw();
}
693
TessellationDataTransferD3D11(ID3D11DeviceContext * context,ID3D11Device * device)694 TessellationDataTransferD3D11::TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device)
695 : context_(context), device_(device) {
696 desc.Usage = D3D11_USAGE_DYNAMIC;
697 desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
698 desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
699 desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
700 }
701
~TessellationDataTransferD3D11()702 TessellationDataTransferD3D11::~TessellationDataTransferD3D11() {
703 for (int i = 0; i < 3; ++i) {
704 if (buf[i]) buf[i]->Release();
705 if (view[i]) view[i]->Release();
706 }
707 }
708
SendDataToShader(const SimpleVertex * const * points,int size_u,int size_v,u32 vertType,const Spline::Weight2D & weights)709 void TessellationDataTransferD3D11::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
710 struct TessData {
711 float pos[3]; float pad1;
712 float uv[2]; float pad2[2];
713 float color[4];
714 };
715
716 int size = size_u * size_v;
717
718 if (prevSize < size) {
719 prevSize = size;
720 if (buf[0]) buf[0]->Release();
721 if (view[0]) view[0]->Release();
722
723 desc.ByteWidth = size * sizeof(TessData);
724 desc.StructureByteStride = sizeof(TessData);
725 device_->CreateBuffer(&desc, nullptr, &buf[0]);
726 device_->CreateShaderResourceView(buf[0], nullptr, &view[0]);
727 context_->VSSetShaderResources(0, 1, &view[0]);
728 }
729 D3D11_MAPPED_SUBRESOURCE map;
730 context_->Map(buf[0], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
731 uint8_t *data = (uint8_t *)map.pData;
732
733 float *pos = (float *)(data);
734 float *tex = (float *)(data + offsetof(TessData, uv));
735 float *col = (float *)(data + offsetof(TessData, color));
736 int stride = sizeof(TessData) / sizeof(float);
737
738 CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
739
740 context_->Unmap(buf[0], 0);
741
742 using Spline::Weight;
743
744 // Weights U
745 if (prevSizeWU < weights.size_u) {
746 prevSizeWU = weights.size_u;
747 if (buf[1]) buf[1]->Release();
748 if (view[1]) view[1]->Release();
749
750 desc.ByteWidth = weights.size_u * sizeof(Weight);
751 desc.StructureByteStride = sizeof(Weight);
752 device_->CreateBuffer(&desc, nullptr, &buf[1]);
753 device_->CreateShaderResourceView(buf[1], nullptr, &view[1]);
754 context_->VSSetShaderResources(1, 1, &view[1]);
755 }
756 context_->Map(buf[1], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
757 memcpy(map.pData, weights.u, weights.size_u * sizeof(Weight));
758 context_->Unmap(buf[1], 0);
759
760 // Weights V
761 if (prevSizeWV < weights.size_v) {
762 prevSizeWV = weights.size_v;
763 if (buf[2]) buf[2]->Release();
764 if (view[2]) view[2]->Release();
765
766 desc.ByteWidth = weights.size_v * sizeof(Weight);
767 desc.StructureByteStride = sizeof(Weight);
768 device_->CreateBuffer(&desc, nullptr, &buf[2]);
769 device_->CreateShaderResourceView(buf[2], nullptr, &view[2]);
770 context_->VSSetShaderResources(2, 1, &view[2]);
771 }
772 context_->Map(buf[2], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
773 memcpy(map.pData, weights.v, weights.size_v * sizeof(Weight));
774 context_->Unmap(buf[2], 0);
775 }
776