1 // Copyright (c) 2012- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include <algorithm>
19 
20 #include "Common/Log.h"
21 #include "Common/MemoryUtil.h"
22 #include "Common/TimeUtil.h"
23 #include "Core/MemMap.h"
24 #include "Core/System.h"
25 #include "Core/Reporting.h"
26 #include "Core/Config.h"
27 #include "Core/CoreTiming.h"
28 
29 #include "GPU/Math3D.h"
30 #include "GPU/GPUState.h"
31 #include "GPU/ge_constants.h"
32 
33 #include "GPU/Common/TextureDecoder.h"
34 #include "GPU/Common/SplineCommon.h"
35 #include "GPU/Common/TransformCommon.h"
36 #include "GPU/Common/VertexDecoderCommon.h"
37 #include "GPU/Common/SoftwareTransformCommon.h"
38 #include "GPU/Debugger/Debugger.h"
39 #include "GPU/D3D11/FramebufferManagerD3D11.h"
40 #include "GPU/D3D11/TextureCacheD3D11.h"
41 #include "GPU/D3D11/DrawEngineD3D11.h"
42 #include "GPU/D3D11/ShaderManagerD3D11.h"
43 #include "GPU/D3D11/GPU_D3D11.h"
44 
// Lookup table from a GE primitive type (used directly as the array index, see DoFlush())
// to the D3D11 topology used to draw it.
// Only seven entries are listed; the array is declared with eight elements, so the last
// slot has no initializer and is zero-initialized.
const D3D11_PRIMITIVE_TOPOLOGY d3d11prim[8] = {
	D3D11_PRIMITIVE_TOPOLOGY_POINTLIST,
	D3D11_PRIMITIVE_TOPOLOGY_LINELIST,
	D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  // Fans not supported
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  // Need expansion - though we could do it with geom shaders in most cases
};
54 
// BeginFrame() only runs its vertex cache cleanup pass once every this many calls.
#define VERTEXCACHE_DECIMATION_INTERVAL 17

// Thresholds used by the cleanup pass in BeginFrame(), measured in gpuStats.numFlips:
// - VAI_KILL_AGE: tracked vertex arrays whose lastFrame is older than this are deleted.
// - VAI_UNRELIABLE_KILL_AGE: same, but for entries in the VAI_UNRELIABLE state.
// - VAI_UNRELIABLE_KILL_MAX: upper bound on unreliable entries deleted per cleanup pass.
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
// Sizes handed to the PushBufferD3D11 constructors in InitDeviceObjects()
// (presumably byte counts - confirm against PushBufferD3D11).
enum {
	VERTEX_PUSH_SIZE = 1024 * 1024 * 16,
	INDEX_PUSH_SIZE = 1024 * 1024 * 4,
};
62 
// Input element description used by the software transform path in DoFlush(), where the
// vertex buffer is bound with a stride of sizeof(TransformedVertex).
// Each entry is { semantic name, semantic index, format, input slot, byte offset, slot class, step rate }.
static const D3D11_INPUT_ELEMENT_DESC TransformedVertexElements[] = {
	{ "POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
	{ "TEXCOORD", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0 },
	{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0 },
	{ "COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 32, D3D11_INPUT_PER_VERTEX_DATA, 0 },
};
69 
DrawEngineD3D11(Draw::DrawContext * draw,ID3D11Device * device,ID3D11DeviceContext * context)70 DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context)
71 	: draw_(draw),
72 		device_(device),
73 		context_(context),
74 		vai_(256),
75 		inputLayoutMap_(32),
76 		blendCache_(32),
77 		blendCache1_(32),
78 		depthStencilCache_(64),
79 		rasterCache_(4) {
80 	device1_ = (ID3D11Device1 *)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);
81 	context1_ = (ID3D11DeviceContext1 *)draw->GetNativeObject(Draw::NativeObject::CONTEXT_EX);
82 	decOptions_.expandAllWeightsToFloat = true;
83 	decOptions_.expand8BitNormalsToFloat = true;
84 
85 	decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
86 	// Allocate nicely aligned memory. Maybe graphics drivers will
87 	// appreciate it.
88 	// All this is a LOT of memory, need to see if we can cut down somehow.
89 	decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
90 	decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
91 
92 	indexGen.Setup(decIndex);
93 
94 	InitDeviceObjects();
95 
96 	// Vertex pushing buffers. For uniforms we use short DISCARD buffers, but we could use
97 	// this kind of buffer there as well with D3D11.1. We might be able to use the same buffer
98 	// for both vertices and indices, and possibly all three data types.
99 }
100 
~DrawEngineD3D11()101 DrawEngineD3D11::~DrawEngineD3D11() {
102 	DestroyDeviceObjects();
103 	FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
104 	FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
105 }
106 
InitDeviceObjects()107 void DrawEngineD3D11::InitDeviceObjects() {
108 	pushVerts_ = new PushBufferD3D11(device_, VERTEX_PUSH_SIZE, D3D11_BIND_VERTEX_BUFFER);
109 	pushInds_ = new PushBufferD3D11(device_, INDEX_PUSH_SIZE, D3D11_BIND_INDEX_BUFFER);
110 
111 	tessDataTransferD3D11 = new TessellationDataTransferD3D11(context_, device_);
112 	tessDataTransfer = tessDataTransferD3D11;
113 }
114 
ClearTrackedVertexArrays()115 void DrawEngineD3D11::ClearTrackedVertexArrays() {
116 	vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
117 		delete vai;
118 	});
119 	vai_.Clear();
120 }
121 
ClearInputLayoutMap()122 void DrawEngineD3D11::ClearInputLayoutMap() {
123 	inputLayoutMap_.Iterate([&](const InputLayoutKey &key, ID3D11InputLayout *il) {
124 		if (il)
125 			il->Release();
126 	});
127 	inputLayoutMap_.Clear();
128 }
129 
Resized()130 void DrawEngineD3D11::Resized() {
131 	DrawEngineCommon::Resized();
132 	ClearInputLayoutMap();
133 }
134 
DestroyDeviceObjects()135 void DrawEngineD3D11::DestroyDeviceObjects() {
136 	ClearTrackedVertexArrays();
137 	ClearInputLayoutMap();
138 	delete tessDataTransferD3D11;
139 	tessDataTransferD3D11 = nullptr;
140 	tessDataTransfer = nullptr;
141 	delete pushVerts_;
142 	delete pushInds_;
143 	depthStencilCache_.Iterate([&](const uint64_t &key, ID3D11DepthStencilState *ds) {
144 		ds->Release();
145 	});
146 	depthStencilCache_.Clear();
147 	blendCache_.Iterate([&](const uint64_t &key, ID3D11BlendState *bs) {
148 		bs->Release();
149 	});
150 	blendCache_.Clear();
151 	blendCache1_.Iterate([&](const uint64_t &key, ID3D11BlendState1 *bs) {
152 		bs->Release();
153 	});
154 	blendCache1_.Clear();
155 	rasterCache_.Iterate([&](const uint32_t &key, ID3D11RasterizerState *rs) {
156 		rs->Release();
157 	});
158 	rasterCache_.Clear();
159 }
160 
// One row of the VComp table: the DXGI format used for a decoded vertex component,
// plus a human-readable label (the label is not read anywhere in the code visible here).
struct DeclTypeInfo {
	DXGI_FORMAT type;
	const char * name;
};
165 
// Table of DXGI formats, indexed by the decoded component format id that
// VertexAttribSetup() receives as `fmt`. The trailing comment on each row names the
// format id that row corresponds to; the order of the rows therefore must not change.
// Rows with DXGI_FORMAT_UNKNOWN have no D3D11 equivalent assigned.
static const DeclTypeInfo VComp[] = {
	{ DXGI_FORMAT_UNKNOWN, "NULL" }, // DEC_NONE,
	{ DXGI_FORMAT_R32_FLOAT, "D3DDECLTYPE_FLOAT1 " },  // DEC_FLOAT_1,
	{ DXGI_FORMAT_R32G32_FLOAT, "D3DDECLTYPE_FLOAT2 " },  // DEC_FLOAT_2,
	{ DXGI_FORMAT_R32G32B32_FLOAT, "D3DDECLTYPE_FLOAT3 " },  // DEC_FLOAT_3,
	{ DXGI_FORMAT_R32G32B32A32_FLOAT, "D3DDECLTYPE_FLOAT4 " },  // DEC_FLOAT_4,

	{ DXGI_FORMAT_R8G8B8A8_SNORM, "UNUSED" }, // DEC_S8_3,

	{ DXGI_FORMAT_R16G16B16A16_SNORM, "D3DDECLTYPE_SHORT4N	" },	// DEC_S16_3,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N	" },	// DEC_U8_1,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N	" },	// DEC_U8_2,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N	" },	// DEC_U8_3,
	{ DXGI_FORMAT_R8G8B8A8_UNORM, "D3DDECLTYPE_UBYTE4N	" },	// DEC_U8_4,

	{ DXGI_FORMAT_UNKNOWN, "UNUSED_DEC_U16_1" },	// 	DEC_U16_1,
	{ DXGI_FORMAT_UNKNOWN, "UNUSED_DEC_U16_2" },	// 	DEC_U16_2,
	{ DXGI_FORMAT_R16G16B16A16_UNORM	,"D3DDECLTYPE_USHORT4N "}, // DEC_U16_3,
	{ DXGI_FORMAT_R16G16B16A16_UNORM	,"D3DDECLTYPE_USHORT4N "}, // DEC_U16_4,
};
186 
VertexAttribSetup(D3D11_INPUT_ELEMENT_DESC * VertexElement,u8 fmt,u8 offset,const char * semantic,u8 semantic_index=0)187 static void VertexAttribSetup(D3D11_INPUT_ELEMENT_DESC * VertexElement, u8 fmt, u8 offset, const char *semantic, u8 semantic_index = 0) {
188 	memset(VertexElement, 0, sizeof(D3D11_INPUT_ELEMENT_DESC));
189 	VertexElement->AlignedByteOffset = offset;
190 	VertexElement->Format = VComp[fmt].type;
191 	VertexElement->SemanticName = semantic;
192 	VertexElement->SemanticIndex = semantic_index;
193 }
194 
SetupDecFmtForDraw(D3D11VertexShader * vshader,const DecVtxFormat & decFmt,u32 pspFmt)195 ID3D11InputLayout *DrawEngineD3D11::SetupDecFmtForDraw(D3D11VertexShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) {
196 	// TODO: Instead of one for each vshader, we can reduce it to one for each type of shader
197 	// that reads TEXCOORD or not, etc. Not sure if worth it.
198 	InputLayoutKey key{ vshader, decFmt.id };
199 	ID3D11InputLayout *inputLayout = inputLayoutMap_.Get(key);
200 	if (inputLayout) {
201 		return inputLayout;
202 	} else {
203 		D3D11_INPUT_ELEMENT_DESC VertexElements[8];
204 		D3D11_INPUT_ELEMENT_DESC *VertexElement = &VertexElements[0];
205 
206 		// Vertices Elements orders
207 		// WEIGHT
208 		if (decFmt.w0fmt != 0) {
209 			VertexAttribSetup(VertexElement, decFmt.w0fmt, decFmt.w0off, "TEXCOORD", 1);
210 			VertexElement++;
211 		}
212 
213 		if (decFmt.w1fmt != 0) {
214 			VertexAttribSetup(VertexElement, decFmt.w1fmt, decFmt.w1off, "TEXCOORD", 2);
215 			VertexElement++;
216 		}
217 
218 		// TC
219 		if (decFmt.uvfmt != 0) {
220 			VertexAttribSetup(VertexElement, decFmt.uvfmt, decFmt.uvoff, "TEXCOORD", 0);
221 			VertexElement++;
222 		}
223 
224 		// COLOR
225 		if (decFmt.c0fmt != 0) {
226 			VertexAttribSetup(VertexElement, decFmt.c0fmt, decFmt.c0off, "COLOR", 0);
227 			VertexElement++;
228 		}
229 		// Never used ?
230 		if (decFmt.c1fmt != 0) {
231 			VertexAttribSetup(VertexElement, decFmt.c1fmt, decFmt.c1off, "COLOR", 1);
232 			VertexElement++;
233 		}
234 
235 		// NORMAL
236 		if (decFmt.nrmfmt != 0) {
237 			VertexAttribSetup(VertexElement, decFmt.nrmfmt, decFmt.nrmoff, "NORMAL", 0);
238 			VertexElement++;
239 		}
240 
241 		// POSITION
242 		// Always
243 		VertexAttribSetup(VertexElement, decFmt.posfmt, decFmt.posoff, "POSITION", 0);
244 		VertexElement++;
245 
246 		// Create declaration
247 		HRESULT hr = device_->CreateInputLayout(VertexElements, VertexElement - VertexElements, vshader->bytecode().data(), vshader->bytecode().size(), &inputLayout);
248 		if (FAILED(hr)) {
249 			ERROR_LOG(G3D, "Failed to create input layout!");
250 			inputLayout = nullptr;
251 		}
252 
253 		// Add it to map
254 		inputLayoutMap_.Insert(key, inputLayout);
255 		return inputLayout;
256 	}
257 }
258 
MarkUnreliable(VertexArrayInfoD3D11 * vai)259 void DrawEngineD3D11::MarkUnreliable(VertexArrayInfoD3D11 *vai) {
260 	vai->status = VertexArrayInfoD3D11::VAI_UNRELIABLE;
261 	if (vai->vbo) {
262 		vai->vbo->Release();
263 		vai->vbo = nullptr;
264 	}
265 	if (vai->ebo) {
266 		vai->ebo->Release();
267 		vai->ebo = nullptr;
268 	}
269 }
270 
BeginFrame()271 void DrawEngineD3D11::BeginFrame() {
272 	pushVerts_->Reset();
273 	pushInds_->Reset();
274 
275 	if (--decimationCounter_ <= 0) {
276 		decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
277 	} else {
278 		return;
279 	}
280 
281 	const int threshold = gpuStats.numFlips - VAI_KILL_AGE;
282 	const int unreliableThreshold = gpuStats.numFlips - VAI_UNRELIABLE_KILL_AGE;
283 	int unreliableLeft = VAI_UNRELIABLE_KILL_MAX;
284 	vai_.Iterate([&](uint32_t hash, VertexArrayInfoD3D11 *vai){
285 		bool kill;
286 		if (vai->status == VertexArrayInfoD3D11::VAI_UNRELIABLE) {
287 			// We limit killing unreliable so we don't rehash too often.
288 			kill = vai->lastFrame < unreliableThreshold && --unreliableLeft >= 0;
289 		} else {
290 			kill = vai->lastFrame < threshold;
291 		}
292 		if (kill) {
293 			delete vai;
294 			vai_.Remove(hash);
295 		}
296 	});
297 	vai_.Maintain();
298 
299 	// Enable if you want to see vertex decoders in the log output. Need a better way.
300 #if 0
301 	char buffer[16384];
302 	for (std::map<u32, VertexDecoder*>::iterator dec = decoderMap_.begin(); dec != decoderMap_.end(); ++dec) {
303 		char *ptr = buffer;
304 		ptr += dec->second->ToString(ptr);
305 		//		*ptr++ = '\n';
306 		NOTICE_LOG(G3D, buffer);
307 	}
308 #endif
309 
310 	lastRenderStepId_ = -1;
311 }
312 
~VertexArrayInfoD3D11()313 VertexArrayInfoD3D11::~VertexArrayInfoD3D11() {
314 	if (vbo)
315 		vbo->Release();
316 	if (ebo)
317 		ebo->Release();
318 }
319 
// Submits everything that has been accumulated since the previous flush and resets the
// per-batch state afterwards.
//
// Two paths exist:
//  - Hardware transform: vertices are decoded and drawn with the decoded vertex format,
//    optionally going through the vertex cache (vai_) so unchanged data can be drawn from
//    immutable buffers instead of being re-uploaded.
//  - Software transform: vertices are decoded and transformed on the CPU, and the result
//    is either drawn as TransformedVertex data or turned into a framebuffer clear.
//
// The inline wrapper in the header checks for numDrawCalls == 0
void DrawEngineD3D11::DoFlush() {
	gpuStats.numFlushes++;
	gpuStats.numTrackedVertexArrays = (int)vai_.size();

	// In D3D, we're synchronous and state carries over so all we reset here on a new step is the viewport/scissor.
	int curRenderStepId = draw_->GetCurrentStepId();
	if (lastRenderStepId_ != curRenderStepId) {
		// Dirty everything that has dynamic state that will need re-recording.
		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		lastRenderStepId_ = curRenderStepId;
	}

	// This is not done on every drawcall, we collect vertex data
	// until critical state changes. That's when we draw (flush).

	GEPrimitiveType prim = prevPrim_;
	ApplyDrawState(prim);

	// Always use software for flat shading to fix the provoking index.
	// Hardware tessellation submits are exempt from that rule.
	bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
	bool useHWTransform = CanUseHardwareTransform(prim) && (tess || gstate.getShadeMode() != GE_SHADE_FLAT);

	if (useHWTransform) {
		// Non-null only when drawing from cached (immutable) buffers; otherwise the data is pushed below.
		ID3D11Buffer *vb_ = nullptr;
		ID3D11Buffer *ib_ = nullptr;

		int vertexCount = 0;
		int maxIndex = 0;
		bool useElements = true;

		// Cannot cache vertex data with morph enabled.
		bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
		// Also avoid caching when software skinning.
		if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
			useCache = false;

		if (useCache) {
			u32 id = dcid_ ^ gstate.getUVGenMode();  // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263

			// Look up the cache entry for this draw id, creating a fresh one on a miss.
			VertexArrayInfoD3D11 *vai = vai_.Get(id);
			if (!vai) {
				vai = new VertexArrayInfoD3D11();
				vai_.Insert(id, vai);
			}

			// Note: every `goto rotateVBO` below jumps into the uncached branch further down,
			// which also means the `vai->lastFrame` update after this switch is skipped.
			switch (vai->status) {
			case VertexArrayInfoD3D11::VAI_NEW:
				{
					// Haven't seen this one before.
					// Record its hashes and metadata, but draw it uncached this time.
					uint64_t dataHash = ComputeHash();
					vai->hash = dataHash;
					vai->minihash = ComputeMiniHash();
					vai->status = VertexArrayInfoD3D11::VAI_HASHING;
					vai->drawsUntilNextFullHash = 0;
					DecodeVerts(decoded); // writes to indexGen
					vai->numVerts = indexGen.VertexCount();
					vai->prim = indexGen.Prim();
					vai->maxIndex = indexGen.MaxIndex();
					vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0;
					goto rotateVBO;
				}

				// Hashing - still gaining confidence about the buffer.
				// But if we get this far it's likely to be worth creating a vertex buffer.
			case VertexArrayInfoD3D11::VAI_HASHING:
				{
					vai->numDraws++;
					if (vai->lastFrame != gpuStats.numFlips) {
						vai->numFrames++;
					}
					if (vai->drawsUntilNextFullHash == 0) {
						// Let's try to skip a full hash if mini would fail.
						const u32 newMiniHash = ComputeMiniHash();
						uint64_t newHash = vai->hash;
						if (newMiniHash == vai->minihash) {
							newHash = ComputeHash();
						}
						// Any mismatch means the source data changed: stop caching this entry.
						if (newMiniHash != vai->minihash || newHash != vai->hash) {
							MarkUnreliable(vai);
							DecodeVerts(decoded);
							goto rotateVBO;
						}
						if (vai->numVerts > 64) {
							// exponential backoff up to 16 draws, then every 24
							vai->drawsUntilNextFullHash = std::min(24, vai->numFrames);
						} else {
							// Lower numbers seem much more likely to change.
							vai->drawsUntilNextFullHash = 0;
						}
						// TODO: tweak
						//if (vai->numFrames > 1000) {
						//	vai->status = VertexArrayInfo::VAI_RELIABLE;
						//}
					} else {
						// Between full hashes only the cheaper mini hash is verified.
						vai->drawsUntilNextFullHash--;
						u32 newMiniHash = ComputeMiniHash();
						if (newMiniHash != vai->minihash) {
							MarkUnreliable(vai);
							DecodeVerts(decoded);
							goto rotateVBO;
						}
					}

					if (vai->vbo == 0) {
						// Hashes matched but no buffers exist yet: decode once and build immutable buffers.
						DecodeVerts(decoded);
						vai->numVerts = indexGen.VertexCount();
						vai->prim = indexGen.Prim();
						vai->maxIndex = indexGen.MaxIndex();
						vai->flags = gstate_c.vertexFullAlpha ? VAI11_FLAG_VERTEXFULLALPHA : 0;
						useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
						if (!useElements && indexGen.PureCount()) {
							vai->numVerts = indexGen.PureCount();
						}

						_dbg_assert_msg_(gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");

						// TODO: Combine these two into one buffer?
						// NOTE(review): this sizes the buffer as stride * MaxIndex(), while the push path
						// below copies (maxIndex + 1) * stride - confirm against IndexGenerator::MaxIndex().
						u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
						D3D11_BUFFER_DESC desc{ size, D3D11_USAGE_IMMUTABLE, D3D11_BIND_VERTEX_BUFFER, 0 };
						D3D11_SUBRESOURCE_DATA data{ decoded };
						ASSERT_SUCCESS(device_->CreateBuffer(&desc, &data, &vai->vbo));
						if (useElements) {
							// 16-bit indices, one per generated vertex reference.
							u32 size = sizeof(short) * indexGen.VertexCount();
							D3D11_BUFFER_DESC desc{ size, D3D11_USAGE_IMMUTABLE, D3D11_BIND_INDEX_BUFFER, 0 };
							D3D11_SUBRESOURCE_DATA data{ decIndex };
							ASSERT_SUCCESS(device_->CreateBuffer(&desc, &data, &vai->ebo));
						} else {
							vai->ebo = 0;
						}
					} else {
						// Cache hit: nothing is decoded, the stored buffers and flags are reused.
						gpuStats.numCachedDrawCalls++;
						useElements = vai->ebo ? true : false;
						gpuStats.numCachedVertsDrawn += vai->numVerts;
						gstate_c.vertexFullAlpha = vai->flags & VAI11_FLAG_VERTEXFULLALPHA;
					}
					vb_ = vai->vbo;
					ib_ = vai->ebo;
					vertexCount = vai->numVerts;
					maxIndex = vai->maxIndex;
					prim = static_cast<GEPrimitiveType>(vai->prim);
					break;
				}

				// Reliable - we don't even bother hashing anymore. Right now we don't go here until after a very long time.
				// (No code visible in this file assigns VAI_RELIABLE; the only candidate is commented out above.)
			case VertexArrayInfoD3D11::VAI_RELIABLE:
				{
					vai->numDraws++;
					if (vai->lastFrame != gpuStats.numFlips) {
						vai->numFrames++;
					}
					gpuStats.numCachedDrawCalls++;
					gpuStats.numCachedVertsDrawn += vai->numVerts;
					vb_ = vai->vbo;
					ib_ = vai->ebo;

					vertexCount = vai->numVerts;

					maxIndex = vai->maxIndex;
					prim = static_cast<GEPrimitiveType>(vai->prim);

					gstate_c.vertexFullAlpha = vai->flags & VAI11_FLAG_VERTEXFULLALPHA;
					break;
				}

			case VertexArrayInfoD3D11::VAI_UNRELIABLE:
				{
					// Data is known to change: keep the statistics but always decode and draw uncached.
					vai->numDraws++;
					if (vai->lastFrame != gpuStats.numFlips) {
						vai->numFrames++;
					}
					DecodeVerts(decoded);
					goto rotateVBO;
				}
			}

			// Only reached via `break` above, i.e. for draws that used (or just created) cached buffers.
			vai->lastFrame = gpuStats.numFlips;
		} else {
			DecodeVerts(decoded);
rotateVBO:
			// Uncached draw: take counts and primitive type straight from the index generator.
			// vb_ stays null, so the data gets pushed below.
			gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
			useElements = !indexGen.SeenOnlyPurePrims() || prim == GE_PRIM_TRIANGLE_FAN;
			vertexCount = indexGen.VertexCount();
			maxIndex = indexGen.MaxIndex();
			if (!useElements && indexGen.PureCount()) {
				vertexCount = indexGen.PureCount();
			}
			prim = indexGen.Prim();
		}

		VERBOSE_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
		// Refine vertexFullAlpha using the material/lighting state.
		bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
		if (gstate.isModeThrough()) {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
		} else {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
		}

		ApplyDrawStateLate(true, dynState_.stencilRef);

		// Select shaders and the matching input layout, then upload and bind uniforms.
		D3D11VertexShader *vshader;
		D3D11FragmentShader *fshader;
		shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat);
		ID3D11InputLayout *inputLayout = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
		context_->PSSetShader(fshader->GetShader(), nullptr, 0);
		context_->VSSetShader(vshader->GetShader(), nullptr, 0);
		shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
		shaderManager_->BindUniforms();

		context_->IASetInputLayout(inputLayout);
		UINT stride = dec_->GetDecVtxFmt().stride;
		context_->IASetPrimitiveTopology(d3d11prim[prim]);
		if (!vb_) {
			// Push!
			// No cached buffer: copy the decoded vertices (and indices, if used) into the push buffers.
			UINT vOffset;
			int vSize = (maxIndex + 1) * dec_->GetDecVtxFmt().stride;
			uint8_t *vptr = pushVerts_->BeginPush(context_, &vOffset, vSize);
			memcpy(vptr, decoded, vSize);
			pushVerts_->EndPush(context_);
			ID3D11Buffer *buf = pushVerts_->Buf();
			context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
			if (useElements) {
				UINT iOffset;
				// Two bytes per 16-bit index.
				int iSize = 2 * indexGen.VertexCount();
				uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
				memcpy(iptr, decIndex, iSize);
				pushInds_->EndPush(context_);
				context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
				context_->DrawIndexed(vertexCount, 0, 0);
			} else {
				context_->Draw(vertexCount, 0);
			}
		} else {
			// Draw directly from the cached immutable buffers.
			UINT offset = 0;
			context_->IASetVertexBuffers(0, 1, &vb_, &stride, &offset);
			if (useElements) {
				context_->IASetIndexBuffer(ib_, DXGI_FORMAT_R16_UINT, 0);
				context_->DrawIndexed(vertexCount, 0, 0);
			} else {
				context_->Draw(vertexCount, 0);
			}
		}
	} else {
		// Software transform path.
		DecodeVerts(decoded);
		bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
		if (gstate.isModeThrough()) {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
		} else {
			gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
		}

		gpuStats.numUncachedVertsDrawn += indexGen.VertexCount();
		prim = indexGen.Prim();
		// Undo the strip optimization, not supported by the SW code yet.
		if (prim == GE_PRIM_TRIANGLE_STRIP)
			prim = GE_PRIM_TRIANGLES;
		VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());

		// Describe the inputs, scratch buffers and backend capabilities for the software transformer.
		u16 *inds = decIndex;
		SoftwareTransformResult result{};
		SoftwareTransformParams params{};
		params.decoded = decoded;
		params.transformed = transformed;
		params.transformedExpanded = transformedExpanded;
		params.fbman = framebufferManager_;
		params.texCache = textureCache_;
		params.allowClear = true;
		params.allowSeparateAlphaClear = false;  // D3D11 doesn't support separate alpha clears
		params.provokeFlatFirst = true;

		int maxIndex = indexGen.MaxIndex();
		SoftwareTransform swTransform(params);
		swTransform.Decode(prim, dec_->VertexType(), dec_->GetDecVtxFmt(), maxIndex, &result);
		// Decode() leaves SW_NOT_READY when it did not settle on an action by itself;
		// in that case the drawing parameters still need to be built.
		if (result.action == SW_NOT_READY) {
			swTransform.DetectOffsetTexture(maxIndex);
			swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
		}

		if (result.setSafeSize)
			framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);

		if (result.action == SW_DRAW_PRIMITIVES) {
			ApplyDrawStateLate(result.setStencil, result.stencilValue);

			D3D11VertexShader *vshader;
			D3D11FragmentShader *fshader;
			shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, false, false, decOptions_.expandAllWeightsToFloat);
			context_->PSSetShader(fshader->GetShader(), nullptr, 0);
			context_->VSSetShader(vshader->GetShader(), nullptr, 0);
			shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
			shaderManager_->BindUniforms();

			// We really do need a vertex layout for each vertex shader (or at least check its ID bits for what inputs it uses)!
			// Some vertex shaders ignore one of the inputs, and then the layout created from it will lack it, which will be a problem for others.
			InputLayoutKey key{ vshader, 0xFFFFFFFF };  // Let's use 0xFFFFFFFF to signify TransformedVertex
			ID3D11InputLayout *layout = inputLayoutMap_.Get(key);
			if (!layout) {
				ASSERT_SUCCESS(device_->CreateInputLayout(TransformedVertexElements, ARRAY_SIZE(TransformedVertexElements), vshader->bytecode().data(), vshader->bytecode().size(), &layout));
				inputLayoutMap_.Insert(key, layout);
			}
			context_->IASetInputLayout(layout);
			context_->IASetPrimitiveTopology(d3d11prim[prim]);

			// Upload the transformed vertices (and indices, if the result is indexed) and draw.
			UINT stride = sizeof(TransformedVertex);
			UINT vOffset = 0;
			int vSize = maxIndex * stride;
			uint8_t *vptr = pushVerts_->BeginPush(context_, &vOffset, vSize);
			memcpy(vptr, result.drawBuffer, vSize);
			pushVerts_->EndPush(context_);
			ID3D11Buffer *buf = pushVerts_->Buf();
			context_->IASetVertexBuffers(0, 1, &buf, &stride, &vOffset);
			if (result.drawIndexed) {
				UINT iOffset;
				int iSize = sizeof(uint16_t) * result.drawNumTrans;
				uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
				memcpy(iptr, inds, iSize);
				pushInds_->EndPush(context_);
				context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
				context_->DrawIndexed(result.drawNumTrans, 0, 0);
			} else {
				context_->Draw(result.drawNumTrans, 0);
			}
		} else if (result.action == SW_CLEAR) {
			// The software transformer turned the draw into a clear.
			u32 clearColor = result.color;
			float clearDepth = result.depth;

			uint32_t clearFlag = 0;

			// The alpha clear mask selects the stencil channel here.
			if (gstate.isClearModeColorMask()) clearFlag |= Draw::FBChannel::FB_COLOR_BIT;
			if (gstate.isClearModeAlphaMask()) clearFlag |= Draw::FBChannel::FB_STENCIL_BIT;
			if (gstate.isClearModeDepthMask()) clearFlag |= Draw::FBChannel::FB_DEPTH_BIT;

			if (clearFlag & Draw::FBChannel::FB_DEPTH_BIT) {
				framebufferManager_->SetDepthUpdated();
			}
			if (clearFlag & Draw::FBChannel::FB_COLOR_BIT) {
				framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
			}

			// The stencil clear value is taken from the top 8 bits (alpha byte) of the clear color.
			uint8_t clearStencil = clearColor >> 24;
			draw_->Clear(clearFlag, clearColor, clearDepth, clearStencil);

			if ((gstate_c.featureFlags & GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) {
				// Also apply the clear to memory; the second scissor corner is passed as exclusive (+1).
				int scissorX1 = gstate.getScissorX1();
				int scissorY1 = gstate.getScissorY1();
				int scissorX2 = gstate.getScissorX2() + 1;
				int scissorY2 = gstate.getScissorY2() + 1;
				framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor);
			}
		}
	}

	// Accumulate statistics, then reset all per-batch state for the next round of draw calls.
	gpuStats.numDrawCalls += numDrawCalls;
	gpuStats.numVertsSubmitted += vertexCountInDrawCalls_;

	indexGen.Reset();
	decodedVerts_ = 0;
	numDrawCalls = 0;
	vertexCountInDrawCalls_ = 0;
	decodeCounter_ = 0;
	dcid_ = 0;
	prevPrim_ = GE_PRIM_INVALID;
	gstate_c.vertexFullAlpha = true;
	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);

	// Now seems as good a time as any to reset the min/max coords, which we may examine later.
	// (min > max marks the bounds as empty.)
	gstate_c.vertBounds.minU = 512;
	gstate_c.vertBounds.minV = 512;
	gstate_c.vertBounds.maxU = 0;
	gstate_c.vertBounds.maxV = 0;

	GPUDebug::NotifyDraw();
}
693 
TessellationDataTransferD3D11(ID3D11DeviceContext * context,ID3D11Device * device)694 TessellationDataTransferD3D11::TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device)
695 	: context_(context), device_(device) {
696 	desc.Usage = D3D11_USAGE_DYNAMIC;
697 	desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
698 	desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
699 	desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
700 }
701 
~TessellationDataTransferD3D11()702 TessellationDataTransferD3D11::~TessellationDataTransferD3D11() {
703 	for (int i = 0; i < 3; ++i) {
704 		if (buf[i]) buf[i]->Release();
705 		if (view[i]) view[i]->Release();
706 	}
707 }
708 
SendDataToShader(const SimpleVertex * const * points,int size_u,int size_v,u32 vertType,const Spline::Weight2D & weights)709 void TessellationDataTransferD3D11::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
710 	struct TessData {
711 		float pos[3]; float pad1;
712 		float uv[2]; float pad2[2];
713 		float color[4];
714 	};
715 
716 	int size = size_u * size_v;
717 
718 	if (prevSize < size) {
719 		prevSize = size;
720 		if (buf[0]) buf[0]->Release();
721 		if (view[0]) view[0]->Release();
722 
723 		desc.ByteWidth = size * sizeof(TessData);
724 		desc.StructureByteStride = sizeof(TessData);
725 		device_->CreateBuffer(&desc, nullptr, &buf[0]);
726 		device_->CreateShaderResourceView(buf[0], nullptr, &view[0]);
727 		context_->VSSetShaderResources(0, 1, &view[0]);
728 	}
729 	D3D11_MAPPED_SUBRESOURCE map;
730 	context_->Map(buf[0], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
731 	uint8_t *data = (uint8_t *)map.pData;
732 
733 	float *pos = (float *)(data);
734 	float *tex = (float *)(data + offsetof(TessData, uv));
735 	float *col = (float *)(data + offsetof(TessData, color));
736 	int stride = sizeof(TessData) / sizeof(float);
737 
738 	CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
739 
740 	context_->Unmap(buf[0], 0);
741 
742 	using Spline::Weight;
743 
744 	// Weights U
745 	if (prevSizeWU < weights.size_u) {
746 		prevSizeWU = weights.size_u;
747 		if (buf[1]) buf[1]->Release();
748 		if (view[1]) view[1]->Release();
749 
750 		desc.ByteWidth = weights.size_u * sizeof(Weight);
751 		desc.StructureByteStride = sizeof(Weight);
752 		device_->CreateBuffer(&desc, nullptr, &buf[1]);
753 		device_->CreateShaderResourceView(buf[1], nullptr, &view[1]);
754 		context_->VSSetShaderResources(1, 1, &view[1]);
755 	}
756 	context_->Map(buf[1], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
757 	memcpy(map.pData, weights.u, weights.size_u * sizeof(Weight));
758 	context_->Unmap(buf[1], 0);
759 
760 	// Weights V
761 	if (prevSizeWV < weights.size_v) {
762 		prevSizeWV = weights.size_v;
763 		if (buf[2]) buf[2]->Release();
764 		if (view[2]) view[2]->Release();
765 
766 		desc.ByteWidth = weights.size_v * sizeof(Weight);
767 		desc.StructureByteStride = sizeof(Weight);
768 		device_->CreateBuffer(&desc, nullptr, &buf[2]);
769 		device_->CreateShaderResourceView(buf[2], nullptr, &view[2]);
770 		context_->VSSetShaderResources(2, 1, &view[2]);
771 	}
772 	context_->Map(buf[2], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
773 	memcpy(map.pData, weights.v, weights.size_v * sizeof(Weight));
774 	context_->Unmap(buf[2], 0);
775 }
776