1 #include <vector>
2 #include <cstdio>
3 #include <cstdint>
4 
5 #include "ppsspp_config.h"
6 
7 #ifdef _DEBUG
8 #define D3D_DEBUG_INFO
9 #endif
10 
11 #include <d3d9.h>
12 #ifdef USE_CRT_DBG
13 #undef new
14 #endif
15 
16 #include <D3Dcompiler.h>
17 #include "Common/GPU/D3D9/D3DCompilerLoader.h"
18 
19 #ifndef D3DXERR_INVALIDDATA
20 #define D3DXERR_INVALIDDATA 0x88760b59
21 #endif
22 
23 #include "Common/Math/lin/matrix4x4.h"
24 #include "Common/GPU/thin3d.h"
25 #include "Common/GPU/D3D9/D3D9StateCache.h"
26 
27 #include "Common/Log.h"
28 
29 namespace Draw {
30 
// Upper bound on texture/sampler slots: BindTextures and BindSamplerStates
// assert that start + count does not exceed this value.
static constexpr int MAX_BOUND_TEXTURES = 8;
32 
// Lookup table from the abstraction's comparison enum to D3DCMPFUNC.
// CreateDepthStencilState indexes it with (int)desc.depthCompare and
// (int)desc.front.compareOp, so the entry order must follow the declaration
// order of that enum (declared outside this file -- keep in sync).
// Could be declared as u8
static const D3DCMPFUNC compareToD3D9[] = {
	D3DCMP_NEVER,
	D3DCMP_LESS,
	D3DCMP_EQUAL,
	D3DCMP_LESSEQUAL,
	D3DCMP_GREATER,
	D3DCMP_NOTEQUAL,
	D3DCMP_GREATEREQUAL,
	D3DCMP_ALWAYS
};
44 
// Lookup table from the abstraction's blend equation enum to D3DBLENDOP.
// CreateBlendState indexes it with (int)desc.eqCol and (int)desc.eqAlpha; the
// entry order must match that enum's declaration order (declared elsewhere).
// Could be declared as u8
static const D3DBLENDOP blendEqToD3D9[] = {
	D3DBLENDOP_ADD,
	D3DBLENDOP_SUBTRACT,
	D3DBLENDOP_REVSUBTRACT,
	D3DBLENDOP_MIN,
	D3DBLENDOP_MAX,
};
53 
// Lookup table from the abstraction's blend factor enum to D3DBLEND.
// CreateBlendState indexes it with the src/dst color and alpha factors of the
// descriptor, so the entry order must match that enum (declared elsewhere).
// NOTE(review): BLENDFACTOR/INVBLENDFACTOR appear twice -- presumably the
// constant-color and constant-alpha factors both map onto D3D9's single blend
// factor; the trailing D3DBLEND_ZERO entries presumably stand in for factors
// with no D3D9 counterpart. Confirm against the enum declaration.
// Could be declared as u8
static const D3DBLEND blendFactorToD3D9[] = {
	D3DBLEND_ZERO,
	D3DBLEND_ONE,
	D3DBLEND_SRCCOLOR,
	D3DBLEND_INVSRCCOLOR,
	D3DBLEND_DESTCOLOR,
	D3DBLEND_INVDESTCOLOR,
	D3DBLEND_SRCALPHA,
	D3DBLEND_INVSRCALPHA,
	D3DBLEND_DESTALPHA,
	D3DBLEND_INVDESTALPHA,
	D3DBLEND_BLENDFACTOR,
	D3DBLEND_INVBLENDFACTOR,
	D3DBLEND_BLENDFACTOR,
	D3DBLEND_INVBLENDFACTOR,
	D3DBLEND_ZERO,
	D3DBLEND_ZERO,
	D3DBLEND_ZERO,
	D3DBLEND_ZERO,
};
75 
// Lookup table from the abstraction's texture addressing enum to
// D3DTEXTUREADDRESS. CreateSamplerState indexes it with (int)desc.wrapU and
// (int)desc.wrapV; entry order must match that enum (declared elsewhere).
static const D3DTEXTUREADDRESS texWrapToD3D9[] = {
	D3DTADDRESS_WRAP,
	D3DTADDRESS_MIRROR,
	D3DTADDRESS_CLAMP,
	D3DTADDRESS_BORDER,
};
82 
// Lookup table from the abstraction's texture filter enum to
// D3DTEXTUREFILTERTYPE. CreateSamplerState indexes it with the mag, min and mip
// filters of the descriptor; only two entries exist, so the enum is assumed to
// have exactly two values (point, linear) -- TODO confirm.
static const D3DTEXTUREFILTERTYPE texFilterToD3D9[] = {
	D3DTEXF_POINT,
	D3DTEXF_LINEAR,
};
87 
// Lookup table from the abstraction's primitive enum to D3DPRIMITIVETYPE.
// CreateGraphicsPipeline indexes it with (int)desc.prim. Primitive kinds that
// D3D9 cannot express are mapped to D3DPT_POINTLIST as placeholders.
static const D3DPRIMITIVETYPE primToD3D9[] = {
	D3DPT_POINTLIST,
	D3DPT_LINELIST,
	D3DPT_LINESTRIP,
	D3DPT_TRIANGLELIST,
	D3DPT_TRIANGLESTRIP,
	D3DPT_TRIANGLEFAN,
	// These aren't available.
	D3DPT_POINTLIST,  // tess
	D3DPT_POINTLIST,  // geom ...
	D3DPT_POINTLIST,
	D3DPT_POINTLIST,
	D3DPT_POINTLIST,
};
102 
// Lookup table from the abstraction's stencil operation enum to D3DSTENCILOP.
// CreateDepthStencilState indexes it with the front-face pass, fail and
// depth-fail operations; entry order must match that enum (declared elsewhere).
static const D3DSTENCILOP stencilOpToD3D9[] = {
	D3DSTENCILOP_KEEP,
	D3DSTENCILOP_ZERO,
	D3DSTENCILOP_REPLACE,
	D3DSTENCILOP_INCRSAT,
	D3DSTENCILOP_DECRSAT,
	D3DSTENCILOP_INVERT,
	D3DSTENCILOP_INCR,
	D3DSTENCILOP_DECR,
};
113 
// Per-primitive divisor, stored into D3D9Pipeline::primDivisor by
// CreateGraphicsPipeline. It is indexed with the same (int)desc.prim value as
// primToD3D9, so it must have one entry for every entry of that table; the
// final entry was previously missing, which made the last primitive index read
// past the end of the array.
static const int primCountDivisor[] = {
	1,
	2,
	3,
	3,
	3,
	1,
	// Placeholders for the primitive kinds D3D9 does not support.
	1,
	1,
	1,
	1,
	1,
};
126 
FormatToD3DFMT(DataFormat fmt)127 D3DFORMAT FormatToD3DFMT(DataFormat fmt) {
128 	switch (fmt) {
129 	case DataFormat::R8G8B8A8_UNORM: return D3DFMT_A8R8G8B8;
130 	case DataFormat::B8G8R8A8_UNORM: return D3DFMT_A8R8G8B8;
131 	case DataFormat::R4G4B4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4;  // emulated
132 	case DataFormat::B4G4R4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4;  // native
133 	case DataFormat::A4R4G4B4_UNORM_PACK16: return D3DFMT_A4R4G4B4;  // emulated
134 	case DataFormat::R5G6B5_UNORM_PACK16: return D3DFMT_R5G6B5;
135 	case DataFormat::A1R5G5B5_UNORM_PACK16: return D3DFMT_A1R5G5B5;
136 	case DataFormat::D24_S8: return D3DFMT_D24S8;
137 	case DataFormat::D16: return D3DFMT_D16;
138 	default: return D3DFMT_UNKNOWN;
139 	}
140 }
141 
FormatToD3DDeclType(DataFormat type)142 static int FormatToD3DDeclType(DataFormat type) {
143 	switch (type) {
144 	case DataFormat::R32_FLOAT: return D3DDECLTYPE_FLOAT1;
145 	case DataFormat::R32G32_FLOAT: return D3DDECLTYPE_FLOAT2;
146 	case DataFormat::R32G32B32_FLOAT: return D3DDECLTYPE_FLOAT3;
147 	case DataFormat::R32G32B32A32_FLOAT: return D3DDECLTYPE_FLOAT4;
148 	case DataFormat::R8G8B8A8_UNORM: return D3DDECLTYPE_UBYTE4N;  // D3DCOLOR has a different byte ordering.
149 	default: return D3DDECLTYPE_UNUSED;
150 	}
151 }
152 
153 class D3D9Buffer;
154 
155 class D3D9DepthStencilState : public DepthStencilState {
156 public:
157 	BOOL depthTestEnabled;
158 	BOOL depthWriteEnabled;
159 	D3DCMPFUNC depthCompare;
160 	BOOL stencilEnabled;
161 	D3DSTENCILOP stencilFail;
162 	D3DSTENCILOP stencilZFail;
163 	D3DSTENCILOP stencilPass;
164 	D3DCMPFUNC stencilCompareOp;
165 	uint8_t stencilCompareMask;
166 	uint8_t stencilWriteMask;
Apply(LPDIRECT3DDEVICE9 device)167 	void Apply(LPDIRECT3DDEVICE9 device) {
168 		device->SetRenderState(D3DRS_ZENABLE, depthTestEnabled);
169 		if (depthTestEnabled) {
170 			device->SetRenderState(D3DRS_ZWRITEENABLE, depthWriteEnabled);
171 			device->SetRenderState(D3DRS_ZFUNC, depthCompare);
172 		}
173 		device->SetRenderState(D3DRS_STENCILENABLE, stencilEnabled);
174 		if (stencilEnabled) {
175 			device->SetRenderState(D3DRS_STENCILFAIL, stencilFail);
176 			device->SetRenderState(D3DRS_STENCILZFAIL, stencilZFail);
177 			device->SetRenderState(D3DRS_STENCILPASS, stencilPass);
178 			device->SetRenderState(D3DRS_STENCILFUNC, stencilCompareOp);
179 			device->SetRenderState(D3DRS_STENCILMASK, stencilCompareMask);
180 			device->SetRenderState(D3DRS_STENCILWRITEMASK, stencilWriteMask);
181 		}
182 	}
183 };
184 
185 class D3D9RasterState : public RasterState {
186 public:
187 	DWORD cullMode;
188 
Apply(LPDIRECT3DDEVICE9 device)189 	void Apply(LPDIRECT3DDEVICE9 device) {
190 		device->SetRenderState(D3DRS_CULLMODE, cullMode);
191 		device->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE);
192 	}
193 };
194 
195 class D3D9BlendState : public BlendState {
196 public:
197 	bool enabled;
198 	D3DBLENDOP eqCol, eqAlpha;
199 	D3DBLEND srcCol, srcAlpha, dstCol, dstAlpha;
200 	uint32_t fixedColor;
201 	uint32_t colorMask;
202 
Apply(LPDIRECT3DDEVICE9 device)203 	void Apply(LPDIRECT3DDEVICE9 device) {
204 		device->SetRenderState(D3DRS_ALPHABLENDENABLE, (DWORD)enabled);
205 		device->SetRenderState(D3DRS_BLENDOP, eqCol);
206 		device->SetRenderState(D3DRS_BLENDOPALPHA, eqAlpha);
207 		device->SetRenderState(D3DRS_SRCBLEND, srcCol);
208 		device->SetRenderState(D3DRS_DESTBLEND, dstCol);
209 		device->SetRenderState(D3DRS_SRCBLENDALPHA, srcAlpha);
210 		device->SetRenderState(D3DRS_DESTBLENDALPHA, dstAlpha);
211 		device->SetRenderState(D3DRS_COLORWRITEENABLE, colorMask);
212 		// device->SetRenderState(, fixedColor);
213 	}
214 };
215 
216 class D3D9SamplerState : public SamplerState {
217 public:
218 	D3DTEXTUREADDRESS wrapS, wrapT;
219 	D3DTEXTUREFILTERTYPE magFilt, minFilt, mipFilt;
220 
Apply(LPDIRECT3DDEVICE9 device,int index)221 	void Apply(LPDIRECT3DDEVICE9 device, int index) {
222 		device->SetSamplerState(index, D3DSAMP_ADDRESSU, wrapS);
223 		device->SetSamplerState(index, D3DSAMP_ADDRESSV, wrapT);
224 		device->SetSamplerState(index, D3DSAMP_MAGFILTER, magFilt);
225 		device->SetSamplerState(index, D3DSAMP_MINFILTER, minFilt);
226 		device->SetSamplerState(index, D3DSAMP_MIPFILTER, mipFilt);
227 	}
228 };
229 
230 class D3D9InputLayout : public InputLayout {
231 public:
232 	D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc &desc);
~D3D9InputLayout()233 	~D3D9InputLayout() {
234 		if (decl_) {
235 			decl_->Release();
236 		}
237 	}
GetStride(int binding) const238 	int GetStride(int binding) const { return stride_[binding]; }
Apply(LPDIRECT3DDEVICE9 device)239 	void Apply(LPDIRECT3DDEVICE9 device) {
240 		device->SetVertexDeclaration(decl_);
241 	}
242 
243 private:
244 	LPDIRECT3DVERTEXDECLARATION9 decl_;
245 	int stride_[4];
246 };
247 
248 class D3D9ShaderModule : public ShaderModule {
249 public:
D3D9ShaderModule(ShaderStage stage,const std::string & tag)250 	D3D9ShaderModule(ShaderStage stage, const std::string &tag) : stage_(stage), tag_(tag) {}
~D3D9ShaderModule()251 	~D3D9ShaderModule() {
252 		if (vshader_)
253 			vshader_->Release();
254 		if (pshader_)
255 			pshader_->Release();
256 	}
257 	bool Compile(LPDIRECT3DDEVICE9 device, const uint8_t *data, size_t size);
Apply(LPDIRECT3DDEVICE9 device)258 	void Apply(LPDIRECT3DDEVICE9 device) {
259 		if (stage_ == ShaderStage::Fragment) {
260 			device->SetPixelShader(pshader_);
261 		} else {
262 			device->SetVertexShader(vshader_);
263 		}
264 	}
GetStage() const265 	ShaderStage GetStage() const override { return stage_; }
266 
267 private:
268 	ShaderStage stage_;
269 	LPDIRECT3DVERTEXSHADER9 vshader_ = nullptr;
270 	LPDIRECT3DPIXELSHADER9 pshader_ = nullptr;
271 	std::string tag_;
272 };
273 
274 class D3D9Pipeline : public Pipeline {
275 public:
D3D9Pipeline()276 	D3D9Pipeline() {}
~D3D9Pipeline()277 	~D3D9Pipeline() {
278 	}
RequiresBuffer()279 	bool RequiresBuffer() override {
280 		return false;
281 	}
282 
283 	D3D9ShaderModule *vshader;
284 	D3D9ShaderModule *pshader;
285 
286 	D3DPRIMITIVETYPE prim;
287 	int primDivisor;
288 	AutoRef<D3D9InputLayout> inputLayout;
289 	AutoRef<D3D9DepthStencilState> depthStencil;
290 	AutoRef<D3D9BlendState> blend;
291 	AutoRef<D3D9RasterState> raster;
292 	UniformBufferDesc dynamicUniforms;
293 
294 	void Apply(LPDIRECT3DDEVICE9 device);
295 };
296 
// Texture object wrapping one D3D9 texture resource. Depending on the
// TextureDesc type, Create() fills exactly one of tex_, volTex_ or cubeTex_.
class D3D9Texture : public Texture {
public:
	// Stores the device pointers and immediately calls Create(desc).
	D3D9Texture(LPDIRECT3DDEVICE9 device, LPDIRECT3DDEVICE9EX deviceEx, const TextureDesc &desc);
	~D3D9Texture();
	// Binds whichever underlying texture matches type_ to the given sampler slot.
	void SetToSampler(LPDIRECT3DDEVICE9 device, int sampler);

private:
	// Uploads one mip level. Only LINEAR2D textures with x == 0 and y == 0 are handled.
	void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback);
	// Creates the D3D resource and uploads any initial data; returns false on failure.
	bool Create(const TextureDesc &desc);
	LPDIRECT3DDEVICE9 device_;
	// When non-null, Create() uses D3DPOOL_DEFAULT with D3DUSAGE_DYNAMIC
	// instead of D3DPOOL_MANAGED.
	LPDIRECT3DDEVICE9EX deviceEx_;
	TextureType type_;
	DataFormat format_;
	D3DFORMAT d3dfmt_;
	LPDIRECT3DTEXTURE9 tex_;
	LPDIRECT3DVOLUMETEXTURE9 volTex_;
	LPDIRECT3DCUBETEXTURE9 cubeTex_;
};
315 
D3D9Texture(LPDIRECT3DDEVICE9 device,LPDIRECT3DDEVICE9EX deviceEx,const TextureDesc & desc)316 D3D9Texture::D3D9Texture(LPDIRECT3DDEVICE9 device, LPDIRECT3DDEVICE9EX deviceEx, const TextureDesc &desc)
317 	: device_(device), deviceEx_(deviceEx), tex_(nullptr), volTex_(nullptr), cubeTex_(nullptr) {
318 	Create(desc);
319 }
320 
~D3D9Texture()321 D3D9Texture::~D3D9Texture() {
322 	if (tex_) {
323 		tex_->Release();
324 	}
325 	if (volTex_) {
326 		volTex_->Release();
327 	}
328 	if (cubeTex_) {
329 		cubeTex_->Release();
330 	}
331 }
332 
Create(const TextureDesc & desc)333 bool D3D9Texture::Create(const TextureDesc &desc) {
334 	width_ = desc.width;
335 	height_ = desc.height;
336 	depth_ = desc.depth;
337 	type_ = desc.type;
338 	format_ = desc.format;
339 	tex_ = NULL;
340 	d3dfmt_ = FormatToD3DFMT(desc.format);
341 
342 	if (d3dfmt_ == D3DFMT_UNKNOWN) {
343 		return false;
344 	}
345 	HRESULT hr = E_FAIL;
346 
347 	D3DPOOL pool = D3DPOOL_MANAGED;
348 	int usage = 0;
349 	if (deviceEx_ != nullptr) {
350 		pool = D3DPOOL_DEFAULT;
351 		usage = D3DUSAGE_DYNAMIC;
352 	}
353 	if (desc.generateMips)
354 		usage |= D3DUSAGE_AUTOGENMIPMAP;
355 	switch (type_) {
356 	case TextureType::LINEAR1D:
357 	case TextureType::LINEAR2D:
358 		hr = device_->CreateTexture(desc.width, desc.height, desc.generateMips ? 0 : desc.mipLevels, usage, d3dfmt_, pool, &tex_, NULL);
359 		break;
360 	case TextureType::LINEAR3D:
361 		hr = device_->CreateVolumeTexture(desc.width, desc.height, desc.depth, desc.mipLevels, usage, d3dfmt_, pool, &volTex_, NULL);
362 		break;
363 	case TextureType::CUBE:
364 		hr = device_->CreateCubeTexture(desc.width, desc.mipLevels, usage, d3dfmt_, pool, &cubeTex_, NULL);
365 		break;
366 	}
367 	if (FAILED(hr)) {
368 		ERROR_LOG(G3D,  "Texture creation failed");
369 		return false;
370 	}
371 
372 	if (desc.initData.size()) {
373 		// In D3D9, after setting D3DUSAGE_AUTOGENMIPS, we can only access the top layer. The rest will be
374 		// automatically generated.
375 		int maxLevel = desc.generateMips ? 1 : (int)desc.initData.size();
376 		int w = desc.width;
377 		int h = desc.height;
378 		int d = desc.depth;
379 		for (int i = 0; i < maxLevel; i++) {
380 			SetImageData(0, 0, 0, w, h, d, i, 0, desc.initData[i], desc.initDataCallback);
381 			w = (w + 1) / 2;
382 			h = (h + 1) / 2;
383 			d = (d + 1) / 2;
384 		}
385 	}
386 	return true;
387 }
388 
// Swaps the lowest byte with the third byte of a packed 32-bit pixel (bits 0-7
// <-> bits 16-23), leaving the other two bytes in place. For 8888 pixels this
// exchanges the red and blue channels.
inline uint32_t Shuffle8888(uint32_t x) {
	const uint32_t untouched = x & 0xFF00FF00u;
	const uint32_t movedDown = (x >> 16) & 0x000000FFu;
	const uint32_t movedUp = (x & 0x000000FFu) << 16;
	return untouched | movedDown | movedUp;
}
393 
SetImageData(int x,int y,int z,int width,int height,int depth,int level,int stride,const uint8_t * data,TextureCallback callback)394 void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data, TextureCallback callback) {
395 	if (!tex_)
396 		return;
397 
398 	if (level == 0) {
399 		width_ = width;
400 		height_ = height;
401 		depth_ = depth;
402 	}
403 
404 	if (!stride) {
405 		stride = width * (int)DataFormatSizeInBytes(format_);
406 	}
407 
408 	switch (type_) {
409 	case TextureType::LINEAR2D:
410 	{
411 		D3DLOCKED_RECT rect;
412 		if (x == 0 && y == 0) {
413 			tex_->LockRect(level, &rect, NULL, D3DLOCK_DISCARD);
414 
415 			if (callback) {
416 				if (callback((uint8_t *)rect.pBits, data, width, height, depth, rect.Pitch, height * rect.Pitch)) {
417 					// Now this is the source.  All conversions below support in-place.
418 					data = (const uint8_t *)rect.pBits;
419 					stride = rect.Pitch;
420 				}
421 			}
422 
423 			for (int i = 0; i < height; i++) {
424 				uint8_t *dest = (uint8_t *)rect.pBits + rect.Pitch * i;
425 				const uint8_t *source = data + stride * i;
426 				int j;
427 				switch (format_) {
428 				case DataFormat::B4G4R4A4_UNORM_PACK16:  // We emulate support for this format.
429 					for (j = 0; j < width; j++) {
430 						uint16_t color = ((const uint16_t *)source)[j];
431 						((uint16_t *)dest)[j] = (color << 12) | (color >> 4);
432 					}
433 					break;
434 				case DataFormat::A4R4G4B4_UNORM_PACK16:
435 				case DataFormat::A1R5G5B5_UNORM_PACK16:
436 					// Native
437 					if (data != rect.pBits)
438 						memcpy(dest, source, width * sizeof(uint16_t));
439 					break;
440 
441 				case DataFormat::R8G8B8A8_UNORM:
442 					for (j = 0; j < width; j++) {
443 						((uint32_t *)dest)[j] = Shuffle8888(((uint32_t *)source)[j]);
444 					}
445 					break;
446 
447 				case DataFormat::B8G8R8A8_UNORM:
448 					if (data != rect.pBits)
449 						memcpy(dest, source, sizeof(uint32_t) * width);
450 					break;
451 				default:
452 					// Unhandled data format copy.
453 					DebugBreak();
454 					break;
455 				}
456 			}
457 			tex_->UnlockRect(level);
458 		}
459 		break;
460 	}
461 
462 	default:
463 		ERROR_LOG(G3D,  "Non-LINEAR2D textures not yet supported");
464 		break;
465 	}
466 }
467 
SetToSampler(LPDIRECT3DDEVICE9 device,int sampler)468 void D3D9Texture::SetToSampler(LPDIRECT3DDEVICE9 device, int sampler) {
469 	switch (type_) {
470 	case TextureType::LINEAR1D:
471 	case TextureType::LINEAR2D:
472 		device->SetTexture(sampler, tex_);
473 		break;
474 
475 	case TextureType::LINEAR3D:
476 		device->SetTexture(sampler, volTex_);
477 		break;
478 
479 	case TextureType::CUBE:
480 		device->SetTexture(sampler, cubeTex_);
481 		break;
482 	}
483 }
484 
// Direct3D 9 implementation of the DrawContext interface. Wraps an existing
// IDirect3D9/IDirect3DDevice9 pair (and their optional Ex variants) passed to
// the constructor, creates backend state objects, and records the currently
// bound pipeline and buffers.
class D3D9Context : public DrawContext {
public:
	D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx);
	~D3D9Context();

	// Capabilities filled in by the constructor.
	const DeviceCaps &GetDeviceCaps() const override {
		return caps_;
	}
	// Only D3D9-flavoured HLSL is accepted by this backend.
	uint32_t GetSupportedShaderLanguages() const override {
		return (uint32_t)ShaderLanguage::HLSL_D3D9;
	}
	uint32_t GetDataFormatSupport(DataFormat fmt) const override;

	// Object factories. Each returns a newly allocated backend object.
	ShaderModule *CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t dataSize, const std::string &tag) override;
	DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override;
	BlendState *CreateBlendState(const BlendStateDesc &desc) override;
	SamplerState *CreateSamplerState(const SamplerStateDesc &desc) override;
	RasterState *CreateRasterState(const RasterStateDesc &desc) override;
	Buffer *CreateBuffer(size_t size, uint32_t usageFlags) override;
	Pipeline *CreateGraphicsPipeline(const PipelineDesc &desc) override;
	InputLayout *CreateInputLayout(const InputLayoutDesc &desc) override;
	Texture *CreateTexture(const TextureDesc &desc) override;

	Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override;

	void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override;

	// Intentionally a no-op; the constructor reports framebufferCopySupported = false.
	void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override {
		// Not implemented
	}
	bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;

	// These functions should be self explanatory.
	void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
	Framebuffer *GetCurrentRenderTarget() override {
		return curRenderTarget_;
	}
	void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override;

	uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) override;

	void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override;

	void BindTextures(int start, int count, Texture **textures) override;
	// Applies each sampler state to the device immediately, for slots
	// start .. start + count - 1. Entries are dereferenced without a null check.
	void BindSamplerStates(int start, int count, SamplerState **states) override {
		_assert_(start + count <= MAX_BOUND_TEXTURES);
		for (int i = 0; i < count; ++i) {
			D3D9SamplerState *s = static_cast<D3D9SamplerState *>(states[i]);
			s->Apply(device_, start + i);
		}
	}
	// Records the vertex buffers and their offsets (0 when offsets is null);
	// nothing is sent to the device here.
	void BindVertexBuffers(int start, int count, Buffer **buffers, const int *offsets) override {
		_assert_(start + count <= ARRAY_SIZE(curVBuffers_));
		for (int i = 0; i < count; i++) {
			curVBuffers_[i + start] = (D3D9Buffer *)buffers[i];
			curVBufferOffsets_[i + start] = offsets ? offsets[i] : 0;
		}
	}
	// Records the index buffer and offset; nothing is sent to the device here.
	void BindIndexBuffer(Buffer *indexBuffer, int offset) override {
		curIBuffer_ = (D3D9Buffer *)indexBuffer;
		curIBufferOffset_ = offset;
	}

	// Records the pipeline as current; nothing is sent to the device here.
	void BindPipeline(Pipeline *pipeline) override {
		curPipeline_ = (D3D9Pipeline *)pipeline;
	}

	void EndFrame() override;

	void UpdateDynamicUniformBuffer(const void *ub, size_t size) override;

	// Raster state
	void SetScissorRect(int left, int top, int width, int height) override;
	void SetViewports(int count, Viewport *viewports) override;
	void SetBlendFactor(float color[4]) override;
	void SetStencilRef(uint8_t ref) override;

	void Draw(int vertexCount, int offset) override;
	void DrawIndexed(int vertexCount, int offset) override;
	void DrawUP(const void *vdata, int vertexCount) override;
	void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) override;

	// Exposes the raw D3D pointers as integers; unknown requests yield 0.
	uint64_t GetNativeObject(NativeObject obj) override {
		switch (obj) {
		case NativeObject::CONTEXT:
			return (uint64_t)(uintptr_t)d3d_;
		case NativeObject::DEVICE:
			return (uint64_t)(uintptr_t)device_;
		case NativeObject::DEVICE_EX:
			return (uint64_t)(uintptr_t)deviceEx_;
		default:
			return 0;
		}
	}

	// Human-readable backend/driver information; unknown fields yield "?".
	std::string GetInfoString(InfoField info) const override {
		switch (info) {
		case APIVERSION: return "DirectX 9.0";
		case VENDORSTRING: return identifier_.Description;
		case VENDOR: return "";
		case DRIVER: return identifier_.Driver;  // eh, sort of
		case SHADELANGVERSION: return shadeLangVersion_;
		case APINAME: return "Direct3D 9";
		default: return "?";
		}
	}

	void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override;

	int GetCurrentStepId() const override {
		return stepId_;
	}

	// Forgets the currently bound pipeline.
	void InvalidateCachedState() override;

private:
	// D3D objects handed to the constructor.
	LPDIRECT3D9 d3d_;
	LPDIRECT3D9EX d3dEx_;
	LPDIRECT3DDEVICE9 device_;
	LPDIRECT3DDEVICE9EX deviceEx_;
	int stepId_ = -1;
	int adapterId_ = -1;
	D3DADAPTER_IDENTIFIER9 identifier_{};
	// Only filled in when GetDeviceCaps succeeds in the constructor.
	D3DCAPS9 d3dCaps_;
	// "PS: xxxx VS: xxxx" or "N/A", formatted by the constructor.
	char shadeLangVersion_[64]{};
	DeviceCaps caps_{};

	// Bound state
	AutoRef<D3D9Pipeline> curPipeline_;
	AutoRef<D3D9Buffer> curVBuffers_[4];
	int curVBufferOffsets_[4]{};
	AutoRef<D3D9Buffer> curIBuffer_;
	int curIBufferOffset_ = 0;
	AutoRef<Framebuffer> curRenderTarget_;

	// Framebuffer state
	LPDIRECT3DSURFACE9 deviceRTsurf = 0;
	LPDIRECT3DSURFACE9 deviceDSsurf = 0;
	// Set by the constructor when the FOURCC_INTZ format check succeeds.
	bool supportsINTZ = false;
};
625 
// Drops the reference to the currently bound pipeline; no other bound state is
// touched.
void D3D9Context::InvalidateCachedState() {
	curPipeline_ = nullptr;
}
629 
// NOTE(review): FB_DIV is not referenced in the part of the file visible here;
// presumably a framebuffer size divisor used further down -- confirm.
#define FB_DIV 1
// FOURCC code 'INTZ' expressed as a D3DFORMAT. The D3D9Context constructor
// probes it with CheckDeviceFormat as a depth-stencil texture format.
#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z')))
632 
D3D9Context(IDirect3D9 * d3d,IDirect3D9Ex * d3dEx,int adapterId,IDirect3DDevice9 * device,IDirect3DDevice9Ex * deviceEx)633 D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx)
634 	: d3d_(d3d), d3dEx_(d3dEx), device_(device), deviceEx_(deviceEx), adapterId_(adapterId), caps_{} {
635 	if (FAILED(d3d->GetAdapterIdentifier(adapterId, 0, &identifier_))) {
636 		ERROR_LOG(G3D,  "Failed to get adapter identifier: %d", adapterId);
637 	}
638 	switch (identifier_.VendorId) {
639 	case 0x10DE: caps_.vendor = GPUVendor::VENDOR_NVIDIA; break;
640 	case 0x1002:
641 	case 0x1022: caps_.vendor = GPUVendor::VENDOR_AMD; break;
642 	case 0x163C:
643 	case 0x8086:
644 	case 0x8087: caps_.vendor = GPUVendor::VENDOR_INTEL; break;
645 	default:
646 		caps_.vendor = GPUVendor::VENDOR_UNKNOWN;
647 	}
648 
649 	if (!FAILED(device->GetDeviceCaps(&d3dCaps_))) {
650 		sprintf(shadeLangVersion_, "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF);
651 	} else {
652 		strcpy(shadeLangVersion_, "N/A");
653 	}
654 	caps_.deviceID = identifier_.DeviceId;
655 	caps_.multiViewport = false;
656 	caps_.anisoSupported = true;
657 	caps_.depthRangeMinusOneToOne = false;
658 	caps_.preferredDepthBufferFormat = DataFormat::D24_S8;
659 	caps_.dualSourceBlend = false;
660 	caps_.tesselationShaderSupported = false;
661 	caps_.framebufferBlitSupported = true;
662 	caps_.framebufferCopySupported = false;
663 	caps_.framebufferDepthBlitSupported = true;
664 	caps_.framebufferDepthCopySupported = false;
665 	if (d3d) {
666 		D3DDISPLAYMODE displayMode;
667 		d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &displayMode);
668 
669 		// To be safe, make sure both the display format and the FBO format support INTZ.
670 		HRESULT displayINTZ = d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, displayMode.Format, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ);
671 		HRESULT fboINTZ = d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_A8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ);
672 		supportsINTZ = SUCCEEDED(displayINTZ) && SUCCEEDED(fboINTZ);
673 	}
674 
675 	shaderLanguageDesc_.Init(HLSL_D3D9);
676 }
677 
~D3D9Context()678 D3D9Context::~D3D9Context() {
679 }
680 
CreateShaderModule(ShaderStage stage,ShaderLanguage language,const uint8_t * data,size_t size,const std::string & tag)681 ShaderModule *D3D9Context::CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t size, const std::string &tag) {
682 	D3D9ShaderModule *shader = new D3D9ShaderModule(stage, tag);
683 	if (shader->Compile(device_, data, size)) {
684 		return shader;
685 	} else {
686 		delete shader;
687 		return NULL;
688 	}
689 }
690 
CreateGraphicsPipeline(const PipelineDesc & desc)691 Pipeline *D3D9Context::CreateGraphicsPipeline(const PipelineDesc &desc) {
692 	if (!desc.shaders.size()) {
693 		ERROR_LOG(G3D,  "Pipeline requires at least one shader");
694 		return NULL;
695 	}
696 	D3D9Pipeline *pipeline = new D3D9Pipeline();
697 	for (auto iter : desc.shaders) {
698 		if (!iter) {
699 			ERROR_LOG(G3D,  "NULL shader passed to CreateGraphicsPipeline");
700 			delete pipeline;
701 			return NULL;
702 		}
703 		if (iter->GetStage() == ShaderStage::Fragment) {
704 			pipeline->pshader = static_cast<D3D9ShaderModule *>(iter);
705 		}
706 		else if (iter->GetStage() == ShaderStage::Vertex) {
707 			pipeline->vshader = static_cast<D3D9ShaderModule *>(iter);
708 		}
709 	}
710 	pipeline->prim = primToD3D9[(int)desc.prim];
711 	pipeline->primDivisor = primCountDivisor[(int)desc.prim];
712 	pipeline->depthStencil = (D3D9DepthStencilState *)desc.depthStencil;
713 	pipeline->blend = (D3D9BlendState *)desc.blend;
714 	pipeline->raster = (D3D9RasterState *)desc.raster;
715 	pipeline->inputLayout = (D3D9InputLayout *)desc.inputLayout;
716 	if (desc.uniformDesc)
717 		pipeline->dynamicUniforms = *desc.uniformDesc;
718 	return pipeline;
719 }
720 
CreateDepthStencilState(const DepthStencilStateDesc & desc)721 DepthStencilState *D3D9Context::CreateDepthStencilState(const DepthStencilStateDesc &desc) {
722 	D3D9DepthStencilState *ds = new D3D9DepthStencilState();
723 	ds->depthTestEnabled = desc.depthTestEnabled;
724 	ds->depthWriteEnabled = desc.depthWriteEnabled;
725 	ds->depthCompare = compareToD3D9[(int)desc.depthCompare];
726 	ds->stencilEnabled = desc.stencilEnabled;
727 	ds->stencilCompareOp = compareToD3D9[(int)desc.front.compareOp];
728 	ds->stencilPass = stencilOpToD3D9[(int)desc.front.passOp];
729 	ds->stencilFail = stencilOpToD3D9[(int)desc.front.failOp];
730 	ds->stencilZFail = stencilOpToD3D9[(int)desc.front.depthFailOp];
731 	ds->stencilWriteMask = desc.front.writeMask;
732 	ds->stencilCompareMask = desc.front.compareMask;
733 	return ds;
734 }
735 
CreateInputLayout(const InputLayoutDesc & desc)736 InputLayout *D3D9Context::CreateInputLayout(const InputLayoutDesc &desc) {
737 	D3D9InputLayout *fmt = new D3D9InputLayout(device_, desc);
738 	return fmt;
739 }
740 
CreateBlendState(const BlendStateDesc & desc)741 BlendState *D3D9Context::CreateBlendState(const BlendStateDesc &desc) {
742 	D3D9BlendState *bs = new D3D9BlendState();
743 	bs->enabled = desc.enabled;
744 	bs->eqCol = blendEqToD3D9[(int)desc.eqCol];
745 	bs->srcCol = blendFactorToD3D9[(int)desc.srcCol];
746 	bs->dstCol = blendFactorToD3D9[(int)desc.dstCol];
747 	bs->eqAlpha = blendEqToD3D9[(int)desc.eqAlpha];
748 	bs->srcAlpha = blendFactorToD3D9[(int)desc.srcAlpha];
749 	bs->dstAlpha = blendFactorToD3D9[(int)desc.dstAlpha];
750 	bs->colorMask = desc.colorMask;
751 	// Ignore logic ops, we don't support them in D3D9
752 	return bs;
753 }
754 
CreateSamplerState(const SamplerStateDesc & desc)755 SamplerState *D3D9Context::CreateSamplerState(const SamplerStateDesc &desc) {
756 	D3D9SamplerState *samps = new D3D9SamplerState();
757 	samps->wrapS = texWrapToD3D9[(int)desc.wrapU];
758 	samps->wrapT = texWrapToD3D9[(int)desc.wrapV];
759 	samps->magFilt = texFilterToD3D9[(int)desc.magFilter];
760 	samps->minFilt = texFilterToD3D9[(int)desc.minFilter];
761 	samps->mipFilt = texFilterToD3D9[(int)desc.mipFilter];
762 	return samps;
763 }
764 
CreateRasterState(const RasterStateDesc & desc)765 RasterState *D3D9Context::CreateRasterState(const RasterStateDesc &desc) {
766 	D3D9RasterState *rs = new D3D9RasterState();
767 	rs->cullMode = D3DCULL_NONE;
768 	if (desc.cull == CullMode::NONE) {
769 		return rs;
770 	}
771 	switch (desc.frontFace) {
772 	case Facing::CW:
773 		switch (desc.cull) {
774 		case CullMode::FRONT: rs->cullMode = D3DCULL_CCW; break;
775 		case CullMode::BACK: rs->cullMode = D3DCULL_CW; break;
776 		}
777 	case Facing::CCW:
778 		switch (desc.cull) {
779 		case CullMode::FRONT: rs->cullMode = D3DCULL_CW; break;
780 		case CullMode::BACK: rs->cullMode = D3DCULL_CCW; break;
781 		}
782 	}
783 	return rs;
784 }
785 
CreateTexture(const TextureDesc & desc)786 Texture *D3D9Context::CreateTexture(const TextureDesc &desc) {
787 	D3D9Texture *tex = new D3D9Texture(device_, deviceEx_, desc);
788 	return tex;
789 }
790 
BindTextures(int start,int count,Texture ** textures)791 void D3D9Context::BindTextures(int start, int count, Texture **textures) {
792 	_assert_(start + count <= MAX_BOUND_TEXTURES);
793 	for (int i = start; i < start + count; i++) {
794 		D3D9Texture *tex = static_cast<D3D9Texture *>(textures[i - start]);
795 		if (tex) {
796 			tex->SetToSampler(device_, i);
797 		} else {
798 			device_->SetTexture(i, nullptr);
799 		}
800 	}
801 }
802 
// End-of-frame hook: drops the reference to the currently bound pipeline.
void D3D9Context::EndFrame() {
	curPipeline_ = nullptr;
}
806 
SemanticToD3D9UsageAndIndex(int semantic,BYTE * usage,BYTE * index)807 static void SemanticToD3D9UsageAndIndex(int semantic, BYTE *usage, BYTE *index) {
808 	*index = 0;
809 	switch (semantic) {
810 	case SEM_POSITION:
811 		*usage = D3DDECLUSAGE_POSITION;
812 		break;
813 	case SEM_NORMAL:
814 		*usage = D3DDECLUSAGE_NORMAL;
815 		break;
816 	case SEM_TANGENT:
817 		*usage = D3DDECLUSAGE_TANGENT;
818 		break;
819 	case SEM_BINORMAL:
820 		*usage = D3DDECLUSAGE_BINORMAL;
821 		break;
822 	case SEM_COLOR0:
823 		*usage = D3DDECLUSAGE_COLOR;
824 		break;
825 	case SEM_TEXCOORD0:
826 		*usage = D3DDECLUSAGE_TEXCOORD;
827 		break;
828 	case SEM_TEXCOORD1:
829 		*usage = D3DDECLUSAGE_TEXCOORD;
830 		*index = 1;
831 		break;
832 	}
833 }
834 
D3D9InputLayout(LPDIRECT3DDEVICE9 device,const InputLayoutDesc & desc)835 D3D9InputLayout::D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc &desc) : decl_(NULL) {
836 	D3DVERTEXELEMENT9 *elements = new D3DVERTEXELEMENT9[desc.attributes.size() + 1];
837 	size_t i;
838 	for (i = 0; i < desc.attributes.size(); i++) {
839 		elements[i].Stream = desc.attributes[i].binding;
840 		elements[i].Offset = desc.attributes[i].offset;
841 		elements[i].Method = D3DDECLMETHOD_DEFAULT;
842 		SemanticToD3D9UsageAndIndex(desc.attributes[i].location, &elements[i].Usage, &elements[i].UsageIndex);
843 		elements[i].Type = FormatToD3DDeclType(desc.attributes[i].format);
844 	}
845 	D3DVERTEXELEMENT9 end = D3DDECL_END();
846 	// Zero the last one.
847 	memcpy(&elements[i], &end, sizeof(elements[i]));
848 
849 	for (i = 0; i < desc.bindings.size(); i++) {
850 		stride_[i] = desc.bindings[i].stride;
851 	}
852 
853 	HRESULT hr = device->CreateVertexDeclaration(elements, &decl_);
854 	if (FAILED(hr)) {
855 		ERROR_LOG(G3D,  "Error creating vertex decl");
856 	}
857 	delete[] elements;
858 }
859 
860 // Simulate a simple buffer type like the other backends have, use the usage flags to create the right internal type.
861 class D3D9Buffer : public Buffer {
862 public:
D3D9Buffer(LPDIRECT3DDEVICE9 device,size_t size,uint32_t flags)863 	D3D9Buffer(LPDIRECT3DDEVICE9 device, size_t size, uint32_t flags) : vbuffer_(nullptr), ibuffer_(nullptr), maxSize_(size) {
864 		if (flags & BufferUsageFlag::INDEXDATA) {
865 			DWORD usage = D3DUSAGE_DYNAMIC;
866 			device->CreateIndexBuffer((UINT)size, usage, D3DFMT_INDEX16, D3DPOOL_DEFAULT, &ibuffer_, NULL);
867 		} else {
868 			DWORD usage = D3DUSAGE_DYNAMIC;
869 			device->CreateVertexBuffer((UINT)size, usage, 0, D3DPOOL_DEFAULT, &vbuffer_, NULL);
870 		}
871 	}
~D3D9Buffer()872 	virtual ~D3D9Buffer() override {
873 		if (ibuffer_) {
874 			ibuffer_->Release();
875 		}
876 		if (vbuffer_) {
877 			vbuffer_->Release();
878 		}
879 	}
880 
881 	LPDIRECT3DVERTEXBUFFER9 vbuffer_;
882 	LPDIRECT3DINDEXBUFFER9 ibuffer_;
883 	size_t maxSize_;
884 };
885 
CreateBuffer(size_t size,uint32_t usageFlags)886 Buffer *D3D9Context::CreateBuffer(size_t size, uint32_t usageFlags) {
887 	return new D3D9Buffer(device_, size, usageFlags);
888 }
889 
// Writes the transpose of the 4x4 matrix `in` to `out`: element (row, col) of out is
// element (col, row) of in. Both arrays hold 16 floats.
inline void Transpose4x4(float out[16], const float in[16]) {
	for (int k = 0; k < 16; k++) {
		const int row = k / 4;
		const int col = k % 4;
		out[k] = in[col * 4 + row];
	}
}
897 
UpdateDynamicUniformBuffer(const void * ub,size_t size)898 void D3D9Context::UpdateDynamicUniformBuffer(const void *ub, size_t size) {
899 	_assert_(size == curPipeline_->dynamicUniforms.uniformBufferSize);
900 	for (auto &uniform : curPipeline_->dynamicUniforms.uniforms) {
901 		int count = 0;
902 		switch (uniform.type) {
903 		case UniformType::FLOAT1:
904 		case UniformType::FLOAT2:
905 		case UniformType::FLOAT3:
906 		case UniformType::FLOAT4:
907 			count = 1;
908 			break;
909 		case UniformType::MATRIX4X4:
910 			count = 4;
911 			break;
912 		}
913 		const float *srcPtr = (const float *)((const uint8_t *)ub + uniform.offset);
914 		if (uniform.vertexReg != -1) {
915 			float transp[16];
916 			if (count == 4) {
917 				Transpose4x4(transp, srcPtr);
918 				srcPtr = transp;
919 			}
920 			device_->SetVertexShaderConstantF(uniform.vertexReg, srcPtr, count);
921 		}
922 		if (uniform.fragmentReg != -1) {
923 			device_->SetPixelShaderConstantF(uniform.fragmentReg, srcPtr, count);
924 		}
925 	}
926 }
927 
UpdateBuffer(Buffer * buffer,const uint8_t * data,size_t offset,size_t size,UpdateBufferFlags flags)928 void D3D9Context::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) {
929 	D3D9Buffer *buf = (D3D9Buffer *)buffer;
930 	if (!size)
931 		return;
932 	if (offset + size > buf->maxSize_) {
933 		ERROR_LOG(G3D,  "Can't SubData with bigger size than buffer was created with");
934 		return;
935 	}
936 	if (buf->vbuffer_) {
937 		void *ptr;
938 		HRESULT res = buf->vbuffer_->Lock((UINT)offset, (UINT)size, &ptr, (flags & UPDATE_DISCARD) ? D3DLOCK_DISCARD : 0);
939 		if (!FAILED(res)) {
940 			memcpy(ptr, data, size);
941 			buf->vbuffer_->Unlock();
942 		}
943 	} else if (buf->ibuffer_) {
944 		void *ptr;
945 		HRESULT res = buf->ibuffer_->Lock((UINT)offset, (UINT)size, &ptr, (flags & UPDATE_DISCARD) ? D3DLOCK_DISCARD : 0);
946 		if (!FAILED(res)) {
947 			memcpy(ptr, data, size);
948 			buf->ibuffer_->Unlock();
949 		}
950 	}
951 }
952 
// Applies the pipeline's state objects to the device, in this order: vertex shader,
// pixel shader, blend state, depth/stencil state and rasterizer state.
// The input layout is not applied here.
void D3D9Pipeline::Apply(LPDIRECT3DDEVICE9 device) {
	vshader->Apply(device);
	pshader->Apply(device);
	blend->Apply(device);
	depthStencil->Apply(device);
	raster->Apply(device);
}
960 
Draw(int vertexCount,int offset)961 void D3D9Context::Draw(int vertexCount, int offset) {
962 	device_->SetStreamSource(0, curVBuffers_[0]->vbuffer_, curVBufferOffsets_[0], curPipeline_->inputLayout->GetStride(0));
963 	curPipeline_->Apply(device_);
964 	curPipeline_->inputLayout->Apply(device_);
965 	device_->DrawPrimitive(curPipeline_->prim, offset, vertexCount / 3);
966 }
967 
DrawIndexed(int vertexCount,int offset)968 void D3D9Context::DrawIndexed(int vertexCount, int offset) {
969 	curPipeline_->Apply(device_);
970 	curPipeline_->inputLayout->Apply(device_);
971 	device_->SetStreamSource(0, curVBuffers_[0]->vbuffer_, curVBufferOffsets_[0], curPipeline_->inputLayout->GetStride(0));
972 	device_->SetIndices(curIBuffer_->ibuffer_);
973 	device_->DrawIndexedPrimitive(curPipeline_->prim, 0, 0, vertexCount, offset, vertexCount / curPipeline_->primDivisor);
974 }
975 
DrawUP(const void * vdata,int vertexCount)976 void D3D9Context::DrawUP(const void *vdata, int vertexCount) {
977 	curPipeline_->Apply(device_);
978 	curPipeline_->inputLayout->Apply(device_);
979 	device_->DrawPrimitiveUP(curPipeline_->prim, vertexCount / 3, vdata, curPipeline_->inputLayout->GetStride(0));
980 }
981 
// Exchanges byte 0 and byte 2 of a packed 32-bit color, leaving bytes 1 and 3 untouched.
static uint32_t SwapRB(uint32_t c) {
	const uint32_t untouched = c & 0xFF00FF00;
	const uint32_t byte0ToByte2 = (c & 0x000000FF) << 16;
	const uint32_t byte2ToByte0 = (c & 0x00FF0000) >> 16;
	return untouched | byte0ToByte2 | byte2ToByte0;
}
985 
Clear(int mask,uint32_t colorval,float depthVal,int stencilVal)986 void D3D9Context::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) {
987 	UINT d3dMask = 0;
988 	if (mask & FBChannel::FB_COLOR_BIT) d3dMask |= D3DCLEAR_TARGET;
989 	if (mask & FBChannel::FB_DEPTH_BIT) d3dMask |= D3DCLEAR_ZBUFFER;
990 	if (mask & FBChannel::FB_STENCIL_BIT) d3dMask |= D3DCLEAR_STENCIL;
991 	if (d3dMask) {
992 		device_->Clear(0, NULL, d3dMask, (D3DCOLOR)SwapRB(colorval), depthVal, stencilVal);
993 	}
994 }
995 
SetScissorRect(int left,int top,int width,int height)996 void D3D9Context::SetScissorRect(int left, int top, int width, int height) {
997 	using namespace DX9;
998 
999 	dxstate.scissorRect.set(left, top, left + width, top + height);
1000 }
1001 
SetViewports(int count,Viewport * viewports)1002 void D3D9Context::SetViewports(int count, Viewport *viewports) {
1003 	using namespace DX9;
1004 
1005 	int x = (int)viewports[0].TopLeftX;
1006 	int y = (int)viewports[0].TopLeftY;
1007 	int w = (int)viewports[0].Width;
1008 	int h = (int)viewports[0].Height;
1009 	dxstate.viewport.set(x, y, w, h, viewports[0].MinDepth, viewports[0].MaxDepth);
1010 }
1011 
SetBlendFactor(float color[4])1012 void D3D9Context::SetBlendFactor(float color[4]) {
1013 	uint32_t r = (uint32_t)(color[0] * 255.0f);
1014 	uint32_t g = (uint32_t)(color[1] * 255.0f);
1015 	uint32_t b = (uint32_t)(color[2] * 255.0f);
1016 	uint32_t a = (uint32_t)(color[3] * 255.0f);
1017 	device_->SetRenderState(D3DRS_BLENDFACTOR, r | (g << 8) | (b << 16) | (a << 24));
1018 }
1019 
SetStencilRef(uint8_t ref)1020 void D3D9Context::SetStencilRef(uint8_t ref) {
1021 	device_->SetRenderState(D3DRS_STENCILREF, (DWORD)ref);
1022 }
1023 
Compile(LPDIRECT3DDEVICE9 device,const uint8_t * data,size_t size)1024 bool D3D9ShaderModule::Compile(LPDIRECT3DDEVICE9 device, const uint8_t *data, size_t size) {
1025 	LPD3D_SHADER_MACRO defines = nullptr;
1026 	LPD3DINCLUDE includes = nullptr;
1027 	LPD3DBLOB codeBuffer = nullptr;
1028 	LPD3DBLOB errorBuffer = nullptr;
1029 	const char *source = (const char *)data;
1030 	auto compile = [&](const char *profile) -> HRESULT {
1031 		return dyn_D3DCompile(source, (UINT)strlen(source), nullptr, defines, includes, "main", profile, 0, 0, &codeBuffer, &errorBuffer);
1032 	};
1033 	HRESULT hr = compile(stage_ == ShaderStage::Fragment ? "ps_2_0" : "vs_2_0");
1034 	if (FAILED(hr) && hr == D3DXERR_INVALIDDATA) {
1035 		// Might be a post shader.  Let's try using shader model 3.
1036 		hr = compile(stage_ == ShaderStage::Fragment ? "ps_3_0" : "vs_3_0");
1037 	}
1038 	if (FAILED(hr)) {
1039 		const char *error = errorBuffer ? (const char *)errorBuffer->GetBufferPointer() : "(no errorbuffer returned)";
1040 		if (hr == ERROR_MOD_NOT_FOUND) {
1041 			// No D3D9-compatible shader compiler installed.
1042 			error = "D3D9 shader compiler not installed";
1043 		}
1044 		OutputDebugStringA(source);
1045 		OutputDebugStringA(error);
1046 		if (errorBuffer)
1047 			errorBuffer->Release();
1048 		if (codeBuffer)
1049 			codeBuffer->Release();
1050 		return false;
1051 	}
1052 
1053 	bool success = false;
1054 	if (stage_ == ShaderStage::Fragment) {
1055 		HRESULT result = device->CreatePixelShader((DWORD *)codeBuffer->GetBufferPointer(), &pshader_);
1056 		success = SUCCEEDED(result);
1057 	} else {
1058 		HRESULT result = device->CreateVertexShader((DWORD *)codeBuffer->GetBufferPointer(), &vshader_);
1059 		success = SUCCEEDED(result);
1060 	}
1061 
1062 	// There could have been warnings.
1063 	if (errorBuffer)
1064 		errorBuffer->Release();
1065 	codeBuffer->Release();
1066 	return true;
1067 }
1068 
// Framebuffer implementation for the D3D9 backend: holds the color texture/surface and the
// depth-stencil surface (plus, optionally, a depth-stencil texture) of one offscreen target.
// The members are filled in by D3D9Context::CreateFramebuffer.
class D3D9Framebuffer : public Framebuffer {
public:
	// Only records the dimensions; no D3D9 resources are created here.
	D3D9Framebuffer(int width, int height) {
		width_ = width;
		height_ = height;
	}
	~D3D9Framebuffer();

	// Assigned from a running counter in CreateFramebuffer.
	uint32_t id = 0;
	// Level 0 surface of tex.
	LPDIRECT3DSURFACE9 surf = nullptr;
	// Depth-stencil surface; level 0 of depthstenciltex when that texture exists.
	LPDIRECT3DSURFACE9 depthstencil = nullptr;
	// Color render target texture.
	LPDIRECT3DTEXTURE9 tex = nullptr;
	// Depth-stencil texture; only created when INTZ is supported, otherwise null.
	LPDIRECT3DTEXTURE9 depthstenciltex = nullptr;
};
1083 
CreateFramebuffer(const FramebufferDesc & desc)1084 Framebuffer *D3D9Context::CreateFramebuffer(const FramebufferDesc &desc) {
1085 	static uint32_t id = 0;
1086 
1087 	D3D9Framebuffer *fbo = new D3D9Framebuffer(desc.width, desc.height);
1088 	fbo->depthstenciltex = nullptr;
1089 
1090 	HRESULT rtResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &fbo->tex, NULL);
1091 	if (FAILED(rtResult)) {
1092 		ERROR_LOG(G3D,  "Failed to create render target");
1093 		delete fbo;
1094 		return NULL;
1095 	}
1096 	fbo->tex->GetSurfaceLevel(0, &fbo->surf);
1097 
1098 	HRESULT dsResult;
1099 	if (supportsINTZ) {
1100 		dsResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_DEPTHSTENCIL, FOURCC_INTZ, D3DPOOL_DEFAULT, &fbo->depthstenciltex, NULL);
1101 		if (SUCCEEDED(dsResult)) {
1102 			dsResult = fbo->depthstenciltex->GetSurfaceLevel(0, &fbo->depthstencil);
1103 		}
1104 	} else {
1105 		dsResult = device_->CreateDepthStencilSurface(desc.width, desc.height, D3DFMT_D24S8, D3DMULTISAMPLE_NONE, 0, FALSE, &fbo->depthstencil, NULL);
1106 	}
1107 	if (FAILED(dsResult)) {
1108 		ERROR_LOG(G3D,  "Failed to create depth buffer");
1109 		fbo->surf->Release();
1110 		fbo->tex->Release();
1111 		if (fbo->depthstenciltex) {
1112 			fbo->depthstenciltex->Release();
1113 		}
1114 		delete fbo;
1115 		return NULL;
1116 	}
1117 	fbo->id = id++;
1118 	return fbo;
1119 }
1120 
~D3D9Framebuffer()1121 D3D9Framebuffer::~D3D9Framebuffer() {
1122 	tex->Release();
1123 	surf->Release();
1124 	depthstencil->Release();
1125 	if (depthstenciltex) {
1126 		depthstenciltex->Release();
1127 	}
1128 }
1129 
BindFramebufferAsRenderTarget(Framebuffer * fbo,const RenderPassInfo & rp,const char * tag)1130 void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) {
1131 	using namespace DX9;
1132 	if (fbo) {
1133 		D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;
1134 		device_->SetRenderTarget(0, fb->surf);
1135 		device_->SetDepthStencilSurface(fb->depthstencil);
1136 		curRenderTarget_ = fb;
1137 	} else {
1138 		device_->SetRenderTarget(0, deviceRTsurf);
1139 		device_->SetDepthStencilSurface(deviceDSsurf);
1140 		curRenderTarget_ = nullptr;
1141 	}
1142 
1143 	int clearFlags = 0;
1144 	if (rp.color == RPAction::CLEAR) {
1145 		clearFlags |= D3DCLEAR_TARGET;
1146 	}
1147 	if (rp.depth == RPAction::CLEAR) {
1148 		clearFlags |= D3DCLEAR_ZBUFFER;
1149 	}
1150 	if (rp.stencil == RPAction::CLEAR) {
1151 		clearFlags |= D3DCLEAR_STENCIL;
1152 	}
1153 	if (clearFlags) {
1154 		dxstate.scissorTest.force(false);
1155 		device_->Clear(0, nullptr, clearFlags, (D3DCOLOR)SwapRB(rp.clearColor), rp.clearDepth, rp.clearStencil);
1156 		dxstate.scissorRect.restore();
1157 	}
1158 
1159 	dxstate.scissorRect.restore();
1160 	dxstate.viewport.restore();
1161 	stepId_++;
1162 }
1163 
GetFramebufferAPITexture(Framebuffer * fbo,int channelBits,int attachment)1164 uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) {
1165 	D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;
1166 	if (channelBits & FB_SURFACE_BIT) {
1167 		switch (channelBits & 7) {
1168 		case FB_DEPTH_BIT:
1169 			return (uintptr_t)fb->depthstencil;
1170 		case FB_STENCIL_BIT:
1171 			return (uintptr_t)fb->depthstencil;
1172 		case FB_COLOR_BIT:
1173 		default:
1174 			return (uintptr_t)fb->surf;
1175 		}
1176 	} else {
1177 		switch (channelBits & 7) {
1178 		case FB_DEPTH_BIT:
1179 			return (uintptr_t)fb->depthstenciltex;
1180 		case FB_STENCIL_BIT:
1181 			return 0;  // Can't texture from stencil
1182 		case FB_COLOR_BIT:
1183 		default:
1184 			return (uintptr_t)fb->tex;
1185 		}
1186 	}
1187 }
1188 
BindFramebufferAsTexture(Framebuffer * fbo,int binding,FBChannel channelBit,int color)1189 void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int color) {
1190 	_assert_(binding < MAX_BOUND_TEXTURES);
1191 	D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;
1192 	switch (channelBit) {
1193 	case FB_DEPTH_BIT:
1194 		if (fb->depthstenciltex) {
1195 			device_->SetTexture(binding, fb->depthstenciltex);
1196 		}
1197 		break;
1198 	case FB_COLOR_BIT:
1199 	default:
1200 		if (fb->tex) {
1201 			device_->SetTexture(binding, fb->tex);
1202 		}
1203 		break;
1204 	}
1205 }
1206 
GetFramebufferDimensions(Framebuffer * fbo,int * w,int * h)1207 void D3D9Context::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) {
1208 	D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo;
1209 	if (fb) {
1210 		*w = fb->Width();
1211 		*h = fb->Height();
1212 	} else {
1213 		*w = targetWidth_;
1214 		*h = targetHeight_;
1215 	}
1216 }
1217 
BlitFramebuffer(Framebuffer * srcfb,int srcX1,int srcY1,int srcX2,int srcY2,Framebuffer * dstfb,int dstX1,int dstY1,int dstX2,int dstY2,int channelBits,FBBlitFilter filter,const char * tag)1218 bool D3D9Context::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) {
1219 	D3D9Framebuffer *src = (D3D9Framebuffer *)srcfb;
1220 	D3D9Framebuffer *dst = (D3D9Framebuffer *)dstfb;
1221 	if (channelBits != FB_COLOR_BIT)
1222 		return false;
1223 	RECT srcRect{ (LONG)srcX1, (LONG)srcY1, (LONG)srcX2, (LONG)srcY2 };
1224 	RECT dstRect{ (LONG)dstX1, (LONG)dstY1, (LONG)dstX2, (LONG)dstY2 };
1225 	LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf;
1226 	LPDIRECT3DSURFACE9 dstSurf = dst ? dst->surf : deviceRTsurf;
1227 	stepId_++;
1228 	return SUCCEEDED(device_->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, filter == FB_BLIT_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT));
1229 }
1230 
HandleEvent(Event ev,int width,int height,void * param1,void * param2)1231 void D3D9Context::HandleEvent(Event ev, int width, int height, void *param1, void *param2) {
1232 	switch (ev) {
1233 	case Event::LOST_BACKBUFFER:
1234 		if (deviceRTsurf)
1235 			deviceRTsurf->Release();
1236 		if (deviceDSsurf)
1237 			deviceDSsurf->Release();
1238 		deviceRTsurf = nullptr;
1239 		deviceDSsurf = nullptr;
1240 		break;
1241 	case Event::GOT_BACKBUFFER:
1242 		device_->GetRenderTarget(0, &deviceRTsurf);
1243 		device_->GetDepthStencilSurface(&deviceDSsurf);
1244 		break;
1245 	case Event::PRESENTED:
1246 		stepId_ = 0;
1247 		break;
1248 	}
1249 }
1250 
T3DCreateDX9Context(IDirect3D9 * d3d,IDirect3D9Ex * d3dEx,int adapterId,IDirect3DDevice9 * device,IDirect3DDevice9Ex * deviceEx)1251 DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx) {
1252 	bool result = LoadD3DCompilerDynamic();
1253 	if (!result) {
1254 		ERROR_LOG(G3D,  "Failed to load D3DCompiler!");
1255 		return nullptr;
1256 	}
1257 	return new D3D9Context(d3d, d3dEx, adapterId, device, deviceEx);
1258 }
1259 
1260 // Only partial implementation!
GetDataFormatSupport(DataFormat fmt) const1261 uint32_t D3D9Context::GetDataFormatSupport(DataFormat fmt) const {
1262 	switch (fmt) {
1263 	case DataFormat::B8G8R8A8_UNORM:
1264 		return FMT_RENDERTARGET | FMT_TEXTURE | FMT_AUTOGEN_MIPS;
1265 
1266 	case DataFormat::R4G4B4A4_UNORM_PACK16:
1267 		return 0;
1268 	case DataFormat::B4G4R4A4_UNORM_PACK16:
1269 		return FMT_TEXTURE;  // emulated support
1270 	case DataFormat::R5G6B5_UNORM_PACK16:
1271 	case DataFormat::A1R5G5B5_UNORM_PACK16:
1272 	case DataFormat::A4R4G4B4_UNORM_PACK16:
1273 		return FMT_RENDERTARGET | FMT_TEXTURE | FMT_AUTOGEN_MIPS;  // native support
1274 
1275 	case DataFormat::R8G8B8A8_UNORM:
1276 		return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS;
1277 
1278 	case DataFormat::R32_FLOAT:
1279 	case DataFormat::R32G32_FLOAT:
1280 	case DataFormat::R32G32B32_FLOAT:
1281 	case DataFormat::R32G32B32A32_FLOAT:
1282 		return FMT_INPUTLAYOUT;
1283 
1284 	case DataFormat::R8_UNORM:
1285 		return 0;
1286 	case DataFormat::BC1_RGBA_UNORM_BLOCK:
1287 	case DataFormat::BC2_UNORM_BLOCK:
1288 	case DataFormat::BC3_UNORM_BLOCK:
1289 		return FMT_TEXTURE;
1290 	default:
1291 		return 0;
1292 	}
1293 }
1294 
1295 
1296 }  // namespace Draw
1297