1 // Copyright (c) 2017- PPSSPP Project.
2 
3 // This program is free software: you can redistribute it and/or modify
4 // it under the terms of the GNU General Public License as published by
5 // the Free Software Foundation, version 2.0 or later versions.
6 
7 // This program is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 // GNU General Public License 2.0 for more details.
11 
12 // A copy of the GPL 2.0 should have been included with the program.
13 // If not, see http://www.gnu.org/licenses/
14 
15 // Official git repository and contact information can be found at
16 // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17 
18 #include <algorithm>
19 #include <atomic>
20 #include <cstring>
21 #include <functional>
22 #include <set>
23 #include <vector>
24 #include <mutex>
25 #include <zstd.h>
26 
27 #include "Common/Common.h"
28 #include "Common/File/FileUtil.h"
29 #include "Common/Thread/ParallelLoop.h"
30 #include "Common/Log.h"
31 #include "Common/StringUtils.h"
32 
33 #include "Core/Core.h"
34 #include "Core/ELF/ParamSFO.h"
35 #include "Core/HLE/sceDisplay.h"
36 #include "Core/MemMap.h"
37 #include "Core/System.h"
38 #include "Core/ThreadPools.h"
39 #include "GPU/GPUInterface.h"
40 #include "GPU/GPUState.h"
41 #include "GPU/ge_constants.h"
42 #include "GPU/Common/TextureDecoder.h"
43 #include "GPU/Common/VertexDecoderCommon.h"
44 #include "GPU/Debugger/Record.h"
45 #include "GPU/Debugger/RecordFormat.h"
46 
47 namespace GPURecord {
48 
// True while a recording is in progress (set by BeginRecording, cleared by FinishRecording).
static bool active = false;
// Set by Activate() to request that a recording begins on the next display/frame notification.
static bool nextFrame = false;
// Value of gpuStats.numFlips when recording was last requested, started, or finished.
static int flipLastAction = -1;
// Called with the dump's path once a recording has been written out, then reset.
static std::function<void(const Path &)> writeCallback;

// Raw payload bytes.  Each Command refers to a range in here by offset (ptr) and size (sz).
static std::vector<u8> pushbuf;
// The commands recorded so far, in order.
static std::vector<Command> commands;
// Register ops gathered since the last flush; FlushRegisters() emits them as one REGISTERS command.
static std::vector<u32> lastRegisters;
// pushbuf offsets of texture data emitted so far, checked to reuse identical data.
static std::vector<u32> lastTextures;
// Color/depth buffer addresses seen while flushing prim state during this recording.
static std::set<u32> lastRenderTargets;
59 
FlushRegisters()60 static void FlushRegisters() {
61 	if (!lastRegisters.empty()) {
62 		Command last{CommandType::REGISTERS};
63 		last.ptr = (u32)pushbuf.size();
64 		last.sz = (u32)(lastRegisters.size() * sizeof(u32));
65 		pushbuf.resize(pushbuf.size() + last.sz);
66 		memcpy(pushbuf.data() + last.ptr, lastRegisters.data(), last.sz);
67 		lastRegisters.clear();
68 
69 		commands.push_back(last);
70 	}
71 }
72 
GenRecordingFilename()73 static Path GenRecordingFilename() {
74 	const Path dumpDir = GetSysDirectory(DIRECTORY_DUMP);
75 
76 	File::CreateFullPath(dumpDir);
77 
78 	const std::string prefix = g_paramSFO.GetDiscID();
79 
80 	for (int n = 1; n < 10000; ++n) {
81 		std::string filename = StringFromFormat("%s_%04d.ppdmp", prefix.c_str(), n);
82 
83 		const Path path = dumpDir / filename;
84 
85 		if (!File::Exists(path)) {
86 			return path;
87 		}
88 	}
89 
90 	return dumpDir / StringFromFormat("%s_%04d.ppdmp", prefix.c_str(), 9999);
91 }
92 
BeginRecording()93 static void BeginRecording() {
94 	active = true;
95 	nextFrame = false;
96 	lastTextures.clear();
97 	lastRenderTargets.clear();
98 	flipLastAction = gpuStats.numFlips;
99 
100 	u32 ptr = (u32)pushbuf.size();
101 	u32 sz = 512 * 4;
102 	pushbuf.resize(pushbuf.size() + sz);
103 	gstate.Save((u32_le *)(pushbuf.data() + ptr));
104 
105 	commands.push_back({CommandType::INIT, sz, ptr});
106 }
107 
WriteCompressed(FILE * fp,const void * p,size_t sz)108 static void WriteCompressed(FILE *fp, const void *p, size_t sz) {
109 	size_t compressed_size = ZSTD_compressBound(sz);
110 	u8 *compressed = new u8[compressed_size];
111 	compressed_size = ZSTD_compress(compressed, compressed_size, p, sz, 6);
112 
113 	u32 write_size = (u32)compressed_size;
114 	fwrite(&write_size, sizeof(write_size), 1, fp);
115 	fwrite(compressed, compressed_size, 1, fp);
116 
117 	delete [] compressed;
118 }
119 
WriteRecording()120 static Path WriteRecording() {
121 	FlushRegisters();
122 
123 	const Path filename = GenRecordingFilename();
124 
125 	NOTICE_LOG(G3D, "Recording filename: %s", filename.c_str());
126 
127 	FILE *fp = File::OpenCFile(filename, "wb");
128 	Header header{};
129 	strncpy(header.magic, HEADER_MAGIC, sizeof(header.magic));
130 	header.version = VERSION;
131 	strncpy(header.gameID, g_paramSFO.GetDiscID().c_str(), sizeof(header.gameID));
132 	fwrite(&header, sizeof(header), 1, fp);
133 
134 	u32 sz = (u32)commands.size();
135 	fwrite(&sz, sizeof(sz), 1, fp);
136 	u32 bufsz = (u32)pushbuf.size();
137 	fwrite(&bufsz, sizeof(bufsz), 1, fp);
138 
139 	WriteCompressed(fp, commands.data(), commands.size() * sizeof(Command));
140 	WriteCompressed(fp, pushbuf.data(), bufsz);
141 
142 	fclose(fp);
143 
144 	return filename;
145 }
146 
GetVertDataSizes(int vcount,const void * indices,u32 & vbytes,u32 & ibytes)147 static void GetVertDataSizes(int vcount, const void *indices, u32 &vbytes, u32 &ibytes) {
148 	VertexDecoder vdec;
149 	VertexDecoderOptions opts{};
150 	vdec.SetVertexType(gstate.vertType, opts);
151 
152 	if (indices) {
153 		u16 lower = 0;
154 		u16 upper = 0;
155 		GetIndexBounds(indices, vcount, gstate.vertType, &lower, &upper);
156 
157 		vbytes = (upper + 1) * vdec.VertexSize();
158 		u32 idx = gstate.vertType & GE_VTYPE_IDX_MASK;
159 		if (idx == GE_VTYPE_IDX_8BIT) {
160 			ibytes = vcount * sizeof(u8);
161 		} else if (idx == GE_VTYPE_IDX_16BIT) {
162 			ibytes = vcount * sizeof(u16);
163 		} else if (idx == GE_VTYPE_IDX_32BIT) {
164 			ibytes = vcount * sizeof(u32);
165 		}
166 	} else {
167 		vbytes = vcount * vdec.VertexSize();
168 	}
169 }
170 
// Searches haystack for an aligned occurrence of needle, splitting the work across
// the thread manager's threads.
//
// haystack/hlen: buffer to search and its total length in bytes.
// off: offset into haystack at which the search starts.
// needle/nlen: the bytes to look for and their length.
// align: a match is only accepted when its offset from haystack is a multiple of
//        this value.  It is applied as a mask (align - 1), so it is presumably
//        always a power of two - TODO confirm against callers.
//
// Returns a pointer to the lowest matching position found, or nullptr when there is
// no match, nlen is zero, or the needle cannot fit between off and the end of haystack.
static const u8 *mymemmem(const u8 *haystack, size_t off, size_t hlen, const u8 *needle, size_t nlen, uintptr_t align) {
	if (!nlen) {
		return nullptr;
	}
	// Without this, haystack + hlen - nlen could point before the buffer and the
	// range below would go negative (e.g. searching a buffer smaller than the needle.)
	if (nlen > hlen || off > hlen - nlen) {
		return nullptr;
	}

	const u8 *last_possible = haystack + hlen - nlen;
	const u8 *first_possible = haystack + off;
	int first = *needle;

	const u8 *result = nullptr;
	std::mutex resultLock;

	int range = (int)(last_possible - first_possible);
	ParallelRangeLoop(&g_threadManager, [&](int l, int h) {
		// l and h are relative to first_possible.  Position h itself is checked too.
		const u8 *p = haystack + off + l;
		const u8 *pend = haystack + off + h;

		const uintptr_t align_mask = align - 1;
		// Misalignment of p, measured from the start of haystack.
		auto poffset = [&]() {
			return ((uintptr_t)(p - haystack) & align_mask);
		};
		// Advances p to the next aligned position, if it isn't aligned already.
		auto alignp = [&]() {
			uintptr_t offset = poffset();
			if (offset != 0)
				p += align - offset;
		};

		alignp();
		while (p <= pend) {
			// Skip ahead to the next byte that matches the needle's first byte.
			p = (const u8 *)memchr(p, first, pend - p + 1);
			if (!p) {
				return;
			}
			if (poffset() == 0 && !memcmp(p, needle, nlen)) {
				std::lock_guard<std::mutex> guard(resultLock);
				// Take the lowest result so we get the same file for any # of threads.
				if (!result || p < result)
					result = p;
				return;
			}

			p++;
			alignp();
		}
	}, 0, range, 128 * 1024);

	return result;
}
219 
EmitCommandWithRAM(CommandType t,const void * p,u32 sz,u32 align)220 static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 align) {
221 	FlushRegisters();
222 
223 	Command cmd{t, sz, 0};
224 
225 	if (sz) {
226 		// If at all possible, try to find it already in the buffer.
227 		const u8 *prev = nullptr;
228 		const size_t NEAR_WINDOW = std::max((int)sz * 2, 1024 * 10);
229 		// Let's try nearby first... it will often be nearby.
230 		if (pushbuf.size() > NEAR_WINDOW) {
231 			prev = mymemmem(pushbuf.data(), pushbuf.size() - NEAR_WINDOW, pushbuf.size(), (const u8 *)p, sz, align);
232 		}
233 		if (!prev) {
234 			prev = mymemmem(pushbuf.data(), 0, pushbuf.size(), (const u8 *)p, sz, align);
235 		}
236 
237 		if (prev) {
238 			cmd.ptr = (u32)(prev - pushbuf.data());
239 		} else {
240 			cmd.ptr = (u32)pushbuf.size();
241 			int pad = 0;
242 			if (cmd.ptr & (align - 1)) {
243 				pad = align - (cmd.ptr & (align - 1));
244 				cmd.ptr += pad;
245 			}
246 			pushbuf.resize(pushbuf.size() + sz + pad);
247 			if (pad) {
248 				memset(pushbuf.data() + cmd.ptr - pad, 0, pad);
249 			}
250 			memcpy(pushbuf.data() + cmd.ptr, p, sz);
251 		}
252 	}
253 
254 	commands.push_back(cmd);
255 
256 	return cmd;
257 }
258 
EmitTextureData(int level,u32 texaddr)259 static void EmitTextureData(int level, u32 texaddr) {
260 	GETextureFormat format = gstate.getTextureFormat();
261 	int w = gstate.getTextureWidth(level);
262 	int h = gstate.getTextureHeight(level);
263 	int bufw = GetTextureBufw(level, texaddr, format);
264 	int extraw = w > bufw ? w - bufw : 0;
265 	u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8;
266 	const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end();
267 
268 	CommandType type = CommandType((int)CommandType::TEXTURE0 + level);
269 	const u8 *p = Memory::GetPointerUnchecked(texaddr);
270 	u32 bytes = Memory::ValidSize(texaddr, sizeInRAM);
271 	std::vector<u8> framebufData;
272 
273 	if (Memory::IsVRAMAddress(texaddr)) {
274 		struct FramebufData {
275 			u32 addr;
276 			int bufw;
277 			u32 flags;
278 			u32 pad;
279 		};
280 
281 		// The isTarget flag is mostly used for replay of dumps on a PSP.
282 		u32 flags = isTarget ? 1 : 0;
283 		FramebufData framebuf{ texaddr, bufw, flags };
284 		framebufData.resize(sizeof(framebuf) + bytes);
285 		memcpy(&framebufData[0], &framebuf, sizeof(framebuf));
286 		memcpy(&framebufData[sizeof(framebuf)], p, bytes);
287 		p = &framebufData[0];
288 
289 		// Okay, now we'll just emit this instead.
290 		type = CommandType((int)CommandType::FRAMEBUF0 + level);
291 		bytes += (u32)sizeof(framebuf);
292 	}
293 
294 	if (bytes > 0) {
295 		FlushRegisters();
296 
297 		// Dumps are huge - let's try to find this already emitted.
298 		for (u32 prevptr : lastTextures) {
299 			if (pushbuf.size() < prevptr + bytes) {
300 				continue;
301 			}
302 
303 			if (memcmp(pushbuf.data() + prevptr, p, bytes) == 0) {
304 				commands.push_back({type, bytes, prevptr});
305 				// Okay, that was easy.  Bail out.
306 				return;
307 			}
308 		}
309 
310 		// Not there, gotta emit anew.
311 		Command cmd = EmitCommandWithRAM(type, p, bytes, 16);
312 		lastTextures.push_back(cmd.ptr);
313 	}
314 }
315 
FlushPrimState(int vcount)316 static void FlushPrimState(int vcount) {
317 	// TODO: Eventually, how do we handle texturing from framebuf/zbuf?
318 	// TODO: Do we need to preload color/depth/stencil (in case from last frame)?
319 
320 	lastRenderTargets.insert(PSP_GetVidMemBase() | gstate.getFrameBufRawAddress());
321 	lastRenderTargets.insert(PSP_GetVidMemBase() | gstate.getDepthBufRawAddress());
322 
323 	// We re-flush textures always in case the game changed them... kinda expensive.
324 	// TODO: Dirty textures on transfer/stall/etc. somehow?
325 	// TODO: Or maybe de-dup by validating if it has changed?
326 	for (int level = 0; level < 8; ++level) {
327 		u32 texaddr = gstate.getTextureAddress(level);
328 		if (texaddr) {
329 			EmitTextureData(level, texaddr);
330 		}
331 	}
332 
333 	const void *verts = Memory::GetPointer(gstate_c.vertexAddr);
334 	const void *indices = nullptr;
335 	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
336 		indices = Memory::GetPointer(gstate_c.indexAddr);
337 	}
338 
339 	u32 ibytes = 0;
340 	u32 vbytes = 0;
341 	GetVertDataSizes(vcount, indices, vbytes, ibytes);
342 
343 	if (indices && ibytes > 0) {
344 		EmitCommandWithRAM(CommandType::INDICES, indices, ibytes, 4);
345 	}
346 	if (verts && vbytes > 0) {
347 		EmitCommandWithRAM(CommandType::VERTICES, verts, vbytes, 4);
348 	}
349 }
350 
EmitTransfer(u32 op)351 static void EmitTransfer(u32 op) {
352 	FlushRegisters();
353 
354 	// This may not make a lot of sense right now, unless it's to a framebuf...
355 	if (!Memory::IsVRAMAddress(gstate.getTransferDstAddress())) {
356 		// Skip, not VRAM, so can't affect drawing (we flush textures each prim.)
357 		return;
358 	}
359 
360 	u32 srcBasePtr = gstate.getTransferSrcAddress();
361 	u32 srcStride = gstate.getTransferSrcStride();
362 	int srcX = gstate.getTransferSrcX();
363 	int srcY = gstate.getTransferSrcY();
364 	int width = gstate.getTransferWidth();
365 	int height = gstate.getTransferHeight();
366 	int bpp = gstate.getTransferBpp();
367 
368 	u32 srcBytes = ((srcY + height - 1) * srcStride + (srcX + width)) * bpp;
369 	srcBytes = Memory::ValidSize(srcBasePtr, srcBytes);
370 
371 	if (srcBytes != 0) {
372 		EmitCommandWithRAM(CommandType::TRANSFERSRC, Memory::GetPointerUnchecked(srcBasePtr), srcBytes, 16);
373 	}
374 
375 	lastRegisters.push_back(op);
376 }
377 
EmitClut(u32 op)378 static void EmitClut(u32 op) {
379 	u32 addr = gstate.getClutAddress();
380 	u32 bytes = (op & 0x3F) * 32;
381 	bytes = Memory::ValidSize(addr, bytes);
382 
383 	if (bytes != 0) {
384 		EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16);
385 	}
386 
387 	lastRegisters.push_back(op);
388 }
389 
EmitPrim(u32 op)390 static void EmitPrim(u32 op) {
391 	FlushPrimState(op & 0x0000FFFF);
392 
393 	lastRegisters.push_back(op);
394 }
395 
EmitBezierSpline(u32 op)396 static void EmitBezierSpline(u32 op) {
397 	int ucount = op & 0xFF;
398 	int vcount = (op >> 8) & 0xFF;
399 	FlushPrimState(ucount * vcount);
400 
401 	lastRegisters.push_back(op);
402 }
403 
IsActive()404 bool IsActive() {
405 	return active;
406 }
407 
IsActivePending()408 bool IsActivePending() {
409 	return nextFrame || active;
410 }
411 
Activate()412 bool Activate() {
413 	if (!nextFrame) {
414 		nextFrame = true;
415 		flipLastAction = gpuStats.numFlips;
416 		return true;
417 	}
418 	return false;
419 }
420 
SetCallback(const std::function<void (const Path &)> callback)421 void SetCallback(const std::function<void(const Path &)> callback) {
422 	writeCallback = callback;
423 }
424 
FinishRecording()425 static void FinishRecording() {
426 	// We're done - this was just to write the result out.
427 	Path filename = WriteRecording();
428 	commands.clear();
429 	pushbuf.clear();
430 
431 	NOTICE_LOG(SYSTEM, "Recording finished");
432 	active = false;
433 	flipLastAction = gpuStats.numFlips;
434 
435 	if (writeCallback)
436 		writeCallback(filename);
437 	writeCallback = nullptr;
438 }
439 
NotifyCommand(u32 pc)440 void NotifyCommand(u32 pc) {
441 	if (!active) {
442 		return;
443 	}
444 
445 	const u32 op = Memory::Read_U32(pc);
446 	const GECommand cmd = GECommand(op >> 24);
447 
448 	switch (cmd) {
449 	case GE_CMD_VADDR:
450 	case GE_CMD_IADDR:
451 	case GE_CMD_JUMP:
452 	case GE_CMD_CALL:
453 	case GE_CMD_RET:
454 	case GE_CMD_END:
455 	case GE_CMD_SIGNAL:
456 	case GE_CMD_FINISH:
457 	case GE_CMD_BASE:
458 	case GE_CMD_OFFSETADDR:
459 	case GE_CMD_ORIGIN:
460 		// These just prepare future commands, and are flushed with those commands.
461 		// TODO: Maybe add a command just to log that these were hit?
462 		break;
463 
464 	case GE_CMD_BOUNDINGBOX:
465 	case GE_CMD_BJUMP:
466 		// Since we record each command, this is theoretically not relevant.
467 		// TODO: Output a CommandType to validate this.
468 		break;
469 
470 	case GE_CMD_PRIM:
471 		EmitPrim(op);
472 		break;
473 
474 	case GE_CMD_BEZIER:
475 	case GE_CMD_SPLINE:
476 		EmitBezierSpline(op);
477 		break;
478 
479 	case GE_CMD_LOADCLUT:
480 		EmitClut(op);
481 		break;
482 
483 	case GE_CMD_TRANSFERSTART:
484 		EmitTransfer(op);
485 		break;
486 
487 	default:
488 		lastRegisters.push_back(op);
489 		break;
490 	}
491 }
492 
NotifyMemcpy(u32 dest,u32 src,u32 sz)493 void NotifyMemcpy(u32 dest, u32 src, u32 sz) {
494 	if (!active) {
495 		return;
496 	}
497 	if (Memory::IsVRAMAddress(dest)) {
498 		FlushRegisters();
499 		Command cmd{CommandType::MEMCPYDEST, sizeof(dest), (u32)pushbuf.size()};
500 		pushbuf.resize(pushbuf.size() + sizeof(dest));
501 		memcpy(pushbuf.data() + cmd.ptr, &dest, sizeof(dest));
502 
503 		sz = Memory::ValidSize(dest, sz);
504 		if (sz != 0) {
505 			EmitCommandWithRAM(CommandType::MEMCPYDATA, Memory::GetPointer(dest), sz, 1);
506 		}
507 	}
508 }
509 
NotifyMemset(u32 dest,int v,u32 sz)510 void NotifyMemset(u32 dest, int v, u32 sz) {
511 	if (!active) {
512 		return;
513 	}
514 	struct MemsetCommand {
515 		u32 dest;
516 		int value;
517 		u32 sz;
518 	};
519 
520 	if (Memory::IsVRAMAddress(dest)) {
521 		sz = Memory::ValidSize(dest, sz);
522 		MemsetCommand data{dest, v, sz};
523 
524 		FlushRegisters();
525 		Command cmd{CommandType::MEMSET, sizeof(data), (u32)pushbuf.size()};
526 		pushbuf.resize(pushbuf.size() + sizeof(data));
527 		memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data));
528 	}
529 }
530 
NotifyUpload(u32 dest,u32 sz)531 void NotifyUpload(u32 dest, u32 sz) {
532 	if (!active) {
533 		return;
534 	}
535 	NotifyMemcpy(dest, dest, sz);
536 }
537 
NotifyDisplay(u32 framebuf,int stride,int fmt)538 void NotifyDisplay(u32 framebuf, int stride, int fmt) {
539 	bool writePending = false;
540 	if (active && !commands.empty()) {
541 		writePending = true;
542 	}
543 	if (nextFrame && (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0) {
544 		NOTICE_LOG(SYSTEM, "Recording starting on display...");
545 		BeginRecording();
546 	}
547 	if (!active) {
548 		return;
549 	}
550 
551 	struct DisplayBufData {
552 		PSPPointer<u8> topaddr;
553 		int linesize, pixelFormat;
554 	};
555 
556 	DisplayBufData disp{ { framebuf }, stride, fmt };
557 
558 	FlushRegisters();
559 	u32 ptr = (u32)pushbuf.size();
560 	u32 sz = (u32)sizeof(disp);
561 	pushbuf.resize(pushbuf.size() + sz);
562 	memcpy(pushbuf.data() + ptr, &disp, sz);
563 
564 	commands.push_back({ CommandType::DISPLAY, sz, ptr });
565 
566 	if (writePending) {
567 		NOTICE_LOG(SYSTEM, "Recording complete on display");
568 		FinishRecording();
569 	}
570 }
571 
NotifyFrame()572 void NotifyFrame() {
573 	const bool noDisplayAction = flipLastAction + 4 < gpuStats.numFlips;
574 	// We do this only to catch things that don't call NotifyDisplay.
575 	if (active && !commands.empty() && noDisplayAction) {
576 		NOTICE_LOG(SYSTEM, "Recording complete on frame");
577 
578 		struct DisplayBufData {
579 			PSPPointer<u8> topaddr;
580 			u32 linesize, pixelFormat;
581 		};
582 
583 		DisplayBufData disp;
584 		__DisplayGetFramebuf(&disp.topaddr, &disp.linesize, &disp.pixelFormat, 0);
585 
586 		FlushRegisters();
587 		u32 ptr = (u32)pushbuf.size();
588 		u32 sz = (u32)sizeof(disp);
589 		pushbuf.resize(pushbuf.size() + sz);
590 		memcpy(pushbuf.data() + ptr, &disp, sz);
591 
592 		commands.push_back({ CommandType::DISPLAY, sz, ptr });
593 
594 		FinishRecording();
595 	}
596 	if (nextFrame && (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) == 0 && noDisplayAction) {
597 		NOTICE_LOG(SYSTEM, "Recording starting on frame...");
598 		BeginRecording();
599 	}
600 }
601 
602 };
603