1 // Copyright 2008 Dolphin Emulator Project
2 // Licensed under GPLv2+
3 // Refer to the license.txt file included.
4
5 #include "VideoCommon/Fifo.h"
6
7 #include <atomic>
8 #include <cstring>
9
10 #include "Common/Assert.h"
11 #include "Common/Atomic.h"
12 #include "Common/BlockingLoop.h"
13 #include "Common/ChunkFile.h"
14 #include "Common/Event.h"
15 #include "Common/FPURoundMode.h"
16 #include "Common/MemoryUtil.h"
17 #include "Common/MsgHandler.h"
18
19 #include "Core/ConfigManager.h"
20 #include "Core/CoreTiming.h"
21 #include "Core/HW/Memmap.h"
22 #include "Core/Host.h"
23
24 #include "VideoCommon/AsyncRequests.h"
25 #include "VideoCommon/CPMemory.h"
26 #include "VideoCommon/CommandProcessor.h"
27 #include "VideoCommon/DataReader.h"
28 #include "VideoCommon/OpcodeDecoding.h"
29 #include "VideoCommon/VertexLoaderManager.h"
30 #include "VideoCommon/VertexManagerBase.h"
31 #include "VideoCommon/VideoBackendBase.h"
32
33 namespace Fifo
34 {
// Size in bytes of the video buffer (s_video_buffer) and of the auxiliary FIFO
// buffer (s_fifo_aux_data).
static constexpr u32 FIFO_SIZE = 2 * 1024 * 1024;
// Base delay handed to CoreTiming::ScheduleEvent when (re)scheduling the GPU sync event.
static constexpr int GPU_TIME_SLOT_SIZE = 1000;

// Loop object that RunGpuLoop() runs its work lambda on.
static Common::BlockingLoop s_gpu_mainloop;

// Set via EmulatorState(); RunGpuLoop() does no FIFO work while it is clear.
static Common::Flag s_emu_running_state;

// Most of this array is unlikely to be faulted in...
static u8 s_fifo_aux_data[FIFO_SIZE];
// Write and read cursors into s_fifo_aux_data (see Push/PopFifoAuxBuffer).
static u8* s_fifo_aux_write_ptr;
static u8* s_fifo_aux_read_ptr;

// This could be in SConfig, but it depends on multiple settings
// and can change at runtime (see UpdateWantDeterminism).
static bool s_use_deterministic_gpu_thread;

// Event handle registered in Prepare() and scheduled by RunGpu()/SyncGPUCallback().
static CoreTiming::EventType* s_event_sync_gpu;

// STATE_TO_SAVE
static u8* s_video_buffer;
static u8* s_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
static std::atomic<u8*> s_video_buffer_seen_ptr;
static u8* s_video_buffer_pp_read_ptr;
// The read_ptr is always owned by the GPU thread.  In normal mode, so is the
// write_ptr, despite it being atomic.  In deterministic GPU thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread, and marks the end of the data
//   it has already processed as far as possible - in the case of a partial
//   command which caused it to stop, this is not the same as the read_ptr.
//   It's written by the GPU, under the lock, and updating the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
//   FIFO.  Maybe someday it will be under the lock.  For now, because
//   RunGpuLoop polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.

// Tick counter shared between threads: WaitForGpuThread() adds to it and
// RunGpuLoop() subtracts the (scaled) cycles it executed.
static std::atomic<int> s_sync_ticks;
// True while the sync event is not scheduled; RunGpu() reschedules it.
static bool s_syncing_suspended;
// Set by RunGpuLoop(), waited on by WaitForGpuThread().
static Common::Event s_sync_wakeup_event;
74
DoState(PointerWrap & p)75 void DoState(PointerWrap& p)
76 {
77 p.DoArray(s_video_buffer, FIFO_SIZE);
78 u8* write_ptr = s_video_buffer_write_ptr;
79 p.DoPointer(write_ptr, s_video_buffer);
80 s_video_buffer_write_ptr = write_ptr;
81 p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);
82 if (p.mode == PointerWrap::MODE_READ && s_use_deterministic_gpu_thread)
83 {
84 // We're good and paused, right?
85 s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
86 }
87
88 p.Do(s_sync_ticks);
89 p.Do(s_syncing_suspended);
90 }
91
PauseAndLock(bool doLock,bool unpauseOnUnlock)92 void PauseAndLock(bool doLock, bool unpauseOnUnlock)
93 {
94 if (doLock)
95 {
96 SyncGPU(SyncGPUReason::Other);
97 EmulatorState(false);
98
99 const SConfig& param = SConfig::GetInstance();
100
101 if (!param.bCPUThread || s_use_deterministic_gpu_thread)
102 return;
103
104 s_gpu_mainloop.WaitYield(std::chrono::milliseconds(100), Host_YieldToUI);
105 }
106 else
107 {
108 if (unpauseOnUnlock)
109 EmulatorState(true);
110 }
111 }
112
Init()113 void Init()
114 {
115 // Padded so that SIMD overreads in the vertex loader are safe
116 s_video_buffer = static_cast<u8*>(Common::AllocateMemoryPages(FIFO_SIZE + 4));
117 ResetVideoBuffer();
118 if (SConfig::GetInstance().bCPUThread)
119 s_gpu_mainloop.Prepare();
120 s_sync_ticks.store(0);
121 }
122
Shutdown()123 void Shutdown()
124 {
125 if (s_gpu_mainloop.IsRunning())
126 PanicAlert("Fifo shutting down while active");
127
128 Common::FreeMemoryPages(s_video_buffer, FIFO_SIZE + 4);
129 s_video_buffer = nullptr;
130 s_video_buffer_write_ptr = nullptr;
131 s_video_buffer_pp_read_ptr = nullptr;
132 s_video_buffer_read_ptr = nullptr;
133 s_video_buffer_seen_ptr = nullptr;
134 s_fifo_aux_write_ptr = nullptr;
135 s_fifo_aux_read_ptr = nullptr;
136 }
137
138 // May be executed from any thread, even the graphics thread.
139 // Created to allow for self shutdown.
ExitGpuLoop()140 void ExitGpuLoop()
141 {
142 // This should break the wait loop in CPU thread
143 CommandProcessor::fifo.bFF_GPReadEnable = false;
144 FlushGpu();
145
146 // Terminate GPU thread loop
147 s_emu_running_state.Set();
148 s_gpu_mainloop.Stop(s_gpu_mainloop.kNonBlock);
149 }
150
EmulatorState(bool running)151 void EmulatorState(bool running)
152 {
153 s_emu_running_state.Set(running);
154 if (running)
155 s_gpu_mainloop.Wakeup();
156 else
157 s_gpu_mainloop.AllowSleep();
158 }
159
SyncGPU(SyncGPUReason reason,bool may_move_read_ptr)160 void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
161 {
162 if (s_use_deterministic_gpu_thread)
163 {
164 s_gpu_mainloop.Wait();
165 if (!s_gpu_mainloop.IsRunning())
166 return;
167
168 // Opportunistically reset FIFOs so we don't wrap around.
169 if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
170 PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);
171
172 memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
173 s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
174 s_fifo_aux_read_ptr = s_fifo_aux_data;
175
176 if (may_move_read_ptr)
177 {
178 u8* write_ptr = s_video_buffer_write_ptr;
179
180 // what's left over in the buffer
181 size_t size = write_ptr - s_video_buffer_pp_read_ptr;
182
183 memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
184 // This change always decreases the pointers. We write seen_ptr
185 // after write_ptr here, and read it before in RunGpuLoop, so
186 // 'write_ptr > seen_ptr' there cannot become spuriously true.
187 s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
188 s_video_buffer_pp_read_ptr = s_video_buffer;
189 s_video_buffer_read_ptr = s_video_buffer;
190 s_video_buffer_seen_ptr = write_ptr;
191 }
192 }
193 }
194
PushFifoAuxBuffer(const void * ptr,size_t size)195 void PushFifoAuxBuffer(const void* ptr, size_t size)
196 {
197 if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
198 {
199 SyncGPU(SyncGPUReason::AuxSpace, /* may_move_read_ptr */ false);
200 if (!s_gpu_mainloop.IsRunning())
201 {
202 // GPU is shutting down
203 return;
204 }
205 if (size > (size_t)(s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
206 {
207 // That will sync us up to the last 32 bytes, so this short region
208 // of FIFO would have to point to a 2MB display list or something.
209 PanicAlert("absurdly large aux buffer");
210 return;
211 }
212 }
213 memcpy(s_fifo_aux_write_ptr, ptr, size);
214 s_fifo_aux_write_ptr += size;
215 }
216
PopFifoAuxBuffer(size_t size)217 void* PopFifoAuxBuffer(size_t size)
218 {
219 void* ret = s_fifo_aux_read_ptr;
220 s_fifo_aux_read_ptr += size;
221 return ret;
222 }
223
224 // Description: RunGpuLoop() sends data through this function.
ReadDataFromFifo(u32 readPtr)225 static void ReadDataFromFifo(u32 readPtr)
226 {
227 size_t len = 32;
228 if (len > (size_t)(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
229 {
230 size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
231 if (len > (size_t)(FIFO_SIZE - existing_len))
232 {
233 PanicAlert("FIFO out of bounds (existing %zu + new %zu > %u)", existing_len, len, FIFO_SIZE);
234 return;
235 }
236 memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
237 s_video_buffer_write_ptr = s_video_buffer + existing_len;
238 s_video_buffer_read_ptr = s_video_buffer;
239 }
240 // Copy new video instructions to s_video_buffer for future use in rendering the new picture
241 Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
242 s_video_buffer_write_ptr += len;
243 }
244
245 // The deterministic_gpu_thread version.
ReadDataFromFifoOnCPU(u32 readPtr)246 static void ReadDataFromFifoOnCPU(u32 readPtr)
247 {
248 size_t len = 32;
249 u8* write_ptr = s_video_buffer_write_ptr;
250 if (len > (size_t)(s_video_buffer + FIFO_SIZE - write_ptr))
251 {
252 // We can't wrap around while the GPU is working on the data.
253 // This should be very rare due to the reset in SyncGPU.
254 SyncGPU(SyncGPUReason::Wraparound);
255 if (!s_gpu_mainloop.IsRunning())
256 {
257 // GPU is shutting down, so the next asserts may fail
258 return;
259 }
260
261 if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
262 {
263 PanicAlert("desynced read pointers");
264 return;
265 }
266 write_ptr = s_video_buffer_write_ptr;
267 size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
268 if (len > (size_t)(FIFO_SIZE - existing_len))
269 {
270 PanicAlert("FIFO out of bounds (existing %zu + new %zu > %u)", existing_len, len, FIFO_SIZE);
271 return;
272 }
273 }
274 Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
275 s_video_buffer_pp_read_ptr = OpcodeDecoder::Run<true>(
276 DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
277 // This would have to be locked if the GPU thread didn't spin.
278 s_video_buffer_write_ptr = write_ptr + len;
279 }
280
ResetVideoBuffer()281 void ResetVideoBuffer()
282 {
283 s_video_buffer_read_ptr = s_video_buffer;
284 s_video_buffer_write_ptr = s_video_buffer;
285 s_video_buffer_seen_ptr = s_video_buffer;
286 s_video_buffer_pp_read_ptr = s_video_buffer;
287 s_fifo_aux_write_ptr = s_fifo_aux_data;
288 s_fifo_aux_read_ptr = s_fifo_aux_data;
289 }
290
// Description: Main FIFO update loop
// Purpose: Keep the Core HW updated about the CPU-GPU distance
//
// Runs the work lambda below on s_gpu_mainloop until the loop is stopped.
// Async requests are enabled (with passthrough off) for the duration of the
// loop and switched back once Run() returns.
void RunGpuLoop()
{
  AsyncRequests::GetInstance()->SetEnable(true);
  AsyncRequests::GetInstance()->SetPassthrough(false);

  s_gpu_mainloop.Run(
      [] {
        const SConfig& param = SConfig::GetInstance();

        // Run events from the CPU thread.
        AsyncRequests::GetInstance()->PullEvents();

        // Do nothing while paused
        if (!s_emu_running_state.IsSet())
          return;

        if (s_use_deterministic_gpu_thread)
        {
          // All the fifo/CP stuff is on the CPU.  We just need to run the opcode decoder.
          // seen_ptr must be read before write_ptr here.
          u8* seen_ptr = s_video_buffer_seen_ptr;
          u8* write_ptr = s_video_buffer_write_ptr;
          // See comment in SyncGPU
          if (write_ptr > seen_ptr)
          {
            // New data was written since the last pass: decode up to write_ptr
            // and record that everything up to there has been seen.
            s_video_buffer_read_ptr =
                OpcodeDecoder::Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
            s_video_buffer_seen_ptr = write_ptr;
          }
        }
        else
        {
          CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
          CommandProcessor::SetCPStatusFromGPU();

          // check if we are able to run this buffer
          while (!CommandProcessor::IsInterruptWaiting() && fifo.bFF_GPReadEnable &&
                 fifo.CPReadWriteDistance && !AtBreakpoint())
          {
            // With SyncGPU enabled, hold off until enough ticks have accumulated.
            if (param.bSyncGPU && s_sync_ticks.load() < param.iSyncGpuMinDistance)
              break;

            u32 cyclesExecuted = 0;
            u32 readPtr = fifo.CPReadPointer;
            ReadDataFromFifo(readPtr);

            // Advance the local read pointer by one 32-byte block, wrapping
            // from CPEnd back to CPBase.
            if (readPtr == fifo.CPEnd)
              readPtr = fifo.CPBase;
            else
              readPtr += 32;

            ASSERT_MSG(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0,
                       "Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce "
                       "instability in the game. Please report it.",
                       fifo.CPReadWriteDistance - 32);

            u8* write_ptr = s_video_buffer_write_ptr;
            s_video_buffer_read_ptr = OpcodeDecoder::Run(
                DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);

            // Publish the new read pointer and the reduced distance only after
            // the block has been decoded.
            Common::AtomicStore(fifo.CPReadPointer, readPtr);
            Common::AtomicAdd(fifo.CPReadWriteDistance, static_cast<u32>(-32));
            // The video buffer has been fully consumed, so the current read
            // pointer is recorded as the safe one.
            if ((write_ptr - s_video_buffer_read_ptr) == 0)
              Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);

            CommandProcessor::SetCPStatusFromGPU();

            if (param.bSyncGPU)
            {
              cyclesExecuted = (int)(cyclesExecuted / param.fSyncGpuOverclock);
              int old = s_sync_ticks.fetch_sub(cyclesExecuted);
              // Signal the wakeup event only when this subtraction took the
              // counter from at-or-above the max distance to below it.
              if (old >= param.iSyncGpuMaxDistance &&
                  old - (int)cyclesExecuted < param.iSyncGpuMaxDistance)
                s_sync_wakeup_event.Set();
            }

            // This call is pretty important in DualCore mode and must be called in the FIFO Loop.
            // If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
            // leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing
            // things down.
            AsyncRequests::GetInstance()->PullEvents();
          }

          // fast skip remaining GPU time if fifo is empty
          if (s_sync_ticks.load() > 0)
          {
            int old = s_sync_ticks.exchange(0);
            if (old >= param.iSyncGpuMaxDistance)
              s_sync_wakeup_event.Set();
          }

          // The fifo is empty and it's unlikely we will get any more work in the near future.
          // Make sure VertexManager finishes drawing any primitives it has stored in its buffer.
          g_vertex_manager->Flush();
        }
      },
      // NOTE(review): second argument to BlockingLoop::Run; presumably a timeout in
      // milliseconds - confirm against Common/BlockingLoop.h.
      100);

  AsyncRequests::GetInstance()->SetEnable(false);
  AsyncRequests::GetInstance()->SetPassthrough(true);
}
393
FlushGpu()394 void FlushGpu()
395 {
396 const SConfig& param = SConfig::GetInstance();
397
398 if (!param.bCPUThread || s_use_deterministic_gpu_thread)
399 return;
400
401 s_gpu_mainloop.Wait();
402 }
403
// Tells the GPU main loop that it is allowed to sleep (forwards to
// BlockingLoop::AllowSleep).
void GpuMaySleep()
{
  s_gpu_mainloop.AllowSleep();
}
408
AtBreakpoint()409 bool AtBreakpoint()
410 {
411 CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
412 return fifo.bFF_BPEnable && (fifo.CPReadPointer == fifo.CPBreakpoint);
413 }
414
RunGpu()415 void RunGpu()
416 {
417 const SConfig& param = SConfig::GetInstance();
418
419 // wake up GPU thread
420 if (param.bCPUThread && !s_use_deterministic_gpu_thread)
421 {
422 s_gpu_mainloop.Wakeup();
423 }
424
425 // if the sync GPU callback is suspended, wake it up.
426 if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread ||
427 SConfig::GetInstance().bSyncGPU)
428 {
429 if (s_syncing_suspended)
430 {
431 s_syncing_suspended = false;
432 CoreTiming::ScheduleEvent(GPU_TIME_SLOT_SIZE, s_event_sync_gpu, GPU_TIME_SLOT_SIZE);
433 }
434 }
435 }
436
RunGpuOnCpu(int ticks)437 static int RunGpuOnCpu(int ticks)
438 {
439 CommandProcessor::SCPFifoStruct& fifo = CommandProcessor::fifo;
440 bool reset_simd_state = false;
441 int available_ticks = int(ticks * SConfig::GetInstance().fSyncGpuOverclock) + s_sync_ticks.load();
442 while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() &&
443 available_ticks >= 0)
444 {
445 if (s_use_deterministic_gpu_thread)
446 {
447 ReadDataFromFifoOnCPU(fifo.CPReadPointer);
448 s_gpu_mainloop.Wakeup();
449 }
450 else
451 {
452 if (!reset_simd_state)
453 {
454 FPURoundMode::SaveSIMDState();
455 FPURoundMode::LoadDefaultSIMDState();
456 reset_simd_state = true;
457 }
458 ReadDataFromFifo(fifo.CPReadPointer);
459 u32 cycles = 0;
460 s_video_buffer_read_ptr = OpcodeDecoder::Run(
461 DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), &cycles, false);
462 available_ticks -= cycles;
463 }
464
465 if (fifo.CPReadPointer == fifo.CPEnd)
466 fifo.CPReadPointer = fifo.CPBase;
467 else
468 fifo.CPReadPointer += 32;
469
470 fifo.CPReadWriteDistance -= 32;
471 }
472
473 CommandProcessor::SetCPStatusFromGPU();
474
475 if (reset_simd_state)
476 {
477 FPURoundMode::LoadSIMDState();
478 }
479
480 // Discard all available ticks as there is nothing to do any more.
481 s_sync_ticks.store(std::min(available_ticks, 0));
482
483 // If the GPU is idle, drop the handler.
484 if (available_ticks >= 0)
485 return -1;
486
487 // Always wait at least for GPU_TIME_SLOT_SIZE cycles.
488 return -available_ticks + GPU_TIME_SLOT_SIZE;
489 }
490
UpdateWantDeterminism(bool want)491 void UpdateWantDeterminism(bool want)
492 {
493 // We are paused (or not running at all yet), so
494 // it should be safe to change this.
495 const SConfig& param = SConfig::GetInstance();
496 bool gpu_thread = false;
497 switch (param.m_GPUDeterminismMode)
498 {
499 case GPUDeterminismMode::Auto:
500 gpu_thread = want;
501 break;
502 case GPUDeterminismMode::Disabled:
503 gpu_thread = false;
504 break;
505 case GPUDeterminismMode::FakeCompletion:
506 gpu_thread = true;
507 break;
508 }
509
510 gpu_thread = gpu_thread && param.bCPUThread;
511
512 if (s_use_deterministic_gpu_thread != gpu_thread)
513 {
514 s_use_deterministic_gpu_thread = gpu_thread;
515 if (gpu_thread)
516 {
517 // These haven't been updated in non-deterministic mode.
518 s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
519 CopyPreprocessCPStateFromMain();
520 VertexLoaderManager::MarkAllDirty();
521 }
522 }
523 }
524
// Returns whether the deterministic GPU thread mode is currently selected
// (the value last computed by UpdateWantDeterminism).
bool UseDeterministicGPUThread()
{
  return s_use_deterministic_gpu_thread;
}
529
530 /* This function checks the emulated CPU - GPU distance and may wake up the GPU,
531 * or block the CPU if required. It should be called by the CPU thread regularly.
532 * @ticks The gone emulated CPU time.
533 * @return A good time to call WaitForGpuThread() next.
534 */
WaitForGpuThread(int ticks)535 static int WaitForGpuThread(int ticks)
536 {
537 const SConfig& param = SConfig::GetInstance();
538
539 int old = s_sync_ticks.fetch_add(ticks);
540 int now = old + ticks;
541
542 // GPU is idle, so stop polling.
543 if (old >= 0 && s_gpu_mainloop.IsDone())
544 return -1;
545
546 // Wakeup GPU
547 if (old < param.iSyncGpuMinDistance && now >= param.iSyncGpuMinDistance)
548 RunGpu();
549
550 // If the GPU is still sleeping, wait for a longer time
551 if (now < param.iSyncGpuMinDistance)
552 return GPU_TIME_SLOT_SIZE + param.iSyncGpuMinDistance - now;
553
554 // Wait for GPU
555 if (now >= param.iSyncGpuMaxDistance)
556 s_sync_wakeup_event.Wait();
557
558 return GPU_TIME_SLOT_SIZE;
559 }
560
SyncGPUCallback(u64 ticks,s64 cyclesLate)561 static void SyncGPUCallback(u64 ticks, s64 cyclesLate)
562 {
563 ticks += cyclesLate;
564 int next = -1;
565
566 if (!SConfig::GetInstance().bCPUThread || s_use_deterministic_gpu_thread)
567 {
568 next = RunGpuOnCpu((int)ticks);
569 }
570 else if (SConfig::GetInstance().bSyncGPU)
571 {
572 next = WaitForGpuThread((int)ticks);
573 }
574
575 s_syncing_suspended = next < 0;
576 if (!s_syncing_suspended)
577 CoreTiming::ScheduleEvent(next, s_event_sync_gpu, next);
578 }
579
580 // Initialize GPU - CPU thread syncing, this gives us a deterministic way to start the GPU thread.
Prepare()581 void Prepare()
582 {
583 s_event_sync_gpu = CoreTiming::RegisterEvent("SyncGPUCallback", SyncGPUCallback);
584 s_syncing_suspended = true;
585 }
586 } // namespace Fifo
587