1 /* V810 Emulator
2  *
3  * Copyright (C) 2006 David Tucker
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 
20 /* Alternatively, the V810 emulator code(and all V810 emulation header files) can be used/distributed under the following license(you can adopt either
21    license exclusively for your changes by removing one of these license headers, but it's STRONGLY preferable
22    to keep your changes dual-licensed as well):
23 
24 This Reality Boy emulator is copyright (C) David Tucker 1997-2008, all rights
25 reserved.   You may use this code as long as you make no money from the use of
26 this code and you acknowledge the original author (Me).  I reserve the right to
27 dictate who can use this code and how (Just so you don't do something stupid
28 with it).
29    Most Importantly, this code is swap ware.  If you use It send along your new
30 program (with code) or some other interesting tidbits you wrote, that I might be
31 interested in.
32    This code is in beta, there are bugs!  I am not responsible for any damage
33 done to your computer, reputation, ego, dog, or family life due to the use of
34 this code.  All source is provided as is, I make no guaranties, and am not
35 responsible for anything you do with the code (legal or otherwise).
36    Virtual Boy is a trademark of Nintendo, and V810 is a trademark of NEC.  I am
37 in no way affiliated with either party and all information contained hear was
38 found freely through public domain sources.
39 */
40 
41 //////////////////////////////////////////////////////////
42 // CPU routines
43 
44 #include "mednafen/mednafen.h"
45 #include <mednafen/masmem.h>
46 
47 //#include "pcfx.h"
48 //#include "debug.h"
49 
50 #include <string.h>
51 #include <errno.h>
52 #include <algorithm>
53 
54 #include "v810_opt.h"
55 #include "v810_cpu.h"
56 #include "v810_cpuD.h"
57 
V810()58 V810::V810()
59 {
60  #ifdef WANT_DEBUGGER
61  CPUHook = NULL;
62  ADDBT = NULL;
63  #endif
64 
65  MemRead8 = NULL;
66  MemRead16 = NULL;
67  MemRead32 = NULL;
68 
69  IORead8 = NULL;
70  IORead16 = NULL;
71  IORead32 = NULL;
72 
73  MemWrite8 = NULL;
74  MemWrite16 = NULL;
75  MemWrite32 = NULL;
76 
77  IOWrite8 = NULL;
78  IOWrite16 = NULL;
79  IOWrite32 = NULL;
80 
81  memset(FastMap, 0, sizeof(FastMap));
82 
83  memset(MemReadBus32, 0, sizeof(MemReadBus32));
84  memset(MemWriteBus32, 0, sizeof(MemWriteBus32));
85 
86  v810_timestamp = 0;
87  next_event_ts = 0x7FFFFFFF;
88 }
89 
~V810()90 V810::~V810()
91 {
92  Kill();
93 }
94 
RecalcIPendingCache(void)95 INLINE void V810::RecalcIPendingCache(void)
96 {
97  IPendingCache = 0;
98 
99  // Of course don't generate an interrupt if there's not one pending!
100  if(ilevel < 0)
101   return;
102 
103  // If CPU is halted because of a fatal exception, don't let an interrupt
104  // take us out of this halted status.
105  if(Halted == HALT_FATAL_EXCEPTION)
106   return;
107 
108  // If the NMI pending, exception pending, and/or interrupt disabled bit
109  // is set, don't accept any interrupts.
110  if(S_REG[PSW] & (PSW_NP | PSW_EP | PSW_ID))
111   return;
112 
113  // If the interrupt level is lower than the interrupt enable level, don't
114  // accept it.
115  if(ilevel < (int)((S_REG[PSW] & PSW_IA) >> 16))
116   return;
117 
118  IPendingCache = 0xFF;
119 }
120 
121 
122 // TODO: "An interrupt that occurs during restore/dump/clear operation is internally held and is accepted after the
123 // operation in progress is finished. The maskable interrupt is held internally only when the EP, NP, and ID flags
124 // of PSW are all 0."
125 //
126 // This behavior probably doesn't have any relevance on the PC-FX, unless we're sadistic
127 // and try to restore cache from an interrupt acknowledge register or dump it to a register
128 // controlling interrupt masks...  I wanna be sadistic~
129 
CacheClear(v810_timestamp_t & timestamp,uint32 start,uint32 count)130 void V810::CacheClear(v810_timestamp_t &timestamp, uint32 start, uint32 count)
131 {
132  //printf("Cache clear: %08x %08x\n", start, count);
133  for(uint32 i = 0; i < count && (i + start) < 128; i++)
134   memset(&Cache[i + start], 0, sizeof(V810_CacheEntry_t));
135 }
136 
CacheOpMemStore(v810_timestamp_t & timestamp,uint32 A,uint32 V)137 INLINE void V810::CacheOpMemStore(v810_timestamp_t &timestamp, uint32 A, uint32 V)
138 {
139  if(MemWriteBus32[A >> 24])
140  {
141   timestamp += 2;
142   MemWrite32(timestamp, A, V);
143  }
144  else
145  {
146   timestamp += 2;
147   MemWrite16(timestamp, A, V & 0xFFFF);
148 
149   timestamp += 2;
150   MemWrite16(timestamp, A | 2, V >> 16);
151  }
152 }
153 
CacheOpMemLoad(v810_timestamp_t & timestamp,uint32 A)154 INLINE uint32 V810::CacheOpMemLoad(v810_timestamp_t &timestamp, uint32 A)
155 {
156  if(MemReadBus32[A >> 24])
157  {
158   timestamp += 2;
159   return(MemRead32(timestamp, A));
160  }
161  else
162  {
163   uint32 ret;
164 
165   timestamp += 2;
166   ret = MemRead16(timestamp, A);
167 
168   timestamp += 2;
169   ret |= MemRead16(timestamp, A | 2) << 16;
170   return(ret);
171  }
172 }
173 
CacheDump(v810_timestamp_t & timestamp,const uint32 SA)174 void V810::CacheDump(v810_timestamp_t &timestamp, const uint32 SA)
175 {
176  printf("Cache dump: %08x\n", SA);
177 
178  for(int i = 0; i < 128; i++)
179  {
180   CacheOpMemStore(timestamp, SA + i * 8 + 0, Cache[i].data[0]);
181   CacheOpMemStore(timestamp, SA + i * 8 + 4, Cache[i].data[1]);
182  }
183 
184  for(int i = 0; i < 128; i++)
185  {
186   uint32 icht = Cache[i].tag | ((int)Cache[i].data_valid[0] << 22) | ((int)Cache[i].data_valid[1] << 23);
187 
188   CacheOpMemStore(timestamp, SA + 1024 + i * 4, icht);
189  }
190 
191 }
192 
CacheRestore(v810_timestamp_t & timestamp,const uint32 SA)193 void V810::CacheRestore(v810_timestamp_t &timestamp, const uint32 SA)
194 {
195  printf("Cache restore: %08x\n", SA);
196 
197  for(int i = 0; i < 128; i++)
198  {
199   Cache[i].data[0] = CacheOpMemLoad(timestamp, SA + i * 8 + 0);
200   Cache[i].data[1] = CacheOpMemLoad(timestamp, SA + i * 8 + 4);
201  }
202 
203  for(int i = 0; i < 128; i++)
204  {
205   uint32 icht;
206 
207   icht = CacheOpMemLoad(timestamp, SA + 1024 + i * 4);
208 
209   Cache[i].tag = icht & ((1 << 22) - 1);
210   Cache[i].data_valid[0] = (icht >> 22) & 1;
211   Cache[i].data_valid[1] = (icht >> 23) & 1;
212  }
213 }
214 
215 
RDCACHE(v810_timestamp_t & timestamp,uint32 addr)216 INLINE uint32 V810::RDCACHE(v810_timestamp_t &timestamp, uint32 addr)
217 {
218  const int CI = (addr >> 3) & 0x7F;
219  const int SBI = (addr & 4) >> 2;
220 
221  if(Cache[CI].tag == (addr >> 10))
222  {
223   if(!Cache[CI].data_valid[SBI])
224   {
225    timestamp += 2;       // or higher?  Penalty for cache miss seems to be higher than having cache disabled.
226    if(MemReadBus32[addr >> 24])
227     Cache[CI].data[SBI] = MemRead32(timestamp, addr & ~0x3);
228    else
229    {
230     timestamp++;
231 
232     uint32 tmp;
233 
234     tmp = MemRead16(timestamp, addr & ~0x3);
235     tmp |= MemRead16(timestamp, (addr & ~0x3) | 0x2) << 16;
236 
237     Cache[CI].data[SBI] = tmp;
238    }
239    Cache[CI].data_valid[SBI] = TRUE;
240   }
241  }
242  else
243  {
244   Cache[CI].tag = addr >> 10;
245 
246   timestamp += 2;	// or higher?  Penalty for cache miss seems to be higher than having cache disabled.
247   if(MemReadBus32[addr >> 24])
248    Cache[CI].data[SBI] = MemRead32(timestamp, addr & ~0x3);
249   else
250   {
251    timestamp++;
252 
253    uint32 tmp;
254 
255    tmp = MemRead16(timestamp, addr & ~0x3);
256    tmp |= MemRead16(timestamp, (addr & ~0x3) | 0x2) << 16;
257 
258    Cache[CI].data[SBI] = tmp;
259   }
260   //Cache[CI].data[SBI] = MemRead32(timestamp, addr & ~0x3);
261   Cache[CI].data_valid[SBI] = TRUE;
262   Cache[CI].data_valid[SBI ^ 1] = FALSE;
263  }
264 
265  //{
266  // // Caution: This can mess up DRAM page change penalty timings
267  // uint32 dummy_timestamp = 0;
268  // if(Cache[CI].data[SBI] != mem_rword(addr & ~0x3, dummy_timestamp))
269  // {
270  //  printf("Cache/Real Memory Mismatch: %08x %08x/%08x\n", addr & ~0x3, Cache[CI].data[SBI], mem_rword(addr & ~0x3, dummy_timestamp));
271  // }
272  //}
273 
274  return(Cache[CI].data[SBI]);
275 }
276 
RDOP(v810_timestamp_t & timestamp,uint32 addr,uint32 meow)277 INLINE uint16 V810::RDOP(v810_timestamp_t &timestamp, uint32 addr, uint32 meow)
278 {
279  uint16 ret;
280 
281  if(S_REG[CHCW] & 0x2)
282  {
283   uint32 d32 = RDCACHE(timestamp, addr);
284   ret = d32 >> ((addr & 2) * 8);
285  }
286  else
287  {
288   timestamp += meow; //++;
289   ret = MemRead16(timestamp, addr);
290  }
291  return(ret);
292 }
293 
// Adds one clock via ADDCLOCK() when CHCW bit 1 is set and bit 1 of the
// address expression `x` is set.
// `x` is parenthesized in the expansion so that arguments containing
// operators of lower precedence than '&' (for example `a | b`) are tested as
// a whole.
#define BRANCH_ALIGN_CHECK(x)	{ if((S_REG[CHCW] & 0x2) && ((x) & 0x2)) { ADDCLOCK(1); } }
295 
296 // Reinitialize the defaults in the CPU
Reset()297 void V810::Reset()
298 {
299 #ifdef WANT_DEBUGGER
300  if(ADDBT)
301   ADDBT(GetPC(), 0xFFFFFFF0, 0xFFF0);
302 #endif
303  memset(&Cache, 0, sizeof(Cache));
304 
305  memset(P_REG, 0, sizeof(P_REG));
306  memset(S_REG, 0, sizeof(S_REG));
307  memset(Cache, 0, sizeof(Cache));
308 
309  P_REG[0]      =  0x00000000;
310  SetPC(0xFFFFFFF0);
311 
312  S_REG[ECR]    =  0x0000FFF0;
313  S_REG[PSW]    =  0x00008000;
314 
315  if(VBMode)
316   S_REG[PIR]	= 0x00005346;
317  else
318   S_REG[PIR]    =  0x00008100;
319 
320  S_REG[TKCW]   =  0x000000E0;
321  Halted = HALT_NONE;
322  ilevel = -1;
323 
324  lastop = 0;
325 
326  in_bstr = FALSE;
327 
328  RecalcIPendingCache();
329 }
330 
Init(V810_Emu_Mode mode,bool vb_mode)331 bool V810::Init(V810_Emu_Mode mode, bool vb_mode)
332 {
333  EmuMode = mode;
334  VBMode = vb_mode;
335 
336  in_bstr = FALSE;
337  in_bstr_to = 0;
338 
339  if(mode == V810_EMU_MODE_FAST)
340  {
341   memset(DummyRegion, 0, V810_FAST_MAP_PSIZE);
342 
343   for(unsigned int i = V810_FAST_MAP_PSIZE; i < V810_FAST_MAP_PSIZE + V810_FAST_MAP_TRAMPOLINE_SIZE; i += 2)
344   {
345    DummyRegion[i + 0] = 0;
346    DummyRegion[i + 1] = 0x36 << 2;
347   }
348 
349   for(uint64 A = 0; A < (1ULL << 32); A += V810_FAST_MAP_PSIZE)
350    FastMap[A / V810_FAST_MAP_PSIZE] = DummyRegion - A;
351  }
352 
353  return(TRUE);
354 }
355 
Kill(void)356 void V810::Kill(void)
357 {
358  for(unsigned int i = 0; i < FastMapAllocList.size(); i++)
359   free(FastMapAllocList[i]);
360 
361  FastMapAllocList.clear();
362 }
363 
SetInt(int level)364 void V810::SetInt(int level)
365 {
366  assert(level >= -1 && level <= 15);
367 
368  ilevel = level;
369  RecalcIPendingCache();
370 }
371 
SetFastMap(uint32 addresses[],uint32 length,unsigned int num_addresses,const char * name)372 uint8 *V810::SetFastMap(uint32 addresses[], uint32 length, unsigned int num_addresses, const char *name)
373 {
374  uint8 *ret = NULL;
375 
376  for(unsigned int i = 0; i < num_addresses; i++)
377  {
378   assert((addresses[i] & (V810_FAST_MAP_PSIZE - 1)) == 0);
379  }
380  assert((length & (V810_FAST_MAP_PSIZE - 1)) == 0);
381 
382  if(!(ret = (uint8 *)malloc(length + V810_FAST_MAP_TRAMPOLINE_SIZE)))
383  {
384   return(NULL);
385  }
386 
387  for(unsigned int i = length; i < length + V810_FAST_MAP_TRAMPOLINE_SIZE; i += 2)
388  {
389   ret[i + 0] = 0;
390   ret[i + 1] = 0x36 << 2;
391  }
392 
393  for(unsigned int i = 0; i < num_addresses; i++)
394  {
395   for(uint64 addr = addresses[i]; addr != (uint64)addresses[i] + length; addr += V810_FAST_MAP_PSIZE)
396   {
397    //printf("%08x, %d, %s\n", addr, length, name);
398 
399    FastMap[addr / V810_FAST_MAP_PSIZE] = ret - addresses[i];
400   }
401  }
402 
403  FastMapAllocList.push_back(ret);
404 
405  return(ret);
406 }
407 
408 
SetMemReadBus32(uint8 A,bool value)409 void V810::SetMemReadBus32(uint8 A, bool value)
410 {
411  MemReadBus32[A] = value;
412 }
413 
SetMemWriteBus32(uint8 A,bool value)414 void V810::SetMemWriteBus32(uint8 A, bool value)
415 {
416  MemWriteBus32[A] = value;
417 }
418 
SetMemReadHandlers(uint8 MDFN_FASTCALL (* read8)(v810_timestamp_t &,uint32),uint16 MDFN_FASTCALL (* read16)(v810_timestamp_t &,uint32),uint32 MDFN_FASTCALL (* read32)(v810_timestamp_t &,uint32))419 void V810::SetMemReadHandlers(uint8 MDFN_FASTCALL (*read8)(v810_timestamp_t &, uint32), uint16 MDFN_FASTCALL (*read16)(v810_timestamp_t &, uint32), uint32 MDFN_FASTCALL (*read32)(v810_timestamp_t &, uint32))
420 {
421  MemRead8 = read8;
422  MemRead16 = read16;
423  MemRead32 = read32;
424 }
425 
SetMemWriteHandlers(void MDFN_FASTCALL (* write8)(v810_timestamp_t &,uint32,uint8),void MDFN_FASTCALL (* write16)(v810_timestamp_t &,uint32,uint16),void MDFN_FASTCALL (* write32)(v810_timestamp_t &,uint32,uint32))426 void V810::SetMemWriteHandlers(void MDFN_FASTCALL (*write8)(v810_timestamp_t &, uint32, uint8), void MDFN_FASTCALL (*write16)(v810_timestamp_t &, uint32, uint16), void MDFN_FASTCALL (*write32)(v810_timestamp_t &, uint32, uint32))
427 {
428  MemWrite8 = write8;
429  MemWrite16 = write16;
430  MemWrite32 = write32;
431 }
432 
SetIOReadHandlers(uint8 MDFN_FASTCALL (* read8)(v810_timestamp_t &,uint32),uint16 MDFN_FASTCALL (* read16)(v810_timestamp_t &,uint32),uint32 MDFN_FASTCALL (* read32)(v810_timestamp_t &,uint32))433 void V810::SetIOReadHandlers(uint8 MDFN_FASTCALL (*read8)(v810_timestamp_t &, uint32), uint16 MDFN_FASTCALL (*read16)(v810_timestamp_t &, uint32), uint32 MDFN_FASTCALL (*read32)(v810_timestamp_t &, uint32))
434 {
435  IORead8 = read8;
436  IORead16 = read16;
437  IORead32 = read32;
438 }
439 
SetIOWriteHandlers(void MDFN_FASTCALL (* write8)(v810_timestamp_t &,uint32,uint8),void MDFN_FASTCALL (* write16)(v810_timestamp_t &,uint32,uint16),void MDFN_FASTCALL (* write32)(v810_timestamp_t &,uint32,uint32))440 void V810::SetIOWriteHandlers(void MDFN_FASTCALL (*write8)(v810_timestamp_t &, uint32, uint8), void MDFN_FASTCALL (*write16)(v810_timestamp_t &, uint32, uint16), void MDFN_FASTCALL (*write32)(v810_timestamp_t &, uint32, uint32))
441 {
442  IOWrite8 = write8;
443  IOWrite16 = write16;
444  IOWrite32 = write32;
445 }
446 
447 
SetFlag(uint32 n,bool condition)448 INLINE void V810::SetFlag(uint32 n, bool condition)
449 {
450  S_REG[PSW] &= ~n;
451 
452  if(condition)
453   S_REG[PSW] |= n;
454 }
455 
SetSZ(uint32 value)456 INLINE void V810::SetSZ(uint32 value)
457 {
458  SetFlag(PSW_Z, !value);
459  SetFlag(PSW_S, value & 0x80000000);
460 }
461 
#ifdef WANT_DEBUGGER
// Peeks at the instruction at the current PC and, when it is a load, store,
// IN or OUT, reports the access it is about to make through `callback`
// (type, aligned effective address, value for writes / 0 for reads, length
// in bytes).  Other opcodes report nothing.  `peek32` is not used.
void V810::CheckBreakpoints(void (*callback)(int type, uint32 address, uint32 value, unsigned int len), uint16 MDFN_FASTCALL (*peek16)(const v810_timestamp_t, uint32), uint32 MDFN_FASTCALL (*peek32)(const v810_timestamp_t, uint32))
{
 int32 ws_dummy = v810_timestamp;
 const uint32 insn_pc = GetPC();

 const uint16 tmpop      = peek16(ws_dummy, insn_pc);
 const uint16 tmpop_high = peek16(ws_dummy, insn_pc + 2);

 // Opcode lives in the top 6 bits of the first halfword.
 // Uncomment this out later if necessary.
 //if((tmpop & 0xE000) == 0x8000)        // Special opcode format for
 // opcode = (tmpop >> 9) & 0x7F;    // type III instructions.
 const unsigned int opcode = tmpop >> 10;

 // Effective address: sign-extended 16-bit displacement (second halfword)
 // plus the register selected by bits 0..4.
 const uint32 ea = sign_16(tmpop_high) + P_REG[tmpop & 0x1F];

 // Source register for stores/outputs, selected by bits 5..9.
 const uint32 src_val = P_REG[(tmpop >> 5) & 0x1F];

 switch(opcode)
 {
	case LD_B: callback(BPOINT_READ, ea & 0xFFFFFFFF, 0, 1); break;
	case LD_H: callback(BPOINT_READ, ea & 0xFFFFFFFE, 0, 2); break;
	case LD_W: callback(BPOINT_READ, ea & 0xFFFFFFFC, 0, 4); break;

	case ST_B: callback(BPOINT_WRITE, ea & 0xFFFFFFFF, src_val & 0x00FF, 1); break;
	case ST_H: callback(BPOINT_WRITE, ea & 0xFFFFFFFE, src_val & 0xFFFF, 2); break;
	case ST_W: callback(BPOINT_WRITE, ea & 0xFFFFFFFC, src_val, 4); break;

	case IN_B: callback(BPOINT_IO_READ, ea & 0xFFFFFFFF, 0, 1); break;
	case IN_H: callback(BPOINT_IO_READ, ea & 0xFFFFFFFE, 0, 2); break;
	case IN_W: callback(BPOINT_IO_READ, ea & 0xFFFFFFFC, 0, 4); break;

	case OUT_B: callback(BPOINT_IO_WRITE, ea & 0xFFFFFFFF, src_val & 0xFF, 1); break;
	case OUT_H: callback(BPOINT_IO_WRITE, ea & 0xFFFFFFFE, src_val & 0xFFFF, 2); break;
	case OUT_W: callback(BPOINT_IO_WRITE, ea & 0xFFFFFFFC, src_val, 4); break;

	// CAXI is listed explicitly but, like every remaining opcode,
	// reports nothing.
	case CAXI:
	default: break;
 }
}
#endif
505 
// Stores `val` into program register `n`.  Unlike SetPR(), this makes no
// special case for register 0.
#define SetPREG(n, val) { P_REG[n] = val; }
507 
SetSREG(v810_timestamp_t & timestamp,unsigned int which,uint32 value)508 INLINE void V810::SetSREG(v810_timestamp_t &timestamp, unsigned int which, uint32 value)
509 {
510 	switch(which)
511 	{
512 	 default:	// Reserved
513 		printf("LDSR to reserved system register: 0x%02x : 0x%08x\n", which, value);
514 		break;
515 
516          case ECR:      // Read-only
517                 break;
518 
519          case PIR:      // Read-only (obviously)
520                 break;
521 
522          case TKCW:     // Read-only
523                 break;
524 
525 	 case EIPSW:
526 	 case FEPSW:
527               	S_REG[which] = value & 0xFF3FF;
528 		break;
529 
530 	 case PSW:
531               	S_REG[which] = value & 0xFF3FF;
532 		RecalcIPendingCache();
533 		break;
534 
535 	 case EIPC:
536 	 case FEPC:
537 		S_REG[which] = value & 0xFFFFFFFE;
538 		break;
539 
540 	 case ADDTRE:
541   	        S_REG[ADDTRE] = value & 0xFFFFFFFE;
542         	printf("Address trap(unemulated): %08x\n", value);
543 		break;
544 
545 	 case CHCW:
546               	S_REG[CHCW] = value & 0x2;
547 
548               	switch(value & 0x31)
549               	{
550               	 default: printf("Undefined cache control bit combination: %08x\n", value);
551                           break;
552 
553               	 case 0x00: break;
554 
555               	 case 0x01: CacheClear(timestamp, (value >> 20) & 0xFFF, (value >> 8) & 0xFFF);
556                             break;
557 
558               	 case 0x10: CacheDump(timestamp, value & ~0xFF);
559                             break;
560 
561               	 case 0x20: CacheRestore(timestamp, value & ~0xFF);
562                             break;
563                	}
564 		break;
565 	}
566 }
567 
GetSREG(unsigned int which)568 INLINE uint32 V810::GetSREG(unsigned int which)
569 {
570 	uint32 ret;
571 
572 	if(which != 24 && which != 25 && which >= 8)
573 	{
574 	 printf("STSR from reserved system register: 0x%02x", which);
575         }
576 
577 	ret = S_REG[which];
578 
579 	return(ret);
580 }
581 
// Program-counter helpers used by the text of v810_oploop.inc.
// Each one branches on RB_AccurateMode, a constant defined by the including
// function: accurate mode keeps the address in PC, fast mode keeps a host
// pointer (PC_ptr) into the FastMap buffer plus PC_base to convert back.
//
// Macro parameters are parenthesized where they are expanded so that
// arguments containing low-precedence operators are evaluated as a whole.

// Sets the program counter to new_pc_raw.
#define RB_SETPC(new_pc_raw) 										\
			  {										\
			   const uint32 new_pc = (new_pc_raw);	/* So RB_SETPC(RB_GETPC()) won't mess up */	\
			   if(RB_AccurateMode)								\
			    PC = new_pc;								\
			   else										\
			   {										\
			    PC_ptr = &FastMap[(new_pc) >> V810_FAST_MAP_SHIFT][(new_pc)];		\
			    PC_base = PC_ptr - (new_pc);						\
			   }										\
			  }

// Adds delta to the program counter.  In fast mode this goes through
// RB_SETPC so that the FastMap page is looked up again.
#define RB_PCRELCHANGE(delta) { 				\
				if(RB_AccurateMode)		\
				 PC += (delta);			\
				else				\
				{				\
				 uint32 PC_tmp = RB_GETPC();	\
				 PC_tmp += (delta);		\
				 RB_SETPC(PC_tmp);		\
				}					\
			      }

// Small fixed steps: in fast mode these move PC_ptr directly, with no new
// FastMap lookup.
#define RB_INCPCBY2()	{ if(RB_AccurateMode) PC += 2; else PC_ptr += 2; }
#define RB_INCPCBY4()   { if(RB_AccurateMode) PC += 4; else PC_ptr += 4; }

#define RB_DECPCBY2()   { if(RB_AccurateMode) PC -= 2; else PC_ptr -= 2; }
#define RB_DECPCBY4()   { if(RB_AccurateMode) PC -= 4; else PC_ptr -= 4; }


// Define accurate mode defines
// RB_RDOP fetches the halfword at PC + PC_offset through RDOP(); any extra
// argument is forwarded to RDOP() on non-MSVC compilers.
#define RB_GETPC()      PC
#ifdef _MSC_VER
#define RB_RDOP(PC_offset) RDOP(timestamp, PC + (PC_offset))
#else
#define RB_RDOP(PC_offset, ...) RDOP(timestamp, PC + (PC_offset), ## __VA_ARGS__)
#endif
619 
// Accurate-mode interpreter loop without debugger support.
// The function body is the text of v810_oploop.inc, compiled here with
// RB_AccurateMode == true and with RB_ADDBT()/RB_CPUHOOK() expanding to
// nothing.  RB_GETPC()/RB_RDOP() are the accurate-mode definitions in effect
// at this point in the file.  How `event_handler` is used is determined by
// the included file.
void V810::Run_Accurate(int32 MDFN_FASTCALL (*event_handler)(const v810_timestamp_t timestamp))
{
 const bool RB_AccurateMode = true;

 // Debugger hooks compile away in this variant.
 #define RB_ADDBT(n,o,p)
 #define RB_CPUHOOK(n)

 #include "v810_oploop.inc"

 // Keep the hook macros local to this function.
 #undef RB_CPUHOOK
 #undef RB_ADDBT
}
632 
#ifdef WANT_DEBUGGER

/* Make sure class member variable v810_timestamp is synchronized to our local copy, since we'll read it externally if a system
   reset/power occurs when in step mode or similar.
*/
// Calls CPUHook (when one is installed) with the current timestamp and n.
// NOTE(review): timestamp_rl is not declared in this file; presumably it is
// the local timestamp variable of v810_oploop.inc - confirm there.
#define RB_CPUHOOK_DBG(n) { if(CPUHook) { v810_timestamp = timestamp_rl; CPUHook(timestamp_rl, n); } }

// Accurate-mode interpreter loop with debugger support.
// Same included body as Run_Accurate(), but RB_ADDBT() forwards to the ADDBT
// callback when one is set, RB_CPUHOOK() expands to RB_CPUHOOK_DBG(), and
// RB_DEBUGMODE is defined while the loop text is compiled.
void V810::Run_Accurate_Debug(int32 MDFN_FASTCALL (*event_handler)(const v810_timestamp_t timestamp))
{
 const bool RB_AccurateMode = true;

 #define RB_ADDBT(n,o,p) { if(ADDBT) ADDBT(n,o,p); }
 /* Make sure class member variable v810_timestamp is synchronized to our local copy, since we'll read it externally if a system
    reset/power occurs when in step mode or similar.
 */
 #define RB_CPUHOOK(n) RB_CPUHOOK_DBG(n)
 #define RB_DEBUGMODE

 #include "v810_oploop.inc"

 // Keep the hook macros local to this function.
 #undef RB_DEBUGMODE
 #undef RB_CPUHOOK
 #undef RB_ADDBT
}
#endif
658 
//
// Undefine accurate mode defines
//
#undef RB_GETPC
#undef RB_RDOP



//
// Define fast mode defines
//
// In fast mode the program counter is the distance of the host pointer
// PC_ptr from PC_base.
#define RB_GETPC()      	((uint32)(PC_ptr - PC_base))

// Fast-mode opcode fetch reads the little-endian halfword straight from the
// FastMap buffer; the second (and any further) argument is ignored.
#ifdef _MSC_VER
#define RB_RDOP(PC_offset, b) LoadU16_LE((uint16 *)&PC_ptr[PC_offset])
#else
#define RB_RDOP(PC_offset, ...) LoadU16_LE((uint16 *)&PC_ptr[PC_offset])
#endif
677 
// Fast-mode interpreter loop without debugger support.
// The function body is the text of v810_oploop.inc, compiled here with
// RB_AccurateMode == false and with RB_ADDBT()/RB_CPUHOOK() expanding to
// nothing.  RB_GETPC()/RB_RDOP() are the fast-mode (PC_ptr based)
// definitions in effect at this point in the file.
void V810::Run_Fast(int32 MDFN_FASTCALL (*event_handler)(const v810_timestamp_t timestamp))
{
 const bool RB_AccurateMode = false;

 // Debugger hooks compile away in this variant.
 #define RB_ADDBT(n,o,p)
 #define RB_CPUHOOK(n)

 #include "v810_oploop.inc"

 // Keep the hook macros local to this function.
 #undef RB_CPUHOOK
 #undef RB_ADDBT
}
690 
#ifdef WANT_DEBUGGER
// Fast-mode interpreter loop with debugger support.
// Same included body as Run_Fast(), but RB_ADDBT() forwards to the ADDBT
// callback when one is set, RB_CPUHOOK() expands to RB_CPUHOOK_DBG(), and
// RB_DEBUGMODE is defined while the loop text is compiled.
void V810::Run_Fast_Debug(int32 MDFN_FASTCALL (*event_handler)(const v810_timestamp_t timestamp))
{
 const bool RB_AccurateMode = false;

 #define RB_ADDBT(n,o,p) { if(ADDBT) ADDBT(n,o,p); }
 #define RB_CPUHOOK(n) RB_CPUHOOK_DBG(n)
 #define RB_DEBUGMODE

 #include "v810_oploop.inc"

 // Keep the hook macros local to this function.
 #undef RB_DEBUGMODE
 #undef RB_CPUHOOK
 #undef RB_ADDBT
}
#endif
707 
//
// Undefine fast mode defines
//
// After this point neither RB_GETPC() nor RB_RDOP() is defined; the code
// below uses the GetPC()/SetPC() member functions instead.
#undef RB_GETPC
#undef RB_RDOP
713 
Run(int32 MDFN_FASTCALL (* event_handler)(const v810_timestamp_t timestamp))714 v810_timestamp_t V810::Run(int32 MDFN_FASTCALL (*event_handler)(const v810_timestamp_t timestamp))
715 {
716  Running = true;
717 
718  #ifdef WANT_DEBUGGER
719  if(CPUHook || ADDBT)
720  {
721   if(EmuMode == V810_EMU_MODE_FAST)
722    Run_Fast_Debug(event_handler);
723   else
724    Run_Accurate_Debug(event_handler);
725  }
726  else
727  #endif
728  {
729   if(EmuMode == V810_EMU_MODE_FAST)
730    Run_Fast(event_handler);
731   else
732    Run_Accurate(event_handler);
733  }
734  return(v810_timestamp);
735 }
736 
Exit(void)737 void V810::Exit(void)
738 {
739  Running = false;
740 }
741 
#ifdef WANT_DEBUGGER
// Installs (or, with NULL, removes) the per-instruction hook and the
// branch-trace callback.  Run() selects the debug interpreter loops while
// either one is set.
void V810::SetCPUHook(void (*newhook)(const v810_timestamp_t timestamp, uint32 PC), void (*new_ADDBT)(uint32 old_PC, uint32 new_PC, uint32))
{
 ADDBT = new_ADDBT;
 CPUHook = newhook;
}
#endif
749 
GetPC(void)750 uint32 V810::GetPC(void)
751 {
752  if(EmuMode == V810_EMU_MODE_ACCURATE)
753   return(PC);
754  else
755  {
756   return(PC_ptr - PC_base);
757  }
758 }
759 
SetPC(uint32 new_pc)760 void V810::SetPC(uint32 new_pc)
761 {
762  if(EmuMode == V810_EMU_MODE_ACCURATE)
763   PC = new_pc;
764  else
765  {
766   PC_ptr = &FastMap[new_pc >> V810_FAST_MAP_SHIFT][new_pc];
767   PC_base = PC_ptr - new_pc;
768  }
769 }
770 
GetPR(const unsigned int which)771 uint32 V810::GetPR(const unsigned int which)
772 {
773  assert(which <= 0x1F);
774 
775 
776  return(which ? P_REG[which] : 0);
777 }
778 
SetPR(const unsigned int which,uint32 value)779 void V810::SetPR(const unsigned int which, uint32 value)
780 {
781  assert(which <= 0x1F);
782 
783  if(which)
784   P_REG[which] = value;
785 }
786 
GetSR(const unsigned int which)787 uint32 V810::GetSR(const unsigned int which)
788 {
789  assert(which <= 0x1F);
790 
791  return(GetSREG(which));
792 }
793 
SetSR(const unsigned int which,uint32 value)794 void V810::SetSR(const unsigned int which, uint32 value)
795 {
796  assert(which <= 0x1F);
797 
798 // SetSREG(timestamp, which, value);
799 }
800 
801 
// Single-bit operations for the bit-string instructions.  Each one combines
// bit `srcoff` of src_cache with bit `dstoff` of dst_cache and stores the
// result back into bit `dstoff` of dst_cache; all other destination bits
// are preserved.  The trailing semicolon is part of each macro.

// dst = src / dst = ~src
#define BSTR_OP_MOV dst_cache = (dst_cache & ~(1 << dstoff)) | (((src_cache >> srcoff) & 1) << dstoff);
#define BSTR_OP_NOT dst_cache = (dst_cache & ~(1 << dstoff)) | ((((src_cache >> srcoff) & 1) ^ 1) << dstoff);

// dst = dst ^ src / dst | src / dst & src
#define BSTR_OP_XOR dst_cache = dst_cache ^ (((src_cache >> srcoff) & 1) << dstoff);
#define BSTR_OP_OR dst_cache = dst_cache | (((src_cache >> srcoff) & 1) << dstoff);
#define BSTR_OP_AND dst_cache = dst_cache & ~((((src_cache >> srcoff) & 1) ^ 1) << dstoff);

// dst = dst ^ ~src / dst | ~src / dst & ~src
#define BSTR_OP_XORN dst_cache = dst_cache ^ ((((src_cache >> srcoff) & 1) ^ 1) << dstoff);
#define BSTR_OP_ORN dst_cache = dst_cache | ((((src_cache >> srcoff) & 1) ^ 1) << dstoff);
#define BSTR_OP_ANDN dst_cache = dst_cache & ~(((src_cache >> srcoff) & 1) << dstoff);
812 
BSTR_RWORD(v810_timestamp_t & timestamp,uint32 A)813 INLINE uint32 V810::BSTR_RWORD(v810_timestamp_t &timestamp, uint32 A)
814 {
815  if(MemReadBus32[A >> 24])
816  {
817   timestamp += 2;
818   return(MemRead32(timestamp, A));
819  }
820  else
821  {
822   uint32 ret;
823 
824   timestamp += 2;
825   ret = MemRead16(timestamp, A);
826 
827   timestamp += 2;
828   ret |= MemRead16(timestamp, A | 2) << 16;
829   return(ret);
830  }
831 }
832 
BSTR_WWORD(v810_timestamp_t & timestamp,uint32 A,uint32 V)833 INLINE void V810::BSTR_WWORD(v810_timestamp_t &timestamp, uint32 A, uint32 V)
834 {
835  if(MemWriteBus32[A >> 24])
836  {
837   timestamp += 2;
838   MemWrite32(timestamp, A, V);
839  }
840  else
841  {
842   timestamp += 2;
843   MemWrite16(timestamp, A, V & 0xFFFF);
844 
845   timestamp += 2;
846   MemWrite16(timestamp, A | 2, V >> 16);
847  }
848 }
849 
// Bit-by-bit driver for the bit-string transfer/logic operations.
// Expects these names in the expanding scope: len, src, dst, srcoff, dstoff,
// timestamp, plus src_cache/dst_cache and their have_*_cache flags.
// Per iteration it:
//   - loads the current source and destination words when they are not
//     already cached,
//   - applies `op` (one of the BSTR_OP_* macros) to one bit,
//   - advances both bit offsets modulo 32 and decrements len,
//   - on source wrap-around, moves to the next source word and drops the
//     cached copy,
//   - on destination wrap-around, writes the finished word back, moves to
//     the next destination word, and leaves the loop early when timestamp
//     has reached next_event_ts.
// After the loop, a destination word that is still cached is written back.
#define DO_BSTR(op) { 						\
                while(len)					\
                {						\
                 if(!have_src_cache)                            \
                 {                                              \
		  have_src_cache = TRUE;			\
                  src_cache = BSTR_RWORD(timestamp, src);       \
                 }                                              \
								\
		 if(!have_dst_cache)				\
		 {						\
		  have_dst_cache = TRUE;			\
                  dst_cache = BSTR_RWORD(timestamp, dst);       \
                 }                                              \
								\
		 op;						\
                 srcoff = (srcoff + 1) & 0x1F;			\
                 dstoff = (dstoff + 1) & 0x1F;			\
		 len--;						\
								\
		 if(!srcoff)					\
		 {                                              \
		  src += 4;					\
		  have_src_cache = FALSE;			\
		 }                                              \
								\
                 if(!dstoff)                                    \
                 {                                              \
                  BSTR_WWORD(timestamp, dst, dst_cache);        \
                  dst += 4;                                     \
		  have_dst_cache = FALSE;			\
		  if(timestamp >= next_event_ts)		\
		   break;					\
                 }                                              \
                }						\
                if(have_dst_cache)				\
                 BSTR_WWORD(timestamp, dst, dst_cache);		\
		}
888 
// Bit-string search: scans the source string one bit at a time for the
// first bit equal to `bit_test`.
//
// State is taken from and written back to the program registers:
//   P_REG[27] bit offset within the source word (0..31)
//   P_REG[28] number of bits left to examine
//   P_REG[29] count of bits skipped so far
//   P_REG[30] word-aligned source address
//
// `inc_mul` scales every step of the bit offset and word address.
// NOTE(review): presumably +1 for upward and -1 for downward searches;
// confirm against the callers.
//
// PSW_Z is cleared when a match is found and set when the length runs out
// without a match; it is left alone when the scan is merely interrupted.
// Returns true when the search should be resumed later (no match yet and
// bits remain), false when it is finished.
INLINE bool V810::Do_BSTR_Search(v810_timestamp_t &timestamp, const int inc_mul, unsigned int bit_test)
{
        uint32 srcoff = (P_REG[27] & 0x1F);
        uint32 len = P_REG[28];
        uint32 bits_skipped = P_REG[29];
        uint32 src = (P_REG[30] & 0xFFFFFFFC);
	bool found = false;

	#if 0
	// TODO: Better timing.
	if(!in_bstr)	// If we're just starting the execution of this instruction(kind of spaghetti-code), so FIXME if we change
			// bstr handling in v810_oploop.inc
	{
	 timestamp += 13 - 1;
	}
	#endif

	while(len)
	{
		// Fetch the current source word on demand; each fetch costs one
		// extra tick on top of what BSTR_RWORD() itself adds.
		if(!have_src_cache)
		{
		 have_src_cache = TRUE;
		 timestamp++;
		 src_cache = BSTR_RWORD(timestamp, src);
		}

		if(((src_cache >> srcoff) & 1) == bit_test)
		{
		 found = true;

		 /* Fix the bit offset and word address to "1 bit before" it was found */
		 srcoff -= inc_mul * 1;
		 if(srcoff & 0x20)		/* Handles 0x1F->0x20(0x00) and 0x00->0xFFFF... */
		 {
		  src -= inc_mul * 4;
		  srcoff &= 0x1F;
		 }
		 break;
		}

		// No match: step to the next bit.
	        srcoff = (srcoff + inc_mul * 1) & 0x1F;
		bits_skipped++;
	        len--;

		// Offset wrapped to 0: move to the next word, and yield if an
		// event is due.
	        if(!srcoff)
		{
	         have_src_cache = FALSE;
		 src += inc_mul * 4;
		 if(timestamp >= next_event_ts)
		  break;
		}
	}

	// Write the progress back so an interrupted search can resume.
        P_REG[27] = srcoff;
        P_REG[28] = len;
        P_REG[29] = bits_skipped;
        P_REG[30] = src;


        if(found)               // Set Z flag to 0 if the bit was found
         SetFlag(PSW_Z, 0);
        else if(!len)           // ...and if the search is over, and the bit was not found, set it to 1
         SetFlag(PSW_Z, 1);

        if(found)               // Bit found, so don't continue the search.
         return(false);

        return((bool)len);      // Continue the search if any bits are left to search.
}
957 
// Executes one step of a bit-string instruction selected by sub_op.
//
//  timestamp: running cycle counter, passed through to the bit-string helpers.
//  sub_op:    bit-string sub-opcode.  Values 0x0-0x3 select a bit search, values 0x8-0xF select a
//             bitwise source->destination operation; everything else is rejected as an invalid opcode
//             (assuming sub_op is non-negative).
//  arg1:      not referenced anywhere in this function body.
//
// Returns false when the sub-opcode is invalid.  For the bitwise operations it returns true while the
// remaining length written back to r28 is non-zero; for searches it returns whatever Do_BSTR_Search()
// returns.  NOTE(review): a true result presumably tells the caller the instruction is unfinished and
// must be resumed - confirm against the call site.
bool V810::bstr_subop(v810_timestamp_t &timestamp, int sub_op, int arg1)
{
 // Reject sub-opcodes outside the two accepted ranges (0x0-0x3 and 0x8-0xF).
 if((sub_op >= 0x10) || (!(sub_op & 0x8) && sub_op >= 0x4))
 {
  printf("%08x\tBSR Error: %04x\n", PC,sub_op);

  // Move PC back by 2 before raising the exception (presumably so that the saved PC refers to the
  // offending instruction rather than the one after it - TODO confirm).
  SetPC(GetPC() - 2);
  Exception(INVALID_OP_HANDLER_ADDR, ECODE_INVALID_OP);

  return(false);
 }

// printf("BSTR: %02x, %02x %02x; src: %08x, dst: %08x, len: %08x\n", sub_op, P_REG[27], P_REG[26], P_REG[30], P_REG[29], P_REG[28]);

 // Bit 3 set: one of the bitwise source->destination operations.
 if(sub_op & 0x08)
 {
	// Operands come from fixed registers: r26/r27 hold the destination/source bit offsets (only the
	// low 5 bits are used), r28 the length, and r29/r30 the destination/source addresses, which are
	// forced to word alignment here.
	// NOTE(review): these locals are presumably referenced by name inside the DO_BSTR() macro, which
	// is defined outside this view - do not rename them without checking the macro.
	uint32 dstoff = (P_REG[26] & 0x1F);
	uint32 srcoff = (P_REG[27] & 0x1F);
	uint32 len =     P_REG[28];
	uint32 dst =    (P_REG[29] & 0xFFFFFFFC);
	uint32 src =    (P_REG[30] & 0xFFFFFFFC);

	// The region below is compiled out.  It is an unfinished draft of a word-at-a-time implementation
	// and still contains placeholder tokens (e.g. OP_THINGY_HERE), so it cannot simply be enabled.
#if 0
	// Be careful not to cause 32-bit integer overflow, and careful about not shifting by 32.
	// TODO:

	// Read src[0], src[4] into shifter.
	// Read dest[0].
	DO_BSTR_PROLOGUE();	// if(len) { blah blah blah masking blah }
                src_cache = BSTR_RWORD(timestamp, src);

		if((uint64)(srcoff + len) > 0x20)
                 src_cache |= (uint64)BSTR_RWORD(timestamp, src + 4) << 32;

                dst_cache = BSTR_RWORD(timestamp, dst);

		if(len)
		{
		 uint32 dst_preserve_mask;
		 uint32 dst_change_mask;

		 dst_preserve_mask = (1U << dstoff) - 1;

		 if((uint64)(dstoff + len) < 0x20)
 		  dst_preserve_mask |= ((1U << ((0x20 - (dstoff + len)) & 0x1F)) - 1) << (dstoff + len);

		 dst_change_mask = ~dst_preserve_mask;

		 src_cache = BSTR_RWORD(timestamp, src);
		 src_cache |= (uint64)BSTR_RWORD(timestamp, src + 4) << 32;
		 dst_cache = BSTR_RWORD(timestamp, dst);

		 dst_cache = (dst_cache & dst_preserve_mask) | ((dst_cache OP_THINGY_HERE (src_cache >> srcoff)) & dst_change_mask);
		 BSTR_WWORD(timestamp, dst, dst_cache);

		 if((uint64)(dstoff + len) < 0x20)
		 {
	          srcoff += len;
		  dstoff += len;
		  len = 0;
		 }
		 else
		 {
		  srcoff += (0x20 - dstoff);
		  dstoff = 0;
		  len -= (0x20 - dstoff);
		  dst += 4;
		 }

		 if(srcoff >= 0x20)
		 {
		  srcoff &= 0x1F;
		  src += 4;

		  if(len)
		  {
		   src_cache >>= 32;
		   src_cache |= (uint64)BSTR_RWORD(timestamp, src + 4) << 32;
		  }
		 }
		}

	DO_BSTR_PRIMARY();	// while(len >= 32) (do allow interruption; interrupt and emulator-return -
				// they must be handled differently!)
		while(len >= 32)
  		{
                 dst_cache = BSTR_RWORD(timestamp, dst);
                 dst_cache = OP_THINGY_HERE(dst_cache, src_cache >> srcoff);
		 BSTR_WWORD(timestamp, dst, dst_cache);
		 len -= 32;
		 dst += 4;
		 src += 4;
                 src_cache >>= 32;
                 src_cache |= (uint64)BSTR_RWORD(timestamp, src + 4) << 32;
		}

	DO_BSTR_EPILOGUE();	// if(len) { blah blah blah masking blah }
		if(len)
		{
		 uint32 dst_preserve_mask;
		 uint32 dst_change_mask;

		 dst_preserve_mask = (1U << ((0x20 - len) & 0x1F) << len;
		 dst_change_mask = ~dst_preserve_mask;

                 dst_cache = BSTR_RWORD(timestamp, dst);
		 dst_cache = OP_THINGY_HERE(dst_cache, src_cache >> srcoff);
		 BSTR_WWORD(timestamp, dst, dst_cache);
		 dstoff += len;
		 srcoff += len;

                 if(srcoff >= 0x20)
                 {
                  srcoff &= 0x1F;
                  src += 4;
                 }
		 len = 0;
		}
#endif

	// Run the selected operation; each case expands DO_BSTR() with the matching operator macro.
	switch(sub_op)
	{
	 case ORBSU: DO_BSTR(BSTR_OP_OR); break;

	 case ANDBSU: DO_BSTR(BSTR_OP_AND); break;

	 case XORBSU: DO_BSTR(BSTR_OP_XOR); break;

	 case MOVBSU: DO_BSTR(BSTR_OP_MOV); break;

	 case ORNBSU: DO_BSTR(BSTR_OP_ORN); break;

	 case ANDNBSU: DO_BSTR(BSTR_OP_ANDN); break;

	 case XORNBSU: DO_BSTR(BSTR_OP_XORN); break;

	 case NOTBSU: DO_BSTR(BSTR_OP_NOT); break;
	}

	// Write the (possibly advanced) offsets, length and addresses back to the operand registers.
        P_REG[26] = dstoff;
        P_REG[27] = srcoff;
        P_REG[28] = len;
        P_REG[29] = dst;
        P_REG[30] = src;

	// True while there is still length left to process.
	return((bool)P_REG[28]);
 }
 else
 {
  printf("BSTR Search: %02x\n", sub_op);
 }
 // Bit 3 clear: bit search.  Bit 0 of sub_op selects the step passed as the second argument
 // (-1 when set, +1 when clear) and bit 1 is passed as the third argument (the bit value tested for).
 return(Do_BSTR_Search(timestamp, ((sub_op & 1) ? -1 : 1), (sub_op & 0x2) >> 1));
}
1111 
SetFPUOPNonFPUFlags(uint32 result)1112 INLINE void V810::SetFPUOPNonFPUFlags(uint32 result)
1113 {
1114                  // Now, handle flag setting
1115                  SetFlag(PSW_OV, 0);
1116 
1117                  if(!(result & 0x7FFFFFFF)) // Check to see if exponent and mantissa are 0
1118 		 {
1119 		  // If Z flag is set, S and CY should be clear, even if it's negative 0(confirmed on real thing with subf.s, at least).
1120                   SetFlag(PSW_Z, 1);
1121                   SetFlag(PSW_S, 0);
1122                   SetFlag(PSW_CY, 0);
1123 		 }
1124                  else
1125 		 {
1126                   SetFlag(PSW_Z, 0);
1127                   SetFlag(PSW_S, result & 0x80000000);
1128                   SetFlag(PSW_CY, result & 0x80000000);
1129 		 }
1130                  //printf("MEOW: %08x\n", S_REG[PSW] & (PSW_S | PSW_CY));
1131 }
1132 
FPU_DoesExceptionKillResult(void)1133 bool V810::FPU_DoesExceptionKillResult(void)
1134 {
1135  const uint32 float_exception_flags = fpo.get_flags();
1136 
1137  if(float_exception_flags & V810_FP_Ops::flag_reserved)
1138   return(true);
1139 
1140  if(float_exception_flags & V810_FP_Ops::flag_invalid)
1141   return(true);
1142 
1143  if(float_exception_flags & V810_FP_Ops::flag_divbyzero)
1144   return(true);
1145 
1146 
1147  // Return false here, so that the result of this calculation IS put in the output register.
1148  // Wrap the exponent on overflow, rather than generating an infinity.  The wrapping behavior is specified in IEE 754 AFAIK,
1149  // and is useful in cases where you divide a huge number
1150  // by another huge number, and fix the result afterwards based on the number of overflows that occurred.  Probably requires some custom assembly code,
1151  // though.  And it's the kind of thing you'd see in an engineering or physics program, not in a perverted video game :b).
1152  if(float_exception_flags & V810_FP_Ops::flag_overflow)
1153   return(false);
1154 
1155  return(false);
1156 }
1157 
FPU_DoException(void)1158 void V810::FPU_DoException(void)
1159 {
1160  const uint32 float_exception_flags = fpo.get_flags();
1161 
1162  if(float_exception_flags & V810_FP_Ops::flag_reserved)
1163  {
1164   S_REG[PSW] |= PSW_FRO;
1165 
1166   SetPC(GetPC() - 4);
1167   Exception(FPU_HANDLER_ADDR, ECODE_FRO);
1168 
1169   return;
1170  }
1171 
1172  if(float_exception_flags & V810_FP_Ops::flag_invalid)
1173  {
1174   S_REG[PSW] |= PSW_FIV;
1175 
1176   SetPC(GetPC() - 4);
1177   Exception(FPU_HANDLER_ADDR, ECODE_FIV);
1178 
1179   return;
1180  }
1181 
1182  if(float_exception_flags & V810_FP_Ops::flag_divbyzero)
1183  {
1184   S_REG[PSW] |= PSW_FZD;
1185 
1186   SetPC(GetPC() - 4);
1187   Exception(FPU_HANDLER_ADDR, ECODE_FZD);
1188 
1189   return;
1190  }
1191 
1192  if(float_exception_flags & V810_FP_Ops::flag_underflow)
1193  {
1194   S_REG[PSW] |= PSW_FUD;
1195  }
1196 
1197  if(float_exception_flags & V810_FP_Ops::flag_inexact)
1198  {
1199   S_REG[PSW] |= PSW_FPR;
1200  }
1201 
1202  // FPR can be set along with overflow, so put the overflow exception handling at the end here(for Exception() messes with PSW).
1203  //
1204  if(float_exception_flags & V810_FP_Ops::flag_overflow)
1205  {
1206   S_REG[PSW] |= PSW_FOV;
1207 
1208   SetPC(GetPC() - 4);
1209   Exception(FPU_HANDLER_ADDR, ECODE_FOV);
1210  }
1211 }
1212 
IsSubnormal(uint32 fpval)1213 bool V810::IsSubnormal(uint32 fpval)
1214 {
1215  if( ((fpval >> 23) & 0xFF) == 0 && (fpval & ((1 << 23) - 1)) )
1216   return(true);
1217 
1218  return(false);
1219 }
1220 
FPU_Math_Template(uint32 (V810_FP_Ops::* func)(uint32,uint32),uint32 arg1,uint32 arg2)1221 INLINE void V810::FPU_Math_Template(uint32 (V810_FP_Ops::*func)(uint32, uint32), uint32 arg1, uint32 arg2)
1222  {
1223   uint32 result;
1224 
1225  fpo.clear_flags();
1226  result = (fpo.*func)(P_REG[arg1], P_REG[arg2]);
1227 
1228   if(!FPU_DoesExceptionKillResult())
1229   {
1230    SetFPUOPNonFPUFlags(result);
1231    SetPREG(arg1, result);
1232   }
1233   FPU_DoException();
1234 }
1235 
fpu_subop(v810_timestamp_t & timestamp,int sub_op,int arg1,int arg2)1236 void V810::fpu_subop(v810_timestamp_t &timestamp, int sub_op, int arg1, int arg2)
1237 {
1238  //printf("FPU: %02x\n", sub_op);
1239  if(VBMode)
1240  {
1241   switch(sub_op)
1242   {
1243    case XB: timestamp++;	// Unknown
1244 	    P_REG[arg1] = (P_REG[arg1] & 0xFFFF0000) | ((P_REG[arg1] & 0xFF) << 8) | ((P_REG[arg1] & 0xFF00) >> 8);
1245 	    return;
1246 
1247    case XH: timestamp++;	// Unknown
1248 	    P_REG[arg1] = (P_REG[arg1] << 16) | (P_REG[arg1] >> 16);
1249 	    return;
1250 
1251    // Does REV use arg1 or arg2 for the source register?
1252    case REV: timestamp++;	// Unknown
1253 		printf("Revvie bits\n");
1254 	     {
1255 	      // Public-domain code snippet from: http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
1256       	      uint32 v = P_REG[arg2]; // 32-bit word to reverse bit order
1257 
1258 	      // swap odd and even bits
1259 	      v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
1260 	      // swap consecutive pairs
1261 	      v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
1262 	      // swap nibbles ...
1263 	      v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
1264 	      // swap bytes
1265 	      v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
1266 	      // swap 2-byte long pairs
1267 	      v = ( v >> 16             ) | ( v               << 16);
1268 
1269 	      P_REG[arg1] = v;
1270 	     }
1271 	     return;
1272 
1273    case MPYHW: timestamp += 9 - 1;	// Unknown?
1274 	       P_REG[arg1] = (int32)(int16)(P_REG[arg1] & 0xFFFF) * (int32)(int16)(P_REG[arg2] & 0xFFFF);
1275 	       return;
1276   }
1277  }
1278 
1279  switch(sub_op)
1280  {
1281         // Virtual-Boy specific(probably!)
1282 	default:
1283 		{
1284 		 SetPC(GetPC() - 4);
1285                  Exception(INVALID_OP_HANDLER_ADDR, ECODE_INVALID_OP);
1286 		}
1287 		break;
1288 
1289 	case CVT_WS:
1290 		timestamp += 5;
1291 		{
1292 		 uint32 result;
1293 
1294                  fpo.clear_flags();
1295 		 result = fpo.itof(P_REG[arg2]);
1296 
1297 		 if(!FPU_DoesExceptionKillResult())
1298 		 {
1299 		  SetPREG(arg1, result);
1300 		  SetFPUOPNonFPUFlags(result);
1301 		 }
1302 		 FPU_DoException();
1303 		}
1304 		break;	// End CVT.WS
1305 
1306 	case CVT_SW:
1307 		timestamp += 8;
1308 		{
1309 		 int32 result;
1310 
1311                  fpo.clear_flags();
1312 		 result = fpo.ftoi(P_REG[arg2], false);
1313 
1314 		 if(!FPU_DoesExceptionKillResult())
1315 		 {
1316 		  SetPREG(arg1, result);
1317                   SetFlag(PSW_OV, 0);
1318                   SetSZ(result);
1319 		 }
1320 		 FPU_DoException();
1321 		}
1322 		break;	// End CVT.SW
1323 
1324 	case ADDF_S: timestamp += 8;
1325 		     FPU_Math_Template(&V810_FP_Ops::add, arg1, arg2);
1326 		     break;
1327 
1328 	case SUBF_S: timestamp += 11;
1329 		     FPU_Math_Template(&V810_FP_Ops::sub, arg1, arg2);
1330 		     break;
1331 
1332         case CMPF_S: timestamp += 6;
1333 		     // Don't handle this like subf.s because the flags
1334 		     // have slightly different semantics(mostly regarding underflow/subnormal results) (confirmed on real V810).
1335 		     fpo.clear_flags();
1336                      {
1337 		      int32 result;
1338 
1339 		      result = fpo.cmp(P_REG[arg1], P_REG[arg2]);
1340 
1341 	              if(!FPU_DoesExceptionKillResult())
1342 		      {
1343 		       SetFPUOPNonFPUFlags(result);
1344 		      }
1345 		      FPU_DoException();
1346 		       }
1347                      break;
1348 
1349 	case MULF_S: timestamp += 7;
1350 		     FPU_Math_Template(&V810_FP_Ops::mul, arg1, arg2);
1351 		     break;
1352 
1353 	case DIVF_S: timestamp += 43;
1354 		     FPU_Math_Template(&V810_FP_Ops::div, arg1, arg2);
1355 		     break;
1356 
1357 	case TRNC_SW:
1358                 timestamp += 7;
1359                 {
1360                  int32 result;
1361 
1362 		 fpo.clear_flags();
1363                  result = fpo.ftoi(P_REG[arg2], true);
1364 
1365                  if(!FPU_DoesExceptionKillResult())
1366                  {
1367                   SetPREG(arg1, result);
1368 		  SetFlag(PSW_OV, 0);
1369 		  SetSZ(result);
1370                  }
1371 		 FPU_DoException();
1372                 }
1373                 break;	// end TRNC.SW
1374 	}
1375 }
1376 
1377 // Generate exception
Exception(uint32 handler,uint16 eCode)1378 void V810::Exception(uint32 handler, uint16 eCode)
1379 {
1380  // Exception overhead is unknown.
1381 
1382 #ifdef WANT_DEBUGGER
1383  if(ADDBT)
1384  {
1385   uint32 old_PC = GetPC();
1386 
1387   if((eCode & 0xFFE0) == 0xFFA0) // Trap instruction(PC is pointing to next instruction at this point)
1388    old_PC -= 2;
1389 
1390   ADDBT(old_PC, handler, eCode);
1391  }
1392 #endif
1393 
1394     printf("Exception: %08x %04x\n", handler, eCode);
1395 
1396     // Invalidate our bitstring state(forces the instruction to be re-read, and the r/w buffers reloaded).
1397     in_bstr = FALSE;
1398     have_src_cache = FALSE;
1399     have_dst_cache = FALSE;
1400 
1401     if(S_REG[PSW] & PSW_NP) // Fatal exception
1402     {
1403      printf("Fatal exception; Code: %08x, ECR: %08x, PSW: %08x, PC: %08x\n", eCode, S_REG[ECR], S_REG[PSW], PC);
1404      Halted = HALT_FATAL_EXCEPTION;
1405      IPendingCache = 0;
1406      return;
1407     }
1408     else if(S_REG[PSW] & PSW_EP)  //Double Exception
1409     {
1410      S_REG[FEPC] = GetPC();
1411      S_REG[FEPSW] = S_REG[PSW];
1412 
1413      S_REG[ECR] = (S_REG[ECR] & 0xFFFF) | (eCode << 16);
1414      S_REG[PSW] |= PSW_NP;
1415      S_REG[PSW] |= PSW_ID;
1416      S_REG[PSW] &= ~PSW_AE;
1417 
1418      SetPC(0xFFFFFFD0);
1419      IPendingCache = 0;
1420      return;
1421     }
1422     else 	// Regular exception
1423     {
1424      S_REG[EIPC] = GetPC();
1425      S_REG[EIPSW] = S_REG[PSW];
1426      S_REG[ECR] = (S_REG[ECR] & 0xFFFF0000) | eCode;
1427      S_REG[PSW] |= PSW_EP;
1428      S_REG[PSW] |= PSW_ID;
1429      S_REG[PSW] &= ~PSW_AE;
1430 
1431      SetPC(handler);
1432      IPendingCache = 0;
1433      return;
1434     }
1435 }
1436 
StateAction(StateMem * sm,int load,int data_only)1437 int V810::StateAction(StateMem *sm, int load, int data_only)
1438 {
1439  uint32 *cache_tag_temp = NULL;
1440  uint32 *cache_data_temp = NULL;
1441  bool *cache_data_valid_temp = NULL;
1442  uint32 PC_tmp = GetPC();
1443 
1444  if(EmuMode == V810_EMU_MODE_ACCURATE)
1445  {
1446   cache_tag_temp = (uint32 *)malloc(sizeof(uint32 *) * 128);
1447   cache_data_temp = (uint32 *)malloc(sizeof(uint32 *) * 128 * 2);
1448   cache_data_valid_temp = (bool *)malloc(sizeof(bool *) * 128 * 2);
1449 
1450   if(!cache_tag_temp || !cache_data_temp || !cache_data_valid_temp)
1451   {
1452    if(cache_tag_temp)
1453     free(cache_tag_temp);
1454 
1455    if(cache_data_temp)
1456     free(cache_data_temp);
1457 
1458    if(cache_data_valid_temp)
1459     free(cache_data_valid_temp);
1460 
1461    return(0);
1462   }
1463   if(!load)
1464   {
1465    for(int i = 0; i < 128; i++)
1466    {
1467     cache_tag_temp[i] = Cache[i].tag;
1468 
1469     cache_data_temp[i * 2 + 0] = Cache[i].data[0];
1470     cache_data_temp[i * 2 + 1] = Cache[i].data[1];
1471 
1472     cache_data_valid_temp[i * 2 + 0] = Cache[i].data_valid[0];
1473     cache_data_valid_temp[i * 2 + 1] = Cache[i].data_valid[1];
1474    }
1475   }
1476   else // If we're loading, go ahead and clear the cache temporaries,
1477        // in case the save state was saved while in fast mode
1478        // and the cache data isn't present and thus won't be loaded.
1479   {
1480    memset(cache_tag_temp, 0, sizeof(uint32) * 128);
1481    memset(cache_data_temp, 0, sizeof(uint32) * 128 * 2);
1482    memset(cache_data_valid_temp, 0, sizeof(bool) * 128 * 2);
1483   }
1484  }
1485 
1486  int32 next_event_ts_delta = next_event_ts - v810_timestamp;
1487 
1488  SFORMAT StateRegs[] =
1489  {
1490   SFARRAY32(P_REG, 32),
1491   SFARRAY32(S_REG, 32),
1492   SFVARN(PC_tmp, "PC"),
1493   SFVAR(Halted),
1494 
1495   SFVAR(lastop),
1496 
1497   SFARRAY32(cache_tag_temp, 128),
1498   SFARRAY32(cache_data_temp, 128 * 2),
1499   SFARRAYB(cache_data_valid_temp, 128 * 2),
1500 
1501   SFVAR(ilevel),		// Perhaps remove in future?
1502   SFVAR(next_event_ts_delta),
1503 
1504   // Bitstring stuff:
1505   SFVAR(src_cache),
1506   SFVAR(dst_cache),
1507   SFVAR(have_src_cache),
1508   SFVAR(have_dst_cache),
1509   SFVAR(in_bstr),
1510   SFVAR(in_bstr_to),
1511 
1512   SFEND
1513  };
1514 
1515  int ret = MDFNSS_StateAction(sm, load, data_only, StateRegs, "V810", false);
1516 
1517  if(load)
1518  {
1519   // std::max is sanity check for a corrupted save state to not crash emulation,
1520   // std::min<int64>(0x7FF... is a sanity check and for the case where next_event_ts is set to an extremely large value to
1521   // denote that it's not happening anytime soon, which could cause an overflow if our current timestamp is larger
1522   // than what it was when the state was saved.
1523   next_event_ts = std::max<int64>(v810_timestamp, std::min<int64>(0x7FFFFFFF, (int64)v810_timestamp + next_event_ts_delta));
1524 
1525   RecalcIPendingCache();
1526 
1527   SetPC(PC_tmp);
1528   if(EmuMode == V810_EMU_MODE_ACCURATE)
1529   {
1530    for(int i = 0; i < 128; i++)
1531    {
1532     Cache[i].tag = cache_tag_temp[i];
1533 
1534     Cache[i].data[0] = cache_data_temp[i * 2 + 0];
1535     Cache[i].data[1] = cache_data_temp[i * 2 + 1];
1536 
1537     Cache[i].data_valid[0] = cache_data_valid_temp[i * 2 + 0];
1538     Cache[i].data_valid[1] = cache_data_valid_temp[i * 2 + 1];
1539 
1540     //printf("%d %08x %08x %08x %d %d\n", i, Cache[i].tag << 10, Cache[i].data[0], Cache[i].data[1], Cache[i].data_valid[0], Cache[i].data_valid[1]);
1541    }
1542   }
1543  }
1544 
1545  if(EmuMode == V810_EMU_MODE_ACCURATE)
1546  {
1547   free(cache_tag_temp);
1548   free(cache_data_temp);
1549   free(cache_data_valid_temp);
1550  }
1551 
1552  return(ret);
1553 }
1554