1 // NES 6502 CPU emulator run function
2 
3 #if 0
4 /* Define these macros in the source file before #including this file.
5 - Parameters might be expressions, so they are best evaluated only once,
6 though they NEVER have side-effects, so multiple evaluation is OK.
7 - Output parameters might be a multiple-assignment expression like "a=x",
8 so they must NOT be parenthesized.
9 - Except where noted, time() and related functions will NOT work
10 correctly inside a macro. TIME() is always correct, and FLUSH_TIME() and
11 CACHE_TIME() allow the time changing functions to work.
12 - Macros "returning" void may use a {} statement block. */
13 
14 	// 0 <= addr <= 0xFFFF + page_size
15 	// time functions can be used
16 	int  READ_MEM(  addr_t );
17 	void WRITE_MEM( addr_t, int data );
18 	// 0 <= READ_MEM() <= 0xFF
19 
20 	// 0 <= addr <= 0x1FF
21 	int  READ_LOW(  addr_t );
22 	void WRITE_LOW( addr_t, int data );
23 	// 0 <= READ_LOW() <= 0xFF
24 
25 	// Often-used instructions attempt these before using a normal memory access.
26 	// Optional; defaults to READ_MEM() and WRITE_MEM()
27 	bool CAN_READ_FAST( addr_t ); // if true, uses result of READ_FAST
28 	void READ_FAST( addr_t, int& out ); // ALWAYS called BEFORE CAN_READ_FAST
29 	bool CAN_WRITE_FAST( addr_t ); // if true, uses WRITE_FAST instead of WRITE_MEM
30 	void WRITE_FAST( addr_t, int data );
31 
32 	// Used by instructions most often used to access the NES PPU (LDA abs and BIT abs).
33 	// Optional; defaults to READ_MEM.
34 	void READ_PPU(  addr_t, int& out );
35 	// 0 <= out <= 0xFF
36 
37 // The following can be used within macros:
38 
39 	// Current time
40 	time_t TIME();
41 
42 	// Allows use of time functions
43 	void FLUSH_TIME();
44 
45 	// Must be used before end of macro if FLUSH_TIME() was used earlier
46 	void CACHE_TIME();
47 
48 // Configuration (optional; commented behavior if defined)
49 
50 	// Emulates dummy reads for indexed instructions
51 	#define NES_CPU_DUMMY_READS 1
52 
53 	// Optimizes as if map_code( 0, 0x10000 + cpu_padding, FLAT_MEM ) is always in effect
54 	#define FLAT_MEM my_mem_array
55 
56 	// Expanded just before beginning of code, to help debugger
57 	#define CPU_BEGIN void my_run_cpu() {
58 
59 #endif
60 
61 /* Copyright (C) 2003-2008 Shay Green. This module is free software; you
62 can redistribute it and/or modify it under the terms of the GNU Lesser
63 General Public License as published by the Free Software Foundation; either
64 version 2.1 of the License, or (at your option) any later version. This
65 module is distributed in the hope that it will be useful, but WITHOUT ANY
66 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
67 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
68 details. You should have received a copy of the GNU Lesser General Public
69 License along with this module; if not, write to the Free Software Foundation,
70 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
71 
72 // Allows MWCW debugger to step through code properly
// CPU_BEGIN is optional and supplied by the including file; when it is defined,
// it is expanded here, immediately before the emulator code begins.
73 #ifdef CPU_BEGIN
74 	CPU_BEGIN
75 #endif
76 
77 // Time
// s_time is the locally cached time counter and s.base its base; TIME() is their sum.
// FLUSH_TIME() stores the cached counter into s.time and CACHE_TIME() reloads it from
// s.time. Both adjust by whichever `time_offset` is in scope at the point of expansion.
78 #define TIME()          (s_time + s.base)
79 #define FLUSH_TIME()    {s.time = s_time - time_offset;}
80 #define CACHE_TIME()    {s_time = s.time + time_offset;}
81 
82 // Defaults
// Used when the including file did not provide the optional hooks.
// Without a fast-write hook, CAN_WRITE_FAST is always 0 and WRITE_FAST expands to nothing,
// so callers always take their WRITE_MEM path.
83 #ifndef CAN_WRITE_FAST
84 	#define CAN_WRITE_FAST( addr )      0
85 	#define WRITE_FAST( addr, data )
86 #endif
87 
// Without a fast-read hook, CAN_READ_FAST is always 0 and READ_FAST expands to nothing.
88 #ifndef CAN_READ_FAST
89 	#define CAN_READ_FAST( addr )       0
90 	#define READ_FAST( addr, out )
91 #endif
92 
// Default READ_PPU: a READ_MEM bracketed by FLUSH_TIME()/CACHE_TIME().
// `out` is assigned with '=', and is not parenthesized so it may be a
// multiple-assignment expression such as "a = nz".
93 #ifndef READ_PPU
94 	#define READ_PPU( addr, out )\
95 	{\
96 		FLUSH_TIME();\
97 		out = READ_MEM( addr );\
98 		CACHE_TIME();\
99 	}
100 #endif
101 
// Stack accesses go through the low-memory accessors.
102 #define READ_STACK  READ_LOW
103 #define WRITE_STACK WRITE_LOW
104 
105 // Dummy reads
// DUMMY_READ( addr, idx ): addr is an effective address that already includes index idx.
// When the low byte of addr is smaller than idx (for 0 <= idx <= 0xFF this means adding
// the index carried out of the low byte), an extra READ_MEM of addr - 0x100 is issued.
// The local time_offset of 1 shadows the outer one, so FLUSH_TIME() stores s_time - 1
// for the duration of that read and CACHE_TIME() adds the 1 back.
// When NES_CPU_DUMMY_READS is not enabled the macro expands to nothing.
// The macro arguments are not parenthesized in the expansion; pass simple expressions.
106 #if NES_CPU_DUMMY_READS
107 	// TODO: optimize time handling
108 	#define DUMMY_READ( addr, idx ) \
109 		if ( (addr & 0xFF) < idx )\
110 		{\
111 			int const time_offset = 1;\
112 			FLUSH_TIME();\
113 			READ_MEM( (addr - 0x100) );\
114 			CACHE_TIME();\
115 		}
116 #else
117 	#define DUMMY_READ( addr, idx )
118 #endif
119 
120 // Code
// CODE_PAGE( addr ) yields the byte array holding code for addr and CODE_OFFSET( addr )
// the index into it. With FLAT_MEM a single array is indexed directly by the address;
// otherwise the page pointer comes from s.code_map, using NES_CPU_PAGE/NES_CPU_OFFSET
// (defined outside this file).
121 #ifdef FLAT_MEM
122 	#define CODE_PAGE(   addr ) (FLAT_MEM)
123 	#define CODE_OFFSET( addr ) (addr)
124 #else
125 	#define CODE_PAGE( addr )   (s.code_map [NES_CPU_PAGE( addr )])
126 	#define CODE_OFFSET( addr ) NES_CPU_OFFSET( addr )
127 #endif
// READ_CODE( addr ) is the code byte at addr, as an lvalue expression.
128 #define READ_CODE( addr )   (CODE_PAGE( addr ) [CODE_OFFSET( addr )])
129 
130 // Stack
// The local `sp` is kept biased: SET_SP( v ) stores (v + 1) with bit 8 set, and GET_SP()
// undoes that to recover the 8-bit register value.
// SP( o ) is sp moved by offset o: for o > 0 an extra 0x100 is subtracted before bit 8
// is OR-ed back in, so the result wraps within 0x100-0x1FF for the small literal offsets
// used in this file. `o` is evaluated more than once and is not parenthesized.
131 #define SET_SP( v ) (sp = ((v) + 1) | 0x100)
132 #define GET_SP()    ((sp - 1) & 0xFF)
133 #define SP( o )     ((sp + (o - (o>0)*0x100)) | 0x100)
134 
135 // Truncation
// Casts through the fixed-width BOOST integer types; the trailing comment on each line
// gives the equivalent arithmetic expression.
136 #define BYTE(  n ) ((BOOST::uint8_t ) (n)) /* (unsigned) n & 0xFF */
137 #define SBYTE( n ) ((BOOST::int8_t  ) (n)) /* (BYTE( n ) ^ 0x80) - 0x80 */
138 #define WORD(  n ) ((BOOST::uint16_t) (n)) /* (unsigned) n & 0xFFFF */
139 
140 // Flags with hex value for clarity when used as mask.
141 // Stored in indicated variable during emulation.
// The trailing comment on each constant names the local (nz, flags or c) that holds that
// bit while running. r20 and b10 have no backing local; they are only OR-ed in where a
// status byte is pushed (PHP and the interrupt sequence).
142 int const n80 = 0x80; // nz
143 int const v40 = 0x40; // flags
144 int const r20 = 0x20;
145 int const b10 = 0x10;
146 int const d08 = 0x08; // flags
147 int const i04 = 0x04; // flags
148 int const z02 = 0x02; // nz
149 int const c01 = 0x01; // c
150 
// Non-zero when the N flag is set: N is represented by bit 7 or bit 15 of nz.
151 #define IS_NEG (nz & 0x8080)
152 
// GET_FLAGS( out ): assemble a status byte into `out` from the split representation:
// V, D and I come from `flags`; N from bit 7 or bit 15 of nz; C from bit 8 of c;
// Z is added when the low byte of nz is zero. The B and unused (0x20) bits are not set.
153 #define GET_FLAGS( out ) \
154 {\
155 	out = flags & (v40 | d08 | i04);\
156 	out += ((nz >> 8) | nz) & n80;\
157 	out += c >> 8 & c01;\
158 	if ( !BYTE( nz ) )\
159 		out += z02;\
160 }
161 
// SET_FLAGS( in ): split status byte `in` into the local representation.
// `flags` keeps V, D and I. c and nz both get in << 8, which places C at bit 8 of c and
// N at bit 15 of nz. Finally the low byte of nz becomes 2 when Z is clear in `in` and
// stays 0 when Z is set. `in` is evaluated several times and is not parenthesized.
162 #define SET_FLAGS( in ) \
163 {\
164 	flags = in & (v40 | d08 | i04);\
165 	c = nz = in << 8;\
166 	nz += ~in & z02;\
167 }
168 
169 {
	// Outermost time_offset used by FLUSH_TIME()/CACHE_TIME(); DUMMY_READ shadows it locally.
170 	int const time_offset = 0;
171 
172 	// Local state
	// A local copy of the CPU state is used while running. With FLAT_MEM only `base` is
	// copied (CODE_PAGE does not consult s.code_map in that configuration).
	// CPU.cpu_state is pointed at the local copy here and pointed back at
	// CPU.cpu_state_ when the function finishes.
173 	Nes_Cpu::cpu_state_t s;
174 	#ifdef FLAT_MEM
175 		s.base = CPU.cpu_state_.base;
176 	#else
177 		s = CPU.cpu_state_;
178 	#endif
179 	CPU.cpu_state = &s;
180 	int s_time = CPU.cpu_state_.time; // helps even on x86
181 
182 	// Registers
	// Cached in plain ints; sp is stored in the biased form produced by SET_SP.
183 	int pc = CPU.r.pc;
184 	int a  = CPU.r.a;
185 	int x  = CPU.r.x;
186 	int y  = CPU.r.y;
187 	int sp;
188 	SET_SP( CPU.r.sp );
189 
190 	// Flags
	// The status byte is split across three locals by SET_FLAGS.
191 	int flags;
192 	int c;  // carry set if (c & 0x100) != 0
193 	int nz; // Z set if (nz & 0xFF) == 0, N set if (nz & 0x8080) != 0
194 	{
195 		int temp = CPU.r.flags;
196 		SET_FLAGS( temp );
197 	}
198 
199 loop:
	// Top of the interpreter loop: every instruction handler returns here with `goto loop`.
200 
201 	// Check all values
	// Invariants on the cached registers: sp within 0x100-0x1FF, pc 16-bit, a/x/y 8-bit.
202 	check( (unsigned) sp - 0x100 < 0x100 );
203 	check( (unsigned) pc < 0x10000 );
204 	check( (unsigned) a < 0x100 );
205 	check( (unsigned) x < 0x100 );
206 	check( (unsigned) y < 0x100 );
207 
208 	// Read instruction
	// After either branch below: opcode holds the fetched byte, pc has been incremented
	// past it, and instr points at the byte following the opcode.
209 	byte const* instr = CODE_PAGE( pc );
210 	int opcode;
211 
	// True when CODE_OFFSET is the identity mapping (as in the FLAT_MEM definition),
	// in which case the page can be indexed directly by pc.
212 	if ( CODE_OFFSET(~0) == ~0 )
213 	{
214 		opcode = instr [pc];
215 		pc++;
216 		instr += pc;
217 	}
218 	else
219 	{
220 		instr += CODE_OFFSET( pc );
221 		opcode = *instr++;
222 		pc++;
223 	}
224 
225 	// local to function in case it helps optimizer
	// Clock count added to s_time for each opcode, indexed by opcode value
	// (row = high nibble, column = low nibble).
226 	static byte const clock_table [256] =
227 	{// 0 1 2 3 4 5 6 7 8 9 A B C D E F
228 		0,6,2,8,3,3,5,5,3,2,2,2,4,4,6,6,// 0
229 		2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 1
230 		6,6,0,8,3,3,5,5,4,2,2,2,4,4,6,6,// 2
231 		2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 3
232 		6,6,2,8,3,3,5,5,3,2,2,2,3,4,6,6,// 4
233 		2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 5
234 		6,6,2,8,3,3,5,5,4,2,2,2,5,4,6,6,// 6
235 		2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// 7
236 		2,6,2,6,3,3,3,3,2,2,2,2,4,4,4,4,// 8
237 		2,6,2,6,4,4,4,4,2,5,2,5,5,5,5,5,// 9
238 		2,6,2,6,3,3,3,3,2,2,2,2,4,4,4,4,// A
239 		2,5,2,5,4,4,4,4,2,4,2,4,4,4,4,4,// B
240 		2,6,2,8,3,3,5,5,2,2,2,2,4,4,6,6,// C
241 		2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7,// D
242 		2,6,2,8,3,3,5,5,2,2,2,2,4,4,6,6,// E
243 		2,5,2,8,4,4,6,6,2,4,2,7,4,4,7,7 // F
244 	}; // 0x00 was 7 and 0x22 was 2
245 
246 	// Update time
	// A non-negative s_time means the time slice is used up. The check happens after pc
	// was incremented; out_of_time decrements pc again so the opcode is not lost.
247 	if ( s_time >= 0 )
248 		goto out_of_time;
249 
	// Optional per-instruction hook supplied by the including file.
250 	#ifdef CPU_INSTR_HOOK
251 	{ CPU_INSTR_HOOK( (pc-1), (&instr [-1]), a, x, y, GET_SP(), TIME() ); }
252 	#endif
253 
	// Base clock count is charged up front; handlers add page-crossing/branch extras.
254 	s_time += clock_table [opcode];
255 
	// Pre-read the byte after the opcode; handlers use it as the immediate value,
	// zero-page address or low address byte.
256 	int data;
257 	data = *instr;
259 	switch ( opcode )
260 	{
261 
262 // Macros
263 
// GET_MSB(): second operand byte. ADD_PAGE( out ): advance pc by one and form a 16-bit
// address in `out` from data (low part, possibly already indexed) plus that byte * 0x100.
// GET_ADDR(): 16-bit little-endian operand read via GET_LE16 (defined outside this file).
264 #define GET_MSB()       (instr [1])
265 #define ADD_PAGE( out ) (pc++, out = data + 0x100 * GET_MSB())
266 #define GET_ADDR()      GET_LE16( instr )
267 
// Adds lsb >> 8 to s_time, i.e. one extra clock when an indexed low byte reached 0x100
// or more. The expansion already ends in ';'.
268 #define PAGE_PENALTY( lsb ) s_time += (lsb) >> 8;
269 
// Adds n to reg, leaves the untruncated result in nz for the flags, stores the low byte
// back into reg and returns to the loop.
270 #define INC_DEC( reg, n ) reg = BYTE( nz = reg + n ); goto loop;
271 
// IND_Y( cross, out ): (ind),Y addressing. Reads a 16-bit pointer from low memory at
// data and data + 1 (wrapped to 8 bits), adds y, and stores the address in `out`.
// `cross` is a macro applied to (pointer low byte + y), e.g. PAGE_PENALTY.
272 #define IND_Y( cross, out ) {\
273 		int temp = READ_LOW( data ) + y;\
274 		out = temp + 0x100 * READ_LOW( BYTE( data + 1 ) );\
275 		cross( temp );\
276 	}
277 
// IND_X( out ): (ind,X) addressing. Adds x to data, then reads a 16-bit pointer from
// low memory at that location and the next one (both wrapped to 8 bits).
278 #define IND_X( out ) {\
279 		int temp = data + x;\
280 		out = 0x100 * READ_LOW( BYTE( temp + 1 ) ) + READ_LOW( BYTE( temp ) );\
281 	}
282 
// ARITH_ADDR_MODES( op ): emits the case labels for eight addressing modes of one
// arithmetic/logical instruction, where `op` is its zero-page opcode. Every path ends
// at the imm##op label with the operand value in `data`. The absolute modes have
// already advanced pc once via ADD_PAGE; the statements following the macro are expected
// to advance pc once more, as the handlers in this file do.
283 #define ARITH_ADDR_MODES( op )\
284 case op - 0x04: /* (ind,x) */\
285 	IND_X( data )\
286 	goto ptr##op;\
287 case op + 0x0C: /* (ind),y */\
288 	IND_Y( PAGE_PENALTY, data )\
289 	goto ptr##op;\
290 case op + 0x10: /* zp,X */\
291 	data = BYTE( data + x );\
292 case op + 0x00: /* zp */\
293 	data = READ_LOW( data );\
294 	goto imm##op;\
295 case op + 0x14: /* abs,Y */\
296 	data += y;\
297 	goto ind##op;\
298 case op + 0x18: /* abs,X */\
299 	data += x;\
300 ind##op:\
301 	PAGE_PENALTY( data );\
302 case op + 0x08: /* abs */\
303 	ADD_PAGE( data );\
304 ptr##op:\
305 	FLUSH_TIME();\
306 	data = READ_MEM( data );\
307 	CACHE_TIME();\
308 case op + 0x04: /* imm */\
309 imm##op:
310 
311 // TODO: more efficient way to handle negative branch that wraps PC around
// BRANCH( cond ): relative branch. Skips the offset byte; when cond is false it returns
// to the loop. Otherwise one extra clock is charged, plus one more when adding the signed
// offset carries or borrows out of the low byte of pc, and pc is wrapped to 16 bits.
312 #define BRANCH( cond )\
313 {\
314 	++pc;\
315 	if ( !(cond) ) goto loop;\
316 	s_time++;\
317 	int offset = SBYTE( data );\
318 	s_time += (BYTE(pc) + offset) >> 8 & 1;\
319 	pc = WORD( pc + offset );\
320 	goto loop;\
321 }
322 
323 // Often-Used
// On entry to every case: pc addresses the first operand byte, instr points at it and
// data holds its value. Each handler advances pc past its operands and jumps to loop.
324 
325 	case 0xB5: // LDA zp,x
		// zero-page index wraps to 8 bits
326 		a = nz = READ_LOW( BYTE( data + x ) );
327 		pc++;
328 		goto loop;
329 
330 	case 0xA5: // LDA zp
331 		a = nz = READ_LOW( data );
332 		pc++;
333 		goto loop;
334 
335 	case 0xD0: // BNE
		// taken when the low byte of nz is non-zero (Z clear)
336 		BRANCH( BYTE( nz ) );
337 
338 	case 0x20: { // JSR
		// Pushes pc + 1 (address of the last operand byte), high byte first, then low.
339 		int temp = pc + 1;
340 		pc = GET_ADDR();
341 		WRITE_STACK( SP( -1 ), temp >> 8 );
342 		sp = SP( -2 );
343 		WRITE_STACK( sp, temp );
344 		goto loop;
345 	}
346 
347 	case 0x4C: // JMP abs
348 		pc = GET_ADDR();
349 		goto loop;
350 
351 	case 0xE8: // INX
352 		INC_DEC( x, 1 )
353 
354 	case 0x10: // BPL
355 		BRANCH( !IS_NEG )
356 
357 	ARITH_ADDR_MODES( 0xC5 ) // CMP
		// c = ~nz leaves bit 8 (carry) set exactly when a - data did not go negative;
		// nz is then reduced to the 8-bit difference for the N and Z flags.
358 		nz = a - data;
359 		pc++;
360 		c = ~nz;
361 		nz &= 0xFF;
362 		goto loop;
363 
364 	case 0x30: // BMI
365 		BRANCH( IS_NEG )
366 
367 	case 0xF0: // BEQ
		// taken when the low byte of nz is zero (Z set)
368 		BRANCH( !BYTE( nz ) );
369 
370 	case 0x95: // STA zp,x
371 		data = BYTE( data + x );
		// falls through to STA zp with the wrapped zero-page address
372 	case 0x85: // STA zp
373 		pc++;
374 		WRITE_LOW( data, a );
375 		goto loop;
376 
377 	case 0xC8: // INY
378 		INC_DEC( y, 1 )
379 
380 	case 0xA8: // TAY
381 		y  = a;
382 		nz = a;
383 		goto loop;
384 
385 	case 0x98: // TYA
386 		a  = y;
387 		nz = y;
388 		goto loop;
389 
390 	case 0xAD:{// LDA abs
		// Goes through READ_PPU; the output argument assigns both a and nz.
391 		int addr = GET_ADDR();
392 		pc += 2;
393 		READ_PPU( addr, a = nz );
394 		goto loop;
395 	}
396 
397 	case 0x60: // RTS
		// Pulls the return address (low byte at sp, high byte above it), adds 1,
		// and moves sp up by two.
398 		pc = 1 + READ_STACK( sp );
399 		pc += 0x100 * READ_STACK( SP( 1 ) );
400 		sp = SP( 2 );
401 		goto loop;
402 
403 	{
		// STA with 16-bit effective addresses. All variants share `addr`; each tries
		// the fast-write hook where applicable and otherwise ends in a WRITE_MEM
		// bracketed by FLUSH_TIME()/CACHE_TIME().
404 		int addr;
405 
406 	case 0x8D: // STA abs
407 		addr = GET_ADDR();
408 		pc += 2;
409 		if ( CAN_WRITE_FAST( addr ) )
410 		{
411 			WRITE_FAST( addr, a );
412 			goto loop;
413 		}
	// Shared slow-path store, also the target of the indirect variants below.
414 	sta_ptr:
415 		FLUSH_TIME();
416 		WRITE_MEM( addr, a );
417 		CACHE_TIME();
418 		goto loop;
419 
420 	case 0x99: // STA abs,Y
421 		addr = y + GET_ADDR();
422 		pc += 2;
423 		if ( CAN_WRITE_FAST( addr ) )
424 		{
425 			WRITE_FAST( addr, a );
426 			goto loop;
427 		}
		// jumps past the DUMMY_READ that the abs,X variant performs
428 		goto sta_abs_x;
429 
430 	case 0x9D: // STA abs,X (slightly more common than STA abs)
431 		addr = x + GET_ADDR();
432 		pc += 2;
433 		if ( CAN_WRITE_FAST( addr ) )
434 		{
435 			WRITE_FAST( addr, a );
436 			goto loop;
437 		}
438 		DUMMY_READ( addr, x );
439 	sta_abs_x:
440 		FLUSH_TIME();
441 		WRITE_MEM( addr, a );
442 		CACHE_TIME();
443 		goto loop;
444 
445 	case 0x91: // STA (ind),Y
		// NO_PAGE_PENALTY is an empty `cross` argument for IND_Y: no extra clock is
		// added here for the indexed store.
446 		#define NO_PAGE_PENALTY( lsb )
447 		IND_Y( NO_PAGE_PENALTY, addr )
448 		pc++;
449 		DUMMY_READ( addr, y );
450 		goto sta_ptr;
451 
452 	case 0x81: // STA (ind,X)
453 		IND_X( addr )
454 		pc++;
455 		goto sta_ptr;
456 
457 	}
458 
459 	case 0xA9: // LDA #imm
460 		pc++;
461 		a  = data;
462 		nz = data;
463 		goto loop;
464 
465 	// common read instructions
	// LDA variants with a 16-bit effective address. Where used, READ_FAST is invoked
	// before CAN_READ_FAST is tested; if the fast path does not apply, control reaches
	// a_nz_read_addr for a normal READ_MEM.
466 	{
467 		int addr;
468 
469 	case 0xA1: // LDA (ind,X)
470 		IND_X( addr )
471 		pc++;
472 		goto a_nz_read_addr;
473 
474 	case 0xB1:// LDA (ind),Y
		// The page penalty is computed from (pointer low byte + y) before the high
		// byte is added in.
475 		addr = READ_LOW( data ) + y;
476 		PAGE_PENALTY( addr );
477 		addr += 0x100 * READ_LOW( BYTE( data + 1 ) );
478 		pc++;
479 		READ_FAST( addr, a = nz );
480 		if ( CAN_READ_FAST( addr ) )
481 			goto loop;
482 		DUMMY_READ( addr, y );
483 		goto a_nz_read_addr;
484 
485 	case 0xB9: // LDA abs,Y
		// data is the low address byte, so data + y >= 0x100 signals a page crossing.
		// No DUMMY_READ is issued on this path.
486 		PAGE_PENALTY( data + y );
487 		addr = GET_ADDR() + y;
488 		pc += 2;
489 		READ_FAST( addr, a = nz );
490 		if ( CAN_READ_FAST( addr ) )
491 			goto loop;
492 		goto a_nz_read_addr;
493 
494 	case 0xBD: // LDA abs,X
495 		PAGE_PENALTY( data + x );
496 		addr = GET_ADDR() + x;
497 		pc += 2;
498 		READ_FAST( addr, a = nz );
499 		if ( CAN_READ_FAST( addr ) )
500 			goto loop;
501 		DUMMY_READ( addr, x );
	// Shared slow-path read into a and nz.
502 	a_nz_read_addr:
503 		FLUSH_TIME();
504 		a = nz = READ_MEM( addr );
505 		CACHE_TIME();
506 		goto loop;
507 
508 	}
509 
510 // Branch
// Remaining conditional branches: V is bit v40 of `flags`, carry is bit 8 of `c`.
511 
512 	case 0x50: // BVC
513 		BRANCH( !(flags & v40) )
514 
515 	case 0x70: // BVS
516 		BRANCH( flags & v40 )
517 
518 	case 0xB0: // BCS
519 		BRANCH( c & 0x100 )
520 
521 	case 0x90: // BCC
522 		BRANCH( !(c & 0x100) )
523 
524 // Load/store
// Indexed zero-page forms wrap the address to 8 bits and fall through to the plain
// zero-page form; load forms fall through further to the immediate form, which sets
// the register and nz from data.
525 
526 	case 0x94: // STY zp,x
527 		data = BYTE( data + x );
528 	case 0x84: // STY zp
529 		pc++;
530 		WRITE_LOW( data, y );
531 		goto loop;
532 
533 	case 0x96: // STX zp,y
534 		data = BYTE( data + y );
535 	case 0x86: // STX zp
536 		pc++;
537 		WRITE_LOW( data, x );
538 		goto loop;
539 
540 	case 0xB6: // LDX zp,y
541 		data = BYTE( data + y );
542 	case 0xA6: // LDX zp
543 		data = READ_LOW( data );
544 	case 0xA2: // LDX #imm
545 		pc++;
546 		x = data;
547 		nz = data;
548 		goto loop;
549 
550 	case 0xB4: // LDY zp,x
551 		data = BYTE( data + x );
552 	case 0xA4: // LDY zp
553 		data = READ_LOW( data );
554 	case 0xA0: // LDY #imm
555 		pc++;
556 		y = data;
557 		nz = data;
558 		goto loop;
559 
560 	case 0xBC: // LDY abs,X
		// Index is added to the low byte only; any carry is charged as a page penalty
		// and also propagates into the address formed below.
561 		data += x;
562 		PAGE_PENALTY( data );
563 	case 0xAC:{// LDY abs
564 		int addr = data + 0x100 * GET_MSB();
565 		pc += 2;
566 		FLUSH_TIME();
567 		y = nz = READ_MEM( addr );
568 		CACHE_TIME();
569 		goto loop;
570 	}
571 
572 	case 0xBE: // LDX abs,y
573 		data += y;
574 		PAGE_PENALTY( data );
575 	case 0xAE:{// LDX abs
576 		int addr = data + 0x100 * GET_MSB();
577 		pc += 2;
578 		FLUSH_TIME();
579 		x = nz = READ_MEM( addr );
580 		CACHE_TIME();
581 		goto loop;
582 	}
583 
584 	{
		// STY abs / STX abs share one store path; temp carries the register value.
585 		int temp;
586 	case 0x8C: // STY abs
587 		temp = y;
588 		goto store_abs;
589 
590 	case 0x8E: // STX abs
591 		temp = x;
592 	store_abs:
593 		int addr = GET_ADDR();
594 		pc += 2;
595 		if ( CAN_WRITE_FAST( addr ) )
596 		{
597 			WRITE_FAST( addr, temp );
598 			goto loop;
599 		}
600 		FLUSH_TIME();
601 		WRITE_MEM( addr, temp );
602 		CACHE_TIME();
603 		goto loop;
604 	}
605 
606 // Compare
// CPX/CPY use the same flag computation as CMP: nz = reg - data, carry (bit 8 of c)
// is taken from ~nz, then nz is masked to 8 bits. The absolute forms advance pc once
// here and once more at the shared label, covering both operand bytes.
607 
608 	case 0xEC:{// CPX abs
609 		int addr = GET_ADDR();
610 		pc++;
611 		FLUSH_TIME();
612 		data = READ_MEM( addr );
613 		CACHE_TIME();
614 		goto cpx_data;
615 	}
616 
617 	case 0xE4: // CPX zp
618 		data = READ_LOW( data );
619 	case 0xE0: // CPX #imm
620 	cpx_data:
621 		nz = x - data;
622 		pc++;
623 		c = ~nz;
624 		nz &= 0xFF;
625 		goto loop;
626 
627 	case 0xCC:{// CPY abs
628 		int addr = GET_ADDR();
629 		pc++;
630 		FLUSH_TIME();
631 		data = READ_MEM( addr );
632 		CACHE_TIME();
633 		goto cpy_data;
634 	}
635 
636 	case 0xC4: // CPY zp
637 		data = READ_LOW( data );
638 	case 0xC0: // CPY #imm
639 	cpy_data:
640 		nz = y - data;
641 		pc++;
642 		c = ~nz;
643 		nz &= 0xFF;
644 		goto loop;
645 
646 // Logical
// AND/EOR/ORA: ARITH_ADDR_MODES leaves the operand in data; the result goes to both
// a and nz, and pc is advanced past the remaining operand byte.
647 
648 	ARITH_ADDR_MODES( 0x25 ) // AND
649 		nz = (a &= data);
650 		pc++;
651 		goto loop;
652 
653 	ARITH_ADDR_MODES( 0x45 ) // EOR
654 		nz = (a ^= data);
655 		pc++;
656 		goto loop;
657 
658 	ARITH_ADDR_MODES( 0x05 ) // ORA
659 		nz = (a |= data);
660 		pc++;
661 		goto loop;
662 
663 	case 0x2C:{// BIT abs
		// nz receives the memory value (N = its bit 7) and V is copied from its bit 6.
		// If a & value is zero, nz is shifted left by 8 so its low byte is zero (Z set)
		// while the old bit 7 lands on bit 15 and still reads as N.
664 		int addr = GET_ADDR();
665 		pc += 2;
666 		READ_PPU( addr, nz );
667 		flags = (flags & ~v40) + (nz & v40);
668 		if ( a & nz )
669 			goto loop;
670 		nz <<= 8; // result must be zero, even if N bit is set
671 		goto loop;
672 	}
673 
674 	case 0x24: // BIT zp
		// Same flag handling as BIT abs, reading from low memory.
675 		nz = READ_LOW( data );
676 		pc++;
677 		flags = (flags & ~v40) + (nz & v40);
678 		if ( a & nz )
679 			goto loop; // Z should be clear, and nz must be non-zero if nz & a is
680 		nz <<= 8; // set Z flag without affecting N flag
681 		goto loop;
682 
683 // Add/subtract
// SBC is implemented as ADC of the one's complement of the operand. The d08 (decimal)
// flag is not consulted in this code; arithmetic is always binary.
684 
685 	ARITH_ADDR_MODES( 0xE5 ) // SBC
686 	case 0xEB: // unofficial equivalent
687 		data ^= 0xFF;
688 		goto adc_imm;
689 
690 	ARITH_ADDR_MODES( 0x65 ) // ADC
691 	adc_imm: {
		// carry: incoming C (bit 8 of c).
		// ov: signed sum biased by 0x80; its bit 8 is moved to the V position
		// (>> 2, masked with v40).
		// c and nz both get the 9-bit unsigned sum; a keeps the low byte.
692 		int carry = c >> 8 & 1;
693 		int ov = (a ^ 0x80) + carry + SBYTE( data );
694 		flags = (flags & ~v40) + (ov >> 2 & v40);
695 		c = nz = a + data + carry;
696 		pc++;
697 		a = BYTE( nz );
698 		goto loop;
699 	}
700 
701 // Shift/rotate
// Shifts are rotates with the incoming carry cleared first (c = 0), so each shift case
// falls through into the matching rotate. Right rotates move bit 8 of c into bit 7 of the
// result and put the old bit 0 into bit 8 of c (value << 8); left rotates add the old
// carry as bit 0 and take the new carry from bit 8 of (value << 1).
702 
703 	case 0x4A: // LSR A
704 		c = 0;
705 	case 0x6A: // ROR A
706 		nz = c >> 1 & 0x80;
707 		c = a << 8;
708 		nz += a >> 1;
709 		a = nz;
710 		goto loop;
711 
712 	case 0x0A: // ASL A
713 		nz = a << 1;
714 		c = nz;
715 		a = BYTE( nz );
716 		goto loop;
717 
718 	case 0x2A: { // ROL A
719 		nz = a << 1;
720 		int temp = c >> 8 & 1;
721 		c = nz;
722 		nz += temp;
723 		a = BYTE( nz );
724 		goto loop;
725 	}
726 
727 	case 0x5E: // LSR abs,X
728 		data += x;
729 	case 0x4E: // LSR abs
730 		c = 0;
731 	case 0x6E: // ROR abs
732 	ror_abs: {
		// ADD_PAGE advances pc once; rotate_common advances it again. Time is flushed
		// here and re-cached in rotate_common after the write-back.
733 		ADD_PAGE( data );
734 		FLUSH_TIME();
735 		int temp = READ_MEM( data );
736 		nz = (c >> 1 & 0x80) + (temp >> 1);
737 		c = temp << 8;
738 		goto rotate_common;
739 	}
740 
741 	case 0x3E: // ROL abs,X
742 		data += x;
743 		goto rol_abs;
744 
745 	case 0x1E: // ASL abs,X
746 		data += x;
747 	case 0x0E: // ASL abs
748 		c = 0;
749 	case 0x2E: // ROL abs
750 	rol_abs:
751 		ADD_PAGE( data );
752 		nz = c >> 8 & 1;
753 		FLUSH_TIME();
754 		nz += (c = READ_MEM( data ) << 1);
	// Shared write-back for the absolute read-modify-write rotates.
755 	rotate_common:
756 		pc++;
757 		WRITE_MEM( data, BYTE( nz ) );
758 		CACHE_TIME();
759 		goto loop;
760 
761 	case 0x7E: // ROR abs,X
762 		data += x;
763 		goto ror_abs;
764 
765 	case 0x76: // ROR zp,x
766 		data = BYTE( data + x );
767 		goto ror_zp;
768 
769 	case 0x56: // LSR zp,x
770 		data = BYTE( data + x );
771 	case 0x46: // LSR zp
772 		c = 0;
773 	case 0x66: // ROR zp
774 	ror_zp: {
		// Zero-page variants finish at write_nz_zp (with the zero-page INC/DEC code),
		// which advances pc and writes nz back with WRITE_LOW.
775 		int temp = READ_LOW( data );
776 		nz = (c >> 1 & 0x80) + (temp >> 1);
777 		c = temp << 8;
778 		goto write_nz_zp;
779 	}
780 
781 	case 0x36: // ROL zp,x
782 		data = BYTE( data + x );
783 		goto rol_zp;
784 
785 	case 0x16: // ASL zp,x
786 		data = BYTE( data + x );
787 	case 0x06: // ASL zp
788 		c = 0;
789 	case 0x26: // ROL zp
790 	rol_zp:
791 		nz = c >> 8 & 1;
792 		nz += (c = READ_LOW( data ) << 1);
793 		goto write_nz_zp;
794 
795 // Increment/decrement
// Memory INC/DEC preload nz with +1 or -1 and then add the memory value, so a single
// shared tail handles both directions.
796 
797 	case 0xCA: // DEX
798 		INC_DEC( x, -1 )
799 
800 	case 0x88: // DEY
801 		INC_DEC( y, -1 )
802 
803 	case 0xF6: // INC zp,x
804 		data = BYTE( data + x );
805 	case 0xE6: // INC zp
806 		nz = 1;
807 		goto add_nz_zp;
808 
809 	case 0xD6: // DEC zp,x
810 		data = BYTE( data + x );
811 	case 0xC6: // DEC zp
812 		nz = -1;
813 	add_nz_zp:
814 		nz += READ_LOW( data );
	// Shared zero-page write-back; also the target of the zero-page shift/rotate cases.
	// nz is passed to WRITE_LOW without truncation here.
815 	write_nz_zp:
816 		pc++;
817 		WRITE_LOW( data, nz );
818 		goto loop;
819 
820 	case 0xFE: // INC abs,x
		// data is reused to hold the full 16-bit effective address
821 		data = x + GET_ADDR();
822 		goto inc_ptr;
823 
824 	case 0xEE: // INC abs
825 		data = GET_ADDR();
826 	inc_ptr:
827 		nz = 1;
828 		goto inc_common;
829 
830 	case 0xDE: // DEC abs,x
831 		data = x + GET_ADDR();
832 		goto dec_ptr;
833 
834 	case 0xCE: // DEC abs
835 		data = GET_ADDR();
836 	dec_ptr:
837 		nz = -1;
	// Shared absolute read-modify-write tail: read, add nz, write back the low byte.
838 	inc_common:
839 		FLUSH_TIME();
840 		pc += 2;
841 		nz += READ_MEM( data );
842 		WRITE_MEM( data, BYTE( nz ) );
843 		CACHE_TIME();
844 		goto loop;
845 
846 // Transfer
// Register transfers copy the value and also load nz with it, except TXS, which only
// sets the stack pointer.
847 
848 	case 0xAA: // TAX
849 		x = nz = a;
850 		goto loop;
851 
852 	case 0x8A: // TXA
853 		a = nz = x;
854 		goto loop;
855 
856 	case 0x9A: // TXS
857 		SET_SP( x ); // verified (no flag change)
858 		goto loop;
859 
860 	case 0xBA: // TSX
861 		x = nz = GET_SP();
862 		goto loop;
863 
864 // Stack
// The biased local sp addresses the most recently pushed byte: pushes first move sp
// down with SP( -n ) and then write at sp; pulls read at sp and then move it up.
865 
866 	case 0x48: // PHA
867 		sp = SP( -1 );
868 		WRITE_STACK( sp, a );
869 		goto loop;
870 
871 	case 0x68: // PLA
872 		a = nz = READ_STACK( sp );
873 		sp = SP( 1 );
874 		goto loop;
875 
876 	case 0x40:{// RTI
		// Stack layout read here: status byte at sp, then pc low, then pc high.
877 		pc  = READ_STACK( SP( 1 ) );
878 		pc += READ_STACK( SP( 2 ) ) * 0x100;
879 		int temp = READ_STACK( sp );
880 		sp = SP( 3 );
		// NOTE(review): data is reloaded at the top of the loop before it is read again,
		// so this assignment appears to have no effect - confirm before removing.
881 		data = flags;
882 		SET_FLAGS( temp );
883 		CPU.r.flags = flags; // update externally-visible I flag
		// If the current base lies beyond irq_time_ and I is now clear, re-base the
		// local clock on irq_time_. s_time + s.base (i.e. TIME()) is unchanged by this.
884 		int delta = s.base - CPU.irq_time_;
885 		if ( delta <= 0 ) goto loop; // end_time < irq_time
886 		if ( flags & i04 ) goto loop;
887 		s_time += delta;
888 		s.base = CPU.irq_time_;
889 		goto loop;
890 	}
891 
892 	case 0x28:{// PLP
		// Restores all flags; if the I flag changed, continues in the SEI or CLI
		// timing code so the interrupt timing is updated the same way.
893 		int temp = READ_STACK( sp );
894 		sp = SP( 1 );
895 		int changed = flags ^ temp;
896 		SET_FLAGS( temp );
897 		if ( !(changed & i04) )
898 			goto loop; // I flag didn't change
899 		if ( flags & i04 )
900 			goto handle_sei;
901 		goto handle_cli;
902 	}
903 
904 	case 0x08:{// PHP
		// The pushed status byte always has the B (0x10) and 0x20 bits set.
905 		int temp;
906 		GET_FLAGS( temp );
907 		sp = SP( -1 );
908 		WRITE_STACK( sp, temp | (b10 | r20) );
909 		goto loop;
910 	}
911 
912 	case 0x6C:{// JMP (ind)
		// The high byte of the target is fetched from the same 256-byte page as the
		// low byte: the pointer's low byte wraps from 0xFF to 0x00 without carrying.
		// Both bytes are read through the code page selected for the pointer address.
913 		data = GET_ADDR();
914 		byte const* page = CODE_PAGE( data );
915 		pc = page [CODE_OFFSET( data )];
916 		data = (data & 0xFF00) + ((data + 1) & 0xFF);
917 		pc += page [CODE_OFFSET( data )] * 0x100;
918 		goto loop;
919 	}
920 
921 	case 0x00: // BRK
		// handled by the interrupt-entry code after the switch
922 		goto handle_brk;
923 
924 // Flags
// C lives in bit 8 of c; V, D and I live in `flags`. Changing I also updates the
// externally visible CPU.r.flags and re-bases the local clock (s.base / s_time).
925 
926 	case 0x38: // SEC
927 		c = 0x100;
928 		goto loop;
929 
930 	case 0x18: // CLC
931 		c = 0;
932 		goto loop;
933 
934 	case 0xB8: // CLV
935 		flags &= ~v40;
936 		goto loop;
937 
938 	case 0xD8: // CLD
939 		flags &= ~d08;
940 		goto loop;
941 
942 	case 0xF8: // SED
943 		flags |= d08;
944 		goto loop;
945 
946 	case 0x58: // CLI
		// nothing to do if I is already clear
947 		if ( !(flags & i04) )
948 			goto loop;
949 		flags &= ~i04;
	// Also entered from PLP when the pulled flags clear I.
950 	handle_cli: {
951 		//dprintf( "CLI at %d\n", TIME );
952 		CPU.r.flags = flags; // update externally-visible I flag
		// delta > 0 means the current base lies beyond irq_time_.
953 		int delta = s.base - CPU.irq_time_;
954 		if ( delta <= 0 )
955 		{
			// Base is already at or before irq_time_: continue normally unless the
			// current time has reached irq_time_.
956 			if ( TIME() < CPU.irq_time_ )
957 				goto loop;
958 			goto delayed_cli;
959 		}
		// Re-base the clock on irq_time_; TIME() is unchanged by the pair of updates.
960 		s.base = CPU.irq_time_;
961 		s_time += delta;
962 		if ( s_time < 0 )
963 			goto loop;
964 
965 		if ( delta >= s_time + 1 )
966 		{
967 			// delayed irq until after next instruction
			// Moves the base forward so that s_time is -1, letting exactly one more
			// instruction start before the time check trips.
968 			s.base += s_time + 1;
969 			s_time = -1;
970 			goto loop;
971 		}
972 
973 		// TODO: implement
974 	delayed_cli:
975 		dprintf( "Delayed CLI not emulated\n" );
976 		goto loop;
977 	}
978 
979 	case 0x78: // SEI
		// nothing to do if I is already set
980 		if ( flags & i04 )
981 			goto loop;
982 		flags |= i04;
	// Also entered from PLP when the pulled flags set I.
983 	handle_sei: {
984 		CPU.r.flags = flags; // update externally-visible I flag
		// Re-base the clock on end_time_ unconditionally; TIME() is unchanged.
985 		int delta = s.base - CPU.end_time_;
986 		s.base = CPU.end_time_;
987 		s_time += delta;
988 		if ( s_time < 0 )
989 			goto loop;
990 
991 		dprintf( "Delayed SEI not emulated\n" );
992 		goto loop;
993 	}
994 
995 // Unofficial
// Unofficial no-operation opcodes: they only skip their operand bytes. The cases are
// stacked so the word-skipping forms fall through two pc increments and the
// byte-skipping forms through one.
996 
997 	// SKW - skip word
998 	case 0x1C: case 0x3C: case 0x5C: case 0x7C: case 0xDC: case 0xFC:
		// these charge a page-crossing clock based on (low operand byte + x)
999 		PAGE_PENALTY( data + x );
1000 	case 0x0C:
1001 		pc++;
1002 	// SKB - skip byte
1003 	case 0x74: case 0x04: case 0x14: case 0x34: case 0x44: case 0x54: case 0x64:
1004 	case 0x80: case 0x82: case 0x89: case 0xC2: case 0xD4: case 0xE2: case 0xF4:
1005 		pc++;
1006 		goto loop;
1007 
1008 	// NOP
1009 	case 0xEA: case 0x1A: case 0x3A: case 0x5A: case 0x7A: case 0xDA: case 0xFA:
1010 		goto loop;
1011 
1012 	case Nes_Cpu::halt_opcode: // HLT - halt processor
		// pc is stepped back onto the halt opcode in either case. If the opcode was
		// fetched from beyond the 64K address space, pc is wrapped to 16 bits and
		// execution continues; otherwise control falls through to stop.
1013 		if ( pc-- > 0x10000 )
1014 		{
1015 			// handle wrap-around (assumes caller has put page of HLT at 0x10000)
1016 			pc = WORD( pc );
1017 			goto loop;
1018 		}
	// Remaining halting opcodes: stop without stepping pc back.
1019 	case 0x02: case 0x12:            case 0x32: case 0x42: case 0x52:
1020 	case 0x62: case 0x72: case 0x92: case 0xB2: case 0xD2: case 0xF2:
1021 		goto stop;
1022 
1023 // Unimplemented
// Opcodes without a handler: count an error, skip the instruction's operand bytes and
// charge the extra page-crossing clock where applicable.
1024 
1025 	case 0xFF:  // force 256-entry jump table for optimization purposes
		// bit 0 of c is never read by the flag logic (carry is bit 8)
1026 		c |= 1; // compiler doesn't know that this won't affect anything
1027 	default:
1028 		check( (unsigned) opcode < 0x100 );
1029 
		// Optional hook supplied by the including file.
1030 		#ifdef UNIMPL_INSTR
1031 			UNIMPL_INSTR();
1032 		#endif
1033 
1034 		// At least skip over proper number of bytes instruction uses
		// illop_lens packs 2-bit values: the table entry is selected by opcode bits
		// 2-4 and the field within it by opcode bits 0-1. The value is added to pc.
1035 		static unsigned char const illop_lens [8] = {
1036 			0x40, 0x40, 0x40, 0x80, 0x40, 0x40, 0x80, 0xA0
1037 		};
		// This declaration shadows the outer `opcode`; it re-reads the same opcode byte.
1038 		int opcode = instr [-1];
1039 		int len = illop_lens [opcode >> 2 & 7] >> (opcode << 1 & 6) & 3;
		// 0x9C is special-cased rather than taken from the table
1040 		if ( opcode == 0x9C )
1041 			len = 2;
1042 		pc += len;
1043 		CPU.error_count_++;
1044 
1045 		// Account for extra clock
		// Applies to opcodes 0xB0-0xBF only: 0xB3 first replaces data with the byte read
		// from low memory at data; every one except 0xB7 then charges a page penalty
		// from data + y.
1046 		if ( (opcode >> 4) == 0x0B )
1047 		{
1048 			if ( opcode == 0xB3 )
1049 				data = READ_LOW( data );
1050 			if ( opcode != 0xB7 )
1051 				PAGE_PENALTY( data + y );
1052 		}
1053 		goto loop;
1054 	}
1055 	assert( false ); // catch missing 'goto loop' or accidental 'break'
1056 
1057 	int result_;
	// result_ selects the interrupt behaviour below: bit 2 (value 4) offsets the vector
	// address from 0xFFFA and the b10 bit is copied into the pushed status byte.
	// BRK sets both; other values come from CPU_DONE when that macro is defined.
1058 handle_brk:
	// skip the byte following the BRK opcode before pushing pc
1059 	pc++;
1060 	result_ = b10 | 4;
1061 
1062 #ifdef CPU_DONE
1063 interrupt:
1064 #endif
1065 	{
		// interrupt entry is charged 7 clocks
1066 		s_time += 7;
1067 
1068 		// Save PC and read vector
		// pc high byte goes just below the current top, low byte below that.
1069 		WRITE_STACK( SP( -1 ), pc >> 8 );
1070 		WRITE_STACK( SP( -2 ), pc );
1071 		pc = GET_LE16( &READ_CODE( 0xFFFA ) + (result_ & 4) );
1072 
1073 		// Save flags
1074 		int temp;
1075 		GET_FLAGS( temp );
1076 		temp |= r20 + (result_ & b10); // B flag set for BRK
1077 		sp = SP( -3 );
1078 		WRITE_STACK( sp, temp );
1079 
1080 		// Update I flag in externally-visible flags
1081 		CPU.r.flags = (flags |= i04);
1082 
1083 		// Update time
		// With I now set, re-base the clock on end_time_ when the current base is
		// earlier than it; TIME() is unchanged by the pair of updates.
1084 		int delta = s.base - CPU.end_time_;
1085 		if ( delta >= 0 )
1086 			goto loop;
1087 		s_time += delta;
1088 		s.base = CPU.end_time_;
1089 		goto loop;
1090 	}
1091 
1092 out_of_time:
	// Reached from the loop's time check, after the opcode fetch already incremented pc;
	// step back so the unexecuted instruction is fetched again later.
1093 	pc--;
1094 
1095 	// Optional action that triggers interrupt or changes irq/end time
	// CPU_DONE( result_ ) is supplied by the including file: a non-negative result_
	// enters the interrupt sequence; otherwise execution resumes if time remains.
1096 	#ifdef CPU_DONE
1097 	{
1098 		CPU_DONE( result_ );
1099 		if ( result_ >= 0 )
1100 			goto interrupt;
1101 		if ( s_time < 0 )
1102 			goto loop;
1103 	}
1104 	#endif
1105 stop:
1106 
1107 	// Flush cached state
	// Write the cached registers back, converting sp from its biased form.
1108 	CPU.r.pc = pc;
1109 	CPU.r.sp = GET_SP();
1110 	CPU.r.a  = a;
1111 	CPU.r.x  = x;
1112 	CPU.r.y  = y;
1113 
	// Re-assemble the status byte from flags, nz and c.
1114 	int temp;
1115 	GET_FLAGS( temp );
1116 	CPU.r.flags = temp;
1117 
	// Store the time state and point CPU.cpu_state back at the CPU's own copy instead
	// of the local `s`, which is about to go out of scope.
1118 	CPU.cpu_state_.base = s.base;
1119 	CPU.cpu_state_.time = s_time;
1120 	CPU.cpu_state = &CPU.cpu_state_;
1121 }
1122