1 // Game_Music_Emu 0.6.0. http://www.slack.net/~ant/
2 
3 /*
4 Last validated with zexall 2006.11.21 5:26 PM
5 * Doesn't implement the R register or immediate interrupt after EI.
6 * Address wrap-around isn't completely correct, but is prevented from crashing emulator.
7 */
8 
9 #include "Ay_Cpu.h"
10 
11 #include "blargg_endian.h"
12 #include <string.h>
13 
14 //#include "z80_cpu_log.h"
15 
16 /* Copyright (C) 2006 Shay Green. This module is free software; you
17 can redistribute it and/or modify it under the terms of the GNU Lesser
18 General Public License as published by the Free Software Foundation; either
19 version 2.1 of the License, or (at your option) any later version. This
20 module is distributed in the hope that it will be useful, but WITHOUT ANY
21 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
22 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
23 details. You should have received a copy of the GNU Lesser General Public
24 License along with this module; if not, write to the Free Software Foundation,
25 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
26 
// run() keeps the cycle counter in a local named s_time. These two macros
// copy that local out to the state struct (s.time) and read it back again.
27 #define SYNC_TIME()     (void) (s.time = s_time)
28 #define RELOAD_TIME()   (void) (s_time = s.time)
29 
30 // Callbacks to emulator
31 
// Port write: forwards to ay_cpu_out(). Note the argument order differs from
// the macro's: TIME is passed second, ahead of addr and data.
32 #define CPU_OUT( cpu, addr, data, TIME )\
33 	ay_cpu_out( cpu, TIME, addr, data )
34 
// Port read: forwards to ay_cpu_in(). The TIME argument is accepted but not
// passed on.
35 #define CPU_IN( cpu, addr, TIME )\
36 	ay_cpu_in( cpu, addr )
37 
38 #include "blargg_source.h"
39 
40 // flags, named with hex value for clarity
// Bit masks for the Z80 flag byte. Each name is a flag letter followed by the
// mask value in hex. V04 and P04 are the same bit under two names.
41 int const S80 = 0x80; // tested by the MINUS condition
42 int const Z40 = 0x40; // tested by the ZERO condition
43 int const F20 = 0x20; // bit 5, copied from the result/operand
44 int const H10 = 0x10; // carry out of bit 3 in the add/subtract code
45 int const F08 = 0x08; // bit 3, copied from the result/operand
46 int const V04 = 0x04; // overflow, for arithmetic results
47 int const P04 = 0x04; // parity, tested by the EVEN condition
48 int const N02 = 0x02; // set by subtract-type operations
49 int const C01 = 0x01; // tested by the CARRY condition
50 
// Lookups into the szpc table built by the constructor. The index is a result
// in the range 0..0x1FF; bit 8 of the index selects the entries with C01 set.
// SZ28P and SZ28PC return the entry unchanged; SZ28C and SZ28 clear the
// parity bit from it.
51 #define SZ28P( n )  szpc [n]
52 #define SZ28PC( n ) szpc [n]
53 #define SZ28C( n )  (szpc [n] & ~P04)
54 #define SZ28( n )   SZ28C( n )
55 
// Access to the stored R register value (r.r). These only store and fetch the
// field; the file header notes that the R register itself is not implemented.
56 #define SET_R( n )  (void) (r.r = n)
57 #define GET_R()     (r.r)
58 
Ay_Cpu()59 Ay_Cpu::Ay_Cpu()
60 {
61 	state = &state_;
62 	for ( int i = 0x100; --i >= 0; )
63 	{
64 		int even = 1;
65 		for ( int p = i; p; p >>= 1 )
66 			even ^= p;
67 		int n = (i & (S80 | F20 | F08)) | ((even & 1) * P04);
68 		szpc [i] = n;
69 		szpc [i + 0x100] = n | C01;
70 	}
71 	szpc [0x000] |= Z40;
72 	szpc [0x100] |= Z40;
73 }
74 
reset(void * m)75 void Ay_Cpu::reset( void* m )
76 {
77 	mem = (uint8_t*) m;
78 
79 	check( state == &state_ );
80 	state = &state_;
81 	state_.time = 0;
82 	state_.base = 0;
83 	end_time_   = 0;
84 
85 	memset( &r, 0, sizeof r );
86 }
87 
// Helpers used inside run(). They rely on its locals: s_time, s, mem and pc.
// TIME is the cached cycle counter plus the state's base value.
88 #define TIME                        (s_time + s.base)
// Every memory access indexes the mem array directly; the word forms read or
// write two bytes through GET_LE16 / SET_LE16.
89 #define READ_PROG( addr )           (mem [addr])
90 #define INSTR( offset )             READ_PROG( pc + (offset) )
91 #define GET_ADDR()                  GET_LE16( &READ_PROG( pc ) )
92 #define READ( addr )                READ_PROG( addr )
93 #define WRITE( addr, data )         (void) (READ_PROG( addr ) = data)
94 #define READ_WORD( addr )           GET_LE16( &READ_PROG( addr ) )
95 #define WRITE_WORD( addr, data )    SET_LE16( &READ_PROG( addr ), data )
// Port access goes to the emulator callbacks, passing this CPU and TIME.
96 #define IN( addr )                  CPU_IN( this, addr, TIME )
97 #define OUT( addr, data )           CPU_OUT( this, addr, data, TIME )
98 
// R8( n, offset ): the 8-bit register at index n - offset in r8_. On
// little-endian hosts the low bit of n is flipped first.
// NOTE(review): presumably this is because r8_ overlays the 16-bit pairs in
// r16_, so the two bytes of each pair swap places -- confirm against the
// regs_t / pairs_t layout in the header.
99 #if BLARGG_BIG_ENDIAN
100 	#define R8( n, offset ) ((r8_ - offset) [n])
101 #elif BLARGG_LITTLE_ENDIAN
102 	#define R8( n, offset ) ((r8_ - offset) [(n) ^ 1])
103 #else
104 	#error "Byte order of CPU must be known"
105 #endif
106 
107 //#define R16( n, shift, offset )   (r16_ [((n) >> shift) - (offset >> shift)])
108 
109 // help compiler see that it can just adjust stack offset, saving an extra instruction
// R16( n, shift, offset ): the 16-bit register pair selected by n. Both n and
// offset are shifted right by (shift - 1) to give byte offsets, and their
// difference is applied to r16_ as a char pointer. With shift 4 and opcodes
// 0x01/0x11/0x21 (offset 0x01) this gives byte offsets 0, 2 and 4.
110 #define R16( n, shift, offset )\
111 	(*(uint16_t*) ((char*) r16_ - (offset >> (shift - 1)) + ((n) >> (shift - 1))))
112 
// CASEn( a, b, ... ) expands to n case labels for the given opcodes, which are
// written as bare hex digits (0x is pasted on). The last label has no colon,
// so the use site writes it: CASE7( ... ):
113 #define CASE5( a, b, c, d, e          ) case 0x##a:case 0x##b:case 0x##c:case 0x##d:case 0x##e
114 #define CASE6( a, b, c, d, e, f       ) CASE5( a, b, c, d, e       ): case 0x##f
115 #define CASE7( a, b, c, d, e, f, g    ) CASE6( a, b, c, d, e, f    ): case 0x##g
116 #define CASE8( a, b, c, d, e, f, g, h ) CASE7( a, b, c, d, e, f, g ): case 0x##h
117 
118 // high four bits are $ED time - 8, low four bits are $DD/$FD time - 8
// Indexed by the byte that follows the prefix. For an $ED-prefixed opcode,
// run() adds the high nibble of the entry to s_time, on top of the $ED entry
// (8) in base_timing.
119 static byte const ed_dd_timing [0x100] = {
120 //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
121 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
122 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
123 0x00,0x06,0x0C,0x02,0x00,0x00,0x03,0x00,0x00,0x07,0x0C,0x02,0x00,0x00,0x03,0x00,
124 0x00,0x00,0x00,0x00,0x0F,0x0F,0x0B,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
125 0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,
126 0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,
127 0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,
128 0x4B,0x4B,0x7B,0xCB,0x0B,0x6B,0x00,0x0B,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x00,
129 0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,
130 0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,
131 0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,
132 0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,
133 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0F,0x00,0x00,0x00,0x00,
134 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
135 0x00,0x06,0x00,0x0F,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
136 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,
137 };
138 
139 // even on x86, using short and unsigned char was slower
// "Fast" integer types for locals in run(). They are plain int/unsigned, so
// they are wider than the 8/16-bit values they hold and must be masked or
// cast explicitly when wrap-around matters.
140 typedef int         fint16;
141 typedef unsigned    fuint16;
142 typedef unsigned    fuint8;
143 
run(cpu_time_t end_time)144 bool Ay_Cpu::run( cpu_time_t end_time )
145 {
146 	set_end_time( end_time );
147 	state_t s = this->state_;
148 	this->state = &s;
149 	bool warning = false;
150 
151 	typedef BOOST::int8_t int8_t;
152 
153 	union {
154 		regs_t rg;
155 		pairs_t rp;
156 		uint8_t r8_ [8]; // indexed
157 		uint16_t r16_ [4];
158 	};
159 	rg = this->r.b;
160 
161 	cpu_time_t s_time = s.time;
162 	uint8_t* const mem = this->mem; // cache
163 	fuint16 pc = r.pc;
164 	fuint16 sp = r.sp;
165 	fuint16 ix = r.ix; // TODO: keep in memory for direct access?
166 	fuint16 iy = r.iy;
167 	int flags = r.b.flags;
168 
169 	goto loop;
170 jr_not_taken:
171 	s_time -= 5;
172 	goto loop;
173 call_not_taken:
174 	s_time -= 7;
175 jp_not_taken:
176 	pc += 2;
177 loop:
178 
179 	check( (unsigned long) pc < 0x10000 );
180 	check( (unsigned long) sp < 0x10000 );
181 	check( (unsigned) flags < 0x100 );
182 	check( (unsigned) ix < 0x10000 );
183 	check( (unsigned) iy < 0x10000 );
184 
185 	fuint8 opcode;
186 	opcode = READ_PROG( pc );
187 	pc++;
188 
189 	static byte const base_timing [0x100] = {
190 	//   0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
191 		 4,10, 7, 6, 4, 4, 7, 4, 4,11, 7, 6, 4, 4, 7, 4, // 0
192 		13,10, 7, 6, 4, 4, 7, 4,12,11, 7, 6, 4, 4, 7, 4, // 1
193 		12,10,16, 6, 4, 4, 7, 4,12,11,16, 6, 4, 4, 7, 4, // 2
194 		12,10,13, 6,11,11,10, 4,12,11,13, 6, 4, 4, 7, 4, // 3
195 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 4
196 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 5
197 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 6
198 		 7, 7, 7, 7, 7, 7, 4, 7, 4, 4, 4, 4, 4, 4, 7, 4, // 7
199 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 8
200 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 9
201 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // A
202 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // B
203 		11,10,10,10,17,11, 7,11,11,10,10, 8,17,17, 7,11, // C
204 		11,10,10,11,17,11, 7,11,11, 4,10,11,17, 8, 7,11, // D
205 		11,10,10,19,17,11, 7,11,11, 4,10, 4,17, 8, 7,11, // E
206 		11,10,10, 4,17,11, 7,11,11, 6,10, 4,17, 8, 7,11, // F
207 	};
208 
209 	fuint16 data;
210 	data = base_timing [opcode];
211 	if ( (s_time += data) >= 0 )
212 		goto possibly_out_of_time;
213 almost_out_of_time:
214 
215 	data = READ_PROG( pc );
216 
217 	#ifdef Z80_CPU_LOG_H
218 		//log_opcode( opcode, READ_PROG( pc ) );
219 		z80_log_regs( rg.a, rp.bc, rp.de, rp.hl, sp, ix, iy );
220 		z80_cpu_log( "new", pc - 1, opcode, READ_PROG( pc ),
221 				READ_PROG( pc + 1 ), READ_PROG( pc + 2 ) );
222 	#endif
223 
224 	switch ( opcode )
225 	{
226 possibly_out_of_time:
227 		if ( s_time < (int) data )
228 			goto almost_out_of_time;
229 		s_time -= data;
230 		goto out_of_time;
231 
232 // Common
233 
234 	case 0x00: // NOP
235 	CASE7( 40, 49, 52, 5B, 64, 6D, 7F ): // LD B,B etc.
236 		goto loop;
237 
238 	case 0x08:{// EX AF,AF'
239 		int temp = r.alt.b.a;
240 		r.alt.b.a = rg.a;
241 		rg.a = temp;
242 
243 		temp = r.alt.b.flags;
244 		r.alt.b.flags = flags;
245 		flags = temp;
246 		goto loop;
247 	}
248 
249 	case 0xD3: // OUT (imm),A
250 		pc++;
251 		OUT( data + rg.a * 0x100, rg.a );
252 		goto loop;
253 
254 	case 0x2E: // LD L,imm
255 		pc++;
256 		rg.l = data;
257 		goto loop;
258 
259 	case 0x3E: // LD A,imm
260 		pc++;
261 		rg.a = data;
262 		goto loop;
263 
264 	case 0x3A:{// LD A,(addr)
265 		fuint16 addr = GET_ADDR();
266 		pc += 2;
267 		rg.a = READ( addr );
268 		goto loop;
269 	}
270 
271 // Conditional
272 
273 #define ZERO    (flags & Z40)
274 #define CARRY   (flags & C01)
275 #define EVEN    (flags & P04)
276 #define MINUS   (flags & S80)
277 
278 // JR
279 #define JR( cond ) {\
280 	int disp = (BOOST::int8_t) data;\
281 	pc++;\
282 	if ( !(cond) )\
283 		goto jr_not_taken;\
284 	pc += disp;\
285 	goto loop;\
286 }
287 
288 	case 0x20: JR( !ZERO  ) // JR NZ,disp
289 	case 0x28: JR(  ZERO  ) // JR Z,disp
290 	case 0x30: JR( !CARRY ) // JR NC,disp
291 	case 0x38: JR(  CARRY ) // JR C,disp
292 	case 0x18: JR(  true  ) // JR disp
293 
294 	case 0x10:{// DJNZ disp
295 		int temp = rg.b - 1;
296 		rg.b = temp;
297 		JR( temp )
298 	}
299 
300 // JP
301 #define JP( cond )  if ( !(cond) ) goto jp_not_taken; pc = GET_ADDR(); goto loop;
302 
303 	case 0xC2: JP( !ZERO  ) // JP NZ,addr
304 	case 0xCA: JP(  ZERO  ) // JP Z,addr
305 	case 0xD2: JP( !CARRY ) // JP NC,addr
306 	case 0xDA: JP(  CARRY ) // JP C,addr
307 	case 0xE2: JP( !EVEN  ) // JP PO,addr
308 	case 0xEA: JP(  EVEN  ) // JP PE,addr
309 	case 0xF2: JP( !MINUS ) // JP P,addr
310 	case 0xFA: JP(  MINUS ) // JP M,addr
311 
312 	case 0xC3: // JP addr
313 		pc = GET_ADDR();
314 		goto loop;
315 
316 	case 0xE9: // JP HL
317 		pc = rp.hl;
318 		goto loop;
319 
320 // RET
321 #define RET( cond ) if ( cond ) goto ret_taken; s_time -= 6; goto loop;
322 
323 	case 0xC0: RET( !ZERO  ) // RET NZ
324 	case 0xC8: RET(  ZERO  ) // RET Z
325 	case 0xD0: RET( !CARRY ) // RET NC
326 	case 0xD8: RET(  CARRY ) // RET C
327 	case 0xE0: RET( !EVEN  ) // RET PO
328 	case 0xE8: RET(  EVEN  ) // RET PE
329 	case 0xF0: RET( !MINUS ) // RET P
330 	case 0xF8: RET(  MINUS ) // RET M
331 
332 	case 0xC9: // RET
333 	ret_taken:
334 		pc = READ_WORD( sp );
335 		sp = uint16_t (sp + 2);
336 		goto loop;
337 
338 // CALL
339 #define CALL( cond ) if ( cond ) goto call_taken; goto call_not_taken;
340 
341 	case 0xC4: CALL( !ZERO  ) // CALL NZ,addr
342 	case 0xCC: CALL(  ZERO  ) // CALL Z,addr
343 	case 0xD4: CALL( !CARRY ) // CALL NC,addr
344 	case 0xDC: CALL(  CARRY ) // CALL C,addr
345 	case 0xE4: CALL( !EVEN  ) // CALL PO,addr
346 	case 0xEC: CALL(  EVEN  ) // CALL PE,addr
347 	case 0xF4: CALL( !MINUS ) // CALL P,addr
348 	case 0xFC: CALL(  MINUS ) // CALL M,addr
349 
350 	case 0xCD:{// CALL addr
351 	call_taken:
352 		fuint16 addr = pc + 2;
353 		pc = GET_ADDR();
354 		sp = uint16_t (sp - 2);
355 		WRITE_WORD( sp, addr );
356 		goto loop;
357 	}
358 
359 	case 0xFF: // RST
360 		if ( (pc - 1) > 0xFFFF )
361 		{
362 			pc = uint16_t (pc - 1);
363 			s_time -= 11;
364 			goto loop;
365 		}
366 	CASE7( C7, CF, D7, DF, E7, EF, F7 ):
367 		data = pc;
368 		pc = opcode & 0x38;
369 		goto push_data;
370 
371 // PUSH/POP
372 	case 0xF5: // PUSH AF
373 		data = rg.a * 0x100u + flags;
374 		goto push_data;
375 
376 	case 0xC5: // PUSH BC
377 	case 0xD5: // PUSH DE
378 	case 0xE5: // PUSH HL
379 		data = R16( opcode, 4, 0xC5 );
380 	push_data:
381 		sp = uint16_t (sp - 2);
382 		WRITE_WORD( sp, data );
383 		goto loop;
384 
385 	case 0xF1: // POP AF
386 		flags = READ( sp );
387 		rg.a = READ( sp + 1 );
388 		sp = uint16_t (sp + 2);
389 		goto loop;
390 
391 	case 0xC1: // POP BC
392 	case 0xD1: // POP DE
393 	case 0xE1: // POP HL
394 		R16( opcode, 4, 0xC1 ) = READ_WORD( sp );
395 		sp = uint16_t (sp + 2);
396 		goto loop;
397 
398 // ADC/ADD/SBC/SUB
399 	case 0x96: // SUB (HL)
400 	case 0x86: // ADD (HL)
401 		flags &= ~C01;
402 	case 0x9E: // SBC (HL)
403 	case 0x8E: // ADC (HL)
404 		data = READ( rp.hl );
405 		goto adc_data;
406 
407 	case 0xD6: // SUB A,imm
408 	case 0xC6: // ADD imm
409 		flags &= ~C01;
410 	case 0xDE: // SBC A,imm
411 	case 0xCE: // ADC imm
412 		pc++;
413 		goto adc_data;
414 
415 	CASE7( 90, 91, 92, 93, 94, 95, 97 ): // SUB r
416 	CASE7( 80, 81, 82, 83, 84, 85, 87 ): // ADD r
417 		flags &= ~C01;
418 	CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // SBC r
419 	CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // ADC r
420 		data = R8( opcode & 7, 0 );
421 	adc_data: {
422 		int result = data + (flags & C01);
423 		data ^= rg.a;
424 		flags = opcode >> 3 & N02; // bit 4 is set in subtract opcodes
425 		if ( flags )
426 			result = -result;
427 		result += rg.a;
428 		data ^= result;
429 		flags |=(data & H10) |
430 				((data - -0x80) >> 6 & V04) |
431 				SZ28C( result & 0x1FF );
432 		rg.a = result;
433 		goto loop;
434 	}
435 
436 // CP
437 	case 0xBE: // CP (HL)
438 		data = READ( rp.hl );
439 		goto cp_data;
440 
441 	case 0xFE: // CP imm
442 		pc++;
443 		goto cp_data;
444 
445 	CASE7( B8, B9, BA, BB, BC, BD, BF ): // CP r
446 		data = R8( opcode, 0xB8 );
447 	cp_data: {
448 		int result = rg.a - data;
449 		flags = N02 | (data & (F20 | F08)) | (result >> 8 & C01);
450 		data ^= rg.a;
451 		flags |=(((result ^ rg.a) & data) >> 5 & V04) |
452 				(((data & H10) ^ result) & (S80 | H10));
453 		if ( (uint8_t) result )
454 			goto loop;
455 		flags |= Z40;
456 		goto loop;
457 	}
458 
459 // ADD HL,rp
460 
461 	case 0x39: // ADD HL,SP
462 		data = sp;
463 		goto add_hl_data;
464 
465 	case 0x09: // ADD HL,BC
466 	case 0x19: // ADD HL,DE
467 	case 0x29: // ADD HL,HL
468 		data = R16( opcode, 4, 0x09 );
469 	add_hl_data: {
470 		blargg_ulong sum = rp.hl + data;
471 		data ^= rp.hl;
472 		rp.hl = sum;
473 		flags = (flags & (S80 | Z40 | V04)) |
474 				(sum >> 16) |
475 				(sum >> 8 & (F20 | F08)) |
476 				((data ^ sum) >> 8 & H10);
477 		goto loop;
478 	}
479 
480 	case 0x27:{// DAA
481 		int a = rg.a;
482 		if ( a > 0x99 )
483 			flags |= C01;
484 
485 		int adjust = 0x60 & -(flags & C01);
486 
487 		if ( flags & H10 || (a & 0x0F) > 9 )
488 			adjust |= 0x06;
489 
490 		if ( flags & N02 )
491 			adjust = -adjust;
492 		a += adjust;
493 
494 		flags = (flags & (C01 | N02)) |
495 				((rg.a ^ a) & H10) |
496 				SZ28P( (uint8_t) a );
497 		rg.a = a;
498 		goto loop;
499 	}
500 	/*
501 	case 0x27:{// DAA
502 		// more optimized, but probably not worth the obscurity
503 		int f = (rg.a + (0xFF - 0x99)) >> 8 | flags; // (a > 0x99 ? C01 : 0) | flags
504 		int adjust = 0x60 & -(f & C01); // f & C01 ? 0x60 : 0
505 
506 		if ( (((rg.a + (0x0F - 9)) ^ rg.a) | f) & H10 ) // flags & H10 || (rg.a & 0x0F) > 9
507 			adjust |= 0x06;
508 
509 		if ( f & N02 )
510 			adjust = -adjust;
511 		int a = rg.a + adjust;
512 
513 		flags = (f & (N02 | C01)) | ((rg.a ^ a) & H10) | SZ28P( (uint8_t) a );
514 		rg.a = a;
515 		goto loop;
516 	}
517 	*/
518 
519 // INC/DEC
520 	case 0x34: // INC (HL)
521 		data = READ( rp.hl ) + 1;
522 		WRITE( rp.hl, data );
523 		goto inc_set_flags;
524 
525 	CASE7( 04, 0C, 14, 1C, 24, 2C, 3C ): // INC r
526 		data = ++R8( opcode >> 3, 0 );
527 	inc_set_flags:
528 		flags = (flags & C01) |
529 				(((data & 0x0F) - 1) & H10) |
530 				SZ28( (uint8_t) data );
531 		if ( data != 0x80 )
532 			goto loop;
533 		flags |= V04;
534 		goto loop;
535 
536 	case 0x35: // DEC (HL)
537 		data = READ( rp.hl ) - 1;
538 		WRITE( rp.hl, data );
539 		goto dec_set_flags;
540 
541 	CASE7( 05, 0D, 15, 1D, 25, 2D, 3D ): // DEC r
542 		data = --R8( opcode >> 3, 0 );
543 	dec_set_flags:
544 		flags = (flags & C01) | N02 |
545 				(((data & 0x0F) + 1) & H10) |
546 				SZ28( (uint8_t) data );
547 		if ( data != 0x7F )
548 			goto loop;
549 		flags |= V04;
550 		goto loop;
551 
552 	case 0x03: // INC BC
553 	case 0x13: // INC DE
554 	case 0x23: // INC HL
555 		R16( opcode, 4, 0x03 )++;
556 		goto loop;
557 
558 	case 0x33: // INC SP
559 		sp = uint16_t (sp + 1);
560 		goto loop;
561 
562 	case 0x0B: // DEC BC
563 	case 0x1B: // DEC DE
564 	case 0x2B: // DEC HL
565 		R16( opcode, 4, 0x0B )--;
566 		goto loop;
567 
568 	case 0x3B: // DEC SP
569 		sp = uint16_t (sp - 1);
570 		goto loop;
571 
572 // AND
573 	case 0xA6: // AND (HL)
574 		data = READ( rp.hl );
575 		goto and_data;
576 
577 	case 0xE6: // AND imm
578 		pc++;
579 		goto and_data;
580 
581 	CASE7( A0, A1, A2, A3, A4, A5, A7 ): // AND r
582 		data = R8( opcode, 0xA0 );
583 	and_data:
584 		rg.a &= data;
585 		flags = SZ28P( rg.a ) | H10;
586 		goto loop;
587 
588 // OR
589 	case 0xB6: // OR (HL)
590 		data = READ( rp.hl );
591 		goto or_data;
592 
593 	case 0xF6: // OR imm
594 		pc++;
595 		goto or_data;
596 
597 	CASE7( B0, B1, B2, B3, B4, B5, B7 ): // OR r
598 		data = R8( opcode, 0xB0 );
599 	or_data:
600 		rg.a |= data;
601 		flags = SZ28P( rg.a );
602 		goto loop;
603 
604 // XOR
605 	case 0xAE: // XOR (HL)
606 		data = READ( rp.hl );
607 		goto xor_data;
608 
609 	case 0xEE: // XOR imm
610 		pc++;
611 		goto xor_data;
612 
613 	CASE7( A8, A9, AA, AB, AC, AD, AF ): // XOR r
614 		data = R8( opcode, 0xA8 );
615 	xor_data:
616 		rg.a ^= data;
617 		flags = SZ28P( rg.a );
618 		goto loop;
619 
620 // LD
621 	CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (HL),r
622 		WRITE( rp.hl, R8( opcode, 0x70 ) );
623 		goto loop;
624 
625 	CASE6( 41, 42, 43, 44, 45, 47 ): // LD B,r
626 	CASE6( 48, 4A, 4B, 4C, 4D, 4F ): // LD C,r
627 	CASE6( 50, 51, 53, 54, 55, 57 ): // LD D,r
628 	CASE6( 58, 59, 5A, 5C, 5D, 5F ): // LD E,r
629 	CASE6( 60, 61, 62, 63, 65, 67 ): // LD H,r
630 	CASE6( 68, 69, 6A, 6B, 6C, 6F ): // LD L,r
631 	CASE6( 78, 79, 7A, 7B, 7C, 7D ): // LD A,r
632 		R8( opcode >> 3 & 7, 0 ) = R8( opcode & 7, 0 );
633 		goto loop;
634 
635 	CASE5( 06, 0E, 16, 1E, 26 ): // LD r,imm
636 		R8( opcode >> 3, 0 ) = data;
637 		pc++;
638 		goto loop;
639 
640 	case 0x36: // LD (HL),imm
641 		pc++;
642 		WRITE( rp.hl, data );
643 		goto loop;
644 
645 	CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(HL)
646 		R8( opcode >> 3, 8 ) = READ( rp.hl );
647 		goto loop;
648 
649 	case 0x01: // LD rp,imm
650 	case 0x11:
651 	case 0x21:
652 		R16( opcode, 4, 0x01 ) = GET_ADDR();
653 		pc += 2;
654 		goto loop;
655 
656 	case 0x31: // LD sp,imm
657 		sp = GET_ADDR();
658 		pc += 2;
659 		goto loop;
660 
661 	case 0x2A:{// LD HL,(addr)
662 		fuint16 addr = GET_ADDR();
663 		pc += 2;
664 		rp.hl = READ_WORD( addr );
665 		goto loop;
666 	}
667 
668 	case 0x32:{// LD (addr),A
669 		fuint16 addr = GET_ADDR();
670 		pc += 2;
671 		WRITE( addr, rg.a );
672 		goto loop;
673 	}
674 
675 	case 0x22:{// LD (addr),HL
676 		fuint16 addr = GET_ADDR();
677 		pc += 2;
678 		WRITE_WORD( addr, rp.hl );
679 		goto loop;
680 	}
681 
682 	case 0x02: // LD (BC),A
683 	case 0x12: // LD (DE),A
684 		WRITE( R16( opcode, 4, 0x02 ), rg.a );
685 		goto loop;
686 
687 	case 0x0A: // LD A,(BC)
688 	case 0x1A: // LD A,(DE)
689 		rg.a = READ( R16( opcode, 4, 0x0A ) );
690 		goto loop;
691 
692 	case 0xF9: // LD SP,HL
693 		sp = rp.hl;
694 		goto loop;
695 
696 // Rotate
697 
698 	case 0x07:{// RLCA
699 		fuint16 temp = rg.a;
700 		temp = (temp << 1) | (temp >> 7);
701 		flags = (flags & (S80 | Z40 | P04)) |
702 				(temp & (F20 | F08 | C01));
703 		rg.a = temp;
704 		goto loop;
705 	}
706 
707 	case 0x0F:{// RRCA
708 		fuint16 temp = rg.a;
709 		flags = (flags & (S80 | Z40 | P04)) |
710 				(temp & C01);
711 		temp = (temp << 7) | (temp >> 1);
712 		flags |= temp & (F20 | F08);
713 		rg.a = temp;
714 		goto loop;
715 	}
716 
717 	case 0x17:{// RLA
718 		blargg_ulong temp = (rg.a << 1) | (flags & C01);
719 		flags = (flags & (S80 | Z40 | P04)) |
720 				(temp & (F20 | F08)) |
721 				(temp >> 8);
722 		rg.a = temp;
723 		goto loop;
724 	}
725 
726 	case 0x1F:{// RRA
727 		fuint16 temp = (flags << 7) | (rg.a >> 1);
728 		flags = (flags & (S80 | Z40 | P04)) |
729 				(temp & (F20 | F08)) |
730 				(rg.a & C01);
731 		rg.a = temp;
732 		goto loop;
733 	}
734 
735 // Misc
736 	case 0x2F:{// CPL
737 		fuint16 temp = ~rg.a;
738 		flags = (flags & (S80 | Z40 | P04 | C01)) |
739 				(temp & (F20 | F08)) |
740 				(H10 | N02);
741 		rg.a = temp;
742 		goto loop;
743 	}
744 
745 	case 0x3F:{// CCF
746 		flags = ((flags & (S80 | Z40 | P04 | C01)) ^ C01) |
747 				(flags << 4 & H10) |
748 				(rg.a & (F20 | F08));
749 		goto loop;
750 	}
751 
752 	case 0x37: // SCF
753 		flags = (flags & (S80 | Z40 | P04)) | C01 |
754 				(rg.a & (F20 | F08));
755 		goto loop;
756 
757 	case 0xDB: // IN A,(imm)
758 		pc++;
759 		rg.a = IN( data + rg.a * 0x100 );
760 		goto loop;
761 
762 	case 0xE3:{// EX (SP),HL
763 		fuint16 temp = READ_WORD( sp );
764 		WRITE_WORD( sp, rp.hl );
765 		rp.hl = temp;
766 		goto loop;
767 	}
768 
769 	case 0xEB:{// EX DE,HL
770 		fuint16 temp = rp.hl;
771 		rp.hl = rp.de;
772 		rp.de = temp;
773 		goto loop;
774 	}
775 
776 	case 0xD9:{// EXX DE,HL
777 		fuint16 temp = r.alt.w.bc;
778 		r.alt.w.bc = rp.bc;
779 		rp.bc = temp;
780 
781 		temp = r.alt.w.de;
782 		r.alt.w.de = rp.de;
783 		rp.de = temp;
784 
785 		temp = r.alt.w.hl;
786 		r.alt.w.hl = rp.hl;
787 		rp.hl = temp;
788 		goto loop;
789 	}
790 
791 	case 0xF3: // DI
792 		r.iff1 = 0;
793 		r.iff2 = 0;
794 		goto loop;
795 
796 	case 0xFB: // EI
797 		r.iff1 = 1;
798 		r.iff2 = 1;
799 		// TODO: delayed effect
800 		goto loop;
801 
802 	case 0x76: // HALT
803 		goto halt;
804 
805 //////////////////////////////////////// CB prefix
806 	{
807 	case 0xCB:
808 		unsigned data2;
809 		data2 = INSTR( 1 );
810 		(void) data2; // TODO is this the same as data in all cases?
811 		pc++;
812 		switch ( data )
813 		{
814 
815 	// Rotate left
816 
817 	#define RLC( read, write ) {\
818 		fuint8 result = read;\
819 		result = uint8_t (result << 1) | (result >> 7);\
820 		flags = SZ28P( result ) | (result & C01);\
821 		write;\
822 		goto loop;\
823 	}
824 
825 		case 0x06: // RLC (HL)
826 			s_time += 7;
827 			data = rp.hl;
828 		rlc_data_addr:
829 			RLC( READ( data ), WRITE( data, result ) )
830 
831 		CASE7( 00, 01, 02, 03, 04, 05, 07 ):{// RLC r
832 			uint8_t& reg = R8( data, 0 );
833 			RLC( reg, reg = result )
834 		}
835 
836 	#define RL( read, write ) {\
837 		fuint16 result = (read << 1) | (flags & C01);\
838 		flags = SZ28PC( result );\
839 		write;\
840 		goto loop;\
841 	}
842 
843 		case 0x16: // RL (HL)
844 			s_time += 7;
845 			data = rp.hl;
846 		rl_data_addr:
847 			RL( READ( data ), WRITE( data, result ) )
848 
849 		CASE7( 10, 11, 12, 13, 14, 15, 17 ):{// RL r
850 			uint8_t& reg = R8( data, 0x10 );
851 			RL( reg, reg = result )
852 		}
853 
854 	#define SLA( read, add, write ) {\
855 		fuint16 result = (read << 1) | add;\
856 		flags = SZ28PC( result );\
857 		write;\
858 		goto loop;\
859 	}
860 
861 		case 0x26: // SLA (HL)
862 			s_time += 7;
863 			data = rp.hl;
864 		sla_data_addr:
865 			SLA( READ( data ), 0, WRITE( data, result ) )
866 
867 		CASE7( 20, 21, 22, 23, 24, 25, 27 ):{// SLA r
868 			uint8_t& reg = R8( data, 0x20 );
869 			SLA( reg, 0, reg = result )
870 		}
871 
872 		case 0x36: // SLL (HL)
873 			s_time += 7;
874 			data = rp.hl;
875 		sll_data_addr:
876 			SLA( READ( data ), 1, WRITE( data, result ) )
877 
878 		CASE7( 30, 31, 32, 33, 34, 35, 37 ):{// SLL r
879 			uint8_t& reg = R8( data, 0x30 );
880 			SLA( reg, 1, reg = result )
881 		}
882 
883 	// Rotate right
884 
885 	#define RRC( read, write ) {\
886 		fuint8 result = read;\
887 		flags = result & C01;\
888 		result = uint8_t (result << 7) | (result >> 1);\
889 		flags |= SZ28P( result );\
890 		write;\
891 		goto loop;\
892 	}
893 
894 		case 0x0E: // RRC (HL)
895 			s_time += 7;
896 			data = rp.hl;
897 		rrc_data_addr:
898 			RRC( READ( data ), WRITE( data, result ) )
899 
900 		CASE7( 08, 09, 0A, 0B, 0C, 0D, 0F ):{// RRC r
901 			uint8_t& reg = R8( data, 0x08 );
902 			RRC( reg, reg = result )
903 		}
904 
905 	#define RR( read, write ) {\
906 		fuint8 result = read;\
907 		fuint8 temp = result & C01;\
908 		result = uint8_t (flags << 7) | (result >> 1);\
909 		flags = SZ28P( result ) | temp;\
910 		write;\
911 		goto loop;\
912 	}
913 
914 		case 0x1E: // RR (HL)
915 			s_time += 7;
916 			data = rp.hl;
917 		rr_data_addr:
918 			RR( READ( data ), WRITE( data, result ) )
919 
920 		CASE7( 18, 19, 1A, 1B, 1C, 1D, 1F ):{// RR r
921 			uint8_t& reg = R8( data, 0x18 );
922 			RR( reg, reg = result )
923 		}
924 
925 	#define SRA( read, write ) {\
926 		fuint8 result = read;\
927 		flags = result & C01;\
928 		result = (result & 0x80) | (result >> 1);\
929 		flags |= SZ28P( result );\
930 		write;\
931 		goto loop;\
932 	}
933 
934 		case 0x2E: // SRA (HL)
935 			data = rp.hl;
936 			s_time += 7;
937 		sra_data_addr:
938 			SRA( READ( data ), WRITE( data, result ) )
939 
940 		CASE7( 28, 29, 2A, 2B, 2C, 2D, 2F ):{// SRA r
941 			uint8_t& reg = R8( data, 0x28 );
942 			SRA( reg, reg = result )
943 		}
944 
945 	#define SRL( read, write ) {\
946 		fuint8 result = read;\
947 		flags = result & C01;\
948 		result >>= 1;\
949 		flags |= SZ28P( result );\
950 		write;\
951 		goto loop;\
952 	}
953 
954 		case 0x3E: // SRL (HL)
955 			s_time += 7;
956 			data = rp.hl;
957 		srl_data_addr:
958 			SRL( READ( data ), WRITE( data, result ) )
959 
960 		CASE7( 38, 39, 3A, 3B, 3C, 3D, 3F ):{// SRL r
961 			uint8_t& reg = R8( data, 0x38 );
962 			SRL( reg, reg = result )
963 		}
964 
965 	// BIT
966 		{
967 			unsigned temp;
968 		CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ): // BIT b,(HL)
969 			s_time += 4;
970 			temp = READ( rp.hl );
971 			flags &= C01;
972 			goto bit_temp;
973 		CASE7( 40, 41, 42, 43, 44, 45, 47 ): // BIT 0,r
974 		CASE7( 48, 49, 4A, 4B, 4C, 4D, 4F ): // BIT 1,r
975 		CASE7( 50, 51, 52, 53, 54, 55, 57 ): // BIT 2,r
976 		CASE7( 58, 59, 5A, 5B, 5C, 5D, 5F ): // BIT 3,r
977 		CASE7( 60, 61, 62, 63, 64, 65, 67 ): // BIT 4,r
978 		CASE7( 68, 69, 6A, 6B, 6C, 6D, 6F ): // BIT 5,r
979 		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // BIT 6,r
980 		CASE7( 78, 79, 7A, 7B, 7C, 7D, 7F ): // BIT 7,r
981 			temp = R8( data & 7, 0 );
982 			flags = (flags & C01) | (temp & (F20 | F08));
983 		bit_temp:
984 			int masked = temp & 1 << (data >> 3 & 7);
985 			flags |=(masked & S80) | H10 |
986 					((masked - 1) >> 8 & (Z40 | P04));
987 			goto loop;
988 		}
989 
990 	// SET/RES
991 		CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(HL)
992 		CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(HL)
993 			s_time += 7;
994 			int temp = READ( rp.hl );
995 			int bit = 1 << (data >> 3 & 7);
996 			temp |= bit; // SET
997 			if ( !(data & 0x40) )
998 				temp ^= bit; // RES
999 			WRITE( rp.hl, temp );
1000 			goto loop;
1001 		}
1002 
1003 		CASE7( C0, C1, C2, C3, C4, C5, C7 ): // SET 0,r
1004 		CASE7( C8, C9, CA, CB, CC, CD, CF ): // SET 1,r
1005 		CASE7( D0, D1, D2, D3, D4, D5, D7 ): // SET 2,r
1006 		CASE7( D8, D9, DA, DB, DC, DD, DF ): // SET 3,r
1007 		CASE7( E0, E1, E2, E3, E4, E5, E7 ): // SET 4,r
1008 		CASE7( E8, E9, EA, EB, EC, ED, EF ): // SET 5,r
1009 		CASE7( F0, F1, F2, F3, F4, F5, F7 ): // SET 6,r
1010 		CASE7( F8, F9, FA, FB, FC, FD, FF ): // SET 7,r
1011 			R8( data & 7, 0 ) |= 1 << (data >> 3 & 7);
1012 			goto loop;
1013 
1014 		CASE7( 80, 81, 82, 83, 84, 85, 87 ): // RES 0,r
1015 		CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // RES 1,r
1016 		CASE7( 90, 91, 92, 93, 94, 95, 97 ): // RES 2,r
1017 		CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // RES 3,r
1018 		CASE7( A0, A1, A2, A3, A4, A5, A7 ): // RES 4,r
1019 		CASE7( A8, A9, AA, AB, AC, AD, AF ): // RES 5,r
1020 		CASE7( B0, B1, B2, B3, B4, B5, B7 ): // RES 6,r
1021 		CASE7( B8, B9, BA, BB, BC, BD, BF ): // RES 7,r
1022 			R8( data & 7, 0 ) &= ~(1 << (data >> 3 & 7));
1023 			goto loop;
1024 		}
1025 		assert( false );
1026 	}
1027 
1028 //////////////////////////////////////// ED prefix
1029 	{
1030 	case 0xED:
1031 		pc++;
1032 		s_time += ed_dd_timing [data] >> 4;
1033 		switch ( data )
1034 		{
1035 		{
1036 			blargg_ulong temp;
1037 		case 0x72: // SBC HL,SP
1038 		case 0x7A: // ADC HL,SP
1039 			temp = sp;
1040 			if ( 0 )
1041 		case 0x42: // SBC HL,BC
1042 		case 0x52: // SBC HL,DE
1043 		case 0x62: // SBC HL,HL
1044 		case 0x4A: // ADC HL,BC
1045 		case 0x5A: // ADC HL,DE
1046 		case 0x6A: // ADC HL,HL
1047 				temp = R16( data >> 3 & 6, 1, 0 );
1048 			blargg_ulong sum = temp + (flags & C01);
1049 			flags = ~data >> 2 & N02;
1050 			if ( flags )
1051 				sum = -sum;
1052 			sum += rp.hl;
1053 			temp ^= rp.hl;
1054 			temp ^= sum;
1055 			flags |=(sum >> 16 & C01) |
1056 					(temp >> 8 & H10) |
1057 					(sum >> 8 & (S80 | F20 | F08)) |
1058 					((temp - -0x8000) >> 14 & V04);
1059 			rp.hl = sum;
1060 			if ( (uint16_t) sum )
1061 				goto loop;
1062 			flags |= Z40;
1063 			goto loop;
1064 		}
1065 
1066 		CASE8( 40, 48, 50, 58, 60, 68, 70, 78 ):{// IN r,(C)
1067 			int temp = IN( rp.bc );
1068 			R8( data >> 3, 8 ) = temp;
1069 			flags = (flags & C01) | SZ28P( temp );
1070 			goto loop;
1071 		}
1072 
1073 		case 0x71: // OUT (C),0
1074 			rg.flags = 0;
1075 		CASE7( 41, 49, 51, 59, 61, 69, 79 ): // OUT (C),r
1076 			OUT( rp.bc, R8( data >> 3, 8 ) );
1077 			goto loop;
1078 
1079 		{
1080 			unsigned temp;
1081 		case 0x73: // LD (ADDR),SP
1082 			temp = sp;
1083 			if ( 0 )
1084 		case 0x43: // LD (ADDR),BC
1085 		case 0x53: // LD (ADDR),DE
1086 				temp = R16( data, 4, 0x43 );
1087 			fuint16 addr = GET_ADDR();
1088 			pc += 2;
1089 			WRITE_WORD( addr, temp );
1090 			goto loop;
1091 		}
1092 
1093 		case 0x4B: // LD BC,(ADDR)
1094 		case 0x5B:{// LD DE,(ADDR)
1095 			fuint16 addr = GET_ADDR();
1096 			pc += 2;
1097 			R16( data, 4, 0x4B ) = READ_WORD( addr );
1098 			goto loop;
1099 		}
1100 
1101 		case 0x7B:{// LD SP,(ADDR)
1102 			fuint16 addr = GET_ADDR();
1103 			pc += 2;
1104 			sp = READ_WORD( addr );
1105 			goto loop;
1106 		}
1107 
1108 		case 0x67:{// RRD
1109 			fuint8 temp = READ( rp.hl );
1110 			WRITE( rp.hl, (rg.a << 4) | (temp >> 4) );
1111 			temp = (rg.a & 0xF0) | (temp & 0x0F);
1112 			flags = (flags & C01) | SZ28P( temp );
1113 			rg.a = temp;
1114 			goto loop;
1115 		}
1116 
1117 		case 0x6F:{// RLD
1118 			fuint8 temp = READ( rp.hl );
1119 			WRITE( rp.hl, (temp << 4) | (rg.a & 0x0F) );
1120 			temp = (rg.a & 0xF0) | (temp >> 4);
1121 			flags = (flags & C01) | SZ28P( temp );
1122 			rg.a = temp;
1123 			goto loop;
1124 		}
1125 
1126 		CASE8( 44, 4C, 54, 5C, 64, 6C, 74, 7C ): // NEG
1127 			opcode = 0x10; // flag to do SBC instead of ADC
1128 			flags &= ~C01;
1129 			data = rg.a;
1130 			rg.a = 0;
1131 			goto adc_data;
1132 
1133 		{
1134 			int inc;
1135 		case 0xA9: // CPD
1136 		case 0xB9: // CPDR
1137 			inc = -1;
1138 			if ( 0 )
1139 		case 0xA1: // CPI
1140 		case 0xB1: // CPIR
1141 				inc = +1;
1142 			fuint16 addr = rp.hl;
1143 			rp.hl = addr + inc;
1144 			int temp = READ( addr );
1145 
1146 			int result = rg.a - temp;
1147 			flags = (flags & C01) | N02 |
1148 					((((temp ^ rg.a) & H10) ^ result) & (S80 | H10));
1149 
1150 			if ( !(uint8_t) result ) flags |= Z40;
1151 			result -= (flags & H10) >> 4;
1152 			flags |= result & F08;
1153 			flags |= result << 4 & F20;
1154 			if ( !--rp.bc )
1155 				goto loop;
1156 
1157 			flags |= V04;
1158 			if ( flags & Z40 || data < 0xB0 )
1159 				goto loop;
1160 
1161 			pc -= 2;
1162 			s_time += 5;
1163 			goto loop;
1164 		}
1165 
1166 		{
1167 			int inc;
1168 		case 0xA8: // LDD
1169 		case 0xB8: // LDDR
1170 			inc = -1;
1171 			if ( 0 )
1172 		case 0xA0: // LDI
1173 		case 0xB0: // LDIR
1174 				inc = +1;
1175 			fuint16 addr = rp.hl;
1176 			rp.hl = addr + inc;
1177 			int temp = READ( addr );
1178 
1179 			addr = rp.de;
1180 			rp.de = addr + inc;
1181 			WRITE( addr, temp );
1182 
1183 			temp += rg.a;
1184 			flags = (flags & (S80 | Z40 | C01)) |
1185 					(temp & F08) | (temp << 4 & F20);
1186 			if ( !--rp.bc )
1187 				goto loop;
1188 
1189 			flags |= V04;
1190 			if ( data < 0xB0 )
1191 				goto loop;
1192 
1193 			pc -= 2;
1194 			s_time += 5;
1195 			goto loop;
1196 		}
1197 
1198 		{
1199 			int inc;
1200 		case 0xAB: // OUTD
1201 		case 0xBB: // OTDR
1202 			inc = -1;
1203 			if ( 0 )
1204 		case 0xA3: // OUTI
1205 		case 0xB3: // OTIR
1206 				inc = +1;
1207 			fuint16 addr = rp.hl;
1208 			rp.hl = addr + inc;
1209 			int temp = READ( addr );
1210 
1211 			int b = --rg.b;
1212 			flags = (temp >> 6 & N02) | SZ28( b );
1213 			if ( b && data >= 0xB0 )
1214 			{
1215 				pc -= 2;
1216 				s_time += 5;
1217 			}
1218 
1219 			OUT( rp.bc, temp );
1220 			goto loop;
1221 		}
1222 
1223 		{
1224 			int inc;
1225 		case 0xAA: // IND
1226 		case 0xBA: // INDR
1227 			inc = -1;
1228 			if ( 0 )
1229 		case 0xA2: // INI
1230 		case 0xB2: // INIR
1231 				inc = +1;
1232 
1233 			fuint16 addr = rp.hl;
1234 			rp.hl = addr + inc;
1235 
1236 			int temp = IN( rp.bc );
1237 
1238 			int b = --rg.b;
1239 			flags = (temp >> 6 & N02) | SZ28( b );
1240 			if ( b && data >= 0xB0 )
1241 			{
1242 				pc -= 2;
1243 				s_time += 5;
1244 			}
1245 
1246 			WRITE( addr, temp );
1247 			goto loop;
1248 		}
1249 
1250 		case 0x47: // LD I,A
1251 			r.i = rg.a;
1252 			goto loop;
1253 
1254 		case 0x4F: // LD R,A
1255 			SET_R( rg.a );
1256 			debug_printf( "LD R,A not supported\n" );
1257 			warning = true;
1258 			goto loop;
1259 
1260 		case 0x57: // LD A,I
1261 			rg.a = r.i;
1262 			goto ld_ai_common;
1263 
1264 		case 0x5F: // LD A,R
1265 			rg.a = GET_R();
1266 			debug_printf( "LD A,R not supported\n" );
1267 			warning = true;
1268 		ld_ai_common:
1269 			flags = (flags & C01) | SZ28( rg.a ) | (r.iff2 << 2 & V04);
1270 			goto loop;
1271 
1272 		CASE8( 45, 4D, 55, 5D, 65, 6D, 75, 7D ): // RETI/RETN
1273 			r.iff1 = r.iff2;
1274 			goto ret_taken;
1275 
1276 		case 0x46: case 0x4E: case 0x66: case 0x6E: // IM 0
1277 			r.im = 0;
1278 			goto loop;
1279 
1280 		case 0x56: case 0x76: // IM 1
1281 			r.im = 1;
1282 			goto loop;
1283 
1284 		case 0x5E: case 0x7E: // IM 2
1285 			r.im = 2;
1286 			goto loop;
1287 
1288 		default:
1289 			debug_printf( "Opcode $ED $%02X not supported\n", data );
1290 			warning = true;
1291 			goto loop;
1292 		}
1293 		assert( false );
1294 	}
1295 
1296 //////////////////////////////////////// DD/FD prefix
1297 	{
1298 	fuint16 ixy;
1299 	case 0xDD:
1300 		ixy = ix;
1301 		goto ix_prefix;
1302 	case 0xFD:
1303 		ixy = iy;
1304 	ix_prefix:
1305 		pc++;
1306 		unsigned data2 = READ_PROG( pc );
1307 		s_time += ed_dd_timing [data] & 0x0F;
1308 		switch ( data )
1309 		{
1310 	// TODO: find a more efficient way of avoiding a negative address
1311 	#define IXY_DISP( ixy, disp )   uint16_t ((ixy) + (disp))
1312 
1313 	#define SET_IXY( in ) if ( opcode == 0xDD ) ix = in; else iy = in;
1314 
1315 	// ADD/ADC/SUB/SBC
1316 
1317 		case 0x96: // SUB (IXY+disp)
1318 		case 0x86: // ADD (IXY+disp)
1319 			flags &= ~C01;
1320 		case 0x9E: // SBC (IXY+disp)
1321 		case 0x8E: // ADC (IXY+disp)
1322 			pc++;
1323 			opcode = data;
1324 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1325 			goto adc_data;
1326 
1327 		case 0x94: // SUB HXY
1328 		case 0x84: // ADD HXY
1329 			flags &= ~C01;
1330 		case 0x9C: // SBC HXY
1331 		case 0x8C: // ADC HXY
1332 			opcode = data;
1333 			data = ixy >> 8;
1334 			goto adc_data;
1335 
1336 		case 0x95: // SUB LXY
1337 		case 0x85: // ADD LXY
1338 			flags &= ~C01;
1339 		case 0x9D: // SBC LXY
1340 		case 0x8D: // ADC LXY
1341 			opcode = data;
1342 			data = (uint8_t) ixy;
1343 			goto adc_data;
1344 
1345 		{
1346 			unsigned temp;
1347 		case 0x39: // ADD IXY,SP
1348 			temp = sp;
1349 			goto add_ixy_data;
1350 
1351 		case 0x29: // ADD IXY,HL
1352 			temp = ixy;
1353 			goto add_ixy_data;
1354 
1355 		case 0x09: // ADD IXY,BC
1356 		case 0x19: // ADD IXY,DE
1357 			temp = R16( data, 4, 0x09 );
1358 		add_ixy_data: {
1359 			blargg_ulong sum = ixy + temp;
1360 			temp ^= ixy;
1361 			ixy = (uint16_t) sum;
1362 			flags = (flags & (S80 | Z40 | V04)) |
1363 					(sum >> 16) |
1364 					(sum >> 8 & (F20 | F08)) |
1365 					((temp ^ sum) >> 8 & H10);
1366 			goto set_ixy;
1367 		}
1368 		}
1369 
1370 	// AND
1371 		case 0xA6: // AND (IXY+disp)
1372 			pc++;
1373 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1374 			goto and_data;
1375 
1376 		case 0xA4: // AND HXY
1377 			data = ixy >> 8;
1378 			goto and_data;
1379 
1380 		case 0xA5: // AND LXY
1381 			data = (uint8_t) ixy;
1382 			goto and_data;
1383 
1384 	// OR
1385 		case 0xB6: // OR (IXY+disp)
1386 			pc++;
1387 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1388 			goto or_data;
1389 
1390 		case 0xB4: // OR HXY
1391 			data = ixy >> 8;
1392 			goto or_data;
1393 
1394 		case 0xB5: // OR LXY
1395 			data = (uint8_t) ixy;
1396 			goto or_data;
1397 
1398 	// XOR
1399 		case 0xAE: // XOR (IXY+disp)
1400 			pc++;
1401 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1402 			goto xor_data;
1403 
1404 		case 0xAC: // XOR HXY
1405 			data = ixy >> 8;
1406 			goto xor_data;
1407 
1408 		case 0xAD: // XOR LXY
1409 			data = (uint8_t) ixy;
1410 			goto xor_data;
1411 
1412 	// CP
1413 		case 0xBE: // CP (IXY+disp)
1414 			pc++;
1415 			data = READ( IXY_DISP( ixy, (int8_t) data2 )  );
1416 			goto cp_data;
1417 
1418 		case 0xBC: // CP HXY
1419 			data = ixy >> 8;
1420 			goto cp_data;
1421 
1422 		case 0xBD: // CP LXY
1423 			data = (uint8_t) ixy;
1424 			goto cp_data;
1425 
1426 	// LD
1427 		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (IXY+disp),r
1428 			data = R8( data, 0x70 );
1429 			if ( 0 )
1430 		case 0x36: // LD (IXY+disp),imm
1431 				pc++, data = READ_PROG( pc );
1432 			pc++;
1433 			WRITE( IXY_DISP( ixy, (int8_t) data2 ), data );
1434 			goto loop;
1435 
1436 		CASE5( 44, 4C, 54, 5C, 7C ): // LD r,HXY
1437 			R8( data >> 3, 8 ) = ixy >> 8;
1438 			goto loop;
1439 
1440 		case 0x64: // LD HXY,HXY
1441 		case 0x6D: // LD LXY,LXY
1442 			goto loop;
1443 
1444 		CASE5( 45, 4D, 55, 5D, 7D ): // LD r,LXY
1445 			R8( data >> 3, 8 ) = ixy;
1446 			goto loop;
1447 
1448 		CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(IXY+disp)
1449 			pc++;
1450 			R8( data >> 3, 8 ) = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1451 			goto loop;
1452 
1453 		case 0x26: // LD HXY,imm
1454 			pc++;
1455 			goto ld_hxy_data;
1456 
1457 		case 0x65: // LD HXY,LXY
1458 			data2 = (uint8_t) ixy;
1459 			goto ld_hxy_data;
1460 
1461 		CASE5( 60, 61, 62, 63, 67 ): // LD HXY,r
1462 			data2 = R8( data, 0x60 );
1463 		ld_hxy_data:
1464 			ixy = (uint8_t) ixy | (data2 << 8);
1465 			goto set_ixy;
1466 
1467 		case 0x2E: // LD LXY,imm
1468 			pc++;
1469 			goto ld_lxy_data;
1470 
1471 		case 0x6C: // LD LXY,HXY
1472 			data2 = ixy >> 8;
1473 			goto ld_lxy_data;
1474 
1475 		CASE5( 68, 69, 6A, 6B, 6F ): // LD LXY,r
1476 			data2 = R8( data, 0x68 );
1477 		ld_lxy_data:
1478 			ixy = (ixy & 0xFF00) | data2;
1479 		set_ixy:
1480 			if ( opcode == 0xDD )
1481 			{
1482 				ix = ixy;
1483 				goto loop;
1484 			}
1485 			iy = ixy;
1486 			goto loop;
1487 
1488 		case 0xF9: // LD SP,IXY
1489 			sp = ixy;
1490 			goto loop;
1491 
1492 		case 0x22:{// LD (ADDR),IXY
1493 			fuint16 addr = GET_ADDR();
1494 			pc += 2;
1495 			WRITE_WORD( addr, ixy );
1496 			goto loop;
1497 		}
1498 
1499 		case 0x21: // LD IXY,imm
1500 			ixy = GET_ADDR();
1501 			pc += 2;
1502 			goto set_ixy;
1503 
1504 		case 0x2A:{// LD IXY,(addr)
1505 			fuint16 addr = GET_ADDR();
1506 			ixy = READ_WORD( addr );
1507 			pc += 2;
1508 			goto set_ixy;
1509 		}
1510 
1511 	// DD/FD CB prefix
1512 		case 0xCB: {
1513 			data = IXY_DISP( ixy, (int8_t) data2 );
1514 			pc++;
1515 			data2 = READ_PROG( pc );
1516 			pc++;
1517 			switch ( data2 )
1518 			{
1519 			case 0x06: goto rlc_data_addr; // RLC (IXY)
1520 			case 0x16: goto rl_data_addr;  // RL (IXY)
1521 			case 0x26: goto sla_data_addr; // SLA (IXY)
1522 			case 0x36: goto sll_data_addr; // SLL (IXY)
1523 			case 0x0E: goto rrc_data_addr; // RRC (IXY)
1524 			case 0x1E: goto rr_data_addr;  // RR (IXY)
1525 			case 0x2E: goto sra_data_addr; // SRA (IXY)
1526 			case 0x3E: goto srl_data_addr; // SRL (IXY)
1527 
1528 			CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ):{// BIT b,(IXY+disp)
1529 				fuint8 temp = READ( data );
1530 				int masked = temp & 1 << (data2 >> 3 & 7);
1531 				flags = (flags & C01) | H10 |
1532 						(masked & S80) |
1533 						((masked - 1) >> 8 & (Z40 | P04));
1534 				goto loop;
1535 			}
1536 
1537 			CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(IXY+disp)
1538 			CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(IXY+disp)
1539 				int temp = READ( data );
1540 				int bit = 1 << (data2 >> 3 & 7);
1541 				temp |= bit; // SET
1542 				if ( !(data2 & 0x40) )
1543 					temp ^= bit; // RES
1544 				WRITE( data, temp );
1545 				goto loop;
1546 			}
1547 
1548 			default:
1549 				debug_printf( "Opcode $%02X $CB $%02X not supported\n", opcode, data2 );
1550 				warning = true;
1551 				goto loop;
1552 			}
1553 			assert( false );
1554 		}
1555 
1556 	// INC/DEC
1557 		case 0x23: // INC IXY
1558 			ixy = uint16_t (ixy + 1);
1559 			goto set_ixy;
1560 
1561 		case 0x2B: // DEC IXY
1562 			ixy = uint16_t (ixy - 1);
1563 			goto set_ixy;
1564 
1565 		case 0x34: // INC (IXY+disp)
1566 			ixy = IXY_DISP( ixy, (int8_t) data2 );
1567 			pc++;
1568 			data = READ( ixy ) + 1;
1569 			WRITE( ixy, data );
1570 			goto inc_set_flags;
1571 
1572 		case 0x35: // DEC (IXY+disp)
1573 			ixy = IXY_DISP( ixy, (int8_t) data2 );
1574 			pc++;
1575 			data = READ( ixy ) - 1;
1576 			WRITE( ixy, data );
1577 			goto dec_set_flags;
1578 
1579 		case 0x24: // INC HXY
1580 			ixy = uint16_t (ixy + 0x100);
1581 			data = ixy >> 8;
1582 			goto inc_xy_common;
1583 
1584 		case 0x2C: // INC LXY
1585 			data = uint8_t (ixy + 1);
1586 			ixy = (ixy & 0xFF00) | data;
1587 		inc_xy_common:
1588 			if ( opcode == 0xDD )
1589 			{
1590 				ix = ixy;
1591 				goto inc_set_flags;
1592 			}
1593 			iy = ixy;
1594 			goto inc_set_flags;
1595 
1596 		case 0x25: // DEC HXY
1597 			ixy = uint16_t (ixy - 0x100);
1598 			data = ixy >> 8;
1599 			goto dec_xy_common;
1600 
1601 		case 0x2D: // DEC LXY
1602 			data = uint8_t (ixy - 1);
1603 			ixy = (ixy & 0xFF00) | data;
1604 		dec_xy_common:
1605 			if ( opcode == 0xDD )
1606 			{
1607 				ix = ixy;
1608 				goto dec_set_flags;
1609 			}
1610 			iy = ixy;
1611 			goto dec_set_flags;
1612 
1613 	// PUSH/POP
1614 		case 0xE5: // PUSH IXY
1615 			data = ixy;
1616 			goto push_data;
1617 
1618 		case 0xE1:{// POP IXY
1619 			ixy = READ_WORD( sp );
1620 			sp = uint16_t (sp + 2);
1621 			goto set_ixy;
1622 		}
1623 
1624 	// Misc
1625 
1626 		case 0xE9: // JP (IXY)
1627 			pc = ixy;
1628 			goto loop;
1629 
1630 		case 0xE3:{// EX (SP),IXY
1631 			fuint16 temp = READ_WORD( sp );
1632 			WRITE_WORD( sp, ixy );
1633 			ixy = temp;
1634 			goto set_ixy;
1635 		}
1636 
1637 		default:
1638 			debug_printf( "Unnecessary DD/FD prefix encountered\n" );
1639 			warning = true;
1640 			pc--;
1641 			goto loop;
1642 		}
1643 		assert( false );
1644 	}
1645 
1646 	}
1647 	debug_printf( "Unhandled main opcode: $%02X\n", opcode );
1648 	assert( false );
1649 
1650 halt:
1651 	s_time &= 3; // increment by multiple of 4
1652 out_of_time:
1653 	pc--;
1654 
1655 	s.time   = s_time;
1656 	rg.flags = flags;
1657 	r.ix     = ix;
1658 	r.iy     = iy;
1659 	r.sp     = sp;
1660 	r.pc     = pc;
1661 	this->r.b = rg;
1662 	this->state_ = s;
1663 	this->state = &this->state_;
1664 
1665 	return warning;
1666 }
1667