1 // Game_Music_Emu 0.6.0. http://www.slack.net/~ant/
2 
3 /*
4 Last validated with zexall 2006.11.14 2:19 PM
5 * Doesn't implement the R register or immediate interrupt after EI.
6 * Address wrap-around isn't completely correct, but is prevented from crashing emulator.
7 */
8 
9 #include "Kss_Cpu.h"
10 
11 #include "blargg_endian.h"
12 #include <string.h>
13 
14 //#include "z80_cpu_log.h"
15 
16 /* Copyright (C) 2006 Shay Green. This module is free software; you
17 can redistribute it and/or modify it under the terms of the GNU Lesser
18 General Public License as published by the Free Software Foundation; either
19 version 2.1 of the License, or (at your option) any later version. This
20 module is distributed in the hope that it will be useful, but WITHOUT ANY
21 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
22 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
23 details. You should have received a copy of the GNU Lesser General Public
24 License along with this module; if not, write to the Free Software Foundation,
25 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
26 
// SYNC_TIME() stores the local cycle counter s_time into s.time, and
// RELOAD_TIME() copies s.time back into s_time. Both expand to expressions
// that name `s` and `s_time` directly, so they only work where those
// identifiers are in scope.
27 #define SYNC_TIME()     (void) (s.time = s_time)
28 #define RELOAD_TIME()   (void) (s_time = s.time)
29
30 // Callbacks to emulator
31
// Port write: forwards to kss_cpu_out(). The `cpu` argument is ignored and
// `this` is passed instead.
32 #define CPU_OUT( cpu, addr, data, time )\
33 	kss_cpu_out( this, time, addr, data )
34
// Port read: forwards to kss_cpu_in(). The `cpu` argument is ignored and
// `this` is passed instead.
35 #define CPU_IN( cpu, addr, time )\
36 	kss_cpu_in( this, time, addr )
37
// Memory write: runs SYNC_TIME() first, then calls kss_cpu_write(). Both the
// `cpu` and `time` arguments are ignored.
38 #define CPU_WRITE( cpu, addr, data, time )\
39 	(SYNC_TIME(), kss_cpu_write( this, addr, data ))
40 
41 #include "blargg_source.h"
42 
// Bit masks for the Z80 flag register, listed from bit 0 up to bit 7.
// Each name ends with the mask's hex value for clarity.
const int C01 = 0x01; // carry
const int N02 = 0x02; // set by subtract-type operations
const int P04 = 0x04; // parity (same bit as V04)
const int V04 = 0x04; // overflow (same bit as P04)
const int F08 = 0x08; // bit 3
const int H10 = 0x10; // half carry
const int F20 = 0x20; // bit 5
const int Z40 = 0x40; // zero
const int S80 = 0x80; // sign
53 
// Flag lookups into szpc, the table filled in by the Kss_Cpu constructor.
// Entries 0x000-0x0FF have C01 clear and entries 0x100-0x1FF have it set.
// SZ28P and SZ28PC return the table entry unchanged (P04 included);
// SZ28C and SZ28 return it with the P04 bit masked off.
54 #define SZ28P( n )  szpc [n]
55 #define SZ28PC( n ) szpc [n]
56 #define SZ28C( n )  (szpc [n] & ~P04)
57 #define SZ28( n )   SZ28C( n )
58
// Write/read the r.r field. The header comment of this file notes that the
// R register itself is not implemented.
59 #define SET_R( n )  (void) (r.r = n)
60 #define GET_R()     (r.r)
61 
Kss_Cpu()62 Kss_Cpu::Kss_Cpu()
63 {
64 	state = &state_;
65 
66 	for ( int i = 0x100; --i >= 0; )
67 	{
68 		int even = 1;
69 		for ( int p = i; p; p >>= 1 )
70 			even ^= p;
71 		int n = (i & (S80 | F20 | F08)) | ((even & 1) * P04);
72 		szpc [i] = n;
73 		szpc [i + 0x100] = n | C01;
74 	}
75 	szpc [0x000] |= Z40;
76 	szpc [0x100] |= Z40;
77 }
78 
set_page(int i,void * write,void const * read)79 inline void Kss_Cpu::set_page( int i, void* write, void const* read )
80 {
81 	blargg_long offset = KSS_CPU_PAGE_OFFSET( i * (blargg_long) page_size );
82 	state->write [i] = (byte      *) write - offset;
83 	state->read  [i] = (byte const*) read  - offset;
84 }
85 
reset(void * unmapped_write,void const * unmapped_read)86 void Kss_Cpu::reset( void* unmapped_write, void const* unmapped_read )
87 {
88 	check( state == &state_ );
89 	state = &state_;
90 	state_.time = 0;
91 	state_.base = 0;
92 	end_time_   = 0;
93 
94 	for ( int i = 0; i < page_count + 1; i++ )
95 		set_page( i, unmapped_write, unmapped_read );
96 
97 	memset( &r, 0, sizeof r );
98 }
99 
map_mem(unsigned addr,blargg_ulong size,void * write,void const * read)100 void Kss_Cpu::map_mem( unsigned addr, blargg_ulong size, void* write, void const* read )
101 {
102 	// address range must begin and end on page boundaries
103 	require( addr % page_size == 0 );
104 	require( size % page_size == 0 );
105 
106 	unsigned first_page = addr / page_size;
107 	for ( unsigned i = size / page_size; i--; )
108 	{
109 		blargg_long offset = i * (blargg_long) page_size;
110 		set_page( first_page + i, (byte*) write + offset, (byte const*) read + offset );
111 	}
112 }
113 
// Memory and port access helpers. All of them expand to code that names
// `s` and/or `s_time` directly, so they only work where those are in scope.
//
// TIME      - local cycle counter plus s.base; passed to the callbacks
// RW_MEM    - picks the page pointer for addr from s.read or s.write (chosen
//             by `rw`) and indexes it with KSS_CPU_PAGE_OFFSET( addr )
// READ_PROG - byte read through the read page table; READ is an alias
// WRITE     - byte write routed through the CPU_WRITE callback instead of
//             being stored directly (the direct store is the disabled variant)
// READ_WORD / WRITE_WORD - 16-bit access via GET_LE16 / SET_LE16 on the address
//             of the first byte in the mapped page; WRITE_WORD stores straight
//             into the write page and does not go through CPU_WRITE
// IN / OUT  - port access through the CPU_IN / CPU_OUT callbacks
114 #define TIME                        (s_time + s.base)
115 #define RW_MEM( addr, rw )          (s.rw [(addr) >> page_shift] [KSS_CPU_PAGE_OFFSET( addr )])
116 #define READ_PROG( addr )           RW_MEM( addr, read )
117 #define READ( addr )                READ_PROG( addr )
118 //#define WRITE( addr, data )       (void) (RW_MEM( addr, write ) = data)
119 #define WRITE( addr, data )         CPU_WRITE( this, addr, data, TIME )
120 #define READ_WORD( addr )           GET_LE16( &READ( addr ) )
121 #define WRITE_WORD( addr, data )    SET_LE16( &RW_MEM( addr, write ), data )
122 #define IN( addr )                  CPU_IN( this, addr, TIME )
123 #define OUT( addr, data )           CPU_OUT( this, addr, data, TIME )
124 
// R8( n, offset ): 8-bit register access through the r8_ byte array.
// `offset` is subtracted from the array base before indexing, so a caller can
// pass an opcode together with the opcode value that corresponds to index 0.
// On little-endian hosts the low bit of the index is flipped (n ^ 1); on
// big-endian hosts the index is used as is. The build fails if neither
// byte-order macro is set.
125 #if BLARGG_BIG_ENDIAN
126 	#define R8( n, offset ) ((r8_ - offset) [n])
127 #elif BLARGG_LITTLE_ENDIAN
128 	#define R8( n, offset ) ((r8_ - offset) [(n) ^ 1])
129 #else
130 	#error "Byte order of CPU must be known"
131 #endif
132
// R16( n, shift, offset ): 16-bit register access through r16_.
// The disabled definition below shows the plain array-index form. The active
// one does the same selection as a byte offset: both n and offset are shifted
// right by (shift - 1), the difference is added to r16_ viewed as char*, and
// the result is dereferenced as uint16_t.
133 //#define R16( n, shift, offset )   (r16_ [((n) >> shift) - (offset >> shift)])
134
135 // help compiler see that it can just adjust stack offset, saving an extra instruction
136 #define R16( n, shift, offset )\
137 	(*(uint16_t*) ((char*) r16_ - (offset >> (shift - 1)) + ((n) >> (shift - 1))))
138 
// CASEn( ... ) expands to n consecutive `case` labels. Each argument is
// token-pasted after `0x`, so arguments are written as bare hex digits
// (e.g. CASE5( 06, 0E, 16, 1E, 26 )). The last label is emitted without its
// colon; the caller writes it after the macro call. CASE6..CASE8 are built
// on top of the next smaller macro.
139 #define CASE5( a, b, c, d, e          ) case 0x##a:case 0x##b:case 0x##c:case 0x##d:case 0x##e
140 #define CASE6( a, b, c, d, e, f       ) CASE5( a, b, c, d, e       ): case 0x##f
141 #define CASE7( a, b, c, d, e, f, g    ) CASE6( a, b, c, d, e, f    ): case 0x##g
142 #define CASE8( a, b, c, d, e, f, g, h ) CASE7( a, b, c, d, e, f, g ): case 0x##h
143 
// Extra cycle counts for prefixed instructions, one entry per possible byte
// following the prefix (row = high hex digit, column = low hex digit).
// The $ED handler in run() adds (entry >> 4) to s_time.
// NOTE(review): the code that consumes the low nibble for $DD/$FD is not in
// this part of the file - confirm how it is applied.
144 // high four bits are $ED time - 8, low four bits are $DD/$FD time - 8
145 static byte const ed_dd_timing [0x100] = {
146 //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
147 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
148 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
149 0x00,0x06,0x0C,0x02,0x00,0x00,0x03,0x00,0x00,0x07,0x0C,0x02,0x00,0x00,0x03,0x00,
150 0x00,0x00,0x00,0x00,0x0F,0x0F,0x0B,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,
151 0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,
152 0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x10,
153 0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0xA0,
154 0x4B,0x4B,0x7B,0xCB,0x0B,0x6B,0x00,0x0B,0x40,0x40,0x70,0xC0,0x00,0x60,0x0B,0x00,
155 0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,
156 0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0B,0x00,
157 0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,0x80,0x80,0x80,0x80,0x00,0x00,0x0B,0x00,
158 0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,0xD0,0xD0,0xD0,0xD0,0x00,0x00,0x0B,0x00,
159 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0F,0x00,0x00,0x00,0x00,
160 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
161 0x00,0x06,0x00,0x0F,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
162 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,
163 };
164 
// Working integer types for the interpreter's locals. They are deliberately
// full-width int/unsigned rather than exact-width types: even on x86, using
// short and unsigned char was slower.
typedef unsigned int fuint8;
typedef unsigned int fuint16;
typedef signed int   fint16;
169 
run(cpu_time_t end_time)170 bool Kss_Cpu::run( cpu_time_t end_time )
171 {
172 	set_end_time( end_time );
173 	state_t s = this->state_;
174 	this->state = &s;
175 	bool warning = false;
176 
177 	typedef BOOST::int8_t int8_t;
178 
179 	union {
180 		regs_t rg;
181 		pairs_t rp;
182 		uint8_t r8_ [8]; // indexed
183 		uint16_t r16_ [4];
184 	};
185 	rg = this->r.b;
186 
187 	cpu_time_t s_time = s.time;
188 	fuint16 pc = r.pc;
189 	fuint16 sp = r.sp;
190 	fuint16 ix = r.ix; // TODO: keep in memory for direct access?
191 	fuint16 iy = r.iy;
192 	int flags = r.b.flags;
193 
194 	goto loop;
195 jr_not_taken:
196 	s_time -= 5;
197 	goto loop;
198 call_not_taken:
199 	s_time -= 7;
200 jp_not_taken:
201 	pc += 2;
202 loop:
203 
204 	check( (unsigned long) pc < 0x10000 );
205 	check( (unsigned long) sp < 0x10000 );
206 	check( (unsigned) flags < 0x100 );
207 	check( (unsigned) ix < 0x10000 );
208 	check( (unsigned) iy < 0x10000 );
209 
210 	uint8_t const* instr = s.read [pc >> page_shift];
211 #define GET_ADDR()  GET_LE16( instr )
212 
213 	fuint8 opcode;
214 
215 	// TODO: eliminate this special case
216 	#if BLARGG_NONPORTABLE
217 		opcode = instr [pc];
218 		pc++;
219 		instr += pc;
220 	#else
221 		instr += KSS_CPU_PAGE_OFFSET( pc );
222 		opcode = *instr++;
223 		pc++;
224 	#endif
225 
226 	static byte const base_timing [0x100] = {
227 	//   0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
228 		 4,10, 7, 6, 4, 4, 7, 4, 4,11, 7, 6, 4, 4, 7, 4, // 0
229 		13,10, 7, 6, 4, 4, 7, 4,12,11, 7, 6, 4, 4, 7, 4, // 1
230 		12,10,16, 6, 4, 4, 7, 4,12,11,16, 6, 4, 4, 7, 4, // 2
231 		12,10,13, 6,11,11,10, 4,12,11,13, 6, 4, 4, 7, 4, // 3
232 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 4
233 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 5
234 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 6
235 		 7, 7, 7, 7, 7, 7, 4, 7, 4, 4, 4, 4, 4, 4, 7, 4, // 7
236 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 8
237 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // 9
238 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // A
239 		 4, 4, 4, 4, 4, 4, 7, 4, 4, 4, 4, 4, 4, 4, 7, 4, // B
240 		11,10,10,10,17,11, 7,11,11,10,10, 8,17,17, 7,11, // C
241 		11,10,10,11,17,11, 7,11,11, 4,10,11,17, 8, 7,11, // D
242 		11,10,10,19,17,11, 7,11,11, 4,10, 4,17, 8, 7,11, // E
243 		11,10,10, 4,17,11, 7,11,11, 6,10, 4,17, 8, 7,11, // F
244 	};
245 
246 	fuint16 data;
247 	data = base_timing [opcode];
248 	if ( (s_time += data) >= 0 )
249 		goto possibly_out_of_time;
250 almost_out_of_time:
251 
252 	data = READ_PROG( pc );
253 
254 	#ifdef Z80_CPU_LOG_H
255 		//log_opcode( opcode, READ_PROG( pc ) );
256 		z80_log_regs( rg.a, rp.bc, rp.de, rp.hl, sp, ix, iy );
257 		z80_cpu_log( "new", pc - 1, opcode, READ_PROG( pc ),
258 				READ_PROG( pc + 1 ), READ_PROG( pc + 2 ) );
259 	#endif
260 
261 	switch ( opcode )
262 	{
263 possibly_out_of_time:
264 		if ( s_time < (int) data )
265 			goto almost_out_of_time;
266 		s_time -= data;
267 		goto out_of_time;
268 
269 // Common
270 
271 	case 0x00: // NOP
272 	CASE7( 40, 49, 52, 5B, 64, 6D, 7F ): // LD B,B etc.
273 		goto loop;
274 
275 	case 0x08:{// EX AF,AF'
276 		int temp = r.alt.b.a;
277 		r.alt.b.a = rg.a;
278 		rg.a = temp;
279 
280 		temp = r.alt.b.flags;
281 		r.alt.b.flags = flags;
282 		flags = temp;
283 		goto loop;
284 	}
285 
286 	case 0xD3: // OUT (imm),A
287 		pc++;
288 		OUT( data + rg.a * 0x100, rg.a );
289 		goto loop;
290 
291 	case 0x2E: // LD L,imm
292 		pc++;
293 		rg.l = data;
294 		goto loop;
295 
296 	case 0x3E: // LD A,imm
297 		pc++;
298 		rg.a = data;
299 		goto loop;
300 
301 	case 0x3A:{// LD A,(addr)
302 		fuint16 addr = GET_ADDR();
303 		pc += 2;
304 		rg.a = READ( addr );
305 		goto loop;
306 	}
307 
308 // Conditional
309 
310 #define ZERO    (flags & Z40)
311 #define CARRY   (flags & C01)
312 #define EVEN    (flags & P04)
313 #define MINUS   (flags & S80)
314 
315 // JR
316 // TODO: more efficient way to handle negative branch that wraps PC around
317 #define JR( cond ) {\
318 	int offset = (BOOST::int8_t) data;\
319 	pc++;\
320 	if ( !(cond) )\
321 		goto jr_not_taken;\
322 	pc = uint16_t (pc + offset);\
323 	goto loop;\
324 }
325 
326 	case 0x20: JR( !ZERO  ) // JR NZ,disp
327 	case 0x28: JR(  ZERO  ) // JR Z,disp
328 	case 0x30: JR( !CARRY ) // JR NC,disp
329 	case 0x38: JR(  CARRY ) // JR C,disp
330 	case 0x18: JR(  true  ) // JR disp
331 
332 	case 0x10:{// DJNZ disp
333 		int temp = rg.b - 1;
334 		rg.b = temp;
335 		JR( temp )
336 	}
337 
338 // JP
339 #define JP( cond )  if ( !(cond) ) goto jp_not_taken; pc = GET_ADDR(); goto loop;
340 
341 	case 0xC2: JP( !ZERO  ) // JP NZ,addr
342 	case 0xCA: JP(  ZERO  ) // JP Z,addr
343 	case 0xD2: JP( !CARRY ) // JP NC,addr
344 	case 0xDA: JP(  CARRY ) // JP C,addr
345 	case 0xE2: JP( !EVEN  ) // JP PO,addr
346 	case 0xEA: JP(  EVEN  ) // JP PE,addr
347 	case 0xF2: JP( !MINUS ) // JP P,addr
348 	case 0xFA: JP(  MINUS ) // JP M,addr
349 
350 	case 0xC3: // JP addr
351 		pc = GET_ADDR();
352 		goto loop;
353 
354 	case 0xE9: // JP HL
355 		pc = rp.hl;
356 		goto loop;
357 
358 // RET
359 #define RET( cond ) if ( cond ) goto ret_taken; s_time -= 6; goto loop;
360 
361 	case 0xC0: RET( !ZERO  ) // RET NZ
362 	case 0xC8: RET(  ZERO  ) // RET Z
363 	case 0xD0: RET( !CARRY ) // RET NC
364 	case 0xD8: RET(  CARRY ) // RET C
365 	case 0xE0: RET( !EVEN  ) // RET PO
366 	case 0xE8: RET(  EVEN  ) // RET PE
367 	case 0xF0: RET( !MINUS ) // RET P
368 	case 0xF8: RET(  MINUS ) // RET M
369 
370 	case 0xC9: // RET
371 	ret_taken:
372 		pc = READ_WORD( sp );
373 		sp = uint16_t (sp + 2);
374 		goto loop;
375 
376 // CALL
377 #define CALL( cond ) if ( cond ) goto call_taken; goto call_not_taken;
378 
379 	case 0xC4: CALL( !ZERO  ) // CALL NZ,addr
380 	case 0xCC: CALL(  ZERO  ) // CALL Z,addr
381 	case 0xD4: CALL( !CARRY ) // CALL NC,addr
382 	case 0xDC: CALL(  CARRY ) // CALL C,addr
383 	case 0xE4: CALL( !EVEN  ) // CALL PO,addr
384 	case 0xEC: CALL(  EVEN  ) // CALL PE,addr
385 	case 0xF4: CALL( !MINUS ) // CALL P,addr
386 	case 0xFC: CALL(  MINUS ) // CALL M,addr
387 
388 	case 0xCD:{// CALL addr
389 	call_taken:
390 		fuint16 addr = pc + 2;
391 		pc = GET_ADDR();
392 		sp = uint16_t (sp - 2);
393 		WRITE_WORD( sp, addr );
394 		goto loop;
395 	}
396 
397 	case 0xFF: // RST
398 		if ( pc > idle_addr )
399 			goto hit_idle_addr;
400 	CASE7( C7, CF, D7, DF, E7, EF, F7 ):
401 		data = pc;
402 		pc = opcode & 0x38;
403 		goto push_data;
404 
405 // PUSH/POP
406 	case 0xF5: // PUSH AF
407 		data = rg.a * 0x100u + flags;
408 		goto push_data;
409 
410 	case 0xC5: // PUSH BC
411 	case 0xD5: // PUSH DE
412 	case 0xE5: // PUSH HL
413 		data = R16( opcode, 4, 0xC5 );
414 	push_data:
415 		sp = uint16_t (sp - 2);
416 		WRITE_WORD( sp, data );
417 		goto loop;
418 
419 	case 0xF1: // POP AF
420 		flags = READ( sp );
421 		rg.a = READ( sp + 1 );
422 		sp = uint16_t (sp + 2);
423 		goto loop;
424 
425 	case 0xC1: // POP BC
426 	case 0xD1: // POP DE
427 	case 0xE1: // POP HL
428 		R16( opcode, 4, 0xC1 ) = READ_WORD( sp );
429 		sp = uint16_t (sp + 2);
430 		goto loop;
431 
432 // ADC/ADD/SBC/SUB
433 	case 0x96: // SUB (HL)
434 	case 0x86: // ADD (HL)
435 		flags &= ~C01;
436 	case 0x9E: // SBC (HL)
437 	case 0x8E: // ADC (HL)
438 		data = READ( rp.hl );
439 		goto adc_data;
440 
441 	case 0xD6: // SUB A,imm
442 	case 0xC6: // ADD imm
443 		flags &= ~C01;
444 	case 0xDE: // SBC A,imm
445 	case 0xCE: // ADC imm
446 		pc++;
447 		goto adc_data;
448 
449 	CASE7( 90, 91, 92, 93, 94, 95, 97 ): // SUB r
450 	CASE7( 80, 81, 82, 83, 84, 85, 87 ): // ADD r
451 		flags &= ~C01;
452 	CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // SBC r
453 	CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // ADC r
454 		data = R8( opcode & 7, 0 );
455 	adc_data: {
456 		int result = data + (flags & C01);
457 		data ^= rg.a;
458 		flags = opcode >> 3 & N02; // bit 4 is set in subtract opcodes
459 		if ( flags )
460 			result = -result;
461 		result += rg.a;
462 		data ^= result;
463 		flags |=(data & H10) |
464 				((data - -0x80) >> 6 & V04) |
465 				SZ28C( result & 0x1FF );
466 		rg.a = result;
467 		goto loop;
468 	}
469 
470 // CP
471 	case 0xBE: // CP (HL)
472 		data = READ( rp.hl );
473 		goto cp_data;
474 
475 	case 0xFE: // CP imm
476 		pc++;
477 		goto cp_data;
478 
479 	CASE7( B8, B9, BA, BB, BC, BD, BF ): // CP r
480 		data = R8( opcode, 0xB8 );
481 	cp_data: {
482 		int result = rg.a - data;
483 		flags = N02 | (data & (F20 | F08)) | (result >> 8 & C01);
484 		data ^= rg.a;
485 		flags |=(((result ^ rg.a) & data) >> 5 & V04) |
486 				(((data & H10) ^ result) & (S80 | H10));
487 		if ( (uint8_t) result )
488 			goto loop;
489 		flags |= Z40;
490 		goto loop;
491 	}
492 
493 // ADD HL,rp
494 
495 	case 0x39: // ADD HL,SP
496 		data = sp;
497 		goto add_hl_data;
498 
499 	case 0x09: // ADD HL,BC
500 	case 0x19: // ADD HL,DE
501 	case 0x29: // ADD HL,HL
502 		data = R16( opcode, 4, 0x09 );
503 	add_hl_data: {
504 		blargg_ulong sum = rp.hl + data;
505 		data ^= rp.hl;
506 		rp.hl = sum;
507 		flags = (flags & (S80 | Z40 | V04)) |
508 				(sum >> 16) |
509 				(sum >> 8 & (F20 | F08)) |
510 				((data ^ sum) >> 8 & H10);
511 		goto loop;
512 	}
513 
514 	case 0x27:{// DAA
515 		int a = rg.a;
516 		if ( a > 0x99 )
517 			flags |= C01;
518 
519 		int adjust = 0x60 & -(flags & C01);
520 
521 		if ( flags & H10 || (a & 0x0F) > 9 )
522 			adjust |= 0x06;
523 
524 		if ( flags & N02 )
525 			adjust = -adjust;
526 		a += adjust;
527 
528 		flags = (flags & (C01 | N02)) |
529 				((rg.a ^ a) & H10) |
530 				SZ28P( (uint8_t) a );
531 		rg.a = a;
532 		goto loop;
533 	}
534 	/*
535 	case 0x27:{// DAA
536 		// more optimized, but probably not worth the obscurity
537 		int f = (rg.a + (0xFF - 0x99)) >> 8 | flags; // (a > 0x99 ? C01 : 0) | flags
538 		int adjust = 0x60 & -(f & C01); // f & C01 ? 0x60 : 0
539 
540 		if ( (((rg.a + (0x0F - 9)) ^ rg.a) | f) & H10 ) // flags & H10 || (rg.a & 0x0F) > 9
541 			adjust |= 0x06;
542 
543 		if ( f & N02 )
544 			adjust = -adjust;
545 		int a = rg.a + adjust;
546 
547 		flags = (f & (N02 | C01)) | ((rg.a ^ a) & H10) | SZ28P( (uint8_t) a );
548 		rg.a = a;
549 		goto loop;
550 	}
551 	*/
552 
553 // INC/DEC
554 	case 0x34: // INC (HL)
555 		data = READ( rp.hl ) + 1;
556 		WRITE( rp.hl, data );
557 		goto inc_set_flags;
558 
559 	CASE7( 04, 0C, 14, 1C, 24, 2C, 3C ): // INC r
560 		data = ++R8( opcode >> 3, 0 );
561 	inc_set_flags:
562 		flags = (flags & C01) |
563 				(((data & 0x0F) - 1) & H10) |
564 				SZ28( (uint8_t) data );
565 		if ( data != 0x80 )
566 			goto loop;
567 		flags |= V04;
568 		goto loop;
569 
570 	case 0x35: // DEC (HL)
571 		data = READ( rp.hl ) - 1;
572 		WRITE( rp.hl, data );
573 		goto dec_set_flags;
574 
575 	CASE7( 05, 0D, 15, 1D, 25, 2D, 3D ): // DEC r
576 		data = --R8( opcode >> 3, 0 );
577 	dec_set_flags:
578 		flags = (flags & C01) | N02 |
579 				(((data & 0x0F) + 1) & H10) |
580 				SZ28( (uint8_t) data );
581 		if ( data != 0x7F )
582 			goto loop;
583 		flags |= V04;
584 		goto loop;
585 
586 	case 0x03: // INC BC
587 	case 0x13: // INC DE
588 	case 0x23: // INC HL
589 		R16( opcode, 4, 0x03 )++;
590 		goto loop;
591 
592 	case 0x33: // INC SP
593 		sp = uint16_t (sp + 1);
594 		goto loop;
595 
596 	case 0x0B: // DEC BC
597 	case 0x1B: // DEC DE
598 	case 0x2B: // DEC HL
599 		R16( opcode, 4, 0x0B )--;
600 		goto loop;
601 
602 	case 0x3B: // DEC SP
603 		sp = uint16_t (sp - 1);
604 		goto loop;
605 
606 // AND
607 	case 0xA6: // AND (HL)
608 		data = READ( rp.hl );
609 		goto and_data;
610 
611 	case 0xE6: // AND imm
612 		pc++;
613 		goto and_data;
614 
615 	CASE7( A0, A1, A2, A3, A4, A5, A7 ): // AND r
616 		data = R8( opcode, 0xA0 );
617 	and_data:
618 		rg.a &= data;
619 		flags = SZ28P( rg.a ) | H10;
620 		goto loop;
621 
622 // OR
623 	case 0xB6: // OR (HL)
624 		data = READ( rp.hl );
625 		goto or_data;
626 
627 	case 0xF6: // OR imm
628 		pc++;
629 		goto or_data;
630 
631 	CASE7( B0, B1, B2, B3, B4, B5, B7 ): // OR r
632 		data = R8( opcode, 0xB0 );
633 	or_data:
634 		rg.a |= data;
635 		flags = SZ28P( rg.a );
636 		goto loop;
637 
638 // XOR
639 	case 0xAE: // XOR (HL)
640 		data = READ( rp.hl );
641 		goto xor_data;
642 
643 	case 0xEE: // XOR imm
644 		pc++;
645 		goto xor_data;
646 
647 	CASE7( A8, A9, AA, AB, AC, AD, AF ): // XOR r
648 		data = R8( opcode, 0xA8 );
649 	xor_data:
650 		rg.a ^= data;
651 		flags = SZ28P( rg.a );
652 		goto loop;
653 
654 // LD
655 	CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (HL),r
656 		WRITE( rp.hl, R8( opcode, 0x70 ) );
657 		goto loop;
658 
659 	CASE6( 41, 42, 43, 44, 45, 47 ): // LD B,r
660 	CASE6( 48, 4A, 4B, 4C, 4D, 4F ): // LD C,r
661 	CASE6( 50, 51, 53, 54, 55, 57 ): // LD D,r
662 	CASE6( 58, 59, 5A, 5C, 5D, 5F ): // LD E,r
663 	CASE6( 60, 61, 62, 63, 65, 67 ): // LD H,r
664 	CASE6( 68, 69, 6A, 6B, 6C, 6F ): // LD L,r
665 	CASE6( 78, 79, 7A, 7B, 7C, 7D ): // LD A,r
666 		R8( opcode >> 3 & 7, 0 ) = R8( opcode & 7, 0 );
667 		goto loop;
668 
669 	CASE5( 06, 0E, 16, 1E, 26 ): // LD r,imm
670 		R8( opcode >> 3, 0 ) = data;
671 		pc++;
672 		goto loop;
673 
674 	case 0x36: // LD (HL),imm
675 		pc++;
676 		WRITE( rp.hl, data );
677 		goto loop;
678 
679 	CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(HL)
680 		R8( opcode >> 3, 8 ) = READ( rp.hl );
681 		goto loop;
682 
683 	case 0x01: // LD rp,imm
684 	case 0x11:
685 	case 0x21:
686 		R16( opcode, 4, 0x01 ) = GET_ADDR();
687 		pc += 2;
688 		goto loop;
689 
690 	case 0x31: // LD sp,imm
691 		sp = GET_ADDR();
692 		pc += 2;
693 		goto loop;
694 
695 	case 0x2A:{// LD HL,(addr)
696 		fuint16 addr = GET_ADDR();
697 		pc += 2;
698 		rp.hl = READ_WORD( addr );
699 		goto loop;
700 	}
701 
702 	case 0x32:{// LD (addr),A
703 		fuint16 addr = GET_ADDR();
704 		pc += 2;
705 		WRITE( addr, rg.a );
706 		goto loop;
707 	}
708 
709 	case 0x22:{// LD (addr),HL
710 		fuint16 addr = GET_ADDR();
711 		pc += 2;
712 		WRITE_WORD( addr, rp.hl );
713 		goto loop;
714 	}
715 
716 	case 0x02: // LD (BC),A
717 	case 0x12: // LD (DE),A
718 		WRITE( R16( opcode, 4, 0x02 ), rg.a );
719 		goto loop;
720 
721 	case 0x0A: // LD A,(BC)
722 	case 0x1A: // LD A,(DE)
723 		rg.a = READ( R16( opcode, 4, 0x0A ) );
724 		goto loop;
725 
726 	case 0xF9: // LD SP,HL
727 		sp = rp.hl;
728 		goto loop;
729 
730 // Rotate
731 
732 	case 0x07:{// RLCA
733 		fuint16 temp = rg.a;
734 		temp = (temp << 1) | (temp >> 7);
735 		flags = (flags & (S80 | Z40 | P04)) |
736 				(temp & (F20 | F08 | C01));
737 		rg.a = temp;
738 		goto loop;
739 	}
740 
741 	case 0x0F:{// RRCA
742 		fuint16 temp = rg.a;
743 		flags = (flags & (S80 | Z40 | P04)) |
744 				(temp & C01);
745 		temp = (temp << 7) | (temp >> 1);
746 		flags |= temp & (F20 | F08);
747 		rg.a = temp;
748 		goto loop;
749 	}
750 
751 	case 0x17:{// RLA
752 		blargg_ulong temp = (rg.a << 1) | (flags & C01);
753 		flags = (flags & (S80 | Z40 | P04)) |
754 				(temp & (F20 | F08)) |
755 				(temp >> 8);
756 		rg.a = temp;
757 		goto loop;
758 	}
759 
760 	case 0x1F:{// RRA
761 		fuint16 temp = (flags << 7) | (rg.a >> 1);
762 		flags = (flags & (S80 | Z40 | P04)) |
763 				(temp & (F20 | F08)) |
764 				(rg.a & C01);
765 		rg.a = temp;
766 		goto loop;
767 	}
768 
769 // Misc
770 	case 0x2F:{// CPL
771 		fuint16 temp = ~rg.a;
772 		flags = (flags & (S80 | Z40 | P04 | C01)) |
773 				(temp & (F20 | F08)) |
774 				(H10 | N02);
775 		rg.a = temp;
776 		goto loop;
777 	}
778 
779 	case 0x3F:{// CCF
780 		flags = ((flags & (S80 | Z40 | P04 | C01)) ^ C01) |
781 				(flags << 4 & H10) |
782 				(rg.a & (F20 | F08));
783 		goto loop;
784 	}
785 
786 	case 0x37: // SCF
787 		flags = (flags & (S80 | Z40 | P04)) | C01 |
788 				(rg.a & (F20 | F08));
789 		goto loop;
790 
791 	case 0xDB: // IN A,(imm)
792 		pc++;
793 		rg.a = IN( data + rg.a * 0x100 );
794 		goto loop;
795 
796 	case 0xE3:{// EX (SP),HL
797 		fuint16 temp = READ_WORD( sp );
798 		WRITE_WORD( sp, rp.hl );
799 		rp.hl = temp;
800 		goto loop;
801 	}
802 
803 	case 0xEB:{// EX DE,HL
804 		fuint16 temp = rp.hl;
805 		rp.hl = rp.de;
806 		rp.de = temp;
807 		goto loop;
808 	}
809 
810 	case 0xD9:{// EXX DE,HL
811 		fuint16 temp = r.alt.w.bc;
812 		r.alt.w.bc = rp.bc;
813 		rp.bc = temp;
814 
815 		temp = r.alt.w.de;
816 		r.alt.w.de = rp.de;
817 		rp.de = temp;
818 
819 		temp = r.alt.w.hl;
820 		r.alt.w.hl = rp.hl;
821 		rp.hl = temp;
822 		goto loop;
823 	}
824 
825 	case 0xF3: // DI
826 		r.iff1 = 0;
827 		r.iff2 = 0;
828 		goto loop;
829 
830 	case 0xFB: // EI
831 		r.iff1 = 1;
832 		r.iff2 = 1;
833 		// TODO: delayed effect
834 		goto loop;
835 
836 	case 0x76: // HALT
837 		goto halt;
838 
839 //////////////////////////////////////// CB prefix
840 	{
841 	case 0xCB:
842 		unsigned data2;
843 		data2 = instr [1];
844 		(void) data2; // TODO is this the same as data in all cases?
845 		pc++;
846 		switch ( data )
847 		{
848 
849 	// Rotate left
850 
851 	#define RLC( read, write ) {\
852 		fuint8 result = read;\
853 		result = uint8_t (result << 1) | (result >> 7);\
854 		flags = SZ28P( result ) | (result & C01);\
855 		write;\
856 		goto loop;\
857 	}
858 
859 		case 0x06: // RLC (HL)
860 			s_time += 7;
861 			data = rp.hl;
862 		rlc_data_addr:
863 			RLC( READ( data ), WRITE( data, result ) )
864 
865 		CASE7( 00, 01, 02, 03, 04, 05, 07 ):{// RLC r
866 			uint8_t& reg = R8( data, 0 );
867 			RLC( reg, reg = result )
868 		}
869 
870 	#define RL( read, write ) {\
871 		fuint16 result = (read << 1) | (flags & C01);\
872 		flags = SZ28PC( result );\
873 		write;\
874 		goto loop;\
875 	}
876 
877 		case 0x16: // RL (HL)
878 			s_time += 7;
879 			data = rp.hl;
880 		rl_data_addr:
881 			RL( READ( data ), WRITE( data, result ) )
882 
883 		CASE7( 10, 11, 12, 13, 14, 15, 17 ):{// RL r
884 			uint8_t& reg = R8( data, 0x10 );
885 			RL( reg, reg = result )
886 		}
887 
888 	#define SLA( read, add, write ) {\
889 		fuint16 result = (read << 1) | add;\
890 		flags = SZ28PC( result );\
891 		write;\
892 		goto loop;\
893 	}
894 
895 		case 0x26: // SLA (HL)
896 			s_time += 7;
897 			data = rp.hl;
898 		sla_data_addr:
899 			SLA( READ( data ), 0, WRITE( data, result ) )
900 
901 		CASE7( 20, 21, 22, 23, 24, 25, 27 ):{// SLA r
902 			uint8_t& reg = R8( data, 0x20 );
903 			SLA( reg, 0, reg = result )
904 		}
905 
906 		case 0x36: // SLL (HL)
907 			s_time += 7;
908 			data = rp.hl;
909 		sll_data_addr:
910 			SLA( READ( data ), 1, WRITE( data, result ) )
911 
912 		CASE7( 30, 31, 32, 33, 34, 35, 37 ):{// SLL r
913 			uint8_t& reg = R8( data, 0x30 );
914 			SLA( reg, 1, reg = result )
915 		}
916 
917 	// Rotate right
918 
919 	#define RRC( read, write ) {\
920 		fuint8 result = read;\
921 		flags = result & C01;\
922 		result = uint8_t (result << 7) | (result >> 1);\
923 		flags |= SZ28P( result );\
924 		write;\
925 		goto loop;\
926 	}
927 
928 		case 0x0E: // RRC (HL)
929 			s_time += 7;
930 			data = rp.hl;
931 		rrc_data_addr:
932 			RRC( READ( data ), WRITE( data, result ) )
933 
934 		CASE7( 08, 09, 0A, 0B, 0C, 0D, 0F ):{// RRC r
935 			uint8_t& reg = R8( data, 0x08 );
936 			RRC( reg, reg = result )
937 		}
938 
939 	#define RR( read, write ) {\
940 		fuint8 result = read;\
941 		fuint8 temp = result & C01;\
942 		result = uint8_t (flags << 7) | (result >> 1);\
943 		flags = SZ28P( result ) | temp;\
944 		write;\
945 		goto loop;\
946 	}
947 
948 		case 0x1E: // RR (HL)
949 			s_time += 7;
950 			data = rp.hl;
951 		rr_data_addr:
952 			RR( READ( data ), WRITE( data, result ) )
953 
954 		CASE7( 18, 19, 1A, 1B, 1C, 1D, 1F ):{// RR r
955 			uint8_t& reg = R8( data, 0x18 );
956 			RR( reg, reg = result )
957 		}
958 
959 	#define SRA( read, write ) {\
960 		fuint8 result = read;\
961 		flags = result & C01;\
962 		result = (result & 0x80) | (result >> 1);\
963 		flags |= SZ28P( result );\
964 		write;\
965 		goto loop;\
966 	}
967 
968 		case 0x2E: // SRA (HL)
969 			data = rp.hl;
970 			s_time += 7;
971 		sra_data_addr:
972 			SRA( READ( data ), WRITE( data, result ) )
973 
974 		CASE7( 28, 29, 2A, 2B, 2C, 2D, 2F ):{// SRA r
975 			uint8_t& reg = R8( data, 0x28 );
976 			SRA( reg, reg = result )
977 		}
978 
979 	#define SRL( read, write ) {\
980 		fuint8 result = read;\
981 		flags = result & C01;\
982 		result >>= 1;\
983 		flags |= SZ28P( result );\
984 		write;\
985 		goto loop;\
986 	}
987 
988 		case 0x3E: // SRL (HL)
989 			s_time += 7;
990 			data = rp.hl;
991 		srl_data_addr:
992 			SRL( READ( data ), WRITE( data, result ) )
993 
994 		CASE7( 38, 39, 3A, 3B, 3C, 3D, 3F ):{// SRL r
995 			uint8_t& reg = R8( data, 0x38 );
996 			SRL( reg, reg = result )
997 		}
998 
999 	// BIT
1000 		{
1001 			unsigned temp;
1002 		CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ): // BIT b,(HL)
1003 			s_time += 4;
1004 			temp = READ( rp.hl );
1005 			flags &= C01;
1006 			goto bit_temp;
1007 		CASE7( 40, 41, 42, 43, 44, 45, 47 ): // BIT 0,r
1008 		CASE7( 48, 49, 4A, 4B, 4C, 4D, 4F ): // BIT 1,r
1009 		CASE7( 50, 51, 52, 53, 54, 55, 57 ): // BIT 2,r
1010 		CASE7( 58, 59, 5A, 5B, 5C, 5D, 5F ): // BIT 3,r
1011 		CASE7( 60, 61, 62, 63, 64, 65, 67 ): // BIT 4,r
1012 		CASE7( 68, 69, 6A, 6B, 6C, 6D, 6F ): // BIT 5,r
1013 		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // BIT 6,r
1014 		CASE7( 78, 79, 7A, 7B, 7C, 7D, 7F ): // BIT 7,r
1015 			temp = R8( data & 7, 0 );
1016 			flags = (flags & C01) | (temp & (F20 | F08));
1017 		bit_temp:
1018 			int masked = temp & 1 << (data >> 3 & 7);
1019 			flags |=(masked & S80) | H10 |
1020 					((masked - 1) >> 8 & (Z40 | P04));
1021 			goto loop;
1022 		}
1023 
1024 	// SET/RES
1025 		CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(HL)
1026 		CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(HL)
1027 			s_time += 7;
1028 			int temp = READ( rp.hl );
1029 			int bit = 1 << (data >> 3 & 7);
1030 			temp |= bit; // SET
1031 			if ( !(data & 0x40) )
1032 				temp ^= bit; // RES
1033 			WRITE( rp.hl, temp );
1034 			goto loop;
1035 		}
1036 
1037 		CASE7( C0, C1, C2, C3, C4, C5, C7 ): // SET 0,r
1038 		CASE7( C8, C9, CA, CB, CC, CD, CF ): // SET 1,r
1039 		CASE7( D0, D1, D2, D3, D4, D5, D7 ): // SET 2,r
1040 		CASE7( D8, D9, DA, DB, DC, DD, DF ): // SET 3,r
1041 		CASE7( E0, E1, E2, E3, E4, E5, E7 ): // SET 4,r
1042 		CASE7( E8, E9, EA, EB, EC, ED, EF ): // SET 5,r
1043 		CASE7( F0, F1, F2, F3, F4, F5, F7 ): // SET 6,r
1044 		CASE7( F8, F9, FA, FB, FC, FD, FF ): // SET 7,r
1045 			R8( data & 7, 0 ) |= 1 << (data >> 3 & 7);
1046 			goto loop;
1047 
1048 		CASE7( 80, 81, 82, 83, 84, 85, 87 ): // RES 0,r
1049 		CASE7( 88, 89, 8A, 8B, 8C, 8D, 8F ): // RES 1,r
1050 		CASE7( 90, 91, 92, 93, 94, 95, 97 ): // RES 2,r
1051 		CASE7( 98, 99, 9A, 9B, 9C, 9D, 9F ): // RES 3,r
1052 		CASE7( A0, A1, A2, A3, A4, A5, A7 ): // RES 4,r
1053 		CASE7( A8, A9, AA, AB, AC, AD, AF ): // RES 5,r
1054 		CASE7( B0, B1, B2, B3, B4, B5, B7 ): // RES 6,r
1055 		CASE7( B8, B9, BA, BB, BC, BD, BF ): // RES 7,r
1056 			R8( data & 7, 0 ) &= ~(1 << (data >> 3 & 7));
1057 			goto loop;
1058 		}
1059 		assert( false );
1060 	}
1061 
1062 #undef GET_ADDR
1063 #define GET_ADDR()  GET_LE16( instr + 1 )
1064 
1065 //////////////////////////////////////// ED prefix
1066 	{
1067 	case 0xED:
1068 		pc++;
1069 		s_time += ed_dd_timing [data] >> 4;
1070 		switch ( data )
1071 		{
1072 		{
1073 			blargg_ulong temp;
1074 		case 0x72: // SBC HL,SP
1075 		case 0x7A: // ADC HL,SP
1076 			temp = sp;
1077 			if ( 0 )
1078 		case 0x42: // SBC HL,BC
1079 		case 0x52: // SBC HL,DE
1080 		case 0x62: // SBC HL,HL
1081 		case 0x4A: // ADC HL,BC
1082 		case 0x5A: // ADC HL,DE
1083 		case 0x6A: // ADC HL,HL
1084 				temp = R16( data >> 3 & 6, 1, 0 );
1085 			blargg_ulong sum = temp + (flags & C01);
1086 			flags = ~data >> 2 & N02;
1087 			if ( flags )
1088 				sum = -sum;
1089 			sum += rp.hl;
1090 			temp ^= rp.hl;
1091 			temp ^= sum;
1092 			flags |=(sum >> 16 & C01) |
1093 					(temp >> 8 & H10) |
1094 					(sum >> 8 & (S80 | F20 | F08)) |
1095 					((temp - -0x8000) >> 14 & V04);
1096 			rp.hl = sum;
1097 			if ( (uint16_t) sum )
1098 				goto loop;
1099 			flags |= Z40;
1100 			goto loop;
1101 		}
1102 
1103 		CASE8( 40, 48, 50, 58, 60, 68, 70, 78 ):{// IN r,(C)
1104 			int temp = IN( rp.bc );
1105 			R8( data >> 3, 8 ) = temp;
1106 			flags = (flags & C01) | SZ28P( temp );
1107 			goto loop;
1108 		}
1109 
1110 		case 0x71: // OUT (C),0
1111 			rg.flags = 0;
1112 		CASE7( 41, 49, 51, 59, 61, 69, 79 ): // OUT (C),r
1113 			OUT( rp.bc, R8( data >> 3, 8 ) );
1114 			goto loop;
1115 
1116 		{
1117 			unsigned temp;
1118 		case 0x73: // LD (ADDR),SP
1119 			temp = sp;
1120 			if ( 0 )
1121 		case 0x43: // LD (ADDR),BC
1122 		case 0x53: // LD (ADDR),DE
1123 				temp = R16( data, 4, 0x43 );
1124 			fuint16 addr = GET_ADDR();
1125 			pc += 2;
1126 			WRITE_WORD( addr, temp );
1127 			goto loop;
1128 		}
1129 
1130 		case 0x4B: // LD BC,(ADDR)
1131 		case 0x5B:{// LD DE,(ADDR)
1132 			fuint16 addr = GET_ADDR();
1133 			pc += 2;
1134 			R16( data, 4, 0x4B ) = READ_WORD( addr );
1135 			goto loop;
1136 		}
1137 
1138 		case 0x7B:{// LD SP,(ADDR)
1139 			fuint16 addr = GET_ADDR();
1140 			pc += 2;
1141 			sp = READ_WORD( addr );
1142 			goto loop;
1143 		}
1144 
1145 		case 0x67:{// RRD
1146 			fuint8 temp = READ( rp.hl );
1147 			WRITE( rp.hl, (rg.a << 4) | (temp >> 4) );
1148 			temp = (rg.a & 0xF0) | (temp & 0x0F);
1149 			flags = (flags & C01) | SZ28P( temp );
1150 			rg.a = temp;
1151 			goto loop;
1152 		}
1153 
1154 		case 0x6F:{// RLD
1155 			fuint8 temp = READ( rp.hl );
1156 			WRITE( rp.hl, (temp << 4) | (rg.a & 0x0F) );
1157 			temp = (rg.a & 0xF0) | (temp >> 4);
1158 			flags = (flags & C01) | SZ28P( temp );
1159 			rg.a = temp;
1160 			goto loop;
1161 		}
1162 
1163 		CASE8( 44, 4C, 54, 5C, 64, 6C, 74, 7C ): // NEG
1164 			opcode = 0x10; // flag to do SBC instead of ADC
1165 			flags &= ~C01;
1166 			data = rg.a;
1167 			rg.a = 0;
1168 			goto adc_data;
1169 
1170 		{
1171 			int inc;
1172 		case 0xA9: // CPD
1173 		case 0xB9: // CPDR
1174 			inc = -1;
1175 			if ( 0 )
1176 		case 0xA1: // CPI
1177 		case 0xB1: // CPIR
1178 				inc = +1;
1179 			fuint16 addr = rp.hl;
1180 			rp.hl = addr + inc;
1181 			int temp = READ( addr );
1182 
1183 			int result = rg.a - temp;
1184 			flags = (flags & C01) | N02 |
1185 					((((temp ^ rg.a) & H10) ^ result) & (S80 | H10));
1186 
1187 			if ( !(uint8_t) result ) flags |= Z40;
1188 			result -= (flags & H10) >> 4;
1189 			flags |= result & F08;
1190 			flags |= result << 4 & F20;
1191 			if ( !--rp.bc )
1192 				goto loop;
1193 
1194 			flags |= V04;
1195 			if ( flags & Z40 || data < 0xB0 )
1196 				goto loop;
1197 
1198 			pc -= 2;
1199 			s_time += 5;
1200 			goto loop;
1201 		}
1202 
1203 		{
1204 			int inc;
1205 		case 0xA8: // LDD
1206 		case 0xB8: // LDDR
1207 			inc = -1;
1208 			if ( 0 )
1209 		case 0xA0: // LDI
1210 		case 0xB0: // LDIR
1211 				inc = +1;
1212 			fuint16 addr = rp.hl;
1213 			rp.hl = addr + inc;
1214 			int temp = READ( addr );
1215 
1216 			addr = rp.de;
1217 			rp.de = addr + inc;
1218 			WRITE( addr, temp );
1219 
1220 			temp += rg.a;
1221 			flags = (flags & (S80 | Z40 | C01)) |
1222 					(temp & F08) | (temp << 4 & F20);
1223 			if ( !--rp.bc )
1224 				goto loop;
1225 
1226 			flags |= V04;
1227 			if ( data < 0xB0 )
1228 				goto loop;
1229 
1230 			pc -= 2;
1231 			s_time += 5;
1232 			goto loop;
1233 		}
1234 
1235 		{
1236 			int inc;
1237 		case 0xAB: // OUTD
1238 		case 0xBB: // OTDR
1239 			inc = -1;
1240 			if ( 0 )
1241 		case 0xA3: // OUTI
1242 		case 0xB3: // OTIR
1243 				inc = +1;
1244 			fuint16 addr = rp.hl;
1245 			rp.hl = addr + inc;
1246 			int temp = READ( addr );
1247 
1248 			int b = --rg.b;
1249 			flags = (temp >> 6 & N02) | SZ28( b );
1250 			if ( b && data >= 0xB0 )
1251 			{
1252 				pc -= 2;
1253 				s_time += 5;
1254 			}
1255 
1256 			OUT( rp.bc, temp );
1257 			goto loop;
1258 		}
1259 
1260 		{
1261 			int inc;
1262 		case 0xAA: // IND
1263 		case 0xBA: // INDR
1264 			inc = -1;
1265 			if ( 0 )
1266 		case 0xA2: // INI
1267 		case 0xB2: // INIR
1268 				inc = +1;
1269 
1270 			fuint16 addr = rp.hl;
1271 			rp.hl = addr + inc;
1272 
1273 			int temp = IN( rp.bc );
1274 
1275 			int b = --rg.b;
1276 			flags = (temp >> 6 & N02) | SZ28( b );
1277 			if ( b && data >= 0xB0 )
1278 			{
1279 				pc -= 2;
1280 				s_time += 5;
1281 			}
1282 
1283 			WRITE( addr, temp );
1284 			goto loop;
1285 		}
1286 
1287 		case 0x47: // LD I,A
1288 			r.i = rg.a;
1289 			goto loop;
1290 
1291 		case 0x4F: // LD R,A
1292 			SET_R( rg.a );
1293 			debug_printf( "LD R,A not supported\n" );
1294 			warning = true;
1295 			goto loop;
1296 
1297 		case 0x57: // LD A,I
1298 			rg.a = r.i;
1299 			goto ld_ai_common;
1300 
1301 		case 0x5F: // LD A,R
1302 			rg.a = GET_R();
1303 			debug_printf( "LD A,R not supported\n" );
1304 			warning = true;
1305 		ld_ai_common:
1306 			flags = (flags & C01) | SZ28( rg.a ) | (r.iff2 << 2 & V04);
1307 			goto loop;
1308 
1309 		CASE8( 45, 4D, 55, 5D, 65, 6D, 75, 7D ): // RETI/RETN
1310 			r.iff1 = r.iff2;
1311 			goto ret_taken;
1312 
1313 		case 0x46: case 0x4E: case 0x66: case 0x6E: // IM 0
1314 			r.im = 0;
1315 			goto loop;
1316 
1317 		case 0x56: case 0x76: // IM 1
1318 			r.im = 1;
1319 			goto loop;
1320 
1321 		case 0x5E: case 0x7E: // IM 2
1322 			r.im = 2;
1323 			goto loop;
1324 
1325 		default:
1326 			debug_printf( "Opcode $ED $%02X not supported\n", data );
1327 			warning = true;
1328 			goto loop;
1329 		}
1330 		assert( false );
1331 	}
1332 
1333 //////////////////////////////////////// DD/FD prefix
1334 	{
1335 	fuint16 ixy;
1336 	case 0xDD:
1337 		ixy = ix;
1338 		goto ix_prefix;
1339 	case 0xFD:
1340 		ixy = iy;
1341 	ix_prefix:
1342 		pc++;
1343 		unsigned data2 = READ_PROG( pc );
1344 		s_time += ed_dd_timing [data] & 0x0F;
1345 		switch ( data )
1346 		{
1347 	// TODO: find a more efficient way to avoid a negative address
1348 	// TODO: avoid using this as an argument to READ(), since it is evaluated twice
1349 	#define IXY_DISP( ixy, disp )   uint16_t ((ixy) + (disp))
1350 
1351 	#define SET_IXY( in ) if ( opcode == 0xDD ) ix = in; else iy = in;
1352 
1353 	// ADD/ADC/SUB/SBC
1354 
1355 		case 0x96: // SUB (IXY+disp)
1356 		case 0x86: // ADD (IXY+disp)
1357 			flags &= ~C01;
1358 		case 0x9E: // SBC (IXY+disp)
1359 		case 0x8E: // ADC (IXY+disp)
1360 			pc++;
1361 			opcode = data;
1362 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1363 			goto adc_data;
1364 
1365 		case 0x94: // SUB HXY
1366 		case 0x84: // ADD HXY
1367 			flags &= ~C01;
1368 		case 0x9C: // SBC HXY
1369 		case 0x8C: // ADC HXY
1370 			opcode = data;
1371 			data = ixy >> 8;
1372 			goto adc_data;
1373 
1374 		case 0x95: // SUB LXY
1375 		case 0x85: // ADD LXY
1376 			flags &= ~C01;
1377 		case 0x9D: // SBC LXY
1378 		case 0x8D: // ADC LXY
1379 			opcode = data;
1380 			data = (uint8_t) ixy;
1381 			goto adc_data;
1382 
1383 		{
1384 			unsigned temp;
1385 		case 0x39: // ADD IXY,SP
1386 			temp = sp;
1387 			goto add_ixy_data;
1388 
1389 		case 0x29: // ADD IXY,IXY
1390 			temp = ixy;
1391 			goto add_ixy_data;
1392 
1393 		case 0x09: // ADD IXY,BC
1394 		case 0x19: // ADD IXY,DE
1395 			temp = R16( data, 4, 0x09 );
1396 		add_ixy_data: {
1397 			blargg_ulong sum = ixy + temp;
1398 			temp ^= ixy;
1399 			ixy = (uint16_t) sum;
1400 			flags = (flags & (S80 | Z40 | V04)) |
1401 					(sum >> 16) |
1402 					(sum >> 8 & (F20 | F08)) |
1403 					((temp ^ sum) >> 8 & H10);
1404 			goto set_ixy;
1405 		}
1406 		}
1407 
1408 	// AND
1409 		case 0xA6: // AND (IXY+disp)
1410 			pc++;
1411 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1412 			goto and_data;
1413 
1414 		case 0xA4: // AND HXY
1415 			data = ixy >> 8;
1416 			goto and_data;
1417 
1418 		case 0xA5: // AND LXY
1419 			data = (uint8_t) ixy;
1420 			goto and_data;
1421 
1422 	// OR
1423 		case 0xB6: // OR (IXY+disp)
1424 			pc++;
1425 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1426 			goto or_data;
1427 
1428 		case 0xB4: // OR HXY
1429 			data = ixy >> 8;
1430 			goto or_data;
1431 
1432 		case 0xB5: // OR LXY
1433 			data = (uint8_t) ixy;
1434 			goto or_data;
1435 
1436 	// XOR
1437 		case 0xAE: // XOR (IXY+disp)
1438 			pc++;
1439 			data = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1440 			goto xor_data;
1441 
1442 		case 0xAC: // XOR HXY
1443 			data = ixy >> 8;
1444 			goto xor_data;
1445 
1446 		case 0xAD: // XOR LXY
1447 			data = (uint8_t) ixy;
1448 			goto xor_data;
1449 
1450 	// CP
1451 		case 0xBE: // CP (IXY+disp)
1452 			pc++;
1453 			data = READ( IXY_DISP( ixy, (int8_t) data2 )  );
1454 			goto cp_data;
1455 
1456 		case 0xBC: // CP HXY
1457 			data = ixy >> 8;
1458 			goto cp_data;
1459 
1460 		case 0xBD: // CP LXY
1461 			data = (uint8_t) ixy;
1462 			goto cp_data;
1463 
1464 	// LD
1465 		CASE7( 70, 71, 72, 73, 74, 75, 77 ): // LD (IXY+disp),r
1466 			data = R8( data, 0x70 );
1467 			if ( 0 )
1468 		case 0x36: // LD (IXY+disp),imm
1469 				pc++, data = READ_PROG( pc );
1470 			pc++;
1471 			WRITE( IXY_DISP( ixy, (int8_t) data2 ), data );
1472 			goto loop;
1473 
1474 		CASE5( 44, 4C, 54, 5C, 7C ): // LD r,HXY
1475 			R8( data >> 3, 8 ) = ixy >> 8;
1476 			goto loop;
1477 
1478 		case 0x64: // LD HXY,HXY
1479 		case 0x6D: // LD LXY,LXY
1480 			goto loop;
1481 
1482 		CASE5( 45, 4D, 55, 5D, 7D ): // LD r,LXY
1483 			R8( data >> 3, 8 ) = ixy;
1484 			goto loop;
1485 
1486 		CASE7( 46, 4E, 56, 5E, 66, 6E, 7E ): // LD r,(IXY+disp)
1487 			pc++;
1488 			R8( data >> 3, 8 ) = READ( IXY_DISP( ixy, (int8_t) data2 ) );
1489 			goto loop;
1490 
1491 		case 0x26: // LD HXY,imm
1492 			pc++;
1493 			goto ld_hxy_data;
1494 
1495 		case 0x65: // LD HXY,LXY
1496 			data2 = (uint8_t) ixy;
1497 			goto ld_hxy_data;
1498 
1499 		CASE5( 60, 61, 62, 63, 67 ): // LD HXY,r
1500 			data2 = R8( data, 0x60 );
1501 		ld_hxy_data:
1502 			ixy = (uint8_t) ixy | (data2 << 8);
1503 			goto set_ixy;
1504 
1505 		case 0x2E: // LD LXY,imm
1506 			pc++;
1507 			goto ld_lxy_data;
1508 
1509 		case 0x6C: // LD LXY,HXY
1510 			data2 = ixy >> 8;
1511 			goto ld_lxy_data;
1512 
1513 		CASE5( 68, 69, 6A, 6B, 6F ): // LD LXY,r
1514 			data2 = R8( data, 0x68 );
1515 		ld_lxy_data:
1516 			ixy = (ixy & 0xFF00) | data2;
1517 		set_ixy:
1518 			if ( opcode == 0xDD )
1519 			{
1520 				ix = ixy;
1521 				goto loop;
1522 			}
1523 			iy = ixy;
1524 			goto loop;
1525 
1526 		case 0xF9: // LD SP,IXY
1527 			sp = ixy;
1528 			goto loop;
1529 
1530 		case 0x22:{// LD (ADDR),IXY
1531 			fuint16 addr = GET_ADDR();
1532 			pc += 2;
1533 			WRITE_WORD( addr, ixy );
1534 			goto loop;
1535 		}
1536 
1537 		case 0x21: // LD IXY,imm
1538 			ixy = GET_ADDR();
1539 			pc += 2;
1540 			goto set_ixy;
1541 
1542 		case 0x2A:{// LD IXY,(addr)
1543 			fuint16 addr = GET_ADDR();
1544 			ixy = READ_WORD( addr );
1545 			pc += 2;
1546 			goto set_ixy;
1547 		}
1548 
1549 	// DD/FD CB prefix
1550 		case 0xCB: {
1551 			data = IXY_DISP( ixy, (int8_t) data2 );
1552 			pc++;
1553 			data2 = READ_PROG( pc );
1554 			pc++;
1555 			switch ( data2 )
1556 			{
1557 			case 0x06: goto rlc_data_addr; // RLC (IXY)
1558 			case 0x16: goto rl_data_addr;  // RL (IXY)
1559 			case 0x26: goto sla_data_addr; // SLA (IXY)
1560 			case 0x36: goto sll_data_addr; // SLL (IXY)
1561 			case 0x0E: goto rrc_data_addr; // RRC (IXY)
1562 			case 0x1E: goto rr_data_addr;  // RR (IXY)
1563 			case 0x2E: goto sra_data_addr; // SRA (IXY)
1564 			case 0x3E: goto srl_data_addr; // SRL (IXY)
1565 
1566 			CASE8( 46, 4E, 56, 5E, 66, 6E, 76, 7E ):{// BIT b,(IXY+disp)
1567 				fuint8 temp = READ( data );
1568 				int masked = temp & 1 << (data2 >> 3 & 7);
1569 				flags = (flags & C01) | H10 |
1570 						(masked & S80) |
1571 						((masked - 1) >> 8 & (Z40 | P04));
1572 				goto loop;
1573 			}
1574 
1575 			CASE8( 86, 8E, 96, 9E, A6, AE, B6, BE ): // RES b,(IXY+disp)
1576 			CASE8( C6, CE, D6, DE, E6, EE, F6, FE ):{// SET b,(IXY+disp)
1577 				int temp = READ( data );
1578 				int bit = 1 << (data2 >> 3 & 7);
1579 				temp |= bit; // SET
1580 				if ( !(data2 & 0x40) )
1581 					temp ^= bit; // RES
1582 				WRITE( data, temp );
1583 				goto loop;
1584 			}
1585 
1586 			default:
1587 				debug_printf( "Opcode $%02X $CB $%02X not supported\n", opcode, data2 );
1588 				warning = true;
1589 				goto loop;
1590 			}
1591 			assert( false );
1592 		}
1593 
1594 	// INC/DEC
1595 		case 0x23: // INC IXY
1596 			ixy = uint16_t (ixy + 1);
1597 			goto set_ixy;
1598 
1599 		case 0x2B: // DEC IXY
1600 			ixy = uint16_t (ixy - 1);
1601 			goto set_ixy;
1602 
1603 		case 0x34: // INC (IXY+disp)
1604 			ixy = IXY_DISP( ixy, (int8_t) data2 );
1605 			pc++;
1606 			data = READ( ixy ) + 1;
1607 			WRITE( ixy, data );
1608 			goto inc_set_flags;
1609 
1610 		case 0x35: // DEC (IXY+disp)
1611 			ixy = IXY_DISP( ixy, (int8_t) data2 );
1612 			pc++;
1613 			data = READ( ixy ) - 1;
1614 			WRITE( ixy, data );
1615 			goto dec_set_flags;
1616 
1617 		case 0x24: // INC HXY
1618 			ixy = uint16_t (ixy + 0x100);
1619 			data = ixy >> 8;
1620 			goto inc_xy_common;
1621 
1622 		case 0x2C: // INC LXY
1623 			data = uint8_t (ixy + 1);
1624 			ixy = (ixy & 0xFF00) | data;
1625 		inc_xy_common:
1626 			if ( opcode == 0xDD )
1627 			{
1628 				ix = ixy;
1629 				goto inc_set_flags;
1630 			}
1631 			iy = ixy;
1632 			goto inc_set_flags;
1633 
1634 		case 0x25: // DEC HXY
1635 			ixy = uint16_t (ixy - 0x100);
1636 			data = ixy >> 8;
1637 			goto dec_xy_common;
1638 
1639 		case 0x2D: // DEC LXY
1640 			data = uint8_t (ixy - 1);
1641 			ixy = (ixy & 0xFF00) | data;
1642 		dec_xy_common:
1643 			if ( opcode == 0xDD )
1644 			{
1645 				ix = ixy;
1646 				goto dec_set_flags;
1647 			}
1648 			iy = ixy;
1649 			goto dec_set_flags;
1650 
1651 	// PUSH/POP
1652 		case 0xE5: // PUSH IXY
1653 			data = ixy;
1654 			goto push_data;
1655 
1656 		case 0xE1:{// POP IXY
1657 			ixy = READ_WORD( sp );
1658 			sp = uint16_t (sp + 2);
1659 			goto set_ixy;
1660 		}
1661 
1662 	// Misc
1663 
1664 		case 0xE9: // JP (IXY)
1665 			pc = ixy;
1666 			goto loop;
1667 
1668 		case 0xE3:{// EX (SP),IXY
1669 			fuint16 temp = READ_WORD( sp );
1670 			WRITE_WORD( sp, ixy );
1671 			ixy = temp;
1672 			goto set_ixy;
1673 		}
1674 
1675 		default:
1676 			debug_printf( "Unnecessary DD/FD prefix encountered\n" );
1677 			warning = true;
1678 			pc--;
1679 			goto loop;
1680 		}
1681 		assert( false );
1682 	}
1683 
1684 	}
1685 	debug_printf( "Unhandled main opcode: $%02X\n", opcode );
1686 	assert( false );
1687 
1688 hit_idle_addr:
1689 	s_time -= 11;
1690 	goto out_of_time;
1691 halt:
1692 	s_time &= 3; // increment by multiple of 4
1693 out_of_time:
1694 	pc--;
1695 
1696 	s.time = s_time;
1697 	rg.flags = flags;
1698 	r.ix    = ix;
1699 	r.iy    = iy;
1700 	r.sp    = sp;
1701 	r.pc    = pc;
1702 	this->r.b = rg;
1703 	this->state_ = s;
1704 	this->state = &this->state_;
1705 
1706 	return warning;
1707 }
1708