1 
2 // Nes_Emu 0.7.0. http://www.slack.net/~ant/
3 
4 #include "Nes_Ppu_Impl.h"
5 
6 #include <string.h>
7 #include "blargg_endian.h"
8 #include "Nes_State.h"
9 #include <stdint.h>
10 
11 /* Copyright (C) 2004-2006 Shay Green. This module is free software; you
12 can redistribute it and/or modify it under the terms of the GNU Lesser
13 General Public License as published by the Free Software Foundation; either
14 version 2.1 of the License, or (at your option) any later version. This
15 module is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
18 more details. You should have received a copy of the GNU Lesser General
19 Public License along with this module; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
21 
22 #include "blargg_source.h"
23 
// Byte boundary that the start of the tile cache is rounded up to in open_chr().
// open_chr() also over-allocates by this many bytes to leave room for the rounding.
static const int cache_line_size = 128;
25 
Nes_Ppu_Impl()26 Nes_Ppu_Impl::Nes_Ppu_Impl()
27 {
28 	impl = NULL;
29 	chr_data = NULL;
30 	chr_size = 0;
31 	tile_cache = NULL;
32 	host_palette = NULL;
33 	max_palette_size = 0;
34 	tile_cache_mem = NULL;
35 	ppu_state_t::unused = 0;
36 
37 	mmc24_enabled = false;
38 	mmc24_latched[0] = 0;
39 	mmc24_latched[1] = 0;
40 
41 	#if !defined(NDEBUG) && !defined(PSP) && !defined(PS2)
42 		// verify that unaligned accesses work
43 		static unsigned char b  [19] = { 0 };
44 		static unsigned char b2 [19] = { 1,2,3,4,0,5,6,7,8,0,9,0,1,2,0,3,4,5,6 };
45 		for ( int i = 0; i < 19; i += 5 )
46 			*(volatile uint32_t*) &b [i] = *(volatile uint32_t*) &b2 [i];
47 	#endif
48 }
49 
~Nes_Ppu_Impl()50 Nes_Ppu_Impl::~Nes_Ppu_Impl()
51 {
52 	close_chr();
53 	delete impl;
54 }
55 
all_tiles_modified()56 void Nes_Ppu_Impl::all_tiles_modified()
57 {
58 	any_tiles_modified = true;
59 	memset( modified_tiles, ~0, sizeof modified_tiles );
60 }
61 
open_chr(uint8_t const * new_chr,long chr_data_size)62 const char *Nes_Ppu_Impl::open_chr( uint8_t const* new_chr, long chr_data_size )
63 {
64 	close_chr();
65 
66 	if ( !impl )
67 	{
68 		impl = BLARGG_NEW impl_t;
69 		CHECK_ALLOC( impl );
70 		chr_ram = impl->chr_ram;
71 	}
72 
73 	chr_data = new_chr;
74 	chr_size = chr_data_size;
75 	chr_is_writable = false;
76 
77 	if ( chr_data_size == 0 )
78 	{
79 		// CHR RAM
80 		chr_data = impl->chr_ram;
81 		chr_size = sizeof impl->chr_ram;
82 		chr_is_writable = true;
83 	}
84 
85 	// allocate aligned memory for cache
86 	long tile_count = chr_size / bytes_per_tile;
87 	tile_cache_mem = BLARGG_NEW uint8_t [tile_count * sizeof (cached_tile_t) * 2 + cache_line_size];
88 	CHECK_ALLOC( tile_cache_mem );
89 	tile_cache = (cached_tile_t*) (tile_cache_mem + cache_line_size -
90 			(uintptr_t) tile_cache_mem % cache_line_size);
91 	flipped_tiles = tile_cache + tile_count;
92 
93 	// rebuild cache
94 	all_tiles_modified();
95 	if ( !chr_is_writable )
96 	{
97 		any_tiles_modified = false;
98 		rebuild_chr( 0, chr_size );
99 	}
100 
101 	return 0;
102 }
103 
close_chr()104 void Nes_Ppu_Impl::close_chr()
105 {
106 	delete [] tile_cache_mem;
107 	tile_cache_mem = NULL;
108 }
109 
set_chr_bank(int addr,int size,long data)110 void Nes_Ppu_Impl::set_chr_bank( int addr, int size, long data )
111 {
112 	if ( data + size > chr_size )
113 		data %= chr_size;
114 
115 	int count = (unsigned) size / chr_page_size;
116 
117 	int page = (unsigned) addr / chr_page_size;
118 	while ( count-- )
119 	{
120 		chr_pages [page] = data - page * chr_page_size;
121 		page++;
122 		data += chr_page_size;
123 	}
124 }
125 
set_chr_bank_ex(int addr,int size,long data)126 void Nes_Ppu_Impl::set_chr_bank_ex( int addr, int size, long data )
127 {
128 	mmc24_enabled = true;
129 
130 	//check( !chr_is_writable || addr == data ); // to do: is CHR RAM ever bank-switched?
131 	//dprintf( "Tried to set CHR RAM bank at %04X to CHR+%04X\n", addr, data );
132 
133 	if ( data + size > chr_size )
134 		data %= chr_size;
135 
136 	int count = (unsigned) size / chr_page_size;
137 	//assert( chr_page_size * count == size );
138 	//assert( addr + size <= chr_addr_size );
139 
140 	int page = (unsigned) addr / chr_page_size;
141 	while ( count-- )
142 	{
143 		chr_pages_ex [page] = data - page * chr_page_size;
144 		page++;
145 		data += chr_page_size;
146 	}
147 }
148 
save_state(Nes_State_ * out) const149 void Nes_Ppu_Impl::save_state( Nes_State_* out ) const
150 {
151 	*out->ppu = *this;
152 	out->ppu_valid = true;
153 
154 	memcpy( out->spr_ram, spr_ram, out->spr_ram_size );
155 	out->spr_ram_valid = true;
156 
157 	out->nametable_size = 0x800;
158 	memcpy( out->nametable, impl->nt_ram, 0x800 );
159 	if ( nt_banks [3] >= &impl->nt_ram [0xC00] )
160 	{
161 		// save extra nametable data in chr
162 		out->nametable_size = 0x1000;
163 		memcpy( out->chr, &impl->nt_ram [0x800], 0x800 );
164 	}
165 
166 	out->chr_size = 0;
167 	if ( chr_is_writable )
168 	{
169 		out->chr_size = chr_size;
170 		memcpy( out->chr, impl->chr_ram, out->chr_size );
171 	}
172 }
173 
load_state(Nes_State_ const & in)174 void Nes_Ppu_Impl::load_state( Nes_State_ const& in )
175 {
176 	set_nt_banks( 0, 0, 0, 0 );
177 	set_chr_bank( 0, 0x2000, 0 );
178 
179 	if ( in.ppu_valid )
180 		STATIC_CAST(ppu_state_t&,*this) = *in.ppu;
181 
182 	if ( in.spr_ram_valid )
183 		memcpy( spr_ram, in.spr_ram, sizeof spr_ram );
184 
185 	if ( in.nametable_size >= 0x800 )
186 	{
187 		if ( in.nametable_size > 0x800 )
188 			memcpy( &impl->nt_ram [0x800], in.chr, 0x800 );
189 		memcpy( impl->nt_ram, in.nametable, 0x800 );
190 	}
191 
192 	if ( chr_is_writable && in.chr_size )
193 	{
194 		memcpy( impl->chr_ram, in.chr, in.chr_size );
195 		all_tiles_modified();
196 	}
197 }
198 
// Palette contents installed by reset() on a full reset (32 entries, shown
// four per group).
static uint8_t const initial_palette [0x20] =
{
	0x0F,0x01,0x00,0x01,  0x00,0x02,0x02,0x0D,
	0x08,0x10,0x08,0x24,  0x00,0x00,0x04,0x2C,
	0x00,0x01,0x34,0x03,  0x00,0x04,0x00,0x14,
	0x00,0x3A,0x00,0x02,  0x00,0x20,0x2C,0x08
};
204 
reset(bool full_reset)205 void Nes_Ppu_Impl::reset( bool full_reset )
206 {
207 	w2000 = 0;
208 	w2001 = 0;
209 	r2002 = 0x80;
210 	r2007 = 0;
211 	open_bus = 0;
212 	decay_low = 0;
213 	decay_high = 0;
214 	second_write = false;
215 	vram_temp = 0;
216 	pixel_x = 0;
217 
218 	if ( full_reset )
219 	{
220 		vram_addr = 0;
221 		w2003 = 0;
222 		memset( impl->chr_ram, 0xff, sizeof impl->chr_ram );
223 		memset( impl->nt_ram, 0xff, sizeof impl->nt_ram );
224 		memcpy( palette, initial_palette, sizeof palette );
225 	}
226 
227 	set_nt_banks( 0, 0, 0, 0 );
228 	set_chr_bank( 0, chr_addr_size, 0 );
229 	memset( spr_ram, 0xff, sizeof spr_ram );
230 	all_tiles_modified();
231 	if ( max_palette_size > 0 )
232 		memset( host_palette, 0, max_palette_size * sizeof *host_palette );
233 }
234 
capture_palette()235 void Nes_Ppu_Impl::capture_palette()
236 {
237 	if ( palette_size + palette_increment <= max_palette_size )
238 	{
239 		palette_offset = (palette_begin + palette_size) * 0x01010101;
240 
241 		short* out = host_palette + palette_size;
242 		palette_size += palette_increment;
243 
244 		int i;
245 
246 		int emph = w2001 << 1 & 0x1C0;
247 		int mono = (w2001 & 1 ? 0x30 : 0x3F);
248 
249 		for ( i = 0; i < 32; i++ )
250 			out [i] = (palette [i] & mono) | emph;
251 
252 		int bg = out [0];
253 		for ( i = 4; i < 32; i += 4 )
254 			out [i] = bg;
255 
256 		memcpy( out + 32, out, 32 * sizeof *out );
257 	}
258 }
259 
run_hblank(int count)260 void Nes_Ppu_Impl::run_hblank( int count )
261 {
262 	long addr = (vram_addr & 0x7be0) + (vram_temp & 0x41f) + (count * 0x1000);
263 	if ( w2001 & 0x08 )
264 	{
265 		while ( addr >= 0x8000 )
266 		{
267 			int y = (addr + 0x20) & 0x3e0;
268 			addr = (addr - 0x8000) & ~0x3e0;
269 			if ( y == 30 * 0x20 )
270 				y = 0x800;
271 			addr ^= y;
272 		}
273 		vram_addr = addr;
274 	}
275 }
276 
// `zero` is added to the wide bit-pattern constants used below
// (0x11111111, 0x01010101, 0x80808080); its value is always 0.
#if defined (__MWERKS__)
	// Metrowerks only: loop unrolling is limited because it messes up the
	// calc_sprite_max_scanlines loop; `zero` is non-const for this compiler.
	#pragma ppc_unroll_factor_limit 1
	static int zero = 0;
#else
	static int const zero = 0;
#endif
283 
284 // Tile cache
285 
// Returns n OR-ed with copies of itself shifted left by 7, 14 and 21 bits.
// update_tile() masks the result to pick one bit out of each nibble.
inline unsigned long reorder( unsigned long n )
{
	unsigned long const doubled = n | (n << 7);
	return doubled | (doubled << 14);
}
291 
update_tile(int index)292 inline void Nes_Ppu_Impl::update_tile( int index )
293 {
294 	const uint8_t* in = chr_data + (index) * bytes_per_tile;
295 	uint8_t* out = (uint8_t*) tile_cache [index];
296 	uint8_t* flipped_out = (uint8_t*) flipped_tiles [index];
297 
298 	unsigned long bit_mask = 0x11111111 + zero;
299 
300 	for ( int n = 4; n--; )
301 	{
302 		// Reorder two lines of two-bit pixels. No bits are wasted, so
303 		// reordered version is also four bytes.
304 		//
305 		// 12345678 to A0E4B1F5C2G6D3H7
306 		// ABCDEFGH
307 		unsigned long c =
308 				((reorder( in [0] ) & bit_mask) << 0) |
309 				((reorder( in [8] ) & bit_mask) << 1) |
310 				((reorder( in [1] ) & bit_mask) << 2) |
311 				((reorder( in [9] ) & bit_mask) << 3);
312 		in += 2;
313 
314 		SET_BE32( out, c );
315 		out += 4;
316 
317 		// make horizontally-flipped version
318 		c =     ((c >> 28) & 0x000f) |
319 				((c >> 20) & 0x00f0) |
320 				((c >> 12) & 0x0f00) |
321 				((c >>  4) & 0xf000) |
322 				((c & 0xf000) <<  4) |
323 				((c & 0x0f00) << 12) |
324 				((c & 0x00f0) << 20) |
325 				((c & 0x000f) << 28);
326 		SET_BE32( flipped_out, c );
327 		flipped_out += 4;
328 	}
329 }
330 
rebuild_chr(unsigned long begin,unsigned long end)331 void Nes_Ppu_Impl::rebuild_chr( unsigned long begin, unsigned long end )
332 {
333 	unsigned end_index = (end + bytes_per_tile - 1) / bytes_per_tile;
334 	for ( unsigned index = begin / bytes_per_tile; index < end_index; index++ )
335 		update_tile( index );
336 }
337 
update_tiles(int first_tile)338 void Nes_Ppu_Impl::update_tiles( int first_tile )
339 {
340 	int chunk = 0;
341 	do
342 	{
343 		if ( !(uint32_t&) modified_tiles [chunk] )
344 		{
345 			chunk += 4;
346 		}
347 		else
348 		{
349 			do
350 			{
351 				int modified = modified_tiles [chunk];
352 				if ( modified )
353 				{
354 					modified_tiles [chunk] = 0;
355 
356 					int index = first_tile + chunk * 8;
357 					do
358 					{
359 						if ( modified & 1 )
360 							update_tile( index );
361 						index++;
362 					}
363 					while ( (modified >>= 1) != 0 );
364 				}
365 			}
366 			while ( ++chunk & 3 );
367 		}
368 	}
369 	while ( chunk < chr_tile_count / 8 );
370 }
371 
372 // Sprite max
373 
374 template<int height>
375 struct calc_sprite_max_scanlines
376 {
funccalc_sprite_max_scanlines377 	static unsigned long func( uint8_t const* sprites, uint8_t* scanlines, int begin )
378 	{
379 		unsigned long any_hits = 0;
380 		unsigned long const offset = 0x01010101 + zero;
381 		unsigned limit = 239 + height - begin;
382 		for ( int n = 64; n; --n )
383 		{
384 			int top = *sprites;
385 			sprites += 4;
386 			uint8_t* p = scanlines + top;
387 			if ( (unsigned) (239 - top) < limit )
388 			{
389 				unsigned long p0 = ((unaligned_uint32_t*)p) [0].val + offset;
390 				unsigned long p4 = ((unaligned_uint32_t*)p) [1].val + offset;
391 				((unaligned_uint32_t*)p) [0].val = p0;
392 				any_hits |= p0;
393 				((unaligned_uint32_t*)p) [1].val = p4;
394 				any_hits |= p4;
395 				if ( height > 8 )
396 				{
397 					unsigned long p0 = ((unaligned_uint32_t*)p) [2].val + offset;
398 					unsigned long p4 = ((unaligned_uint32_t*)p) [3].val + offset;
399 					((unaligned_uint32_t*)p) [2].val = p0;
400 					any_hits |= p0;
401 					((unaligned_uint32_t*)p) [3].val = p4;
402 					any_hits |= p4;
403 				}
404 			}
405 		}
406 
407 		return any_hits;
408 	}
409 };
410 
recalc_sprite_max(int scanline)411 long Nes_Ppu_Impl::recalc_sprite_max( int scanline )
412 {
413 	int const max_scanline_count = image_height;
414 
415 	uint8_t sprite_max_scanlines [256 + 16];
416 
417 	// recalculate sprites per scanline
418 	memset( sprite_max_scanlines + scanline, 0x78, last_sprite_max_scanline - scanline );
419 	unsigned long any_hits;
420 	if ( w2000 & 0x20 )
421 		any_hits = calc_sprite_max_scanlines<16>::func( spr_ram, sprite_max_scanlines, scanline );
422 	else
423 		any_hits = calc_sprite_max_scanlines<8 >::func( spr_ram, sprite_max_scanlines, scanline );
424 
425 	// cause search to terminate past max_scanline_count if none have 8 or more sprites
426 	(uint32_t&) sprite_max_scanlines [max_scanline_count] = 0;
427 	sprite_max_scanlines [max_scanline_count + 3] = 0x80;
428 
429 	// avoid scan if no possible hits
430 	if ( !(any_hits & 0x80808080) )
431 		return 0;
432 
433 	// find soonest scanline with 8 or more sprites
434 	while ( true )
435 	{
436 		unsigned long const mask = 0x80808080 + zero;
437 
438 		// check four at a time
439 		uint8_t* pos = &sprite_max_scanlines [scanline];
440 		unsigned long n = ((unaligned_uint32_t*)pos)->val;
441 		while ( 1 )
442 		{
443 			unsigned long x = n & mask;
444 			pos += 4;
445 			n = ((unaligned_uint32_t*)pos)->val;
446 			if ( x )
447 				break;
448 		}
449 
450 		int height = sprite_height();
451 		int remain = 8;
452 		int i = 0;
453 
454 		// find which of the four
455 		pos -= 3 + (pos [-4] >> 7 & 1);
456 		pos += 1 - (*pos >> 7 & 1);
457 		pos += 1 - (*pos >> 7 & 1);
458 
459 		scanline = pos - sprite_max_scanlines;
460 		if ( scanline >= max_scanline_count )
461 			break;
462 
463 		// find time that max sprites flag is set (or that it won't be set)
464 		do
465 		{
466 			int relative = scanline - spr_ram [i];
467 			i += 4;
468 			if ( (unsigned) relative < (unsigned) height && !--remain )
469 			{
470 				// now use screwey search for 9th sprite
471 				int offset = 0;
472 				while ( i < 0x100 )
473 				{
474 					int relative = scanline - spr_ram [i + offset];
475 					//dprintf( "Checking sprite %d [%d]\n", i / 4, offset );
476 					i += 4;
477 					offset = (offset + 1) & 3;
478 					if ( (unsigned) relative < (unsigned) height )
479 					{
480 						//dprintf( "sprite max on scanline %d\n", scanline );
481 						return scanline * scanline_len + (unsigned) i / 2;
482 					}
483 				}
484 				break;
485 			}
486 		}
487 		while ( i < 0x100 );
488 		scanline++;
489 	}
490 
491 	return 0;
492 }
493