1
2 // Nes_Emu 0.7.0. http://www.slack.net/~ant/
3
4 #include "Nes_Ppu_Impl.h"
5
6 #include <string.h>
7 #include "blargg_endian.h"
8 #include "Nes_State.h"
9 #include <stdint.h>
10
11 /* Copyright (C) 2004-2006 Shay Green. This module is free software; you
12 can redistribute it and/or modify it under the terms of the GNU Lesser
13 General Public License as published by the Free Software Foundation; either
14 version 2.1 of the License, or (at your option) any later version. This
15 module is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
18 more details. You should have received a copy of the GNU Lesser General
19 Public License along with this module; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
21
22 #include "blargg_source.h"
23
// Alignment boundary, in bytes, used when placing the tile cache inside the
// block allocated by open_chr().
const int cache_line_size = 128;
25
Nes_Ppu_Impl()26 Nes_Ppu_Impl::Nes_Ppu_Impl()
27 {
28 impl = NULL;
29 chr_data = NULL;
30 chr_size = 0;
31 tile_cache = NULL;
32 host_palette = NULL;
33 max_palette_size = 0;
34 tile_cache_mem = NULL;
35 ppu_state_t::unused = 0;
36
37 mmc24_enabled = false;
38 mmc24_latched[0] = 0;
39 mmc24_latched[1] = 0;
40
41 #if !defined(NDEBUG) && !defined(PSP) && !defined(PS2)
42 // verify that unaligned accesses work
43 static unsigned char b [19] = { 0 };
44 static unsigned char b2 [19] = { 1,2,3,4,0,5,6,7,8,0,9,0,1,2,0,3,4,5,6 };
45 for ( int i = 0; i < 19; i += 5 )
46 *(volatile uint32_t*) &b [i] = *(volatile uint32_t*) &b2 [i];
47 #endif
48 }
49
~Nes_Ppu_Impl()50 Nes_Ppu_Impl::~Nes_Ppu_Impl()
51 {
52 close_chr();
53 delete impl;
54 }
55
all_tiles_modified()56 void Nes_Ppu_Impl::all_tiles_modified()
57 {
58 any_tiles_modified = true;
59 memset( modified_tiles, ~0, sizeof modified_tiles );
60 }
61
open_chr(uint8_t const * new_chr,long chr_data_size)62 const char *Nes_Ppu_Impl::open_chr( uint8_t const* new_chr, long chr_data_size )
63 {
64 close_chr();
65
66 if ( !impl )
67 {
68 impl = BLARGG_NEW impl_t;
69 CHECK_ALLOC( impl );
70 chr_ram = impl->chr_ram;
71 }
72
73 chr_data = new_chr;
74 chr_size = chr_data_size;
75 chr_is_writable = false;
76
77 if ( chr_data_size == 0 )
78 {
79 // CHR RAM
80 chr_data = impl->chr_ram;
81 chr_size = sizeof impl->chr_ram;
82 chr_is_writable = true;
83 }
84
85 // allocate aligned memory for cache
86 long tile_count = chr_size / bytes_per_tile;
87 tile_cache_mem = BLARGG_NEW uint8_t [tile_count * sizeof (cached_tile_t) * 2 + cache_line_size];
88 CHECK_ALLOC( tile_cache_mem );
89 tile_cache = (cached_tile_t*) (tile_cache_mem + cache_line_size -
90 (uintptr_t) tile_cache_mem % cache_line_size);
91 flipped_tiles = tile_cache + tile_count;
92
93 // rebuild cache
94 all_tiles_modified();
95 if ( !chr_is_writable )
96 {
97 any_tiles_modified = false;
98 rebuild_chr( 0, chr_size );
99 }
100
101 return 0;
102 }
103
close_chr()104 void Nes_Ppu_Impl::close_chr()
105 {
106 delete [] tile_cache_mem;
107 tile_cache_mem = NULL;
108 }
109
set_chr_bank(int addr,int size,long data)110 void Nes_Ppu_Impl::set_chr_bank( int addr, int size, long data )
111 {
112 if ( data + size > chr_size )
113 data %= chr_size;
114
115 int count = (unsigned) size / chr_page_size;
116
117 int page = (unsigned) addr / chr_page_size;
118 while ( count-- )
119 {
120 chr_pages [page] = data - page * chr_page_size;
121 page++;
122 data += chr_page_size;
123 }
124 }
125
set_chr_bank_ex(int addr,int size,long data)126 void Nes_Ppu_Impl::set_chr_bank_ex( int addr, int size, long data )
127 {
128 mmc24_enabled = true;
129
130 //check( !chr_is_writable || addr == data ); // to do: is CHR RAM ever bank-switched?
131 //dprintf( "Tried to set CHR RAM bank at %04X to CHR+%04X\n", addr, data );
132
133 if ( data + size > chr_size )
134 data %= chr_size;
135
136 int count = (unsigned) size / chr_page_size;
137 //assert( chr_page_size * count == size );
138 //assert( addr + size <= chr_addr_size );
139
140 int page = (unsigned) addr / chr_page_size;
141 while ( count-- )
142 {
143 chr_pages_ex [page] = data - page * chr_page_size;
144 page++;
145 data += chr_page_size;
146 }
147 }
148
save_state(Nes_State_ * out) const149 void Nes_Ppu_Impl::save_state( Nes_State_* out ) const
150 {
151 *out->ppu = *this;
152 out->ppu_valid = true;
153
154 memcpy( out->spr_ram, spr_ram, out->spr_ram_size );
155 out->spr_ram_valid = true;
156
157 out->nametable_size = 0x800;
158 memcpy( out->nametable, impl->nt_ram, 0x800 );
159 if ( nt_banks [3] >= &impl->nt_ram [0xC00] )
160 {
161 // save extra nametable data in chr
162 out->nametable_size = 0x1000;
163 memcpy( out->chr, &impl->nt_ram [0x800], 0x800 );
164 }
165
166 out->chr_size = 0;
167 if ( chr_is_writable )
168 {
169 out->chr_size = chr_size;
170 memcpy( out->chr, impl->chr_ram, out->chr_size );
171 }
172 }
173
load_state(Nes_State_ const & in)174 void Nes_Ppu_Impl::load_state( Nes_State_ const& in )
175 {
176 set_nt_banks( 0, 0, 0, 0 );
177 set_chr_bank( 0, 0x2000, 0 );
178
179 if ( in.ppu_valid )
180 STATIC_CAST(ppu_state_t&,*this) = *in.ppu;
181
182 if ( in.spr_ram_valid )
183 memcpy( spr_ram, in.spr_ram, sizeof spr_ram );
184
185 if ( in.nametable_size >= 0x800 )
186 {
187 if ( in.nametable_size > 0x800 )
188 memcpy( &impl->nt_ram [0x800], in.chr, 0x800 );
189 memcpy( impl->nt_ram, in.nametable, 0x800 );
190 }
191
192 if ( chr_is_writable && in.chr_size )
193 {
194 memcpy( impl->chr_ram, in.chr, in.chr_size );
195 all_tiles_modified();
196 }
197 }
198
// Palette contents copied into `palette` by reset() on a full reset.
static uint8_t const initial_palette [0x20] =
{
	0x0f, 0x01, 0x00, 0x01,  0x00, 0x02, 0x02, 0x0D,
	0x08, 0x10, 0x08, 0x24,  0x00, 0x00, 0x04, 0x2C,
	0x00, 0x01, 0x34, 0x03,  0x00, 0x04, 0x00, 0x14,
	0x00, 0x3A, 0x00, 0x02,  0x00, 0x20, 0x2C, 0x08
};
204
reset(bool full_reset)205 void Nes_Ppu_Impl::reset( bool full_reset )
206 {
207 w2000 = 0;
208 w2001 = 0;
209 r2002 = 0x80;
210 r2007 = 0;
211 open_bus = 0;
212 decay_low = 0;
213 decay_high = 0;
214 second_write = false;
215 vram_temp = 0;
216 pixel_x = 0;
217
218 if ( full_reset )
219 {
220 vram_addr = 0;
221 w2003 = 0;
222 memset( impl->chr_ram, 0xff, sizeof impl->chr_ram );
223 memset( impl->nt_ram, 0xff, sizeof impl->nt_ram );
224 memcpy( palette, initial_palette, sizeof palette );
225 }
226
227 set_nt_banks( 0, 0, 0, 0 );
228 set_chr_bank( 0, chr_addr_size, 0 );
229 memset( spr_ram, 0xff, sizeof spr_ram );
230 all_tiles_modified();
231 if ( max_palette_size > 0 )
232 memset( host_palette, 0, max_palette_size * sizeof *host_palette );
233 }
234
capture_palette()235 void Nes_Ppu_Impl::capture_palette()
236 {
237 if ( palette_size + palette_increment <= max_palette_size )
238 {
239 palette_offset = (palette_begin + palette_size) * 0x01010101;
240
241 short* out = host_palette + palette_size;
242 palette_size += palette_increment;
243
244 int i;
245
246 int emph = w2001 << 1 & 0x1C0;
247 int mono = (w2001 & 1 ? 0x30 : 0x3F);
248
249 for ( i = 0; i < 32; i++ )
250 out [i] = (palette [i] & mono) | emph;
251
252 int bg = out [0];
253 for ( i = 4; i < 32; i += 4 )
254 out [i] = bg;
255
256 memcpy( out + 32, out, 32 * sizeof *out );
257 }
258 }
259
run_hblank(int count)260 void Nes_Ppu_Impl::run_hblank( int count )
261 {
262 long addr = (vram_addr & 0x7be0) + (vram_temp & 0x41f) + (count * 0x1000);
263 if ( w2001 & 0x08 )
264 {
265 while ( addr >= 0x8000 )
266 {
267 int y = (addr + 0x20) & 0x3e0;
268 addr = (addr - 0x8000) & ~0x3e0;
269 if ( y == 30 * 0x20 )
270 y = 0x800;
271 addr ^= y;
272 }
273 vram_addr = addr;
274 }
275 }
276
// `zero` is added to the wide constants used below (e.g. 0x11111111 + zero).
// NOTE(review): presumably this stops the compiler from treating them as
// plain immediates; confirm before removing.
#ifndef __MWERKS__
	const int zero = 0;
#else
	#pragma ppc_unroll_factor_limit 1 // messes up calc_sprite_max_scanlines loop
	static int zero = 0;
#endif
283
284 // Tile cache
285
// Spreads the bits of an 8-bit value: the value is ORed with itself shifted
// left by 7, and that result is ORed with itself shifted left by 14.
// update_tile() masks the result to pick one bit per nibble.
inline unsigned long reorder( unsigned long n )
{
	unsigned long const doubled = n | (n << 7);
	return doubled | (doubled << 14);
}
291
update_tile(int index)292 inline void Nes_Ppu_Impl::update_tile( int index )
293 {
294 const uint8_t* in = chr_data + (index) * bytes_per_tile;
295 uint8_t* out = (uint8_t*) tile_cache [index];
296 uint8_t* flipped_out = (uint8_t*) flipped_tiles [index];
297
298 unsigned long bit_mask = 0x11111111 + zero;
299
300 for ( int n = 4; n--; )
301 {
302 // Reorder two lines of two-bit pixels. No bits are wasted, so
303 // reordered version is also four bytes.
304 //
305 // 12345678 to A0E4B1F5C2G6D3H7
306 // ABCDEFGH
307 unsigned long c =
308 ((reorder( in [0] ) & bit_mask) << 0) |
309 ((reorder( in [8] ) & bit_mask) << 1) |
310 ((reorder( in [1] ) & bit_mask) << 2) |
311 ((reorder( in [9] ) & bit_mask) << 3);
312 in += 2;
313
314 SET_BE32( out, c );
315 out += 4;
316
317 // make horizontally-flipped version
318 c = ((c >> 28) & 0x000f) |
319 ((c >> 20) & 0x00f0) |
320 ((c >> 12) & 0x0f00) |
321 ((c >> 4) & 0xf000) |
322 ((c & 0xf000) << 4) |
323 ((c & 0x0f00) << 12) |
324 ((c & 0x00f0) << 20) |
325 ((c & 0x000f) << 28);
326 SET_BE32( flipped_out, c );
327 flipped_out += 4;
328 }
329 }
330
rebuild_chr(unsigned long begin,unsigned long end)331 void Nes_Ppu_Impl::rebuild_chr( unsigned long begin, unsigned long end )
332 {
333 unsigned end_index = (end + bytes_per_tile - 1) / bytes_per_tile;
334 for ( unsigned index = begin / bytes_per_tile; index < end_index; index++ )
335 update_tile( index );
336 }
337
update_tiles(int first_tile)338 void Nes_Ppu_Impl::update_tiles( int first_tile )
339 {
340 int chunk = 0;
341 do
342 {
343 if ( !(uint32_t&) modified_tiles [chunk] )
344 {
345 chunk += 4;
346 }
347 else
348 {
349 do
350 {
351 int modified = modified_tiles [chunk];
352 if ( modified )
353 {
354 modified_tiles [chunk] = 0;
355
356 int index = first_tile + chunk * 8;
357 do
358 {
359 if ( modified & 1 )
360 update_tile( index );
361 index++;
362 }
363 while ( (modified >>= 1) != 0 );
364 }
365 }
366 while ( ++chunk & 3 );
367 }
368 }
369 while ( chunk < chr_tile_count / 8 );
370 }
371
372 // Sprite max
373
374 template<int height>
375 struct calc_sprite_max_scanlines
376 {
funccalc_sprite_max_scanlines377 static unsigned long func( uint8_t const* sprites, uint8_t* scanlines, int begin )
378 {
379 unsigned long any_hits = 0;
380 unsigned long const offset = 0x01010101 + zero;
381 unsigned limit = 239 + height - begin;
382 for ( int n = 64; n; --n )
383 {
384 int top = *sprites;
385 sprites += 4;
386 uint8_t* p = scanlines + top;
387 if ( (unsigned) (239 - top) < limit )
388 {
389 unsigned long p0 = ((unaligned_uint32_t*)p) [0].val + offset;
390 unsigned long p4 = ((unaligned_uint32_t*)p) [1].val + offset;
391 ((unaligned_uint32_t*)p) [0].val = p0;
392 any_hits |= p0;
393 ((unaligned_uint32_t*)p) [1].val = p4;
394 any_hits |= p4;
395 if ( height > 8 )
396 {
397 unsigned long p0 = ((unaligned_uint32_t*)p) [2].val + offset;
398 unsigned long p4 = ((unaligned_uint32_t*)p) [3].val + offset;
399 ((unaligned_uint32_t*)p) [2].val = p0;
400 any_hits |= p0;
401 ((unaligned_uint32_t*)p) [3].val = p4;
402 any_hits |= p4;
403 }
404 }
405 }
406
407 return any_hits;
408 }
409 };
410
recalc_sprite_max(int scanline)411 long Nes_Ppu_Impl::recalc_sprite_max( int scanline )
412 {
413 int const max_scanline_count = image_height;
414
415 uint8_t sprite_max_scanlines [256 + 16];
416
417 // recalculate sprites per scanline
418 memset( sprite_max_scanlines + scanline, 0x78, last_sprite_max_scanline - scanline );
419 unsigned long any_hits;
420 if ( w2000 & 0x20 )
421 any_hits = calc_sprite_max_scanlines<16>::func( spr_ram, sprite_max_scanlines, scanline );
422 else
423 any_hits = calc_sprite_max_scanlines<8 >::func( spr_ram, sprite_max_scanlines, scanline );
424
425 // cause search to terminate past max_scanline_count if none have 8 or more sprites
426 (uint32_t&) sprite_max_scanlines [max_scanline_count] = 0;
427 sprite_max_scanlines [max_scanline_count + 3] = 0x80;
428
429 // avoid scan if no possible hits
430 if ( !(any_hits & 0x80808080) )
431 return 0;
432
433 // find soonest scanline with 8 or more sprites
434 while ( true )
435 {
436 unsigned long const mask = 0x80808080 + zero;
437
438 // check four at a time
439 uint8_t* pos = &sprite_max_scanlines [scanline];
440 unsigned long n = ((unaligned_uint32_t*)pos)->val;
441 while ( 1 )
442 {
443 unsigned long x = n & mask;
444 pos += 4;
445 n = ((unaligned_uint32_t*)pos)->val;
446 if ( x )
447 break;
448 }
449
450 int height = sprite_height();
451 int remain = 8;
452 int i = 0;
453
454 // find which of the four
455 pos -= 3 + (pos [-4] >> 7 & 1);
456 pos += 1 - (*pos >> 7 & 1);
457 pos += 1 - (*pos >> 7 & 1);
458
459 scanline = pos - sprite_max_scanlines;
460 if ( scanline >= max_scanline_count )
461 break;
462
463 // find time that max sprites flag is set (or that it won't be set)
464 do
465 {
466 int relative = scanline - spr_ram [i];
467 i += 4;
468 if ( (unsigned) relative < (unsigned) height && !--remain )
469 {
470 // now use screwey search for 9th sprite
471 int offset = 0;
472 while ( i < 0x100 )
473 {
474 int relative = scanline - spr_ram [i + offset];
475 //dprintf( "Checking sprite %d [%d]\n", i / 4, offset );
476 i += 4;
477 offset = (offset + 1) & 3;
478 if ( (unsigned) relative < (unsigned) height )
479 {
480 //dprintf( "sprite max on scanline %d\n", scanline );
481 return scanline * scanline_len + (unsigned) i / 2;
482 }
483 }
484 break;
485 }
486 }
487 while ( i < 0x100 );
488 scanline++;
489 }
490
491 return 0;
492 }
493