1 // license:BSD-3-Clause
2 // copyright-holders:Ryan Holtz
3 /******************************************************************************
4 
5 
6     SGI/Nintendo Reality Display Processor
7     -------------------
8 
9     by Ryan Holtz
10     based on initial C code by Ville Linde
11     contains additional improvements from angrylion, Ziggy, Gonetz and Orkin
12 
13 
14 *******************************************************************************
15 
16 STATUS:
17 
18 Much behavior needs verification against real hardware.  Many edge cases must
19 be verified on real hardware as well.
20 
21 TODO:
22 
23 - Further re-work class structure to avoid dependencies
24 
25 *******************************************************************************/
26 
27 #include "emu.h"
28 #include "video/n64.h"
29 #include "video/rdpblend.h"
30 #include "video/rdptpipe.h"
31 
32 #include <algorithm>
33 
// Compile-time switch: when non-zero, n64_state::video_start() opens "rdp_execute.txt" for logging.
#define LOG_RDP_EXECUTION       0

// Log file handle; only assigned in n64_state::video_start() when LOG_RDP_EXECUTION is non-zero.
static FILE* rdp_exec;

// Storage for the 512-entry table that the combiner equations index with a 9-bit value (a & 0x1ff).
uint32_t n64_rdp::s_special_9bit_clamptable[512];
39 
rdp_range_check(uint32_t addr)40 bool n64_rdp::rdp_range_check(uint32_t addr)
41 {
42 	if(m_misc_state.m_fb_size == 0) return false;
43 
44 	int32_t fbcount = ((m_misc_state.m_fb_width * m_scissor.m_yl) << (m_misc_state.m_fb_size - 1)) * 3;
45 	int32_t fbaddr = m_misc_state.m_fb_address & 0x007fffff;
46 	if ((addr >= fbaddr) && (addr < (fbaddr + fbcount)))
47 	{
48 		return false;
49 	}
50 
51 	int32_t zbcount = m_misc_state.m_fb_width * m_scissor.m_yl * 2;
52 	int32_t zbaddr = m_misc_state.m_zb_address & 0x007fffff;
53 	if ((addr >= zbaddr) && (addr < (zbaddr + zbcount)))
54 	{
55 		return false;
56 	}
57 
58 	printf("Check failed: %08x vs. %08x-%08x, %08x-%08x (%d, %d)\n", addr, fbaddr, fbaddr + fbcount, zbaddr, zbaddr + zbcount, m_misc_state.m_fb_width, m_scissor.m_yl);
59 	fflush(stdout);
60 	return true;
61 }
62 
63 /*****************************************************************************/
64 
65 // The functions in this file should be moved into the parent Processor class.
66 #include "rdpfiltr.hxx"
67 
get_alpha_cvg(int32_t comb_alpha,rdp_span_aux * userdata,const rdp_poly_state & object)68 int32_t n64_rdp::get_alpha_cvg(int32_t comb_alpha, rdp_span_aux* userdata, const rdp_poly_state &object)
69 {
70 	int32_t temp = comb_alpha;
71 	int32_t temp2 = userdata->m_current_pix_cvg;
72 	int32_t temp3 = 0;
73 
74 	if (object.m_other_modes.cvg_times_alpha)
75 	{
76 		temp3 = (temp * temp2) + 4;
77 		userdata->m_current_pix_cvg = (temp3 >> 8) & 0xf;
78 	}
79 	if (object.m_other_modes.alpha_cvg_select)
80 	{
81 		temp = (m_other_modes.cvg_times_alpha) ? (temp3 >> 3) : (temp2 << 5);
82 	}
83 	if (temp > 0xff)
84 	{
85 		temp = 0xff;
86 	}
87 	return temp;
88 }
89 
90 /*****************************************************************************/
91 
video_start()92 void n64_state::video_start()
93 {
94 	m_rdp = auto_alloc(machine(), n64_rdp(*this, m_rdram, m_rsp_dmem));
95 
96 	m_rdp->set_machine(machine());
97 	m_rdp->init_internal_state();
98 	m_rdp->set_n64_periphs(m_rcp_periphs);
99 
100 	m_rdp->m_blender.set_machine(machine());
101 	m_rdp->m_blender.set_processor(m_rdp);
102 
103 	m_rdp->m_tex_pipe.set_machine(machine());
104 
105 	m_rdp->m_aux_buf = make_unique_clear<uint8_t[]>(EXTENT_AUX_COUNT);
106 
107 	if (LOG_RDP_EXECUTION)
108 	{
109 		rdp_exec = fopen("rdp_execute.txt", "wt");
110 	}
111 }
112 
screen_update_n64(screen_device & screen,bitmap_rgb32 & bitmap,const rectangle & cliprect)113 uint32_t n64_state::screen_update_n64(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect)
114 {
115 	//uint16_t* frame_buffer = (uint16_t*)&rdram[(m_rcp_periphs->vi_origin & 0xffffff) >> 2];
116 	//uint8_t* cvg_buffer = &m_rdp.m_hidden_bits[((m_rcp_periphs->vi_origin & 0xffffff) >> 2) >> 1];
117 	//int32_t vibuffering = ((m_rcp_periphs->vi_control & 2) && fsaa && divot);
118 
119 	//vibuffering = 0; // Disabled for now
120 
121 	/*
122 	if (vibuffering && ((m_rcp_periphs->vi_control & 3) == 2))
123 	{
124 	    if (frame_buffer)
125 	    {
126 	        for (j=0; j < vres; j++)
127 	        {
128 	            for (i=0; i < hres; i++)
129 	            {
130 	                uint16_t pix;
131 	                pix = frame_buffer[pixels ^ WORD_ADDR_XOR];
132 	                curpixel_cvg = ((pix & 1) << 2) | (cvg_buffer[pixels ^ BYTE_ADDR_XOR] & 3); // Reuse of this variable
133 	                if (curpixel_cvg < 7 && i > 1 && j > 1 && i < (hres - 2) && j < (vres - 2) && fsaa)
134 	                {
135 	                    newc = video_filter16(&frame_buffer[pixels ^ WORD_ADDR_XOR], &cvg_buffer[pixels ^ BYTE_ADDR_XOR], m_rcp_periphs->vi_width);
136 	                    ViBuffer[i][j] = newc;
137 	                }
138 	                else
139 	                {
140 	                    newc.i.r = ((pix >> 8) & 0xf8) | (pix >> 13);
141 	                    newc.i.g = ((pix >> 3) & 0xf8) | ((pix >>  8) & 0x07);
142 	                    newc.i.b = ((pix << 2) & 0xf8) | ((pix >>  3) & 0x07);
143 	                    ViBuffer[i][j] = newc;
144 	                }
145 	                pixels++;
146 	            }
147 	            pixels += invisiblewidth;
148 	        }
149 	    }
150 	}
151 	*/
152 
153 	m_rdp->mark_frame();
154 
155 	if (m_rcp_periphs->vi_blank)
156 	{
157 		bitmap.fill(0, screen.visible_area());
158 		return 0;
159 	}
160 
161 	m_rcp_periphs->video_update(bitmap);
162 
163 	return 0;
164 }
165 
// Vertical-blank line callback. Intentionally empty: no work is done here.
WRITE_LINE_MEMBER(n64_state::screen_vblank_n64)
{
}
169 
video_update(bitmap_rgb32 & bitmap)170 void n64_periphs::video_update(bitmap_rgb32 &bitmap)
171 {
172 
173 	if(vi_control & 0x40) /* Interlace */
174 	{
175 		field ^= 1;
176 	}
177 	else
178 	{
179 		field = 0;
180 	}
181 
182 	switch(vi_control & 0x3)
183 	{
184 		case PIXEL_SIZE_16BIT:
185 			video_update16(bitmap);
186 			break;
187 
188 		case PIXEL_SIZE_32BIT:
189 			video_update32(bitmap);
190 			break;
191 
192 		default:
193 			//fatalerror("Unsupported framebuffer depth: m_fb_size=%d\n", m_misc_state.m_fb_size);
194 			break;
195 	}
196 }
197 
video_update16(bitmap_rgb32 & bitmap)198 void n64_periphs::video_update16(bitmap_rgb32 &bitmap)
199 {
200 	//int32_t fsaa = (((n64->vi_control >> 8) & 3) < 2);
201 	//int32_t divot = (n64->vi_control >> 4) & 1;
202 
203 	//uint32_t prev_cvg = 0;
204 	//uint32_t next_cvg = 0;
205 	//int32_t dither_filter = (n64->vi_control >> 16) & 1;
206 	//int32_t vibuffering = ((n64->vi_control & 2) && fsaa && divot);
207 
208 	uint16_t* frame_buffer = (uint16_t*)&m_rdram[(vi_origin & 0xffffff) >> 2];
209 	//uint32_t hb = ((n64->vi_origin & 0xffffff) >> 2) >> 1;
210 	//uint8_t* hidden_buffer = &m_hidden_bits[hb];
211 
212 	int32_t hdiff = (vi_hstart & 0x3ff) - ((vi_hstart >> 16) & 0x3ff);
213 	float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10));
214 	uint32_t hres = ((float)hdiff * hcoeff);
215 	int32_t invisiblewidth = vi_width - hres;
216 
217 	int32_t vdiff = ((vi_vstart & 0x3ff) - ((vi_vstart >> 16) & 0x3ff)) >> 1;
218 	float vcoeff = ((float)(vi_yscale & 0xfff) / (1 << 10));
219 	uint32_t vres = ((float)vdiff * vcoeff);
220 
221 	if (vdiff <= 0 || hdiff <= 0)
222 	{
223 		return;
224 	}
225 
226 	//if (hres > 640) // Needed by Top Gear Overdrive (E)
227 	//{
228 	//  invisiblewidth += (hres - 640);
229 	//  hres = 640;
230 	//}
231 
232 	if (vres > bitmap.height()) // makes Perfect Dark boot w/o crashing
233 	{
234 		vres = bitmap.height();
235 	}
236 
237 	uint32_t pixels = 0;
238 
239 	if (frame_buffer)
240 	{
241 		for(int32_t j = 0; j < vres; j++)
242 		{
243 			uint32_t *const d = &bitmap.pix(j);
244 
245 			for(int32_t i = 0; i < hres; i++)
246 			{
247 				uint16_t pix = frame_buffer[pixels ^ WORD_ADDR_XOR];
248 
249 				const uint8_t r = ((pix >> 8) & 0xf8) | (pix >> 13);
250 				const uint8_t g = ((pix >> 3) & 0xf8) | ((pix >>  8) & 0x07);
251 				const uint8_t b = ((pix << 2) & 0xf8) | ((pix >>  3) & 0x07);
252 				d[i] = (r << 16) | (g << 8) | b;
253 				pixels++;
254 			}
255 			pixels += invisiblewidth;
256 		}
257 	}
258 }
259 
video_update32(bitmap_rgb32 & bitmap)260 void n64_periphs::video_update32(bitmap_rgb32 &bitmap)
261 {
262 	int32_t gamma = (vi_control >> 3) & 1;
263 	int32_t gamma_dither = (vi_control >> 2) & 1;
264 	//int32_t vibuffering = ((n64->vi_control & 2) && fsaa && divot);
265 
266 	uint32_t* frame_buffer32 = (uint32_t*)&m_rdram[(vi_origin & 0xffffff) >> 2];
267 
268 	const int32_t hdiff = (vi_hstart & 0x3ff) - ((vi_hstart >> 16) & 0x3ff);
269 	const float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10));
270 	uint32_t hres = ((float)hdiff * hcoeff);
271 	int32_t invisiblewidth = vi_width - hres;
272 
273 	const int32_t vdiff = ((vi_vstart & 0x3ff) - ((vi_vstart >> 16) & 0x3ff)) >> 1;
274 	const float vcoeff = ((float)(vi_yscale & 0xfff) / (1 << 10));
275 	const uint32_t vres = ((float)vdiff * vcoeff);
276 
277 	if (vdiff <= 0 || hdiff <= 0)
278 	{
279 		return;
280 	}
281 
282 	//if (hres > 640) // Needed by Top Gear Overdrive (E)
283 	//{
284 	//  invisiblewidth += (hres - 640);
285 	//  hres = 640;
286 	//}
287 
288 	if (frame_buffer32)
289 	{
290 		for (int32_t j = 0; j < vres; j++)
291 		{
292 			uint32_t *const d = &bitmap.pix(j);
293 			for (int32_t i = 0; i < hres; i++)
294 			{
295 				uint32_t pix = *frame_buffer32++;
296 				if (gamma || gamma_dither)
297 				{
298 					int32_t r = (pix >> 24) & 0xff;
299 					int32_t g = (pix >> 16) & 0xff;
300 					int32_t b = (pix >> 8) & 0xff;
301 					int32_t dith = 0;
302 					if (gamma_dither)
303 					{
304 						dith = get_random() & 0x3f;
305 					}
306 					if (gamma)
307 					{
308 						if (gamma_dither)
309 						{
310 							r = m_gamma_dither_table[(r << 6)| dith];
311 							g = m_gamma_dither_table[(g << 6)| dith];
312 							b = m_gamma_dither_table[(b << 6)| dith];
313 						}
314 						else
315 						{
316 							r = m_gamma_table[r];
317 							g = m_gamma_table[g];
318 							b = m_gamma_table[b];
319 						}
320 					}
321 					else if (gamma_dither)
322 					{
323 						if (r < 255)
324 							r += (dith & 1);
325 						if (g < 255)
326 							g += (dith & 1);
327 						if (b < 255)
328 							b += (dith & 1);
329 					}
330 					pix = (r << 24) | (g << 16) | (b << 8);
331 				}
332 
333 				d[i] = (pix >> 8);
334 			}
335 			frame_buffer32 += invisiblewidth;
336 		}
337 	}
338 }
339 
340 /*****************************************************************************/
341 
tc_div_no_perspective(int32_t ss,int32_t st,int32_t sw,int32_t * sss,int32_t * sst)342 void n64_rdp::tc_div_no_perspective(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst)
343 {
344 	*sss = (SIGN16(ss)) & 0x1ffff;
345 	*sst = (SIGN16(st)) & 0x1ffff;
346 }
347 
tc_div(int32_t ss,int32_t st,int32_t sw,int32_t * sss,int32_t * sst)348 void n64_rdp::tc_div(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst)
349 {
350 	int32_t w_carry = 0;
351 	if ((sw & 0x8000) || !(sw & 0x7fff))
352 	{
353 		w_carry = 1;
354 	}
355 
356 	sw &= 0x7fff;
357 
358 	int32_t shift;
359 	for (shift = 1; shift <= 14 && !((sw << shift) & 0x8000); shift++);
360 	shift -= 1;
361 
362 	int32_t normout = (sw << shift) & 0x3fff;
363 	int32_t wnorm = (normout & 0xff) << 2;
364 	normout >>= 8;
365 
366 	int32_t temppoint = m_norm_point_rom[normout];
367 	int32_t tempslope = m_norm_slope_rom[normout];
368 
369 	int32_t tlu_rcp = ((-(tempslope * wnorm)) >> 10) + temppoint;
370 
371 	int32_t sprod = SIGN16(ss) * tlu_rcp;
372 	int32_t tprod = SIGN16(st) * tlu_rcp;
373 	int32_t tempmask = ((1 << (shift + 1)) - 1) << (29 - shift);
374 	int32_t shift_value = 13 - shift;
375 
376 	int32_t outofbounds_s = sprod & tempmask;
377 	int32_t outofbounds_t = tprod & tempmask;
378 	if (shift == 0xe)
379 	{
380 		*sss = sprod << 1;
381 		*sst = tprod << 1;
382 	}
383 	else
384 	{
385 		*sss = sprod = (sprod >> shift_value);
386 		*sst = tprod = (tprod >> shift_value);
387 	}
388 	//compute clamp flags
389 	int32_t under_s = 0;
390 	int32_t under_t = 0;
391 	int32_t over_s = 0;
392 	int32_t over_t = 0;
393 
394 	if (outofbounds_s != tempmask && outofbounds_s != 0)
395 	{
396 		if (sprod & (1 << 29))
397 		{
398 			under_s = 1;
399 		}
400 		else
401 		{
402 			over_s = 1;
403 		}
404 	}
405 
406 	if (outofbounds_t != tempmask && outofbounds_t != 0)
407 	{
408 		if (tprod & (1 << 29))
409 		{
410 			under_t = 1;
411 		}
412 		else
413 		{
414 			over_t = 1;
415 		}
416 	}
417 
418 	over_s |= w_carry;
419 	over_t |= w_carry;
420 
421 	*sss = (*sss & 0x1ffff) | (over_s << 18) | (under_s << 17);
422 	*sst = (*sst & 0x1ffff) | (over_t << 18) | (under_t << 17);
423 }
424 
color_combiner_equation(int32_t a,int32_t b,int32_t c,int32_t d)425 int32_t n64_rdp::color_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d)
426 {
427 	a = KURT_AKELEY_SIGN9(a);
428 	b = KURT_AKELEY_SIGN9(b);
429 	c = SIGN9(c);
430 	d = KURT_AKELEY_SIGN9(d);
431 	a = (((a - b) * c) + (d << 8) + 0x80);
432 	a = SIGN17(a) >> 8;
433 	a = s_special_9bit_clamptable[a & 0x1ff];
434 	return a;
435 }
436 
alpha_combiner_equation(int32_t a,int32_t b,int32_t c,int32_t d)437 int32_t n64_rdp::alpha_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d)
438 {
439 	a = KURT_AKELEY_SIGN9(a);
440 	b = KURT_AKELEY_SIGN9(b);
441 	c = SIGN9(c);
442 	d = KURT_AKELEY_SIGN9(d);
443 	a = (((a - b) * c) + (d << 8) + 0x80) >> 8;
444 	a = SIGN9(a);
445 	a = s_special_9bit_clamptable[a & 0x1ff];
446 	return a;
447 }
448 
set_suba_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)449 void n64_rdp::set_suba_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
450 {
451 	switch (code & 0xf)
452 	{
453 		case 0:     *input = &userdata->m_combined_color; break;
454 		case 1:     *input = &userdata->m_texel0_color; break;
455 		case 2:     *input = &userdata->m_texel1_color; break;
456 		case 3:     *input = &userdata->m_prim_color; break;
457 		case 4:     *input = &userdata->m_shade_color; break;
458 		case 5:     *input = &userdata->m_env_color; break;
459 		case 6:     *input = &m_one; break;
460 		case 7:     *input = &userdata->m_noise_color; break;
461 		case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
462 		{
463 					*input = &m_zero; break;
464 		}
465 	}
466 }
467 
set_subb_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)468 void n64_rdp::set_subb_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
469 {
470 	switch (code & 0xf)
471 	{
472 		case 0:     *input = &userdata->m_combined_color; break;
473 		case 1:     *input = &userdata->m_texel0_color; break;
474 		case 2:     *input = &userdata->m_texel1_color; break;
475 		case 3:     *input = &userdata->m_prim_color; break;
476 		case 4:     *input = &userdata->m_shade_color; break;
477 		case 5:     *input = &userdata->m_env_color; break;
478 		case 6:     fatalerror("SET_SUBB_RGB_INPUT: key_center\n");
479 		case 7:     *input = &userdata->m_k4; break;
480 		case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
481 		{
482 					*input = &m_zero; break;
483 		}
484 	}
485 }
486 
set_mul_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)487 void n64_rdp::set_mul_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
488 {
489 	switch (code & 0x1f)
490 	{
491 		case 0:     *input = &userdata->m_combined_color; break;
492 		case 1:     *input = &userdata->m_texel0_color; break;
493 		case 2:     *input = &userdata->m_texel1_color; break;
494 		case 3:     *input = &userdata->m_prim_color; break;
495 		case 4:     *input = &userdata->m_shade_color; break;
496 		case 5:     *input = &userdata->m_env_color; break;
497 		case 6:     *input = &userdata->m_key_scale; break;
498 		case 7:     *input = &userdata->m_combined_alpha; break;
499 		case 8:     *input = &userdata->m_texel0_alpha; break;
500 		case 9:     *input = &userdata->m_texel1_alpha; break;
501 		case 10:    *input = &userdata->m_prim_alpha; break;
502 		case 11:    *input = &userdata->m_shade_alpha; break;
503 		case 12:    *input = &userdata->m_env_alpha; break;
504 		case 13:    *input = &userdata->m_lod_fraction; break;
505 		case 14:    *input = &userdata->m_prim_lod_fraction; break;
506 		case 15:    *input = &userdata->m_k5; break;
507 		case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
508 		case 24: case 25: case 26: case 27: case 28: case 29: case 30: case 31:
509 		{
510 					*input = &m_zero; break;
511 		}
512 	}
513 }
514 
set_add_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)515 void n64_rdp::set_add_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
516 {
517 	switch (code & 0x7)
518 	{
519 		case 0:     *input = &userdata->m_combined_color; break;
520 		case 1:     *input = &userdata->m_texel0_color; break;
521 		case 2:     *input = &userdata->m_texel1_color; break;
522 		case 3:     *input = &userdata->m_prim_color; break;
523 		case 4:     *input = &userdata->m_shade_color; break;
524 		case 5:     *input = &userdata->m_env_color; break;
525 		case 6:     *input = &m_one; break;
526 		case 7:     *input = &m_zero; break;
527 	}
528 }
529 
set_sub_input_alpha(color_t ** input,int32_t code,rdp_span_aux * userdata)530 void n64_rdp::set_sub_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata)
531 {
532 	switch (code & 0x7)
533 	{
534 		case 0:     *input = &userdata->m_combined_alpha; break;
535 		case 1:     *input = &userdata->m_texel0_alpha; break;
536 		case 2:     *input = &userdata->m_texel1_alpha; break;
537 		case 3:     *input = &userdata->m_prim_alpha; break;
538 		case 4:     *input = &userdata->m_shade_alpha; break;
539 		case 5:     *input = &userdata->m_env_alpha; break;
540 		case 6:     *input = &m_one; break;
541 		case 7:     *input = &m_zero; break;
542 	}
543 }
544 
set_mul_input_alpha(color_t ** input,int32_t code,rdp_span_aux * userdata)545 void n64_rdp::set_mul_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata)
546 {
547 	switch (code & 0x7)
548 	{
549 		case 0:     *input = &userdata->m_lod_fraction; break;
550 		case 1:     *input = &userdata->m_texel0_alpha; break;
551 		case 2:     *input = &userdata->m_texel1_alpha; break;
552 		case 3:     *input = &userdata->m_prim_alpha; break;
553 		case 4:     *input = &userdata->m_shade_alpha; break;
554 		case 5:     *input = &userdata->m_env_alpha; break;
555 		case 6:     *input = &userdata->m_prim_lod_fraction; break;
556 		case 7:     *input = &m_zero; break;
557 	}
558 }
559 
set_blender_input(int32_t cycle,int32_t which,color_t ** input_rgb,color_t ** input_a,int32_t a,int32_t b,rdp_span_aux * userdata)560 void n64_rdp::set_blender_input(int32_t cycle, int32_t which, color_t** input_rgb, color_t** input_a, int32_t a, int32_t b, rdp_span_aux* userdata)
561 {
562 	switch (a & 0x3)
563 	{
564 		case 0:
565 			*input_rgb = cycle == 0 ? &userdata->m_pixel_color : &userdata->m_blended_pixel_color;
566 			break;
567 
568 		case 1:
569 			*input_rgb = &userdata->m_memory_color;
570 			break;
571 
572 		case 2:
573 			*input_rgb = &userdata->m_blend_color;
574 			break;
575 
576 		case 3:
577 			*input_rgb = &userdata->m_fog_color;
578 			break;
579 	}
580 
581 	if (which == 0)
582 	{
583 		switch (b & 0x3)
584 		{
585 			case 0:     *input_a = &userdata->m_pixel_color; break;
586 			case 1:     *input_a = &userdata->m_fog_color; break;
587 			case 2:     *input_a = &userdata->m_shade_color; break;
588 			case 3:     *input_a = &m_zero; break;
589 		}
590 	}
591 	else
592 	{
593 		switch (b & 0x3)
594 		{
595 			case 0:     *input_a = &userdata->m_inv_pixel_color; break;
596 			case 1:     *input_a = &userdata->m_memory_color; break;
597 			case 2:     *input_a = &m_one; break;
598 			case 3:     *input_a = &m_zero; break;
599 		}
600 	}
601 }
602 
// 4x4 dither pattern of 3-bit values (0-7), stored row by row.
// get_dither_values() indexes it with ((y & 3) << 2) | (x & 3).
uint8_t const n64_rdp::s_bayer_matrix[16] =
{ /* Bayer matrix */
		0,  4,  1, 5,
		6,  2,  7, 3,
		1,   5,  0, 4,
		7,  3,  6, 2
};
610 
// 4x4 dither pattern of 3-bit values (0-7), stored row by row.
// get_dither_values() indexes it with ((y & 3) << 2) | (x & 3).
uint8_t const n64_rdp::s_magic_matrix[16] =
{ /* Magic square matrix */
		0,  6,  1, 7,
		4,  2,  5, 3,
		3,   5,  2, 4,
		7,  1,  6, 0
};
618 
// One entry per value of the 3-bit field that z_build_com_table() stores in
// bits 15-13 of a compressed Z value.
// For entry n, the first number equals the right shift z_build_com_table()
// applies to Z before placing the mantissa at bit 2, and the second number is
// the lowest uncompressed Z value that receives field value n.
// NOTE(review): the member names of z_decompress_entry_t are not visible here —
// presumably { shift, add }; confirm against the type's declaration.
z_decompress_entry_t const n64_rdp::m_z_dec_table[8] =
{
	{ 6, 0x00000 },
	{ 5, 0x20000 },
	{ 4, 0x30000 },
	{ 3, 0x38000 },
	{ 2, 0x3c000 },
	{ 1, 0x3e000 },
	{ 0, 0x3f000 },
	{ 0, 0x3f800 },
};
630 
631 /*****************************************************************************/
632 
z_build_com_table(void)633 void n64_rdp::z_build_com_table(void)
634 {
635 	uint16_t altmem = 0;
636 	for(int32_t z = 0; z < 0x40000; z++)
637 	{
638 		switch((z >> 11) & 0x7f)
639 		{
640 			case 0x00:
641 			case 0x01:
642 			case 0x02:
643 			case 0x03:
644 			case 0x04:
645 			case 0x05:
646 			case 0x06:
647 			case 0x07:
648 			case 0x08:
649 			case 0x09:
650 			case 0x0a:
651 			case 0x0b:
652 			case 0x0c:
653 			case 0x0d:
654 			case 0x0e:
655 			case 0x0f:
656 			case 0x10:
657 			case 0x11:
658 			case 0x12:
659 			case 0x13:
660 			case 0x14:
661 			case 0x15:
662 			case 0x16:
663 			case 0x17:
664 			case 0x18:
665 			case 0x19:
666 			case 0x1a:
667 			case 0x1b:
668 			case 0x1c:
669 			case 0x1d:
670 			case 0x1e:
671 			case 0x1f:
672 			case 0x20:
673 			case 0x21:
674 			case 0x22:
675 			case 0x23:
676 			case 0x24:
677 			case 0x25:
678 			case 0x26:
679 			case 0x27:
680 			case 0x28:
681 			case 0x29:
682 			case 0x2a:
683 			case 0x2b:
684 			case 0x2c:
685 			case 0x2d:
686 			case 0x2e:
687 			case 0x2f:
688 			case 0x30:
689 			case 0x31:
690 			case 0x32:
691 			case 0x33:
692 			case 0x34:
693 			case 0x35:
694 			case 0x36:
695 			case 0x37:
696 			case 0x38:
697 			case 0x39:
698 			case 0x3a:
699 			case 0x3b:
700 			case 0x3c:
701 			case 0x3d:
702 			case 0x3e:
703 			case 0x3f:
704 				altmem = (z >> 4) & 0x1ffc;
705 				break;
706 			case 0x40:
707 			case 0x41:
708 			case 0x42:
709 			case 0x43:
710 			case 0x44:
711 			case 0x45:
712 			case 0x46:
713 			case 0x47:
714 			case 0x48:
715 			case 0x49:
716 			case 0x4a:
717 			case 0x4b:
718 			case 0x4c:
719 			case 0x4d:
720 			case 0x4e:
721 			case 0x4f:
722 			case 0x50:
723 			case 0x51:
724 			case 0x52:
725 			case 0x53:
726 			case 0x54:
727 			case 0x55:
728 			case 0x56:
729 			case 0x57:
730 			case 0x58:
731 			case 0x59:
732 			case 0x5a:
733 			case 0x5b:
734 			case 0x5c:
735 			case 0x5d:
736 			case 0x5e:
737 			case 0x5f:
738 				altmem = ((z >> 3) & 0x1ffc) | 0x2000;
739 				break;
740 			case 0x60:
741 			case 0x61:
742 			case 0x62:
743 			case 0x63:
744 			case 0x64:
745 			case 0x65:
746 			case 0x66:
747 			case 0x67:
748 			case 0x68:
749 			case 0x69:
750 			case 0x6a:
751 			case 0x6b:
752 			case 0x6c:
753 			case 0x6d:
754 			case 0x6e:
755 			case 0x6f:
756 				altmem = ((z >> 2) & 0x1ffc) | 0x4000;
757 				break;
758 			case 0x70:
759 			case 0x71:
760 			case 0x72:
761 			case 0x73:
762 			case 0x74:
763 			case 0x75:
764 			case 0x76:
765 			case 0x77:
766 				altmem = ((z >> 1) & 0x1ffc) | 0x6000;
767 				break;
768 			case 0x78://uncompressed z = 0x3c000
769 			case 0x79:
770 			case 0x7a:
771 			case 0x7b:
772 				altmem = (z & 0x1ffc) | 0x8000;
773 				break;
774 			case 0x7c://uncompressed z = 0x3e000
775 			case 0x7d:
776 				altmem = ((z << 1) & 0x1ffc) | 0xa000;
777 				break;
778 			case 0x7e://uncompressed z = 0x3f000
779 				altmem = ((z << 2) & 0x1ffc) | 0xc000;
780 				break;
781 			case 0x7f://uncompressed z = 0x3f000
782 				altmem = ((z << 2) & 0x1ffc) | 0xe000;
783 				break;
784 		}
785 
786 	m_z_com_table[z] = altmem;
787 
788 	}
789 }
790 
precalc_cvmask_derivatives(void)791 void n64_rdp::precalc_cvmask_derivatives(void)
792 {
793 	const uint8_t yarray[16] = {0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
794 	const uint8_t xarray[16] = {0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
795 
796 	for (int32_t i = 0; i < 0x10000; i++)
797 	{
798 		m_compressed_cvmasks[i] = (i & 1) | ((i & 4) >> 1) | ((i & 0x20) >> 3) | ((i & 0x80) >> 4) |
799 		((i & 0x100) >> 4) | ((i & 0x400) >> 5) | ((i & 0x2000) >> 7) | ((i & 0x8000) >> 8);
800 	}
801 
802 	for (int32_t i = 0; i < 0x100; i++)
803 	{
804 		uint16_t mask = decompress_cvmask_frombyte(i);
805 		cvarray[i].cvg = cvarray[i].cvbit = 0;
806 		cvarray[i].cvbit = (i >> 7) & 1;
807 		for (int32_t k = 0; k < 8; k++)
808 		{
809 			cvarray[i].cvg += ((i >> k) & 1);
810 		}
811 
812 		uint16_t masky = 0;
813 		for (int32_t k = 0; k < 4; k++)
814 		{
815 			masky |= ((mask & (0xf000 >> (k << 2))) > 0) << k;
816 		}
817 		uint8_t offy = yarray[masky];
818 
819 		uint16_t maskx = (mask & (0xf000 >> (offy << 2))) >> ((offy ^ 3) << 2);
820 		uint8_t offx = xarray[maskx];
821 
822 		cvarray[i].xoff = offx;
823 		cvarray[i].yoff = offy;
824 	}
825 }
826 
decompress_cvmask_frombyte(uint8_t x)827 uint16_t n64_rdp::decompress_cvmask_frombyte(uint8_t x)
828 {
829 	uint16_t y = (x & 1) | ((x & 2) << 1) | ((x & 4) << 3) | ((x & 8) << 4) |
830 		((x & 0x10) << 4) | ((x & 0x20) << 5) | ((x & 0x40) << 7) | ((x & 0x80) << 8);
831 	return y;
832 }
833 
lookup_cvmask_derivatives(uint32_t mask,uint8_t * offx,uint8_t * offy,rdp_span_aux * userdata)834 void n64_rdp::lookup_cvmask_derivatives(uint32_t mask, uint8_t* offx, uint8_t* offy, rdp_span_aux* userdata)
835 {
836 	const uint32_t index = m_compressed_cvmasks[mask];
837 	userdata->m_current_pix_cvg = cvarray[index].cvg;
838 	userdata->m_current_cvg_bit = cvarray[index].cvbit;
839 	*offx = cvarray[index].xoff;
840 	*offy = cvarray[index].yoff;
841 }
842 
z_store(const rdp_poly_state & object,uint32_t zcurpixel,uint32_t dzcurpixel,uint32_t z,uint32_t enc)843 void n64_rdp::z_store(const rdp_poly_state &object, uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t z, uint32_t enc)
844 {
845 	uint16_t zval = m_z_com_table[z & 0x3ffff]|(enc >> 2);
846 	if(zcurpixel <= MEM16_LIMIT)
847 	{
848 		((uint16_t*)m_rdram)[zcurpixel ^ WORD_ADDR_XOR] = zval;
849 	}
850 	if(dzcurpixel <= MEM8_LIMIT)
851 	{
852 		m_hidden_bits[dzcurpixel ^ BYTE_ADDR_XOR] = enc & 3;
853 	}
854 }
855 
normalize_dzpix(int32_t sum)856 int32_t n64_rdp::normalize_dzpix(int32_t sum)
857 {
858 	if (sum & 0xc000)
859 	{
860 		return 0x8000;
861 	}
862 	if (!(sum & 0xffff))
863 	{
864 		return 1;
865 	}
866 	for(int32_t count = 0x2000; count > 0; count >>= 1)
867 	{
868 		if (sum & count)
869 		{
870 			return(count << 1);
871 		}
872 	}
873 	return 0;
874 }
875 
z_decompress(uint32_t zcurpixel)876 uint32_t n64_rdp::z_decompress(uint32_t zcurpixel)
877 {
878 	return m_z_complete_dec_table[(RREADIDX16(zcurpixel) >> 2) & 0x3fff];
879 }
880 
dz_decompress(uint32_t zcurpixel,uint32_t dzcurpixel)881 uint32_t n64_rdp::dz_decompress(uint32_t zcurpixel, uint32_t dzcurpixel)
882 {
883 	const uint16_t zval = RREADIDX16(zcurpixel);
884 	const uint8_t dzval = (((dzcurpixel) <= 0x7fffff) ? (m_hidden_bits[(dzcurpixel) ^ BYTE_ADDR_XOR]) : 0);
885 	const uint32_t dz_compressed = ((zval & 3) << 2) | (dzval & 3);
886 	return (1 << dz_compressed);
887 }
888 
dz_compress(uint32_t value)889 uint32_t n64_rdp::dz_compress(uint32_t value)
890 {
891 	int32_t j = 0;
892 	for (; value > 1; j++, value >>= 1);
893 	return j;
894 }
895 
get_dither_values(int32_t x,int32_t y,int32_t * cdith,int32_t * adith,const rdp_poly_state & object)896 void n64_rdp::get_dither_values(int32_t x, int32_t y, int32_t* cdith, int32_t* adith, const rdp_poly_state& object)
897 {
898 	const int32_t dithindex = ((y & 3) << 2) | (x & 3);
899 	switch((object.m_other_modes.rgb_dither_sel << 2) | object.m_other_modes.alpha_dither_sel)
900 	{
901 	case 0:
902 		*adith = *cdith = s_magic_matrix[dithindex];
903 		break;
904 	case 1:
905 		*cdith = s_magic_matrix[dithindex];
906 		*adith = (~(*cdith)) & 7;
907 		break;
908 	case 2:
909 		*cdith = s_magic_matrix[dithindex];
910 		*adith = machine().rand() & 7;
911 		break;
912 	case 3:
913 		*cdith = s_magic_matrix[dithindex];
914 		*adith = 0;
915 		break;
916 	case 4:
917 		*adith = *cdith = s_bayer_matrix[dithindex];
918 		break;
919 	case 5:
920 		*cdith = s_bayer_matrix[dithindex];
921 		*adith = (~(*cdith)) & 7;
922 		break;
923 	case 6:
924 		*cdith = s_bayer_matrix[dithindex];
925 		*adith = machine().rand() & 7;
926 		break;
927 	case 7:
928 		*cdith = s_bayer_matrix[dithindex];
929 		*adith = 0;
930 		break;
931 	case 8:
932 		*cdith = machine().rand() & 7;
933 		*adith = s_magic_matrix[dithindex];
934 		break;
935 	case 9:
936 		*cdith = machine().rand() & 7;
937 		*adith = (~s_magic_matrix[dithindex]) & 7;
938 		break;
939 	case 10:
940 		*cdith = machine().rand() & 7;
941 		*adith = (*cdith + 17) & 7;
942 		break;
943 	case 11:
944 		*cdith = machine().rand() & 7;
945 		*adith = 0;
946 		break;
947 	case 12:
948 		*cdith = 0;
949 		*adith = s_bayer_matrix[dithindex];
950 		break;
951 	case 13:
952 		*cdith = 0;
953 		*adith = (~s_bayer_matrix[dithindex]) & 7;
954 		break;
955 	case 14:
956 		*cdith = 0;
957 		*adith = machine().rand() & 7;
958 		break;
959 	case 15:
960 		*adith = *cdith = 0;
961 		break;
962 	}
963 }
964 
// Limits in to the range [min, max]. The lower bound is tested first, so
// when min > max any value below min yields min and any other value yields max.
int32_t CLAMP(int32_t in, int32_t min, int32_t max)
{
	return (in < min) ? min : ((in > max) ? max : in);
}
971 
// Depth test for a single pixel.
//
// zcurpixel   index of the pixel's 16-bit Z word (read through RREADIDX16)
// dzcurpixel  index of the pixel's entry in m_hidden_bits
// sz          incoming Z; only the low 18 bits are used
// dzpix       incoming delta-Z
// userdata    per-span state; m_dzpix_enc, m_shift_a, m_shift_b, m_blend_enable
//             and m_pre_wrap are written, m_current_pix_cvg may be rescaled
// object      per-primitive state whose other-modes control the test
//
// Returns true when the pixel passes. With z_compare_en clear the userdata
// fields are still updated and the function always returns true.
bool n64_rdp::z_compare(uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t sz, uint16_t dzpix, rdp_span_aux* userdata, const rdp_poly_state &object)
{
	bool force_coplanar = false;
	sz &= 0x3ffff;

	uint32_t oz;        // Z currently stored for this pixel (decompressed)
	uint32_t dzmem;     // delta-Z currently stored for this pixel (decompressed)
	uint32_t zval;      // raw 16-bit Z word
	int32_t rawdzmem;   // stored delta-Z in its 4-bit encoded form

	if (object.m_other_modes.z_compare_en)
	{
		oz = z_decompress(zcurpixel);
		dzmem = dz_decompress(zcurpixel, dzcurpixel);
		zval = RREADIDX16(zcurpixel);
		// NOTE(review): this bound is 0x3fffff while dz_decompress() guards the same array with 0x7fffff — confirm which is intended.
		rawdzmem = ((zval & 3) << 2) | ((((dzcurpixel) <= 0x3fffff) ? (m_hidden_bits[(dzcurpixel) ^ BYTE_ADDR_XOR]) : 0) & 3);
	}
	else
	{
		// No memory reads when the compare is disabled; fixed stand-in values are used instead.
		oz = 0;
		dzmem = 1 << 0xf;
		zval = 0x3;
		rawdzmem = 0xf;
	}

	// Encode the incoming delta-Z and derive the two shift amounts (each limited to 0..4)
	// from the difference between the incoming and stored encodings.
	userdata->m_dzpix_enc = dz_compress(dzpix & 0xffff);
	userdata->m_shift_a = CLAMP(userdata->m_dzpix_enc - rawdzmem, 0, 4);
	userdata->m_shift_b = CLAMP(rawdzmem - userdata->m_dzpix_enc, 0, 4);

	// Bits 15-13 of the Z word; for values below 3 the stored delta-Z is doubled
	// and raised to a minimum of 16 >> precision_factor.
	int32_t precision_factor = (zval >> 13) & 0xf;
	if (precision_factor < 3)
	{
		int32_t dzmemmodifier = 16 >> precision_factor;
		if (dzmem == 0x8000)
		{
			force_coplanar = true;
		}
		dzmem <<= 1;
		if (dzmem <= dzmemmodifier)
		{
			dzmem = dzmemmodifier;
		}
		if (!dzmem)
		{
			dzmem = 0xffff;
		}
	}
	if (dzmem > 0x8000)
	{
		dzmem = 0xffff;
	}

	// Use the larger of the stored and incoming delta-Z; dznotshift keeps the unscaled value.
	uint32_t dznew = (dzmem > dzpix) ? dzmem : (uint32_t)dzpix;
	uint32_t dznotshift = dznew;
	dznew <<= 3;

	bool farther = (sz + dznew) >= oz;
	bool infront = sz < oz;

	if (force_coplanar)
	{
		farther = true;
	}

	// Coverage overflow: bit 3 of (memory coverage + pixel coverage).
	bool overflow = ((userdata->m_current_mem_cvg + userdata->m_current_pix_cvg) & 8) > 0;
	userdata->m_blend_enable = (object.m_other_modes.force_blend || (!overflow && object.m_other_modes.antialias_en && farther)) ? 1 : 0;
	userdata->m_pre_wrap = overflow;

	int32_t cvgcoeff = 0;
	uint32_t dzenc = 0;

	// In z_mode 1 the pixel coverage is rescaled by the Z difference when the
	// pixel is in front, within the delta-Z range, and coverage overflowed.
	if (object.m_other_modes.z_mode == 1 && infront && farther && overflow)
	{
		dzenc = dz_compress(dznotshift & 0xffff);
		cvgcoeff = ((oz >> dzenc) - (sz >> dzenc)) & 0xf;
		userdata->m_current_pix_cvg = ((cvgcoeff * userdata->m_current_pix_cvg) >> 3) & 0xf;
	}

	if (!object.m_other_modes.z_compare_en)
	{
		return true;
	}

	int32_t diff = (int32_t)sz - (int32_t)dznew;
	bool nearer = diff <= (int32_t)oz;
	bool max = (oz == 0x3ffff);   // stored Z is the largest 18-bit value
	if (force_coplanar)
	{
		nearer = true;
	}

	// Final pass/fail decision per z_mode.
	switch(object.m_other_modes.z_mode)
	{
	case 0:
		return (max || (overflow ? infront : nearer));
	case 1:
		return (max || (overflow ? infront : nearer));
	case 2:
		return (infront || max);
	case 3:
		return (farther && nearer && !max);
	}

	return false;
}
1077 
get_log2(uint32_t lod_clamp)1078 uint32_t n64_rdp::get_log2(uint32_t lod_clamp)
1079 {
1080 	if (lod_clamp < 2)
1081 	{
1082 		return 0;
1083 	}
1084 	else
1085 	{
1086 		for (int32_t i = 7; i > 0; i--)
1087 		{
1088 			if ((lod_clamp >> i) & 1)
1089 			{
1090 				return i;
1091 			}
1092 		}
1093 	}
1094 
1095 	return 0;
1096 }
1097 
1098 /*****************************************************************************/
1099 
read_data(uint32_t address)1100 uint64_t n64_rdp::read_data(uint32_t address)
1101 {
1102 	if (m_status & 0x1)     // XBUS_DMEM_DMA enabled
1103 	{
1104 		return (uint64_t(m_dmem[(address & 0xfff) / 4]) << 32) | m_dmem[((address + 4) & 0xfff) / 4];
1105 	}
1106 	else
1107 	{
1108 		return (uint64_t(m_rdram[((address & 0xffffff) / 4)]) << 32) | m_rdram[(((address + 4) & 0xffffff) / 4)];
1109 	}
1110 }
1111 
1112 char const *const  n64_rdp::s_image_format[] = { "RGBA", "YUV", "CI", "IA", "I", "???", "???", "???" };
1113 char const *const  n64_rdp::s_image_size[] = { "4-bit", "8-bit", "16-bit", "32-bit" };
1114 
1115 int32_t const n64_rdp::s_rdp_command_length[64] =
1116 {
1117 	8,          // 0x00, No Op
1118 	8,          // 0x01, ???
1119 	8,          // 0x02, ???
1120 	8,          // 0x03, ???
1121 	8,          // 0x04, ???
1122 	8,          // 0x05, ???
1123 	8,          // 0x06, ???
1124 	8,          // 0x07, ???
1125 	32,         // 0x08, Non-Shaded Triangle
1126 	32+16,      // 0x09, Non-Shaded, Z-Buffered Triangle
1127 	32+64,      // 0x0a, Textured Triangle
1128 	32+64+16,   // 0x0b, Textured, Z-Buffered Triangle
1129 	32+64,      // 0x0c, Shaded Triangle
1130 	32+64+16,   // 0x0d, Shaded, Z-Buffered Triangle
1131 	32+64+64,   // 0x0e, Shaded+Textured Triangle
1132 	32+64+64+16,// 0x0f, Shaded+Textured, Z-Buffered Triangle
1133 	8,          // 0x10, ???
1134 	8,          // 0x11, ???
1135 	8,          // 0x12, ???
1136 	8,          // 0x13, ???
1137 	8,          // 0x14, ???
1138 	8,          // 0x15, ???
1139 	8,          // 0x16, ???
1140 	8,          // 0x17, ???
1141 	8,          // 0x18, ???
1142 	8,          // 0x19, ???
1143 	8,          // 0x1a, ???
1144 	8,          // 0x1b, ???
1145 	8,          // 0x1c, ???
1146 	8,          // 0x1d, ???
1147 	8,          // 0x1e, ???
1148 	8,          // 0x1f, ???
1149 	8,          // 0x20, ???
1150 	8,          // 0x21, ???
1151 	8,          // 0x22, ???
1152 	8,          // 0x23, ???
1153 	16,         // 0x24, Texture_Rectangle
1154 	16,         // 0x25, Texture_Rectangle_Flip
1155 	8,          // 0x26, Sync_Load
1156 	8,          // 0x27, Sync_Pipe
1157 	8,          // 0x28, Sync_Tile
1158 	8,          // 0x29, Sync_Full
1159 	8,          // 0x2a, Set_Key_GB
1160 	8,          // 0x2b, Set_Key_R
1161 	8,          // 0x2c, Set_Convert
1162 	8,          // 0x2d, Set_Scissor
1163 	8,          // 0x2e, Set_Prim_Depth
1164 	8,          // 0x2f, Set_Other_Modes
1165 	8,          // 0x30, Load_TLUT
1166 	8,          // 0x31, ???
1167 	8,          // 0x32, Set_Tile_Size
1168 	8,          // 0x33, Load_Block
1169 	8,          // 0x34, Load_Tile
1170 	8,          // 0x35, Set_Tile
1171 	8,          // 0x36, Fill_Rectangle
1172 	8,          // 0x37, Set_Fill_Color
1173 	8,          // 0x38, Set_Fog_Color
1174 	8,          // 0x39, Set_Blend_Color
1175 	8,          // 0x3a, Set_Prim_Color
1176 	8,          // 0x3b, Set_Env_Color
1177 	8,          // 0x3c, Set_Combine
1178 	8,          // 0x3d, Set_Texture_Image
1179 	8,          // 0x3e, Set_Mask_Image
1180 	8           // 0x3f, Set_Color_Image
1181 };
1182 
disassemble(char * buffer)1183 void n64_rdp::disassemble(char* buffer)
1184 {
1185 	char sl[32], tl[32], sh[32], th[32];
1186 	char s[32], t[32], w[32];
1187 	char dsdx[32], dtdx[32], dwdx[32];
1188 	char dsdy[32], dtdy[32], dwdy[32];
1189 	char dsde[32], dtde[32], dwde[32];
1190 	char yl[32], yh[32], ym[32], xl[32], xh[32], xm[32];
1191 	char dxldy[32], dxhdy[32], dxmdy[32];
1192 	char rt[32], gt[32], bt[32], at[32];
1193 	char drdx[32], dgdx[32], dbdx[32], dadx[32];
1194 	char drdy[32], dgdy[32], dbdy[32], dady[32];
1195 	char drde[32], dgde[32], dbde[32], dade[32];
1196 
1197 	uint64_t cmd[32];
1198 
1199 	const uint32_t length = m_cmd_ptr * 8;
1200 	if (length < 8)
1201 	{
1202 		sprintf(buffer, "ERROR: length = %d\n", length);
1203 		return;
1204 	}
1205 
1206 	cmd[0] = m_cmd_data[m_cmd_cur];
1207 
1208 	const int32_t tile = (cmd[0] >> 56) & 0x7;
1209 	sprintf(sl, "%4.2f", (float)((cmd[0] >> 44) & 0xfff) / 4.0f);
1210 	sprintf(tl, "%4.2f", (float)((cmd[0] >> 32) & 0xfff) / 4.0f);
1211 	sprintf(sh, "%4.2f", (float)((cmd[0] >> 12) & 0xfff) / 4.0f);
1212 	sprintf(th, "%4.2f", (float)((cmd[0] >>  0) & 0xfff) / 4.0f);
1213 
1214 	const char* format = s_image_format[(cmd[0] >> 53) & 0x7];
1215 	const char* size = s_image_size[(cmd[0] >> 51) & 0x3];
1216 
1217 	const uint32_t r = (cmd[0] >> 24) & 0xff;
1218 	const uint32_t g = (cmd[0] >> 16) & 0xff;
1219 	const uint32_t b = (cmd[0] >>  8) & 0xff;
1220 	const uint32_t a = (cmd[0] >>  0) & 0xff;
1221 
1222 	const uint32_t command = (cmd[0] >> 56) & 0x3f;
1223 	switch (command)
1224 	{
1225 		case 0x00:  sprintf(buffer, "No Op"); break;
1226 		case 0x08:      // Tri_NoShade
1227 		{
1228 			const int32_t lft = (cmd[0] >> 55) & 0x1;
1229 
1230 			if (length != s_rdp_command_length[command])
1231 			{
1232 				sprintf(buffer, "ERROR: Tri_NoShade length = %d\n", length);
1233 				return;
1234 			}
1235 
1236 			cmd[1] = m_cmd_data[m_cmd_cur+1];
1237 			cmd[2] = m_cmd_data[m_cmd_cur+2];
1238 			cmd[3] = m_cmd_data[m_cmd_cur+3];
1239 
1240 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1241 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1242 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1243 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1244 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1245 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1246 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1247 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1248 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1249 
1250 			sprintf(buffer, "Tri_NoShade            %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1251 			break;
1252 		}
1253 		case 0x09:      // Tri_NoShadeZ
1254 		{
1255 			const int32_t lft = (cmd[0] >> 55) & 0x1;
1256 
1257 			if (length != s_rdp_command_length[command])
1258 			{
1259 				sprintf(buffer, "ERROR: Tri_NoShadeZ length = %d\n", length);
1260 				return;
1261 			}
1262 
1263 			cmd[1] = m_cmd_data[m_cmd_cur+1];
1264 			cmd[2] = m_cmd_data[m_cmd_cur+2];
1265 			cmd[3] = m_cmd_data[m_cmd_cur+3];
1266 
1267 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1268 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1269 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1270 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1271 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1272 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1273 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1274 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1275 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1276 
1277 			sprintf(buffer, "Tri_NoShadeZ            %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1278 			break;
1279 		}
1280 		case 0x0a:      // Tri_Tex
1281 		{
1282 			const int32_t lft = (cmd[0] >> 55) & 0x1;
1283 
1284 			if (length < s_rdp_command_length[command])
1285 			{
1286 				sprintf(buffer, "ERROR: Tri_Tex length = %d\n", length);
1287 				return;
1288 			}
1289 
1290 			for (int32_t i = 1; i < 12; i++)
1291 			{
1292 				cmd[i] = m_cmd_data[m_cmd_cur+i];
1293 			}
1294 
1295 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1296 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1297 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1298 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1299 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1300 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1301 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1302 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1303 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1304 
1305 			sprintf(s,      "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1306 			sprintf(t,      "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1307 			sprintf(w,      "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1308 			sprintf(dsdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1309 			sprintf(dtdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1310 			sprintf(dwdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1311 			sprintf(dsde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1312 			sprintf(dtde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1313 			sprintf(dwde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1314 			sprintf(dsdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1315 			sprintf(dtdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1316 			sprintf(dwdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1317 
1318 			buffer+=sprintf(buffer, "Tri_Tex               %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1319 			buffer+=sprintf(buffer, "                              ");
1320 			buffer+=sprintf(buffer, "                       S: %s, T: %s, W: %s\n", s, t, w);
1321 			buffer+=sprintf(buffer, "                              ");
1322 			buffer+=sprintf(buffer, "                       DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1323 			buffer+=sprintf(buffer, "                              ");
1324 			buffer+=sprintf(buffer, "                       DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1325 			buffer+=sprintf(buffer, "                              ");
1326 			buffer+=sprintf(buffer, "                       DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1327 			break;
1328 		}
1329 		case 0x0b:      // Tri_TexZ
1330 		{
1331 			const int32_t lft = (cmd[0] >> 55) & 0x1;
1332 
1333 			if (length < s_rdp_command_length[command])
1334 			{
1335 				sprintf(buffer, "ERROR: Tri_TexZ length = %d\n", length);
1336 				return;
1337 			}
1338 
1339 			for (int32_t i = 1; i < 12; i++)
1340 			{
1341 				cmd[i] = m_cmd_data[m_cmd_cur+i];
1342 			}
1343 
1344 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1345 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1346 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1347 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1348 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1349 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1350 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1351 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1352 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1353 
1354 			sprintf(s,      "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1355 			sprintf(t,      "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1356 			sprintf(w,      "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1357 			sprintf(dsdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1358 			sprintf(dtdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1359 			sprintf(dwdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1360 			sprintf(dsde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1361 			sprintf(dtde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1362 			sprintf(dwde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1363 			sprintf(dsdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1364 			sprintf(dtdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1365 			sprintf(dwdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1366 
1367 			buffer+=sprintf(buffer, "Tri_TexZ               %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1368 			buffer+=sprintf(buffer, "                              ");
1369 			buffer+=sprintf(buffer, "                       S: %s, T: %s, W: %s\n", s, t, w);
1370 			buffer+=sprintf(buffer, "                              ");
1371 			buffer+=sprintf(buffer, "                       DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1372 			buffer+=sprintf(buffer, "                              ");
1373 			buffer+=sprintf(buffer, "                       DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1374 			buffer+=sprintf(buffer, "                              ");
1375 			buffer+=sprintf(buffer, "                       DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1376 			break;
1377 		}
1378 		case 0x0c:      // Tri_Shade
1379 		{
1380 			const int32_t lft = (command >> 23) & 0x1;
1381 
1382 			if (length != s_rdp_command_length[command])
1383 			{
1384 				sprintf(buffer, "ERROR: Tri_Shade length = %d\n", length);
1385 				return;
1386 			}
1387 
1388 			for (int32_t i = 1; i < 12; i++)
1389 			{
1390 				cmd[i] = m_cmd_data[i];
1391 			}
1392 
1393 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1394 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1395 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1396 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1397 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1398 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1399 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1400 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1401 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1402 
1403 			sprintf(rt,     "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1404 			sprintf(gt,     "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1405 			sprintf(bt,     "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1406 			sprintf(at,     "%4.4f", (float)int32_t( ((cmd[4]        & 0x0000ffff) << 16) | ( cmd[ 6]        & 0xffff)) / 65536.0f);
1407 			sprintf(drdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1408 			sprintf(dgdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1409 			sprintf(dbdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1410 			sprintf(dadx,   "%4.4f", (float)int32_t( ((cmd[5]        & 0x0000ffff) << 16) | ( cmd[ 7]        & 0xffff)) / 65536.0f);
1411 			sprintf(drde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1412 			sprintf(dgde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1413 			sprintf(dbde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1414 			sprintf(dade,   "%4.4f", (float)int32_t( ((cmd[8]        & 0x0000ffff) << 16) | ( cmd[10]        & 0xffff)) / 65536.0f);
1415 			sprintf(drdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1416 			sprintf(dgdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1417 			sprintf(dbdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1418 			sprintf(dady,   "%4.4f", (float)int32_t( ((cmd[9]        & 0x0000ffff) << 16) | ( cmd[11]        & 0xffff)) / 65536.0f);
1419 
1420 			buffer+=sprintf(buffer, "Tri_Shade              %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1421 			buffer+=sprintf(buffer, "                              ");
1422 			buffer+=sprintf(buffer, "                       R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1423 			buffer+=sprintf(buffer, "                              ");
1424 			buffer+=sprintf(buffer, "                       DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1425 			buffer+=sprintf(buffer, "                              ");
1426 			buffer+=sprintf(buffer, "                       DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1427 			buffer+=sprintf(buffer, "                              ");
1428 			buffer+=sprintf(buffer, "                       DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1429 			break;
1430 		}
1431 		case 0x0d:      // Tri_ShadeZ
1432 		{
1433 			const int32_t lft = (command >> 23) & 0x1;
1434 
1435 			if (length != s_rdp_command_length[command])
1436 			{
1437 				sprintf(buffer, "ERROR: Tri_ShadeZ length = %d\n", length);
1438 				return;
1439 			}
1440 
1441 			for (int32_t i = 1; i < 12; i++)
1442 			{
1443 				cmd[i] = m_cmd_data[i];
1444 			}
1445 
1446 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1447 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1448 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1449 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1450 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1451 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1452 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1453 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1454 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1455 
1456 			sprintf(rt,     "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1457 			sprintf(gt,     "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1458 			sprintf(bt,     "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1459 			sprintf(at,     "%4.4f", (float)int32_t( ((cmd[4]        & 0x0000ffff) << 16) | ( cmd[ 6]        & 0xffff)) / 65536.0f);
1460 			sprintf(drdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1461 			sprintf(dgdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1462 			sprintf(dbdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1463 			sprintf(dadx,   "%4.4f", (float)int32_t( ((cmd[5]        & 0x0000ffff) << 16) | ( cmd[ 7]        & 0xffff)) / 65536.0f);
1464 			sprintf(drde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1465 			sprintf(dgde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1466 			sprintf(dbde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1467 			sprintf(dade,   "%4.4f", (float)int32_t( ((cmd[8]        & 0x0000ffff) << 16) | ( cmd[10]        & 0xffff)) / 65536.0f);
1468 			sprintf(drdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1469 			sprintf(dgdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1470 			sprintf(dbdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1471 			sprintf(dady,   "%4.4f", (float)int32_t( ((cmd[9]        & 0x0000ffff) << 16) | ( cmd[11]        & 0xffff)) / 65536.0f);
1472 
1473 			buffer+=sprintf(buffer, "Tri_ShadeZ              %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1474 			buffer+=sprintf(buffer, "                              ");
1475 			buffer+=sprintf(buffer, "                       R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1476 			buffer+=sprintf(buffer, "                              ");
1477 			buffer+=sprintf(buffer, "                       DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1478 			buffer+=sprintf(buffer, "                              ");
1479 			buffer+=sprintf(buffer, "                       DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1480 			buffer+=sprintf(buffer, "                              ");
1481 			buffer+=sprintf(buffer, "                       DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1482 			break;
1483 		}
1484 		case 0x0e:      // Tri_TexShade
1485 		{
1486 			const int32_t lft = (command >> 23) & 0x1;
1487 
1488 			if (length < s_rdp_command_length[command])
1489 			{
1490 				sprintf(buffer, "ERROR: Tri_TexShade length = %d\n", length);
1491 				return;
1492 			}
1493 
1494 			for (int32_t i = 1; i < 20; i++)
1495 			{
1496 				cmd[i] = m_cmd_data[m_cmd_cur+i];
1497 			}
1498 
1499 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1500 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1501 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1502 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1503 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1504 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1505 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1506 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1507 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1508 
1509 			sprintf(rt,     "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1510 			sprintf(gt,     "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1511 			sprintf(bt,     "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1512 			sprintf(at,     "%4.4f", (float)int32_t( ((cmd[4]        & 0x0000ffff) << 16) | ( cmd[ 6]        & 0xffff)) / 65536.0f);
1513 			sprintf(drdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1514 			sprintf(dgdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1515 			sprintf(dbdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1516 			sprintf(dadx,   "%4.4f", (float)int32_t( ((cmd[5]        & 0x0000ffff) << 16) | ( cmd[ 7]        & 0xffff)) / 65536.0f);
1517 			sprintf(drde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1518 			sprintf(dgde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1519 			sprintf(dbde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1520 			sprintf(dade,   "%4.4f", (float)int32_t( ((cmd[8]        & 0x0000ffff) << 16) | ( cmd[10]        & 0xffff)) / 65536.0f);
1521 			sprintf(drdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1522 			sprintf(dgdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1523 			sprintf(dbdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1524 			sprintf(dady,   "%4.4f", (float)int32_t( ((cmd[9]        & 0x0000ffff) << 16) | ( cmd[11]        & 0xffff)) / 65536.0f);
1525 
1526 			sprintf(s,      "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1527 			sprintf(t,      "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1528 			sprintf(w,      "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1529 			sprintf(dsdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1530 			sprintf(dtdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1531 			sprintf(dwdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1532 			sprintf(dsde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1533 			sprintf(dtde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1534 			sprintf(dwde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1535 			sprintf(dsdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1536 			sprintf(dtdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1537 			sprintf(dwdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1538 
1539 			buffer+=sprintf(buffer, "Tri_TexShade           %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1540 			buffer+=sprintf(buffer, "                              ");
1541 			buffer+=sprintf(buffer, "                       R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1542 			buffer+=sprintf(buffer, "                              ");
1543 			buffer+=sprintf(buffer, "                       DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1544 			buffer+=sprintf(buffer, "                              ");
1545 			buffer+=sprintf(buffer, "                       DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1546 			buffer+=sprintf(buffer, "                              ");
1547 			buffer+=sprintf(buffer, "                       DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1548 
1549 			buffer+=sprintf(buffer, "                              ");
1550 			buffer+=sprintf(buffer, "                       S: %s, T: %s, W: %s\n", s, t, w);
1551 			buffer+=sprintf(buffer, "                              ");
1552 			buffer+=sprintf(buffer, "                       DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1553 			buffer+=sprintf(buffer, "                              ");
1554 			buffer+=sprintf(buffer, "                       DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1555 			buffer+=sprintf(buffer, "                              ");
1556 			buffer+=sprintf(buffer, "                       DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1557 			break;
1558 		}
1559 		case 0x0f:      // Tri_TexShadeZ
1560 		{
1561 			const int32_t lft = (command >> 23) & 0x1;
1562 
1563 			if (length < s_rdp_command_length[command])
1564 			{
1565 				sprintf(buffer, "ERROR: Tri_TexShadeZ length = %d\n", length);
1566 				return;
1567 			}
1568 
1569 			for (int32_t i = 1; i < 20; i++)
1570 			{
1571 				cmd[i] = m_cmd_data[m_cmd_cur+i];
1572 			}
1573 
1574 			sprintf(yl,     "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1575 			sprintf(ym,     "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1576 			sprintf(yh,     "%4.4f", (float)((cmd[0] >>  0) & 0x1fff) / 4.0f);
1577 			sprintf(xl,     "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1578 			sprintf(dxldy,  "%4.4f", (float)int32_t(cmd[1])       / 65536.0f);
1579 			sprintf(xh,     "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1580 			sprintf(dxhdy,  "%4.4f", (float)int32_t(cmd[2])       / 65536.0f);
1581 			sprintf(xm,     "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1582 			sprintf(dxmdy,  "%4.4f", (float)int32_t(cmd[3])       / 65536.0f);
1583 
1584 			sprintf(rt,     "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1585 			sprintf(gt,     "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1586 			sprintf(bt,     "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1587 			sprintf(at,     "%4.4f", (float)int32_t( ((cmd[4]        & 0x0000ffff) << 16) | ( cmd[ 6]        & 0xffff)) / 65536.0f);
1588 			sprintf(drdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1589 			sprintf(dgdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1590 			sprintf(dbdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1591 			sprintf(dadx,   "%4.4f", (float)int32_t( ((cmd[5]        & 0x0000ffff) << 16) | ( cmd[ 7]        & 0xffff)) / 65536.0f);
1592 			sprintf(drde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1593 			sprintf(dgde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1594 			sprintf(dbde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1595 			sprintf(dade,   "%4.4f", (float)int32_t( ((cmd[8]        & 0x0000ffff) << 16) | ( cmd[10]        & 0xffff)) / 65536.0f);
1596 			sprintf(drdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1597 			sprintf(dgdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1598 			sprintf(dbdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1599 			sprintf(dady,   "%4.4f", (float)int32_t( ((cmd[9]        & 0x0000ffff) << 16) | ( cmd[11]        & 0xffff)) / 65536.0f);
1600 
1601 			sprintf(s,      "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000)        | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1602 			sprintf(t,      "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1603 			sprintf(w,      "%4.4f", (float)int32_t(  (cmd[4]        & 0xffff0000)        | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1604 			sprintf(dsdx,   "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000)        | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1605 			sprintf(dtdx,   "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1606 			sprintf(dwdx,   "%4.4f", (float)int32_t(  (cmd[5]        & 0xffff0000)        | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1607 			sprintf(dsde,   "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000)        | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1608 			sprintf(dtde,   "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1609 			sprintf(dwde,   "%4.4f", (float)int32_t(  (cmd[8]        & 0xffff0000)        | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1610 			sprintf(dsdy,   "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000)        | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1611 			sprintf(dtdy,   "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1612 			sprintf(dwdy,   "%4.4f", (float)int32_t(  (cmd[9]        & 0xffff0000)        | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1613 
1614 			buffer+=sprintf(buffer, "Tri_TexShadeZ           %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1615 			buffer+=sprintf(buffer, "                              ");
1616 			buffer+=sprintf(buffer, "                       R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1617 			buffer+=sprintf(buffer, "                              ");
1618 			buffer+=sprintf(buffer, "                       DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1619 			buffer+=sprintf(buffer, "                              ");
1620 			buffer+=sprintf(buffer, "                       DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1621 			buffer+=sprintf(buffer, "                              ");
1622 			buffer+=sprintf(buffer, "                       DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1623 
1624 			buffer+=sprintf(buffer, "                              ");
1625 			buffer+=sprintf(buffer, "                       S: %s, T: %s, W: %s\n", s, t, w);
1626 			buffer+=sprintf(buffer, "                              ");
1627 			buffer+=sprintf(buffer, "                       DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1628 			buffer+=sprintf(buffer, "                              ");
1629 			buffer+=sprintf(buffer, "                       DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1630 			buffer+=sprintf(buffer, "                              ");
1631 			buffer+=sprintf(buffer, "                       DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1632 			break;
1633 		}
1634 		case 0x24:
1635 		case 0x25:
1636 		{
1637 			if (length < 16)
1638 			{
1639 				sprintf(buffer, "ERROR: Texture_Rectangle length = %d\n", length);
1640 				return;
1641 			}
1642 
1643 			cmd[1] = m_cmd_data[m_cmd_cur+1];
1644 			sprintf(s,    "%4.4f", (float)int16_t((cmd[1] >> 48) & 0xffff) / 32.0f);
1645 			sprintf(t,    "%4.4f", (float)int16_t((cmd[1] >> 32) & 0xffff) / 32.0f);
1646 			sprintf(dsdx, "%4.4f", (float)int16_t((cmd[1] >> 16) & 0xffff) / 1024.0f);
1647 			sprintf(dtdy, "%4.4f", (float)int16_t((cmd[1] >>  0) & 0xffff) / 1024.0f);
1648 
1649 			if (command == 0x24)
1650 					sprintf(buffer, "Texture_Rectangle      %d, %s, %s, %s, %s,  %s, %s, %s, %s", tile, sh, th, sl, tl, s, t, dsdx, dtdy);
1651 			else
1652 					sprintf(buffer, "Texture_Rectangle_Flip %d, %s, %s, %s, %s,  %s, %s, %s, %s", tile, sh, th, sl, tl, s, t, dsdx, dtdy);
1653 
1654 			break;
1655 		}
1656 		case 0x26:  sprintf(buffer, "Sync_Load"); break;
1657 		case 0x27:  sprintf(buffer, "Sync_Pipe"); break;
1658 		case 0x28:  sprintf(buffer, "Sync_Tile"); break;
1659 		case 0x29:  sprintf(buffer, "Sync_Full"); break;
1660 		case 0x2d:  sprintf(buffer, "Set_Scissor            %s, %s, %s, %s", sl, tl, sh, th); break;
1661 		case 0x2e:  sprintf(buffer, "Set_Prim_Depth         %04X, %04X", uint32_t(cmd[0] >> 16) & 0xffff, (uint32_t)cmd[0] & 0xffff); break;
1662 		case 0x2f:  sprintf(buffer, "Set_Other_Modes        %08X %08X", uint32_t(cmd[0] >> 32), (uint32_t)cmd[0]); break;
1663 		case 0x30:  sprintf(buffer, "Load_TLUT              %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break;
1664 		case 0x32:  sprintf(buffer, "Set_Tile_Size          %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break;
1665 		case 0x33:  sprintf(buffer, "Load_Block             %d, %03X, %03X, %03X, %03X", tile, uint32_t(cmd[0] >> 44) & 0xfff, uint32_t(cmd[0] >> 32) & 0xfff, uint32_t(cmd[0] >> 12) & 0xfff, uint32_t(cmd[0]) & 0xfff); break;
1666 		case 0x34:  sprintf(buffer, "Load_Tile              %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break;
1667 		case 0x35:  sprintf(buffer, "Set_Tile               %d, %s, %s, %d, %04X", tile, format, size, (uint32_t(cmd[0] >> 41) & 0x1ff) * 8, (uint32_t(cmd[0] >> 32) & 0x1ff) * 8); break;
1668 		case 0x36:  sprintf(buffer, "Fill_Rectangle         %s, %s, %s, %s", sh, th, sl, tl); break;
1669 		case 0x37:  sprintf(buffer, "Set_Fill_Color         R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1670 		case 0x38:  sprintf(buffer, "Set_Fog_Color          R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1671 		case 0x39:  sprintf(buffer, "Set_Blend_Color        R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1672 		case 0x3a:  sprintf(buffer, "Set_Prim_Color         %d, %d, R: %d, G: %d, B: %d, A: %d", uint32_t(cmd[0] >> 40) & 0x1f, uint32_t(cmd[0] >> 32) & 0xff, r, g, b, a); break;
1673 		case 0x3b:  sprintf(buffer, "Set_Env_Color          R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1674 		case 0x3c:  sprintf(buffer, "Set_Combine            %08X %08X", uint32_t(cmd[0] >> 32), (uint32_t)cmd[0]); break;
1675 		case 0x3d:  sprintf(buffer, "Set_Texture_Image      %s, %s, %d, %08X", format, size, (uint32_t(cmd[0] >> 32) & 0x1ff) + 1, (uint32_t)cmd[0]); break;
1676 		case 0x3e:  sprintf(buffer, "Set_Mask_Image         %08X", (uint32_t)cmd[0]); break;
1677 		case 0x3f:  sprintf(buffer, "Set_Color_Image        %s, %s, %d, %08X", format, size, (uint32_t(cmd[0] >> 32) & 0x1ff) + 1, (uint32_t)cmd[0]); break;
1678 		default:    sprintf(buffer, "Unknown (%08X %08X)", uint32_t(cmd[0] >> 32), (uint32_t)cmd[0]); break;
1679 	}
1680 }
1681 
1682 /*****************************************************************************/
1683 
// Coverage nibble for the right-hand edge of a subscanline.
//   x     - edge X position; bits 14-15 pick the base step and any set bit in
//           bits 1-13 bumps it by one more (bit 0 is ignored).
//   fmask - subsample mask the result is restricted to.
// Returns the low nibble of (0xf0 >> step), ANDed with fmask.
static uint32_t rightcvghex(uint32_t x, uint32_t fmask)
{
	const uint32_t has_fraction = ((x & 0x3ffe) != 0) ? 1 : 0;
	const uint32_t step = ((x >> 14) & 3) + has_fraction;
	return ((0xf0 >> step) & 0xf) & fmask;
}
1691 
// Coverage nibble for the left-hand edge of a subscanline.
//   x     - edge X position; bits 14-15 pick the base step and any set bit in
//           bits 1-13 bumps it by one more (bit 0 is ignored).
//   fmask - subsample mask the result is restricted to.
// Returns (0xf >> step) ANDed with fmask.
static uint32_t leftcvghex(uint32_t x, uint32_t fmask)
{
	const uint32_t has_fraction = ((x & 0x3ffe) != 0) ? 1 : 0;
	const uint32_t step = ((x >> 14) & 3) + has_fraction;
	return (0xf >> step) & fmask;
}
1699 
// Limits value to the inclusive range [min, max].
// The lower bound is tested first, so min is returned whenever value < min,
// even if the bounds happen to be inverted.
static int32_t CLIP(int32_t value,int32_t min,int32_t max)
{
	return (value < min) ? min : ((value > max) ? max : value);
}
1715 
compute_cvg_noflip(extent_t * spans,int32_t * majorx,int32_t * minorx,int32_t * majorxint,int32_t * minorxint,int32_t scanline,int32_t yh,int32_t yl,int32_t base)1716 void n64_rdp::compute_cvg_noflip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base)
1717 {
1718 	int32_t purgestart = 0xfff;
1719 	int32_t purgeend = 0;
1720 	const bool writablescanline = !(scanline & ~0x3ff);
1721 	const int32_t scanlinespx = scanline << 2;
1722 
1723 	if (!writablescanline) return;
1724 
1725 	for(int32_t i = 0; i < 4; i++)
1726 	{
1727 		if (minorxint[i] < purgestart)
1728 		{
1729 			purgestart = minorxint[i];
1730 		}
1731 		if (majorxint[i] > purgeend)
1732 		{
1733 			purgeend = majorxint[i];
1734 		}
1735 	}
1736 
1737 	purgestart = CLIP(purgestart, 0, 1023);
1738 	purgeend = CLIP(purgeend, 0, 1023);
1739 	int32_t length = purgeend - purgestart;
1740 
1741 	if (length < 0) return;
1742 
1743 	rdp_span_aux* userdata = (rdp_span_aux*)spans[scanline - base].userdata;
1744 	memset(&userdata->m_cvg[purgestart], 0, (length + 1) << 1);
1745 
1746 	for(int32_t i = 0; i < 4; i++)
1747 	{
1748 		int32_t minorcur = minorx[i];
1749 		int32_t majorcur = majorx[i];
1750 		int32_t minorcurint = minorxint[i];
1751 		int32_t majorcurint = majorxint[i];
1752 		length = majorcurint - minorcurint;
1753 
1754 		int32_t fmask = (i & 1) ? 5 : 0xa;
1755 		int32_t maskshift = (i ^ 3) << 2;
1756 		int32_t fmaskshifted = fmask << maskshift;
1757 		int32_t fleft = CLIP(minorcurint + 1, 0, 647);
1758 		int32_t fright = CLIP(majorcurint - 1, 0, 647);
1759 		bool valid_y = ((scanlinespx + i) >= yh && (scanlinespx + i) < yl);
1760 		if (valid_y && length >= 0)
1761 		{
1762 			if (minorcurint != majorcurint)
1763 			{
1764 				if (!(minorcurint & ~0x3ff))
1765 				{
1766 					userdata->m_cvg[minorcurint] |= (leftcvghex(minorcur, fmask) << maskshift);
1767 				}
1768 				if (!(majorcurint & ~0x3ff))
1769 				{
1770 					userdata->m_cvg[majorcurint] |= (rightcvghex(majorcur, fmask) << maskshift);
1771 				}
1772 			}
1773 			else
1774 			{
1775 				if (!(majorcurint & ~0x3ff))
1776 				{
1777 					int32_t samecvg = leftcvghex(minorcur, fmask) & rightcvghex(majorcur, fmask);
1778 					userdata->m_cvg[majorcurint] |= (samecvg << maskshift);
1779 				}
1780 			}
1781 			for (; fleft <= fright; fleft++)
1782 			{
1783 				userdata->m_cvg[fleft] |= fmaskshifted;
1784 			}
1785 		}
1786 	}
1787 }
1788 
compute_cvg_flip(extent_t * spans,int32_t * majorx,int32_t * minorx,int32_t * majorxint,int32_t * minorxint,int32_t scanline,int32_t yh,int32_t yl,int32_t base)1789 void n64_rdp::compute_cvg_flip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base)
1790 {
1791 	int32_t purgestart = 0xfff;
1792 	int32_t purgeend = 0;
1793 	const bool writablescanline = !(scanline & ~0x3ff);
1794 	const int32_t scanlinespx = scanline << 2;
1795 
1796 	if(!writablescanline) return;
1797 
1798 	for(int32_t i = 0; i < 4; i++)
1799 	{
1800 		if (majorxint[i] < purgestart)
1801 		{
1802 			purgestart = majorxint[i];
1803 		}
1804 		if (minorxint[i] > purgeend)
1805 		{
1806 			purgeend = minorxint[i];
1807 		}
1808 	}
1809 
1810 	purgestart = CLIP(purgestart, 0, 1023);
1811 	purgeend = CLIP(purgeend, 0, 1023);
1812 
1813 	int32_t length = purgeend - purgestart;
1814 
1815 	if (length < 0) return;
1816 
1817 	rdp_span_aux* userdata = (rdp_span_aux*)spans[scanline - base].userdata;
1818 	memset(&userdata->m_cvg[purgestart], 0, (length + 1) << 1);
1819 
1820 	for(int32_t i = 0; i < 4; i++)
1821 	{
1822 		int32_t minorcur = minorx[i];
1823 		int32_t majorcur = majorx[i];
1824 		int32_t minorcurint = minorxint[i];
1825 		int32_t majorcurint = majorxint[i];
1826 		length = minorcurint - majorcurint;
1827 
1828 		int32_t fmask = (i & 1) ? 5 : 0xa;
1829 		int32_t maskshift = (i ^ 3) << 2;
1830 		int32_t fmaskshifted = fmask << maskshift;
1831 		int32_t fleft = CLIP(majorcurint + 1, 0, 647);
1832 		int32_t fright = CLIP(minorcurint - 1, 0, 647);
1833 		bool valid_y = ((scanlinespx + i) >= yh && (scanlinespx + i) < yl);
1834 		if (valid_y && length >= 0)
1835 		{
1836 			if (minorcurint != majorcurint)
1837 			{
1838 				if (!(minorcurint & ~0x3ff))
1839 				{
1840 					userdata->m_cvg[minorcurint] |= (rightcvghex(minorcur, fmask) << maskshift);
1841 				}
1842 				if (!(majorcurint & ~0x3ff))
1843 				{
1844 					userdata->m_cvg[majorcurint] |= (leftcvghex(majorcur, fmask) << maskshift);
1845 				}
1846 			}
1847 			else
1848 			{
1849 				if (!(majorcurint & ~0x3ff))
1850 				{
1851 					int32_t samecvg = rightcvghex(minorcur, fmask) & leftcvghex(majorcur, fmask);
1852 					userdata->m_cvg[majorcurint] |= (samecvg << maskshift);
1853 				}
1854 			}
1855 			for (; fleft <= fright; fleft++)
1856 			{
1857 				userdata->m_cvg[fleft] |= fmaskshifted;
1858 			}
1859 		}
1860 	}
1861 }
1862 
// Sign-extends the low `numb` bits of x: the bits below `numb` are kept and
// everything above is filled with bit (numb - 1). Both arguments are fully
// parenthesized so that compound expressions can be passed safely.
#define SIGN(x, numb)   (((x) & ((1 << (numb)) - 1)) | -((x) & (1 << ((numb) - 1))))
1864 
// Decodes one set of triangle edge coefficients, walks the edges one
// subscanline (quarter scanline) at a time to build per-scanline spans with
// coverage and attribute start values, and passes the spans to render_spans().
//   shade   - when true the texture and Z coefficient blocks are expected
//             8 words further into the command.
//   texture - when true the Z coefficient block is expected a further 8 words in.
//   zbuffer - not referenced in this function.
//   rect    - when true the coefficients are read from m_temp_rect_data starting
//             at word 0 instead of m_cmd_data at m_cmd_cur; also forwarded to
//             render_spans().
// Shade, texture and Z values are decoded regardless of the flags; the flags
// only move the block offsets.
void n64_rdp::draw_triangle(bool shade, bool texture, bool zbuffer, bool rect)
{
	const uint64_t* cmd_data = rect ? m_temp_rect_data : m_cmd_data;
	const uint32_t fifo_index = rect ? 0 : m_cmd_cur;
	const uint64_t w1 = cmd_data[fifo_index + 0];

	// Word 0: bit 55 = flip, bits 51-53 = max level, bits 48-50 = tile number.
	int32_t flip = int32_t(w1 >> 55) & 1;
	m_misc_state.m_max_level = uint32_t(w1 >> 51) & 7;
	int32_t tilenum = int32_t(w1 >> 48) & 0x7;

	int32_t dsdiff = 0, dtdiff = 0, dwdiff = 0, drdiff = 0, dgdiff = 0, dbdiff = 0, dadiff = 0, dzdiff = 0;
	int32_t dsdeh = 0, dtdeh = 0, dwdeh = 0, drdeh = 0, dgdeh = 0, dbdeh = 0, dadeh = 0, dzdeh = 0;
	int32_t dsdxh = 0, dtdxh = 0, dwdxh = 0, drdxh = 0, dgdxh = 0, dbdxh = 0, dadxh = 0, dzdxh = 0;
	int32_t dsdyh = 0, dtdyh = 0, dwdyh = 0, drdyh = 0, dgdyh = 0, dbdyh = 0, dadyh = 0, dzdyh = 0;

	int32_t maxxmx = 0; // maxxmx / minxhx very opaque names, consider re-naming
	int32_t minxmx = 0;
	int32_t maxxhx = 0;
	int32_t minxhx = 0;

	// The edge coefficients occupy words 0-3; the optional blocks follow in the
	// order shade (8 words), texture (8 words), Z.
	int32_t shade_base = fifo_index + 4;
	int32_t texture_base = fifo_index + 4;
	int32_t zbuffer_base = fifo_index + 4;
	if(shade)
	{
		texture_base += 8;
		zbuffer_base += 8;
	}
	if(texture)
	{
		zbuffer_base += 8;
	}

	uint64_t w2 = cmd_data[fifo_index + 1];
	uint64_t w3 = cmd_data[fifo_index + 2];
	uint64_t w4 = cmd_data[fifo_index + 3];

	// Y coordinates are 14-bit fields of word 0; X coordinates are 30-bit fields
	// in the upper halves of words 1-3. Both are sign-extended below.
	int32_t yl = int32_t(w1 >> 32) & 0x3fff;
	int32_t ym = int32_t(w1 >> 16) & 0x3fff;
	int32_t yh = int32_t(w1 >>  0) & 0x3fff;
	int32_t xl = (int32_t)(w2 >> 32) & 0x3fffffff;
	int32_t xh = (int32_t)(w3 >> 32) & 0x3fffffff;
	int32_t xm = (int32_t)(w4 >> 32) & 0x3fffffff;
	// Inverse slopes in 16.16 format
	int32_t dxldy = (int32_t)w2;
	int32_t dxhdy = (int32_t)w3;
	int32_t dxmdy = (int32_t)w4;

	if (yl & 0x2000)  yl |= 0xffffc000;
	if (ym & 0x2000)  ym |= 0xffffc000;
	if (yh & 0x2000)  yh |= 0xffffc000;

	if (xl & 0x20000000)  xl |= 0xc0000000;
	if (xm & 0x20000000)  xm |= 0xc0000000;
	if (xh & 0x20000000)  xh |= 0xc0000000;

	// Shade coefficients: each 32-bit value takes its upper 16 bits from one
	// word and its lower 16 bits from the word two places later.
	int32_t r    = int32_t(((cmd_data[shade_base] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 2] >> 48) & 0x0000ffff));
	int32_t g    = int32_t(((cmd_data[shade_base] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 2] >> 32) & 0x0000ffff));
	int32_t b    = int32_t( (cmd_data[shade_base]        & 0xffff0000) | ((cmd_data[shade_base + 2] >> 16) & 0x0000ffff));
	int32_t a    = int32_t(((cmd_data[shade_base] << 16) & 0xffff0000) |  (cmd_data[shade_base + 2]        & 0x0000ffff));
	const int32_t drdx = int32_t(((cmd_data[shade_base + 1] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 3] >> 48) & 0x0000ffff));
	const int32_t dgdx = int32_t(((cmd_data[shade_base + 1] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 3] >> 32) & 0x0000ffff));
	const int32_t dbdx = int32_t( (cmd_data[shade_base + 1]        & 0xffff0000) | ((cmd_data[shade_base + 3] >> 16) & 0x0000ffff));
	const int32_t dadx = int32_t(((cmd_data[shade_base + 1] << 16) & 0xffff0000) |  (cmd_data[shade_base + 3]        & 0x0000ffff));
	const int32_t drde = int32_t(((cmd_data[shade_base + 4] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 6] >> 48) & 0x0000ffff));
	const int32_t dgde = int32_t(((cmd_data[shade_base + 4] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 6] >> 32) & 0x0000ffff));
	const int32_t dbde = int32_t( (cmd_data[shade_base + 4]        & 0xffff0000) | ((cmd_data[shade_base + 6] >> 16) & 0x0000ffff));
	const int32_t dade = int32_t(((cmd_data[shade_base + 4] << 16) & 0xffff0000) |  (cmd_data[shade_base + 6]        & 0x0000ffff));
	const int32_t drdy = int32_t(((cmd_data[shade_base + 5] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 7] >> 48) & 0x0000ffff));
	const int32_t dgdy = int32_t(((cmd_data[shade_base + 5] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 7] >> 32) & 0x0000ffff));
	const int32_t dbdy = int32_t( (cmd_data[shade_base + 5]        & 0xffff0000) | ((cmd_data[shade_base + 7] >> 16) & 0x0000ffff));
	const int32_t dady = int32_t(((cmd_data[shade_base + 5] << 16) & 0xffff0000) |  (cmd_data[shade_base + 7]        & 0x0000ffff));

	// Texture coefficients use the same split layout as the shade block.
	int32_t s    = int32_t(((cmd_data[texture_base] >> 32) & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 48) & 0x0000ffff));
	int32_t t    = int32_t(((cmd_data[texture_base] >> 16) & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 32) & 0x0000ffff));
	int32_t w    = int32_t( (cmd_data[texture_base]        & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 16) & 0x0000ffff));
	const int32_t dsdx = int32_t(((cmd_data[texture_base + 1] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 3] >> 48) & 0x0000ffff));
	const int32_t dtdx = int32_t(((cmd_data[texture_base + 1] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 3] >> 32) & 0x0000ffff));
	const int32_t dwdx = int32_t( (cmd_data[texture_base + 1]        & 0xffff0000) | ((cmd_data[texture_base + 3] >> 16) & 0x0000ffff));
	const int32_t dsde = int32_t(((cmd_data[texture_base + 4] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 6] >> 48) & 0x0000ffff));
	const int32_t dtde = int32_t(((cmd_data[texture_base + 4] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 6] >> 32) & 0x0000ffff));
	const int32_t dwde = int32_t( (cmd_data[texture_base + 4]        & 0xffff0000) | ((cmd_data[texture_base + 6] >> 16) & 0x0000ffff));
	const int32_t dsdy = int32_t(((cmd_data[texture_base + 5] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 7] >> 48) & 0x0000ffff));
	const int32_t dtdy = int32_t(((cmd_data[texture_base + 5] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 7] >> 32) & 0x0000ffff));
	const int32_t dwdy = int32_t( (cmd_data[texture_base + 5]        & 0xffff0000) | ((cmd_data[texture_base + 7] >> 16) & 0x0000ffff));

	// Z coefficients: two words holding z/dzdx and dzde/dzdy as 32-bit halves.
	int32_t z    = int32_t(cmd_data[zbuffer_base] >> 32);
	const int32_t dzdx = int32_t(cmd_data[zbuffer_base]);
	const int32_t dzde = int32_t(cmd_data[zbuffer_base+1] >> 32);
	const int32_t dzdy = int32_t(cmd_data[zbuffer_base+1]);

	const int32_t dzdy_dz = (dzdy >> 16) & 0xffff;
	const int32_t dzdx_dz = (dzdx >> 16) & 0xffff;

	// One entry per scanline, relative to the first scanline of the primitive.
	extent_t spans[2048];
#ifdef MAME_DEBUG
	memset(spans, 0xcc, sizeof(spans));
#endif

	m_span_base.m_span_drdy = drdy;
	m_span_base.m_span_dgdy = dgdy;
	m_span_base.m_span_dbdy = dbdy;
	m_span_base.m_span_dady = dady;
	m_span_base.m_span_dzdy = m_other_modes.z_source_sel ? 0 : dzdy;

	// Sum of the magnitudes of the upper 16 bits of dzdy and dzdx (one's
	// complement is used when the sign bit is set); indexes m_dzpix_normalize.
	uint32_t temp_dzpix = ((dzdy_dz & 0x8000) ? ((~dzdy_dz) & 0x7fff) : dzdy_dz) + ((dzdx_dz & 0x8000) ? ((~dzdx_dz) & 0x7fff) : dzdx_dz);
	// Per-pixel colour steps have their low 5 bits cleared; the Z steps are
	// zeroed when z_source_sel is set.
	m_span_base.m_span_dr = drdx & ~0x1f;
	m_span_base.m_span_dg = dgdx & ~0x1f;
	m_span_base.m_span_db = dbdx & ~0x1f;
	m_span_base.m_span_da = dadx & ~0x1f;
	m_span_base.m_span_ds = dsdx;
	m_span_base.m_span_dt = dtdx;
	m_span_base.m_span_dw = dwdx;
	m_span_base.m_span_dz = m_other_modes.z_source_sel ? 0 : dzdx;
	m_span_base.m_span_dymax = 0;
	m_span_base.m_span_dzpix = m_dzpix_normalize[temp_dzpix & 0xffff];

	// Edge increments per subscanline step: slope / 4 with the low bit cleared.
	// The left edge starts on XM and the right edge on XH.
	int32_t xleft_inc = (dxmdy >> 2) & ~1;
	int32_t xright_inc = (dxhdy >> 2) & ~1;

	int32_t xright = xh & ~1;
	int32_t xleft = xm & ~1;

	// do_offset is set when the sign of dxhdy equals the flip bit.
	const int32_t sign_dxhdy = (dxhdy & 0x80000000) ? 1 : 0;
	const int32_t do_offset = !(sign_dxhdy ^ (flip));

	if (do_offset)
	{
		dsdeh = dsde >> 9;  dsdyh = dsdy >> 9;
		dtdeh = dtde >> 9;  dtdyh = dtdy >> 9;
		dwdeh = dwde >> 9;  dwdyh = dwdy >> 9;
		drdeh = drde >> 9;  drdyh = drdy >> 9;
		dgdeh = dgde >> 9;  dgdyh = dgdy >> 9;
		dbdeh = dbde >> 9;  dbdyh = dbdy >> 9;
		dadeh = dade >> 9;  dadyh = dady >> 9;
		dzdeh = dzde >> 9;  dzdyh = dzdy >> 9;

		// (v << 8) + (v << 7) is v * 384, applied to the values shifted right by 9
		// above: the offset is the edge step minus the Y step, scaled that way.
		dsdiff = (dsdeh << 8) + (dsdeh << 7) - (dsdyh << 8) - (dsdyh << 7);
		dtdiff = (dtdeh << 8) + (dtdeh << 7) - (dtdyh << 8) - (dtdyh << 7);
		dwdiff = (dwdeh << 8) + (dwdeh << 7) - (dwdyh << 8) - (dwdyh << 7);
		drdiff = (drdeh << 8) + (drdeh << 7) - (drdyh << 8) - (drdyh << 7);
		dgdiff = (dgdeh << 8) + (dgdeh << 7) - (dgdyh << 8) - (dgdyh << 7);
		dbdiff = (dbdeh << 8) + (dbdeh << 7) - (dbdyh << 8) - (dbdyh << 7);
		dadiff = (dadeh << 8) + (dadeh << 7) - (dadyh << 8) - (dadyh << 7);
		dzdiff = (dzdeh << 8) + (dzdeh << 7) - (dzdyh << 8) - (dzdyh << 7);
	}
	else
	{
		dsdiff = dtdiff = dwdiff = drdiff = dgdiff = dbdiff = dadiff = dzdiff = 0;
	}

	dsdxh = dsdx >> 8;
	dtdxh = dtdx >> 8;
	dwdxh = dwdx >> 8;
	drdxh = drdx >> 8;
	dgdxh = dgdx >> 8;
	dbdxh = dbdx >> 8;
	dadxh = dadx >> 8;
	dzdxh = dzdx >> 8;

	// Walk whole scanlines: from the first subscanline of yh's scanline through
	// the last subscanline of yl's scanline.
	const int32_t ycur = yh & ~3;
	const int32_t ylfar = yl | 3;
	// Subscanline on which the attribute start values are latched (0 or 3).
	const int32_t ldflag = (sign_dxhdy ^ flip) ? 0 : 3;
	int32_t majorx[4];
	int32_t minorx[4];
	int32_t majorxint[4];
	int32_t minorxint[4];

	int32_t xfrac = ((xright >> 8) & 0xff);

	const int32_t clipy1 = m_scissor.m_yh;
	const int32_t clipy2 = m_scissor.m_yl;

	// Trivial reject
	if((ycur >> 2) >= clipy2 && (ylfar >> 2) >= clipy2)
	{
		return;
	}
	if((ycur >> 2) < clipy1 && (ylfar >> 2) < clipy1)
	{
		return;
	}

	// new_object: no rdp_poly_state has been allocated for this primitive yet.
	// valid: at least one span has been set up.
	bool new_object = true;
	rdp_poly_state* object = nullptr;
	bool valid = false;

	// Select which of the four extent trackers serve as min/max for this
	// orientation; startx/endx swap roles when flipped.
	int32_t* minx = flip ? &minxhx : &minxmx;
	int32_t* maxx = flip ? &maxxmx : &maxxhx;
	int32_t* startx = flip ? maxx : minx;
	int32_t* endx = flip ? minx : maxx;

	// k counts subscanlines: j is the scanline, spix the subscanline within it,
	// and spanidx the scanline relative to the first one walked.
	for (int32_t k = ycur; k <= ylfar; k++)
	{
		// At the middle vertex the left edge switches to XL and its slope.
		if (k == ym)
		{
			xleft = xl & ~1;
			xleft_inc = (dxldy >> 2) & ~1;
		}

		const int32_t xstart = xleft >> 16;
		const int32_t xend = xright >> 16;
		const int32_t j = k >> 2;
		const int32_t spanidx = (k - ycur) >> 2;
		const int32_t  spix = k & 3;
		bool valid_y = !(k < yh || k >= yl);

		if (spanidx >= 0 && spanidx < 2048)
		{
			majorxint[spix] = xend;
			minorxint[spix] = xstart;
			majorx[spix] = xright;
			minorx[spix] = xleft;

			// Reset the extents at the start of every scanline.
			if (spix == 0)
			{
				*maxx = 0;
				*minx = 0xfff;
			}

			// Only subscanlines inside [yh, yl) widen the extents.
			if (valid_y)
			{
				if (flip)
				{
					*maxx = std::max(xstart, *maxx);
					*minx = std::min(xend, *minx);
				}
				else
				{
					*minx = std::min(xstart, *minx);
					*maxx = std::max(xend, *maxx);
				}
			}

			if (spix == 0)
			{
				// First scanline of the primitive: allocate the per-object state
				// and copy 0x1000 bytes of m_tmem into it.
				if(new_object)
				{
					object = &object_data_alloc();
					memcpy(object->m_tmem, m_tmem.get(), 0x1000);
					new_object = false;
				}

				// Carve this scanline's rdp_span_aux out of the shared aux buffer.
				spans[spanidx].userdata = (void*)((uint8_t*)m_aux_buf.get() + m_aux_buf_ptr);
				valid = true;
				m_aux_buf_ptr += sizeof(rdp_span_aux);

				if(m_aux_buf_ptr >= EXTENT_AUX_COUNT)
				{
					fatalerror("n64_rdp::draw_triangle: span aux buffer overflow\n");
				}

				// Copy the current colour and LOD state into the span's aux data.
				rdp_span_aux* userdata = (rdp_span_aux*)spans[spanidx].userdata;
				userdata->m_tmem = object->m_tmem;

				userdata->m_blend_color = m_blend_color;
				userdata->m_prim_color = m_prim_color;
				userdata->m_env_color = m_env_color;
				userdata->m_fog_color = m_fog_color;
				userdata->m_prim_alpha = m_prim_alpha;
				userdata->m_env_alpha = m_env_alpha;
				userdata->m_key_scale = m_key_scale;
				userdata->m_lod_fraction = m_lod_fraction;
				userdata->m_prim_lod_fraction = m_prim_lod_fraction;

				// Setup blender data for this scanline
				set_blender_input(0, 0, &userdata->m_color_inputs.blender1a_rgb[0], &userdata->m_color_inputs.blender1b_a[0], m_other_modes.blend_m1a_0, m_other_modes.blend_m1b_0, userdata);
				set_blender_input(0, 1, &userdata->m_color_inputs.blender2a_rgb[0], &userdata->m_color_inputs.blender2b_a[0], m_other_modes.blend_m2a_0, m_other_modes.blend_m2b_0, userdata);
				set_blender_input(1, 0, &userdata->m_color_inputs.blender1a_rgb[1], &userdata->m_color_inputs.blender1b_a[1], m_other_modes.blend_m1a_1, m_other_modes.blend_m1b_1, userdata);
				set_blender_input(1, 1, &userdata->m_color_inputs.blender2a_rgb[1], &userdata->m_color_inputs.blender2b_a[1], m_other_modes.blend_m2a_1, m_other_modes.blend_m2b_1, userdata);

				// Setup color combiner data for this scanline
				set_suba_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_a[0], m_combine.sub_a_rgb0, userdata);
				set_subb_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_b[0], m_combine.sub_b_rgb0, userdata);
				set_mul_input_rgb(&userdata->m_color_inputs.combiner_rgbmul[0], m_combine.mul_rgb0, userdata);
				set_add_input_rgb(&userdata->m_color_inputs.combiner_rgbadd[0], m_combine.add_rgb0, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_a[0], m_combine.sub_a_a0, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_b[0], m_combine.sub_b_a0, userdata);
				set_mul_input_alpha(&userdata->m_color_inputs.combiner_alphamul[0], m_combine.mul_a0, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphaadd[0], m_combine.add_a0, userdata);

				set_suba_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_a[1], m_combine.sub_a_rgb1, userdata);
				set_subb_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_b[1], m_combine.sub_b_rgb1, userdata);
				set_mul_input_rgb(&userdata->m_color_inputs.combiner_rgbmul[1], m_combine.mul_rgb1, userdata);
				set_add_input_rgb(&userdata->m_color_inputs.combiner_rgbadd[1], m_combine.add_rgb1, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_a[1], m_combine.sub_a_a1, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_b[1], m_combine.sub_b_a1, userdata);
				set_mul_input_alpha(&userdata->m_color_inputs.combiner_alphamul[1], m_combine.mul_a1, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphaadd[1], m_combine.add_a1, userdata);
			}

			// Last subscanline: store the span extents and compute coverage with
			// the routine selected by the flip bit.
			if (spix == 3)
			{
				spans[spanidx].startx = *startx;
				spans[spanidx].stopx = *endx;
				((this)->*(m_compute_cvg[flip]))(spans, majorx, minorx, majorxint, minorxint, j, yh, yl, ycur >> 2);
			}

			// Latch the attribute start values for this scanline: the low 9 bits
			// are dropped, the offset is added and xfrac times the X step is
			// subtracted. S/T/W additionally have their low 5 bits cleared.
			if (spix == ldflag)
			{
				((rdp_span_aux*)spans[spanidx].userdata)->m_unscissored_rx = xend;
				xfrac = ((xright >> 8) & 0xff);
				spans[spanidx].param[SPAN_R].start = ((r >> 9) << 9) + drdiff - (xfrac * drdxh);
				spans[spanidx].param[SPAN_G].start = ((g >> 9) << 9) + dgdiff - (xfrac * dgdxh);
				spans[spanidx].param[SPAN_B].start = ((b >> 9) << 9) + dbdiff - (xfrac * dbdxh);
				spans[spanidx].param[SPAN_A].start = ((a >> 9) << 9) + dadiff - (xfrac * dadxh);
				spans[spanidx].param[SPAN_S].start = (((s >> 9) << 9)  + dsdiff - (xfrac * dsdxh)) & ~0x1f;
				spans[spanidx].param[SPAN_T].start = (((t >> 9) << 9)  + dtdiff - (xfrac * dtdxh)) & ~0x1f;
				spans[spanidx].param[SPAN_W].start = (((w >> 9) << 9)  + dwdiff - (xfrac * dwdxh)) & ~0x1f;
				spans[spanidx].param[SPAN_Z].start = ((z >> 9) << 9)  + dzdiff - (xfrac * dzdxh);
			}
		}

		// Attributes advance once per scanline and the edges once per
		// subscanline, even when spanidx is outside the span array.
		if (spix == 3)
		{
			r += drde;
			g += dgde;
			b += dbde;
			a += dade;
			s += dsde;
			t += dtde;
			w += dwde;
			z += dzde;
		}
		xleft += xleft_inc;
		xright += xright_inc;
	}

	// Render only if an object was allocated and at least one span was set up.
	if(!new_object && valid)
	{
		render_spans(yh >> 2, yl >> 2, tilenum, flip ? true : false, spans, rect, object);
	}
	m_aux_buf_ptr = 0;  // Spans can be reused once render completes
	//wait("draw_triangle");
}
2200 
2201 /*****************************************************************************/
2202 
2203 ////////////////////////
2204 // RDP COMMANDS
2205 ////////////////////////
2206 
// Draws a triangle from the command buffer (rect = false), forwarding the three
// flags unchanged to draw_triangle(), and then clears m_pipe_clean.
void n64_rdp::triangle(bool shade, bool texture, bool zbuffer)
{
	draw_triangle(shade, texture, zbuffer, false);
	m_pipe_clean = false;
}
2212 
// Triangle command with no shade, texture or Z flags. w1 is not used here.
void n64_rdp::cmd_triangle(uint64_t w1)
{
	triangle(false, false, false);
}
2217 
// Triangle command with only the Z flag set. w1 is not used here.
void n64_rdp::cmd_triangle_z(uint64_t w1)
{
	triangle(false, false, true);
}
2222 
// Triangle command with only the texture flag set. w1 is not used here.
void n64_rdp::cmd_triangle_t(uint64_t w1)
{
	triangle(false, true, false);
}
2227 
// Triangle command with the texture and Z flags set. w1 is not used here.
void n64_rdp::cmd_triangle_tz(uint64_t w1)
{
	triangle(false, true, true);
}
2232 
// Triangle command with only the shade flag set. w1 is not used here.
void n64_rdp::cmd_triangle_s(uint64_t w1)
{
	triangle(true, false, false);
}
2237 
// Triangle command with the shade and Z flags set. w1 is not used here.
void n64_rdp::cmd_triangle_sz(uint64_t w1)
{
	triangle(true, false, true);
}
2242 
// Triangle command with the shade and texture flags set. w1 is not used here.
void n64_rdp::cmd_triangle_st(uint64_t w1)
{
	triangle(true, true, false);
}
2247 
// Triangle command with the shade, texture and Z flags all set. w1 is not used here.
void n64_rdp::cmd_triangle_stz(uint64_t w1)
{
	triangle(true, true, true);
}
2252 
cmd_tex_rect(uint64_t w1)2253 void n64_rdp::cmd_tex_rect(uint64_t w1)
2254 {
2255 	const uint64_t* data = m_cmd_data + m_cmd_cur;
2256 
2257 	const uint64_t w2 = data[1];
2258 
2259 	const uint64_t tilenum = (w1 >> 24) & 0x7;
2260 	const uint64_t xh = (w1 >> 12) & 0xfff;
2261 	const uint64_t xl = (w1 >> 44) & 0xfff;
2262 	const uint64_t yh = (w1 >>  0) & 0xfff;
2263 	uint64_t yl       = (w1 >> 32) & 0xfff;
2264 
2265 	const uint64_t s  = (w2 >> 48) & 0xffff;
2266 	const uint64_t t  = (w2 >> 32) & 0xffff;
2267 	const uint64_t dsdx = SIGN16((w2 >> 16) & 0xffff);
2268 	const uint64_t dtdy = SIGN16((w2 >>  0) & 0xffff);
2269 
2270 	if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
2271 	{
2272 		yl |= 3;
2273 	}
2274 
2275 	const uint64_t xlint = (xl >> 2) & 0x3ff;
2276 	const uint64_t xhint = (xh >> 2) & 0x3ff;
2277 
2278 	uint64_t* ewdata = m_temp_rect_data;
2279 	ewdata[0] = ((uint64_t)0x24 << 56) | ((0x80L | tilenum) << 48) | (yl << 32) | (yl << 16) | yh;   // command, flipped, tile, yl
2280 	ewdata[1] = (xlint << 48) | ((xl & 3) << 46);               // xl, xl frac, dxldy (0), dxldy frac (0)
2281 	ewdata[2] = (xhint << 48) | ((xh & 3) << 46);               // xh, xh frac, dxhdy (0), dxhdy frac (0)
2282 	ewdata[3] = (xlint << 48) | ((xl & 3) << 46);               // xm, xm frac, dxmdy (0), dxmdy frac (0)
2283 	memset(&ewdata[4], 0, 8 * sizeof(uint64_t));                // shade
2284 	ewdata[12] = (s << 48) | (t << 32);                         // s, t, w (0)
2285 	ewdata[13] = (dsdx >> 5) << 48;                             // dsdx, dtdx, dwdx (0)
2286 	ewdata[14] = 0;                                             // s frac (0), t frac (0), w frac (0)
2287 	ewdata[15] = (dsdx & 0x1f) << 59;                           // dsdx frac, dtdx frac, dwdx frac (0)
2288 	ewdata[16] = ((dtdy >> 5) & 0xffff) << 32;                  // dsde, dtde, dwde (0)
2289 	ewdata[17] = ((dtdy >> 5) & 0xffff) << 32;                  // dsdy, dtdy, dwdy (0)
2290 	ewdata[18] = ((dtdy & 0x1f) << 11) << 32;                   // dsde frac, dtde frac, dwde frac (0)
2291 	ewdata[38] = ((dtdy & 0x1f) << 11) << 32;                   // dsdy frac, dtdy frac, dwdy frac (0)
2292 	// ewdata[40-43] = 0;                                       // depth
2293 
2294 	draw_triangle(true, true, false, true);
2295 }
2296 
cmd_tex_rect_flip(uint64_t w1)2297 void n64_rdp::cmd_tex_rect_flip(uint64_t w1)
2298 {
2299 	const uint64_t* data = m_cmd_data + m_cmd_cur;
2300 
2301 	const uint64_t w2 = data[1];
2302 
2303 	const uint64_t tilenum  = (w1 >> 56) & 0x7;
2304 	const uint64_t xh = (w1 >> 12) & 0xfff;
2305 	const uint64_t xl = (w1 >> 44) & 0xfff;
2306 	const uint64_t yh = (w1 >>  0) & 0xfff;
2307 	uint64_t yl       = (w1 >> 32) & 0xfff;
2308 
2309 	const uint64_t s  = (w2 >> 48) & 0xffff;
2310 	const uint64_t t  = (w2 >> 32) & 0xffff;
2311 	const uint64_t dsdx = SIGN16((w2 >> 16) & 0xffff);
2312 	const uint64_t dtdy = SIGN16((w2 >>  0) & 0xffff);
2313 
2314 	if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
2315 	{
2316 		yl |= 3;
2317 	}
2318 
2319 	const uint64_t xlint = (xl >> 2) & 0x3ff;
2320 	const uint64_t xhint = (xh >> 2) & 0x3ff;
2321 
2322 	uint64_t* ewdata = m_temp_rect_data;
2323 	ewdata[0] = ((uint64_t)0x25 << 56) | ((0x80L | tilenum) << 48) | (yl << 32) | (yl << 16) | yh;   // command, flipped, tile, yl
2324 	ewdata[1] = (xlint << 48) | ((xl & 3) << 46);               // xl, xl frac, dxldy (0), dxldy frac (0)
2325 	ewdata[2] = (xhint << 48) | ((xh & 3) << 46);               // xh, xh frac, dxhdy (0), dxhdy frac (0)
2326 	ewdata[3] = (xlint << 48) | ((xl & 3) << 46);               // xm, xm frac, dxmdy (0), dxmdy frac (0)
2327 	memset(&ewdata[4], 0, 8 * sizeof(uint64_t));                // shade
2328 	ewdata[12] = (s << 48) | (t << 32);                         // s, t, w (0)
2329 	ewdata[13] = ((dtdy >> 5) & 0xffff) << 32;                  // dsdx, dtdx, dwdx (0)
2330 	ewdata[14] = 0;                                             // s frac (0), t frac (0), w frac (0)
2331 	ewdata[15] = ((dtdy & 0x1f) << 43);                         // dsdx frac, dtdx frac, dwdx frac (0)
2332 	ewdata[16] = (dsdx >> 5) << 48;                             // dsde, dtde, dwde (0)
2333 	ewdata[17] = (dsdx >> 5) << 48;                             // dsdy, dtdy, dwdy (0)
2334 	ewdata[18] = (dsdx & 0x1f) << 59;                           // dsde frac, dtde frac, dwde frac (0)
2335 	ewdata[19] = (dsdx & 0x1f) << 59;                           // dsdy frac, dtdy frac, dwdy frac (0)
2336 
2337 	draw_triangle(true, true, false, true);
2338 }
2339 
cmd_sync_load(uint64_t w1)2340 void n64_rdp::cmd_sync_load(uint64_t w1)
2341 {
2342 	//wait("SyncLoad");
2343 }
2344 
cmd_sync_pipe(uint64_t w1)2345 void n64_rdp::cmd_sync_pipe(uint64_t w1)
2346 {
2347 	//wait("SyncPipe");
2348 }
2349 
cmd_sync_tile(uint64_t w1)2350 void n64_rdp::cmd_sync_tile(uint64_t w1)
2351 {
2352 	//wait("SyncTile");
2353 }
2354 
cmd_sync_full(uint64_t w1)2355 void n64_rdp::cmd_sync_full(uint64_t w1)
2356 {
2357 	//wait("SyncFull");
2358 	m_n64_periphs->dp_full_sync();
2359 }
2360 
cmd_set_key_gb(uint64_t w1)2361 void n64_rdp::cmd_set_key_gb(uint64_t w1)
2362 {
2363 	m_key_scale.set_b(uint32_t(w1 >>  0) & 0xff);
2364 	m_key_scale.set_g(uint32_t(w1 >> 16) & 0xff);
2365 }
2366 
cmd_set_key_r(uint64_t w1)2367 void n64_rdp::cmd_set_key_r(uint64_t w1)
2368 {
2369 	m_key_scale.set_r(uint32_t(w1 & 0xff));
2370 }
2371 
cmd_set_fill_color32(uint64_t w1)2372 void n64_rdp::cmd_set_fill_color32(uint64_t w1)
2373 {
2374 	//wait("SetFillColor");
2375 	m_fill_color = (uint32_t)w1;
2376 }
2377 
cmd_set_convert(uint64_t w1)2378 void n64_rdp::cmd_set_convert(uint64_t w1)
2379 {
2380 	if(!m_pipe_clean) { m_pipe_clean = true; wait("SetConvert"); }
2381 	int32_t k0 = int32_t(w1 >> 45) & 0x1ff;
2382 	int32_t k1 = int32_t(w1 >> 36) & 0x1ff;
2383 	int32_t k2 = int32_t(w1 >> 27) & 0x1ff;
2384 	int32_t k3 = int32_t(w1 >> 18) & 0x1ff;
2385 	int32_t k4 = int32_t(w1 >>  9) & 0x1ff;
2386 	int32_t k5 = int32_t(w1 >>  0) & 0x1ff;
2387 
2388 	k0 = (SIGN9(k0) << 1) + 1;
2389 	k1 = (SIGN9(k1) << 1) + 1;
2390 	k2 = (SIGN9(k2) << 1) + 1;
2391 	k3 = (SIGN9(k3) << 1) + 1;
2392 
2393 	set_yuv_factors(rgbaint_t(0, k0, k2, k3), rgbaint_t(0, 0, k1, 0), rgbaint_t(k4, k4, k4, k4), rgbaint_t(k5, k5, k5, k5));
2394 }
2395 
cmd_set_scissor(uint64_t w1)2396 void n64_rdp::cmd_set_scissor(uint64_t w1)
2397 {
2398 	m_scissor.m_xh = ((w1 >> 44) & 0xfff) >> 2;
2399 	m_scissor.m_yh = ((w1 >> 32) & 0xfff) >> 2;
2400 	m_scissor.m_xl = ((w1 >> 12) & 0xfff) >> 2;
2401 	m_scissor.m_yl = ((w1 >>  0) & 0xfff) >> 2;
2402 
2403 	// TODO: handle f & o?
2404 }
2405 
cmd_set_prim_depth(uint64_t w1)2406 void n64_rdp::cmd_set_prim_depth(uint64_t w1)
2407 {
2408 	m_misc_state.m_primitive_z = (uint32_t)(w1 & 0x7fff0000);
2409 	m_misc_state.m_primitive_dz = (uint16_t)(w1 >> 32);
2410 }
2411 
cmd_set_other_modes(uint64_t w1)2412 void n64_rdp::cmd_set_other_modes(uint64_t w1)
2413 {
2414 	//wait("SetOtherModes");
2415 	m_other_modes.cycle_type       = (w1 >> 52) & 0x3; // 01
2416 	m_other_modes.persp_tex_en     = (w1 >> 51) & 1; // 1
2417 	m_other_modes.detail_tex_en    = (w1 >> 50) & 1; // 0
2418 	m_other_modes.sharpen_tex_en   = (w1 >> 49) & 1; // 0
2419 	m_other_modes.tex_lod_en       = (w1 >> 48) & 1; // 0
2420 	m_other_modes.en_tlut          = (w1 >> 47) & 1; // 0
2421 	m_other_modes.tlut_type        = (w1 >> 46) & 1; // 0
2422 	m_other_modes.sample_type      = (w1 >> 45) & 1; // 1
2423 	m_other_modes.mid_texel        = (w1 >> 44) & 1; // 0
2424 	m_other_modes.bi_lerp0         = (w1 >> 43) & 1; // 1
2425 	m_other_modes.bi_lerp1         = (w1 >> 42) & 1; // 1
2426 	m_other_modes.convert_one      = (w1 >> 41) & 1; // 0
2427 	m_other_modes.key_en           = (w1 >> 40) & 1; // 0
2428 	m_other_modes.rgb_dither_sel   = (w1 >> 38) & 0x3; // 00
2429 	m_other_modes.alpha_dither_sel = (w1 >> 36) & 0x3; // 01
2430 	m_other_modes.blend_m1a_0      = (w1 >> 30) & 0x3; // 11
2431 	m_other_modes.blend_m1a_1      = (w1 >> 28) & 0x3; // 00
2432 	m_other_modes.blend_m1b_0      = (w1 >> 26) & 0x3; // 10
2433 	m_other_modes.blend_m1b_1      = (w1 >> 24) & 0x3; // 00
2434 	m_other_modes.blend_m2a_0      = (w1 >> 22) & 0x3; // 00
2435 	m_other_modes.blend_m2a_1      = (w1 >> 20) & 0x3; // 01
2436 	m_other_modes.blend_m2b_0      = (w1 >> 18) & 0x3; // 00
2437 	m_other_modes.blend_m2b_1      = (w1 >> 16) & 0x3; // 01
2438 	m_other_modes.force_blend      = (w1 >> 14) & 1; // 0
2439 	m_other_modes.blend_shift      = m_other_modes.force_blend ? 5 : 2;
2440 	m_other_modes.alpha_cvg_select = (w1 >> 13) & 1; // 1
2441 	m_other_modes.cvg_times_alpha  = (w1 >> 12) & 1; // 0
2442 	m_other_modes.z_mode           = (w1 >> 10) & 0x3; // 00
2443 	m_other_modes.cvg_dest         = (w1 >> 8) & 0x3; // 00
2444 	m_other_modes.color_on_cvg     = (w1 >> 7) & 1; // 0
2445 	m_other_modes.image_read_en    = (w1 >> 6) & 1; // 1
2446 	m_other_modes.z_update_en      = (w1 >> 5) & 1; // 1
2447 	m_other_modes.z_compare_en     = (w1 >> 4) & 1; // 1
2448 	m_other_modes.antialias_en     = (w1 >> 3) & 1; // 1
2449 	m_other_modes.z_source_sel     = (w1 >> 2) & 1; // 0
2450 	m_other_modes.dither_alpha_en  = (w1 >> 1) & 1; // 0
2451 	m_other_modes.alpha_compare_en = (w1 >> 0) & 1; // 0
2452 	m_other_modes.alpha_dither_mode = (m_other_modes.alpha_compare_en << 1) | m_other_modes.dither_alpha_en;
2453 }
2454 
cmd_load_tlut(uint64_t w1)2455 void n64_rdp::cmd_load_tlut(uint64_t w1)
2456 {
2457 	//wait("LoadTLUT");
2458 	n64_tile_t* tile = m_tiles;
2459 
2460 	const int32_t tilenum = (w1 >> 24) & 0x7;
2461 	const int32_t sl = tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
2462 	const int32_t tl = tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
2463 	const int32_t sh = tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
2464 	const int32_t th = tile[tilenum].th = int32_t(w1 >>  0) & 0xfff;
2465 
2466 	if (tl != th)
2467 	{
2468 		fatalerror("Load tlut: tl=%d, th=%d\n",tl,th);
2469 	}
2470 
2471 	m_capture.data_begin();
2472 
2473 	const int32_t count = ((sh >> 2) - (sl >> 2) + 1) << 2;
2474 
2475 	switch (m_misc_state.m_ti_size)
2476 	{
2477 		case PIXEL_SIZE_16BIT:
2478 		{
2479 			if (tile[tilenum].tmem < 256)
2480 			{
2481 				fatalerror("rdp_load_tlut: loading tlut into low half at %d qwords\n",tile[tilenum].tmem);
2482 			}
2483 			int32_t srcstart = (m_misc_state.m_ti_address + (tl >> 2) * (m_misc_state.m_ti_width << 1) + (sl >> 1)) >> 1;
2484 			int32_t dststart = tile[tilenum].tmem << 2;
2485 			uint16_t* dst = get_tmem16();
2486 
2487 			for (int32_t i = 0; i < count; i += 4)
2488 			{
2489 				if (dststart < 2048)
2490 				{
2491 					dst[dststart] = U_RREADIDX16(srcstart);
2492 					m_capture.data_block()->put16(dst[dststart]);
2493 					dst[dststart + 1] = dst[dststart];
2494 					dst[dststart + 2] = dst[dststart];
2495 					dst[dststart + 3] = dst[dststart];
2496 					dststart += 4;
2497 					srcstart += 1;
2498 				}
2499 			}
2500 			break;
2501 		}
2502 		default:    fatalerror("RDP: load_tlut: size = %d\n", m_misc_state.m_ti_size);
2503 	}
2504 
2505 	m_capture.data_end();
2506 
2507 	m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2508 	m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2509 }
2510 
cmd_set_tile_size(uint64_t w1)2511 void n64_rdp::cmd_set_tile_size(uint64_t w1)
2512 {
2513 	//wait("SetTileSize");
2514 
2515 	const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2516 
2517 	m_tiles[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
2518 	m_tiles[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
2519 	m_tiles[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
2520 	m_tiles[tilenum].th = int32_t(w1 >>  0) & 0xfff;
2521 
2522 	m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2523 	m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2524 }
2525 
// Load Block command: copies a linear run of texture-image data into TMEM.
//
// Command word fields, as read below:
//   tile: bits 24..26   sl: bits 44..55   tl: bits 32..43
//   sh:   bits 12..23   dxt: bits 0..11
//
// sl/tl/sh are stored into the tile; th is recomputed at the end.  Every loop
// iteration copies four consecutive 16-bit source values and also records them
// through m_capture.  Three TMEM layouts are handled, chosen from the tile's
// size and format:
//   - not 32-bit and not YUV: four values written consecutively (index masked
//     with 0x7ff);
//   - YUV: the high bytes of each source pair go to the low half (index masked
//     with 0x3ff) and the low bytes to the same index | 0x400;
//   - otherwise (32-bit, non-YUV): alternate source values go to the low half
//     and to index | 0x400.
// When dxt is non-zero an accumulator (j) advances by dxt per iteration and
// bit 11 of it selects the address XOR pattern.
// The command is fatal when sh < sl.
void n64_rdp::cmd_load_block(uint64_t w1)
{
	//wait("LoadBlock");
	n64_tile_t* tile = m_tiles;

	const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
	uint16_t* tc = get_tmem16();

	int32_t sl = tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
	int32_t tl = tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
	int32_t sh = tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
	const int32_t dxt             = int32_t(w1 >>  0) & 0xfff;

	if (sh < sl)
	{
		fatalerror("load_block: sh < sl\n");
	}

	// Texel count, scaled by the texture image size, rounded up to a multiple
	// of 8 and divided by 8.  The result is the iteration count of the copy
	// loops below (presumably a count of 64-bit words -- each iteration moves
	// four 16-bit values).
	int32_t width = (sh - sl) + 1;

	width = (width << m_misc_state.m_ti_size) >> 1;
	if (width & 7)
	{
		width = (width & ~7) + 8;
	}
	width >>= 3;

	// Destination base, as an index into the 16-bit TMEM view.
	const int32_t tb = tile[tilenum].tmem << 2;

	const int32_t tiwinwords = (m_misc_state.m_ti_width << m_misc_state.m_ti_size) >> 2;
	const int32_t slinwords = (sl << m_misc_state.m_ti_size) >> 2;

	// Source base, as an index passed to U_RREADIDX16.
	const uint32_t src = (m_misc_state.m_ti_address >> 1) + (tl * tiwinwords) + slinwords;

	m_capture.data_begin();

	if (dxt != 0)
	{
		// j accumulates dxt once per iteration; t is the XOR pattern for the
		// current iteration and oldt the one used by the previous iteration.
		int32_t j = 0;
		int32_t t = 0;
		int32_t oldt = 0;

		if (tile[tilenum].size != PIXEL_SIZE_32BIT && tile[tilenum].format != FORMAT_YUV)
		{
			for (int32_t i = 0; i < width; i ++)
			{
				oldt = t;
				t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
				// Whenever the pattern differs from the previous iteration's,
				// the loop index itself is advanced by the tile's line value.
				// NOTE(review): t starts at 0, so whether this fires on the
				// very first iteration depends on the value of WORD_ADDR_XOR.
				if (t != oldt)
				{
					i += tile[tilenum].line;
				}

				int32_t ptr = tb + (i << 2);
				int32_t srcptr = src + (i << 2);

				tc[(ptr ^ t) & 0x7ff] = U_RREADIDX16(srcptr);
				tc[((ptr + 1) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 1);
				tc[((ptr + 2) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 2);
				tc[((ptr + 3) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 3);

				m_capture.data_block()->put16(U_RREADIDX16(srcptr));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));

				j += dxt;
			}
		}
		else if (tile[tilenum].format == FORMAT_YUV)
		{
			for (int32_t i = 0; i < width; i ++)
			{
				oldt = t;
				t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
				if (t != oldt)
				{
					i += tile[tilenum].line;
				}

				int32_t ptr = ((tb + (i << 1)) ^ t) & 0x3ff;
				int32_t srcptr = src + (i << 2);

				// Split each pair of source values: high bytes into the low
				// half, low bytes into the same index | 0x400.
				int32_t first = U_RREADIDX16(srcptr);
				int32_t sec = U_RREADIDX16(srcptr + 1);
				tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
				tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

				ptr = ((tb + (i << 1) + 1) ^ t) & 0x3ff;
				first = U_RREADIDX16(srcptr + 2);
				sec = U_RREADIDX16(srcptr + 3);
				tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
				tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

				m_capture.data_block()->put16(U_RREADIDX16(srcptr));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
				j += dxt;
			}
		}
		else
		{
			// Reached only when the tile size is 32-bit and the format is not YUV.
			for (int32_t i = 0; i < width; i ++)
			{
				oldt = t;
				t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
				if (t != oldt)
					i += tile[tilenum].line;

				// Even-numbered source values go to the low half, odd-numbered
				// ones to the same index | 0x400.
				int32_t ptr = ((tb + (i << 1)) ^ t) & 0x3ff;
				int32_t srcptr = src + (i << 2);
				tc[ptr] = U_RREADIDX16(srcptr);
				tc[ptr | 0x400] = U_RREADIDX16(srcptr + 1);

				ptr = ((tb + (i << 1) + 1) ^ t) & 0x3ff;
				tc[ptr] = U_RREADIDX16(srcptr + 2);
				tc[ptr | 0x400] = U_RREADIDX16(srcptr + 3);

				m_capture.data_block()->put16(U_RREADIDX16(srcptr));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));

				j += dxt;
			}
		}
		// th advances by the accumulated dxt total shifted right by 11.
		tile[tilenum].th = tl + (j >> 11);
	}
	else
	{
		// dxt == 0: the same three layouts, always using WORD_ADDR_XOR and
		// never skipping ahead by the tile's line value.
		if (tile[tilenum].size != PIXEL_SIZE_32BIT && tile[tilenum].format != FORMAT_YUV)
		{
			for (int32_t i = 0; i < width; i ++)
			{
				int32_t ptr = tb + (i << 2);
				int32_t srcptr = src + (i << 2);
				tc[(ptr ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr);
				tc[((ptr + 1) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 1);
				tc[((ptr + 2) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 2);
				tc[((ptr + 3) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 3);

				m_capture.data_block()->put16(U_RREADIDX16(srcptr));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
			}
		}
		else if (tile[tilenum].format == FORMAT_YUV)
		{
			for (int32_t i = 0; i < width; i ++)
			{
				int32_t ptr = ((tb + (i << 1)) ^ WORD_ADDR_XOR) & 0x3ff;
				int32_t srcptr = src + (i << 2);
				int32_t first = U_RREADIDX16(srcptr);
				int32_t sec = U_RREADIDX16(srcptr + 1);
				tc[ptr] = ((first >> 8) << 8) | (sec >> 8);//UV pair
				tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

				ptr = ((tb + (i << 1) + 1) ^ WORD_ADDR_XOR) & 0x3ff;
				first = U_RREADIDX16(srcptr + 2);
				sec = U_RREADIDX16(srcptr + 3);
				tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
				tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);

				m_capture.data_block()->put16(U_RREADIDX16(srcptr));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
			}
		}
		else
		{
			// Reached only when the tile size is 32-bit and the format is not YUV.
			for (int32_t i = 0; i < width; i ++)
			{
				int32_t ptr = ((tb + (i << 1)) ^ WORD_ADDR_XOR) & 0x3ff;
				int32_t srcptr = src + (i << 2);
				tc[ptr] = U_RREADIDX16(srcptr);
				tc[ptr | 0x400] = U_RREADIDX16(srcptr + 1);

				ptr = ((tb + (i << 1) + 1) ^ WORD_ADDR_XOR) & 0x3ff;
				tc[ptr] = U_RREADIDX16(srcptr + 2);
				tc[ptr | 0x400] = U_RREADIDX16(srcptr + 3);

				m_capture.data_block()->put16(U_RREADIDX16(srcptr));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
				m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
			}
		}
		tile[tilenum].th = tl;
	}

	m_capture.data_end();

	// Refresh the packed copies of the tile extents.
	m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
	m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
}
2724 
cmd_load_tile(uint64_t w1)2725 void n64_rdp::cmd_load_tile(uint64_t w1)
2726 {
2727 	//wait("LoadTile");
2728 	n64_tile_t* tile = m_tiles;
2729 	const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2730 
2731 	tile[tilenum].sl    = int32_t(w1 >> 44) & 0xfff;
2732 	tile[tilenum].tl    = int32_t(w1 >> 32) & 0xfff;
2733 	tile[tilenum].sh    = int32_t(w1 >> 12) & 0xfff;
2734 	tile[tilenum].th    = int32_t(w1 >>  0) & 0xfff;
2735 
2736 	const int32_t sl = tile[tilenum].sl >> 2;
2737 	const int32_t tl = tile[tilenum].tl >> 2;
2738 	const int32_t sh = tile[tilenum].sh >> 2;
2739 	const int32_t th = tile[tilenum].th >> 2;
2740 
2741 	const int32_t width = (sh - sl) + 1;
2742 	const int32_t height = (th - tl) + 1;
2743 /*
2744     int32_t topad;
2745     if (m_misc_state.m_ti_size < 3)
2746     {
2747         topad = (width * m_misc_state.m_ti_size) & 0x7;
2748     }
2749     else
2750     {
2751         topad = (width << 2) & 0x7;
2752     }
2753     topad = 0; // ????
2754 */
2755 
2756 	m_capture.data_begin();
2757 
2758 	switch (m_misc_state.m_ti_size)
2759 	{
2760 		case PIXEL_SIZE_8BIT:
2761 		{
2762 			const uint32_t src = m_misc_state.m_ti_address;
2763 			const int32_t tb = tile[tilenum].tmem << 3;
2764 			uint8_t* tc = get_tmem8();
2765 
2766 			for (int32_t j = 0; j < height; j++)
2767 			{
2768 				const int32_t tline = tb + ((tile[tilenum].line << 3) * j);
2769 				const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2770 				const int32_t xorval8 = ((j & 1) ? BYTE_XOR_DWORD_SWAP : BYTE_ADDR_XOR);
2771 
2772 				for (int32_t i = 0; i < width; i++)
2773 				{
2774 					const uint8_t data = U_RREADADDR8(src + s + i);
2775 					m_capture.data_block()->put8(data);
2776 					tc[((tline + i) ^ xorval8) & 0xfff] = data;
2777 				}
2778 			}
2779 			break;
2780 		}
2781 		case PIXEL_SIZE_16BIT:
2782 		{
2783 			const uint32_t src = m_misc_state.m_ti_address >> 1;
2784 			uint16_t* tc = get_tmem16();
2785 
2786 			if (tile[tilenum].format != FORMAT_YUV)
2787 			{
2788 				for (int32_t j = 0; j < height; j++)
2789 				{
2790 					const int32_t tb = tile[tilenum].tmem << 2;
2791 					const int32_t tline = tb + ((tile[tilenum].line << 2) * j);
2792 					const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2793 					const int32_t xorval16 = (j & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2794 
2795 					for (int32_t i = 0; i < width; i++)
2796 					{
2797 						const uint32_t taddr = (tline + i) ^ xorval16;
2798 						const uint16_t data = U_RREADIDX16(src + s + i);
2799 						m_capture.data_block()->put16(data);
2800 						tc[taddr & 0x7ff] = data;
2801 					}
2802 				}
2803 			}
2804 			else
2805 			{
2806 				for (int32_t j = 0; j < height; j++)
2807 				{
2808 					const int32_t tb = tile[tilenum].tmem << 3;
2809 					const int32_t tline = tb + ((tile[tilenum].line << 3) * j);
2810 					const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2811 					const int32_t xorval8 = (j & 1) ? BYTE_XOR_DWORD_SWAP : BYTE_ADDR_XOR;
2812 
2813 					for (int32_t i = 0; i < width; i++)
2814 					{
2815 						uint32_t taddr = ((tline + i) ^ xorval8) & 0x7ff;
2816 						uint16_t yuvword = U_RREADIDX16(src + s + i);
2817 						m_capture.data_block()->put16(yuvword);
2818 						get_tmem8()[taddr] = yuvword >> 8;
2819 						get_tmem8()[taddr | 0x800] = yuvword & 0xff;
2820 					}
2821 				}
2822 			}
2823 			break;
2824 		}
2825 		case PIXEL_SIZE_32BIT:
2826 		{
2827 			const uint32_t src = m_misc_state.m_ti_address >> 2;
2828 			const int32_t tb = (tile[tilenum].tmem << 2);
2829 			uint16_t* tc16 = get_tmem16();
2830 
2831 			for (int32_t j = 0; j < height; j++)
2832 			{
2833 				const int32_t tline = tb + ((tile[tilenum].line << 2) * j);
2834 
2835 				const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2836 				const int32_t xorval32cur = (j & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2837 				for (int32_t i = 0; i < width; i++)
2838 				{
2839 					uint32_t c = U_RREADIDX32(src + s + i);
2840 					m_capture.data_block()->put32(c);
2841 					uint32_t ptr = ((tline + i) ^ xorval32cur) & 0x3ff;
2842 					tc16[ptr] = c >> 16;
2843 					tc16[ptr | 0x400] = c & 0xffff;
2844 				}
2845 			}
2846 			break;
2847 		}
2848 
2849 		default:    fatalerror("RDP: load_tile: size = %d\n", m_misc_state.m_ti_size);
2850 	}
2851 
2852 	m_capture.data_end();
2853 
2854 	m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2855 	m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2856 }
2857 
cmd_set_tile(uint64_t w1)2858 void n64_rdp::cmd_set_tile(uint64_t w1)
2859 {
2860 	//wait("SetTile");
2861 	const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2862 	n64_tile_t* tex_tile = &m_tiles[tilenum];
2863 
2864 	tex_tile->format    = int32_t(w1 >> 53) & 0x7;
2865 	tex_tile->size      = int32_t(w1 >> 51) & 0x3;
2866 	tex_tile->line      = int32_t(w1 >> 41) & 0x1ff;
2867 	tex_tile->tmem      = int32_t(w1 >> 32) & 0x1ff;
2868 	tex_tile->palette   = int32_t(w1 >> 20) & 0xf;
2869 	tex_tile->ct        = int32_t(w1 >> 19) & 0x1;
2870 	tex_tile->mt        = int32_t(w1 >> 18) & 0x1;
2871 	tex_tile->mask_t    = int32_t(w1 >> 14) & 0xf;
2872 	tex_tile->shift_t   = int32_t(w1 >> 10) & 0xf;
2873 	tex_tile->cs        = int32_t(w1 >>  9) & 0x1;
2874 	tex_tile->ms        = int32_t(w1 >>  8) & 0x1;
2875 	tex_tile->mask_s    = int32_t(w1 >>  4) & 0xf;
2876 	tex_tile->shift_s   = int32_t(w1 >>  0) & 0xf;
2877 
2878 	tex_tile->lshift_s  = (tex_tile->shift_s >= 11) ? (16 - tex_tile->shift_s) : 0;
2879 	tex_tile->rshift_s  = (tex_tile->shift_s < 11) ? tex_tile->shift_s : 0;
2880 	tex_tile->lshift_t  = (tex_tile->shift_t >= 11) ? (16 - tex_tile->shift_t) : 0;
2881 	tex_tile->rshift_t  = (tex_tile->shift_t < 11) ? tex_tile->shift_t : 0;
2882 	tex_tile->wrapped_mask_s = (tex_tile->mask_s > 10 ? 10 : tex_tile->mask_s);
2883 	tex_tile->wrapped_mask_t = (tex_tile->mask_t > 10 ? 10 : tex_tile->mask_t);
2884 	tex_tile->wrapped_mask = rgbaint_t(tex_tile->wrapped_mask_s, tex_tile->wrapped_mask_s, tex_tile->wrapped_mask_t, tex_tile->wrapped_mask_t);
2885 	tex_tile->clamp_s = tex_tile->cs || !tex_tile->mask_s;
2886 	tex_tile->clamp_t = tex_tile->ct || !tex_tile->mask_t;
2887 	tex_tile->mm = rgbaint_t(tex_tile->ms ? ~0 : 0, tex_tile->ms ? ~0 : 0, tex_tile->mt ? ~0 : 0, tex_tile->mt ? ~0 : 0);
2888 	tex_tile->invmm = rgbaint_t(tex_tile->ms ? 0 : ~0, tex_tile->ms ? 0 : ~0, tex_tile->mt ? 0 : ~0, tex_tile->mt ? 0 : ~0);
2889 	tex_tile->mask = rgbaint_t(tex_tile->mask_s ? ~0 : 0, tex_tile->mask_s ? ~0 : 0, tex_tile->mask_t ? ~0 : 0, tex_tile->mask_t ? ~0 : 0);
2890 	tex_tile->invmask = rgbaint_t(tex_tile->mask_s ? 0 : ~0, tex_tile->mask_s ? 0 : ~0, tex_tile->mask_t ? 0 : ~0, tex_tile->mask_t ? 0 : ~0);
2891 	tex_tile->lshift = rgbaint_t(tex_tile->lshift_s, tex_tile->lshift_s, tex_tile->lshift_t, tex_tile->lshift_t);
2892 	tex_tile->rshift = rgbaint_t(tex_tile->rshift_s, tex_tile->rshift_s, tex_tile->rshift_t, tex_tile->rshift_t);
2893 	tex_tile->clamp_st = rgbaint_t(tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0);
2894 
2895 	if (tex_tile->format == FORMAT_I && tex_tile->size > PIXEL_SIZE_8BIT)
2896 	{
2897 		tex_tile->format = FORMAT_RGBA; // Used by Supercross 2000 (in-game)
2898 	}
2899 	if (tex_tile->format == FORMAT_CI && tex_tile->size > PIXEL_SIZE_8BIT)
2900 	{
2901 		tex_tile->format = FORMAT_RGBA; // Used by Clay Fighter - Sculptor's Cut
2902 	}
2903 
2904 	if (tex_tile->format == FORMAT_RGBA && tex_tile->size < PIXEL_SIZE_16BIT)
2905 	{
2906 		tex_tile->format = FORMAT_CI; // Used by Exterem-G2, Madden Football 64, and Rat Attack
2907 	}
2908 
2909 	//m_pending_mode_block = true;
2910 }
2911 
cmd_fill_rect(uint64_t w1)2912 void n64_rdp::cmd_fill_rect(uint64_t w1)
2913 {
2914 	//if(m_pending_mode_block) { wait("Block on pending mode-change"); m_pending_mode_block = false; }
2915 	const uint64_t xh = (w1 >> 12) & 0xfff;
2916 	const uint64_t xl = (w1 >> 44) & 0xfff;
2917 	const uint64_t yh = (w1 >>  0) & 0xfff;
2918 	uint64_t yl       = (w1 >> 32) & 0xfff;
2919 
2920 	if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
2921 	{
2922 		yl |= 3;
2923 	}
2924 
2925 	const uint64_t xlint = (xl >> 2) & 0x3ff;
2926 	const uint64_t xhint = (xh >> 2) & 0x3ff;
2927 
2928 	uint64_t* ewdata = m_temp_rect_data;
2929 	ewdata[0] = ((uint64_t)0x3680 << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl, ym, yh
2930 	ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0)
2931 	ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0)
2932 	ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0)
2933 	memset(&ewdata[4], 0, 18 * sizeof(uint64_t));//shade, texture, depth
2934 
2935 	draw_triangle(false, false, false, true);
2936 }
2937 
cmd_set_fog_color(uint64_t w1)2938 void n64_rdp::cmd_set_fog_color(uint64_t w1)
2939 {
2940 	m_fog_color.set(uint8_t(w1), uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2941 }
2942 
cmd_set_blend_color(uint64_t w1)2943 void n64_rdp::cmd_set_blend_color(uint64_t w1)
2944 {
2945 	m_blend_color.set(uint8_t(w1), uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2946 }
2947 
cmd_set_prim_color(uint64_t w1)2948 void n64_rdp::cmd_set_prim_color(uint64_t w1)
2949 {
2950 	m_misc_state.m_min_level = uint32_t(w1 >> 40) & 0x1f;
2951 	const uint8_t prim_lod_fraction(w1 >> 32);
2952 	m_prim_lod_fraction.set(prim_lod_fraction, prim_lod_fraction, prim_lod_fraction, prim_lod_fraction);
2953 
2954 	const uint8_t alpha(w1);
2955 	m_prim_color.set(alpha, uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2956 	m_prim_alpha.set(alpha, alpha, alpha, alpha);
2957 }
2958 
cmd_set_env_color(uint64_t w1)2959 void n64_rdp::cmd_set_env_color(uint64_t w1)
2960 {
2961 	const uint8_t alpha(w1);
2962 	m_env_color.set(alpha, uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2963 	m_env_alpha.set(alpha, alpha, alpha, alpha);
2964 }
2965 
cmd_set_combine(uint64_t w1)2966 void n64_rdp::cmd_set_combine(uint64_t w1)
2967 {
2968 	m_combine.sub_a_rgb0    = uint32_t(w1 >> 52) & 0xf;
2969 	m_combine.mul_rgb0      = uint32_t(w1 >> 47) & 0x1f;
2970 	m_combine.sub_a_a0      = uint32_t(w1 >> 44) & 0x7;
2971 	m_combine.mul_a0        = uint32_t(w1 >> 41) & 0x7;
2972 	m_combine.sub_a_rgb1    = uint32_t(w1 >> 37) & 0xf;
2973 	m_combine.mul_rgb1      = uint32_t(w1 >> 32) & 0x1f;
2974 
2975 	m_combine.sub_b_rgb0    = uint32_t(w1 >> 28) & 0xf;
2976 	m_combine.sub_b_rgb1    = uint32_t(w1 >> 24) & 0xf;
2977 	m_combine.sub_a_a1      = uint32_t(w1 >> 21) & 0x7;
2978 	m_combine.mul_a1        = uint32_t(w1 >> 18) & 0x7;
2979 	m_combine.add_rgb0      = uint32_t(w1 >> 15) & 0x7;
2980 	m_combine.sub_b_a0      = uint32_t(w1 >> 12) & 0x7;
2981 	m_combine.add_a0        = uint32_t(w1 >>  9) & 0x7;
2982 	m_combine.add_rgb1      = uint32_t(w1 >>  6) & 0x7;
2983 	m_combine.sub_b_a1      = uint32_t(w1 >>  3) & 0x7;
2984 	m_combine.add_a1        = uint32_t(w1 >>  0) & 0x7;
2985 }
2986 
cmd_set_texture_image(uint64_t w1)2987 void n64_rdp::cmd_set_texture_image(uint64_t w1)
2988 {
2989 	m_misc_state.m_ti_format  = uint32_t(w1 >> 53) & 0x7;
2990 	m_misc_state.m_ti_size    = uint32_t(w1 >> 51) & 0x3;
2991 	m_misc_state.m_ti_width   = (uint32_t(w1 >> 32) & 0x3ff) + 1;
2992 	m_misc_state.m_ti_address = uint32_t(w1) & 0x01ffffff;
2993 }
2994 
cmd_set_mask_image(uint64_t w1)2995 void n64_rdp::cmd_set_mask_image(uint64_t w1)
2996 {
2997 	//wait("SetMaskImage");
2998 
2999 	m_misc_state.m_zb_address = uint32_t(w1) & 0x01ffffff;
3000 }
3001 
cmd_set_color_image(uint64_t w1)3002 void n64_rdp::cmd_set_color_image(uint64_t w1)
3003 {
3004 	//wait("SetColorImage");
3005 
3006 	m_misc_state.m_fb_format  = uint32_t(w1 >> 53) & 0x7;
3007 	m_misc_state.m_fb_size    = uint32_t(w1 >> 51) & 0x3;
3008 	m_misc_state.m_fb_width   = (uint32_t(w1 >> 32) & 0x3ff) + 1;
3009 	m_misc_state.m_fb_address = uint32_t(w1) & 0x01ffffff;
3010 
3011 	if (m_misc_state.m_fb_format < 2 || m_misc_state.m_fb_format > 32) // Jet Force Gemini sets the format to 4, Intensity.  Protection?
3012 	{
3013 		m_misc_state.m_fb_format = 2;
3014 	}
3015 }
3016 
3017 /*****************************************************************************/
3018 
cmd_invalid(uint64_t w1)3019 void n64_rdp::cmd_invalid(uint64_t w1)
3020 {
3021 	fatalerror("n64_rdp::Invalid: %d, %08x %08x\n", uint32_t(w1 >> 56) & 0x3f, uint32_t(w1 >> 32), (uint32_t)w1);
3022 }
3023 
cmd_noop(uint64_t w1)3024 void n64_rdp::cmd_noop(uint64_t w1)
3025 {
3026 	// Do nothing
3027 }
3028 
3029 
// Pulls the newly submitted command data (the byte range m_current..m_end) into
// m_cmd_data and executes every command that is completely buffered.
//
// State used:
//   m_cmd_ptr - number of 64-bit words currently held in m_cmd_data
//   m_cmd_cur - index of the next word to execute
// If the words of a command have not all arrived yet, the function returns
// early and leaves m_cmd_ptr/m_cmd_cur untouched, so the next call appends the
// remaining words and resumes from the same command.
//
// NOTE(review): no bound check on m_cmd_ptr is visible in this function --
// confirm that m_cmd_data is large enough for the longest pending list.
void n64_rdp::process_command_list()
{
	// Number of bytes submitted since the last call.
	int32_t length = m_end - m_current;

	// A negative range is treated as "nothing to do"; just resynchronise.
	if(length < 0)
	{
		m_current = m_end;
		return;
	}

	// load command data
	// (one buffer entry per 8 bytes; the source address is masked to 29 bits)
	for(int32_t i = 0; i < length; i += 8)
	{
		m_cmd_data[m_cmd_ptr++] = read_data((m_current & 0x1fffffff) + i);
	}

	m_current = m_end;

	// Opcode of the first buffered word: 6 bits starting at bit 56.
	uint32_t cmd = (m_cmd_data[0] >> 56) & 0x3f;
	// Byte count used for the coarse "enough data?" test below.  Note that it
	// is computed from m_cmd_ptr + 1; the loop further down performs the exact
	// per-command check.
	uint32_t cmd_length = uint32_t(m_cmd_ptr + 1) * 8;

	set_status(get_status() &~ DP_STATUS_FREEZE);

	// check if more data is needed
	// (s_rdp_command_length is expressed in bytes)
	if (cmd_length < s_rdp_command_length[cmd])
	{
		return;
	}

	while (m_cmd_cur < m_cmd_ptr)
	{
		cmd = (m_cmd_data[m_cmd_cur] >> 56) & 0x3f;

		// Not all words of this command are buffered yet: keep the buffer
		// state and wait for the next call.
		if (((m_cmd_ptr - m_cmd_cur) * 8) < s_rdp_command_length[cmd])
		{
			return;
			//fatalerror("rdp_process_list: not enough rdp command data: cur = %d, ptr = %d, expected = %d\n", m_cmd_cur, m_cmd_ptr, s_rdp_command_length[cmd]);
		}

		// Hand the complete command (length given in 64-bit words) to the capture object.
		m_capture.command(&m_cmd_data[m_cmd_cur], s_rdp_command_length[cmd] / 8);

		// Optional execution trace, compiled in only when LOG_RDP_EXECUTION is non-zero.
		if (LOG_RDP_EXECUTION)
		{
			char string[4000];
			disassemble(string);

			fprintf(rdp_exec, "%08X: %08X%08X   %s\n", m_start+(m_cmd_cur * 8), uint32_t(m_cmd_data[m_cmd_cur] >> 32), (uint32_t)m_cmd_data[m_cmd_cur], string);
			fflush(rdp_exec);
		}

		// execute the command
		// Only the first word is passed to the handler.
		uint64_t w = m_cmd_data[m_cmd_cur];

		// Dispatch on the opcode.  There is no default case: opcodes without an
		// entry are skipped silently (cmd_invalid is not reached from here),
		// but the cursor still advances by their table length below.
		switch(cmd)
		{
			case 0x00:  cmd_noop(w);           break;

			case 0x08:  cmd_triangle(w);       break;
			case 0x09:  cmd_triangle_z(w);     break;
			case 0x0a:  cmd_triangle_t(w);     break;
			case 0x0b:  cmd_triangle_tz(w);    break;
			case 0x0c:  cmd_triangle_s(w);     break;
			case 0x0d:  cmd_triangle_sz(w);    break;
			case 0x0e:  cmd_triangle_st(w);    break;
			case 0x0f:  cmd_triangle_stz(w);   break;

			case 0x24:  cmd_tex_rect(w);       break;
			case 0x25:  cmd_tex_rect_flip(w);  break;

			case 0x26:  cmd_sync_load(w);      break;
			case 0x27:  cmd_sync_pipe(w);      break;
			case 0x28:  cmd_sync_tile(w);      break;
			case 0x29:  cmd_sync_full(w);      break;

			case 0x2a:  cmd_set_key_gb(w);     break;
			case 0x2b:  cmd_set_key_r(w);      break;

			case 0x2c:  cmd_set_convert(w);    break;
			case 0x3c:  cmd_set_combine(w);    break;
			case 0x2d:  cmd_set_scissor(w);    break;
			case 0x2e:  cmd_set_prim_depth(w); break;
			case 0x2f:  cmd_set_other_modes(w);break;

			case 0x30:  cmd_load_tlut(w);      break;
			case 0x33:  cmd_load_block(w);     break;
			case 0x34:  cmd_load_tile(w);      break;

			case 0x32:  cmd_set_tile_size(w);  break;
			case 0x35:  cmd_set_tile(w);       break;

			case 0x36:  cmd_fill_rect(w);      break;

			case 0x37:  cmd_set_fill_color32(w); break;
			case 0x38:  cmd_set_fog_color(w);  break;
			case 0x39:  cmd_set_blend_color(w);break;
			case 0x3a:  cmd_set_prim_color(w); break;
			case 0x3b:  cmd_set_env_color(w);  break;

			case 0x3d:  cmd_set_texture_image(w); break;
			case 0x3e:  cmd_set_mask_image(w);  break;
			case 0x3f:  cmd_set_color_image(w); break;
		}

		// Advance past every word of the command just handled.
		m_cmd_cur += s_rdp_command_length[cmd] / 8;
	};
	// Everything buffered has been consumed: reset the buffer and mark the
	// whole submitted range as processed.
	m_cmd_ptr = 0;
	m_cmd_cur = 0;

	m_start = m_current = m_end;
}
3140 
3141 /*****************************************************************************/
3142 
n64_rdp(n64_state & state,uint32_t * rdram,uint32_t * dmem)3143 n64_rdp::n64_rdp(n64_state &state, uint32_t* rdram, uint32_t* dmem) : poly_manager<uint32_t, rdp_poly_state, 8, 32000>(state.machine())
3144 {
3145 	ignore = false;
3146 	dolog = false;
3147 
3148 	m_rdram = rdram;
3149 	m_dmem = dmem;
3150 
3151 	m_aux_buf_ptr = 0;
3152 	m_aux_buf = nullptr;
3153 	m_pipe_clean = true;
3154 
3155 	m_pending_mode_block = false;
3156 
3157 	m_cmd_ptr = 0;
3158 	m_cmd_cur = 0;
3159 
3160 	m_start = 0;
3161 	m_end = 0;
3162 	m_current = 0;
3163 	m_status = 0x88;
3164 
3165 	m_one.set(0xff, 0xff, 0xff, 0xff);
3166 	m_zero.set(0, 0, 0, 0);
3167 
3168 	m_tmem = nullptr;
3169 
3170 	m_machine = nullptr;
3171 	m_n64_periphs = nullptr;
3172 
3173 	//memset(m_hidden_bits, 3, 8388608);
3174 
3175 	m_prim_lod_fraction.set(0, 0, 0, 0);
3176 	z_build_com_table();
3177 
3178 	memset(m_temp_rect_data, 0, sizeof(uint32_t) * 0x1000);
3179 
3180 	for (int32_t i = 0; i < 0x4000; i++)
3181 	{
3182 		uint32_t exponent = (i >> 11) & 7;
3183 		uint32_t mantissa = i & 0x7ff;
3184 		m_z_complete_dec_table[i] = ((mantissa << m_z_dec_table[exponent].shift) + m_z_dec_table[exponent].add) & 0x3fffff;
3185 	}
3186 
3187 	precalc_cvmask_derivatives();
3188 
3189 	for(int32_t i = 0; i < 0x200; i++)
3190 	{
3191 		switch((i >> 7) & 3)
3192 		{
3193 		case 0:
3194 		case 1:
3195 			s_special_9bit_clamptable[i] = i & 0xff;
3196 			break;
3197 		case 2:
3198 			s_special_9bit_clamptable[i] = 0xff;
3199 			break;
3200 		case 3:
3201 			s_special_9bit_clamptable[i] = 0;
3202 			break;
3203 		}
3204 	}
3205 
3206 	for(int32_t i = 0; i < 32; i++)
3207 	{
3208 		m_replicated_rgba[i] = (i << 3) | ((i >> 2) & 7);
3209 	}
3210 
3211 	for(int32_t i = 0; i < 0x10000; i++)
3212 	{
3213 		m_dzpix_normalize[i] = (uint16_t)normalize_dzpix(i & 0xffff);
3214 	}
3215 
3216 	m_compute_cvg[0] = &n64_rdp::compute_cvg_noflip;
3217 	m_compute_cvg[1] = &n64_rdp::compute_cvg_flip;
3218 }
3219 
render_spans(int32_t start,int32_t end,int32_t tilenum,bool flip,extent_t * spans,bool rect,rdp_poly_state * object)3220 void n64_rdp::render_spans(int32_t start, int32_t end, int32_t tilenum, bool flip, extent_t* spans, bool rect, rdp_poly_state* object)
3221 {
3222 	const int32_t clipy1 = m_scissor.m_yh;
3223 	const int32_t clipy2 = m_scissor.m_yl;
3224 	const rectangle clip(m_scissor.m_xh, m_scissor.m_xl, m_scissor.m_yh, m_scissor.m_yl);
3225 
3226 	int32_t offset = 0;
3227 
3228 	if (clipy2 <= 0)
3229 	{
3230 		return;
3231 	}
3232 
3233 	if (start < clipy1)
3234 	{
3235 		offset = clipy1 - start;
3236 		start = clipy1;
3237 	}
3238 	if (start >= clipy2)
3239 	{
3240 		offset = start - (clipy2 - 1);
3241 		start = clipy2 - 1;
3242 	}
3243 	if (end < clipy1)
3244 	{
3245 		end = clipy1;
3246 	}
3247 	if (end >= clipy2)
3248 	{
3249 		end = clipy2 - 1;
3250 	}
3251 
3252 	object->m_rdp = this;
3253 	memcpy(&object->m_misc_state, &m_misc_state, sizeof(misc_state_t));
3254 	memcpy(&object->m_other_modes, &m_other_modes, sizeof(other_modes_t));
3255 	memcpy(&object->m_span_base, &m_span_base, sizeof(span_base_t));
3256 	memcpy(&object->m_scissor, &m_scissor, sizeof(rectangle_t));
3257 	memcpy(&object->m_tiles, &m_tiles, 8 * sizeof(n64_tile_t));
3258 	object->tilenum = tilenum;
3259 	object->flip = flip;
3260 	object->m_fill_color = m_fill_color;
3261 	object->rect = rect;
3262 
3263 	switch(m_other_modes.cycle_type)
3264 	{
3265 		case CYCLE_TYPE_1:
3266 			render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_1cycle, this), start, (end - start) + 1, spans + offset);
3267 			break;
3268 
3269 		case CYCLE_TYPE_2:
3270 			render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_2cycle, this), start, (end - start) + 1, spans + offset);
3271 			break;
3272 
3273 		case CYCLE_TYPE_COPY:
3274 			render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_copy, this), start, (end - start) + 1, spans + offset);
3275 			break;
3276 
3277 		case CYCLE_TYPE_FILL:
3278 			render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_fill, this), start, (end - start) + 1, spans + offset);
3279 			break;
3280 	}
3281 	wait("render spans");
3282 }
3283 
rgbaz_clip(int32_t sr,int32_t sg,int32_t sb,int32_t sa,int32_t * sz,rdp_span_aux * userdata)3284 void n64_rdp::rgbaz_clip(int32_t sr, int32_t sg, int32_t sb, int32_t sa, int32_t* sz, rdp_span_aux* userdata)
3285 {
3286 	userdata->m_shade_color.set(sa, sr, sg, sb);
3287 	userdata->m_shade_color.clamp_and_clear(0xfffffe00);
3288 	uint32_t a = userdata->m_shade_color.get_a();
3289 	userdata->m_shade_alpha.set(a, a, a, a);
3290 
3291 	int32_t zanded = (*sz) & 0x60000;
3292 
3293 	zanded >>= 17;
3294 	switch(zanded)
3295 	{
3296 		case 0: *sz &= 0x3ffff;                                         break;
3297 		case 1: *sz &= 0x3ffff;                                         break;
3298 		case 2: *sz = 0x3ffff;                                          break;
3299 		case 3: *sz = 0x3ffff;                                          break;
3300 	}
3301 }
3302 
rgbaz_correct_triangle(int32_t offx,int32_t offy,int32_t * r,int32_t * g,int32_t * b,int32_t * a,int32_t * z,rdp_span_aux * userdata,const rdp_poly_state & object)3303 void n64_rdp::rgbaz_correct_triangle(int32_t offx, int32_t offy, int32_t* r, int32_t* g, int32_t* b, int32_t* a, int32_t* z, rdp_span_aux* userdata, const rdp_poly_state &object)
3304 {
3305 	if (userdata->m_current_pix_cvg == 8)
3306 	{
3307 		*r >>= 2;
3308 		*g >>= 2;
3309 		*b >>= 2;
3310 		*a >>= 2;
3311 		*z = (*z >> 3) & 0x7ffff;
3312 	}
3313 	else
3314 	{
3315 		int32_t summand_xr = offx * SIGN13(object.m_span_base.m_span_dr >> 14);
3316 		int32_t summand_yr = offy * SIGN13(object.m_span_base.m_span_drdy >> 14);
3317 		int32_t summand_xb = offx * SIGN13(object.m_span_base.m_span_db >> 14);
3318 		int32_t summand_yb = offy * SIGN13(object.m_span_base.m_span_dbdy >> 14);
3319 		int32_t summand_xg = offx * SIGN13(object.m_span_base.m_span_dg >> 14);
3320 		int32_t summand_yg = offy * SIGN13(object.m_span_base.m_span_dgdy >> 14);
3321 		int32_t summand_xa = offx * SIGN13(object.m_span_base.m_span_da >> 14);
3322 		int32_t summand_ya = offy * SIGN13(object.m_span_base.m_span_dady >> 14);
3323 
3324 		int32_t summand_xz = offx * SIGN22(object.m_span_base.m_span_dz >> 10);
3325 		int32_t summand_yz = offy * SIGN22(object.m_span_base.m_span_dzdy >> 10);
3326 
3327 		*r = ((*r << 2) + summand_xr + summand_yr) >> 4;
3328 		*g = ((*g << 2) + summand_xg + summand_yg) >> 4;
3329 		*b = ((*b << 2) + summand_xb + summand_yb) >> 4;
3330 		*a = ((*a << 2) + summand_xa + summand_ya) >> 4;
3331 		*z = (((*z << 2) + summand_xz + summand_yz) >> 5) & 0x7ffff;
3332 	}
3333 }
3334 
write_pixel(uint32_t curpixel,color_t & color,rdp_span_aux * userdata,const rdp_poly_state & object)3335 inline void n64_rdp::write_pixel(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object)
3336 {
3337 	if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3338 	{
3339 		const uint32_t fb = (object.m_misc_state.m_fb_address >> 1) + curpixel;
3340 
3341 		uint16_t finalcolor;
3342 		if (object.m_other_modes.color_on_cvg && !userdata->m_pre_wrap)
3343 		{
3344 			finalcolor = RREADIDX16(fb) & 0xfffe;
3345 		}
3346 		else
3347 		{
3348 			color.shr_imm(3);
3349 			finalcolor = (color.get_r() << 11) | (color.get_g() << 6) | (color.get_b() << 1);
3350 		}
3351 
3352 		switch (object.m_other_modes.cvg_dest)
3353 		{
3354 			case 0:
3355 				if (userdata->m_blend_enable)
3356 				{
3357 					uint32_t finalcvg = userdata->m_current_pix_cvg + userdata->m_current_mem_cvg;
3358 					if (finalcvg & 8)
3359 					{
3360 						finalcvg = 7;
3361 					}
3362 					RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
3363 					HWRITEADDR8(fb, finalcvg & 3);
3364 				}
3365 				else
3366 				{
3367 					const uint32_t finalcvg = (userdata->m_current_pix_cvg - 1) & 7;
3368 					RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
3369 					HWRITEADDR8(fb, finalcvg & 3);
3370 				}
3371 				break;
3372 			case 1:
3373 			{
3374 				const uint32_t finalcvg = (userdata->m_current_pix_cvg + userdata->m_current_mem_cvg) & 7;
3375 				RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
3376 				HWRITEADDR8(fb, finalcvg & 3);
3377 				break;
3378 			}
3379 			case 2:
3380 				RWRITEIDX16(fb, finalcolor | 1);
3381 				HWRITEADDR8(fb, 3);
3382 				break;
3383 			case 3:
3384 				RWRITEIDX16(fb, finalcolor | (userdata->m_current_mem_cvg >> 2));
3385 				HWRITEADDR8(fb, userdata->m_current_mem_cvg & 3);
3386 				break;
3387 		}
3388 	}
3389 	else // 32-bit framebuffer
3390 	{
3391 		const uint32_t fb = (object.m_misc_state.m_fb_address >> 2) + curpixel;
3392 
3393 		uint32_t finalcolor;
3394 		if (object.m_other_modes.color_on_cvg && !userdata->m_pre_wrap)
3395 		{
3396 			finalcolor = RREADIDX32(fb) & 0xffffff00;
3397 		}
3398 		else
3399 		{
3400 			finalcolor = (color.get_r() << 24) | (color.get_g() << 16) | (color.get_b() << 8);
3401 		}
3402 
3403 		switch (object.m_other_modes.cvg_dest)
3404 		{
3405 			case 0:
3406 				if (userdata->m_blend_enable)
3407 				{
3408 					uint32_t finalcvg = userdata->m_current_pix_cvg + userdata->m_current_mem_cvg;
3409 					if (finalcvg & 8)
3410 					{
3411 						finalcvg = 7;
3412 					}
3413 
3414 					RWRITEIDX32(fb, finalcolor | (finalcvg << 5));
3415 				}
3416 				else
3417 				{
3418 					RWRITEIDX32(fb, finalcolor | (((userdata->m_current_pix_cvg - 1) & 7) << 5));
3419 				}
3420 				break;
3421 			case 1:
3422 				RWRITEIDX32(fb, finalcolor | (((userdata->m_current_pix_cvg + userdata->m_current_mem_cvg) & 7) << 5));
3423 				break;
3424 			case 2:
3425 				RWRITEIDX32(fb, finalcolor | 0xE0);
3426 				break;
3427 			case 3:
3428 				RWRITEIDX32(fb, finalcolor | (userdata->m_current_mem_cvg << 5));
3429 				break;
3430 		}
3431 	}
3432 }
3433 
read_pixel(uint32_t curpixel,rdp_span_aux * userdata,const rdp_poly_state & object)3434 inline void n64_rdp::read_pixel(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object)
3435 {
3436 	if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3437 	{
3438 		const uint16_t fword = RREADIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel);
3439 
3440 		userdata->m_memory_color.set(0, GETHICOL(fword), GETMEDCOL(fword), GETLOWCOL(fword));
3441 		if (object.m_other_modes.image_read_en)
3442 		{
3443 			uint8_t hbyte = HREADADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel);
3444 			userdata->m_memory_color.set_a(userdata->m_current_mem_cvg << 5);
3445 			userdata->m_current_mem_cvg = ((fword & 1) << 2) | (hbyte & 3);
3446 		}
3447 		else
3448 		{
3449 			userdata->m_memory_color.set_a(0xff);
3450 			userdata->m_current_mem_cvg = 7;
3451 		}
3452 	}
3453 	else // 32-bit framebuffer
3454 	{
3455 		const uint32_t mem = RREADIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel);
3456 		userdata->m_memory_color.set(0, (mem >> 24) & 0xff, (mem >> 16) & 0xff, (mem >> 8) & 0xff);
3457 		if (object.m_other_modes.image_read_en)
3458 		{
3459 			userdata->m_memory_color.set_a(mem & 0xff);
3460 			userdata->m_current_mem_cvg = (mem >> 5) & 7;
3461 		}
3462 		else
3463 		{
3464 			userdata->m_memory_color.set_a(0xff);
3465 			userdata->m_current_mem_cvg = 7;
3466 		}
3467 	}
3468 }
3469 
copy_pixel(uint32_t curpixel,color_t & color,const rdp_poly_state & object)3470 inline void n64_rdp::copy_pixel(uint32_t curpixel, color_t& color, const rdp_poly_state &object)
3471 {
3472 	const uint32_t current_pix_cvg = color.get_a() ? 7 : 0;
3473 	const uint8_t r = color.get_r(); // Vectorize me
3474 	const uint8_t g = color.get_g();
3475 	const uint8_t b = color.get_b();
3476 	if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3477 	{
3478 		RWRITEIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel, ((r >> 3) << 11) | ((g >> 3) << 6) | ((b >> 3) << 1) | ((current_pix_cvg >> 2) & 1));
3479 		HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel, current_pix_cvg & 3);
3480 	}
3481 	else // 32-bit framebuffer
3482 	{
3483 		RWRITEIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel, (r << 24) | (g << 16) | (b << 8) | (current_pix_cvg << 5));
3484 	}
3485 }
3486 
fill_pixel(uint32_t curpixel,const rdp_poly_state & object)3487 inline void n64_rdp::fill_pixel(uint32_t curpixel, const rdp_poly_state &object)
3488 {
3489 	if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3490 	{
3491 		uint16_t val;
3492 		if (curpixel & 1)
3493 		{
3494 			val = object.m_fill_color & 0xffff;
3495 		}
3496 		else
3497 		{
3498 			val = (object.m_fill_color >> 16) & 0xffff;
3499 		}
3500 		RWRITEIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel, val);
3501 		HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel, ((val & 1) << 1) | (val & 1));
3502 	}
3503 	else // 32-bit framebuffer
3504 	{
3505 		RWRITEIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel, object.m_fill_color);
3506 		HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + (curpixel << 1), (object.m_fill_color & 0x10000) ? 3 : 0);
3507 		HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + (curpixel << 1) + 1, (object.m_fill_color & 0x1) ? 3 : 0);
3508 	}
3509 }
3510 
// Span renderer for 1-cycle mode.  For every pixel of the span it samples one
// texel, runs the colour combiner once, depth-tests the pixel, blends it and
// writes colour (and optionally depth) back to memory.
//
//   scanline - y coordinate of the span
//   extent   - span description: startx/stopx, start values of the
//              interpolated parameters, and the rdp_span_aux in `userdata`
//   object   - snapshot of the RDP state taken when the primitive was queued
//   threadid - not used in this function
void n64_rdp::span_draw_1cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
{
	// NOTE(review): this assert rejects m_fb_size == 4, while the runtime check
	// further down still accepts it -- confirm which bound is intended.
	assert(object.m_misc_state.m_fb_size >= 2 && object.m_misc_state.m_fb_size < 4);

	const int32_t clipx1 = object.m_scissor.m_xh;
	const int32_t clipx2 = object.m_scissor.m_xl;
	const int32_t tilenum = object.tilenum;
	const bool flip = object.flip;

	// Running values of the interpolated span parameters.
	span_param_t r; r.w = extent.param[SPAN_R].start;
	span_param_t g; g.w = extent.param[SPAN_G].start;
	span_param_t b; b.w = extent.param[SPAN_B].start;
	span_param_t a; a.w = extent.param[SPAN_A].start;
	span_param_t z; z.w = extent.param[SPAN_Z].start;
	span_param_t s; s.w = extent.param[SPAN_S].start;
	span_param_t t; t.w = extent.param[SPAN_T].start;
	span_param_t w; w.w = extent.param[SPAN_W].start;

	// Depth buffer base as a 16-bit index (zb) and as the raw address (zhb).
	const uint32_t zb = object.m_misc_state.m_zb_address >> 1;
	const uint32_t zhb = object.m_misc_state.m_zb_address;

	// Guard against a userdata pointer that still holds a 0xcc fill pattern
	// (presumably an uninitialised-memory marker -- TODO confirm).
#ifdef PTR64
	assert(extent.userdata != (const void *)0xcccccccccccccccc);
#else
	assert(extent.userdata != (const void *)0xcccccccc);
#endif
	rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata;

	m_tex_pipe.calculate_clamp_diffs(tilenum, userdata, object);

	// Blender configuration flags derived from which inputs are selected.
	const bool partialreject = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_inv_pixel_color && userdata->m_color_inputs.blender1b_a[0] == &userdata->m_pixel_color);
	const int32_t sel0 = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_memory_color) ? 1 : 0;

	int32_t drinc, dginc, dbinc, dainc;
	int32_t dzinc, dzpix;
	int32_t dsinc, dtinc, dwinc;
	int32_t xinc;

	// The span is walked towards decreasing x (negated deltas) unless `flip` is set.
	if (!flip)
	{
		drinc = -object.m_span_base.m_span_dr;
		dginc = -object.m_span_base.m_span_dg;
		dbinc = -object.m_span_base.m_span_db;
		dainc = -object.m_span_base.m_span_da;
		dzinc = -object.m_span_base.m_span_dz;
		dsinc = -object.m_span_base.m_span_ds;
		dtinc = -object.m_span_base.m_span_dt;
		dwinc = -object.m_span_base.m_span_dw;
		xinc = -1;
	}
	else
	{
		drinc = object.m_span_base.m_span_dr;
		dginc = object.m_span_base.m_span_dg;
		dbinc = object.m_span_base.m_span_db;
		dainc = object.m_span_base.m_span_da;
		dzinc = object.m_span_base.m_span_dz;
		dsinc = object.m_span_base.m_span_ds;
		dtinc = object.m_span_base.m_span_dt;
		dwinc = object.m_span_base.m_span_dw;
		xinc = 1;
	}

	// Pixel index of the first pixel on this scanline.
	const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;

	const int32_t xstart = extent.startx;
	const int32_t xend = userdata->m_unscissored_rx;
	const int32_t xend_scissored = extent.stopx;

	// Walking starts at the unscissored edge so the interpolators stay in step;
	// pixels outside the scissored range are skipped inside the loop.
	int32_t x = xend;

	const int32_t length = flip ? (xstart - xend) : (xend - xstart);

	// Depth comes either from the primitive-wide value or from per-pixel interpolation.
	if(object.m_other_modes.z_source_sel)
	{
		z.w = object.m_misc_state.m_primitive_z;
		dzpix = object.m_misc_state.m_primitive_dz;
		dzinc = 0;
	}
	else
	{
		dzpix = object.m_span_base.m_span_dzpix;
	}

	if (object.m_misc_state.m_fb_size < 2 || object.m_misc_state.m_fb_size > 4)
		fatalerror("unsupported m_fb_size %d\n", object.m_misc_state.m_fb_size);

	// Index of the blender routine variant (bit 1: alpha_cvg_select, bit 0: dither selector below 3).
	const int32_t blend_index = (object.m_other_modes.alpha_cvg_select ? 2 : 0) | ((object.m_other_modes.rgb_dither_sel < 3) ? 1 : 0);
	// Index of the texel fetch routine (bit 1: sample_type, bit 0: bi_lerp0).
	const int32_t cycle0 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp0 & 1);

	int32_t sss = 0;
	int32_t sst = 0;

	// Texture coordinates for the first pixel.
	if (object.m_other_modes.persp_tex_en)
	{
		tc_div(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}
	else
	{
		tc_div_no_perspective(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}

	userdata->m_start_span = true;
	for (int32_t j = 0; j <= length; j++)
	{
		int32_t sr = r.w >> 14;
		int32_t sg = g.w >> 14;
		int32_t sb = b.w >> 14;
		int32_t sa = a.w >> 14;
		int32_t sz = (z.w >> 10) & 0x3fffff;
		// True while x has not passed the scissored end of the span.
		const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored);

		if (x >= clipx1 && x < clipx2 && valid_x)
		{
			// Coverage lookup for this pixel; also yields the sub-pixel offsets.
			uint8_t offx, offy;
			lookup_cvmask_derivatives(userdata->m_cvg[x], &offx, &offy, userdata);

			m_tex_pipe.lod_1cycle(&sss, &sst, s.w, t.w, w.w, dsinc, dtinc, dwinc, userdata, object);

			rgbaz_correct_triangle(offx, offy, &sr, &sg, &sb, &sa, &sz, userdata, object);
			rgbaz_clip(sr, sg, sb, sa, &sz, userdata);

			// Fetch texel 0 and replicate its alpha to all channels.
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tilenum, 0, userdata, object);
			uint32_t t0a = userdata->m_texel0_color.get_a();
			userdata->m_texel0_alpha.set(t0a, t0a, t0a, t0a);

			const uint8_t noise = machine().rand() << 3; // Not accurate
			userdata->m_noise_color.set(0, noise, noise, noise);

			// Colour combiner: (A - B) * C + D, using the inputs at index 1.
			rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_rgbsub_a[1]);
			rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_rgbsub_b[1]);
			rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_rgbmul[1]);
			rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_rgbadd[1]);

			rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[1]);
			rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[1]);
			rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[1]);
			rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[1]);

			rgbsub_a.sign_extend(0x180, 0xfffffe00);
			rgbsub_b.sign_extend(0x180, 0xfffffe00);
			rgbadd.sign_extend(0x180, 0xfffffe00);

			// D is pre-scaled by 256 so that a single rounding shift finishes the sum.
			rgbadd.shl_imm(8);
			rgbsub_a.sub(rgbsub_b);
			rgbsub_a.mul(rgbmul);
			rgbsub_a.add(rgbadd);
			rgbsub_a.add_imm(0x0080);
			rgbsub_a.sra_imm(8);
			rgbsub_a.clamp_and_clear(0xfffffe00);

			userdata->m_pixel_color = rgbsub_a;

			//Alpha coverage combiner
			userdata->m_pixel_color.set_a(get_alpha_cvg(userdata->m_pixel_color.get_a(), userdata, object));

			const uint32_t curpixel = fb_index + x;
			const uint32_t zbcur = zb + curpixel;
			const uint32_t zhbcur = zhb + curpixel;

			// Load the destination colour/coverage before the depth test and blend.
			read_pixel(curpixel, userdata, object);

			if(z_compare(zbcur, zhbcur, sz, dzpix, userdata, object))
			{
				int32_t cdith = 0;
				int32_t adith = 0;
				get_dither_values(scanline, j, &cdith, &adith, object);

				color_t blended_pixel;
				bool rendered = ((&m_blender)->*(m_blender.blend1[(userdata->m_blend_enable << 2) | blend_index]))(blended_pixel, cdith, adith, partialreject, sel0, userdata, object);

				// The blender reports whether the pixel should be written at all.
				if (rendered)
				{
					write_pixel(curpixel, blended_pixel, userdata, object);
					if (object.m_other_modes.z_update_en)
					{
						z_store(object, zbcur, zhbcur, sz, userdata->m_dzpix_enc);
					}
				}
			}

			// Carry the precomputed coordinates over as the next pixel's input.
			sss = userdata->m_precomp_s;
			sst = userdata->m_precomp_t;
		}

		// Step every interpolator, including for pixels that were skipped.
		r.w += drinc;
		g.w += dginc;
		b.w += dbinc;
		a.w += dainc;
		s.w += dsinc;
		t.w += dtinc;
		w.w += dwinc;
		z.w += dzinc;

		x += xinc;
	}
}
3708 
// Span renderer for 2-cycle mode.  For every pixel of the span it samples
// texels from two tiles, runs the colour combiner twice (first with the
// inputs at index 0, then with those at index 1), depth-tests the pixel,
// blends it and writes colour (and optionally depth) back to memory.
//
//   scanline - y coordinate of the span
//   extent   - span description: startx/stopx, start values of the
//              interpolated parameters, and the rdp_span_aux in `userdata`
//   object   - snapshot of the RDP state taken when the primitive was queued
//   threadid - not used in this function
void n64_rdp::span_draw_2cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
{
	// NOTE(review): this assert rejects m_fb_size == 4, while the runtime check
	// further down still accepts it -- confirm which bound is intended.
	assert(object.m_misc_state.m_fb_size >= 2 && object.m_misc_state.m_fb_size < 4);

	const int32_t clipx1 = object.m_scissor.m_xh;
	const int32_t clipx2 = object.m_scissor.m_xl;
	const int32_t tilenum = object.tilenum;
	const bool flip = object.flip;

	// Running values of the interpolated span parameters.
	span_param_t r; r.w = extent.param[SPAN_R].start;
	span_param_t g; g.w = extent.param[SPAN_G].start;
	span_param_t b; b.w = extent.param[SPAN_B].start;
	span_param_t a; a.w = extent.param[SPAN_A].start;
	span_param_t z; z.w = extent.param[SPAN_Z].start;
	span_param_t s; s.w = extent.param[SPAN_S].start;
	span_param_t t; t.w = extent.param[SPAN_T].start;
	span_param_t w; w.w = extent.param[SPAN_W].start;

	// Depth buffer base as a 16-bit index (zb) and as the raw address (zhb).
	const uint32_t zb = object.m_misc_state.m_zb_address >> 1;
	const uint32_t zhb = object.m_misc_state.m_zb_address;

	// tile1 defaults to the primitive's tile, tile2 to the following one
	// (wrapping at 8); lod_2cycle may change both per pixel.
	int32_t tile2 = (tilenum + 1) & 7;
	int32_t tile1 = tilenum;
	const uint32_t prim_tile = tilenum;

	// Outputs of the look-ahead LOD pass for the next pixel.
	int32_t newtile1 = tile1;
	int32_t news = 0;
	int32_t newt = 0;

	// Guard against a userdata pointer that still holds a 0xcc fill pattern
	// (presumably an uninitialised-memory marker -- TODO confirm).
#ifdef PTR64
	assert(extent.userdata != (const void *)0xcccccccccccccccc);
#else
	assert(extent.userdata != (const void *)0xcccccccc);
#endif
	rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata;

	m_tex_pipe.calculate_clamp_diffs(tile1, userdata, object);

	// Blender configuration flags derived from which inputs are selected.
	bool partialreject = (userdata->m_color_inputs.blender2b_a[1] == &userdata->m_inv_pixel_color && userdata->m_color_inputs.blender1b_a[1] == &userdata->m_pixel_color);
	int32_t sel0 = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_memory_color) ? 1 : 0;
	int32_t sel1 = (userdata->m_color_inputs.blender2b_a[1] == &userdata->m_memory_color) ? 1 : 0;

	int32_t drinc, dginc, dbinc, dainc;
	int32_t dzinc, dzpix;
	int32_t dsinc, dtinc, dwinc;
	int32_t xinc;

	// The span is walked towards decreasing x (negated deltas) unless `flip` is set.
	if (!flip)
	{
		drinc = -object.m_span_base.m_span_dr;
		dginc = -object.m_span_base.m_span_dg;
		dbinc = -object.m_span_base.m_span_db;
		dainc = -object.m_span_base.m_span_da;
		dzinc = -object.m_span_base.m_span_dz;
		dsinc = -object.m_span_base.m_span_ds;
		dtinc = -object.m_span_base.m_span_dt;
		dwinc = -object.m_span_base.m_span_dw;
		xinc = -1;
	}
	else
	{
		drinc = object.m_span_base.m_span_dr;
		dginc = object.m_span_base.m_span_dg;
		dbinc = object.m_span_base.m_span_db;
		dainc = object.m_span_base.m_span_da;
		dzinc = object.m_span_base.m_span_dz;
		dsinc = object.m_span_base.m_span_ds;
		dtinc = object.m_span_base.m_span_dt;
		dwinc = object.m_span_base.m_span_dw;
		xinc = 1;
	}

	// Pixel index of the first pixel on this scanline.
	const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;

	int32_t cdith = 0;
	int32_t adith = 0;

	const int32_t xstart = extent.startx;
	const int32_t xend = userdata->m_unscissored_rx;
	const int32_t xend_scissored = extent.stopx;

	// Walking starts at the unscissored edge so the interpolators stay in step;
	// pixels outside the scissored range are skipped inside the loop.
	int32_t x = xend;

	const int32_t length = flip ? (xstart - xend) : (xend - xstart);

	// Depth comes either from the primitive-wide value or from per-pixel interpolation.
	if(object.m_other_modes.z_source_sel)
	{
		z.w = object.m_misc_state.m_primitive_z;
		dzpix = object.m_misc_state.m_primitive_dz;
		dzinc = 0;
	}
	else
	{
		dzpix = object.m_span_base.m_span_dzpix;
	}

	if (object.m_misc_state.m_fb_size < 2 || object.m_misc_state.m_fb_size > 4)
		fatalerror("unsupported m_fb_size %d\n", object.m_misc_state.m_fb_size);

	// Index of the blender routine variant (bit 1: alpha_cvg_select, bit 0: dither selector below 3).
	const int32_t blend_index = (object.m_other_modes.alpha_cvg_select ? 2 : 0) | ((object.m_other_modes.rgb_dither_sel < 3) ? 1 : 0);
	// Indices of the texel fetch routines (bit 1: sample_type, bit 0: bi_lerp0 / bi_lerp1).
	const int32_t cycle0 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp0 & 1);
	const int32_t cycle1 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp1 & 1);

	int32_t sss = 0;
	int32_t sst = 0;

	// Texture coordinates for the first pixel.
	if (object.m_other_modes.persp_tex_en)
	{
		tc_div(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}
	else
	{
		tc_div_no_perspective(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}

	userdata->m_start_span = true;
	for (int32_t j = 0; j <= length; j++)
	{
		int32_t sr = r.w >> 14;
		int32_t sg = g.w >> 14;
		int32_t sb = b.w >> 14;
		int32_t sa = a.w >> 14;
		int32_t sz = (z.w >> 10) & 0x3fffff;

		// True while x has not passed the scissored end of the span.
		const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored);

		if (x >= clipx1 && x < clipx2 && valid_x)
		{
			// Inline coverage lookup (replaces the lookup_cvmask_derivatives call kept below for reference).
			const uint32_t compidx = m_compressed_cvmasks[userdata->m_cvg[x]];
			userdata->m_current_pix_cvg = cvarray[compidx].cvg;
			userdata->m_current_cvg_bit = cvarray[compidx].cvbit;
			const uint8_t offx = cvarray[compidx].xoff;
			const uint8_t offy = cvarray[compidx].yoff;
			//lookup_cvmask_derivatives(userdata->m_cvg[x], &offx, &offy, userdata);

			m_tex_pipe.lod_2cycle(&sss, &sst, s.w, t.w, w.w, dsinc, dtinc, dwinc, prim_tile, &tile1, &tile2, userdata, object);

			// Look-ahead LOD pass evaluated one step further along the span.
			news = userdata->m_precomp_s;
			newt = userdata->m_precomp_t;
			m_tex_pipe.lod_2cycle_limited(&news, &newt, s.w + dsinc, t.w + dtinc, w.w + dwinc, dsinc, dtinc, dwinc, prim_tile, &newtile1, object);

			rgbaz_correct_triangle(offx, offy, &sr, &sg, &sb, &sa, &sz, userdata, object);
			rgbaz_clip(sr, sg, sb, sa, &sz, userdata);

			// Fetch texel 0 (tile1), texel 1 (tile2) and the "next" texel.
			// NOTE(review): the third fetch uses sss/sst and tile2; news/newt/newtile1
			// computed above are not consumed here -- confirm this is intended.
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tile1, 0, userdata, object);
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle1]))(&userdata->m_texel1_color, &userdata->m_texel0_color, sss, sst, tile2, 1, userdata, object);
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle1]))(&userdata->m_next_texel_color, &userdata->m_next_texel_color, sss, sst, tile2, 1, userdata, object);

			// Replicate each texel's alpha to all channels.
			uint32_t t0a = userdata->m_texel0_color.get_a();
			uint32_t t1a = userdata->m_texel1_color.get_a();
			uint32_t tna = userdata->m_next_texel_color.get_a();
			userdata->m_texel0_alpha.set(t0a, t0a, t0a, t0a);
			userdata->m_texel1_alpha.set(t1a, t1a, t1a, t1a);
			userdata->m_next_texel_alpha.set(tna, tna, tna, tna);

			const uint8_t noise = machine().rand() << 3; // Not accurate
			userdata->m_noise_color.set(0, noise, noise, noise);

			// First combiner pass: (A - B) * C + D with the inputs at index 0.
			rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_rgbsub_a[0]);
			rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_rgbsub_b[0]);
			rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_rgbmul[0]);
			rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_rgbadd[0]);

			rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[0]);
			rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[0]);
			rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[0]);
			rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[0]);

			rgbsub_a.sign_extend(0x180, 0xfffffe00);
			rgbsub_b.sign_extend(0x180, 0xfffffe00);
			rgbadd.sign_extend(0x180, 0xfffffe00);

			// D is pre-scaled by 256 so that a single rounding shift finishes the sum.
			rgbadd.shl_imm(8);
			rgbsub_a.sub(rgbsub_b);
			rgbsub_a.mul(rgbmul);

			rgbsub_a.add(rgbadd);
			rgbsub_a.add_imm(0x0080);
			rgbsub_a.sra_imm(8);
			rgbsub_a.clamp_and_clear(0xfffffe00);

			// The first pass result becomes the "combined" input, and the texel
			// registers shift down by one (texel1 -> texel0, next -> texel1)
			// before the second pass reads them.
			userdata->m_combined_color.set(rgbsub_a);
			userdata->m_texel0_color.set(userdata->m_texel1_color);
			userdata->m_texel1_color.set(userdata->m_next_texel_color);

			uint32_t ca = userdata->m_combined_color.get_a();
			userdata->m_combined_alpha.set(ca, ca, ca, ca);
			userdata->m_texel0_alpha.set(userdata->m_texel1_alpha);
			userdata->m_texel1_alpha.set(userdata->m_next_texel_alpha);

			// Second combiner pass: same equation with the inputs at index 1.
			rgbsub_a.set(*userdata->m_color_inputs.combiner_rgbsub_a[1]);
			rgbsub_b.set(*userdata->m_color_inputs.combiner_rgbsub_b[1]);
			rgbmul.set(*userdata->m_color_inputs.combiner_rgbmul[1]);
			rgbadd.set(*userdata->m_color_inputs.combiner_rgbadd[1]);

			rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[1]);
			rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[1]);
			rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[1]);
			rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[1]);

			rgbsub_a.sign_extend(0x180, 0xfffffe00);
			rgbsub_b.sign_extend(0x180, 0xfffffe00);
			rgbadd.sign_extend(0x180, 0xfffffe00);

			rgbadd.shl_imm(8);
			rgbsub_a.sub(rgbsub_b);
			rgbsub_a.mul(rgbmul);
			rgbsub_a.add(rgbadd);
			rgbsub_a.add_imm(0x0080);
			rgbsub_a.sra_imm(8);
			rgbsub_a.clamp_and_clear(0xfffffe00);

			userdata->m_pixel_color.set(rgbsub_a);

			//Alpha coverage combiner
			userdata->m_pixel_color.set_a(get_alpha_cvg(userdata->m_pixel_color.get_a(), userdata, object));

			const uint32_t curpixel = fb_index + x;
			const uint32_t zbcur = zb + curpixel;
			const uint32_t zhbcur = zhb + curpixel;

			// Load the destination colour/coverage before the depth test and blend.
			read_pixel(curpixel, userdata, object);

			if(z_compare(zbcur, zhbcur, sz, dzpix, userdata, object))
			{
				get_dither_values(scanline, j, &cdith, &adith, object);

				color_t blended_pixel;
				bool rendered = ((&m_blender)->*(m_blender.blend2[(userdata->m_blend_enable << 2) | blend_index]))(blended_pixel, cdith, adith, partialreject, sel0, sel1, userdata, object);

				// The blender reports whether the pixel should be written at all.
				if (rendered)
				{
					write_pixel(curpixel, blended_pixel, userdata, object);
					if (object.m_other_modes.z_update_en)
					{
						z_store(object, zbcur, zhbcur, sz, userdata->m_dzpix_enc);
					}
				}
			}
			// Carry the precomputed coordinates over as the next pixel's input.
			sss = userdata->m_precomp_s;
			sst = userdata->m_precomp_t;
		}

		// Step every interpolator, including for pixels that were skipped.
		r.w += drinc;
		g.w += dginc;
		b.w += dbinc;
		a.w += dainc;
		s.w += dsinc;
		t.w += dtinc;
		w.w += dwinc;
		z.w += dzinc;

		x += xinc;
	}
}
3964 
span_draw_copy(int32_t scanline,const extent_t & extent,const rdp_poly_state & object,int32_t threadid)3965 void n64_rdp::span_draw_copy(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
3966 {
3967 	const int32_t clipx1 = object.m_scissor.m_xh;
3968 	const int32_t clipx2 = object.m_scissor.m_xl;
3969 	const int32_t tilenum = object.tilenum;
3970 	const bool flip = object.flip;
3971 
3972 	rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata;
3973 	const int32_t xstart = extent.startx;
3974 	const int32_t xend = userdata->m_unscissored_rx;
3975 	const int32_t xend_scissored = extent.stopx;
3976 	const int32_t xinc = flip ? 1 : -1;
3977 	const int32_t length = flip ? (xstart - xend) : (xend - xstart);
3978 
3979 	span_param_t s; s.w = extent.param[SPAN_S].start;
3980 	span_param_t t; t.w = extent.param[SPAN_T].start;
3981 
3982 	const int32_t ds = object.m_span_base.m_span_ds / 4;
3983 	const int32_t dt = object.m_span_base.m_span_dt / 4;
3984 	const int32_t dsinc = flip ? (ds) : -ds;
3985 	const int32_t dtinc = flip ? (dt) : -dt;
3986 
3987 	const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;
3988 
3989 	int32_t x = xend;
3990 
3991 	for (int32_t j = 0; j <= length; j++)
3992 	{
3993 		const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored);
3994 
3995 		if (x >= clipx1 && x < clipx2 && valid_x)
3996 		{
3997 			int32_t sss = s.h.h;
3998 			int32_t sst = t.h.h;
3999 			m_tex_pipe.copy(&userdata->m_texel0_color, sss, sst, tilenum, object, userdata);
4000 
4001 			uint32_t curpixel = fb_index + x;
4002 			if ((userdata->m_texel0_color.get_a() != 0) || (!object.m_other_modes.alpha_compare_en))
4003 			{
4004 				copy_pixel(curpixel, userdata->m_texel0_color, object);
4005 			}
4006 		}
4007 
4008 		s.w += dsinc;
4009 		t.w += dtinc;
4010 		x += xinc;
4011 	}
4012 }
4013 
span_draw_fill(int32_t scanline,const extent_t & extent,const rdp_poly_state & object,int32_t threadid)4014 void n64_rdp::span_draw_fill(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
4015 {
4016 	assert(object.m_misc_state.m_fb_size >= 2 && object.m_misc_state.m_fb_size < 4);
4017 
4018 	const bool flip = object.flip;
4019 
4020 	const int32_t clipx1 = object.m_scissor.m_xh;
4021 	const int32_t clipx2 = object.m_scissor.m_xl;
4022 
4023 	const int32_t xinc = flip ? 1 : -1;
4024 
4025 	const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;
4026 
4027 	const int32_t xstart = extent.startx;
4028 	const int32_t xend_scissored = extent.stopx;
4029 
4030 	int32_t x = xend_scissored;
4031 
4032 	const int32_t length = flip ? (xstart - xend_scissored) : (xend_scissored - xstart);
4033 
4034 	for (int32_t j = 0; j <= length; j++)
4035 	{
4036 		if (x >= clipx1 && x < clipx2)
4037 		{
4038 			fill_pixel(fb_index + x, object);
4039 		}
4040 
4041 		x += xinc;
4042 	}
4043 }
4044