1 // license:BSD-3-Clause
2 // copyright-holders:Ryan Holtz
3 /******************************************************************************
4
5
6 SGI/Nintendo Reality Display Processor
7 -------------------
8
9 by Ryan Holtz
10 based on initial C code by Ville Linde
11 contains additional improvements from angrylion, Ziggy, Gonetz and Orkin
12
13
14 *******************************************************************************
15
16 STATUS:
17
18 Much behavior needs verification against real hardware. Many edge cases must
19 be verified on real hardware as well.
20
21 TODO:
22
23 - Further re-work class structure to avoid dependencies
24
25 *******************************************************************************/
26
27 #include "emu.h"
28 #include "video/n64.h"
29 #include "video/rdpblend.h"
30 #include "video/rdptpipe.h"
31
32 #include <algorithm>
33
34 #define LOG_RDP_EXECUTION 0
35
36 static FILE* rdp_exec;
37
38 uint32_t n64_rdp::s_special_9bit_clamptable[512];
39
rdp_range_check(uint32_t addr)40 bool n64_rdp::rdp_range_check(uint32_t addr)
41 {
42 if(m_misc_state.m_fb_size == 0) return false;
43
44 int32_t fbcount = ((m_misc_state.m_fb_width * m_scissor.m_yl) << (m_misc_state.m_fb_size - 1)) * 3;
45 int32_t fbaddr = m_misc_state.m_fb_address & 0x007fffff;
46 if ((addr >= fbaddr) && (addr < (fbaddr + fbcount)))
47 {
48 return false;
49 }
50
51 int32_t zbcount = m_misc_state.m_fb_width * m_scissor.m_yl * 2;
52 int32_t zbaddr = m_misc_state.m_zb_address & 0x007fffff;
53 if ((addr >= zbaddr) && (addr < (zbaddr + zbcount)))
54 {
55 return false;
56 }
57
58 printf("Check failed: %08x vs. %08x-%08x, %08x-%08x (%d, %d)\n", addr, fbaddr, fbaddr + fbcount, zbaddr, zbaddr + zbcount, m_misc_state.m_fb_width, m_scissor.m_yl);
59 fflush(stdout);
60 return true;
61 }
62
63 /*****************************************************************************/
64
65 // The functions in this file should be moved into the parent Processor class.
66 #include "rdpfiltr.hxx"
67
get_alpha_cvg(int32_t comb_alpha,rdp_span_aux * userdata,const rdp_poly_state & object)68 int32_t n64_rdp::get_alpha_cvg(int32_t comb_alpha, rdp_span_aux* userdata, const rdp_poly_state &object)
69 {
70 int32_t temp = comb_alpha;
71 int32_t temp2 = userdata->m_current_pix_cvg;
72 int32_t temp3 = 0;
73
74 if (object.m_other_modes.cvg_times_alpha)
75 {
76 temp3 = (temp * temp2) + 4;
77 userdata->m_current_pix_cvg = (temp3 >> 8) & 0xf;
78 }
79 if (object.m_other_modes.alpha_cvg_select)
80 {
81 temp = (m_other_modes.cvg_times_alpha) ? (temp3 >> 3) : (temp2 << 5);
82 }
83 if (temp > 0xff)
84 {
85 temp = 0xff;
86 }
87 return temp;
88 }
89
90 /*****************************************************************************/
91
video_start()92 void n64_state::video_start()
93 {
94 m_rdp = auto_alloc(machine(), n64_rdp(*this, m_rdram, m_rsp_dmem));
95
96 m_rdp->set_machine(machine());
97 m_rdp->init_internal_state();
98 m_rdp->set_n64_periphs(m_rcp_periphs);
99
100 m_rdp->m_blender.set_machine(machine());
101 m_rdp->m_blender.set_processor(m_rdp);
102
103 m_rdp->m_tex_pipe.set_machine(machine());
104
105 m_rdp->m_aux_buf = make_unique_clear<uint8_t[]>(EXTENT_AUX_COUNT);
106
107 if (LOG_RDP_EXECUTION)
108 {
109 rdp_exec = fopen("rdp_execute.txt", "wt");
110 }
111 }
112
screen_update_n64(screen_device & screen,bitmap_rgb32 & bitmap,const rectangle & cliprect)113 uint32_t n64_state::screen_update_n64(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect)
114 {
115 //uint16_t* frame_buffer = (uint16_t*)&rdram[(m_rcp_periphs->vi_origin & 0xffffff) >> 2];
116 //uint8_t* cvg_buffer = &m_rdp.m_hidden_bits[((m_rcp_periphs->vi_origin & 0xffffff) >> 2) >> 1];
117 //int32_t vibuffering = ((m_rcp_periphs->vi_control & 2) && fsaa && divot);
118
119 //vibuffering = 0; // Disabled for now
120
121 /*
122 if (vibuffering && ((m_rcp_periphs->vi_control & 3) == 2))
123 {
124 if (frame_buffer)
125 {
126 for (j=0; j < vres; j++)
127 {
128 for (i=0; i < hres; i++)
129 {
130 uint16_t pix;
131 pix = frame_buffer[pixels ^ WORD_ADDR_XOR];
132 curpixel_cvg = ((pix & 1) << 2) | (cvg_buffer[pixels ^ BYTE_ADDR_XOR] & 3); // Reuse of this variable
133 if (curpixel_cvg < 7 && i > 1 && j > 1 && i < (hres - 2) && j < (vres - 2) && fsaa)
134 {
135 newc = video_filter16(&frame_buffer[pixels ^ WORD_ADDR_XOR], &cvg_buffer[pixels ^ BYTE_ADDR_XOR], m_rcp_periphs->vi_width);
136 ViBuffer[i][j] = newc;
137 }
138 else
139 {
140 newc.i.r = ((pix >> 8) & 0xf8) | (pix >> 13);
141 newc.i.g = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07);
142 newc.i.b = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07);
143 ViBuffer[i][j] = newc;
144 }
145 pixels++;
146 }
147 pixels += invisiblewidth;
148 }
149 }
150 }
151 */
152
153 m_rdp->mark_frame();
154
155 if (m_rcp_periphs->vi_blank)
156 {
157 bitmap.fill(0, screen.visible_area());
158 return 0;
159 }
160
161 m_rcp_periphs->video_update(bitmap);
162
163 return 0;
164 }
165
// Vertical-blank line handler; intentionally does nothing.
WRITE_LINE_MEMBER(n64_state::screen_vblank_n64)
{
}
169
video_update(bitmap_rgb32 & bitmap)170 void n64_periphs::video_update(bitmap_rgb32 &bitmap)
171 {
172
173 if(vi_control & 0x40) /* Interlace */
174 {
175 field ^= 1;
176 }
177 else
178 {
179 field = 0;
180 }
181
182 switch(vi_control & 0x3)
183 {
184 case PIXEL_SIZE_16BIT:
185 video_update16(bitmap);
186 break;
187
188 case PIXEL_SIZE_32BIT:
189 video_update32(bitmap);
190 break;
191
192 default:
193 //fatalerror("Unsupported framebuffer depth: m_fb_size=%d\n", m_misc_state.m_fb_size);
194 break;
195 }
196 }
197
video_update16(bitmap_rgb32 & bitmap)198 void n64_periphs::video_update16(bitmap_rgb32 &bitmap)
199 {
200 //int32_t fsaa = (((n64->vi_control >> 8) & 3) < 2);
201 //int32_t divot = (n64->vi_control >> 4) & 1;
202
203 //uint32_t prev_cvg = 0;
204 //uint32_t next_cvg = 0;
205 //int32_t dither_filter = (n64->vi_control >> 16) & 1;
206 //int32_t vibuffering = ((n64->vi_control & 2) && fsaa && divot);
207
208 uint16_t* frame_buffer = (uint16_t*)&m_rdram[(vi_origin & 0xffffff) >> 2];
209 //uint32_t hb = ((n64->vi_origin & 0xffffff) >> 2) >> 1;
210 //uint8_t* hidden_buffer = &m_hidden_bits[hb];
211
212 int32_t hdiff = (vi_hstart & 0x3ff) - ((vi_hstart >> 16) & 0x3ff);
213 float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10));
214 uint32_t hres = ((float)hdiff * hcoeff);
215 int32_t invisiblewidth = vi_width - hres;
216
217 int32_t vdiff = ((vi_vstart & 0x3ff) - ((vi_vstart >> 16) & 0x3ff)) >> 1;
218 float vcoeff = ((float)(vi_yscale & 0xfff) / (1 << 10));
219 uint32_t vres = ((float)vdiff * vcoeff);
220
221 if (vdiff <= 0 || hdiff <= 0)
222 {
223 return;
224 }
225
226 //if (hres > 640) // Needed by Top Gear Overdrive (E)
227 //{
228 // invisiblewidth += (hres - 640);
229 // hres = 640;
230 //}
231
232 if (vres > bitmap.height()) // makes Perfect Dark boot w/o crashing
233 {
234 vres = bitmap.height();
235 }
236
237 uint32_t pixels = 0;
238
239 if (frame_buffer)
240 {
241 for(int32_t j = 0; j < vres; j++)
242 {
243 uint32_t *const d = &bitmap.pix(j);
244
245 for(int32_t i = 0; i < hres; i++)
246 {
247 uint16_t pix = frame_buffer[pixels ^ WORD_ADDR_XOR];
248
249 const uint8_t r = ((pix >> 8) & 0xf8) | (pix >> 13);
250 const uint8_t g = ((pix >> 3) & 0xf8) | ((pix >> 8) & 0x07);
251 const uint8_t b = ((pix << 2) & 0xf8) | ((pix >> 3) & 0x07);
252 d[i] = (r << 16) | (g << 8) | b;
253 pixels++;
254 }
255 pixels += invisiblewidth;
256 }
257 }
258 }
259
video_update32(bitmap_rgb32 & bitmap)260 void n64_periphs::video_update32(bitmap_rgb32 &bitmap)
261 {
262 int32_t gamma = (vi_control >> 3) & 1;
263 int32_t gamma_dither = (vi_control >> 2) & 1;
264 //int32_t vibuffering = ((n64->vi_control & 2) && fsaa && divot);
265
266 uint32_t* frame_buffer32 = (uint32_t*)&m_rdram[(vi_origin & 0xffffff) >> 2];
267
268 const int32_t hdiff = (vi_hstart & 0x3ff) - ((vi_hstart >> 16) & 0x3ff);
269 const float hcoeff = ((float)(vi_xscale & 0xfff) / (1 << 10));
270 uint32_t hres = ((float)hdiff * hcoeff);
271 int32_t invisiblewidth = vi_width - hres;
272
273 const int32_t vdiff = ((vi_vstart & 0x3ff) - ((vi_vstart >> 16) & 0x3ff)) >> 1;
274 const float vcoeff = ((float)(vi_yscale & 0xfff) / (1 << 10));
275 const uint32_t vres = ((float)vdiff * vcoeff);
276
277 if (vdiff <= 0 || hdiff <= 0)
278 {
279 return;
280 }
281
282 //if (hres > 640) // Needed by Top Gear Overdrive (E)
283 //{
284 // invisiblewidth += (hres - 640);
285 // hres = 640;
286 //}
287
288 if (frame_buffer32)
289 {
290 for (int32_t j = 0; j < vres; j++)
291 {
292 uint32_t *const d = &bitmap.pix(j);
293 for (int32_t i = 0; i < hres; i++)
294 {
295 uint32_t pix = *frame_buffer32++;
296 if (gamma || gamma_dither)
297 {
298 int32_t r = (pix >> 24) & 0xff;
299 int32_t g = (pix >> 16) & 0xff;
300 int32_t b = (pix >> 8) & 0xff;
301 int32_t dith = 0;
302 if (gamma_dither)
303 {
304 dith = get_random() & 0x3f;
305 }
306 if (gamma)
307 {
308 if (gamma_dither)
309 {
310 r = m_gamma_dither_table[(r << 6)| dith];
311 g = m_gamma_dither_table[(g << 6)| dith];
312 b = m_gamma_dither_table[(b << 6)| dith];
313 }
314 else
315 {
316 r = m_gamma_table[r];
317 g = m_gamma_table[g];
318 b = m_gamma_table[b];
319 }
320 }
321 else if (gamma_dither)
322 {
323 if (r < 255)
324 r += (dith & 1);
325 if (g < 255)
326 g += (dith & 1);
327 if (b < 255)
328 b += (dith & 1);
329 }
330 pix = (r << 24) | (g << 16) | (b << 8);
331 }
332
333 d[i] = (pix >> 8);
334 }
335 frame_buffer32 += invisiblewidth;
336 }
337 }
338 }
339
340 /*****************************************************************************/
341
tc_div_no_perspective(int32_t ss,int32_t st,int32_t sw,int32_t * sss,int32_t * sst)342 void n64_rdp::tc_div_no_perspective(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst)
343 {
344 *sss = (SIGN16(ss)) & 0x1ffff;
345 *sst = (SIGN16(st)) & 0x1ffff;
346 }
347
tc_div(int32_t ss,int32_t st,int32_t sw,int32_t * sss,int32_t * sst)348 void n64_rdp::tc_div(int32_t ss, int32_t st, int32_t sw, int32_t* sss, int32_t* sst)
349 {
350 int32_t w_carry = 0;
351 if ((sw & 0x8000) || !(sw & 0x7fff))
352 {
353 w_carry = 1;
354 }
355
356 sw &= 0x7fff;
357
358 int32_t shift;
359 for (shift = 1; shift <= 14 && !((sw << shift) & 0x8000); shift++);
360 shift -= 1;
361
362 int32_t normout = (sw << shift) & 0x3fff;
363 int32_t wnorm = (normout & 0xff) << 2;
364 normout >>= 8;
365
366 int32_t temppoint = m_norm_point_rom[normout];
367 int32_t tempslope = m_norm_slope_rom[normout];
368
369 int32_t tlu_rcp = ((-(tempslope * wnorm)) >> 10) + temppoint;
370
371 int32_t sprod = SIGN16(ss) * tlu_rcp;
372 int32_t tprod = SIGN16(st) * tlu_rcp;
373 int32_t tempmask = ((1 << (shift + 1)) - 1) << (29 - shift);
374 int32_t shift_value = 13 - shift;
375
376 int32_t outofbounds_s = sprod & tempmask;
377 int32_t outofbounds_t = tprod & tempmask;
378 if (shift == 0xe)
379 {
380 *sss = sprod << 1;
381 *sst = tprod << 1;
382 }
383 else
384 {
385 *sss = sprod = (sprod >> shift_value);
386 *sst = tprod = (tprod >> shift_value);
387 }
388 //compute clamp flags
389 int32_t under_s = 0;
390 int32_t under_t = 0;
391 int32_t over_s = 0;
392 int32_t over_t = 0;
393
394 if (outofbounds_s != tempmask && outofbounds_s != 0)
395 {
396 if (sprod & (1 << 29))
397 {
398 under_s = 1;
399 }
400 else
401 {
402 over_s = 1;
403 }
404 }
405
406 if (outofbounds_t != tempmask && outofbounds_t != 0)
407 {
408 if (tprod & (1 << 29))
409 {
410 under_t = 1;
411 }
412 else
413 {
414 over_t = 1;
415 }
416 }
417
418 over_s |= w_carry;
419 over_t |= w_carry;
420
421 *sss = (*sss & 0x1ffff) | (over_s << 18) | (under_s << 17);
422 *sst = (*sst & 0x1ffff) | (over_t << 18) | (under_t << 17);
423 }
424
color_combiner_equation(int32_t a,int32_t b,int32_t c,int32_t d)425 int32_t n64_rdp::color_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d)
426 {
427 a = KURT_AKELEY_SIGN9(a);
428 b = KURT_AKELEY_SIGN9(b);
429 c = SIGN9(c);
430 d = KURT_AKELEY_SIGN9(d);
431 a = (((a - b) * c) + (d << 8) + 0x80);
432 a = SIGN17(a) >> 8;
433 a = s_special_9bit_clamptable[a & 0x1ff];
434 return a;
435 }
436
alpha_combiner_equation(int32_t a,int32_t b,int32_t c,int32_t d)437 int32_t n64_rdp::alpha_combiner_equation(int32_t a, int32_t b, int32_t c, int32_t d)
438 {
439 a = KURT_AKELEY_SIGN9(a);
440 b = KURT_AKELEY_SIGN9(b);
441 c = SIGN9(c);
442 d = KURT_AKELEY_SIGN9(d);
443 a = (((a - b) * c) + (d << 8) + 0x80) >> 8;
444 a = SIGN9(a);
445 a = s_special_9bit_clamptable[a & 0x1ff];
446 return a;
447 }
448
set_suba_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)449 void n64_rdp::set_suba_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
450 {
451 switch (code & 0xf)
452 {
453 case 0: *input = &userdata->m_combined_color; break;
454 case 1: *input = &userdata->m_texel0_color; break;
455 case 2: *input = &userdata->m_texel1_color; break;
456 case 3: *input = &userdata->m_prim_color; break;
457 case 4: *input = &userdata->m_shade_color; break;
458 case 5: *input = &userdata->m_env_color; break;
459 case 6: *input = &m_one; break;
460 case 7: *input = &userdata->m_noise_color; break;
461 case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
462 {
463 *input = &m_zero; break;
464 }
465 }
466 }
467
set_subb_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)468 void n64_rdp::set_subb_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
469 {
470 switch (code & 0xf)
471 {
472 case 0: *input = &userdata->m_combined_color; break;
473 case 1: *input = &userdata->m_texel0_color; break;
474 case 2: *input = &userdata->m_texel1_color; break;
475 case 3: *input = &userdata->m_prim_color; break;
476 case 4: *input = &userdata->m_shade_color; break;
477 case 5: *input = &userdata->m_env_color; break;
478 case 6: fatalerror("SET_SUBB_RGB_INPUT: key_center\n");
479 case 7: *input = &userdata->m_k4; break;
480 case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15:
481 {
482 *input = &m_zero; break;
483 }
484 }
485 }
486
set_mul_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)487 void n64_rdp::set_mul_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
488 {
489 switch (code & 0x1f)
490 {
491 case 0: *input = &userdata->m_combined_color; break;
492 case 1: *input = &userdata->m_texel0_color; break;
493 case 2: *input = &userdata->m_texel1_color; break;
494 case 3: *input = &userdata->m_prim_color; break;
495 case 4: *input = &userdata->m_shade_color; break;
496 case 5: *input = &userdata->m_env_color; break;
497 case 6: *input = &userdata->m_key_scale; break;
498 case 7: *input = &userdata->m_combined_alpha; break;
499 case 8: *input = &userdata->m_texel0_alpha; break;
500 case 9: *input = &userdata->m_texel1_alpha; break;
501 case 10: *input = &userdata->m_prim_alpha; break;
502 case 11: *input = &userdata->m_shade_alpha; break;
503 case 12: *input = &userdata->m_env_alpha; break;
504 case 13: *input = &userdata->m_lod_fraction; break;
505 case 14: *input = &userdata->m_prim_lod_fraction; break;
506 case 15: *input = &userdata->m_k5; break;
507 case 16: case 17: case 18: case 19: case 20: case 21: case 22: case 23:
508 case 24: case 25: case 26: case 27: case 28: case 29: case 30: case 31:
509 {
510 *input = &m_zero; break;
511 }
512 }
513 }
514
set_add_input_rgb(color_t ** input,int32_t code,rdp_span_aux * userdata)515 void n64_rdp::set_add_input_rgb(color_t** input, int32_t code, rdp_span_aux* userdata)
516 {
517 switch (code & 0x7)
518 {
519 case 0: *input = &userdata->m_combined_color; break;
520 case 1: *input = &userdata->m_texel0_color; break;
521 case 2: *input = &userdata->m_texel1_color; break;
522 case 3: *input = &userdata->m_prim_color; break;
523 case 4: *input = &userdata->m_shade_color; break;
524 case 5: *input = &userdata->m_env_color; break;
525 case 6: *input = &m_one; break;
526 case 7: *input = &m_zero; break;
527 }
528 }
529
set_sub_input_alpha(color_t ** input,int32_t code,rdp_span_aux * userdata)530 void n64_rdp::set_sub_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata)
531 {
532 switch (code & 0x7)
533 {
534 case 0: *input = &userdata->m_combined_alpha; break;
535 case 1: *input = &userdata->m_texel0_alpha; break;
536 case 2: *input = &userdata->m_texel1_alpha; break;
537 case 3: *input = &userdata->m_prim_alpha; break;
538 case 4: *input = &userdata->m_shade_alpha; break;
539 case 5: *input = &userdata->m_env_alpha; break;
540 case 6: *input = &m_one; break;
541 case 7: *input = &m_zero; break;
542 }
543 }
544
set_mul_input_alpha(color_t ** input,int32_t code,rdp_span_aux * userdata)545 void n64_rdp::set_mul_input_alpha(color_t** input, int32_t code, rdp_span_aux* userdata)
546 {
547 switch (code & 0x7)
548 {
549 case 0: *input = &userdata->m_lod_fraction; break;
550 case 1: *input = &userdata->m_texel0_alpha; break;
551 case 2: *input = &userdata->m_texel1_alpha; break;
552 case 3: *input = &userdata->m_prim_alpha; break;
553 case 4: *input = &userdata->m_shade_alpha; break;
554 case 5: *input = &userdata->m_env_alpha; break;
555 case 6: *input = &userdata->m_prim_lod_fraction; break;
556 case 7: *input = &m_zero; break;
557 }
558 }
559
set_blender_input(int32_t cycle,int32_t which,color_t ** input_rgb,color_t ** input_a,int32_t a,int32_t b,rdp_span_aux * userdata)560 void n64_rdp::set_blender_input(int32_t cycle, int32_t which, color_t** input_rgb, color_t** input_a, int32_t a, int32_t b, rdp_span_aux* userdata)
561 {
562 switch (a & 0x3)
563 {
564 case 0:
565 *input_rgb = cycle == 0 ? &userdata->m_pixel_color : &userdata->m_blended_pixel_color;
566 break;
567
568 case 1:
569 *input_rgb = &userdata->m_memory_color;
570 break;
571
572 case 2:
573 *input_rgb = &userdata->m_blend_color;
574 break;
575
576 case 3:
577 *input_rgb = &userdata->m_fog_color;
578 break;
579 }
580
581 if (which == 0)
582 {
583 switch (b & 0x3)
584 {
585 case 0: *input_a = &userdata->m_pixel_color; break;
586 case 1: *input_a = &userdata->m_fog_color; break;
587 case 2: *input_a = &userdata->m_shade_color; break;
588 case 3: *input_a = &m_zero; break;
589 }
590 }
591 else
592 {
593 switch (b & 0x3)
594 {
595 case 0: *input_a = &userdata->m_inv_pixel_color; break;
596 case 1: *input_a = &userdata->m_memory_color; break;
597 case 2: *input_a = &m_one; break;
598 case 3: *input_a = &m_zero; break;
599 }
600 }
601 }
602
603 uint8_t const n64_rdp::s_bayer_matrix[16] =
604 { /* Bayer matrix */
605 0, 4, 1, 5,
606 6, 2, 7, 3,
607 1, 5, 0, 4,
608 7, 3, 6, 2
609 };
610
611 uint8_t const n64_rdp::s_magic_matrix[16] =
612 { /* Magic square matrix */
613 0, 6, 1, 7,
614 4, 2, 5, 3,
615 3, 5, 2, 4,
616 7, 1, 6, 0
617 };
618
619 z_decompress_entry_t const n64_rdp::m_z_dec_table[8] =
620 {
621 { 6, 0x00000 },
622 { 5, 0x20000 },
623 { 4, 0x30000 },
624 { 3, 0x38000 },
625 { 2, 0x3c000 },
626 { 1, 0x3e000 },
627 { 0, 0x3f000 },
628 { 0, 0x3f800 },
629 };
630
631 /*****************************************************************************/
632
z_build_com_table(void)633 void n64_rdp::z_build_com_table(void)
634 {
635 uint16_t altmem = 0;
636 for(int32_t z = 0; z < 0x40000; z++)
637 {
638 switch((z >> 11) & 0x7f)
639 {
640 case 0x00:
641 case 0x01:
642 case 0x02:
643 case 0x03:
644 case 0x04:
645 case 0x05:
646 case 0x06:
647 case 0x07:
648 case 0x08:
649 case 0x09:
650 case 0x0a:
651 case 0x0b:
652 case 0x0c:
653 case 0x0d:
654 case 0x0e:
655 case 0x0f:
656 case 0x10:
657 case 0x11:
658 case 0x12:
659 case 0x13:
660 case 0x14:
661 case 0x15:
662 case 0x16:
663 case 0x17:
664 case 0x18:
665 case 0x19:
666 case 0x1a:
667 case 0x1b:
668 case 0x1c:
669 case 0x1d:
670 case 0x1e:
671 case 0x1f:
672 case 0x20:
673 case 0x21:
674 case 0x22:
675 case 0x23:
676 case 0x24:
677 case 0x25:
678 case 0x26:
679 case 0x27:
680 case 0x28:
681 case 0x29:
682 case 0x2a:
683 case 0x2b:
684 case 0x2c:
685 case 0x2d:
686 case 0x2e:
687 case 0x2f:
688 case 0x30:
689 case 0x31:
690 case 0x32:
691 case 0x33:
692 case 0x34:
693 case 0x35:
694 case 0x36:
695 case 0x37:
696 case 0x38:
697 case 0x39:
698 case 0x3a:
699 case 0x3b:
700 case 0x3c:
701 case 0x3d:
702 case 0x3e:
703 case 0x3f:
704 altmem = (z >> 4) & 0x1ffc;
705 break;
706 case 0x40:
707 case 0x41:
708 case 0x42:
709 case 0x43:
710 case 0x44:
711 case 0x45:
712 case 0x46:
713 case 0x47:
714 case 0x48:
715 case 0x49:
716 case 0x4a:
717 case 0x4b:
718 case 0x4c:
719 case 0x4d:
720 case 0x4e:
721 case 0x4f:
722 case 0x50:
723 case 0x51:
724 case 0x52:
725 case 0x53:
726 case 0x54:
727 case 0x55:
728 case 0x56:
729 case 0x57:
730 case 0x58:
731 case 0x59:
732 case 0x5a:
733 case 0x5b:
734 case 0x5c:
735 case 0x5d:
736 case 0x5e:
737 case 0x5f:
738 altmem = ((z >> 3) & 0x1ffc) | 0x2000;
739 break;
740 case 0x60:
741 case 0x61:
742 case 0x62:
743 case 0x63:
744 case 0x64:
745 case 0x65:
746 case 0x66:
747 case 0x67:
748 case 0x68:
749 case 0x69:
750 case 0x6a:
751 case 0x6b:
752 case 0x6c:
753 case 0x6d:
754 case 0x6e:
755 case 0x6f:
756 altmem = ((z >> 2) & 0x1ffc) | 0x4000;
757 break;
758 case 0x70:
759 case 0x71:
760 case 0x72:
761 case 0x73:
762 case 0x74:
763 case 0x75:
764 case 0x76:
765 case 0x77:
766 altmem = ((z >> 1) & 0x1ffc) | 0x6000;
767 break;
768 case 0x78://uncompressed z = 0x3c000
769 case 0x79:
770 case 0x7a:
771 case 0x7b:
772 altmem = (z & 0x1ffc) | 0x8000;
773 break;
774 case 0x7c://uncompressed z = 0x3e000
775 case 0x7d:
776 altmem = ((z << 1) & 0x1ffc) | 0xa000;
777 break;
778 case 0x7e://uncompressed z = 0x3f000
779 altmem = ((z << 2) & 0x1ffc) | 0xc000;
780 break;
781 case 0x7f://uncompressed z = 0x3f000
782 altmem = ((z << 2) & 0x1ffc) | 0xe000;
783 break;
784 }
785
786 m_z_com_table[z] = altmem;
787
788 }
789 }
790
precalc_cvmask_derivatives(void)791 void n64_rdp::precalc_cvmask_derivatives(void)
792 {
793 const uint8_t yarray[16] = {0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0};
794 const uint8_t xarray[16] = {0, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0};
795
796 for (int32_t i = 0; i < 0x10000; i++)
797 {
798 m_compressed_cvmasks[i] = (i & 1) | ((i & 4) >> 1) | ((i & 0x20) >> 3) | ((i & 0x80) >> 4) |
799 ((i & 0x100) >> 4) | ((i & 0x400) >> 5) | ((i & 0x2000) >> 7) | ((i & 0x8000) >> 8);
800 }
801
802 for (int32_t i = 0; i < 0x100; i++)
803 {
804 uint16_t mask = decompress_cvmask_frombyte(i);
805 cvarray[i].cvg = cvarray[i].cvbit = 0;
806 cvarray[i].cvbit = (i >> 7) & 1;
807 for (int32_t k = 0; k < 8; k++)
808 {
809 cvarray[i].cvg += ((i >> k) & 1);
810 }
811
812 uint16_t masky = 0;
813 for (int32_t k = 0; k < 4; k++)
814 {
815 masky |= ((mask & (0xf000 >> (k << 2))) > 0) << k;
816 }
817 uint8_t offy = yarray[masky];
818
819 uint16_t maskx = (mask & (0xf000 >> (offy << 2))) >> ((offy ^ 3) << 2);
820 uint8_t offx = xarray[maskx];
821
822 cvarray[i].xoff = offx;
823 cvarray[i].yoff = offy;
824 }
825 }
826
decompress_cvmask_frombyte(uint8_t x)827 uint16_t n64_rdp::decompress_cvmask_frombyte(uint8_t x)
828 {
829 uint16_t y = (x & 1) | ((x & 2) << 1) | ((x & 4) << 3) | ((x & 8) << 4) |
830 ((x & 0x10) << 4) | ((x & 0x20) << 5) | ((x & 0x40) << 7) | ((x & 0x80) << 8);
831 return y;
832 }
833
lookup_cvmask_derivatives(uint32_t mask,uint8_t * offx,uint8_t * offy,rdp_span_aux * userdata)834 void n64_rdp::lookup_cvmask_derivatives(uint32_t mask, uint8_t* offx, uint8_t* offy, rdp_span_aux* userdata)
835 {
836 const uint32_t index = m_compressed_cvmasks[mask];
837 userdata->m_current_pix_cvg = cvarray[index].cvg;
838 userdata->m_current_cvg_bit = cvarray[index].cvbit;
839 *offx = cvarray[index].xoff;
840 *offy = cvarray[index].yoff;
841 }
842
z_store(const rdp_poly_state & object,uint32_t zcurpixel,uint32_t dzcurpixel,uint32_t z,uint32_t enc)843 void n64_rdp::z_store(const rdp_poly_state &object, uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t z, uint32_t enc)
844 {
845 uint16_t zval = m_z_com_table[z & 0x3ffff]|(enc >> 2);
846 if(zcurpixel <= MEM16_LIMIT)
847 {
848 ((uint16_t*)m_rdram)[zcurpixel ^ WORD_ADDR_XOR] = zval;
849 }
850 if(dzcurpixel <= MEM8_LIMIT)
851 {
852 m_hidden_bits[dzcurpixel ^ BYTE_ADDR_XOR] = enc & 3;
853 }
854 }
855
normalize_dzpix(int32_t sum)856 int32_t n64_rdp::normalize_dzpix(int32_t sum)
857 {
858 if (sum & 0xc000)
859 {
860 return 0x8000;
861 }
862 if (!(sum & 0xffff))
863 {
864 return 1;
865 }
866 for(int32_t count = 0x2000; count > 0; count >>= 1)
867 {
868 if (sum & count)
869 {
870 return(count << 1);
871 }
872 }
873 return 0;
874 }
875
z_decompress(uint32_t zcurpixel)876 uint32_t n64_rdp::z_decompress(uint32_t zcurpixel)
877 {
878 return m_z_complete_dec_table[(RREADIDX16(zcurpixel) >> 2) & 0x3fff];
879 }
880
dz_decompress(uint32_t zcurpixel,uint32_t dzcurpixel)881 uint32_t n64_rdp::dz_decompress(uint32_t zcurpixel, uint32_t dzcurpixel)
882 {
883 const uint16_t zval = RREADIDX16(zcurpixel);
884 const uint8_t dzval = (((dzcurpixel) <= 0x7fffff) ? (m_hidden_bits[(dzcurpixel) ^ BYTE_ADDR_XOR]) : 0);
885 const uint32_t dz_compressed = ((zval & 3) << 2) | (dzval & 3);
886 return (1 << dz_compressed);
887 }
888
dz_compress(uint32_t value)889 uint32_t n64_rdp::dz_compress(uint32_t value)
890 {
891 int32_t j = 0;
892 for (; value > 1; j++, value >>= 1);
893 return j;
894 }
895
get_dither_values(int32_t x,int32_t y,int32_t * cdith,int32_t * adith,const rdp_poly_state & object)896 void n64_rdp::get_dither_values(int32_t x, int32_t y, int32_t* cdith, int32_t* adith, const rdp_poly_state& object)
897 {
898 const int32_t dithindex = ((y & 3) << 2) | (x & 3);
899 switch((object.m_other_modes.rgb_dither_sel << 2) | object.m_other_modes.alpha_dither_sel)
900 {
901 case 0:
902 *adith = *cdith = s_magic_matrix[dithindex];
903 break;
904 case 1:
905 *cdith = s_magic_matrix[dithindex];
906 *adith = (~(*cdith)) & 7;
907 break;
908 case 2:
909 *cdith = s_magic_matrix[dithindex];
910 *adith = machine().rand() & 7;
911 break;
912 case 3:
913 *cdith = s_magic_matrix[dithindex];
914 *adith = 0;
915 break;
916 case 4:
917 *adith = *cdith = s_bayer_matrix[dithindex];
918 break;
919 case 5:
920 *cdith = s_bayer_matrix[dithindex];
921 *adith = (~(*cdith)) & 7;
922 break;
923 case 6:
924 *cdith = s_bayer_matrix[dithindex];
925 *adith = machine().rand() & 7;
926 break;
927 case 7:
928 *cdith = s_bayer_matrix[dithindex];
929 *adith = 0;
930 break;
931 case 8:
932 *cdith = machine().rand() & 7;
933 *adith = s_magic_matrix[dithindex];
934 break;
935 case 9:
936 *cdith = machine().rand() & 7;
937 *adith = (~s_magic_matrix[dithindex]) & 7;
938 break;
939 case 10:
940 *cdith = machine().rand() & 7;
941 *adith = (*cdith + 17) & 7;
942 break;
943 case 11:
944 *cdith = machine().rand() & 7;
945 *adith = 0;
946 break;
947 case 12:
948 *cdith = 0;
949 *adith = s_bayer_matrix[dithindex];
950 break;
951 case 13:
952 *cdith = 0;
953 *adith = (~s_bayer_matrix[dithindex]) & 7;
954 break;
955 case 14:
956 *cdith = 0;
957 *adith = machine().rand() & 7;
958 break;
959 case 15:
960 *adith = *cdith = 0;
961 break;
962 }
963 }
964
// Limits in to the range [min, max]. The lower bound is tested first, so min
// wins if the bounds are inverted and in is below min.
int32_t CLAMP(int32_t in, int32_t min, int32_t max)
{
	return (in < min) ? min : ((in > max) ? max : in);
}
971
z_compare(uint32_t zcurpixel,uint32_t dzcurpixel,uint32_t sz,uint16_t dzpix,rdp_span_aux * userdata,const rdp_poly_state & object)972 bool n64_rdp::z_compare(uint32_t zcurpixel, uint32_t dzcurpixel, uint32_t sz, uint16_t dzpix, rdp_span_aux* userdata, const rdp_poly_state &object)
973 {
974 bool force_coplanar = false;
975 sz &= 0x3ffff;
976
977 uint32_t oz;
978 uint32_t dzmem;
979 uint32_t zval;
980 int32_t rawdzmem;
981
982 if (object.m_other_modes.z_compare_en)
983 {
984 oz = z_decompress(zcurpixel);
985 dzmem = dz_decompress(zcurpixel, dzcurpixel);
986 zval = RREADIDX16(zcurpixel);
987 rawdzmem = ((zval & 3) << 2) | ((((dzcurpixel) <= 0x3fffff) ? (m_hidden_bits[(dzcurpixel) ^ BYTE_ADDR_XOR]) : 0) & 3);
988 }
989 else
990 {
991 oz = 0;
992 dzmem = 1 << 0xf;
993 zval = 0x3;
994 rawdzmem = 0xf;
995 }
996
997 userdata->m_dzpix_enc = dz_compress(dzpix & 0xffff);
998 userdata->m_shift_a = CLAMP(userdata->m_dzpix_enc - rawdzmem, 0, 4);
999 userdata->m_shift_b = CLAMP(rawdzmem - userdata->m_dzpix_enc, 0, 4);
1000
1001 int32_t precision_factor = (zval >> 13) & 0xf;
1002 if (precision_factor < 3)
1003 {
1004 int32_t dzmemmodifier = 16 >> precision_factor;
1005 if (dzmem == 0x8000)
1006 {
1007 force_coplanar = true;
1008 }
1009 dzmem <<= 1;
1010 if (dzmem <= dzmemmodifier)
1011 {
1012 dzmem = dzmemmodifier;
1013 }
1014 if (!dzmem)
1015 {
1016 dzmem = 0xffff;
1017 }
1018 }
1019 if (dzmem > 0x8000)
1020 {
1021 dzmem = 0xffff;
1022 }
1023
1024 uint32_t dznew = (dzmem > dzpix) ? dzmem : (uint32_t)dzpix;
1025 uint32_t dznotshift = dznew;
1026 dznew <<= 3;
1027
1028 bool farther = (sz + dznew) >= oz;
1029 bool infront = sz < oz;
1030
1031 if (force_coplanar)
1032 {
1033 farther = true;
1034 }
1035
1036 bool overflow = ((userdata->m_current_mem_cvg + userdata->m_current_pix_cvg) & 8) > 0;
1037 userdata->m_blend_enable = (object.m_other_modes.force_blend || (!overflow && object.m_other_modes.antialias_en && farther)) ? 1 : 0;
1038 userdata->m_pre_wrap = overflow;
1039
1040 int32_t cvgcoeff = 0;
1041 uint32_t dzenc = 0;
1042
1043 if (object.m_other_modes.z_mode == 1 && infront && farther && overflow)
1044 {
1045 dzenc = dz_compress(dznotshift & 0xffff);
1046 cvgcoeff = ((oz >> dzenc) - (sz >> dzenc)) & 0xf;
1047 userdata->m_current_pix_cvg = ((cvgcoeff * userdata->m_current_pix_cvg) >> 3) & 0xf;
1048 }
1049
1050 if (!object.m_other_modes.z_compare_en)
1051 {
1052 return true;
1053 }
1054
1055 int32_t diff = (int32_t)sz - (int32_t)dznew;
1056 bool nearer = diff <= (int32_t)oz;
1057 bool max = (oz == 0x3ffff);
1058 if (force_coplanar)
1059 {
1060 nearer = true;
1061 }
1062
1063 switch(object.m_other_modes.z_mode)
1064 {
1065 case 0:
1066 return (max || (overflow ? infront : nearer));
1067 case 1:
1068 return (max || (overflow ? infront : nearer));
1069 case 2:
1070 return (infront || max);
1071 case 3:
1072 return (farther && nearer && !max);
1073 }
1074
1075 return false;
1076 }
1077
get_log2(uint32_t lod_clamp)1078 uint32_t n64_rdp::get_log2(uint32_t lod_clamp)
1079 {
1080 if (lod_clamp < 2)
1081 {
1082 return 0;
1083 }
1084 else
1085 {
1086 for (int32_t i = 7; i > 0; i--)
1087 {
1088 if ((lod_clamp >> i) & 1)
1089 {
1090 return i;
1091 }
1092 }
1093 }
1094
1095 return 0;
1096 }
1097
1098 /*****************************************************************************/
1099
read_data(uint32_t address)1100 uint64_t n64_rdp::read_data(uint32_t address)
1101 {
1102 if (m_status & 0x1) // XBUS_DMEM_DMA enabled
1103 {
1104 return (uint64_t(m_dmem[(address & 0xfff) / 4]) << 32) | m_dmem[((address + 4) & 0xfff) / 4];
1105 }
1106 else
1107 {
1108 return (uint64_t(m_rdram[((address & 0xffffff) / 4)]) << 32) | m_rdram[(((address + 4) & 0xffffff) / 4)];
1109 }
1110 }
1111
1112 char const *const n64_rdp::s_image_format[] = { "RGBA", "YUV", "CI", "IA", "I", "???", "???", "???" };
1113 char const *const n64_rdp::s_image_size[] = { "4-bit", "8-bit", "16-bit", "32-bit" };
1114
1115 int32_t const n64_rdp::s_rdp_command_length[64] =
1116 {
1117 8, // 0x00, No Op
1118 8, // 0x01, ???
1119 8, // 0x02, ???
1120 8, // 0x03, ???
1121 8, // 0x04, ???
1122 8, // 0x05, ???
1123 8, // 0x06, ???
1124 8, // 0x07, ???
1125 32, // 0x08, Non-Shaded Triangle
1126 32+16, // 0x09, Non-Shaded, Z-Buffered Triangle
1127 32+64, // 0x0a, Textured Triangle
1128 32+64+16, // 0x0b, Textured, Z-Buffered Triangle
1129 32+64, // 0x0c, Shaded Triangle
1130 32+64+16, // 0x0d, Shaded, Z-Buffered Triangle
1131 32+64+64, // 0x0e, Shaded+Textured Triangle
1132 32+64+64+16,// 0x0f, Shaded+Textured, Z-Buffered Triangle
1133 8, // 0x10, ???
1134 8, // 0x11, ???
1135 8, // 0x12, ???
1136 8, // 0x13, ???
1137 8, // 0x14, ???
1138 8, // 0x15, ???
1139 8, // 0x16, ???
1140 8, // 0x17, ???
1141 8, // 0x18, ???
1142 8, // 0x19, ???
1143 8, // 0x1a, ???
1144 8, // 0x1b, ???
1145 8, // 0x1c, ???
1146 8, // 0x1d, ???
1147 8, // 0x1e, ???
1148 8, // 0x1f, ???
1149 8, // 0x20, ???
1150 8, // 0x21, ???
1151 8, // 0x22, ???
1152 8, // 0x23, ???
1153 16, // 0x24, Texture_Rectangle
1154 16, // 0x25, Texture_Rectangle_Flip
1155 8, // 0x26, Sync_Load
1156 8, // 0x27, Sync_Pipe
1157 8, // 0x28, Sync_Tile
1158 8, // 0x29, Sync_Full
1159 8, // 0x2a, Set_Key_GB
1160 8, // 0x2b, Set_Key_R
1161 8, // 0x2c, Set_Convert
1162 8, // 0x2d, Set_Scissor
1163 8, // 0x2e, Set_Prim_Depth
1164 8, // 0x2f, Set_Other_Modes
1165 8, // 0x30, Load_TLUT
1166 8, // 0x31, ???
1167 8, // 0x32, Set_Tile_Size
1168 8, // 0x33, Load_Block
1169 8, // 0x34, Load_Tile
1170 8, // 0x35, Set_Tile
1171 8, // 0x36, Fill_Rectangle
1172 8, // 0x37, Set_Fill_Color
1173 8, // 0x38, Set_Fog_Color
1174 8, // 0x39, Set_Blend_Color
1175 8, // 0x3a, Set_Prim_Color
1176 8, // 0x3b, Set_Env_Color
1177 8, // 0x3c, Set_Combine
1178 8, // 0x3d, Set_Texture_Image
1179 8, // 0x3e, Set_Mask_Image
1180 8 // 0x3f, Set_Color_Image
1181 };
1182
disassemble(char * buffer)1183 void n64_rdp::disassemble(char* buffer)
1184 {
1185 char sl[32], tl[32], sh[32], th[32];
1186 char s[32], t[32], w[32];
1187 char dsdx[32], dtdx[32], dwdx[32];
1188 char dsdy[32], dtdy[32], dwdy[32];
1189 char dsde[32], dtde[32], dwde[32];
1190 char yl[32], yh[32], ym[32], xl[32], xh[32], xm[32];
1191 char dxldy[32], dxhdy[32], dxmdy[32];
1192 char rt[32], gt[32], bt[32], at[32];
1193 char drdx[32], dgdx[32], dbdx[32], dadx[32];
1194 char drdy[32], dgdy[32], dbdy[32], dady[32];
1195 char drde[32], dgde[32], dbde[32], dade[32];
1196
1197 uint64_t cmd[32];
1198
1199 const uint32_t length = m_cmd_ptr * 8;
1200 if (length < 8)
1201 {
1202 sprintf(buffer, "ERROR: length = %d\n", length);
1203 return;
1204 }
1205
1206 cmd[0] = m_cmd_data[m_cmd_cur];
1207
1208 const int32_t tile = (cmd[0] >> 56) & 0x7;
1209 sprintf(sl, "%4.2f", (float)((cmd[0] >> 44) & 0xfff) / 4.0f);
1210 sprintf(tl, "%4.2f", (float)((cmd[0] >> 32) & 0xfff) / 4.0f);
1211 sprintf(sh, "%4.2f", (float)((cmd[0] >> 12) & 0xfff) / 4.0f);
1212 sprintf(th, "%4.2f", (float)((cmd[0] >> 0) & 0xfff) / 4.0f);
1213
1214 const char* format = s_image_format[(cmd[0] >> 53) & 0x7];
1215 const char* size = s_image_size[(cmd[0] >> 51) & 0x3];
1216
1217 const uint32_t r = (cmd[0] >> 24) & 0xff;
1218 const uint32_t g = (cmd[0] >> 16) & 0xff;
1219 const uint32_t b = (cmd[0] >> 8) & 0xff;
1220 const uint32_t a = (cmd[0] >> 0) & 0xff;
1221
1222 const uint32_t command = (cmd[0] >> 56) & 0x3f;
1223 switch (command)
1224 {
1225 case 0x00: sprintf(buffer, "No Op"); break;
1226 case 0x08: // Tri_NoShade
1227 {
1228 const int32_t lft = (cmd[0] >> 55) & 0x1;
1229
1230 if (length != s_rdp_command_length[command])
1231 {
1232 sprintf(buffer, "ERROR: Tri_NoShade length = %d\n", length);
1233 return;
1234 }
1235
1236 cmd[1] = m_cmd_data[m_cmd_cur+1];
1237 cmd[2] = m_cmd_data[m_cmd_cur+2];
1238 cmd[3] = m_cmd_data[m_cmd_cur+3];
1239
1240 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1241 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1242 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1243 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1244 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1245 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1246 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1247 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1248 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1249
1250 sprintf(buffer, "Tri_NoShade %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1251 break;
1252 }
1253 case 0x09: // Tri_NoShadeZ
1254 {
1255 const int32_t lft = (cmd[0] >> 55) & 0x1;
1256
1257 if (length != s_rdp_command_length[command])
1258 {
1259 sprintf(buffer, "ERROR: Tri_NoShadeZ length = %d\n", length);
1260 return;
1261 }
1262
1263 cmd[1] = m_cmd_data[m_cmd_cur+1];
1264 cmd[2] = m_cmd_data[m_cmd_cur+2];
1265 cmd[3] = m_cmd_data[m_cmd_cur+3];
1266
1267 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1268 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1269 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1270 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1271 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1272 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1273 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1274 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1275 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1276
1277 sprintf(buffer, "Tri_NoShadeZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1278 break;
1279 }
1280 case 0x0a: // Tri_Tex
1281 {
1282 const int32_t lft = (cmd[0] >> 55) & 0x1;
1283
1284 if (length < s_rdp_command_length[command])
1285 {
1286 sprintf(buffer, "ERROR: Tri_Tex length = %d\n", length);
1287 return;
1288 }
1289
1290 for (int32_t i = 1; i < 12; i++)
1291 {
1292 cmd[i] = m_cmd_data[m_cmd_cur+i];
1293 }
1294
1295 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1296 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1297 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1298 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1299 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1300 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1301 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1302 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1303 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1304
1305 sprintf(s, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1306 sprintf(t, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1307 sprintf(w, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1308 sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1309 sprintf(dtdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1310 sprintf(dwdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1311 sprintf(dsde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1312 sprintf(dtde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1313 sprintf(dwde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1314 sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1315 sprintf(dtdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1316 sprintf(dwdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1317
1318 buffer+=sprintf(buffer, "Tri_Tex %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1319 buffer+=sprintf(buffer, " ");
1320 buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w);
1321 buffer+=sprintf(buffer, " ");
1322 buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1323 buffer+=sprintf(buffer, " ");
1324 buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1325 buffer+=sprintf(buffer, " ");
1326 buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1327 break;
1328 }
1329 case 0x0b: // Tri_TexZ
1330 {
1331 const int32_t lft = (cmd[0] >> 55) & 0x1;
1332
1333 if (length < s_rdp_command_length[command])
1334 {
1335 sprintf(buffer, "ERROR: Tri_TexZ length = %d\n", length);
1336 return;
1337 }
1338
1339 for (int32_t i = 1; i < 12; i++)
1340 {
1341 cmd[i] = m_cmd_data[m_cmd_cur+i];
1342 }
1343
1344 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1345 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1346 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1347 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1348 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1349 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1350 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1351 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1352 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1353
1354 sprintf(s, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1355 sprintf(t, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1356 sprintf(w, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1357 sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1358 sprintf(dtdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1359 sprintf(dwdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1360 sprintf(dsde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1361 sprintf(dtde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1362 sprintf(dwde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1363 sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1364 sprintf(dtdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1365 sprintf(dwdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1366
1367 buffer+=sprintf(buffer, "Tri_TexZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1368 buffer+=sprintf(buffer, " ");
1369 buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w);
1370 buffer+=sprintf(buffer, " ");
1371 buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1372 buffer+=sprintf(buffer, " ");
1373 buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1374 buffer+=sprintf(buffer, " ");
1375 buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1376 break;
1377 }
1378 case 0x0c: // Tri_Shade
1379 {
1380 const int32_t lft = (command >> 23) & 0x1;
1381
1382 if (length != s_rdp_command_length[command])
1383 {
1384 sprintf(buffer, "ERROR: Tri_Shade length = %d\n", length);
1385 return;
1386 }
1387
1388 for (int32_t i = 1; i < 12; i++)
1389 {
1390 cmd[i] = m_cmd_data[i];
1391 }
1392
1393 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1394 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1395 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1396 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1397 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1398 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1399 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1400 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1401 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1402
1403 sprintf(rt, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1404 sprintf(gt, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1405 sprintf(bt, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1406 sprintf(at, "%4.4f", (float)int32_t( ((cmd[4] & 0x0000ffff) << 16) | ( cmd[ 6] & 0xffff)) / 65536.0f);
1407 sprintf(drdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1408 sprintf(dgdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1409 sprintf(dbdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1410 sprintf(dadx, "%4.4f", (float)int32_t( ((cmd[5] & 0x0000ffff) << 16) | ( cmd[ 7] & 0xffff)) / 65536.0f);
1411 sprintf(drde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1412 sprintf(dgde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1413 sprintf(dbde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1414 sprintf(dade, "%4.4f", (float)int32_t( ((cmd[8] & 0x0000ffff) << 16) | ( cmd[10] & 0xffff)) / 65536.0f);
1415 sprintf(drdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1416 sprintf(dgdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1417 sprintf(dbdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1418 sprintf(dady, "%4.4f", (float)int32_t( ((cmd[9] & 0x0000ffff) << 16) | ( cmd[11] & 0xffff)) / 65536.0f);
1419
1420 buffer+=sprintf(buffer, "Tri_Shade %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1421 buffer+=sprintf(buffer, " ");
1422 buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1423 buffer+=sprintf(buffer, " ");
1424 buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1425 buffer+=sprintf(buffer, " ");
1426 buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1427 buffer+=sprintf(buffer, " ");
1428 buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1429 break;
1430 }
1431 case 0x0d: // Tri_ShadeZ
1432 {
1433 const int32_t lft = (command >> 23) & 0x1;
1434
1435 if (length != s_rdp_command_length[command])
1436 {
1437 sprintf(buffer, "ERROR: Tri_ShadeZ length = %d\n", length);
1438 return;
1439 }
1440
1441 for (int32_t i = 1; i < 12; i++)
1442 {
1443 cmd[i] = m_cmd_data[i];
1444 }
1445
1446 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1447 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1448 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1449 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1450 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1451 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1452 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1453 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1454 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1455
1456 sprintf(rt, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1457 sprintf(gt, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1458 sprintf(bt, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1459 sprintf(at, "%4.4f", (float)int32_t( ((cmd[4] & 0x0000ffff) << 16) | ( cmd[ 6] & 0xffff)) / 65536.0f);
1460 sprintf(drdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1461 sprintf(dgdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1462 sprintf(dbdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1463 sprintf(dadx, "%4.4f", (float)int32_t( ((cmd[5] & 0x0000ffff) << 16) | ( cmd[ 7] & 0xffff)) / 65536.0f);
1464 sprintf(drde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1465 sprintf(dgde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1466 sprintf(dbde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1467 sprintf(dade, "%4.4f", (float)int32_t( ((cmd[8] & 0x0000ffff) << 16) | ( cmd[10] & 0xffff)) / 65536.0f);
1468 sprintf(drdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1469 sprintf(dgdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1470 sprintf(dbdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1471 sprintf(dady, "%4.4f", (float)int32_t( ((cmd[9] & 0x0000ffff) << 16) | ( cmd[11] & 0xffff)) / 65536.0f);
1472
1473 buffer+=sprintf(buffer, "Tri_ShadeZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1474 buffer+=sprintf(buffer, " ");
1475 buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1476 buffer+=sprintf(buffer, " ");
1477 buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1478 buffer+=sprintf(buffer, " ");
1479 buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1480 buffer+=sprintf(buffer, " ");
1481 buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1482 break;
1483 }
1484 case 0x0e: // Tri_TexShade
1485 {
1486 const int32_t lft = (command >> 23) & 0x1;
1487
1488 if (length < s_rdp_command_length[command])
1489 {
1490 sprintf(buffer, "ERROR: Tri_TexShade length = %d\n", length);
1491 return;
1492 }
1493
1494 for (int32_t i = 1; i < 20; i++)
1495 {
1496 cmd[i] = m_cmd_data[m_cmd_cur+i];
1497 }
1498
1499 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1500 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1501 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1502 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1503 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1504 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1505 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1506 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1507 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1508
1509 sprintf(rt, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1510 sprintf(gt, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1511 sprintf(bt, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1512 sprintf(at, "%4.4f", (float)int32_t( ((cmd[4] & 0x0000ffff) << 16) | ( cmd[ 6] & 0xffff)) / 65536.0f);
1513 sprintf(drdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1514 sprintf(dgdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1515 sprintf(dbdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1516 sprintf(dadx, "%4.4f", (float)int32_t( ((cmd[5] & 0x0000ffff) << 16) | ( cmd[ 7] & 0xffff)) / 65536.0f);
1517 sprintf(drde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1518 sprintf(dgde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1519 sprintf(dbde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1520 sprintf(dade, "%4.4f", (float)int32_t( ((cmd[8] & 0x0000ffff) << 16) | ( cmd[10] & 0xffff)) / 65536.0f);
1521 sprintf(drdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1522 sprintf(dgdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1523 sprintf(dbdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1524 sprintf(dady, "%4.4f", (float)int32_t( ((cmd[9] & 0x0000ffff) << 16) | ( cmd[11] & 0xffff)) / 65536.0f);
1525
1526 sprintf(s, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1527 sprintf(t, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1528 sprintf(w, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1529 sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1530 sprintf(dtdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1531 sprintf(dwdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1532 sprintf(dsde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1533 sprintf(dtde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1534 sprintf(dwde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1535 sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1536 sprintf(dtdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1537 sprintf(dwdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1538
1539 buffer+=sprintf(buffer, "Tri_TexShade %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1540 buffer+=sprintf(buffer, " ");
1541 buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1542 buffer+=sprintf(buffer, " ");
1543 buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1544 buffer+=sprintf(buffer, " ");
1545 buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1546 buffer+=sprintf(buffer, " ");
1547 buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1548
1549 buffer+=sprintf(buffer, " ");
1550 buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w);
1551 buffer+=sprintf(buffer, " ");
1552 buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1553 buffer+=sprintf(buffer, " ");
1554 buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1555 buffer+=sprintf(buffer, " ");
1556 buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1557 break;
1558 }
1559 case 0x0f: // Tri_TexShadeZ
1560 {
1561 const int32_t lft = (command >> 23) & 0x1;
1562
1563 if (length < s_rdp_command_length[command])
1564 {
1565 sprintf(buffer, "ERROR: Tri_TexShadeZ length = %d\n", length);
1566 return;
1567 }
1568
1569 for (int32_t i = 1; i < 20; i++)
1570 {
1571 cmd[i] = m_cmd_data[m_cmd_cur+i];
1572 }
1573
1574 sprintf(yl, "%4.4f", (float)((cmd[0] >> 32) & 0x1fff) / 4.0f);
1575 sprintf(ym, "%4.4f", (float)((cmd[0] >> 16) & 0x1fff) / 4.0f);
1576 sprintf(yh, "%4.4f", (float)((cmd[0] >> 0) & 0x1fff) / 4.0f);
1577 sprintf(xl, "%4.4f", (float)int32_t(cmd[1] >> 32) / 65536.0f);
1578 sprintf(dxldy, "%4.4f", (float)int32_t(cmd[1]) / 65536.0f);
1579 sprintf(xh, "%4.4f", (float)int32_t(cmd[2] >> 32) / 65536.0f);
1580 sprintf(dxhdy, "%4.4f", (float)int32_t(cmd[2]) / 65536.0f);
1581 sprintf(xm, "%4.4f", (float)int32_t(cmd[3] >> 32) / 65536.0f);
1582 sprintf(dxmdy, "%4.4f", (float)int32_t(cmd[3]) / 65536.0f);
1583
1584 sprintf(rt, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1585 sprintf(gt, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1586 sprintf(bt, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1587 sprintf(at, "%4.4f", (float)int32_t( ((cmd[4] & 0x0000ffff) << 16) | ( cmd[ 6] & 0xffff)) / 65536.0f);
1588 sprintf(drdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1589 sprintf(dgdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1590 sprintf(dbdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1591 sprintf(dadx, "%4.4f", (float)int32_t( ((cmd[5] & 0x0000ffff) << 16) | ( cmd[ 7] & 0xffff)) / 65536.0f);
1592 sprintf(drde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1593 sprintf(dgde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1594 sprintf(dbde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1595 sprintf(dade, "%4.4f", (float)int32_t( ((cmd[8] & 0x0000ffff) << 16) | ( cmd[10] & 0xffff)) / 65536.0f);
1596 sprintf(drdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1597 sprintf(dgdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1598 sprintf(dbdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1599 sprintf(dady, "%4.4f", (float)int32_t( ((cmd[9] & 0x0000ffff) << 16) | ( cmd[11] & 0xffff)) / 65536.0f);
1600
1601 sprintf(s, "%4.4f", (float)int32_t( ((cmd[4] >> 32) & 0xffff0000) | ((cmd[ 6] >> 48) & 0xffff)) / 65536.0f);
1602 sprintf(t, "%4.4f", (float)int32_t((((cmd[4] >> 32) & 0x0000ffff) << 16) | ((cmd[ 6] >> 32) & 0xffff)) / 65536.0f);
1603 sprintf(w, "%4.4f", (float)int32_t( (cmd[4] & 0xffff0000) | ((cmd[ 6] >> 16) & 0xffff)) / 65536.0f);
1604 sprintf(dsdx, "%4.4f", (float)int32_t( ((cmd[5] >> 32) & 0xffff0000) | ((cmd[ 7] >> 48) & 0xffff)) / 65536.0f);
1605 sprintf(dtdx, "%4.4f", (float)int32_t((((cmd[5] >> 32) & 0x0000ffff) << 16) | ((cmd[ 7] >> 32) & 0xffff)) / 65536.0f);
1606 sprintf(dwdx, "%4.4f", (float)int32_t( (cmd[5] & 0xffff0000) | ((cmd[ 7] >> 16) & 0xffff)) / 65536.0f);
1607 sprintf(dsde, "%4.4f", (float)int32_t( ((cmd[8] >> 32) & 0xffff0000) | ((cmd[10] >> 48) & 0xffff)) / 65536.0f);
1608 sprintf(dtde, "%4.4f", (float)int32_t((((cmd[8] >> 32) & 0x0000ffff) << 16) | ((cmd[10] >> 32) & 0xffff)) / 65536.0f);
1609 sprintf(dwde, "%4.4f", (float)int32_t( (cmd[8] & 0xffff0000) | ((cmd[10] >> 16) & 0xffff)) / 65536.0f);
1610 sprintf(dsdy, "%4.4f", (float)int32_t( ((cmd[9] >> 32) & 0xffff0000) | ((cmd[11] >> 48) & 0xffff)) / 65536.0f);
1611 sprintf(dtdy, "%4.4f", (float)int32_t((((cmd[9] >> 32) & 0x0000ffff) << 16) | ((cmd[11] >> 32) & 0xffff)) / 65536.0f);
1612 sprintf(dwdy, "%4.4f", (float)int32_t( (cmd[9] & 0xffff0000) | ((cmd[11] >> 16) & 0xffff)) / 65536.0f);
1613
1614 buffer+=sprintf(buffer, "Tri_TexShadeZ %d, XL: %s, XM: %s, XH: %s, YL: %s, YM: %s, YH: %s\n", lft, xl,xm,xh,yl,ym,yh);
1615 buffer+=sprintf(buffer, " ");
1616 buffer+=sprintf(buffer, " R: %s, G: %s, B: %s, A: %s\n", rt, gt, bt, at);
1617 buffer+=sprintf(buffer, " ");
1618 buffer+=sprintf(buffer, " DRDX: %s, DGDX: %s, DBDX: %s, DADX: %s\n", drdx, dgdx, dbdx, dadx);
1619 buffer+=sprintf(buffer, " ");
1620 buffer+=sprintf(buffer, " DRDE: %s, DGDE: %s, DBDE: %s, DADE: %s\n", drde, dgde, dbde, dade);
1621 buffer+=sprintf(buffer, " ");
1622 buffer+=sprintf(buffer, " DRDY: %s, DGDY: %s, DBDY: %s, DADY: %s\n", drdy, dgdy, dbdy, dady);
1623
1624 buffer+=sprintf(buffer, " ");
1625 buffer+=sprintf(buffer, " S: %s, T: %s, W: %s\n", s, t, w);
1626 buffer+=sprintf(buffer, " ");
1627 buffer+=sprintf(buffer, " DSDX: %s, DTDX: %s, DWDX: %s\n", dsdx, dtdx, dwdx);
1628 buffer+=sprintf(buffer, " ");
1629 buffer+=sprintf(buffer, " DSDE: %s, DTDE: %s, DWDE: %s\n", dsde, dtde, dwde);
1630 buffer+=sprintf(buffer, " ");
1631 buffer+=sprintf(buffer, " DSDY: %s, DTDY: %s, DWDY: %s\n", dsdy, dtdy, dwdy);
1632 break;
1633 }
1634 case 0x24:
1635 case 0x25:
1636 {
1637 if (length < 16)
1638 {
1639 sprintf(buffer, "ERROR: Texture_Rectangle length = %d\n", length);
1640 return;
1641 }
1642
1643 cmd[1] = m_cmd_data[m_cmd_cur+1];
1644 sprintf(s, "%4.4f", (float)int16_t((cmd[1] >> 48) & 0xffff) / 32.0f);
1645 sprintf(t, "%4.4f", (float)int16_t((cmd[1] >> 32) & 0xffff) / 32.0f);
1646 sprintf(dsdx, "%4.4f", (float)int16_t((cmd[1] >> 16) & 0xffff) / 1024.0f);
1647 sprintf(dtdy, "%4.4f", (float)int16_t((cmd[1] >> 0) & 0xffff) / 1024.0f);
1648
1649 if (command == 0x24)
1650 sprintf(buffer, "Texture_Rectangle %d, %s, %s, %s, %s, %s, %s, %s, %s", tile, sh, th, sl, tl, s, t, dsdx, dtdy);
1651 else
1652 sprintf(buffer, "Texture_Rectangle_Flip %d, %s, %s, %s, %s, %s, %s, %s, %s", tile, sh, th, sl, tl, s, t, dsdx, dtdy);
1653
1654 break;
1655 }
1656 case 0x26: sprintf(buffer, "Sync_Load"); break;
1657 case 0x27: sprintf(buffer, "Sync_Pipe"); break;
1658 case 0x28: sprintf(buffer, "Sync_Tile"); break;
1659 case 0x29: sprintf(buffer, "Sync_Full"); break;
1660 case 0x2d: sprintf(buffer, "Set_Scissor %s, %s, %s, %s", sl, tl, sh, th); break;
1661 case 0x2e: sprintf(buffer, "Set_Prim_Depth %04X, %04X", uint32_t(cmd[0] >> 16) & 0xffff, (uint32_t)cmd[0] & 0xffff); break;
1662 case 0x2f: sprintf(buffer, "Set_Other_Modes %08X %08X", uint32_t(cmd[0] >> 32), (uint32_t)cmd[0]); break;
1663 case 0x30: sprintf(buffer, "Load_TLUT %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break;
1664 case 0x32: sprintf(buffer, "Set_Tile_Size %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break;
1665 case 0x33: sprintf(buffer, "Load_Block %d, %03X, %03X, %03X, %03X", tile, uint32_t(cmd[0] >> 44) & 0xfff, uint32_t(cmd[0] >> 32) & 0xfff, uint32_t(cmd[0] >> 12) & 0xfff, uint32_t(cmd[0]) & 0xfff); break;
1666 case 0x34: sprintf(buffer, "Load_Tile %d, %s, %s, %s, %s", tile, sl, tl, sh, th); break;
1667 case 0x35: sprintf(buffer, "Set_Tile %d, %s, %s, %d, %04X", tile, format, size, (uint32_t(cmd[0] >> 41) & 0x1ff) * 8, (uint32_t(cmd[0] >> 32) & 0x1ff) * 8); break;
1668 case 0x36: sprintf(buffer, "Fill_Rectangle %s, %s, %s, %s", sh, th, sl, tl); break;
1669 case 0x37: sprintf(buffer, "Set_Fill_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1670 case 0x38: sprintf(buffer, "Set_Fog_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1671 case 0x39: sprintf(buffer, "Set_Blend_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1672 case 0x3a: sprintf(buffer, "Set_Prim_Color %d, %d, R: %d, G: %d, B: %d, A: %d", uint32_t(cmd[0] >> 40) & 0x1f, uint32_t(cmd[0] >> 32) & 0xff, r, g, b, a); break;
1673 case 0x3b: sprintf(buffer, "Set_Env_Color R: %d, G: %d, B: %d, A: %d", r, g, b, a); break;
1674 case 0x3c: sprintf(buffer, "Set_Combine %08X %08X", uint32_t(cmd[0] >> 32), (uint32_t)cmd[0]); break;
1675 case 0x3d: sprintf(buffer, "Set_Texture_Image %s, %s, %d, %08X", format, size, (uint32_t(cmd[0] >> 32) & 0x1ff) + 1, (uint32_t)cmd[0]); break;
1676 case 0x3e: sprintf(buffer, "Set_Mask_Image %08X", (uint32_t)cmd[0]); break;
1677 case 0x3f: sprintf(buffer, "Set_Color_Image %s, %s, %d, %08X", format, size, (uint32_t(cmd[0] >> 32) & 0x1ff) + 1, (uint32_t)cmd[0]); break;
1678 default: sprintf(buffer, "Unknown (%08X %08X)", uint32_t(cmd[0] >> 32), (uint32_t)cmd[0]); break;
1679 }
1680 }
1681
1682 /*****************************************************************************/
1683
// Builds a 4-bit coverage pattern for a right-hand span edge.
//
// x:     edge value; bits 15:14 give a base count of 0..3 and any set bit in
//        bits 13:1 adds one more (the "sticky" bit). Presumably a fixed-point
//        X coordinate - confirm with the callers.
// fmask: bits that may be reported; the pattern is ANDed with it.
//
// The pattern is the low nibble of 0xf0 shifted right by the count, so a
// count of n yields the top n bits of the nibble set.
static uint32_t rightcvghex(uint32_t x, uint32_t fmask)
{
	const uint32_t base_count = (x >> 14) & 3;
	const uint32_t sticky = ((x & 0x3ffe) != 0) ? 1 : 0;
	const uint32_t pattern = (0xf0 >> (base_count + sticky)) & 0xf;
	return pattern & fmask;
}
1691
// Builds a 4-bit coverage pattern for a left-hand span edge.
//
// x:     edge value; bits 15:14 give a base count of 0..3 and any set bit in
//        bits 13:1 adds one more (the "sticky" bit). Presumably a fixed-point
//        X coordinate - confirm with the callers.
// fmask: bits that may be reported; the pattern is ANDed with it.
//
// The pattern is 0xf shifted right by the count, so a count of n clears the
// top n bits of the nibble.
static uint32_t leftcvghex(uint32_t x, uint32_t fmask)
{
	const uint32_t base_count = (x >> 14) & 3;
	const uint32_t sticky = ((x & 0x3ffe) != 0) ? 1 : 0;
	const uint32_t pattern = 0xf >> (base_count + sticky);
	return pattern & fmask;
}
1699
// Limits `value` to the range [min, max].
// The lower bound is tested first, so `min` is returned whenever
// value < min, even if the caller passes min > max.
static int32_t CLIP(int32_t value,int32_t min,int32_t max)
{
	return (value < min) ? min
		: (value > max) ? max
		: value;
}
1715
compute_cvg_noflip(extent_t * spans,int32_t * majorx,int32_t * minorx,int32_t * majorxint,int32_t * minorxint,int32_t scanline,int32_t yh,int32_t yl,int32_t base)1716 void n64_rdp::compute_cvg_noflip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base)
1717 {
1718 int32_t purgestart = 0xfff;
1719 int32_t purgeend = 0;
1720 const bool writablescanline = !(scanline & ~0x3ff);
1721 const int32_t scanlinespx = scanline << 2;
1722
1723 if (!writablescanline) return;
1724
1725 for(int32_t i = 0; i < 4; i++)
1726 {
1727 if (minorxint[i] < purgestart)
1728 {
1729 purgestart = minorxint[i];
1730 }
1731 if (majorxint[i] > purgeend)
1732 {
1733 purgeend = majorxint[i];
1734 }
1735 }
1736
1737 purgestart = CLIP(purgestart, 0, 1023);
1738 purgeend = CLIP(purgeend, 0, 1023);
1739 int32_t length = purgeend - purgestart;
1740
1741 if (length < 0) return;
1742
1743 rdp_span_aux* userdata = (rdp_span_aux*)spans[scanline - base].userdata;
1744 memset(&userdata->m_cvg[purgestart], 0, (length + 1) << 1);
1745
1746 for(int32_t i = 0; i < 4; i++)
1747 {
1748 int32_t minorcur = minorx[i];
1749 int32_t majorcur = majorx[i];
1750 int32_t minorcurint = minorxint[i];
1751 int32_t majorcurint = majorxint[i];
1752 length = majorcurint - minorcurint;
1753
1754 int32_t fmask = (i & 1) ? 5 : 0xa;
1755 int32_t maskshift = (i ^ 3) << 2;
1756 int32_t fmaskshifted = fmask << maskshift;
1757 int32_t fleft = CLIP(minorcurint + 1, 0, 647);
1758 int32_t fright = CLIP(majorcurint - 1, 0, 647);
1759 bool valid_y = ((scanlinespx + i) >= yh && (scanlinespx + i) < yl);
1760 if (valid_y && length >= 0)
1761 {
1762 if (minorcurint != majorcurint)
1763 {
1764 if (!(minorcurint & ~0x3ff))
1765 {
1766 userdata->m_cvg[minorcurint] |= (leftcvghex(minorcur, fmask) << maskshift);
1767 }
1768 if (!(majorcurint & ~0x3ff))
1769 {
1770 userdata->m_cvg[majorcurint] |= (rightcvghex(majorcur, fmask) << maskshift);
1771 }
1772 }
1773 else
1774 {
1775 if (!(majorcurint & ~0x3ff))
1776 {
1777 int32_t samecvg = leftcvghex(minorcur, fmask) & rightcvghex(majorcur, fmask);
1778 userdata->m_cvg[majorcurint] |= (samecvg << maskshift);
1779 }
1780 }
1781 for (; fleft <= fright; fleft++)
1782 {
1783 userdata->m_cvg[fleft] |= fmaskshifted;
1784 }
1785 }
1786 }
1787 }
1788
compute_cvg_flip(extent_t * spans,int32_t * majorx,int32_t * minorx,int32_t * majorxint,int32_t * minorxint,int32_t scanline,int32_t yh,int32_t yl,int32_t base)1789 void n64_rdp::compute_cvg_flip(extent_t* spans, int32_t* majorx, int32_t* minorx, int32_t* majorxint, int32_t* minorxint, int32_t scanline, int32_t yh, int32_t yl, int32_t base)
1790 {
1791 int32_t purgestart = 0xfff;
1792 int32_t purgeend = 0;
1793 const bool writablescanline = !(scanline & ~0x3ff);
1794 const int32_t scanlinespx = scanline << 2;
1795
1796 if(!writablescanline) return;
1797
1798 for(int32_t i = 0; i < 4; i++)
1799 {
1800 if (majorxint[i] < purgestart)
1801 {
1802 purgestart = majorxint[i];
1803 }
1804 if (minorxint[i] > purgeend)
1805 {
1806 purgeend = minorxint[i];
1807 }
1808 }
1809
1810 purgestart = CLIP(purgestart, 0, 1023);
1811 purgeend = CLIP(purgeend, 0, 1023);
1812
1813 int32_t length = purgeend - purgestart;
1814
1815 if (length < 0) return;
1816
1817 rdp_span_aux* userdata = (rdp_span_aux*)spans[scanline - base].userdata;
1818 memset(&userdata->m_cvg[purgestart], 0, (length + 1) << 1);
1819
1820 for(int32_t i = 0; i < 4; i++)
1821 {
1822 int32_t minorcur = minorx[i];
1823 int32_t majorcur = majorx[i];
1824 int32_t minorcurint = minorxint[i];
1825 int32_t majorcurint = majorxint[i];
1826 length = minorcurint - majorcurint;
1827
1828 int32_t fmask = (i & 1) ? 5 : 0xa;
1829 int32_t maskshift = (i ^ 3) << 2;
1830 int32_t fmaskshifted = fmask << maskshift;
1831 int32_t fleft = CLIP(majorcurint + 1, 0, 647);
1832 int32_t fright = CLIP(minorcurint - 1, 0, 647);
1833 bool valid_y = ((scanlinespx + i) >= yh && (scanlinespx + i) < yl);
1834 if (valid_y && length >= 0)
1835 {
1836 if (minorcurint != majorcurint)
1837 {
1838 if (!(minorcurint & ~0x3ff))
1839 {
1840 userdata->m_cvg[minorcurint] |= (rightcvghex(minorcur, fmask) << maskshift);
1841 }
1842 if (!(majorcurint & ~0x3ff))
1843 {
1844 userdata->m_cvg[majorcurint] |= (leftcvghex(majorcur, fmask) << maskshift);
1845 }
1846 }
1847 else
1848 {
1849 if (!(majorcurint & ~0x3ff))
1850 {
1851 int32_t samecvg = rightcvghex(minorcur, fmask) & leftcvghex(majorcur, fmask);
1852 userdata->m_cvg[majorcurint] |= (samecvg << maskshift);
1853 }
1854 }
1855 for (; fleft <= fright; fleft++)
1856 {
1857 userdata->m_cvg[fleft] |= fmaskshifted;
1858 }
1859 }
1860 }
1861 }
1862
// Sign-extends the low `numb` bits of x: bit (numb - 1) is treated as the sign bit.
// Both parameters are fully parenthesized so that expression arguments expand correctly.
#define SIGN(x, numb) (((x) & ((1 << (numb)) - 1)) | -((x) & (1 << ((numb) - 1))))
1864
// Walks the edges of one primitive, builds a span (plus per-span auxiliary state) for each
// scanline it touches, and passes the spans to render_spans().
//
// shade:   when true, the texture and Z coefficient blocks are expected 8 words later.
// texture: when true, the Z coefficient block is expected a further 8 words later.
// zbuffer: not referenced in this function body.
// rect:    when true, the command is read from m_temp_rect_data starting at index 0 instead
//          of m_cmd_data starting at m_cmd_cur; the flag is also forwarded to render_spans().
void n64_rdp::draw_triangle(bool shade, bool texture, bool zbuffer, bool rect)
{
	const uint64_t* cmd_data = rect ? m_temp_rect_data : m_cmd_data;
	const uint32_t fifo_index = rect ? 0 : m_cmd_cur;
	const uint64_t w1 = cmd_data[fifo_index + 0];

	// First command word: flip flag (bit 55), max LOD level (bits 51-53), tile index (bits 48-50).
	int32_t flip = int32_t(w1 >> 55) & 1;
	m_misc_state.m_max_level = uint32_t(w1 >> 51) & 7;
	int32_t tilenum = int32_t(w1 >> 48) & 0x7;

	int32_t dsdiff = 0, dtdiff = 0, dwdiff = 0, drdiff = 0, dgdiff = 0, dbdiff = 0, dadiff = 0, dzdiff = 0;
	int32_t dsdeh = 0, dtdeh = 0, dwdeh = 0, drdeh = 0, dgdeh = 0, dbdeh = 0, dadeh = 0, dzdeh = 0;
	int32_t dsdxh = 0, dtdxh = 0, dwdxh = 0, drdxh = 0, dgdxh = 0, dbdxh = 0, dadxh = 0, dzdxh = 0;
	int32_t dsdyh = 0, dtdyh = 0, dwdyh = 0, drdyh = 0, dgdyh = 0, dbdyh = 0, dadyh = 0, dzdyh = 0;

	int32_t maxxmx = 0; // maxxmx / minxhx very opaque names, consider re-naming
	int32_t minxmx = 0;
	int32_t maxxhx = 0;
	int32_t minxhx = 0;

	// The four edge words come first; the shade, texture and Z coefficient blocks follow,
	// with each enabled block pushing the later ones back by 8 words.
	int32_t shade_base = fifo_index + 4;
	int32_t texture_base = fifo_index + 4;
	int32_t zbuffer_base = fifo_index + 4;
	if(shade)
	{
		texture_base += 8;
		zbuffer_base += 8;
	}
	if(texture)
	{
		zbuffer_base += 8;
	}

	uint64_t w2 = cmd_data[fifo_index + 1];
	uint64_t w3 = cmd_data[fifo_index + 2];
	uint64_t w4 = cmd_data[fifo_index + 3];

	// Y coordinates are 14-bit fields and X coordinates 30-bit fields; both are
	// sign-extended below.
	int32_t yl = int32_t(w1 >> 32) & 0x3fff;
	int32_t ym = int32_t(w1 >> 16) & 0x3fff;
	int32_t yh = int32_t(w1 >> 0) & 0x3fff;
	int32_t xl = (int32_t)(w2 >> 32) & 0x3fffffff;
	int32_t xh = (int32_t)(w3 >> 32) & 0x3fffffff;
	int32_t xm = (int32_t)(w4 >> 32) & 0x3fffffff;
	// Inverse slopes in 16.16 format
	int32_t dxldy = (int32_t)w2;
	int32_t dxhdy = (int32_t)w3;
	int32_t dxmdy = (int32_t)w4;

	if (yl & 0x2000) yl |= 0xffffc000;
	if (ym & 0x2000) ym |= 0xffffc000;
	if (yh & 0x2000) yh |= 0xffffc000;

	if (xl & 0x20000000) xl |= 0xc0000000;
	if (xm & 0x20000000) xm |= 0xc0000000;
	if (xh & 0x20000000) xh |= 0xc0000000;

	// Shade coefficients: each 32-bit value takes its upper 16 bits from one word and its
	// lower 16 bits from the word two entries later. These words are decoded regardless of
	// the 'shade' flag.
	int32_t r = int32_t(((cmd_data[shade_base] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 2] >> 48) & 0x0000ffff));
	int32_t g = int32_t(((cmd_data[shade_base] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 2] >> 32) & 0x0000ffff));
	int32_t b = int32_t( (cmd_data[shade_base] & 0xffff0000) | ((cmd_data[shade_base + 2] >> 16) & 0x0000ffff));
	int32_t a = int32_t(((cmd_data[shade_base] << 16) & 0xffff0000) | (cmd_data[shade_base + 2] & 0x0000ffff));
	const int32_t drdx = int32_t(((cmd_data[shade_base + 1] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 3] >> 48) & 0x0000ffff));
	const int32_t dgdx = int32_t(((cmd_data[shade_base + 1] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 3] >> 32) & 0x0000ffff));
	const int32_t dbdx = int32_t( (cmd_data[shade_base + 1] & 0xffff0000) | ((cmd_data[shade_base + 3] >> 16) & 0x0000ffff));
	const int32_t dadx = int32_t(((cmd_data[shade_base + 1] << 16) & 0xffff0000) | (cmd_data[shade_base + 3] & 0x0000ffff));
	const int32_t drde = int32_t(((cmd_data[shade_base + 4] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 6] >> 48) & 0x0000ffff));
	const int32_t dgde = int32_t(((cmd_data[shade_base + 4] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 6] >> 32) & 0x0000ffff));
	const int32_t dbde = int32_t( (cmd_data[shade_base + 4] & 0xffff0000) | ((cmd_data[shade_base + 6] >> 16) & 0x0000ffff));
	const int32_t dade = int32_t(((cmd_data[shade_base + 4] << 16) & 0xffff0000) | (cmd_data[shade_base + 6] & 0x0000ffff));
	const int32_t drdy = int32_t(((cmd_data[shade_base + 5] >> 32) & 0xffff0000) | ((cmd_data[shade_base + 7] >> 48) & 0x0000ffff));
	const int32_t dgdy = int32_t(((cmd_data[shade_base + 5] >> 16) & 0xffff0000) | ((cmd_data[shade_base + 7] >> 32) & 0x0000ffff));
	const int32_t dbdy = int32_t( (cmd_data[shade_base + 5] & 0xffff0000) | ((cmd_data[shade_base + 7] >> 16) & 0x0000ffff));
	const int32_t dady = int32_t(((cmd_data[shade_base + 5] << 16) & 0xffff0000) | (cmd_data[shade_base + 7] & 0x0000ffff));

	// Texture coefficients use the same split-word layout as the shade block.
	int32_t s = int32_t(((cmd_data[texture_base] >> 32) & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 48) & 0x0000ffff));
	int32_t t = int32_t(((cmd_data[texture_base] >> 16) & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 32) & 0x0000ffff));
	int32_t w = int32_t( (cmd_data[texture_base] & 0xffff0000) | ((cmd_data[texture_base+ 2 ] >> 16) & 0x0000ffff));
	const int32_t dsdx = int32_t(((cmd_data[texture_base + 1] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 3] >> 48) & 0x0000ffff));
	const int32_t dtdx = int32_t(((cmd_data[texture_base + 1] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 3] >> 32) & 0x0000ffff));
	const int32_t dwdx = int32_t( (cmd_data[texture_base + 1] & 0xffff0000) | ((cmd_data[texture_base + 3] >> 16) & 0x0000ffff));
	const int32_t dsde = int32_t(((cmd_data[texture_base + 4] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 6] >> 48) & 0x0000ffff));
	const int32_t dtde = int32_t(((cmd_data[texture_base + 4] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 6] >> 32) & 0x0000ffff));
	const int32_t dwde = int32_t( (cmd_data[texture_base + 4] & 0xffff0000) | ((cmd_data[texture_base + 6] >> 16) & 0x0000ffff));
	const int32_t dsdy = int32_t(((cmd_data[texture_base + 5] >> 32) & 0xffff0000) | ((cmd_data[texture_base + 7] >> 48) & 0x0000ffff));
	const int32_t dtdy = int32_t(((cmd_data[texture_base + 5] >> 16) & 0xffff0000) | ((cmd_data[texture_base + 7] >> 32) & 0x0000ffff));
	const int32_t dwdy = int32_t( (cmd_data[texture_base + 5] & 0xffff0000) | ((cmd_data[texture_base + 7] >> 16) & 0x0000ffff));

	// Z coefficients occupy two words: (z, dzdx) and (dzde, dzdy).
	int32_t z = int32_t(cmd_data[zbuffer_base] >> 32);
	const int32_t dzdx = int32_t(cmd_data[zbuffer_base]);
	const int32_t dzde = int32_t(cmd_data[zbuffer_base+1] >> 32);
	const int32_t dzdy = int32_t(cmd_data[zbuffer_base+1]);

	// Upper 16 bits of the Z deltas, used for the dzpix lookup below.
	const int32_t dzdy_dz = (dzdy >> 16) & 0xffff;
	const int32_t dzdx_dz = (dzdx >> 16) & 0xffff;

	// One entry per scanline of the primitive, indexed relative to the first scanline.
	extent_t spans[2048];
#ifdef MAME_DEBUG
	memset(spans, 0xcc, sizeof(spans));
#endif

	m_span_base.m_span_drdy = drdy;
	m_span_base.m_span_dgdy = dgdy;
	m_span_base.m_span_dbdy = dbdy;
	m_span_base.m_span_dady = dady;
	m_span_base.m_span_dzdy = m_other_modes.z_source_sel ? 0 : dzdy;

	// Sum of the two 16-bit Z delta magnitudes; a value with bit 15 set is replaced by
	// its one's complement (masked to 15 bits) before adding.
	uint32_t temp_dzpix = ((dzdy_dz & 0x8000) ? ((~dzdy_dz) & 0x7fff) : dzdy_dz) + ((dzdx_dz & 0x8000) ? ((~dzdx_dz) & 0x7fff) : dzdx_dz);
	m_span_base.m_span_dr = drdx & ~0x1f;
	m_span_base.m_span_dg = dgdx & ~0x1f;
	m_span_base.m_span_db = dbdx & ~0x1f;
	m_span_base.m_span_da = dadx & ~0x1f;
	m_span_base.m_span_ds = dsdx;
	m_span_base.m_span_dt = dtdx;
	m_span_base.m_span_dw = dwdx;
	m_span_base.m_span_dz = m_other_modes.z_source_sel ? 0 : dzdx;
	m_span_base.m_span_dymax = 0;
	m_span_base.m_span_dzpix = m_dzpix_normalize[temp_dzpix & 0xffff];

	// Per-step X increments: the loop below advances once per quarter scanline,
	// so the per-scanline slopes are divided by four.
	int32_t xleft_inc = (dxmdy >> 2) & ~1;
	int32_t xright_inc = (dxhdy >> 2) & ~1;

	// xright follows the xh edge for the whole primitive; xleft starts on the xm edge
	// and switches to the xl edge when the loop reaches ym.
	int32_t xright = xh & ~1;
	int32_t xleft = xm & ~1;

	const int32_t sign_dxhdy = (dxhdy & 0x80000000) ? 1 : 0;
	const int32_t do_offset = !(sign_dxhdy ^ (flip));

	if (do_offset)
	{
		dsdeh = dsde >> 9; dsdyh = dsdy >> 9;
		dtdeh = dtde >> 9; dtdyh = dtdy >> 9;
		dwdeh = dwde >> 9; dwdyh = dwdy >> 9;
		drdeh = drde >> 9; drdyh = drdy >> 9;
		dgdeh = dgde >> 9; dgdyh = dgdy >> 9;
		dbdeh = dbde >> 9; dbdyh = dbdy >> 9;
		dadeh = dade >> 9; dadyh = dady >> 9;
		dzdeh = dzde >> 9; dzdyh = dzdy >> 9;

		// Each diff is 0x180 * (d?deh - d?dyh), written as shifts.
		dsdiff = (dsdeh << 8) + (dsdeh << 7) - (dsdyh << 8) - (dsdyh << 7);
		dtdiff = (dtdeh << 8) + (dtdeh << 7) - (dtdyh << 8) - (dtdyh << 7);
		dwdiff = (dwdeh << 8) + (dwdeh << 7) - (dwdyh << 8) - (dwdyh << 7);
		drdiff = (drdeh << 8) + (drdeh << 7) - (drdyh << 8) - (drdyh << 7);
		dgdiff = (dgdeh << 8) + (dgdeh << 7) - (dgdyh << 8) - (dgdyh << 7);
		dbdiff = (dbdeh << 8) + (dbdeh << 7) - (dbdyh << 8) - (dbdyh << 7);
		dadiff = (dadeh << 8) + (dadeh << 7) - (dadyh << 8) - (dadyh << 7);
		dzdiff = (dzdeh << 8) + (dzdeh << 7) - (dzdyh << 8) - (dzdyh << 7);
	}
	else
	{
		dsdiff = dtdiff = dwdiff = drdiff = dgdiff = dbdiff = dadiff = dzdiff = 0;
	}

	dsdxh = dsdx >> 8;
	dtdxh = dtdx >> 8;
	dwdxh = dwdx >> 8;
	drdxh = drdx >> 8;
	dgdxh = dgdx >> 8;
	dbdxh = dbdx >> 8;
	dadxh = dadx >> 8;
	dzdxh = dzdx >> 8;

	// Walk whole scanlines: start at the first quarter-step of yh's scanline and end at
	// the last quarter-step of yl's scanline.
	const int32_t ycur = yh & ~3;
	const int32_t ylfar = yl | 3;
	// Quarter-step (0 or 3) at which the span's starting attribute values are captured.
	const int32_t ldflag = (sign_dxhdy ^ flip) ? 0 : 3;
	int32_t majorx[4];
	int32_t minorx[4];
	int32_t majorxint[4];
	int32_t minorxint[4];

	int32_t xfrac = ((xright >> 8) & 0xff);

	const int32_t clipy1 = m_scissor.m_yh;
	const int32_t clipy2 = m_scissor.m_yl;

	// Trivial reject
	if((ycur >> 2) >= clipy2 && (ylfar >> 2) >= clipy2)
	{
		return;
	}
	if((ycur >> 2) < clipy1 && (ylfar >> 2) < clipy1)
	{
		return;
	}

	bool new_object = true;
	rdp_poly_state* object = nullptr;
	bool valid = false;

	// Select which of the four extent trackers act as min/max and as span start/stop,
	// depending on the flip flag.
	int32_t* minx = flip ? &minxhx : &minxmx;
	int32_t* maxx = flip ? &maxxmx : &maxxhx;
	int32_t* startx = flip ? maxx : minx;
	int32_t* endx = flip ? minx : maxx;

	// k advances in quarter-scanline steps: k >> 2 is the scanline, k & 3 the step within it.
	for (int32_t k = ycur; k <= ylfar; k++)
	{
		if (k == ym)
		{
			// Switch the left edge from the xm edge to the xl edge.
			xleft = xl & ~1;
			xleft_inc = (dxldy >> 2) & ~1;
		}

		const int32_t xstart = xleft >> 16;
		const int32_t xend = xright >> 16;
		const int32_t j = k >> 2;
		const int32_t spanidx = (k - ycur) >> 2;
		const int32_t spix = k & 3;
		bool valid_y = !(k < yh || k >= yl);

		// Steps whose span index falls outside the local spans array are not recorded;
		// the edge and attribute stepping at the bottom of the loop still runs.
		if (spanidx >= 0 && spanidx < 2048)
		{
			majorxint[spix] = xend;
			minorxint[spix] = xstart;
			majorx[spix] = xright;
			minorx[spix] = xleft;

			if (spix == 0)
			{
				// Reset the extent trackers at the start of each scanline.
				*maxx = 0;
				*minx = 0xfff;
			}

			if (valid_y)
			{
				if (flip)
				{
					*maxx = std::max(xstart, *maxx);
					*minx = std::min(xend, *minx);
				}
				else
				{
					*minx = std::min(xstart, *minx);
					*maxx = std::max(xend, *maxx);
				}
			}

			if (spix == 0)
			{
				if(new_object)
				{
					// Allocate the per-primitive state once and copy 0x1000 bytes of TMEM into it.
					object = &object_data_alloc();
					memcpy(object->m_tmem, m_tmem.get(), 0x1000);
					new_object = false;
				}

				// Take the next rdp_span_aux slot from the aux buffer for this scanline.
				spans[spanidx].userdata = (void*)((uint8_t*)m_aux_buf.get() + m_aux_buf_ptr);
				valid = true;
				m_aux_buf_ptr += sizeof(rdp_span_aux);

				// NOTE(review): m_aux_buf_ptr advances in bytes and is checked only after the
				// slot above has been handed out - confirm EXTENT_AUX_COUNT is a byte size.
				if(m_aux_buf_ptr >= EXTENT_AUX_COUNT)
				{
					fatalerror("n64_rdp::draw_triangle: span aux buffer overflow\n");
				}

				rdp_span_aux* userdata = (rdp_span_aux*)spans[spanidx].userdata;
				userdata->m_tmem = object->m_tmem;

				// Copy the current register state into the span's aux data.
				userdata->m_blend_color = m_blend_color;
				userdata->m_prim_color = m_prim_color;
				userdata->m_env_color = m_env_color;
				userdata->m_fog_color = m_fog_color;
				userdata->m_prim_alpha = m_prim_alpha;
				userdata->m_env_alpha = m_env_alpha;
				userdata->m_key_scale = m_key_scale;
				userdata->m_lod_fraction = m_lod_fraction;
				userdata->m_prim_lod_fraction = m_prim_lod_fraction;

				// Setup blender data for this scanline
				set_blender_input(0, 0, &userdata->m_color_inputs.blender1a_rgb[0], &userdata->m_color_inputs.blender1b_a[0], m_other_modes.blend_m1a_0, m_other_modes.blend_m1b_0, userdata);
				set_blender_input(0, 1, &userdata->m_color_inputs.blender2a_rgb[0], &userdata->m_color_inputs.blender2b_a[0], m_other_modes.blend_m2a_0, m_other_modes.blend_m2b_0, userdata);
				set_blender_input(1, 0, &userdata->m_color_inputs.blender1a_rgb[1], &userdata->m_color_inputs.blender1b_a[1], m_other_modes.blend_m1a_1, m_other_modes.blend_m1b_1, userdata);
				set_blender_input(1, 1, &userdata->m_color_inputs.blender2a_rgb[1], &userdata->m_color_inputs.blender2b_a[1], m_other_modes.blend_m2a_1, m_other_modes.blend_m2b_1, userdata);

				// Setup color combiner data for this scanline
				set_suba_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_a[0], m_combine.sub_a_rgb0, userdata);
				set_subb_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_b[0], m_combine.sub_b_rgb0, userdata);
				set_mul_input_rgb(&userdata->m_color_inputs.combiner_rgbmul[0], m_combine.mul_rgb0, userdata);
				set_add_input_rgb(&userdata->m_color_inputs.combiner_rgbadd[0], m_combine.add_rgb0, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_a[0], m_combine.sub_a_a0, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_b[0], m_combine.sub_b_a0, userdata);
				set_mul_input_alpha(&userdata->m_color_inputs.combiner_alphamul[0], m_combine.mul_a0, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphaadd[0], m_combine.add_a0, userdata);

				set_suba_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_a[1], m_combine.sub_a_rgb1, userdata);
				set_subb_input_rgb(&userdata->m_color_inputs.combiner_rgbsub_b[1], m_combine.sub_b_rgb1, userdata);
				set_mul_input_rgb(&userdata->m_color_inputs.combiner_rgbmul[1], m_combine.mul_rgb1, userdata);
				set_add_input_rgb(&userdata->m_color_inputs.combiner_rgbadd[1], m_combine.add_rgb1, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_a[1], m_combine.sub_a_a1, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphasub_b[1], m_combine.sub_b_a1, userdata);
				set_mul_input_alpha(&userdata->m_color_inputs.combiner_alphamul[1], m_combine.mul_a1, userdata);
				set_sub_input_alpha(&userdata->m_color_inputs.combiner_alphaadd[1], m_combine.add_a1, userdata);
			}

			if (spix == 3)
			{
				// All four quarter-steps are known: record the span extents and compute coverage
				// with the routine selected by the flip flag.
				spans[spanidx].startx = *startx;
				spans[spanidx].stopx = *endx;
				((this)->*(m_compute_cvg[flip]))(spans, majorx, minorx, majorxint, minorxint, j, yh, yl, ycur >> 2);
			}

			if (spix == ldflag)
			{
				// Capture the attribute start values for this span, offset by the
				// fractional part of xright.
				((rdp_span_aux*)spans[spanidx].userdata)->m_unscissored_rx = xend;
				xfrac = ((xright >> 8) & 0xff);
				spans[spanidx].param[SPAN_R].start = ((r >> 9) << 9) + drdiff - (xfrac * drdxh);
				spans[spanidx].param[SPAN_G].start = ((g >> 9) << 9) + dgdiff - (xfrac * dgdxh);
				spans[spanidx].param[SPAN_B].start = ((b >> 9) << 9) + dbdiff - (xfrac * dbdxh);
				spans[spanidx].param[SPAN_A].start = ((a >> 9) << 9) + dadiff - (xfrac * dadxh);
				spans[spanidx].param[SPAN_S].start = (((s >> 9) << 9) + dsdiff - (xfrac * dsdxh)) & ~0x1f;
				spans[spanidx].param[SPAN_T].start = (((t >> 9) << 9) + dtdiff - (xfrac * dtdxh)) & ~0x1f;
				spans[spanidx].param[SPAN_W].start = (((w >> 9) << 9) + dwdiff - (xfrac * dwdxh)) & ~0x1f;
				spans[spanidx].param[SPAN_Z].start = ((z >> 9) << 9) + dzdiff - (xfrac * dzdxh);
			}
		}

		if (spix == 3)
		{
			// Step the attributes along the edge once per full scanline.
			r += drde;
			g += dgde;
			b += dbde;
			a += dade;
			s += dsde;
			t += dtde;
			w += dwde;
			z += dzde;
		}
		xleft += xleft_inc;
		xright += xright_inc;
	}

	// Render only if at least one span was set up.
	if(!new_object && valid)
	{
		render_spans(yh >> 2, yl >> 2, tilenum, flip ? true : false, spans, rect, object);
	}
	m_aux_buf_ptr = 0; // Spans can be reused once render completes
	//wait("draw_triangle");
}
2200
2201 /*****************************************************************************/
2202
2203 ////////////////////////
2204 // RDP COMMANDS
2205 ////////////////////////
2206
triangle(bool shade,bool texture,bool zbuffer)2207 void n64_rdp::triangle(bool shade, bool texture, bool zbuffer)
2208 {
2209 draw_triangle(shade, texture, zbuffer, false);
2210 m_pipe_clean = false;
2211 }
2212
cmd_triangle(uint64_t w1)2213 void n64_rdp::cmd_triangle(uint64_t w1)
2214 {
2215 triangle(false, false, false);
2216 }
2217
cmd_triangle_z(uint64_t w1)2218 void n64_rdp::cmd_triangle_z(uint64_t w1)
2219 {
2220 triangle(false, false, true);
2221 }
2222
cmd_triangle_t(uint64_t w1)2223 void n64_rdp::cmd_triangle_t(uint64_t w1)
2224 {
2225 triangle(false, true, false);
2226 }
2227
cmd_triangle_tz(uint64_t w1)2228 void n64_rdp::cmd_triangle_tz(uint64_t w1)
2229 {
2230 triangle(false, true, true);
2231 }
2232
cmd_triangle_s(uint64_t w1)2233 void n64_rdp::cmd_triangle_s(uint64_t w1)
2234 {
2235 triangle(true, false, false);
2236 }
2237
cmd_triangle_sz(uint64_t w1)2238 void n64_rdp::cmd_triangle_sz(uint64_t w1)
2239 {
2240 triangle(true, false, true);
2241 }
2242
cmd_triangle_st(uint64_t w1)2243 void n64_rdp::cmd_triangle_st(uint64_t w1)
2244 {
2245 triangle(true, true, false);
2246 }
2247
cmd_triangle_stz(uint64_t w1)2248 void n64_rdp::cmd_triangle_stz(uint64_t w1)
2249 {
2250 triangle(true, true, true);
2251 }
2252
cmd_tex_rect(uint64_t w1)2253 void n64_rdp::cmd_tex_rect(uint64_t w1)
2254 {
2255 const uint64_t* data = m_cmd_data + m_cmd_cur;
2256
2257 const uint64_t w2 = data[1];
2258
2259 const uint64_t tilenum = (w1 >> 24) & 0x7;
2260 const uint64_t xh = (w1 >> 12) & 0xfff;
2261 const uint64_t xl = (w1 >> 44) & 0xfff;
2262 const uint64_t yh = (w1 >> 0) & 0xfff;
2263 uint64_t yl = (w1 >> 32) & 0xfff;
2264
2265 const uint64_t s = (w2 >> 48) & 0xffff;
2266 const uint64_t t = (w2 >> 32) & 0xffff;
2267 const uint64_t dsdx = SIGN16((w2 >> 16) & 0xffff);
2268 const uint64_t dtdy = SIGN16((w2 >> 0) & 0xffff);
2269
2270 if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
2271 {
2272 yl |= 3;
2273 }
2274
2275 const uint64_t xlint = (xl >> 2) & 0x3ff;
2276 const uint64_t xhint = (xh >> 2) & 0x3ff;
2277
2278 uint64_t* ewdata = m_temp_rect_data;
2279 ewdata[0] = ((uint64_t)0x24 << 56) | ((0x80L | tilenum) << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl
2280 ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0)
2281 ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0)
2282 ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0)
2283 memset(&ewdata[4], 0, 8 * sizeof(uint64_t)); // shade
2284 ewdata[12] = (s << 48) | (t << 32); // s, t, w (0)
2285 ewdata[13] = (dsdx >> 5) << 48; // dsdx, dtdx, dwdx (0)
2286 ewdata[14] = 0; // s frac (0), t frac (0), w frac (0)
2287 ewdata[15] = (dsdx & 0x1f) << 59; // dsdx frac, dtdx frac, dwdx frac (0)
2288 ewdata[16] = ((dtdy >> 5) & 0xffff) << 32; // dsde, dtde, dwde (0)
2289 ewdata[17] = ((dtdy >> 5) & 0xffff) << 32; // dsdy, dtdy, dwdy (0)
2290 ewdata[18] = ((dtdy & 0x1f) << 11) << 32; // dsde frac, dtde frac, dwde frac (0)
2291 ewdata[38] = ((dtdy & 0x1f) << 11) << 32; // dsdy frac, dtdy frac, dwdy frac (0)
2292 // ewdata[40-43] = 0; // depth
2293
2294 draw_triangle(true, true, false, true);
2295 }
2296
cmd_tex_rect_flip(uint64_t w1)2297 void n64_rdp::cmd_tex_rect_flip(uint64_t w1)
2298 {
2299 const uint64_t* data = m_cmd_data + m_cmd_cur;
2300
2301 const uint64_t w2 = data[1];
2302
2303 const uint64_t tilenum = (w1 >> 56) & 0x7;
2304 const uint64_t xh = (w1 >> 12) & 0xfff;
2305 const uint64_t xl = (w1 >> 44) & 0xfff;
2306 const uint64_t yh = (w1 >> 0) & 0xfff;
2307 uint64_t yl = (w1 >> 32) & 0xfff;
2308
2309 const uint64_t s = (w2 >> 48) & 0xffff;
2310 const uint64_t t = (w2 >> 32) & 0xffff;
2311 const uint64_t dsdx = SIGN16((w2 >> 16) & 0xffff);
2312 const uint64_t dtdy = SIGN16((w2 >> 0) & 0xffff);
2313
2314 if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
2315 {
2316 yl |= 3;
2317 }
2318
2319 const uint64_t xlint = (xl >> 2) & 0x3ff;
2320 const uint64_t xhint = (xh >> 2) & 0x3ff;
2321
2322 uint64_t* ewdata = m_temp_rect_data;
2323 ewdata[0] = ((uint64_t)0x25 << 56) | ((0x80L | tilenum) << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl
2324 ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0)
2325 ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0)
2326 ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0)
2327 memset(&ewdata[4], 0, 8 * sizeof(uint64_t)); // shade
2328 ewdata[12] = (s << 48) | (t << 32); // s, t, w (0)
2329 ewdata[13] = ((dtdy >> 5) & 0xffff) << 32; // dsdx, dtdx, dwdx (0)
2330 ewdata[14] = 0; // s frac (0), t frac (0), w frac (0)
2331 ewdata[15] = ((dtdy & 0x1f) << 43); // dsdx frac, dtdx frac, dwdx frac (0)
2332 ewdata[16] = (dsdx >> 5) << 48; // dsde, dtde, dwde (0)
2333 ewdata[17] = (dsdx >> 5) << 48; // dsdy, dtdy, dwdy (0)
2334 ewdata[18] = (dsdx & 0x1f) << 59; // dsde frac, dtde frac, dwde frac (0)
2335 ewdata[19] = (dsdx & 0x1f) << 59; // dsdy frac, dtdy frac, dwdy frac (0)
2336
2337 draw_triangle(true, true, false, true);
2338 }
2339
// Sync Load command handler. Currently does nothing; w1 is unused and the wait() call
// below is disabled.
void n64_rdp::cmd_sync_load(uint64_t w1)
{
	//wait("SyncLoad");
}
2344
// Sync Pipe command handler. Currently does nothing; w1 is unused and the wait() call
// below is disabled.
void n64_rdp::cmd_sync_pipe(uint64_t w1)
{
	//wait("SyncPipe");
}
2349
// Sync Tile command handler. Currently does nothing; w1 is unused and the wait() call
// below is disabled.
void n64_rdp::cmd_sync_tile(uint64_t w1)
{
	//wait("SyncTile");
}
2354
// Sync Full command handler: forwards the event to the peripheral device through
// dp_full_sync(). w1 is unused and the wait() call below is disabled.
void n64_rdp::cmd_sync_full(uint64_t w1)
{
	//wait("SyncFull");
	m_n64_periphs->dp_full_sync();
}
2360
cmd_set_key_gb(uint64_t w1)2361 void n64_rdp::cmd_set_key_gb(uint64_t w1)
2362 {
2363 m_key_scale.set_b(uint32_t(w1 >> 0) & 0xff);
2364 m_key_scale.set_g(uint32_t(w1 >> 16) & 0xff);
2365 }
2366
cmd_set_key_r(uint64_t w1)2367 void n64_rdp::cmd_set_key_r(uint64_t w1)
2368 {
2369 m_key_scale.set_r(uint32_t(w1 & 0xff));
2370 }
2371
cmd_set_fill_color32(uint64_t w1)2372 void n64_rdp::cmd_set_fill_color32(uint64_t w1)
2373 {
2374 //wait("SetFillColor");
2375 m_fill_color = (uint32_t)w1;
2376 }
2377
cmd_set_convert(uint64_t w1)2378 void n64_rdp::cmd_set_convert(uint64_t w1)
2379 {
2380 if(!m_pipe_clean) { m_pipe_clean = true; wait("SetConvert"); }
2381 int32_t k0 = int32_t(w1 >> 45) & 0x1ff;
2382 int32_t k1 = int32_t(w1 >> 36) & 0x1ff;
2383 int32_t k2 = int32_t(w1 >> 27) & 0x1ff;
2384 int32_t k3 = int32_t(w1 >> 18) & 0x1ff;
2385 int32_t k4 = int32_t(w1 >> 9) & 0x1ff;
2386 int32_t k5 = int32_t(w1 >> 0) & 0x1ff;
2387
2388 k0 = (SIGN9(k0) << 1) + 1;
2389 k1 = (SIGN9(k1) << 1) + 1;
2390 k2 = (SIGN9(k2) << 1) + 1;
2391 k3 = (SIGN9(k3) << 1) + 1;
2392
2393 set_yuv_factors(rgbaint_t(0, k0, k2, k3), rgbaint_t(0, 0, k1, 0), rgbaint_t(k4, k4, k4, k4), rgbaint_t(k5, k5, k5, k5));
2394 }
2395
cmd_set_scissor(uint64_t w1)2396 void n64_rdp::cmd_set_scissor(uint64_t w1)
2397 {
2398 m_scissor.m_xh = ((w1 >> 44) & 0xfff) >> 2;
2399 m_scissor.m_yh = ((w1 >> 32) & 0xfff) >> 2;
2400 m_scissor.m_xl = ((w1 >> 12) & 0xfff) >> 2;
2401 m_scissor.m_yl = ((w1 >> 0) & 0xfff) >> 2;
2402
2403 // TODO: handle f & o?
2404 }
2405
cmd_set_prim_depth(uint64_t w1)2406 void n64_rdp::cmd_set_prim_depth(uint64_t w1)
2407 {
2408 m_misc_state.m_primitive_z = (uint32_t)(w1 & 0x7fff0000);
2409 m_misc_state.m_primitive_dz = (uint16_t)(w1 >> 32);
2410 }
2411
cmd_set_other_modes(uint64_t w1)2412 void n64_rdp::cmd_set_other_modes(uint64_t w1)
2413 {
2414 //wait("SetOtherModes");
2415 m_other_modes.cycle_type = (w1 >> 52) & 0x3; // 01
2416 m_other_modes.persp_tex_en = (w1 >> 51) & 1; // 1
2417 m_other_modes.detail_tex_en = (w1 >> 50) & 1; // 0
2418 m_other_modes.sharpen_tex_en = (w1 >> 49) & 1; // 0
2419 m_other_modes.tex_lod_en = (w1 >> 48) & 1; // 0
2420 m_other_modes.en_tlut = (w1 >> 47) & 1; // 0
2421 m_other_modes.tlut_type = (w1 >> 46) & 1; // 0
2422 m_other_modes.sample_type = (w1 >> 45) & 1; // 1
2423 m_other_modes.mid_texel = (w1 >> 44) & 1; // 0
2424 m_other_modes.bi_lerp0 = (w1 >> 43) & 1; // 1
2425 m_other_modes.bi_lerp1 = (w1 >> 42) & 1; // 1
2426 m_other_modes.convert_one = (w1 >> 41) & 1; // 0
2427 m_other_modes.key_en = (w1 >> 40) & 1; // 0
2428 m_other_modes.rgb_dither_sel = (w1 >> 38) & 0x3; // 00
2429 m_other_modes.alpha_dither_sel = (w1 >> 36) & 0x3; // 01
2430 m_other_modes.blend_m1a_0 = (w1 >> 30) & 0x3; // 11
2431 m_other_modes.blend_m1a_1 = (w1 >> 28) & 0x3; // 00
2432 m_other_modes.blend_m1b_0 = (w1 >> 26) & 0x3; // 10
2433 m_other_modes.blend_m1b_1 = (w1 >> 24) & 0x3; // 00
2434 m_other_modes.blend_m2a_0 = (w1 >> 22) & 0x3; // 00
2435 m_other_modes.blend_m2a_1 = (w1 >> 20) & 0x3; // 01
2436 m_other_modes.blend_m2b_0 = (w1 >> 18) & 0x3; // 00
2437 m_other_modes.blend_m2b_1 = (w1 >> 16) & 0x3; // 01
2438 m_other_modes.force_blend = (w1 >> 14) & 1; // 0
2439 m_other_modes.blend_shift = m_other_modes.force_blend ? 5 : 2;
2440 m_other_modes.alpha_cvg_select = (w1 >> 13) & 1; // 1
2441 m_other_modes.cvg_times_alpha = (w1 >> 12) & 1; // 0
2442 m_other_modes.z_mode = (w1 >> 10) & 0x3; // 00
2443 m_other_modes.cvg_dest = (w1 >> 8) & 0x3; // 00
2444 m_other_modes.color_on_cvg = (w1 >> 7) & 1; // 0
2445 m_other_modes.image_read_en = (w1 >> 6) & 1; // 1
2446 m_other_modes.z_update_en = (w1 >> 5) & 1; // 1
2447 m_other_modes.z_compare_en = (w1 >> 4) & 1; // 1
2448 m_other_modes.antialias_en = (w1 >> 3) & 1; // 1
2449 m_other_modes.z_source_sel = (w1 >> 2) & 1; // 0
2450 m_other_modes.dither_alpha_en = (w1 >> 1) & 1; // 0
2451 m_other_modes.alpha_compare_en = (w1 >> 0) & 1; // 0
2452 m_other_modes.alpha_dither_mode = (m_other_modes.alpha_compare_en << 1) | m_other_modes.dither_alpha_en;
2453 }
2454
cmd_load_tlut(uint64_t w1)2455 void n64_rdp::cmd_load_tlut(uint64_t w1)
2456 {
2457 //wait("LoadTLUT");
2458 n64_tile_t* tile = m_tiles;
2459
2460 const int32_t tilenum = (w1 >> 24) & 0x7;
2461 const int32_t sl = tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
2462 const int32_t tl = tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
2463 const int32_t sh = tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
2464 const int32_t th = tile[tilenum].th = int32_t(w1 >> 0) & 0xfff;
2465
2466 if (tl != th)
2467 {
2468 fatalerror("Load tlut: tl=%d, th=%d\n",tl,th);
2469 }
2470
2471 m_capture.data_begin();
2472
2473 const int32_t count = ((sh >> 2) - (sl >> 2) + 1) << 2;
2474
2475 switch (m_misc_state.m_ti_size)
2476 {
2477 case PIXEL_SIZE_16BIT:
2478 {
2479 if (tile[tilenum].tmem < 256)
2480 {
2481 fatalerror("rdp_load_tlut: loading tlut into low half at %d qwords\n",tile[tilenum].tmem);
2482 }
2483 int32_t srcstart = (m_misc_state.m_ti_address + (tl >> 2) * (m_misc_state.m_ti_width << 1) + (sl >> 1)) >> 1;
2484 int32_t dststart = tile[tilenum].tmem << 2;
2485 uint16_t* dst = get_tmem16();
2486
2487 for (int32_t i = 0; i < count; i += 4)
2488 {
2489 if (dststart < 2048)
2490 {
2491 dst[dststart] = U_RREADIDX16(srcstart);
2492 m_capture.data_block()->put16(dst[dststart]);
2493 dst[dststart + 1] = dst[dststart];
2494 dst[dststart + 2] = dst[dststart];
2495 dst[dststart + 3] = dst[dststart];
2496 dststart += 4;
2497 srcstart += 1;
2498 }
2499 }
2500 break;
2501 }
2502 default: fatalerror("RDP: load_tlut: size = %d\n", m_misc_state.m_ti_size);
2503 }
2504
2505 m_capture.data_end();
2506
2507 m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2508 m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2509 }
2510
cmd_set_tile_size(uint64_t w1)2511 void n64_rdp::cmd_set_tile_size(uint64_t w1)
2512 {
2513 //wait("SetTileSize");
2514
2515 const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2516
2517 m_tiles[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
2518 m_tiles[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
2519 m_tiles[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
2520 m_tiles[tilenum].th = int32_t(w1 >> 0) & 0xfff;
2521
2522 m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2523 m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2524 }
2525
cmd_load_block(uint64_t w1)2526 void n64_rdp::cmd_load_block(uint64_t w1)
2527 {
2528 //wait("LoadBlock");
2529 n64_tile_t* tile = m_tiles;
2530
2531 const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2532 uint16_t* tc = get_tmem16();
2533
2534 int32_t sl = tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
2535 int32_t tl = tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
2536 int32_t sh = tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
2537 const int32_t dxt = int32_t(w1 >> 0) & 0xfff;
2538
2539 if (sh < sl)
2540 {
2541 fatalerror("load_block: sh < sl\n");
2542 }
2543
2544 int32_t width = (sh - sl) + 1;
2545
2546 width = (width << m_misc_state.m_ti_size) >> 1;
2547 if (width & 7)
2548 {
2549 width = (width & ~7) + 8;
2550 }
2551 width >>= 3;
2552
2553 const int32_t tb = tile[tilenum].tmem << 2;
2554
2555 const int32_t tiwinwords = (m_misc_state.m_ti_width << m_misc_state.m_ti_size) >> 2;
2556 const int32_t slinwords = (sl << m_misc_state.m_ti_size) >> 2;
2557
2558 const uint32_t src = (m_misc_state.m_ti_address >> 1) + (tl * tiwinwords) + slinwords;
2559
2560 m_capture.data_begin();
2561
2562 if (dxt != 0)
2563 {
2564 int32_t j = 0;
2565 int32_t t = 0;
2566 int32_t oldt = 0;
2567
2568 if (tile[tilenum].size != PIXEL_SIZE_32BIT && tile[tilenum].format != FORMAT_YUV)
2569 {
2570 for (int32_t i = 0; i < width; i ++)
2571 {
2572 oldt = t;
2573 t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2574 if (t != oldt)
2575 {
2576 i += tile[tilenum].line;
2577 }
2578
2579 int32_t ptr = tb + (i << 2);
2580 int32_t srcptr = src + (i << 2);
2581
2582 tc[(ptr ^ t) & 0x7ff] = U_RREADIDX16(srcptr);
2583 tc[((ptr + 1) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 1);
2584 tc[((ptr + 2) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 2);
2585 tc[((ptr + 3) ^ t) & 0x7ff] = U_RREADIDX16(srcptr + 3);
2586
2587 m_capture.data_block()->put16(U_RREADIDX16(srcptr));
2588 m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
2589 m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
2590 m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
2591
2592 j += dxt;
2593 }
2594 }
2595 else if (tile[tilenum].format == FORMAT_YUV)
2596 {
2597 for (int32_t i = 0; i < width; i ++)
2598 {
2599 oldt = t;
2600 t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2601 if (t != oldt)
2602 {
2603 i += tile[tilenum].line;
2604 }
2605
2606 int32_t ptr = ((tb + (i << 1)) ^ t) & 0x3ff;
2607 int32_t srcptr = src + (i << 2);
2608
2609 int32_t first = U_RREADIDX16(srcptr);
2610 int32_t sec = U_RREADIDX16(srcptr + 1);
2611 tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
2612 tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);
2613
2614 ptr = ((tb + (i << 1) + 1) ^ t) & 0x3ff;
2615 first = U_RREADIDX16(srcptr + 2);
2616 sec = U_RREADIDX16(srcptr + 3);
2617 tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
2618 tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);
2619
2620 m_capture.data_block()->put16(U_RREADIDX16(srcptr));
2621 m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
2622 m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
2623 m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
2624 j += dxt;
2625 }
2626 }
2627 else
2628 {
2629 for (int32_t i = 0; i < width; i ++)
2630 {
2631 oldt = t;
2632 t = ((j >> 11) & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2633 if (t != oldt)
2634 i += tile[tilenum].line;
2635
2636 int32_t ptr = ((tb + (i << 1)) ^ t) & 0x3ff;
2637 int32_t srcptr = src + (i << 2);
2638 tc[ptr] = U_RREADIDX16(srcptr);
2639 tc[ptr | 0x400] = U_RREADIDX16(srcptr + 1);
2640
2641 ptr = ((tb + (i << 1) + 1) ^ t) & 0x3ff;
2642 tc[ptr] = U_RREADIDX16(srcptr + 2);
2643 tc[ptr | 0x400] = U_RREADIDX16(srcptr + 3);
2644
2645 m_capture.data_block()->put16(U_RREADIDX16(srcptr));
2646 m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
2647 m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
2648 m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
2649
2650 j += dxt;
2651 }
2652 }
2653 tile[tilenum].th = tl + (j >> 11);
2654 }
2655 else
2656 {
2657 if (tile[tilenum].size != PIXEL_SIZE_32BIT && tile[tilenum].format != FORMAT_YUV)
2658 {
2659 for (int32_t i = 0; i < width; i ++)
2660 {
2661 int32_t ptr = tb + (i << 2);
2662 int32_t srcptr = src + (i << 2);
2663 tc[(ptr ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr);
2664 tc[((ptr + 1) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 1);
2665 tc[((ptr + 2) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 2);
2666 tc[((ptr + 3) ^ WORD_ADDR_XOR) & 0x7ff] = U_RREADIDX16(srcptr + 3);
2667
2668 m_capture.data_block()->put16(U_RREADIDX16(srcptr));
2669 m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
2670 m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
2671 m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
2672 }
2673 }
2674 else if (tile[tilenum].format == FORMAT_YUV)
2675 {
2676 for (int32_t i = 0; i < width; i ++)
2677 {
2678 int32_t ptr = ((tb + (i << 1)) ^ WORD_ADDR_XOR) & 0x3ff;
2679 int32_t srcptr = src + (i << 2);
2680 int32_t first = U_RREADIDX16(srcptr);
2681 int32_t sec = U_RREADIDX16(srcptr + 1);
2682 tc[ptr] = ((first >> 8) << 8) | (sec >> 8);//UV pair
2683 tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);
2684
2685 ptr = ((tb + (i << 1) + 1) ^ WORD_ADDR_XOR) & 0x3ff;
2686 first = U_RREADIDX16(srcptr + 2);
2687 sec = U_RREADIDX16(srcptr + 3);
2688 tc[ptr] = ((first >> 8) << 8) | (sec >> 8);
2689 tc[ptr | 0x400] = ((first & 0xff) << 8) | (sec & 0xff);
2690
2691 m_capture.data_block()->put16(U_RREADIDX16(srcptr));
2692 m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
2693 m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
2694 m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
2695 }
2696 }
2697 else
2698 {
2699 for (int32_t i = 0; i < width; i ++)
2700 {
2701 int32_t ptr = ((tb + (i << 1)) ^ WORD_ADDR_XOR) & 0x3ff;
2702 int32_t srcptr = src + (i << 2);
2703 tc[ptr] = U_RREADIDX16(srcptr);
2704 tc[ptr | 0x400] = U_RREADIDX16(srcptr + 1);
2705
2706 ptr = ((tb + (i << 1) + 1) ^ WORD_ADDR_XOR) & 0x3ff;
2707 tc[ptr] = U_RREADIDX16(srcptr + 2);
2708 tc[ptr | 0x400] = U_RREADIDX16(srcptr + 3);
2709
2710 m_capture.data_block()->put16(U_RREADIDX16(srcptr));
2711 m_capture.data_block()->put16(U_RREADIDX16(srcptr+1));
2712 m_capture.data_block()->put16(U_RREADIDX16(srcptr+2));
2713 m_capture.data_block()->put16(U_RREADIDX16(srcptr+3));
2714 }
2715 }
2716 tile[tilenum].th = tl;
2717 }
2718
2719 m_capture.data_end();
2720
2721 m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2722 m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2723 }
2724
cmd_load_tile(uint64_t w1)2725 void n64_rdp::cmd_load_tile(uint64_t w1)
2726 {
2727 //wait("LoadTile");
2728 n64_tile_t* tile = m_tiles;
2729 const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2730
2731 tile[tilenum].sl = int32_t(w1 >> 44) & 0xfff;
2732 tile[tilenum].tl = int32_t(w1 >> 32) & 0xfff;
2733 tile[tilenum].sh = int32_t(w1 >> 12) & 0xfff;
2734 tile[tilenum].th = int32_t(w1 >> 0) & 0xfff;
2735
2736 const int32_t sl = tile[tilenum].sl >> 2;
2737 const int32_t tl = tile[tilenum].tl >> 2;
2738 const int32_t sh = tile[tilenum].sh >> 2;
2739 const int32_t th = tile[tilenum].th >> 2;
2740
2741 const int32_t width = (sh - sl) + 1;
2742 const int32_t height = (th - tl) + 1;
2743 /*
2744 int32_t topad;
2745 if (m_misc_state.m_ti_size < 3)
2746 {
2747 topad = (width * m_misc_state.m_ti_size) & 0x7;
2748 }
2749 else
2750 {
2751 topad = (width << 2) & 0x7;
2752 }
2753 topad = 0; // ????
2754 */
2755
2756 m_capture.data_begin();
2757
2758 switch (m_misc_state.m_ti_size)
2759 {
2760 case PIXEL_SIZE_8BIT:
2761 {
2762 const uint32_t src = m_misc_state.m_ti_address;
2763 const int32_t tb = tile[tilenum].tmem << 3;
2764 uint8_t* tc = get_tmem8();
2765
2766 for (int32_t j = 0; j < height; j++)
2767 {
2768 const int32_t tline = tb + ((tile[tilenum].line << 3) * j);
2769 const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2770 const int32_t xorval8 = ((j & 1) ? BYTE_XOR_DWORD_SWAP : BYTE_ADDR_XOR);
2771
2772 for (int32_t i = 0; i < width; i++)
2773 {
2774 const uint8_t data = U_RREADADDR8(src + s + i);
2775 m_capture.data_block()->put8(data);
2776 tc[((tline + i) ^ xorval8) & 0xfff] = data;
2777 }
2778 }
2779 break;
2780 }
2781 case PIXEL_SIZE_16BIT:
2782 {
2783 const uint32_t src = m_misc_state.m_ti_address >> 1;
2784 uint16_t* tc = get_tmem16();
2785
2786 if (tile[tilenum].format != FORMAT_YUV)
2787 {
2788 for (int32_t j = 0; j < height; j++)
2789 {
2790 const int32_t tb = tile[tilenum].tmem << 2;
2791 const int32_t tline = tb + ((tile[tilenum].line << 2) * j);
2792 const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2793 const int32_t xorval16 = (j & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2794
2795 for (int32_t i = 0; i < width; i++)
2796 {
2797 const uint32_t taddr = (tline + i) ^ xorval16;
2798 const uint16_t data = U_RREADIDX16(src + s + i);
2799 m_capture.data_block()->put16(data);
2800 tc[taddr & 0x7ff] = data;
2801 }
2802 }
2803 }
2804 else
2805 {
2806 for (int32_t j = 0; j < height; j++)
2807 {
2808 const int32_t tb = tile[tilenum].tmem << 3;
2809 const int32_t tline = tb + ((tile[tilenum].line << 3) * j);
2810 const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2811 const int32_t xorval8 = (j & 1) ? BYTE_XOR_DWORD_SWAP : BYTE_ADDR_XOR;
2812
2813 for (int32_t i = 0; i < width; i++)
2814 {
2815 uint32_t taddr = ((tline + i) ^ xorval8) & 0x7ff;
2816 uint16_t yuvword = U_RREADIDX16(src + s + i);
2817 m_capture.data_block()->put16(yuvword);
2818 get_tmem8()[taddr] = yuvword >> 8;
2819 get_tmem8()[taddr | 0x800] = yuvword & 0xff;
2820 }
2821 }
2822 }
2823 break;
2824 }
2825 case PIXEL_SIZE_32BIT:
2826 {
2827 const uint32_t src = m_misc_state.m_ti_address >> 2;
2828 const int32_t tb = (tile[tilenum].tmem << 2);
2829 uint16_t* tc16 = get_tmem16();
2830
2831 for (int32_t j = 0; j < height; j++)
2832 {
2833 const int32_t tline = tb + ((tile[tilenum].line << 2) * j);
2834
2835 const int32_t s = ((j + tl) * m_misc_state.m_ti_width) + sl;
2836 const int32_t xorval32cur = (j & 1) ? WORD_XOR_DWORD_SWAP : WORD_ADDR_XOR;
2837 for (int32_t i = 0; i < width; i++)
2838 {
2839 uint32_t c = U_RREADIDX32(src + s + i);
2840 m_capture.data_block()->put32(c);
2841 uint32_t ptr = ((tline + i) ^ xorval32cur) & 0x3ff;
2842 tc16[ptr] = c >> 16;
2843 tc16[ptr | 0x400] = c & 0xffff;
2844 }
2845 }
2846 break;
2847 }
2848
2849 default: fatalerror("RDP: load_tile: size = %d\n", m_misc_state.m_ti_size);
2850 }
2851
2852 m_capture.data_end();
2853
2854 m_tiles[tilenum].sth = rgbaint_t(m_tiles[tilenum].sh, m_tiles[tilenum].sh, m_tiles[tilenum].th, m_tiles[tilenum].th);
2855 m_tiles[tilenum].stl = rgbaint_t(m_tiles[tilenum].sl, m_tiles[tilenum].sl, m_tiles[tilenum].tl, m_tiles[tilenum].tl);
2856 }
2857
cmd_set_tile(uint64_t w1)2858 void n64_rdp::cmd_set_tile(uint64_t w1)
2859 {
2860 //wait("SetTile");
2861 const int32_t tilenum = int32_t(w1 >> 24) & 0x7;
2862 n64_tile_t* tex_tile = &m_tiles[tilenum];
2863
2864 tex_tile->format = int32_t(w1 >> 53) & 0x7;
2865 tex_tile->size = int32_t(w1 >> 51) & 0x3;
2866 tex_tile->line = int32_t(w1 >> 41) & 0x1ff;
2867 tex_tile->tmem = int32_t(w1 >> 32) & 0x1ff;
2868 tex_tile->palette = int32_t(w1 >> 20) & 0xf;
2869 tex_tile->ct = int32_t(w1 >> 19) & 0x1;
2870 tex_tile->mt = int32_t(w1 >> 18) & 0x1;
2871 tex_tile->mask_t = int32_t(w1 >> 14) & 0xf;
2872 tex_tile->shift_t = int32_t(w1 >> 10) & 0xf;
2873 tex_tile->cs = int32_t(w1 >> 9) & 0x1;
2874 tex_tile->ms = int32_t(w1 >> 8) & 0x1;
2875 tex_tile->mask_s = int32_t(w1 >> 4) & 0xf;
2876 tex_tile->shift_s = int32_t(w1 >> 0) & 0xf;
2877
2878 tex_tile->lshift_s = (tex_tile->shift_s >= 11) ? (16 - tex_tile->shift_s) : 0;
2879 tex_tile->rshift_s = (tex_tile->shift_s < 11) ? tex_tile->shift_s : 0;
2880 tex_tile->lshift_t = (tex_tile->shift_t >= 11) ? (16 - tex_tile->shift_t) : 0;
2881 tex_tile->rshift_t = (tex_tile->shift_t < 11) ? tex_tile->shift_t : 0;
2882 tex_tile->wrapped_mask_s = (tex_tile->mask_s > 10 ? 10 : tex_tile->mask_s);
2883 tex_tile->wrapped_mask_t = (tex_tile->mask_t > 10 ? 10 : tex_tile->mask_t);
2884 tex_tile->wrapped_mask = rgbaint_t(tex_tile->wrapped_mask_s, tex_tile->wrapped_mask_s, tex_tile->wrapped_mask_t, tex_tile->wrapped_mask_t);
2885 tex_tile->clamp_s = tex_tile->cs || !tex_tile->mask_s;
2886 tex_tile->clamp_t = tex_tile->ct || !tex_tile->mask_t;
2887 tex_tile->mm = rgbaint_t(tex_tile->ms ? ~0 : 0, tex_tile->ms ? ~0 : 0, tex_tile->mt ? ~0 : 0, tex_tile->mt ? ~0 : 0);
2888 tex_tile->invmm = rgbaint_t(tex_tile->ms ? 0 : ~0, tex_tile->ms ? 0 : ~0, tex_tile->mt ? 0 : ~0, tex_tile->mt ? 0 : ~0);
2889 tex_tile->mask = rgbaint_t(tex_tile->mask_s ? ~0 : 0, tex_tile->mask_s ? ~0 : 0, tex_tile->mask_t ? ~0 : 0, tex_tile->mask_t ? ~0 : 0);
2890 tex_tile->invmask = rgbaint_t(tex_tile->mask_s ? 0 : ~0, tex_tile->mask_s ? 0 : ~0, tex_tile->mask_t ? 0 : ~0, tex_tile->mask_t ? 0 : ~0);
2891 tex_tile->lshift = rgbaint_t(tex_tile->lshift_s, tex_tile->lshift_s, tex_tile->lshift_t, tex_tile->lshift_t);
2892 tex_tile->rshift = rgbaint_t(tex_tile->rshift_s, tex_tile->rshift_s, tex_tile->rshift_t, tex_tile->rshift_t);
2893 tex_tile->clamp_st = rgbaint_t(tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_s ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0, tex_tile->clamp_t ? ~0 : 0);
2894
2895 if (tex_tile->format == FORMAT_I && tex_tile->size > PIXEL_SIZE_8BIT)
2896 {
2897 tex_tile->format = FORMAT_RGBA; // Used by Supercross 2000 (in-game)
2898 }
2899 if (tex_tile->format == FORMAT_CI && tex_tile->size > PIXEL_SIZE_8BIT)
2900 {
2901 tex_tile->format = FORMAT_RGBA; // Used by Clay Fighter - Sculptor's Cut
2902 }
2903
2904 if (tex_tile->format == FORMAT_RGBA && tex_tile->size < PIXEL_SIZE_16BIT)
2905 {
2906 tex_tile->format = FORMAT_CI; // Used by Exterem-G2, Madden Football 64, and Rat Attack
2907 }
2908
2909 //m_pending_mode_block = true;
2910 }
2911
cmd_fill_rect(uint64_t w1)2912 void n64_rdp::cmd_fill_rect(uint64_t w1)
2913 {
2914 //if(m_pending_mode_block) { wait("Block on pending mode-change"); m_pending_mode_block = false; }
2915 const uint64_t xh = (w1 >> 12) & 0xfff;
2916 const uint64_t xl = (w1 >> 44) & 0xfff;
2917 const uint64_t yh = (w1 >> 0) & 0xfff;
2918 uint64_t yl = (w1 >> 32) & 0xfff;
2919
2920 if (m_other_modes.cycle_type == CYCLE_TYPE_FILL || m_other_modes.cycle_type == CYCLE_TYPE_COPY)
2921 {
2922 yl |= 3;
2923 }
2924
2925 const uint64_t xlint = (xl >> 2) & 0x3ff;
2926 const uint64_t xhint = (xh >> 2) & 0x3ff;
2927
2928 uint64_t* ewdata = m_temp_rect_data;
2929 ewdata[0] = ((uint64_t)0x3680 << 48) | (yl << 32) | (yl << 16) | yh; // command, flipped, tile, yl, ym, yh
2930 ewdata[1] = (xlint << 48) | ((xl & 3) << 46); // xl, xl frac, dxldy (0), dxldy frac (0)
2931 ewdata[2] = (xhint << 48) | ((xh & 3) << 46); // xh, xh frac, dxhdy (0), dxhdy frac (0)
2932 ewdata[3] = (xlint << 48) | ((xl & 3) << 46); // xm, xm frac, dxmdy (0), dxmdy frac (0)
2933 memset(&ewdata[4], 0, 18 * sizeof(uint64_t));//shade, texture, depth
2934
2935 draw_triangle(false, false, false, true);
2936 }
2937
cmd_set_fog_color(uint64_t w1)2938 void n64_rdp::cmd_set_fog_color(uint64_t w1)
2939 {
2940 m_fog_color.set(uint8_t(w1), uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2941 }
2942
cmd_set_blend_color(uint64_t w1)2943 void n64_rdp::cmd_set_blend_color(uint64_t w1)
2944 {
2945 m_blend_color.set(uint8_t(w1), uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2946 }
2947
cmd_set_prim_color(uint64_t w1)2948 void n64_rdp::cmd_set_prim_color(uint64_t w1)
2949 {
2950 m_misc_state.m_min_level = uint32_t(w1 >> 40) & 0x1f;
2951 const uint8_t prim_lod_fraction(w1 >> 32);
2952 m_prim_lod_fraction.set(prim_lod_fraction, prim_lod_fraction, prim_lod_fraction, prim_lod_fraction);
2953
2954 const uint8_t alpha(w1);
2955 m_prim_color.set(alpha, uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2956 m_prim_alpha.set(alpha, alpha, alpha, alpha);
2957 }
2958
cmd_set_env_color(uint64_t w1)2959 void n64_rdp::cmd_set_env_color(uint64_t w1)
2960 {
2961 const uint8_t alpha(w1);
2962 m_env_color.set(alpha, uint8_t(w1 >> 24), uint8_t(w1 >> 16), uint8_t(w1 >> 8));
2963 m_env_alpha.set(alpha, alpha, alpha, alpha);
2964 }
2965
cmd_set_combine(uint64_t w1)2966 void n64_rdp::cmd_set_combine(uint64_t w1)
2967 {
2968 m_combine.sub_a_rgb0 = uint32_t(w1 >> 52) & 0xf;
2969 m_combine.mul_rgb0 = uint32_t(w1 >> 47) & 0x1f;
2970 m_combine.sub_a_a0 = uint32_t(w1 >> 44) & 0x7;
2971 m_combine.mul_a0 = uint32_t(w1 >> 41) & 0x7;
2972 m_combine.sub_a_rgb1 = uint32_t(w1 >> 37) & 0xf;
2973 m_combine.mul_rgb1 = uint32_t(w1 >> 32) & 0x1f;
2974
2975 m_combine.sub_b_rgb0 = uint32_t(w1 >> 28) & 0xf;
2976 m_combine.sub_b_rgb1 = uint32_t(w1 >> 24) & 0xf;
2977 m_combine.sub_a_a1 = uint32_t(w1 >> 21) & 0x7;
2978 m_combine.mul_a1 = uint32_t(w1 >> 18) & 0x7;
2979 m_combine.add_rgb0 = uint32_t(w1 >> 15) & 0x7;
2980 m_combine.sub_b_a0 = uint32_t(w1 >> 12) & 0x7;
2981 m_combine.add_a0 = uint32_t(w1 >> 9) & 0x7;
2982 m_combine.add_rgb1 = uint32_t(w1 >> 6) & 0x7;
2983 m_combine.sub_b_a1 = uint32_t(w1 >> 3) & 0x7;
2984 m_combine.add_a1 = uint32_t(w1 >> 0) & 0x7;
2985 }
2986
cmd_set_texture_image(uint64_t w1)2987 void n64_rdp::cmd_set_texture_image(uint64_t w1)
2988 {
2989 m_misc_state.m_ti_format = uint32_t(w1 >> 53) & 0x7;
2990 m_misc_state.m_ti_size = uint32_t(w1 >> 51) & 0x3;
2991 m_misc_state.m_ti_width = (uint32_t(w1 >> 32) & 0x3ff) + 1;
2992 m_misc_state.m_ti_address = uint32_t(w1) & 0x01ffffff;
2993 }
2994
cmd_set_mask_image(uint64_t w1)2995 void n64_rdp::cmd_set_mask_image(uint64_t w1)
2996 {
2997 //wait("SetMaskImage");
2998
2999 m_misc_state.m_zb_address = uint32_t(w1) & 0x01ffffff;
3000 }
3001
cmd_set_color_image(uint64_t w1)3002 void n64_rdp::cmd_set_color_image(uint64_t w1)
3003 {
3004 //wait("SetColorImage");
3005
3006 m_misc_state.m_fb_format = uint32_t(w1 >> 53) & 0x7;
3007 m_misc_state.m_fb_size = uint32_t(w1 >> 51) & 0x3;
3008 m_misc_state.m_fb_width = (uint32_t(w1 >> 32) & 0x3ff) + 1;
3009 m_misc_state.m_fb_address = uint32_t(w1) & 0x01ffffff;
3010
3011 if (m_misc_state.m_fb_format < 2 || m_misc_state.m_fb_format > 32) // Jet Force Gemini sets the format to 4, Intensity. Protection?
3012 {
3013 m_misc_state.m_fb_format = 2;
3014 }
3015 }
3016
3017 /*****************************************************************************/
3018
cmd_invalid(uint64_t w1)3019 void n64_rdp::cmd_invalid(uint64_t w1)
3020 {
3021 fatalerror("n64_rdp::Invalid: %d, %08x %08x\n", uint32_t(w1 >> 56) & 0x3f, uint32_t(w1 >> 32), (uint32_t)w1);
3022 }
3023
cmd_noop(uint64_t w1)3024 void n64_rdp::cmd_noop(uint64_t w1)
3025 {
3026 // Do nothing
3027 }
3028
3029
process_command_list()3030 void n64_rdp::process_command_list()
3031 {
3032 int32_t length = m_end - m_current;
3033
3034 if(length < 0)
3035 {
3036 m_current = m_end;
3037 return;
3038 }
3039
3040 // load command data
3041 for(int32_t i = 0; i < length; i += 8)
3042 {
3043 m_cmd_data[m_cmd_ptr++] = read_data((m_current & 0x1fffffff) + i);
3044 }
3045
3046 m_current = m_end;
3047
3048 uint32_t cmd = (m_cmd_data[0] >> 56) & 0x3f;
3049 uint32_t cmd_length = uint32_t(m_cmd_ptr + 1) * 8;
3050
3051 set_status(get_status() &~ DP_STATUS_FREEZE);
3052
3053 // check if more data is needed
3054 if (cmd_length < s_rdp_command_length[cmd])
3055 {
3056 return;
3057 }
3058
3059 while (m_cmd_cur < m_cmd_ptr)
3060 {
3061 cmd = (m_cmd_data[m_cmd_cur] >> 56) & 0x3f;
3062
3063 if (((m_cmd_ptr - m_cmd_cur) * 8) < s_rdp_command_length[cmd])
3064 {
3065 return;
3066 //fatalerror("rdp_process_list: not enough rdp command data: cur = %d, ptr = %d, expected = %d\n", m_cmd_cur, m_cmd_ptr, s_rdp_command_length[cmd]);
3067 }
3068
3069 m_capture.command(&m_cmd_data[m_cmd_cur], s_rdp_command_length[cmd] / 8);
3070
3071 if (LOG_RDP_EXECUTION)
3072 {
3073 char string[4000];
3074 disassemble(string);
3075
3076 fprintf(rdp_exec, "%08X: %08X%08X %s\n", m_start+(m_cmd_cur * 8), uint32_t(m_cmd_data[m_cmd_cur] >> 32), (uint32_t)m_cmd_data[m_cmd_cur], string);
3077 fflush(rdp_exec);
3078 }
3079
3080 // execute the command
3081 uint64_t w = m_cmd_data[m_cmd_cur];
3082
3083 switch(cmd)
3084 {
3085 case 0x00: cmd_noop(w); break;
3086
3087 case 0x08: cmd_triangle(w); break;
3088 case 0x09: cmd_triangle_z(w); break;
3089 case 0x0a: cmd_triangle_t(w); break;
3090 case 0x0b: cmd_triangle_tz(w); break;
3091 case 0x0c: cmd_triangle_s(w); break;
3092 case 0x0d: cmd_triangle_sz(w); break;
3093 case 0x0e: cmd_triangle_st(w); break;
3094 case 0x0f: cmd_triangle_stz(w); break;
3095
3096 case 0x24: cmd_tex_rect(w); break;
3097 case 0x25: cmd_tex_rect_flip(w); break;
3098
3099 case 0x26: cmd_sync_load(w); break;
3100 case 0x27: cmd_sync_pipe(w); break;
3101 case 0x28: cmd_sync_tile(w); break;
3102 case 0x29: cmd_sync_full(w); break;
3103
3104 case 0x2a: cmd_set_key_gb(w); break;
3105 case 0x2b: cmd_set_key_r(w); break;
3106
3107 case 0x2c: cmd_set_convert(w); break;
3108 case 0x3c: cmd_set_combine(w); break;
3109 case 0x2d: cmd_set_scissor(w); break;
3110 case 0x2e: cmd_set_prim_depth(w); break;
3111 case 0x2f: cmd_set_other_modes(w);break;
3112
3113 case 0x30: cmd_load_tlut(w); break;
3114 case 0x33: cmd_load_block(w); break;
3115 case 0x34: cmd_load_tile(w); break;
3116
3117 case 0x32: cmd_set_tile_size(w); break;
3118 case 0x35: cmd_set_tile(w); break;
3119
3120 case 0x36: cmd_fill_rect(w); break;
3121
3122 case 0x37: cmd_set_fill_color32(w); break;
3123 case 0x38: cmd_set_fog_color(w); break;
3124 case 0x39: cmd_set_blend_color(w);break;
3125 case 0x3a: cmd_set_prim_color(w); break;
3126 case 0x3b: cmd_set_env_color(w); break;
3127
3128 case 0x3d: cmd_set_texture_image(w); break;
3129 case 0x3e: cmd_set_mask_image(w); break;
3130 case 0x3f: cmd_set_color_image(w); break;
3131 }
3132
3133 m_cmd_cur += s_rdp_command_length[cmd] / 8;
3134 };
3135 m_cmd_ptr = 0;
3136 m_cmd_cur = 0;
3137
3138 m_start = m_current = m_end;
3139 }
3140
3141 /*****************************************************************************/
3142
n64_rdp(n64_state & state,uint32_t * rdram,uint32_t * dmem)3143 n64_rdp::n64_rdp(n64_state &state, uint32_t* rdram, uint32_t* dmem) : poly_manager<uint32_t, rdp_poly_state, 8, 32000>(state.machine())
3144 {
3145 ignore = false;
3146 dolog = false;
3147
3148 m_rdram = rdram;
3149 m_dmem = dmem;
3150
3151 m_aux_buf_ptr = 0;
3152 m_aux_buf = nullptr;
3153 m_pipe_clean = true;
3154
3155 m_pending_mode_block = false;
3156
3157 m_cmd_ptr = 0;
3158 m_cmd_cur = 0;
3159
3160 m_start = 0;
3161 m_end = 0;
3162 m_current = 0;
3163 m_status = 0x88;
3164
3165 m_one.set(0xff, 0xff, 0xff, 0xff);
3166 m_zero.set(0, 0, 0, 0);
3167
3168 m_tmem = nullptr;
3169
3170 m_machine = nullptr;
3171 m_n64_periphs = nullptr;
3172
3173 //memset(m_hidden_bits, 3, 8388608);
3174
3175 m_prim_lod_fraction.set(0, 0, 0, 0);
3176 z_build_com_table();
3177
3178 memset(m_temp_rect_data, 0, sizeof(uint32_t) * 0x1000);
3179
3180 for (int32_t i = 0; i < 0x4000; i++)
3181 {
3182 uint32_t exponent = (i >> 11) & 7;
3183 uint32_t mantissa = i & 0x7ff;
3184 m_z_complete_dec_table[i] = ((mantissa << m_z_dec_table[exponent].shift) + m_z_dec_table[exponent].add) & 0x3fffff;
3185 }
3186
3187 precalc_cvmask_derivatives();
3188
3189 for(int32_t i = 0; i < 0x200; i++)
3190 {
3191 switch((i >> 7) & 3)
3192 {
3193 case 0:
3194 case 1:
3195 s_special_9bit_clamptable[i] = i & 0xff;
3196 break;
3197 case 2:
3198 s_special_9bit_clamptable[i] = 0xff;
3199 break;
3200 case 3:
3201 s_special_9bit_clamptable[i] = 0;
3202 break;
3203 }
3204 }
3205
3206 for(int32_t i = 0; i < 32; i++)
3207 {
3208 m_replicated_rgba[i] = (i << 3) | ((i >> 2) & 7);
3209 }
3210
3211 for(int32_t i = 0; i < 0x10000; i++)
3212 {
3213 m_dzpix_normalize[i] = (uint16_t)normalize_dzpix(i & 0xffff);
3214 }
3215
3216 m_compute_cvg[0] = &n64_rdp::compute_cvg_noflip;
3217 m_compute_cvg[1] = &n64_rdp::compute_cvg_flip;
3218 }
3219
render_spans(int32_t start,int32_t end,int32_t tilenum,bool flip,extent_t * spans,bool rect,rdp_poly_state * object)3220 void n64_rdp::render_spans(int32_t start, int32_t end, int32_t tilenum, bool flip, extent_t* spans, bool rect, rdp_poly_state* object)
3221 {
3222 const int32_t clipy1 = m_scissor.m_yh;
3223 const int32_t clipy2 = m_scissor.m_yl;
3224 const rectangle clip(m_scissor.m_xh, m_scissor.m_xl, m_scissor.m_yh, m_scissor.m_yl);
3225
3226 int32_t offset = 0;
3227
3228 if (clipy2 <= 0)
3229 {
3230 return;
3231 }
3232
3233 if (start < clipy1)
3234 {
3235 offset = clipy1 - start;
3236 start = clipy1;
3237 }
3238 if (start >= clipy2)
3239 {
3240 offset = start - (clipy2 - 1);
3241 start = clipy2 - 1;
3242 }
3243 if (end < clipy1)
3244 {
3245 end = clipy1;
3246 }
3247 if (end >= clipy2)
3248 {
3249 end = clipy2 - 1;
3250 }
3251
3252 object->m_rdp = this;
3253 memcpy(&object->m_misc_state, &m_misc_state, sizeof(misc_state_t));
3254 memcpy(&object->m_other_modes, &m_other_modes, sizeof(other_modes_t));
3255 memcpy(&object->m_span_base, &m_span_base, sizeof(span_base_t));
3256 memcpy(&object->m_scissor, &m_scissor, sizeof(rectangle_t));
3257 memcpy(&object->m_tiles, &m_tiles, 8 * sizeof(n64_tile_t));
3258 object->tilenum = tilenum;
3259 object->flip = flip;
3260 object->m_fill_color = m_fill_color;
3261 object->rect = rect;
3262
3263 switch(m_other_modes.cycle_type)
3264 {
3265 case CYCLE_TYPE_1:
3266 render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_1cycle, this), start, (end - start) + 1, spans + offset);
3267 break;
3268
3269 case CYCLE_TYPE_2:
3270 render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_2cycle, this), start, (end - start) + 1, spans + offset);
3271 break;
3272
3273 case CYCLE_TYPE_COPY:
3274 render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_copy, this), start, (end - start) + 1, spans + offset);
3275 break;
3276
3277 case CYCLE_TYPE_FILL:
3278 render_triangle_custom(clip, render_delegate(&n64_rdp::span_draw_fill, this), start, (end - start) + 1, spans + offset);
3279 break;
3280 }
3281 wait("render spans");
3282 }
3283
rgbaz_clip(int32_t sr,int32_t sg,int32_t sb,int32_t sa,int32_t * sz,rdp_span_aux * userdata)3284 void n64_rdp::rgbaz_clip(int32_t sr, int32_t sg, int32_t sb, int32_t sa, int32_t* sz, rdp_span_aux* userdata)
3285 {
3286 userdata->m_shade_color.set(sa, sr, sg, sb);
3287 userdata->m_shade_color.clamp_and_clear(0xfffffe00);
3288 uint32_t a = userdata->m_shade_color.get_a();
3289 userdata->m_shade_alpha.set(a, a, a, a);
3290
3291 int32_t zanded = (*sz) & 0x60000;
3292
3293 zanded >>= 17;
3294 switch(zanded)
3295 {
3296 case 0: *sz &= 0x3ffff; break;
3297 case 1: *sz &= 0x3ffff; break;
3298 case 2: *sz = 0x3ffff; break;
3299 case 3: *sz = 0x3ffff; break;
3300 }
3301 }
3302
rgbaz_correct_triangle(int32_t offx,int32_t offy,int32_t * r,int32_t * g,int32_t * b,int32_t * a,int32_t * z,rdp_span_aux * userdata,const rdp_poly_state & object)3303 void n64_rdp::rgbaz_correct_triangle(int32_t offx, int32_t offy, int32_t* r, int32_t* g, int32_t* b, int32_t* a, int32_t* z, rdp_span_aux* userdata, const rdp_poly_state &object)
3304 {
3305 if (userdata->m_current_pix_cvg == 8)
3306 {
3307 *r >>= 2;
3308 *g >>= 2;
3309 *b >>= 2;
3310 *a >>= 2;
3311 *z = (*z >> 3) & 0x7ffff;
3312 }
3313 else
3314 {
3315 int32_t summand_xr = offx * SIGN13(object.m_span_base.m_span_dr >> 14);
3316 int32_t summand_yr = offy * SIGN13(object.m_span_base.m_span_drdy >> 14);
3317 int32_t summand_xb = offx * SIGN13(object.m_span_base.m_span_db >> 14);
3318 int32_t summand_yb = offy * SIGN13(object.m_span_base.m_span_dbdy >> 14);
3319 int32_t summand_xg = offx * SIGN13(object.m_span_base.m_span_dg >> 14);
3320 int32_t summand_yg = offy * SIGN13(object.m_span_base.m_span_dgdy >> 14);
3321 int32_t summand_xa = offx * SIGN13(object.m_span_base.m_span_da >> 14);
3322 int32_t summand_ya = offy * SIGN13(object.m_span_base.m_span_dady >> 14);
3323
3324 int32_t summand_xz = offx * SIGN22(object.m_span_base.m_span_dz >> 10);
3325 int32_t summand_yz = offy * SIGN22(object.m_span_base.m_span_dzdy >> 10);
3326
3327 *r = ((*r << 2) + summand_xr + summand_yr) >> 4;
3328 *g = ((*g << 2) + summand_xg + summand_yg) >> 4;
3329 *b = ((*b << 2) + summand_xb + summand_yb) >> 4;
3330 *a = ((*a << 2) + summand_xa + summand_ya) >> 4;
3331 *z = (((*z << 2) + summand_xz + summand_yz) >> 5) & 0x7ffff;
3332 }
3333 }
3334
write_pixel(uint32_t curpixel,color_t & color,rdp_span_aux * userdata,const rdp_poly_state & object)3335 inline void n64_rdp::write_pixel(uint32_t curpixel, color_t& color, rdp_span_aux* userdata, const rdp_poly_state &object)
3336 {
3337 if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3338 {
3339 const uint32_t fb = (object.m_misc_state.m_fb_address >> 1) + curpixel;
3340
3341 uint16_t finalcolor;
3342 if (object.m_other_modes.color_on_cvg && !userdata->m_pre_wrap)
3343 {
3344 finalcolor = RREADIDX16(fb) & 0xfffe;
3345 }
3346 else
3347 {
3348 color.shr_imm(3);
3349 finalcolor = (color.get_r() << 11) | (color.get_g() << 6) | (color.get_b() << 1);
3350 }
3351
3352 switch (object.m_other_modes.cvg_dest)
3353 {
3354 case 0:
3355 if (userdata->m_blend_enable)
3356 {
3357 uint32_t finalcvg = userdata->m_current_pix_cvg + userdata->m_current_mem_cvg;
3358 if (finalcvg & 8)
3359 {
3360 finalcvg = 7;
3361 }
3362 RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
3363 HWRITEADDR8(fb, finalcvg & 3);
3364 }
3365 else
3366 {
3367 const uint32_t finalcvg = (userdata->m_current_pix_cvg - 1) & 7;
3368 RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
3369 HWRITEADDR8(fb, finalcvg & 3);
3370 }
3371 break;
3372 case 1:
3373 {
3374 const uint32_t finalcvg = (userdata->m_current_pix_cvg + userdata->m_current_mem_cvg) & 7;
3375 RWRITEIDX16(fb, finalcolor | (finalcvg >> 2));
3376 HWRITEADDR8(fb, finalcvg & 3);
3377 break;
3378 }
3379 case 2:
3380 RWRITEIDX16(fb, finalcolor | 1);
3381 HWRITEADDR8(fb, 3);
3382 break;
3383 case 3:
3384 RWRITEIDX16(fb, finalcolor | (userdata->m_current_mem_cvg >> 2));
3385 HWRITEADDR8(fb, userdata->m_current_mem_cvg & 3);
3386 break;
3387 }
3388 }
3389 else // 32-bit framebuffer
3390 {
3391 const uint32_t fb = (object.m_misc_state.m_fb_address >> 2) + curpixel;
3392
3393 uint32_t finalcolor;
3394 if (object.m_other_modes.color_on_cvg && !userdata->m_pre_wrap)
3395 {
3396 finalcolor = RREADIDX32(fb) & 0xffffff00;
3397 }
3398 else
3399 {
3400 finalcolor = (color.get_r() << 24) | (color.get_g() << 16) | (color.get_b() << 8);
3401 }
3402
3403 switch (object.m_other_modes.cvg_dest)
3404 {
3405 case 0:
3406 if (userdata->m_blend_enable)
3407 {
3408 uint32_t finalcvg = userdata->m_current_pix_cvg + userdata->m_current_mem_cvg;
3409 if (finalcvg & 8)
3410 {
3411 finalcvg = 7;
3412 }
3413
3414 RWRITEIDX32(fb, finalcolor | (finalcvg << 5));
3415 }
3416 else
3417 {
3418 RWRITEIDX32(fb, finalcolor | (((userdata->m_current_pix_cvg - 1) & 7) << 5));
3419 }
3420 break;
3421 case 1:
3422 RWRITEIDX32(fb, finalcolor | (((userdata->m_current_pix_cvg + userdata->m_current_mem_cvg) & 7) << 5));
3423 break;
3424 case 2:
3425 RWRITEIDX32(fb, finalcolor | 0xE0);
3426 break;
3427 case 3:
3428 RWRITEIDX32(fb, finalcolor | (userdata->m_current_mem_cvg << 5));
3429 break;
3430 }
3431 }
3432 }
3433
read_pixel(uint32_t curpixel,rdp_span_aux * userdata,const rdp_poly_state & object)3434 inline void n64_rdp::read_pixel(uint32_t curpixel, rdp_span_aux* userdata, const rdp_poly_state &object)
3435 {
3436 if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3437 {
3438 const uint16_t fword = RREADIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel);
3439
3440 userdata->m_memory_color.set(0, GETHICOL(fword), GETMEDCOL(fword), GETLOWCOL(fword));
3441 if (object.m_other_modes.image_read_en)
3442 {
3443 uint8_t hbyte = HREADADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel);
3444 userdata->m_memory_color.set_a(userdata->m_current_mem_cvg << 5);
3445 userdata->m_current_mem_cvg = ((fword & 1) << 2) | (hbyte & 3);
3446 }
3447 else
3448 {
3449 userdata->m_memory_color.set_a(0xff);
3450 userdata->m_current_mem_cvg = 7;
3451 }
3452 }
3453 else // 32-bit framebuffer
3454 {
3455 const uint32_t mem = RREADIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel);
3456 userdata->m_memory_color.set(0, (mem >> 24) & 0xff, (mem >> 16) & 0xff, (mem >> 8) & 0xff);
3457 if (object.m_other_modes.image_read_en)
3458 {
3459 userdata->m_memory_color.set_a(mem & 0xff);
3460 userdata->m_current_mem_cvg = (mem >> 5) & 7;
3461 }
3462 else
3463 {
3464 userdata->m_memory_color.set_a(0xff);
3465 userdata->m_current_mem_cvg = 7;
3466 }
3467 }
3468 }
3469
copy_pixel(uint32_t curpixel,color_t & color,const rdp_poly_state & object)3470 inline void n64_rdp::copy_pixel(uint32_t curpixel, color_t& color, const rdp_poly_state &object)
3471 {
3472 const uint32_t current_pix_cvg = color.get_a() ? 7 : 0;
3473 const uint8_t r = color.get_r(); // Vectorize me
3474 const uint8_t g = color.get_g();
3475 const uint8_t b = color.get_b();
3476 if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3477 {
3478 RWRITEIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel, ((r >> 3) << 11) | ((g >> 3) << 6) | ((b >> 3) << 1) | ((current_pix_cvg >> 2) & 1));
3479 HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel, current_pix_cvg & 3);
3480 }
3481 else // 32-bit framebuffer
3482 {
3483 RWRITEIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel, (r << 24) | (g << 16) | (b << 8) | (current_pix_cvg << 5));
3484 }
3485 }
3486
fill_pixel(uint32_t curpixel,const rdp_poly_state & object)3487 inline void n64_rdp::fill_pixel(uint32_t curpixel, const rdp_poly_state &object)
3488 {
3489 if (object.m_misc_state.m_fb_size == 2) // 16-bit framebuffer
3490 {
3491 uint16_t val;
3492 if (curpixel & 1)
3493 {
3494 val = object.m_fill_color & 0xffff;
3495 }
3496 else
3497 {
3498 val = (object.m_fill_color >> 16) & 0xffff;
3499 }
3500 RWRITEIDX16((object.m_misc_state.m_fb_address >> 1) + curpixel, val);
3501 HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + curpixel, ((val & 1) << 1) | (val & 1));
3502 }
3503 else // 32-bit framebuffer
3504 {
3505 RWRITEIDX32((object.m_misc_state.m_fb_address >> 2) + curpixel, object.m_fill_color);
3506 HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + (curpixel << 1), (object.m_fill_color & 0x10000) ? 3 : 0);
3507 HWRITEADDR8((object.m_misc_state.m_fb_address >> 1) + (curpixel << 1) + 1, (object.m_fill_color & 0x1) ? 3 : 0);
3508 }
3509 }
3510
// Draws one horizontal span of a primitive with the one-cycle pipeline.
//
//   scanline  row being drawn; used for the framebuffer row offset and for the
//             dither lookup
//   extent    span description: startx/stopx, the starting values of the
//             interpolated parameters, and a pointer to the per-span
//             rdp_span_aux scratch state
//   object    per-primitive state (scissor, other-modes, span deltas, ...)
//   threadid  not used in this function
//
// The span is walked from userdata->m_unscissored_rx towards extent.startx.
// Every step advances all interpolants, but a pixel is only shaded when it is
// inside the scissor range [m_xh, m_xl) and has not passed extent.stopx.
// For each shaded pixel: texel 0 is sampled, the colour combiner is evaluated
// once using the index-1 combiner inputs, the framebuffer pixel is read, and
// if the depth compare passes and the blender reports the pixel as rendered,
// colour (and depth, when z_update_en is set) is written back.
void n64_rdp::span_draw_1cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
{
	assert(object.m_misc_state.m_fb_size >= 2 && object.m_misc_state.m_fb_size < 4);

	const int32_t clipx1 = object.m_scissor.m_xh;
	const int32_t clipx2 = object.m_scissor.m_xl;
	const int32_t tilenum = object.tilenum;
	const bool flip = object.flip;

	// Running values of the interpolated parameters, seeded from the extent.
	span_param_t r; r.w = extent.param[SPAN_R].start;
	span_param_t g; g.w = extent.param[SPAN_G].start;
	span_param_t b; b.w = extent.param[SPAN_B].start;
	span_param_t a; a.w = extent.param[SPAN_A].start;
	span_param_t z; z.w = extent.param[SPAN_Z].start;
	span_param_t s; s.w = extent.param[SPAN_S].start;
	span_param_t t; t.w = extent.param[SPAN_T].start;
	span_param_t w; w.w = extent.param[SPAN_W].start;

	// Two views of the depth buffer base: halved (zb) and unshifted (zhb).
	// Both are offset by the pixel index below and handed to z_compare/z_store.
	const uint32_t zb = object.m_misc_state.m_zb_address >> 1;
	const uint32_t zhb = object.m_misc_state.m_zb_address;

	// Debug guard: reject a userdata pointer that still holds the 0xCC fill pattern.
#ifdef PTR64
	assert(extent.userdata != (const void *)0xcccccccccccccccc);
#else
	assert(extent.userdata != (const void *)0xcccccccc);
#endif
	rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata;

	m_tex_pipe.calculate_clamp_diffs(tilenum, userdata, object);

	// Blender hints derived from which colour sources the cycle-0 blender inputs point at.
	const bool partialreject = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_inv_pixel_color && userdata->m_color_inputs.blender1b_a[0] == &userdata->m_pixel_color);
	const int32_t sel0 = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_memory_color) ? 1 : 0;

	int32_t drinc, dginc, dbinc, dainc;
	int32_t dzinc, dzpix;
	int32_t dsinc, dtinc, dwinc;
	int32_t xinc;

	// Without flip the span is walked towards decreasing x, so every per-pixel
	// delta is negated; with flip the deltas are used as-is.
	if (!flip)
	{
		drinc = -object.m_span_base.m_span_dr;
		dginc = -object.m_span_base.m_span_dg;
		dbinc = -object.m_span_base.m_span_db;
		dainc = -object.m_span_base.m_span_da;
		dzinc = -object.m_span_base.m_span_dz;
		dsinc = -object.m_span_base.m_span_ds;
		dtinc = -object.m_span_base.m_span_dt;
		dwinc = -object.m_span_base.m_span_dw;
		xinc = -1;
	}
	else
	{
		drinc = object.m_span_base.m_span_dr;
		dginc = object.m_span_base.m_span_dg;
		dbinc = object.m_span_base.m_span_db;
		dainc = object.m_span_base.m_span_da;
		dzinc = object.m_span_base.m_span_dz;
		dsinc = object.m_span_base.m_span_ds;
		dtinc = object.m_span_base.m_span_dt;
		dwinc = object.m_span_base.m_span_dw;
		xinc = 1;
	}

	// Pixel index of the first pixel on this scanline.
	const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;

	const int32_t xstart = extent.startx;
	const int32_t xend = userdata->m_unscissored_rx;
	const int32_t xend_scissored = extent.stopx;

	int32_t x = xend;

	// The loop below runs length + 1 times (inclusive of both ends).
	const int32_t length = flip ? (xstart - xend) : (xend - xstart);

	// With z_source_sel set, depth comes from the primitive's constant Z/dZ
	// and is not interpolated along the span.
	if(object.m_other_modes.z_source_sel)
	{
		z.w = object.m_misc_state.m_primitive_z;
		dzpix = object.m_misc_state.m_primitive_dz;
		dzinc = 0;
	}
	else
	{
		dzpix = object.m_span_base.m_span_dzpix;
	}

	// NOTE(review): this check admits m_fb_size == 4, which the assert at the
	// top of the function rejects - confirm which bound is intended.
	if (object.m_misc_state.m_fb_size < 2 || object.m_misc_state.m_fb_size > 4)
		fatalerror("unsupported m_fb_size %d\n", object.m_misc_state.m_fb_size);

	// Indices into the blender and texture-pipeline function tables.
	const int32_t blend_index = (object.m_other_modes.alpha_cvg_select ? 2 : 0) | ((object.m_other_modes.rgb_dither_sel < 3) ? 1 : 0);
	const int32_t cycle0 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp0 & 1);

	int32_t sss = 0;
	int32_t sst = 0;

	// Initial texture coordinates for the first pixel, from the integer parts of s/t/w.
	if (object.m_other_modes.persp_tex_en)
	{
		tc_div(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}
	else
	{
		tc_div_no_perspective(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}

	userdata->m_start_span = true;
	for (int32_t j = 0; j <= length; j++)
	{
		int32_t sr = r.w >> 14;
		int32_t sg = g.w >> 14;
		int32_t sb = b.w >> 14;
		int32_t sa = a.w >> 14;
		int32_t sz = (z.w >> 10) & 0x3fffff;
		// False once x has moved past the scissored end of the span.
		const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored);

		if (x >= clipx1 && x < clipx2 && valid_x)
		{
			uint8_t offx, offy;
			lookup_cvmask_derivatives(userdata->m_cvg[x], &offx, &offy, userdata);

			m_tex_pipe.lod_1cycle(&sss, &sst, s.w, t.w, w.w, dsinc, dtinc, dwinc, userdata, object);

			rgbaz_correct_triangle(offx, offy, &sr, &sg, &sb, &sa, &sz, userdata, object);
			rgbaz_clip(sr, sg, sb, sa, &sz, userdata);

			// Sample texel 0 and replicate its alpha into all channels of m_texel0_alpha.
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tilenum, 0, userdata, object);
			uint32_t t0a = userdata->m_texel0_color.get_a();
			userdata->m_texel0_alpha.set(t0a, t0a, t0a, t0a);

			const uint8_t noise = machine().rand() << 3; // Not accurate
			userdata->m_noise_color.set(0, noise, noise, noise);

			// Colour combiner, using the index-1 input set. RGB inputs and alpha
			// inputs are selected separately and merged into one vector each.
			rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_rgbsub_a[1]);
			rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_rgbsub_b[1]);
			rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_rgbmul[1]);
			rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_rgbadd[1]);

			rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[1]);
			rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[1]);
			rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[1]);
			rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[1]);

			rgbsub_a.sign_extend(0x180, 0xfffffe00);
			rgbsub_b.sign_extend(0x180, 0xfffffe00);
			rgbadd.sign_extend(0x180, 0xfffffe00);

			// Going by the rgbaint_t method names, this evaluates
			// ((sub_a - sub_b) * mul + (add << 8) + 0x80) >> 8 per channel.
			rgbadd.shl_imm(8);
			rgbsub_a.sub(rgbsub_b);
			rgbsub_a.mul(rgbmul);
			rgbsub_a.add(rgbadd);
			rgbsub_a.add_imm(0x0080);
			rgbsub_a.sra_imm(8);
			rgbsub_a.clamp_and_clear(0xfffffe00);

			userdata->m_pixel_color = rgbsub_a;

			//Alpha coverage combiner
			userdata->m_pixel_color.set_a(get_alpha_cvg(userdata->m_pixel_color.get_a(), userdata, object));

			const uint32_t curpixel = fb_index + x;
			const uint32_t zbcur = zb + curpixel;
			const uint32_t zhbcur = zhb + curpixel;

			read_pixel(curpixel, userdata, object);

			if(z_compare(zbcur, zhbcur, sz, dzpix, userdata, object))
			{
				int32_t cdith = 0;
				int32_t adith = 0;
				// Note the dither lookup is keyed on the loop counter j, not on x.
				get_dither_values(scanline, j, &cdith, &adith, object);

				// Blend function is chosen by m_blend_enable (bit 2) plus blend_index (bits 0-1).
				color_t blended_pixel;
				bool rendered = ((&m_blender)->*(m_blender.blend1[(userdata->m_blend_enable << 2) | blend_index]))(blended_pixel, cdith, adith, partialreject, sel0, userdata, object);

				if (rendered)
				{
					write_pixel(curpixel, blended_pixel, userdata, object);
					if (object.m_other_modes.z_update_en)
					{
						z_store(object, zbcur, zhbcur, sz, userdata->m_dzpix_enc);
					}
				}
			}

			// Texture coordinates for the next pixel are taken from userdata
			// (presumably filled in by lod_1cycle above - confirm).
			sss = userdata->m_precomp_s;
			sst = userdata->m_precomp_t;
		}

		// Interpolants advance on every step, including for pixels that were skipped.
		r.w += drinc;
		g.w += dginc;
		b.w += dbinc;
		a.w += dainc;
		s.w += dsinc;
		t.w += dtinc;
		w.w += dwinc;
		z.w += dzinc;

		x += xinc;
	}
}
3708
// Draws one horizontal span of a primitive with the two-cycle pipeline.
//
//   scanline  row being drawn; used for the framebuffer row offset and for the
//             dither lookup
//   extent    span description: startx/stopx, the starting values of the
//             interpolated parameters, and a pointer to the per-span
//             rdp_span_aux scratch state
//   object    per-primitive state (scissor, other-modes, span deltas, ...)
//   threadid  not used in this function
//
// The span is walked from userdata->m_unscissored_rx towards extent.startx.
// Every step advances all interpolants, but a pixel is only shaded when it is
// inside the scissor range [m_xh, m_xl) and has not passed extent.stopx.
// For each shaded pixel: three texels are sampled (texel 0, texel 1 and a
// "next" texel), the colour combiner is evaluated twice (first with the
// index-0 inputs, then with the index-1 inputs), the framebuffer pixel is
// read, and if the depth compare passes and the blender reports the pixel as
// rendered, colour (and depth, when z_update_en is set) is written back.
void n64_rdp::span_draw_2cycle(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
{
	assert(object.m_misc_state.m_fb_size >= 2 && object.m_misc_state.m_fb_size < 4);

	const int32_t clipx1 = object.m_scissor.m_xh;
	const int32_t clipx2 = object.m_scissor.m_xl;
	const int32_t tilenum = object.tilenum;
	const bool flip = object.flip;

	// Running values of the interpolated parameters, seeded from the extent.
	span_param_t r; r.w = extent.param[SPAN_R].start;
	span_param_t g; g.w = extent.param[SPAN_G].start;
	span_param_t b; b.w = extent.param[SPAN_B].start;
	span_param_t a; a.w = extent.param[SPAN_A].start;
	span_param_t z; z.w = extent.param[SPAN_Z].start;
	span_param_t s; s.w = extent.param[SPAN_S].start;
	span_param_t t; t.w = extent.param[SPAN_T].start;
	span_param_t w; w.w = extent.param[SPAN_W].start;

	// Two views of the depth buffer base: halved (zb) and unshifted (zhb).
	// Both are offset by the pixel index below and handed to z_compare/z_store.
	const uint32_t zb = object.m_misc_state.m_zb_address >> 1;
	const uint32_t zhb = object.m_misc_state.m_zb_address;

	// tile1/tile2 start as the primitive's tile and the following one (mod 8);
	// lod_2cycle receives both by pointer and may change them.
	int32_t tile2 = (tilenum + 1) & 7;
	int32_t tile1 = tilenum;
	const uint32_t prim_tile = tilenum;

	int32_t newtile1 = tile1;
	int32_t news = 0;
	int32_t newt = 0;

	// Debug guard: reject a userdata pointer that still holds the 0xCC fill pattern.
#ifdef PTR64
	assert(extent.userdata != (const void *)0xcccccccccccccccc);
#else
	assert(extent.userdata != (const void *)0xcccccccc);
#endif
	rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata;

	m_tex_pipe.calculate_clamp_diffs(tile1, userdata, object);

	// Blender hints derived from which colour sources the blender inputs point at.
	// partialreject looks at the cycle-1 inputs; sel0/sel1 cover cycle 0 and cycle 1.
	bool partialreject = (userdata->m_color_inputs.blender2b_a[1] == &userdata->m_inv_pixel_color && userdata->m_color_inputs.blender1b_a[1] == &userdata->m_pixel_color);
	int32_t sel0 = (userdata->m_color_inputs.blender2b_a[0] == &userdata->m_memory_color) ? 1 : 0;
	int32_t sel1 = (userdata->m_color_inputs.blender2b_a[1] == &userdata->m_memory_color) ? 1 : 0;

	int32_t drinc, dginc, dbinc, dainc;
	int32_t dzinc, dzpix;
	int32_t dsinc, dtinc, dwinc;
	int32_t xinc;

	// Without flip the span is walked towards decreasing x, so every per-pixel
	// delta is negated; with flip the deltas are used as-is.
	if (!flip)
	{
		drinc = -object.m_span_base.m_span_dr;
		dginc = -object.m_span_base.m_span_dg;
		dbinc = -object.m_span_base.m_span_db;
		dainc = -object.m_span_base.m_span_da;
		dzinc = -object.m_span_base.m_span_dz;
		dsinc = -object.m_span_base.m_span_ds;
		dtinc = -object.m_span_base.m_span_dt;
		dwinc = -object.m_span_base.m_span_dw;
		xinc = -1;
	}
	else
	{
		drinc = object.m_span_base.m_span_dr;
		dginc = object.m_span_base.m_span_dg;
		dbinc = object.m_span_base.m_span_db;
		dainc = object.m_span_base.m_span_da;
		dzinc = object.m_span_base.m_span_dz;
		dsinc = object.m_span_base.m_span_ds;
		dtinc = object.m_span_base.m_span_dt;
		dwinc = object.m_span_base.m_span_dw;
		xinc = 1;
	}

	// Pixel index of the first pixel on this scanline.
	const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;

	int32_t cdith = 0;
	int32_t adith = 0;

	const int32_t xstart = extent.startx;
	const int32_t xend = userdata->m_unscissored_rx;
	const int32_t xend_scissored = extent.stopx;

	int32_t x = xend;

	// The loop below runs length + 1 times (inclusive of both ends).
	const int32_t length = flip ? (xstart - xend) : (xend - xstart);

	// With z_source_sel set, depth comes from the primitive's constant Z/dZ
	// and is not interpolated along the span.
	if(object.m_other_modes.z_source_sel)
	{
		z.w = object.m_misc_state.m_primitive_z;
		dzpix = object.m_misc_state.m_primitive_dz;
		dzinc = 0;
	}
	else
	{
		dzpix = object.m_span_base.m_span_dzpix;
	}

	// NOTE(review): this check admits m_fb_size == 4, which the assert at the
	// top of the function rejects - confirm which bound is intended.
	if (object.m_misc_state.m_fb_size < 2 || object.m_misc_state.m_fb_size > 4)
		fatalerror("unsupported m_fb_size %d\n", object.m_misc_state.m_fb_size);

	// Indices into the blender and texture-pipeline function tables.
	const int32_t blend_index = (object.m_other_modes.alpha_cvg_select ? 2 : 0) | ((object.m_other_modes.rgb_dither_sel < 3) ? 1 : 0);
	const int32_t cycle0 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp0 & 1);
	const int32_t cycle1 = ((object.m_other_modes.sample_type & 1) << 1) | (object.m_other_modes.bi_lerp1 & 1);

	int32_t sss = 0;
	int32_t sst = 0;

	// Initial texture coordinates for the first pixel, from the integer parts of s/t/w.
	if (object.m_other_modes.persp_tex_en)
	{
		tc_div(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}
	else
	{
		tc_div_no_perspective(s.w >> 16, t.w >> 16, w.w >> 16, &sss, &sst);
	}

	userdata->m_start_span = true;
	for (int32_t j = 0; j <= length; j++)
	{
		int32_t sr = r.w >> 14;
		int32_t sg = g.w >> 14;
		int32_t sb = b.w >> 14;
		int32_t sa = a.w >> 14;
		int32_t sz = (z.w >> 10) & 0x3fffff;

		// False once x has moved past the scissored end of the span.
		const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored);

		if (x >= clipx1 && x < clipx2 && valid_x)
		{
			// Inline coverage lookup: translate this pixel's coverage mask into
			// coverage count, coverage bit and x/y offsets via the compressed table.
			const uint32_t compidx = m_compressed_cvmasks[userdata->m_cvg[x]];
			userdata->m_current_pix_cvg = cvarray[compidx].cvg;
			userdata->m_current_cvg_bit = cvarray[compidx].cvbit;
			const uint8_t offx = cvarray[compidx].xoff;
			const uint8_t offy = cvarray[compidx].yoff;
			//lookup_cvmask_derivatives(userdata->m_cvg[x], &offx, &offy, userdata);

			m_tex_pipe.lod_2cycle(&sss, &sst, s.w, t.w, w.w, dsinc, dtinc, dwinc, prim_tile, &tile1, &tile2, userdata, object);

			// NOTE(review): news, newt and newtile1 are not read again in this
			// function after the call below; the "next texel" fetch further down
			// uses sss/sst/tile2 instead - confirm this is intended.
			news = userdata->m_precomp_s;
			newt = userdata->m_precomp_t;
			m_tex_pipe.lod_2cycle_limited(&news, &newt, s.w + dsinc, t.w + dtinc, w.w + dwinc, dsinc, dtinc, dwinc, prim_tile, &newtile1, object);

			rgbaz_correct_triangle(offx, offy, &sr, &sg, &sb, &sa, &sz, userdata, object);
			rgbaz_clip(sr, sg, sb, sa, &sz, userdata);

			// Texel fetches: texel 0 from tile1, texel 1 and the "next" texel from tile2.
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle0]))(&userdata->m_texel0_color, &userdata->m_texel0_color, sss, sst, tile1, 0, userdata, object);
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle1]))(&userdata->m_texel1_color, &userdata->m_texel0_color, sss, sst, tile2, 1, userdata, object);
			((m_tex_pipe).*(m_tex_pipe.m_cycle[cycle1]))(&userdata->m_next_texel_color, &userdata->m_next_texel_color, sss, sst, tile2, 1, userdata, object);

			// Replicate each texel's alpha into all channels of its *_alpha companion.
			uint32_t t0a = userdata->m_texel0_color.get_a();
			uint32_t t1a = userdata->m_texel1_color.get_a();
			uint32_t tna = userdata->m_next_texel_color.get_a();
			userdata->m_texel0_alpha.set(t0a, t0a, t0a, t0a);
			userdata->m_texel1_alpha.set(t1a, t1a, t1a, t1a);
			userdata->m_next_texel_alpha.set(tna, tna, tna, tna);

			const uint8_t noise = machine().rand() << 3; // Not accurate
			userdata->m_noise_color.set(0, noise, noise, noise);

			// First combiner pass, using the index-0 input set. RGB inputs and
			// alpha inputs are selected separately and merged into one vector each.
			rgbaint_t rgbsub_a(*userdata->m_color_inputs.combiner_rgbsub_a[0]);
			rgbaint_t rgbsub_b(*userdata->m_color_inputs.combiner_rgbsub_b[0]);
			rgbaint_t rgbmul(*userdata->m_color_inputs.combiner_rgbmul[0]);
			rgbaint_t rgbadd(*userdata->m_color_inputs.combiner_rgbadd[0]);

			rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[0]);
			rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[0]);
			rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[0]);
			rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[0]);

			rgbsub_a.sign_extend(0x180, 0xfffffe00);
			rgbsub_b.sign_extend(0x180, 0xfffffe00);
			rgbadd.sign_extend(0x180, 0xfffffe00);

			// Going by the rgbaint_t method names, this evaluates
			// ((sub_a - sub_b) * mul + (add << 8) + 0x80) >> 8 per channel.
			rgbadd.shl_imm(8);
			rgbsub_a.sub(rgbsub_b);
			rgbsub_a.mul(rgbmul);

			rgbsub_a.add(rgbadd);
			rgbsub_a.add_imm(0x0080);
			rgbsub_a.sra_imm(8);
			rgbsub_a.clamp_and_clear(0xfffffe00);

			// The first pass result becomes the "combined" colour, and the texel
			// registers shift down (texel0 <- texel1, texel1 <- next texel) before
			// the second pass reads them.
			userdata->m_combined_color.set(rgbsub_a);
			userdata->m_texel0_color.set(userdata->m_texel1_color);
			userdata->m_texel1_color.set(userdata->m_next_texel_color);

			uint32_t ca = userdata->m_combined_color.get_a();
			userdata->m_combined_alpha.set(ca, ca, ca, ca);
			userdata->m_texel0_alpha.set(userdata->m_texel1_alpha);
			userdata->m_texel1_alpha.set(userdata->m_next_texel_alpha);

			// Second combiner pass, same arithmetic with the index-1 input set.
			rgbsub_a.set(*userdata->m_color_inputs.combiner_rgbsub_a[1]);
			rgbsub_b.set(*userdata->m_color_inputs.combiner_rgbsub_b[1]);
			rgbmul.set(*userdata->m_color_inputs.combiner_rgbmul[1]);
			rgbadd.set(*userdata->m_color_inputs.combiner_rgbadd[1]);

			rgbsub_a.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_a[1]);
			rgbsub_b.merge_alpha(*userdata->m_color_inputs.combiner_alphasub_b[1]);
			rgbmul.merge_alpha(*userdata->m_color_inputs.combiner_alphamul[1]);
			rgbadd.merge_alpha(*userdata->m_color_inputs.combiner_alphaadd[1]);

			rgbsub_a.sign_extend(0x180, 0xfffffe00);
			rgbsub_b.sign_extend(0x180, 0xfffffe00);
			rgbadd.sign_extend(0x180, 0xfffffe00);

			rgbadd.shl_imm(8);
			rgbsub_a.sub(rgbsub_b);
			rgbsub_a.mul(rgbmul);
			rgbsub_a.add(rgbadd);
			rgbsub_a.add_imm(0x0080);
			rgbsub_a.sra_imm(8);
			rgbsub_a.clamp_and_clear(0xfffffe00);

			userdata->m_pixel_color.set(rgbsub_a);

			//Alpha coverage combiner
			userdata->m_pixel_color.set_a(get_alpha_cvg(userdata->m_pixel_color.get_a(), userdata, object));

			const uint32_t curpixel = fb_index + x;
			const uint32_t zbcur = zb + curpixel;
			const uint32_t zhbcur = zhb + curpixel;

			read_pixel(curpixel, userdata, object);

			if(z_compare(zbcur, zhbcur, sz, dzpix, userdata, object))
			{
				// Note the dither lookup is keyed on the loop counter j, not on x.
				get_dither_values(scanline, j, &cdith, &adith, object);

				// Blend function is chosen by m_blend_enable (bit 2) plus blend_index (bits 0-1).
				color_t blended_pixel;
				bool rendered = ((&m_blender)->*(m_blender.blend2[(userdata->m_blend_enable << 2) | blend_index]))(blended_pixel, cdith, adith, partialreject, sel0, sel1, userdata, object);

				if (rendered)
				{
					write_pixel(curpixel, blended_pixel, userdata, object);
					if (object.m_other_modes.z_update_en)
					{
						z_store(object, zbcur, zhbcur, sz, userdata->m_dzpix_enc);
					}
				}
			}
			// Texture coordinates for the next pixel are taken from userdata
			// (presumably filled in by lod_2cycle above - confirm).
			sss = userdata->m_precomp_s;
			sst = userdata->m_precomp_t;
		}

		// Interpolants advance on every step, including for pixels that were skipped.
		r.w += drinc;
		g.w += dginc;
		b.w += dbinc;
		a.w += dainc;
		s.w += dsinc;
		t.w += dtinc;
		w.w += dwinc;
		z.w += dzinc;

		x += xinc;
	}
}
3964
span_draw_copy(int32_t scanline,const extent_t & extent,const rdp_poly_state & object,int32_t threadid)3965 void n64_rdp::span_draw_copy(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
3966 {
3967 const int32_t clipx1 = object.m_scissor.m_xh;
3968 const int32_t clipx2 = object.m_scissor.m_xl;
3969 const int32_t tilenum = object.tilenum;
3970 const bool flip = object.flip;
3971
3972 rdp_span_aux* userdata = (rdp_span_aux*)extent.userdata;
3973 const int32_t xstart = extent.startx;
3974 const int32_t xend = userdata->m_unscissored_rx;
3975 const int32_t xend_scissored = extent.stopx;
3976 const int32_t xinc = flip ? 1 : -1;
3977 const int32_t length = flip ? (xstart - xend) : (xend - xstart);
3978
3979 span_param_t s; s.w = extent.param[SPAN_S].start;
3980 span_param_t t; t.w = extent.param[SPAN_T].start;
3981
3982 const int32_t ds = object.m_span_base.m_span_ds / 4;
3983 const int32_t dt = object.m_span_base.m_span_dt / 4;
3984 const int32_t dsinc = flip ? (ds) : -ds;
3985 const int32_t dtinc = flip ? (dt) : -dt;
3986
3987 const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;
3988
3989 int32_t x = xend;
3990
3991 for (int32_t j = 0; j <= length; j++)
3992 {
3993 const bool valid_x = (flip) ? (x >= xend_scissored) : (x <= xend_scissored);
3994
3995 if (x >= clipx1 && x < clipx2 && valid_x)
3996 {
3997 int32_t sss = s.h.h;
3998 int32_t sst = t.h.h;
3999 m_tex_pipe.copy(&userdata->m_texel0_color, sss, sst, tilenum, object, userdata);
4000
4001 uint32_t curpixel = fb_index + x;
4002 if ((userdata->m_texel0_color.get_a() != 0) || (!object.m_other_modes.alpha_compare_en))
4003 {
4004 copy_pixel(curpixel, userdata->m_texel0_color, object);
4005 }
4006 }
4007
4008 s.w += dsinc;
4009 t.w += dtinc;
4010 x += xinc;
4011 }
4012 }
4013
span_draw_fill(int32_t scanline,const extent_t & extent,const rdp_poly_state & object,int32_t threadid)4014 void n64_rdp::span_draw_fill(int32_t scanline, const extent_t &extent, const rdp_poly_state &object, int32_t threadid)
4015 {
4016 assert(object.m_misc_state.m_fb_size >= 2 && object.m_misc_state.m_fb_size < 4);
4017
4018 const bool flip = object.flip;
4019
4020 const int32_t clipx1 = object.m_scissor.m_xh;
4021 const int32_t clipx2 = object.m_scissor.m_xl;
4022
4023 const int32_t xinc = flip ? 1 : -1;
4024
4025 const int32_t fb_index = object.m_misc_state.m_fb_width * scanline;
4026
4027 const int32_t xstart = extent.startx;
4028 const int32_t xend_scissored = extent.stopx;
4029
4030 int32_t x = xend_scissored;
4031
4032 const int32_t length = flip ? (xstart - xend_scissored) : (xend_scissored - xstart);
4033
4034 for (int32_t j = 0; j <= length; j++)
4035 {
4036 if (x >= clipx1 && x < clipx2)
4037 {
4038 fill_pixel(fb_index + x, object);
4039 }
4040
4041 x += xinc;
4042 }
4043 }
4044